aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Bill Yi <byi@google.com> 2015-06-23 13:53:11 -0700
committer: Bill Yi <byi@google.com> 2015-06-23 13:53:11 -0700
commit: 4e213d510f437769f8a28578dd4f786fb7d16c44 (patch)
tree: 0d5cbd5a7eee87b3dca5820d282ef618a7e25991
download: fec-4e213d510f437769f8a28578dd4f786fb7d16c44.tar.gz
Initial code (nougat-mr1-arc)
-rw-r--r--INSTALL39
-rw-r--r--README120
-rw-r--r--ccsds.h5
-rw-r--r--char.h24
-rw-r--r--config.guess1516
-rw-r--r--config.h.in19
-rwxr-xr-xconfig.sub1362
-rwxr-xr-xconfigure4357
-rw-r--r--configure.in83
-rw-r--r--cpu_features.s15
-rw-r--r--cpu_mode_ppc.c40
-rw-r--r--cpu_mode_x86.c33
-rw-r--r--decode_rs.c262
-rw-r--r--decode_rs.h298
-rw-r--r--decode_rs_8.c24
-rw-r--r--decode_rs_ccsds.c26
-rw-r--r--decode_rs_char.c22
-rw-r--r--decode_rs_int.c22
-rw-r--r--dotprod.c94
-rw-r--r--dotprod.h15
-rw-r--r--dotprod_av.c93
-rw-r--r--dotprod_mmx.c81
-rw-r--r--dotprod_mmx_assist.s83
-rw-r--r--dotprod_port.c58
-rw-r--r--dotprod_sse2.c72
-rw-r--r--dotprod_sse2_assist.s85
-rw-r--r--dsp.363
-rw-r--r--dtest.c99
-rw-r--r--encode_rs.c52
-rw-r--r--encode_rs.h58
-rw-r--r--encode_rs_8.c109
-rw-r--r--encode_rs_av.c61
-rw-r--r--encode_rs_ccsds.c24
-rw-r--r--encode_rs_char.c15
-rw-r--r--encode_rs_int.c15
-rw-r--r--exercise.c122
-rw-r--r--fec.c66
-rw-r--r--fec.h347
-rw-r--r--fixed.h33
-rw-r--r--gen_ccsds.c39
-rw-r--r--gen_ccsds_tal.c53
-rw-r--r--init_rs.c39
-rw-r--r--init_rs.h106
-rw-r--r--init_rs_char.c35
-rw-r--r--init_rs_int.c35
-rwxr-xr-xinstall-sh251
-rw-r--r--int.h22
-rw-r--r--lesser.txt504
-rw-r--r--makefile.in242
-rw-r--r--mmxbfly27.s148
-rw-r--r--mmxbfly29.s161
-rw-r--r--peak_mmx_assist.s70
-rw-r--r--peak_sse2_assist.s51
-rw-r--r--peak_sse_assist.s49
-rw-r--r--peaktest.c38
-rw-r--r--peakval.c39
-rw-r--r--peakval_av.c61
-rw-r--r--peakval_mmx.c34
-rw-r--r--peakval_mmx_assist.s70
-rw-r--r--peakval_port.c16
-rw-r--r--peakval_sse.c35
-rw-r--r--peakval_sse2.c34
-rw-r--r--peakval_sse2_assist.s51
-rw-r--r--peakval_sse_assist.s49
-rw-r--r--rs-common.h26
-rw-r--r--rs.3198
-rw-r--r--rs_speedtest.c54
-rw-r--r--rstest.c296
-rw-r--r--sim.c43
-rw-r--r--simd-viterbi.3247
-rw-r--r--sqtest.c42
-rw-r--r--sse2bfly27.s202
-rw-r--r--sse2bfly29.s245
-rw-r--r--ssebfly27.s205
-rw-r--r--ssebfly29.s271
-rw-r--r--sumsq.c40
-rw-r--r--sumsq_av.c78
-rw-r--r--sumsq_mmx.c35
-rw-r--r--sumsq_mmx_assist.s83
-rw-r--r--sumsq_port.c16
-rw-r--r--sumsq_sse2.c33
-rw-r--r--sumsq_sse2_assist.s49
-rw-r--r--sumsq_test.c101
-rw-r--r--viterbi27.c161
-rw-r--r--viterbi27_av.c210
-rw-r--r--viterbi27_mmx.c115
-rw-r--r--viterbi27_port.c191
-rw-r--r--viterbi27_sse.c113
-rw-r--r--viterbi27_sse2.c180
-rw-r--r--viterbi29.c152
-rw-r--r--viterbi29_av.c190
-rw-r--r--viterbi29_mmx.c118
-rw-r--r--viterbi29_port.c166
-rw-r--r--viterbi29_sse.c114
-rw-r--r--viterbi29_sse2.c119
-rw-r--r--viterbi39.c153
-rw-r--r--viterbi39_av.c251
-rw-r--r--viterbi39_mmx.c185
-rw-r--r--viterbi39_port.c168
-rw-r--r--viterbi39_sse.c201
-rw-r--r--viterbi39_sse2.c200
-rw-r--r--viterbi615.c155
-rw-r--r--viterbi615_av.c257
-rw-r--r--viterbi615_mmx.c183
-rw-r--r--viterbi615_port.c156
-rw-r--r--viterbi615_sse.c201
-rw-r--r--viterbi615_sse2.c204
-rw-r--r--vtest27.c184
-rw-r--r--vtest29.c185
-rw-r--r--vtest39.c186
-rw-r--r--vtest615.c191
111 files changed, 19297 insertions, 0 deletions
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..e9e5baf
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,39 @@
+INSTALLATION INSTRUCTIONS
+
+To build and install the libfec libraries, simply say
+
+./configure
+make
+make test (optional)
+make install (as root)
+
+By default, "make install" puts the libfec libraries in
+/usr/local/lib, the include files in /usr/local/include, and the
+manual page in /usr/local/man.
+
+If the build fails on SSE2 instructions, you may have an old version of
+the GNU assembler that cannot handle the relatively new SSE2 mnemonics.
+In that case, update your version of the GNU "binutils" package.
+
+You may obtain the latest binutils package through your normal
+distribution channels or from:
+
+http://sources.redhat.com/binutils/
+
+TESTING THE FEC LIBRARY
+
+After running the ./configure script, optional tests can be built and
+run as follows:
+
+make test
+
+"make test" tests each routine, using the SIMD versions as
+appropriate, verifying correct operation and estimating Viterbi
+decoding speeds. These tests should always succeed unless something is
+broken.
+
+28 Mar 2004
+Phil Karn, karn@ka9q.net
+
+
+
diff --git a/README b/README
new file mode 100644
index 0000000..95253e2
--- /dev/null
+++ b/README
@@ -0,0 +1,120 @@
+COPYRIGHT
+
+This package is copyright 2006 by Phil Karn, KA9Q. It may be used
+under the terms of the GNU Lesser General Public License (LGPL). See
+the file "lesser.txt" in this package for license details.
+
+INTRODUCTION
+
+This package provides a set of functions that implement several
+popular forward error correction (FEC) algorithms and several low-level routines
+useful in modems implemented with digital signal processing (DSP).
+
+The following routines are provided:
+
+1. Viterbi decoders for the following convolutional codes:
+
+r=1/2 k=7 ("Voyager" code, now a widely used industry standard)
+r=1/2 k=9 (Used on the IS-95 CDMA forward link)
+r=1/6 k=15 ("Cassini" code, used by several NASA/JPL deep space missions)
+
+2. Reed-Solomon encoders and decoders for any user-specified code.
+
+3. Optimized encoder and decoder for the CCSDS-standard (255,223)
+Reed-Solomon code, with and without the CCSDS-standard "dual basis"
+symbol representation.
+
+4. Compute dot product between a 16-bit buffer and a set of 16-bit
+coefficients. This is the basic DSP primitive for digital filtering
+and correlation.
+
+5. Compute sum of squares of a buffer of 16-bit signed integers. This is
+useful in DSP for finding the total energy in a signal.
+
+6. Find peak value in a buffer of 16-bit signed integers, useful for
+scaling a signal to prevent overflow.
+
+SIMD SUPPORT
+
+This package automatically makes use of various SIMD (Single
+Instruction stream, Multiple Data stream) instruction sets, when
+available: MMX, SSE and SSE2 on the IA-32 (Intel) architecture, and
+Altivec on the PowerPC G4 and G5 used by Power Macintoshes.
+
+"Altivec" is a Motorola trademark; Apple calls it "Velocity Engine",
+and IBM calls it "VMX". Altivec is roughly comparable to SSE2 on the
+IA-32.
+
+Many of the SIMD versions run more than an order of
+magnitude faster than their portable C versions. The available SIMD
+instruction sets, if any, are determined at run time and the proper
+version of each routine is automatically selected. If no SIMD
+instructions are available, the portable C version is invoked by
+default. On targets other than IA-32 and PPC, only the portable C
+version is built.
+
+The SIMD-assisted versions generally produce the same results as the C
+versions, with a few minor exceptions. The Viterbi decoders in C have
+a very slightly greater Eb/No performance due to their use of 32-bit
+path metrics. On the other hand, the SIMD versions use the
+"saturating" arithmetic available in these instructions to avoid the
+integer wraparounds that can occur in C when argument ranges are not
+properly constrained. This applies primarily to the "dotprod" (dot
+product) function.
+
+The MMX (MultiMedia eXtensions) instruction set was introduced on
+later Pentium CPUs; it is also implemented on the Pentium II and most
+AMD CPUs starting with the K6. SSE (Streaming SIMD Extensions) was
+introduced in the Pentium III; AMD calls it "3D Now! Professional".
+Intel introduced SSE2 on the Pentium 4, and it has been picked up by
+later AMD CPUs. SSE support implies MMX support, while SSE2 support
+implies both SSE and MMX support.
+
+The latest IA-32 SIMD instruction set, SSE3 (also known as "Prescott
+New Instructions") was introduced in early 2004 with the latest
+("Prescott") revision of the Pentium 4. Relatively little was
+introduced with SSE3, and this library currently makes no use of it.
+
+See the various manual pages for details on how to use the library
+routines.
+
+Copyright 2006, Phil Karn, KA9Q
+karn@ka9q.net
+http://www.ka9q.net/
+
+This software may be used under the terms of the GNU Lesser General
+Public License (LGPL); see the file lesser.txt for details.
+
+Revision history:
+Version 1.0 released 29 May 2001
+
+Version 2.0 released 3 Dec 2001:
+Restructured to add support for shared libraries.
+
+Version 2.0.1 released 8 Dec 2001:
+Includes autoconf/configure script
+
+Version 2.0.2 released 4 Feb 2002:
+Add SIMD version override options
+Test for lack of SSE2 mnemonic support in 'as'
+Build only selected version
+
+Version 2.0.3 released 6 Feb 2002:
+Fix to parityb function in parity.h
+
+feclib version 1.0 released November 2003
+Merged SIMD-Viterbi, RS and DSP libraries
+Changed SIMD Viterbi decoder to detect SSE2/SSE/MMX at runtime rather than build time
+
+feclib version 2.0 (unreleased) Mar 2004
+General speedups and cleanups
+Switch from 4 to 8-bit input symbols on all Viterbi decoders
+Support for Altivec on PowerPC
+Support for k=15 r=1/6 Cassini/Mars Pathfinder/Mars Exploration Rover/STEREO code
+Changed license to GNU Lesser General Public License (LGPL)
+
+feclib version 2.1 June 5 2006
+Added error checking, fixed alignment bug in SSE2 versions of Viterbi decoders causing segfaults
+
+feclib version 2.1.1 June 6 2006
+Fix test/benchmark time measurement on Linux
diff --git a/ccsds.h b/ccsds.h
new file mode 100644
index 0000000..ae65468
--- /dev/null
+++ b/ccsds.h
@@ -0,0 +1,5 @@
+typedef unsigned char data_t; /* symbol type: one unsigned byte per code symbol */
+extern unsigned char Taltab[],Tal1tab[]; /* byte lookup tables defined in another file; presumably the dual-basis conversion tables (cf. gen_ccsds_tal.c) -- confirm */
+#define NN 255 /* fixed at compile time; matches the block length of the (255,223) code named in the README */
+#define NROOTS 32 /* fixed at compile time; 255 - 223 = 32 parity symbols for that code */
+
diff --git a/char.h b/char.h
new file mode 100644
index 0000000..25efd65
--- /dev/null
+++ b/char.h
@@ -0,0 +1,24 @@
+/* Stuff specific to the 8-bit symbol version of the general purpose RS codecs
+ *
+ * Copyright 2003, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+typedef unsigned char data_t; /* symbol type: one unsigned byte per symbol */
+
+#define MODNN(x) modnn(rs,x) /* forwards to modnn(), which is not defined in this header; needs `rs` in scope */
+/* Each macro below reads a member of the structure pointed to by `rs`, so a pointer with that exact name must be in scope wherever they are used. */
+#define MM (rs->mm)
+#define NN (rs->nn)
+#define ALPHA_TO (rs->alpha_to)
+#define INDEX_OF (rs->index_of)
+#define GENPOLY (rs->genpoly)
+#define NROOTS (rs->nroots)
+#define FCR (rs->fcr)
+#define PRIM (rs->prim)
+#define IPRIM (rs->iprim)
+#define PAD (rs->pad)
+#define A0 (NN) /* alias for NN, i.e. rs->nn; presumably the index-form marker for the zero element -- confirm against the codec sources */
+
+
+
+
diff --git a/config.guess b/config.guess
new file mode 100644
index 0000000..0f0fe71
--- /dev/null
+++ b/config.guess
@@ -0,0 +1,1516 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+# 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation,
+# Inc.
+
+timestamp='2007-03-06'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+
+# Originally written by Per Bothner <per@bothner.com>.
+# Please send patches to <config-patches@gnu.org>. Submit a context
+# diff and a properly formatted ChangeLog entry.
+#
+# This script attempts to guess a canonical system name similar to
+# config.sub. If it succeeds, it prints the system name on stdout, and
+# exits with 0. Otherwise, it exits with 1.
+#
+# The plan is that this can be called by configure scripts if you
+# don't specify an explicit build system type.
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Operation modes:
+ -h, --help print this help, then exit
+ -t, --time-stamp print date of last modification, then exit
+ -v, --version print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+ case $1 in
+ --time-stamp | --time* | -t )
+ echo "$timestamp" ; exit ;;
+ --version | -v )
+ echo "$version" ; exit ;;
+ --help | --h* | -h )
+ echo "$usage"; exit ;;
+ -- ) # Stop option processing
+ shift; break ;;
+ - ) # Use stdin as input.
+ break ;;
+ -* )
+ echo "$me: invalid option $1$help" >&2
+ exit 1 ;;
+ * )
+ break ;;
+ esac
+done
+
+if test $# != 0; then
+ echo "$me: too many arguments$help" >&2
+ exit 1
+fi
+
+trap 'exit 1' 1 2 15
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+# use `HOST_CC' if defined, but it is deprecated.
+
+# Portable tmp directory creation inspired by the Autoconf team.
+
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,) echo "int x;" > $dummy.c ;
+ for c in cc gcc c89 c99 ; do
+ if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
+ CC_FOR_BUILD="$c"; break ;
+ fi ;
+ done ;
+ if test x"$CC_FOR_BUILD" = x ; then
+ CC_FOR_BUILD=no_compiler_found ;
+ fi
+ ;;
+ ,,*) CC_FOR_BUILD=$CC ;;
+ ,*,*) CC_FOR_BUILD=$HOST_CC ;;
+esac ; set_cc_for_build= ;'
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 1994-08-24)
+if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+ PATH=$PATH:/.attbin ; export PATH
+fi
+
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+ *:NetBSD:*:*)
+ # NetBSD (nbsd) targets should (where applicable) match one or
+ # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+ # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
+ # switched to ELF, *-*-netbsd* would select the old
+ # object file format. This provides both forward
+ # compatibility and a consistent mechanism for selecting the
+ # object file format.
+ #
+ # Note: NetBSD doesn't particularly care about the vendor
+ # portion of the name. We always set it to "unknown".
+ sysctl="sysctl -n hw.machine_arch"
+ UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
+ /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
+ case "${UNAME_MACHINE_ARCH}" in
+ armeb) machine=armeb-unknown ;;
+ arm*) machine=arm-unknown ;;
+ sh3el) machine=shl-unknown ;;
+ sh3eb) machine=sh-unknown ;;
+ sh5el) machine=sh5le-unknown ;;
+ *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
+ esac
+ # The Operating System including object format, if it has switched
+ # to ELF recently, or will in the future.
+ case "${UNAME_MACHINE_ARCH}" in
+ arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+ eval $set_cc_for_build
+ if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep __ELF__ >/dev/null
+ then
+ # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+ # Return netbsd for either. FIX?
+ os=netbsd
+ else
+ os=netbsdelf
+ fi
+ ;;
+ *)
+ os=netbsd
+ ;;
+ esac
+ # The OS release
+ # Debian GNU/NetBSD machines have a different userland, and
+ # thus, need a distinct triplet. However, they do not need
+ # kernel version information, so it can be replaced with a
+ # suitable tag, in the style of linux-gnu.
+ case "${UNAME_VERSION}" in
+ Debian*)
+ release='-gnu'
+ ;;
+ *)
+ release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ ;;
+ esac
+ # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+ # contains redundant information, the shorter form:
+ # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+ echo "${machine}-${os}${release}"
+ exit ;;
+ *:OpenBSD:*:*)
+ UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+ echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
+ exit ;;
+ *:ekkoBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
+ exit ;;
+ *:SolidBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
+ exit ;;
+ macppc:MirBSD:*:*)
+ echo powerpc-unknown-mirbsd${UNAME_RELEASE}
+ exit ;;
+ *:MirBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
+ exit ;;
+ alpha:OSF1:*:*)
+ case $UNAME_RELEASE in
+ *4.0)
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+ ;;
+ *5.*)
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+ ;;
+ esac
+ # According to Compaq, /usr/sbin/psrinfo has been available on
+ # OSF/1 and Tru64 systems produced since 1995. I hope that
+ # covers most systems running today. This code pipes the CPU
+ # types through head -n 1, so we only detect the type of CPU 0.
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+ case "$ALPHA_CPU_TYPE" in
+ "EV4 (21064)")
+ UNAME_MACHINE="alpha" ;;
+ "EV4.5 (21064)")
+ UNAME_MACHINE="alpha" ;;
+ "LCA4 (21066/21068)")
+ UNAME_MACHINE="alpha" ;;
+ "EV5 (21164)")
+ UNAME_MACHINE="alphaev5" ;;
+ "EV5.6 (21164A)")
+ UNAME_MACHINE="alphaev56" ;;
+ "EV5.6 (21164PC)")
+ UNAME_MACHINE="alphapca56" ;;
+ "EV5.7 (21164PC)")
+ UNAME_MACHINE="alphapca57" ;;
+ "EV6 (21264)")
+ UNAME_MACHINE="alphaev6" ;;
+ "EV6.7 (21264A)")
+ UNAME_MACHINE="alphaev67" ;;
+ "EV6.8CB (21264C)")
+ UNAME_MACHINE="alphaev68" ;;
+ "EV6.8AL (21264B)")
+ UNAME_MACHINE="alphaev68" ;;
+ "EV6.8CX (21264D)")
+ UNAME_MACHINE="alphaev68" ;;
+ "EV6.9A (21264/EV69A)")
+ UNAME_MACHINE="alphaev69" ;;
+ "EV7 (21364)")
+ UNAME_MACHINE="alphaev7" ;;
+ "EV7.9 (21364A)")
+ UNAME_MACHINE="alphaev79" ;;
+ esac
+ # A Pn.n version is a patched version.
+ # A Vn.n version is a released version.
+ # A Tn.n version is a released field test version.
+ # A Xn.n version is an unreleased experimental baselevel.
+ # 1.2 uses "1.2" for uname -r.
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+ exit ;;
+ Alpha\ *:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # Should we change UNAME_MACHINE based on the output of uname instead
+ # of the specific Alpha model?
+ echo alpha-pc-interix
+ exit ;;
+ 21064:Windows_NT:50:3)
+ echo alpha-dec-winnt3.5
+ exit ;;
+ Amiga*:UNIX_System_V:4.0:*)
+ echo m68k-unknown-sysv4
+ exit ;;
+ *:[Aa]miga[Oo][Ss]:*:*)
+ echo ${UNAME_MACHINE}-unknown-amigaos
+ exit ;;
+ *:[Mm]orph[Oo][Ss]:*:*)
+ echo ${UNAME_MACHINE}-unknown-morphos
+ exit ;;
+ *:OS/390:*:*)
+ echo i370-ibm-openedition
+ exit ;;
+ *:z/VM:*:*)
+ echo s390-ibm-zvmoe
+ exit ;;
+ *:OS400:*:*)
+ echo powerpc-ibm-os400
+ exit ;;
+ arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+ echo arm-acorn-riscix${UNAME_RELEASE}
+ exit ;;
+ arm:riscos:*:*|arm:RISCOS:*:*)
+ echo arm-unknown-riscos
+ exit ;;
+ SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
+ echo hppa1.1-hitachi-hiuxmpp
+ exit ;;
+ Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+ # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then
+ echo pyramid-pyramid-sysv3
+ else
+ echo pyramid-pyramid-bsd
+ fi
+ exit ;;
+ NILE*:*:*:dcosx)
+ echo pyramid-pyramid-svr4
+ exit ;;
+ DRS?6000:unix:4.0:6*)
+ echo sparc-icl-nx6
+ exit ;;
+ DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
+ case `/usr/bin/uname -p` in
+ sparc) echo sparc-icl-nx7; exit ;;
+ esac ;;
+ sun4H:SunOS:5.*:*)
+ echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+ echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ i86pc:SunOS:5.*:*)
+ echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ sun4*:SunOS:6*:*)
+ # According to config.sub, this is the proper way to canonicalize
+ # SunOS6. Hard to guess exactly what SunOS6 will be like, but
+ # it's likely to be more like Solaris than SunOS4.
+ echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ sun4*:SunOS:*:*)
+ case "`/usr/bin/arch -k`" in
+ Series*|S4*)
+ UNAME_RELEASE=`uname -v`
+ ;;
+ esac
+ # Japanese Language versions have a version number like `4.1.3-JL'.
+ echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+ exit ;;
+ sun3*:SunOS:*:*)
+ echo m68k-sun-sunos${UNAME_RELEASE}
+ exit ;;
+ sun*:*:4.2BSD:*)
+ UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+ test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+ case "`/bin/arch`" in
+ sun3)
+ echo m68k-sun-sunos${UNAME_RELEASE}
+ ;;
+ sun4)
+ echo sparc-sun-sunos${UNAME_RELEASE}
+ ;;
+ esac
+ exit ;;
+ aushp:SunOS:*:*)
+ echo sparc-auspex-sunos${UNAME_RELEASE}
+ exit ;;
+ # The situation for MiNT is a little confusing. The machine name
+ # can be virtually everything (everything which is not
+ # "atarist" or "atariste" at least should have a processor
+ # > m68000). The system name ranges from "MiNT" over "FreeMiNT"
+ # to the lowercase version "mint" (or "freemint"). Finally
+ # the system name "TOS" denotes a system which is actually not
+ # MiNT. But MiNT is downward compatible to TOS, so this should
+ # be no problem.
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit ;;
+ atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit ;;
+ *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit ;;
+ milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+ echo m68k-milan-mint${UNAME_RELEASE}
+ exit ;;
+ hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+ echo m68k-hades-mint${UNAME_RELEASE}
+ exit ;;
+ *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+ echo m68k-unknown-mint${UNAME_RELEASE}
+ exit ;;
+ m68k:machten:*:*)
+ echo m68k-apple-machten${UNAME_RELEASE}
+ exit ;;
+ powerpc:machten:*:*)
+ echo powerpc-apple-machten${UNAME_RELEASE}
+ exit ;;
+ RISC*:Mach:*:*)
+ echo mips-dec-mach_bsd4.3
+ exit ;;
+ RISC*:ULTRIX:*:*)
+ echo mips-dec-ultrix${UNAME_RELEASE}
+ exit ;;
+ VAX*:ULTRIX*:*:*)
+ echo vax-dec-ultrix${UNAME_RELEASE}
+ exit ;;
+ 2020:CLIX:*:* | 2430:CLIX:*:*)
+ echo clipper-intergraph-clix${UNAME_RELEASE}
+ exit ;;
+ mips:*:*:UMIPS | mips:*:*:RISCos)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+#ifdef __cplusplus
+#include <stdio.h> /* for printf() prototype */
+ int main (int argc, char *argv[]) {
+#else
+ int main (argc, argv) int argc; char *argv[]; {
+#endif
+ #if defined (host_mips) && defined (MIPSEB)
+ #if defined (SYSTYPE_SYSV)
+ printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
+ #endif
+ #if defined (SYSTYPE_SVR4)
+ printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
+ #endif
+ #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+ printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
+ #endif
+ #endif
+ exit (-1);
+ }
+EOF
+ $CC_FOR_BUILD -o $dummy $dummy.c &&
+ dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+ SYSTEM_NAME=`$dummy $dummyarg` &&
+ { echo "$SYSTEM_NAME"; exit; }
+ echo mips-mips-riscos${UNAME_RELEASE}
+ exit ;;
+ Motorola:PowerMAX_OS:*:*)
+ echo powerpc-motorola-powermax
+ exit ;;
+ Motorola:*:4.3:PL8-*)
+ echo powerpc-harris-powermax
+ exit ;;
+ Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+ echo powerpc-harris-powermax
+ exit ;;
+ Night_Hawk:Power_UNIX:*:*)
+ echo powerpc-harris-powerunix
+ exit ;;
+ m88k:CX/UX:7*:*)
+ echo m88k-harris-cxux7
+ exit ;;
+ m88k:*:4*:R4*)
+ echo m88k-motorola-sysv4
+ exit ;;
+ m88k:*:3*:R3*)
+ echo m88k-motorola-sysv3
+ exit ;;
+ AViiON:dgux:*:*)
+ # DG/UX returns AViiON for all architectures
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+ if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
+ then
+ if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
+ [ ${TARGET_BINARY_INTERFACE}x = x ]
+ then
+ echo m88k-dg-dgux${UNAME_RELEASE}
+ else
+ echo m88k-dg-dguxbcs${UNAME_RELEASE}
+ fi
+ else
+ echo i586-dg-dgux${UNAME_RELEASE}
+ fi
+ exit ;;
+ M88*:DolphinOS:*:*) # DolphinOS (SVR3)
+ echo m88k-dolphin-sysv3
+ exit ;;
+ M88*:*:R3*:*)
+ # Delta 88k system running SVR3
+ echo m88k-motorola-sysv3
+ exit ;;
+ XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+ echo m88k-tektronix-sysv3
+ exit ;;
+ Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+ echo m68k-tektronix-bsd
+ exit ;;
+ *:IRIX*:*:*)
+ echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+ exit ;;
+ ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
+ exit ;; # Note that: echo "'`uname -s`'" gives 'AIX '
+ i*86:AIX:*:*)
+ echo i386-ibm-aix
+ exit ;;
+ ia64:AIX:*:*)
+ if [ -x /usr/bin/oslevel ] ; then
+ IBM_REV=`/usr/bin/oslevel`
+ else
+ IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+ fi
+ echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
+ exit ;;
+ *:AIX:2:3)
+ if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #include <sys/systemcfg.h>
+
+ main()
+ {
+ if (!__power_pc())
+ exit(1);
+ puts("powerpc-ibm-aix3.2.5");
+ exit(0);
+ }
+EOF
+ if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
+ then
+ echo "$SYSTEM_NAME"
+ else
+ echo rs6000-ibm-aix3.2.5
+ fi
+ elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+ echo rs6000-ibm-aix3.2.4
+ else
+ echo rs6000-ibm-aix3.2
+ fi
+ exit ;;
+ *:AIX:*:[45])
+ IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+ if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
+ IBM_ARCH=rs6000
+ else
+ IBM_ARCH=powerpc
+ fi
+ if [ -x /usr/bin/oslevel ] ; then
+ IBM_REV=`/usr/bin/oslevel`
+ else
+ IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+ fi
+ echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+ exit ;;
+ *:AIX:*:*)
+ echo rs6000-ibm-aix
+ exit ;;
+ ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+ echo romp-ibm-bsd4.4
+ exit ;;
+ ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
+ echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
+ exit ;; # report: romp-ibm BSD 4.3
+ *:BOSX:*:*)
+ echo rs6000-bull-bosx
+ exit ;;
+ DPX/2?00:B.O.S.:*:*)
+ echo m68k-bull-sysv3
+ exit ;;
+ 9000/[34]??:4.3bsd:1.*:*)
+ echo m68k-hp-bsd
+ exit ;;
+ hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+ echo m68k-hp-bsd4.4
+ exit ;;
+ 9000/[34678]??:HP-UX:*:*)
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ case "${UNAME_MACHINE}" in
+ 9000/31? ) HP_ARCH=m68000 ;;
+ 9000/[34]?? ) HP_ARCH=m68k ;;
+ 9000/[678][0-9][0-9])
+ if [ -x /usr/bin/getconf ]; then
+ sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+ sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+ case "${sc_cpu_version}" in
+ 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+ 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+ 532) # CPU_PA_RISC2_0
+ case "${sc_kernel_bits}" in
+ 32) HP_ARCH="hppa2.0n" ;;
+ 64) HP_ARCH="hppa2.0w" ;;
+ '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
+ esac ;;
+ esac
+ fi
+ if [ "${HP_ARCH}" = "" ]; then
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+
+ #define _HPUX_SOURCE
+ #include <stdlib.h>
+ #include <unistd.h>
+
+ int main ()
+ {
+ #if defined(_SC_KERNEL_BITS)
+ long bits = sysconf(_SC_KERNEL_BITS);
+ #endif
+ long cpu = sysconf (_SC_CPU_VERSION);
+
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+ case CPU_PA_RISC2_0:
+ #if defined(_SC_KERNEL_BITS)
+ switch (bits)
+ {
+ case 64: puts ("hppa2.0w"); break;
+ case 32: puts ("hppa2.0n"); break;
+ default: puts ("hppa2.0"); break;
+ } break;
+ #else /* !defined(_SC_KERNEL_BITS) */
+ puts ("hppa2.0"); break;
+ #endif
+ default: puts ("hppa1.0"); break;
+ }
+ exit (0);
+ }
+EOF
+ (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+ test -z "$HP_ARCH" && HP_ARCH=hppa
+ fi ;;
+ esac
+ if [ ${HP_ARCH} = "hppa2.0w" ]
+ then
+ eval $set_cc_for_build
+
+ # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+ # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
+ # generating 64-bit code. GNU and HP use different nomenclature:
+ #
+ # $ CC_FOR_BUILD=cc ./config.guess
+ # => hppa2.0w-hp-hpux11.23
+ # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+ # => hppa64-hp-hpux11.23
+
+ if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
+ grep __LP64__ >/dev/null
+ then
+ HP_ARCH="hppa2.0w"
+ else
+ HP_ARCH="hppa64"
+ fi
+ fi
+ echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+ exit ;;
+ ia64:HP-UX:*:*)
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ echo ia64-hp-hpux${HPUX_REV}
+ exit ;;
+ 3050*:HI-UX:*:*)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #include <unistd.h>
+ int
+ main ()
+ {
+ long cpu = sysconf (_SC_CPU_VERSION);
+ /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+ true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct
+ results, however. */
+ if (CPU_IS_PA_RISC (cpu))
+ {
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+ case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+ default: puts ("hppa-hitachi-hiuxwe2"); break;
+ }
+ }
+ else if (CPU_IS_HP_MC68K (cpu))
+ puts ("m68k-hitachi-hiuxwe2");
+ else puts ("unknown-hitachi-hiuxwe2");
+ exit (0);
+ }
+EOF
+ $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
+ { echo "$SYSTEM_NAME"; exit; }
+ echo unknown-hitachi-hiuxwe2
+ exit ;;
+ 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
+ echo hppa1.1-hp-bsd
+ exit ;;
+ 9000/8??:4.3bsd:*:*)
+ echo hppa1.0-hp-bsd
+ exit ;;
+ *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+ echo hppa1.0-hp-mpeix
+ exit ;;
+ hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
+ echo hppa1.1-hp-osf
+ exit ;;
+ hp8??:OSF1:*:*)
+ echo hppa1.0-hp-osf
+ exit ;;
+ i*86:OSF1:*:*)
+ if [ -x /usr/sbin/sysversion ] ; then
+ echo ${UNAME_MACHINE}-unknown-osf1mk
+ else
+ echo ${UNAME_MACHINE}-unknown-osf1
+ fi
+ exit ;;
+ parisc*:Lites*:*:*)
+ echo hppa1.1-hp-lites
+ exit ;;
+ C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+ echo c1-convex-bsd
+ exit ;;
+ C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+ exit ;;
+ C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+ echo c34-convex-bsd
+ exit ;;
+ C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+ echo c38-convex-bsd
+ exit ;;
+ C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+ echo c4-convex-bsd
+ exit ;;
+ CRAY*Y-MP:*:*:*)
+ echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ CRAY*[A-Z]90:*:*:*)
+ echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
+ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+ -e 's/\.[^.]*$/.X/'
+ exit ;;
+ CRAY*TS:*:*:*)
+ echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ CRAY*T3E:*:*:*)
+ echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ CRAY*SV1:*:*:*)
+ echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ *:UNICOS/mp:*:*)
+ echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit ;;
+ F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+ FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+ echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ exit ;;
+ 5000:UNIX_System_V:4.*:*)
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+ echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ exit ;;
+ i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+ echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
+ exit ;;
+ sparc*:BSD/OS:*:*)
+ echo sparc-unknown-bsdi${UNAME_RELEASE}
+ exit ;;
+ *:BSD/OS:*:*)
+ echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
+ exit ;;
+ *:FreeBSD:*:*)
+ case ${UNAME_MACHINE} in
+ pc98)
+ echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ amd64)
+ echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ *)
+ echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ esac
+ exit ;;
+ i*:CYGWIN*:*)
+ echo ${UNAME_MACHINE}-pc-cygwin
+ exit ;;
+ *:MINGW*:*)
+ echo ${UNAME_MACHINE}-pc-mingw32
+ exit ;;
+ i*:windows32*:*)
+ # uname -m includes "-pc" on this system.
+ echo ${UNAME_MACHINE}-mingw32
+ exit ;;
+ i*:PW*:*)
+ echo ${UNAME_MACHINE}-pc-pw32
+ exit ;;
+ *:Interix*:[3456]*)
+ case ${UNAME_MACHINE} in
+ x86)
+ echo i586-pc-interix${UNAME_RELEASE}
+ exit ;;
+ EM64T | authenticamd)
+ echo x86_64-unknown-interix${UNAME_RELEASE}
+ exit ;;
+ esac ;;
+ [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
+ echo i${UNAME_MACHINE}-pc-mks
+ exit ;;
+ i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+ # UNAME_MACHINE based on the output of uname instead of i386?
+ echo i586-pc-interix
+ exit ;;
+ i*:UWIN*:*)
+ echo ${UNAME_MACHINE}-pc-uwin
+ exit ;;
+ amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+ echo x86_64-unknown-cygwin
+ exit ;;
+ p*:CYGWIN*:*)
+ echo powerpcle-unknown-cygwin
+ exit ;;
+ prep*:SunOS:5.*:*)
+ echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit ;;
+ *:GNU:*:*)
+ # the GNU system
+ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+ exit ;;
+ *:GNU/*:*:*)
+ # other systems with GNU libc and userland
+ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
+ exit ;;
+ i*86:Minix:*:*)
+ echo ${UNAME_MACHINE}-pc-minix
+ exit ;;
+ arm*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ avr32*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ cris:Linux:*:*)
+ echo cris-axis-linux-gnu
+ exit ;;
+ crisv32:Linux:*:*)
+ echo crisv32-axis-linux-gnu
+ exit ;;
+ frv:Linux:*:*)
+ echo frv-unknown-linux-gnu
+ exit ;;
+ ia64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ m32r*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ m68*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ mips:Linux:*:*)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #undef CPU
+ #undef mips
+ #undef mipsel
+ #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+ CPU=mipsel
+ #else
+ #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+ CPU=mips
+ #else
+ CPU=
+ #endif
+ #endif
+EOF
+ eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
+ /^CPU/{
+ s: ::g
+ p
+ }'`"
+ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
+ ;;
+ mips64:Linux:*:*)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #undef CPU
+ #undef mips64
+ #undef mips64el
+ #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+ CPU=mips64el
+ #else
+ #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+ CPU=mips64
+ #else
+ CPU=
+ #endif
+ #endif
+EOF
+ eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
+ /^CPU/{
+ s: ::g
+ p
+ }'`"
+ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
+ ;;
+ or32:Linux:*:*)
+ echo or32-unknown-linux-gnu
+ exit ;;
+ ppc:Linux:*:*)
+ echo powerpc-unknown-linux-gnu
+ exit ;;
+ ppc64:Linux:*:*)
+ echo powerpc64-unknown-linux-gnu
+ exit ;;
+ alpha:Linux:*:*)
+ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ EV5) UNAME_MACHINE=alphaev5 ;;
+ EV56) UNAME_MACHINE=alphaev56 ;;
+ PCA56) UNAME_MACHINE=alphapca56 ;;
+ PCA57) UNAME_MACHINE=alphapca56 ;;
+ EV6) UNAME_MACHINE=alphaev6 ;;
+ EV67) UNAME_MACHINE=alphaev67 ;;
+ EV68*) UNAME_MACHINE=alphaev68 ;;
+ esac
+ objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
+ if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
+ echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+ exit ;;
+ parisc:Linux:*:* | hppa:Linux:*:*)
+ # Look for CPU level
+ case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+ PA7*) echo hppa1.1-unknown-linux-gnu ;;
+ PA8*) echo hppa2.0-unknown-linux-gnu ;;
+ *) echo hppa-unknown-linux-gnu ;;
+ esac
+ exit ;;
+ parisc64:Linux:*:* | hppa64:Linux:*:*)
+ echo hppa64-unknown-linux-gnu
+ exit ;;
+ s390:Linux:*:* | s390x:Linux:*:*)
+ echo ${UNAME_MACHINE}-ibm-linux
+ exit ;;
+ sh64*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ sh*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ sparc:Linux:*:* | sparc64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ vax:Linux:*:*)
+ echo ${UNAME_MACHINE}-dec-linux-gnu
+ exit ;;
+ x86_64:Linux:*:*)
+ echo x86_64-unknown-linux-gnu
+ exit ;;
+ xtensa:Linux:*:*)
+ echo xtensa-unknown-linux-gnu
+ exit ;;
+ i*86:Linux:*:*)
+ # The BFD linker knows what the default object file format is, so
+ # first see if it will tell us. cd to the root directory to prevent
+ # problems with other programs or directories called `ld' in the path.
+ # Set LC_ALL=C to ensure ld outputs messages in English.
+ ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
+ | sed -ne '/supported targets:/!d
+ s/[ ][ ]*/ /g
+ s/.*supported targets: *//
+ s/ .*//
+ p'`
+ case "$ld_supported_targets" in
+ elf32-i386)
+ TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
+ ;;
+ a.out-i386-linux)
+ echo "${UNAME_MACHINE}-pc-linux-gnuaout"
+ exit ;;
+ coff-i386)
+ echo "${UNAME_MACHINE}-pc-linux-gnucoff"
+ exit ;;
+ "")
+ # Either a pre-BFD a.out linker (linux-gnuoldld) or
+ # one that does not give us useful --help.
+ echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
+ exit ;;
+ esac
+ # Determine whether the default compiler is a.out or elf
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #include <features.h>
+ #ifdef __ELF__
+ # ifdef __GLIBC__
+ # if __GLIBC__ >= 2
+ LIBC=gnu
+ # else
+ LIBC=gnulibc1
+ # endif
+ # else
+ LIBC=gnulibc1
+ # endif
+ #else
+ #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+ LIBC=gnu
+ #else
+ LIBC=gnuaout
+ #endif
+ #endif
+ #ifdef __dietlibc__
+ LIBC=dietlibc
+ #endif
+EOF
+ eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
+ /^LIBC/{
+ s: ::g
+ p
+ }'`"
+ test x"${LIBC}" != x && {
+ echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
+ exit
+ }
+ test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
+ ;;
+ i*86:DYNIX/ptx:4*:*)
+ # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+ # earlier versions are messed up and put the nodename in both
+ # sysname and nodename.
+ echo i386-sequent-sysv4
+ exit ;;
+ i*86:UNIX_SV:4.2MP:2.*)
+ # Unixware is an offshoot of SVR4, but it has its own version
+ # number series starting with 2...
+ # I am not positive that other SVR4 systems won't match this,
+ # I just have to hope. -- rms.
+ # Use sysv4.2uw... so that sysv4* matches it.
+ echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
+ exit ;;
+ i*86:OS/2:*:*)
+ # If we were able to find `uname', then EMX Unix compatibility
+ # is probably installed.
+ echo ${UNAME_MACHINE}-pc-os2-emx
+ exit ;;
+ i*86:XTS-300:*:STOP)
+ echo ${UNAME_MACHINE}-unknown-stop
+ exit ;;
+ i*86:atheos:*:*)
+ echo ${UNAME_MACHINE}-unknown-atheos
+ exit ;;
+ i*86:syllable:*:*)
+ echo ${UNAME_MACHINE}-pc-syllable
+ exit ;;
+ i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+ echo i386-unknown-lynxos${UNAME_RELEASE}
+ exit ;;
+ i*86:*DOS:*:*)
+ echo ${UNAME_MACHINE}-pc-msdosdjgpp
+ exit ;;
+ i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+ UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+ if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+ echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
+ else
+ echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
+ fi
+ exit ;;
+ i*86:*:5:[678]*)
+ # UnixWare 7.x, OpenUNIX and OpenServer 6.
+ case `/bin/uname -X | grep "^Machine"` in
+ *486*) UNAME_MACHINE=i486 ;;
+ *Pentium) UNAME_MACHINE=i586 ;;
+ *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+ esac
+ echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+ exit ;;
+ i*86:*:3.2:*)
+ if test -f /usr/options/cb.name; then
+ UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+ echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
+ elif /bin/uname -X 2>/dev/null >/dev/null ; then
+ UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+ (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+ (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+ && UNAME_MACHINE=i586
+ (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+ && UNAME_MACHINE=i686
+ (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+ && UNAME_MACHINE=i686
+ echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
+ else
+ echo ${UNAME_MACHINE}-pc-sysv32
+ fi
+ exit ;;
+ pc:*:*:*)
+ # Left here for compatibility:
+ # uname -m prints for DJGPP always 'pc', but it prints nothing about
+ # the processor, so we play safe by assuming i386.
+ echo i386-pc-msdosdjgpp
+ exit ;;
+ Intel:Mach:3*:*)
+ echo i386-pc-mach3
+ exit ;;
+ paragon:*:*:*)
+ echo i860-intel-osf1
+ exit ;;
+ i860:*:4.*:*) # i860-SVR4
+ if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+ echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+ else # Add other i860-SVR4 vendors below as they are discovered.
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4
+ fi
+ exit ;;
+ mini*:CTIX:SYS*5:*)
+ # "miniframe"
+ echo m68010-convergent-sysv
+ exit ;;
+ mc68k:UNIX:SYSTEM5:3.51m)
+ echo m68k-convergent-sysv
+ exit ;;
+ M680?0:D-NIX:5.3:*)
+ echo m68k-diab-dnix
+ exit ;;
+ M68*:*:R3V[5678]*:*)
+ test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
+ 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
+ OS_REL=''
+ test -r /etc/.relid \
+ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+ 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && { echo i486-ncr-sysv4; exit; } ;;
+ m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
+ echo m68k-unknown-lynxos${UNAME_RELEASE}
+ exit ;;
+ mc68030:UNIX_System_V:4.*:*)
+ echo m68k-atari-sysv4
+ exit ;;
+ TSUNAMI:LynxOS:2.*:*)
+ echo sparc-unknown-lynxos${UNAME_RELEASE}
+ exit ;;
+ rs6000:LynxOS:2.*:*)
+ echo rs6000-unknown-lynxos${UNAME_RELEASE}
+ exit ;;
+ PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
+ echo powerpc-unknown-lynxos${UNAME_RELEASE}
+ exit ;;
+ SM[BE]S:UNIX_SV:*:*)
+ echo mips-dde-sysv${UNAME_RELEASE}
+ exit ;;
+ RM*:ReliantUNIX-*:*:*)
+ echo mips-sni-sysv4
+ exit ;;
+ RM*:SINIX-*:*:*)
+ echo mips-sni-sysv4
+ exit ;;
+ *:SINIX-*:*:*)
+ if uname -p 2>/dev/null >/dev/null ; then
+ UNAME_MACHINE=`(uname -p) 2>/dev/null`
+ echo ${UNAME_MACHINE}-sni-sysv4
+ else
+ echo ns32k-sni-sysv
+ fi
+ exit ;;
+ PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+ # says <Richard.M.Bartel@ccMail.Census.GOV>
+ echo i586-unisys-sysv4
+ exit ;;
+ *:UNIX_System_V:4*:FTX*)
+ # From Gerald Hewes <hewes@openmarket.com>.
+ # How about differentiating between stratus architectures? -djm
+ echo hppa1.1-stratus-sysv4
+ exit ;;
+ *:*:*:FTX*)
+ # From seanf@swdc.stratus.com.
+ echo i860-stratus-sysv4
+ exit ;;
+ i*86:VOS:*:*)
+ # From Paul.Green@stratus.com.
+ echo ${UNAME_MACHINE}-stratus-vos
+ exit ;;
+ *:VOS:*:*)
+ # From Paul.Green@stratus.com.
+ echo hppa1.1-stratus-vos
+ exit ;;
+ mc68*:A/UX:*:*)
+ echo m68k-apple-aux${UNAME_RELEASE}
+ exit ;;
+ news*:NEWS-OS:6*:*)
+ echo mips-sony-newsos6
+ exit ;;
+ R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+ if [ -d /usr/nec ]; then
+ echo mips-nec-sysv${UNAME_RELEASE}
+ else
+ echo mips-unknown-sysv${UNAME_RELEASE}
+ fi
+ exit ;;
+ BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
+ echo powerpc-be-beos
+ exit ;;
+ BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only.
+ echo powerpc-apple-beos
+ exit ;;
+ BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
+ echo i586-pc-beos
+ exit ;;
+ SX-4:SUPER-UX:*:*)
+ echo sx4-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-5:SUPER-UX:*:*)
+ echo sx5-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-6:SUPER-UX:*:*)
+ echo sx6-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-7:SUPER-UX:*:*)
+ echo sx7-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-8:SUPER-UX:*:*)
+ echo sx8-nec-superux${UNAME_RELEASE}
+ exit ;;
+ SX-8R:SUPER-UX:*:*)
+ echo sx8r-nec-superux${UNAME_RELEASE}
+ exit ;;
+ Power*:Rhapsody:*:*)
+ echo powerpc-apple-rhapsody${UNAME_RELEASE}
+ exit ;;
+ *:Rhapsody:*:*)
+ echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+ exit ;;
+ *:Darwin:*:*)
+ UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+ case $UNAME_PROCESSOR in
+ unknown) UNAME_PROCESSOR=powerpc ;;
+ esac
+ echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
+ exit ;;
+ *:procnto*:*:* | *:QNX:[0123456789]*:*)
+ UNAME_PROCESSOR=`uname -p`
+ if test "$UNAME_PROCESSOR" = "x86"; then
+ UNAME_PROCESSOR=i386
+ UNAME_MACHINE=pc
+ fi
+ echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
+ exit ;;
+ *:QNX:*:4*)
+ echo i386-pc-qnx
+ exit ;;
+ NSE-?:NONSTOP_KERNEL:*:*)
+ echo nse-tandem-nsk${UNAME_RELEASE}
+ exit ;;
+ NSR-?:NONSTOP_KERNEL:*:*)
+ echo nsr-tandem-nsk${UNAME_RELEASE}
+ exit ;;
+ *:NonStop-UX:*:*)
+ echo mips-compaq-nonstopux
+ exit ;;
+ BS2000:POSIX*:*:*)
+ echo bs2000-siemens-sysv
+ exit ;;
+ DS/*:UNIX_System_V:*:*)
+ echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
+ exit ;;
+ *:Plan9:*:*)
+ # "uname -m" is not consistent, so use $cputype instead. 386
+ # is converted to i386 for consistency with other x86
+ # operating systems.
+ if test "$cputype" = "386"; then
+ UNAME_MACHINE=i386
+ else
+ UNAME_MACHINE="$cputype"
+ fi
+ echo ${UNAME_MACHINE}-unknown-plan9
+ exit ;;
+ *:TOPS-10:*:*)
+ echo pdp10-unknown-tops10
+ exit ;;
+ *:TENEX:*:*)
+ echo pdp10-unknown-tenex
+ exit ;;
+ KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+ echo pdp10-dec-tops20
+ exit ;;
+ XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+ echo pdp10-xkl-tops20
+ exit ;;
+ *:TOPS-20:*:*)
+ echo pdp10-unknown-tops20
+ exit ;;
+ *:ITS:*:*)
+ echo pdp10-unknown-its
+ exit ;;
+ SEI:*:*:SEIUX)
+ echo mips-sei-seiux${UNAME_RELEASE}
+ exit ;;
+ *:DragonFly:*:*)
+ echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+ exit ;;
+ *:*VMS:*:*)
+ UNAME_MACHINE=`(uname -p) 2>/dev/null`
+ case "${UNAME_MACHINE}" in
+ A*) echo alpha-dec-vms ; exit ;;
+ I*) echo ia64-dec-vms ; exit ;;
+ V*) echo vax-dec-vms ; exit ;;
+ esac ;;
+ *:XENIX:*:SysV)
+ echo i386-pc-xenix
+ exit ;;
+ i*86:skyos:*:*)
+ echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+ exit ;;
+ i*86:rdos:*:*)
+ echo ${UNAME_MACHINE}-pc-rdos
+ exit ;;
+esac
+
+#echo '(No uname command or uname output not recognized.)' 1>&2
+#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
+
+eval $set_cc_for_build
+cat >$dummy.c <<EOF
+#ifdef _SEQUENT_
+# include <sys/types.h>
+# include <sys/utsname.h>
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+ /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
+ I don't know.... */
+ printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+ printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+ "4"
+#else
+ ""
+#endif
+ ); exit (0);
+#endif
+#endif
+
+#if defined (__arm) && defined (__acorn) && defined (__unix)
+ printf ("arm-acorn-riscix\n"); exit (0);
+#endif
+
+#if defined (hp300) && !defined (hpux)
+ printf ("m68k-hp-bsd\n"); exit (0);
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+ int version;
+ version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+ if (version < 4)
+ printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+ else
+ printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+ exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+ printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+ printf ("ns32k-encore-mach\n"); exit (0);
+#else
+ printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+ printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+ printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+ printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+ struct utsname un;
+
+ uname(&un);
+
+ if (strncmp(un.version, "V2", 2) == 0) {
+ printf ("i386-sequent-ptx2\n"); exit (0);
+ }
+ if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+ printf ("i386-sequent-ptx1\n"); exit (0);
+ }
+ printf ("i386-sequent-ptx\n"); exit (0);
+
+#endif
+
+#if defined (vax)
+# if !defined (ultrix)
+# include <sys/param.h>
+# if defined (BSD)
+# if BSD == 43
+ printf ("vax-dec-bsd4.3\n"); exit (0);
+# else
+# if BSD == 199006
+ printf ("vax-dec-bsd4.3reno\n"); exit (0);
+# else
+ printf ("vax-dec-bsd\n"); exit (0);
+# endif
+# endif
+# else
+ printf ("vax-dec-bsd\n"); exit (0);
+# endif
+# else
+ printf ("vax-dec-ultrix\n"); exit (0);
+# endif
+#endif
+
+#if defined (alliant) && defined (i860)
+ printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+ exit (1);
+}
+EOF
+
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+ { echo "$SYSTEM_NAME"; exit; }
+
+# Apollos put the system type in the environment.
+
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
+
+# Convex versions that predate uname can use getsysinfo(1)
+
+if [ -x /usr/convex/getsysinfo ]
+then
+ case `getsysinfo -f cpu_type` in
+ c1*)
+ echo c1-convex-bsd
+ exit ;;
+ c2*)
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+ exit ;;
+ c34*)
+ echo c34-convex-bsd
+ exit ;;
+ c38*)
+ echo c38-convex-bsd
+ exit ;;
+ c4*)
+ echo c4-convex-bsd
+ exit ;;
+ esac
+fi
+
+cat >&2 <<EOF
+$0: unable to guess system type
+
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
+
+ http://savannah.gnu.org/cgi-bin/viewcvs/*checkout*/config/config/config.guess
+and
+ http://savannah.gnu.org/cgi-bin/viewcvs/*checkout*/config/config/config.sub
+
+If the version you run ($0) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo = `(hostinfo) 2>/dev/null`
+/bin/universe = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = ${UNAME_MACHINE}
+UNAME_RELEASE = ${UNAME_RELEASE}
+UNAME_SYSTEM = ${UNAME_SYSTEM}
+UNAME_VERSION = ${UNAME_VERSION}
+EOF
+
+exit 1
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/config.h.in b/config.h.in
new file mode 100644
index 0000000..f0a5c51
--- /dev/null
+++ b/config.h.in
@@ -0,0 +1,19 @@
+/* config.h.in. Generated automatically from configure.in by autoheader. */
+
+/* Define if you have the getopt_long function. */
+#undef HAVE_GETOPT_LONG
+
+/* Define if you have the <getopt.h> header file. */
+#undef HAVE_GETOPT_H
+
+/* Define if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define if you have the <stdio.h> header file. */
+#undef HAVE_STDIO_H
+
+/* Define if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define if you have the c library (-lc). */
+#undef HAVE_LIBC
diff --git a/config.sub b/config.sub
new file mode 100755
index 0000000..a06a480
--- /dev/null
+++ b/config.sub
@@ -0,0 +1,1362 @@
+#! /bin/sh
+# Configuration validation subroutine script.
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+# Free Software Foundation, Inc.
+
+timestamp='2001-04-20'
+
+# This file is (in principle) common to ALL GNU software.
+# The presence of a machine in this file suggests that SOME GNU software
+# can handle that machine. It does not imply ALL GNU software can.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Please send patches to <config-patches@gnu.org>.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support. The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+ $0 [OPTION] ALIAS
+
+Canonicalize a configuration name.
+
+Operation modes:
+ -h, --help print this help, then exit
+ -t, --time-stamp print date of last modification, then exit
+ -v, --version print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+# Examine the leading argument.  Every arm either exits or breaks out of
+# the loop, so at most one argument is ever inspected here; whatever is
+# left in "$@" afterwards is checked by the code that follows.
+while test $# -gt 0 ; do
+ case $1 in
+ --time-stamp | --time* | -t )
+ echo "$timestamp" ; exit 0 ;;
+ --version | -v )
+ echo "$version" ; exit 0 ;;
+ --help | --h* | -h )
+ echo "$usage"; exit 0 ;;
+ -- ) # Stop option processing
+ shift; break ;;
+ - ) # Use stdin as input.
+ break ;;
+ -* )
+ # Diagnostics go to stderr, like the argument-count errors, so a
+ # caller capturing stdout never mistakes them for a canonical name.
+ echo "$me: invalid option $1$help" >&2
+ exit 1 ;;
+
+ *local*)
+ # First pass through any local machine types.
+ # Quoted so the name is echoed verbatim (no splitting or globbing).
+ echo "$1"
+ exit 0;;
+
+ * )
+ break ;;
+ esac
+done
+
+case $# in
+ 0) echo "$me: missing argument$help" >&2
+ exit 1;;
+ 1) ;;
+ *) echo "$me: too many arguments$help" >&2
+ exit 1;;
+esac
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+case $maybe_os in
+ nto-qnx* | linux-gnu* | storm-chaos* | os2-emx*)
+ os=-$maybe_os
+ basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+ ;;
+ *)
+ basic_machine=`echo $1 | sed 's/-[^-]*$//'`
+ if [ $basic_machine != $1 ]
+ then os=`echo $1 | sed 's/.*-/-/'`
+ else os=; fi
+ ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work. We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+# Post-process the tentative OS field.  The first matching arm wins, so the
+# more specific patterns are listed before the catch-alls they overlap with.
+case $os in
+ -sun*os*)
+ # Prevent following clause from handling this invalid input.
+ ;;
+ -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+ -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+ -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+ -apple | -axis)
+ # These are machine or vendor names, not operating systems: clear
+ # os and treat the whole argument as the machine.
+ os=
+ basic_machine=$1
+ ;;
+ -sim | -cisco | -oki | -wec | -winbond)
+ # Same treatment as the list above.
+ os=
+ basic_machine=$1
+ ;;
+ -scout)
+ # Left unchanged.
+ ;;
+ -wrs)
+ # The whole argument becomes the machine and the OS is set to vxworks.
+ os=-vxworks
+ basic_machine=$1
+ ;;
+ -hiux*)
+ # Any hiux variant is reported as hiuxwe2.
+ os=-hiuxwe2
+ ;;
+ -sco5)
+ # In this and the following arms the sed rewrites everything from
+ # "86-" to the end of the argument as "86-pc", i.e. it reduces an
+ # x86 name to CPU-pc.
+ os=-sco3.2v5
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco4)
+ os=-sco3.2v4
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco3.2.[4-9]*)
+ # Spell the release with a "v" (sco3.2.N -> sco3.2vN).
+ os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco3.2v[4-9]*)
+ # Don't forget version if it is 3.2v4 or newer.
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco*)
+ # Catch-all for the remaining sco spellings; must stay after the
+ # versioned arms above.
+ os=-sco3.2v2
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -udk*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -isc)
+ # Bare "isc" gets a default release; versioned spellings are
+ # handled by the -isc* arm further down and keep their version.
+ os=-isc2.2
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -clix*)
+ basic_machine=clipper-intergraph
+ ;;
+ -isc*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -lynx*)
+ # Any suffix after "lynx" is discarded.
+ os=-lynxos
+ ;;
+ -ptx*)
+ # Like the 86-pc rewrite above, but the vendor becomes sequent.
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+ ;;
+ -windowsnt*)
+ os=`echo $os | sed -e 's/windowsnt/winnt/'`
+ ;;
+ -psos*)
+ # Any suffix after "psos" is discarded.
+ os=-psos
+ ;;
+ -mint | -mint[0-9]*)
+ # The machine is forced to m68k-atari and any digits are dropped.
+ basic_machine=m68k-atari
+ os=-mint
+ ;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+ # Recognize the basic CPU types without company name.
+ # Some are omitted here because they have special meanings below.
+ tahoe | i860 | ia64 | m32r | m68k | m68000 | m88k | ns32k | arc \
+ | arm | arme[lb] | arm[bl]e | armv[2345] | armv[345][lb] | strongarm | xscale \
+ | pyramid | mn10200 | mn10300 | tron | a29k \
+ | 580 | i960 | h8300 \
+ | x86 | ppcbe | mipsbe | mipsle | shbe | shle \
+ | hppa | hppa1.0 | hppa1.1 | hppa2.0 | hppa2.0w | hppa2.0n \
+ | hppa64 \
+ | alpha | alphaev[4-8] | alphaev56 | alphapca5[67] \
+ | alphaev6[78] \
+ | we32k | ns16k | clipper | i370 | sh | sh[34] \
+ | powerpc | powerpcle \
+ | 1750a | dsp16xx | pdp10 | pdp11 \
+ | mips16 | mips64 | mipsel | mips64el \
+ | mips64orion | mips64orionel | mipstx39 | mipstx39el \
+ | mips64vr4300 | mips64vr4300el | mips64vr4100 | mips64vr4100el \
+ | mips64vr5000 | mips64vr5000el | mcore | s390 | s390x \
+ | sparc | sparclet | sparclite | sparc64 | sparcv9 | sparcv9b \
+ | v850 | c4x \
+ | thumb | d10v | d30v | fr30 | avr | openrisc | tic80 \
+ | pj | pjl | h8500)
+ basic_machine=$basic_machine-unknown
+ ;;
+ m6811 | m68hc11 | m6812 | m68hc12)
+ # Motorola 68HC11/12.
+ basic_machine=$basic_machine-unknown
+ os=-none
+ ;;
+ m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | z8k | v70 | w65)
+ ;;
+
+ # We use `pc' rather than `unknown'
+ # because (1) that's what they normally are, and
+ # (2) the word "unknown" tends to confuse beginning users.
+ i*86 | x86_64)
+ basic_machine=$basic_machine-pc
+ ;;
+ # Object if more than one company name word.
+ *-*-*)
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+ exit 1
+ ;;
+ # Recognize the basic CPU types with company name.
+ # FIXME: clean up the formatting here.
+ vax-* | tahoe-* | i*86-* | i860-* | ia64-* | m32r-* | m68k-* | m68000-* \
+ | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | c[123]* \
+ | arm-* | armbe-* | armle-* | armv*-* | strongarm-* | xscale-* \
+ | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \
+ | power-* | none-* | 580-* | cray2-* | h8300-* | h8500-* | i960-* \
+ | xmp-* | ymp-* \
+ | x86-* | ppcbe-* | mipsbe-* | mipsle-* | shbe-* | shle-* \
+ | hppa-* | hppa1.0-* | hppa1.1-* | hppa2.0-* | hppa2.0w-* \
+ | hppa2.0n-* | hppa64-* \
+ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphapca5[67]-* \
+ | alphaev6[78]-* \
+ | we32k-* | cydra-* | ns16k-* | pn-* | np1-* | xps100-* \
+ | clipper-* | orion-* \
+ | sparclite-* | pdp10-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \
+ | sparc64-* | sparcv9-* | sparcv9b-* | sparc86x-* \
+ | mips16-* | mips64-* | mipsel-* \
+ | mips64el-* | mips64orion-* | mips64orionel-* \
+ | mips64vr4100-* | mips64vr4100el-* | mips64vr4300-* | mips64vr4300el-* \
+ | mipstx39-* | mipstx39el-* | mcore-* \
+ | f30[01]-* | f700-* | s390-* | s390x-* | sv1-* | t3e-* \
+ | [cjt]90-* \
+ | m88110-* | m680[01234]0-* | m683?2-* | m68360-* | z8k-* | d10v-* \
+ | thumb-* | v850-* | d30v-* | tic30-* | tic80-* | c30-* | fr30-* \
+ | bs2000-* | tic54x-* | c54x-* | x86_64-* | pj-* | pjl-*)
+ ;;
+ # Recognize the various machine names and aliases which stand
+ # for a CPU type and a company and sometimes even an OS.
+ 386bsd)
+ basic_machine=i386-unknown
+ os=-bsd
+ ;;
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+ basic_machine=m68000-att
+ ;;
+ 3b*)
+ basic_machine=we32k-att
+ ;;
+ a29khif)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ adobe68k)
+ basic_machine=m68010-adobe
+ os=-scout
+ ;;
+ alliant | fx80)
+ basic_machine=fx80-alliant
+ ;;
+ altos | altos3068)
+ basic_machine=m68k-altos
+ ;;
+ am29k)
+ basic_machine=a29k-none
+ os=-bsd
+ ;;
+ amdahl)
+ basic_machine=580-amdahl
+ os=-sysv
+ ;;
+ amiga | amiga-*)
+ basic_machine=m68k-unknown
+ ;;
+ amigaos | amigados)
+ basic_machine=m68k-unknown
+ os=-amigaos
+ ;;
+ amigaunix | amix)
+ basic_machine=m68k-unknown
+ os=-sysv4
+ ;;
+ apollo68)
+ basic_machine=m68k-apollo
+ os=-sysv
+ ;;
+ apollo68bsd)
+ basic_machine=m68k-apollo
+ os=-bsd
+ ;;
+ aux)
+ basic_machine=m68k-apple
+ os=-aux
+ ;;
+ balance)
+ basic_machine=ns32k-sequent
+ os=-dynix
+ ;;
+ convex-c1)
+ basic_machine=c1-convex
+ os=-bsd
+ ;;
+ convex-c2)
+ basic_machine=c2-convex
+ os=-bsd
+ ;;
+ convex-c32)
+ basic_machine=c32-convex
+ os=-bsd
+ ;;
+ convex-c34)
+ basic_machine=c34-convex
+ os=-bsd
+ ;;
+ convex-c38)
+ basic_machine=c38-convex
+ os=-bsd
+ ;;
+ cray | ymp)
+ basic_machine=ymp-cray
+ os=-unicos
+ ;;
+ cray2)
+ basic_machine=cray2-cray
+ os=-unicos
+ ;;
+ [cjt]90)
+ basic_machine=${basic_machine}-cray
+ os=-unicos
+ ;;
+ crds | unos)
+ basic_machine=m68k-crds
+ ;;
+ cris | cris-* | etrax*)
+ basic_machine=cris-axis
+ ;;
+ da30 | da30-*)
+ basic_machine=m68k-da30
+ ;;
+ decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+ basic_machine=mips-dec
+ ;;
+ delta | 3300 | motorola-3300 | motorola-delta \
+ | 3300-motorola | delta-motorola)
+ basic_machine=m68k-motorola
+ ;;
+ delta88)
+ basic_machine=m88k-motorola
+ os=-sysv3
+ ;;
+ dpx20 | dpx20-*)
+ basic_machine=rs6000-bull
+ os=-bosx
+ ;;
+ dpx2* | dpx2*-bull)
+ basic_machine=m68k-bull
+ os=-sysv3
+ ;;
+ ebmon29k)
+ basic_machine=a29k-amd
+ os=-ebmon
+ ;;
+ elxsi)
+ basic_machine=elxsi-elxsi
+ os=-bsd
+ ;;
+ encore | umax | mmax)
+ basic_machine=ns32k-encore
+ ;;
+ es1800 | OSE68k | ose68k | ose | OSE)
+ basic_machine=m68k-ericsson
+ os=-ose
+ ;;
+ fx2800)
+ basic_machine=i860-alliant
+ ;;
+ genix)
+ basic_machine=ns32k-ns
+ ;;
+ gmicro)
+ basic_machine=tron-gmicro
+ os=-sysv
+ ;;
+ go32)
+ basic_machine=i386-pc
+ os=-go32
+ ;;
+ h3050r* | hiux*)
+ basic_machine=hppa1.1-hitachi
+ os=-hiuxwe2
+ ;;
+ h8300hms)
+ basic_machine=h8300-hitachi
+ os=-hms
+ ;;
+ h8300xray)
+ basic_machine=h8300-hitachi
+ os=-xray
+ ;;
+ h8500hms)
+ basic_machine=h8500-hitachi
+ os=-hms
+ ;;
+ harris)
+ basic_machine=m88k-harris
+ os=-sysv3
+ ;;
+ hp300-*)
+ basic_machine=m68k-hp
+ ;;
+ hp300bsd)
+ basic_machine=m68k-hp
+ os=-bsd
+ ;;
+ hp300hpux)
+ basic_machine=m68k-hp
+ os=-hpux
+ ;;
+ hp3k9[0-9][0-9] | hp9[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hp9k2[0-9][0-9] | hp9k31[0-9])
+ basic_machine=m68000-hp
+ ;;
+ hp9k3[2-9][0-9])
+ basic_machine=m68k-hp
+ ;;
+ hp9k6[0-9][0-9] | hp6[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hp9k7[0-79][0-9] | hp7[0-79][0-9])
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k78[0-9] | hp78[0-9])
+ # FIXME: really hppa2.0-hp
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+ # FIXME: really hppa2.0-hp
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k8[0-9][13679] | hp8[0-9][13679])
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k8[0-9][0-9] | hp8[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hppa-next)
+ os=-nextstep3
+ ;;
+ hppaosf)
+ basic_machine=hppa1.1-hp
+ os=-osf
+ ;;
+ hppro)
+ basic_machine=hppa1.1-hp
+ os=-proelf
+ ;;
+ i370-ibm* | ibm*)
+ basic_machine=i370-ibm
+ ;;
+# I'm not sure what "Sysv32" means. Should this be sysv3.2?
+ i*86v32)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv32
+ ;;
+ i*86v4*)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv4
+ ;;
+ i*86v)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv
+ ;;
+ i*86sol2)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-solaris2
+ ;;
+ i386mach)
+ basic_machine=i386-mach
+ os=-mach
+ ;;
+ i386-vsta | vsta)
+ basic_machine=i386-unknown
+ os=-vsta
+ ;;
+ iris | iris4d)
+ basic_machine=mips-sgi
+ case $os in
+ -irix*)
+ ;;
+ *)
+ os=-irix4
+ ;;
+ esac
+ ;;
+ isi68 | isi)
+ basic_machine=m68k-isi
+ os=-sysv
+ ;;
+ m88k-omron*)
+ basic_machine=m88k-omron
+ ;;
+ magnum | m3230)
+ basic_machine=mips-mips
+ os=-sysv
+ ;;
+ merlin)
+ basic_machine=ns32k-utek
+ os=-sysv
+ ;;
+ mingw32)
+ basic_machine=i386-pc
+ os=-mingw32
+ ;;
+ miniframe)
+ basic_machine=m68000-convergent
+ ;;
+ *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+ basic_machine=m68k-atari
+ os=-mint
+ ;;
+ mipsel*-linux*)
+ basic_machine=mipsel-unknown
+ os=-linux-gnu
+ ;;
+ mips*-linux*)
+ basic_machine=mips-unknown
+ os=-linux-gnu
+ ;;
+ mips3*-*)
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
+ ;;
+ mips3*)
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
+ ;;
+ mmix*)
+ basic_machine=mmix-knuth
+ os=-mmixware
+ ;;
+ monitor)
+ basic_machine=m68k-rom68k
+ os=-coff
+ ;;
+ msdos)
+ basic_machine=i386-pc
+ os=-msdos
+ ;;
+ mvs)
+ basic_machine=i370-ibm
+ os=-mvs
+ ;;
+ ncr3000)
+ basic_machine=i486-ncr
+ os=-sysv4
+ ;;
+ netbsd386)
+ basic_machine=i386-unknown
+ os=-netbsd
+ ;;
+ netwinder)
+ basic_machine=armv4l-rebel
+ os=-linux
+ ;;
+ news | news700 | news800 | news900)
+ basic_machine=m68k-sony
+ os=-newsos
+ ;;
+ news1000)
+ basic_machine=m68030-sony
+ os=-newsos
+ ;;
+ news-3600 | risc-news)
+ basic_machine=mips-sony
+ os=-newsos
+ ;;
+ necv70)
+ basic_machine=v70-nec
+ os=-sysv
+ ;;
+ next | m*-next )
+ basic_machine=m68k-next
+ case $os in
+ -nextstep* )
+ ;;
+ -ns2*)
+ os=-nextstep2
+ ;;
+ *)
+ os=-nextstep3
+ ;;
+ esac
+ ;;
+ nh3000)
+ basic_machine=m68k-harris
+ os=-cxux
+ ;;
+ nh[45]000)
+ basic_machine=m88k-harris
+ os=-cxux
+ ;;
+ nindy960)
+ basic_machine=i960-intel
+ os=-nindy
+ ;;
+ mon960)
+ basic_machine=i960-intel
+ os=-mon960
+ ;;
+ nonstopux)
+ basic_machine=mips-compaq
+ os=-nonstopux
+ ;;
+ np1)
+ basic_machine=np1-gould
+ ;;
+ nsr-tandem)
+ basic_machine=nsr-tandem
+ ;;
+ op50n-* | op60c-*)
+ basic_machine=hppa1.1-oki
+ os=-proelf
+ ;;
+ OSE68000 | ose68000)
+ basic_machine=m68000-ericsson
+ os=-ose
+ ;;
+ os68k)
+ basic_machine=m68k-none
+ os=-os68k
+ ;;
+ pa-hitachi)
+ basic_machine=hppa1.1-hitachi
+ os=-hiuxwe2
+ ;;
+ paragon)
+ basic_machine=i860-intel
+ os=-osf
+ ;;
+ pbd)
+ basic_machine=sparc-tti
+ ;;
+ pbb)
+ basic_machine=m68k-tti
+ ;;
+ pc532 | pc532-*)
+ basic_machine=ns32k-pc532
+ ;;
+ pentium | p5 | k5 | k6 | nexgen)
+ basic_machine=i586-pc
+ ;;
+ pentiumpro | p6 | 6x86 | athlon)
+ basic_machine=i686-pc
+ ;;
+ pentiumii | pentium2)
+ basic_machine=i686-pc
+ ;;
+ pentium-* | p5-* | k5-* | k6-* | nexgen-*)
+ basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentiumpro-* | p6-* | 6x86-* | athlon-*)
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentiumii-* | pentium2-*)
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pn)
+ basic_machine=pn-gould
+ ;;
+ power) basic_machine=power-ibm
+ ;;
+ ppc) basic_machine=powerpc-unknown
+ ;;
+ ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ppcle | powerpclittle | ppc-le | powerpc-little)
+ basic_machine=powerpcle-unknown
+ ;;
+ ppcle-* | powerpclittle-*)
+ basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ps2)
+ basic_machine=i386-ibm
+ ;;
+ pw32)
+ basic_machine=i586-unknown
+ os=-pw32
+ ;;
+ rom68k)
+ basic_machine=m68k-rom68k
+ os=-coff
+ ;;
+ rm[46]00)
+ basic_machine=mips-siemens
+ ;;
+ rtpc | rtpc-*)
+ basic_machine=romp-ibm
+ ;;
+ sa29200)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ sequent)
+ basic_machine=i386-sequent
+ ;;
+ sh)
+ basic_machine=sh-hitachi
+ os=-hms
+ ;;
+ sparclite-wrs)
+ basic_machine=sparclite-wrs
+ os=-vxworks
+ ;;
+ sps7)
+ basic_machine=m68k-bull
+ os=-sysv2
+ ;;
+ spur)
+ basic_machine=spur-unknown
+ ;;
+ st2000)
+ basic_machine=m68k-tandem
+ ;;
+ stratus)
+ basic_machine=i860-stratus
+ os=-sysv4
+ ;;
+ sun2)
+ basic_machine=m68000-sun
+ ;;
+ sun2os3)
+ basic_machine=m68000-sun
+ os=-sunos3
+ ;;
+ sun2os4)
+ basic_machine=m68000-sun
+ os=-sunos4
+ ;;
+ sun3os3)
+ basic_machine=m68k-sun
+ os=-sunos3
+ ;;
+ sun3os4)
+ basic_machine=m68k-sun
+ os=-sunos4
+ ;;
+ sun4os3)
+ basic_machine=sparc-sun
+ os=-sunos3
+ ;;
+ sun4os4)
+ basic_machine=sparc-sun
+ os=-sunos4
+ ;;
+ sun4sol2)
+ basic_machine=sparc-sun
+ os=-solaris2
+ ;;
+ sun3 | sun3-*)
+ basic_machine=m68k-sun
+ ;;
+ sun4)
+ basic_machine=sparc-sun
+ ;;
+ sun386 | sun386i | roadrunner)
+ basic_machine=i386-sun
+ ;;
+ sv1)
+ basic_machine=sv1-cray
+ os=-unicos
+ ;;
+ symmetry)
+ basic_machine=i386-sequent
+ os=-dynix
+ ;;
+ t3e)
+ basic_machine=t3e-cray
+ os=-unicos
+ ;;
+ tic54x | c54x*)
+ basic_machine=tic54x-unknown
+ os=-coff
+ ;;
+ tx39)
+ basic_machine=mipstx39-unknown
+ ;;
+ tx39el)
+ basic_machine=mipstx39el-unknown
+ ;;
+ tower | tower-32)
+ basic_machine=m68k-ncr
+ ;;
+ udi29k)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ ultra3)
+ basic_machine=a29k-nyu
+ os=-sym1
+ ;;
+ v810 | necv810)
+ basic_machine=v810-nec
+ os=-none
+ ;;
+ vaxv)
+ basic_machine=vax-dec
+ os=-sysv
+ ;;
+ vms)
+ basic_machine=vax-dec
+ os=-vms
+ ;;
+ vpp*|vx|vx-*)
+ basic_machine=f301-fujitsu
+ ;;
+ vxworks960)
+ basic_machine=i960-wrs
+ os=-vxworks
+ ;;
+ vxworks68)
+ basic_machine=m68k-wrs
+ os=-vxworks
+ ;;
+ vxworks29k)
+ basic_machine=a29k-wrs
+ os=-vxworks
+ ;;
+ w65*)
+ basic_machine=w65-wdc
+ os=-none
+ ;;
+ w89k-*)
+ basic_machine=hppa1.1-winbond
+ os=-proelf
+ ;;
+ xmp)
+ basic_machine=xmp-cray
+ os=-unicos
+ ;;
+ xps | xps100)
+ basic_machine=xps100-honeywell
+ ;;
+ z8k-*-coff)
+ basic_machine=z8k-unknown
+ os=-sim
+ ;;
+ none)
+ basic_machine=none-none
+ os=-none
+ ;;
+
+# Here we handle the default manufacturer of certain CPU types. It is in
+# some cases the only manufacturer, in others, it is the most popular.
+ w89k)
+ basic_machine=hppa1.1-winbond
+ ;;
+ op50n)
+ basic_machine=hppa1.1-oki
+ ;;
+ op60c)
+ basic_machine=hppa1.1-oki
+ ;;
+ mips)
+ if [ x$os = x-linux-gnu ]; then
+ basic_machine=mips-unknown
+ else
+ basic_machine=mips-mips
+ fi
+ ;;
+ romp)
+ basic_machine=romp-ibm
+ ;;
+ rs6000)
+ basic_machine=rs6000-ibm
+ ;;
+ vax)
+ basic_machine=vax-dec
+ ;;
+ pdp10)
+ # there are many clones, so DEC is not a safe bet
+ basic_machine=pdp10-unknown
+ ;;
+ pdp11)
+ basic_machine=pdp11-dec
+ ;;
+ we32k)
+ basic_machine=we32k-att
+ ;;
+ sh3 | sh4)
+ basic_machine=sh-unknown
+ ;;
+ sparc | sparcv9 | sparcv9b)
+ basic_machine=sparc-sun
+ ;;
+ cydra)
+ basic_machine=cydra-cydrome
+ ;;
+ orion)
+ basic_machine=orion-highlevel
+ ;;
+ orion105)
+ basic_machine=clipper-highlevel
+ ;;
+ mac | mpw | mac-mpw)
+ basic_machine=m68k-apple
+ ;;
+ pmac | pmac-mpw)
+ basic_machine=powerpc-apple
+ ;;
+ c4x*)
+ basic_machine=c4x-none
+ os=-coff
+ ;;
+ *-unknown)
+ # Make sure to match an already-canonicalized machine name.
+ ;;
+ *)
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+ exit 1
+ ;;
+esac
+
+# Rewrite long-form manufacturer names inside $basic_machine to their short
+# canonical aliases: "digital..." becomes "dec", "commodore..." becomes "cbm".
+# Any other machine name falls through unchanged.
+case $basic_machine in
+ *-digital*)
+ basic_machine=`echo $basic_machine | sed -e 's/digital.*/dec/'` ;;
+ *-commodore*)
+ basic_machine=`echo $basic_machine | sed -e 's/commodore.*/cbm/'` ;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems.
+
+if [ x"$os" != x"" ]
+then
+case $os in
+ # Canonicalize $os. First match some system type aliases
+ # that might get confused with valid system types.
+ # -solaris* is a basic system type, with this one exception.
+ -solaris1 | -solaris1.*)
+ os=`echo $os | sed -e 's|solaris1|sunos4|'`
+ ;;
+ -solaris)
+ os=-solaris2
+ ;;
+ -svr4*)
+ os=-sysv4
+ ;;
+ -unixware*)
+ os=-sysv4.2uw
+ ;;
+ -gnu/linux*)
+ os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+ ;;
+ # Next accept the basic system types unchanged (the arm body is empty).
+ # The portable systems come first.
+ # Each alternative MUST END IN A *, to match a version number.
+ # -sysv* is not here because it comes later, after sysvr4.
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+ | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
+ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
+ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+ | -aos* \
+ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+ | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \
+ | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
+ | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+ | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \
+ | -interix* | -uwin* | -rhapsody* | -darwin* | -opened* \
+ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* | -os2*)
+ # Remember, each alternative MUST END IN *, to match a version number.
+ ;;
+ -qnx*)
+ case $basic_machine in
+ x86-* | i*86-*)
+ ;;
+ *)
+ os=-nto$os # e.g. -qnx becomes -nto-qnx on non-x86 machines
+ ;;
+ esac
+ ;;
+ -nto*)
+ os=-nto-qnx
+ ;;
+ -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+ | -windows* | -osx | -abug | -netware* | -os9* | -beos* \
+ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+ ;; # second pass-through list; -hms* and -beos* were already accepted by the list above
+ -mac*)
+ os=`echo $os | sed -e 's|mac|macos|'`
+ ;;
+ -linux*)
+ os=`echo $os | sed -e 's|linux|linux-gnu|'`
+ ;;
+ -sunos5*)
+ os=`echo $os | sed -e 's|sunos5|solaris2|'`
+ ;;
+ -sunos6*)
+ os=`echo $os | sed -e 's|sunos6|solaris3|'`
+ ;;
+ -opened*) # NOTE(review): unreachable; -opened* is already accepted unchanged by the basic system type list above
+ os=-openedition
+ ;;
+ -wince*)
+ os=-wince
+ ;;
+ -osfrose*) # NOTE(review): unreachable; shadowed by -osf* in the basic system type list above
+ os=-osfrose
+ ;;
+ -osf*) # NOTE(review): unreachable; shadowed by -osf* in the basic system type list above
+ os=-osf
+ ;;
+ -utek*)
+ os=-bsd
+ ;;
+ -dynix*)
+ os=-bsd
+ ;;
+ -acis*)
+ os=-aos
+ ;;
+ -386bsd) # NOTE(review): unreachable; shadowed by -386bsd* in the basic system type list above
+ os=-bsd
+ ;;
+ -ctix* | -uts*)
+ os=-sysv
+ ;;
+ -ns2 )
+ os=-nextstep2
+ ;;
+ -nsk*)
+ os=-nsk
+ ;;
+ # Preserve the version number of sinix5.
+ -sinix5.*)
+ os=`echo $os | sed -e 's|sinix|sysv|'`
+ ;;
+ -sinix*)
+ os=-sysv4
+ ;;
+ -triton*)
+ os=-sysv3
+ ;;
+ -oss*)
+ os=-sysv3
+ ;;
+ -svr4) # NOTE(review): unreachable; the -svr4* alias arm near the top already maps this to -sysv4
+ os=-sysv4
+ ;;
+ -svr3)
+ os=-sysv3
+ ;;
+ -sysvr4)
+ os=-sysv4
+ ;;
+ # This must come after -sysvr4.
+ -sysv*)
+ ;;
+ -ose*)
+ os=-ose
+ ;;
+ -es1800*) # NOTE(review): unreachable; shadowed by -es1800* in the second pass-through list above
+ os=-ose
+ ;;
+ -xenix) # NOTE(review): unreachable; shadowed by -xenix* in the basic system type list above
+ os=-xenix
+ ;;
+ -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+ os=-mint
+ ;;
+ -none) # NOTE(review): unreachable; shadowed by -none* in the second pass-through list above
+ ;;
+ *)
+ # Get rid of the `-' at the beginning of $os, then report it and fail.
+ os=`echo $os | sed 's/[^-]*-//'`
+ echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
+ exit 1
+ ;;
+esac
+else
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or, put another way, the most popular os provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system. Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+case $basic_machine in
+ *-acorn)
+ os=-riscix1.2
+ ;;
+ arm*-rebel)
+ os=-linux
+ ;;
+ arm*-semi)
+ os=-aout
+ ;;
+ pdp10-*)
+ os=-tops20
+ ;;
+ pdp11-*)
+ os=-none
+ ;;
+ *-dec | vax-*)
+ os=-ultrix4.2
+ ;;
+ m68*-apollo)
+ os=-domain
+ ;;
+ i386-sun)
+ os=-sunos4.0.2
+ ;;
+ m68000-sun)
+ os=-sunos3
+ # This also exists in the configure program, but was not the
+ # default.
+ # os=-sunos4
+ ;;
+ m68*-cisco)
+ os=-aout
+ ;;
+ mips*-cisco) # same result as the generic mips*-* arm that follows
+ os=-elf
+ ;;
+ mips*-*)
+ os=-elf
+ ;;
+ *-tti) # must be before sparc entry or we get the wrong os.
+ os=-sysv3
+ ;;
+ sparc-* | *-sun)
+ os=-sunos4.1.1
+ ;;
+ *-be)
+ os=-beos
+ ;;
+ *-ibm)
+ os=-aix
+ ;;
+ *-wec)
+ os=-proelf
+ ;;
+ *-winbond)
+ os=-proelf
+ ;;
+ *-oki)
+ os=-proelf
+ ;;
+ *-hp)
+ os=-hpux
+ ;;
+ *-hitachi)
+ os=-hiux
+ ;;
+ i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+ os=-sysv
+ ;;
+ *-cbm)
+ os=-amigaos
+ ;;
+ *-dg)
+ os=-dgux
+ ;;
+ *-dolphin)
+ os=-sysv3
+ ;;
+ m68k-ccur)
+ os=-rtu
+ ;;
+ m88k-omron*)
+ os=-luna
+ ;;
+ *-next ) # first *-next arm: this is the one that takes effect
+ os=-nextstep
+ ;;
+ *-sequent)
+ os=-ptx
+ ;;
+ *-crds)
+ os=-unos
+ ;;
+ *-ns)
+ os=-genix
+ ;;
+ i370-*)
+ os=-mvs
+ ;;
+ *-next) # NOTE(review): unreachable; the earlier *-next arm always matches first, so -nextstep3 is never chosen here
+ os=-nextstep3
+ ;;
+ *-gould)
+ os=-sysv
+ ;;
+ *-highlevel)
+ os=-bsd
+ ;;
+ *-encore)
+ os=-bsd
+ ;;
+ *-sgi)
+ os=-irix
+ ;;
+ *-siemens)
+ os=-sysv4
+ ;;
+ *-masscomp)
+ os=-rtu
+ ;;
+ f30[01]-fujitsu | f700-fujitsu)
+ os=-uxpv
+ ;;
+ *-rom68k)
+ os=-coff
+ ;;
+ *-*bug)
+ os=-coff
+ ;;
+ *-apple)
+ os=-macos
+ ;;
+ *-atari*)
+ os=-mint
+ ;;
+ *) # no vendor-specific default known
+ os=-none
+ ;;
+esac
+fi
+
+# When the CPU type and the os are known but the manufacturer is still the
+# "unknown" placeholder, infer the most likely manufacturer from the os and
+# substitute it into $basic_machine. Finally print the canonical name.
+vendor=unknown
+case $basic_machine in
+ *-unknown)
+ case $os in
+ -riscix*) vendor=acorn ;;
+ -sunos*) vendor=sun ;;
+ -aix* | -mvs* | -opened*) vendor=ibm ;;
+ -beos*) vendor=be ;;
+ -hpux* | -mpeix*) vendor=hp ;;
+ -hiux* | -hms*) vendor=hitachi ;;
+ -unos*) vendor=crds ;;
+ -dgux*) vendor=dg ;;
+ -luna*) vendor=omron ;;
+ -genix*) vendor=ns ;;
+ -ptx*) vendor=sequent ;;
+ -vxsim* | -vxworks*) vendor=wrs ;;
+ -aux* | -mpw* | -macos*) vendor=apple ;;
+ # The MiNT patterns are the only ones that may begin with a wildcard,
+ # so they stay last and every fixed-prefix os above keeps precedence.
+ -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) vendor=atari ;;
+ esac
+ basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
+ ;;
+esac
+
+echo $basic_machine$os
+exit 0
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/configure b/configure
new file mode 100755
index 0000000..47015d5
--- /dev/null
+++ b/configure
@@ -0,0 +1,4357 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.59.
+#
+# Copyright (C) 2003 Free Software Foundation, Inc.
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be Bourne compatible: put zsh into sh emulation, or bash into POSIX mode.
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh # already probed in the subshell above, so this is known to succeed
+ NULLCMD=:
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+ set -o posix # already probed in the subshell above
+fi
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# Support unset when possible; the probe runs in a subshell so a failure is harmless.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false # "$as_unset VAR" then runs "false VAR": it does nothing and returns failure
+fi
+
+
+# Work around bugs in pre-3.0 UWIN ksh.
+$as_unset ENV MAIL MAILPATH
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances: force every locale variable to C, or unset it if that is not possible.
+for as_var in \
+ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+ LC_TELEPHONE LC_TIME
+do
+ if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then # trial run in a subshell: accept only if it prints nothing
+ eval $as_var=C; export $as_var
+ else
+ $as_unset $as_var
+ fi
+done
+
+# Required to use basename: check that expr supports the \( \) match syntax.
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then # basename must exist and map "/" to "/"
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable: try basename, then expr, then sed, in that order.
+as_me=`$as_basename "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)$' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+ /^X\/\(\/\/\)$/{ s//\1/; q; }
+ /^X\/\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges: spell the character classes out in full.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right: only probe when PATH_SEPARATOR is not already set.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conf$$.sh
+ echo "exit 0" >>conf$$.sh
+ chmod +x conf$$.sh
+ if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then # the script in "." is found only if ";" separates PATH entries
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conf$$.sh
+fi
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" || { # the two samples must differ by exactly one; otherwise recover below
+ # Find who we are. Look in the path if we contain no path at all
+ # relative or not.
+ case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+ ;;
+ esac
+ # We did not find ourselves, most probably we were run as `sh COMMAND'
+ # in which case we are not to be found in the path.
+ if test "x$as_myself" = x; then
+ as_myself=$0
+ fi
+ if test ! -f "$as_myself"; then
+ { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2
+ { (exit 1); exit 1; }; }
+ fi
+ case $CONFIG_SHELL in
+ '')
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for as_base in sh bash ksh sh5; do
+ case $as_dir in
+ /*)
+ if ("$as_dir/$as_base" -c '
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
+ $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
+ $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
+ CONFIG_SHELL=$as_dir/$as_base
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" ${1+"$@"} # restart this script under the first shell that passes the same LINENO probe
+ fi;;
+ esac
+ done
+done
+;;
+ esac
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line before each line; the second 'sed' does the real
+ # work. The second script uses 'N' to pair each line-number line
+ # with the numbered line, and appends trailing '-' during
+ # substitution so that $LINENO is not a special case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
+ sed '=' <$as_myself |
+ sed '
+ N
+ s,$,-,
+ : loop
+ s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+ t loop
+ s,-$,,
+ s,^['$as_cr_digits']*\n,,
+ ' >$as_me.lineno &&
+ chmod +x $as_me.lineno ||
+ { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sorts of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensitive to this).
+ . ./$as_me.lineno
+ # Exit status is that of the last command.
+ exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+ *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T=' ' ;; # echo printed both "\c" and "-n" literally: neither form suppresses the newline
+ *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; # "\c" was printed literally but -n was consumed: use -n
+ *) ECHO_N= ECHO_C='\c' ECHO_T= ;; # "\c" was consumed: use it
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then # repeats the as_expr probe made earlier in this script
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ # We could just check for DJGPP; but this test a) works b) is more generic
+ # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+ if test -f conf$$.exe; then
+ # Don't use ln at all; we don't have any links
+ as_ln_s='cp -p'
+ else
+ as_ln_s='ln -s'
+ fi
+elif ln conf$$.file conf$$ 2>/dev/null; then # symlinks failed: fall back to a hard link
+ as_ln_s=ln
+else
+ as_ln_s='cp -p' # no links of either kind: copy instead
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p=:
+else
+ test -d ./-p && rmdir ./-p # remove a directory literally named "-p" if the failed probe left one behind
+ as_mkdir_p=false
+fi
+
+as_executable_p="test -f" # only checks for a regular file, not for execute permission
+
+# Sed expression to map a string onto a valid CPP name:
+# "*" becomes "P", lower case becomes upper case, anything else not alphanumeric or "_" becomes "_".
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name:
+# "*" and "+" become "p", anything else not alphanumeric or "_" becomes "_".
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" $as_nl"
+
+# CDPATH.
+$as_unset CDPATH
+
+
+# Name of the host.
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# so uname gets run too; only the first line of output is kept.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+exec 6>&1
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_config_libobj_dir=.
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+# Maximum number of lines to put in a shell here document.
+# This variable seems obsolete. It should probably be removed, and
+# only ac_max_sed_lines should be used.
+: ${ac_max_here_lines=38}
+
+# Identity of this package.
+PACKAGE_NAME=
+PACKAGE_TARNAME=
+PACKAGE_VERSION=
+PACKAGE_STRING=
+PACKAGE_BUGREPORT=
+
+ac_unique_file="viterbi27.c"
+# Factoring default headers for most tests: a C #include preamble in which every header except <stdio.h> is guarded by a HAVE_* or STDC_HEADERS macro.
+ac_includes_default="\
+#include <stdio.h>
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#if STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# if HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#else
+# if HAVE_STDINT_H
+# include <stdint.h>
+# endif
+#endif
+#if HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS SO_NAME VERSION CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os SH_LIB REBIND MLIBS ARCH_OPTION LIBOBJS LTLIBOBJS'
+ac_subst_files=''
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datadir='${prefix}/share'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+libdir='${exec_prefix}/lib'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+infodir='${prefix}/info'
+mandir='${prefix}/man'
+
+ac_prev=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval "$ac_prev=\$ac_option"
+ ac_prev=
+ continue
+ fi
+
+ ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case $ac_option in
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir=$ac_optarg ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build_alias ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build_alias=$ac_optarg ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file=$ac_optarg ;;
+
+ --config-cache | -C)
+ cache_file=config.cache ;;
+
+ -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
+ | --da=*)
+ datadir=$ac_optarg ;;
+
+ -disable-* | --disable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+ eval "enable_$ac_feature=no" ;;
+
+ -enable-* | --enable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+ case $ac_option in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "enable_$ac_feature='$ac_optarg'" ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix=$ac_optarg ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=long ;;
+ -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+ ac_init_help=recursive ;;
+ -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+ ac_init_help=short ;;
+
+ -host | --host | --hos | --ho)
+ ac_prev=host_alias ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host_alias=$ac_optarg ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir=$ac_optarg ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir=$ac_optarg ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir=$ac_optarg ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir=$ac_optarg ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst \
+ | --locals | --local | --loca | --loc | --lo)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* \
+ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
+ localstatedir=$ac_optarg ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir=$ac_optarg ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c | -n)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir=$ac_optarg ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix=$ac_optarg ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix=$ac_optarg ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix=$ac_optarg ;;
+
+ # --program-transform-name and its abbreviations.  The first arm is the
+ # form whose value arrives as the next argument (remembered in ac_prev);
+ # the second arm is the --opt=value form.
+ # --program-tra is accepted as a proper prefix of the option name; the
+ # historical misspelling --progr-tra is still accepted so that existing
+ # invocations keep working.
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --program-tra | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --program-tra=* | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name=$ac_optarg ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir=$ac_optarg ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir=$ac_optarg ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site=$ac_optarg ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir=$ac_optarg ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir=$ac_optarg ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target_alias ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target_alias=$ac_optarg ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ case $ac_option in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "with_$ac_package='$ac_optarg'" ;;
+
+ -without-* | --without-*)
+ ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package | sed 's/-/_/g'`
+ eval "with_$ac_package=no" ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes=$ac_optarg ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries=$ac_optarg ;;
+
+ -*) { echo "$as_me: error: unrecognized option: $ac_option
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; }
+ ;;
+
+ *=*)
+ ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid variable name: $ac_envvar" >&2
+ { (exit 1); exit 1; }; }
+ ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
+ eval "$ac_envvar='$ac_optarg'"
+ export $ac_envvar ;;
+
+ *)
+ # FIXME: should be removed in autoconf 3.0.
+ echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+ expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+ : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+ ;;
+
+ esac
+done
+
+# ac_prev is set by the option arms above whose value comes as a separate
+# argument.  If it is still non-empty once parsing is finished, rebuild the
+# option spelling from the variable name (underscores become dashes),
+# report the missing argument on stderr and exit with status 1.
+if test -n "$ac_prev"; then
+ ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+ { echo "$as_me: error: missing argument to $ac_option" >&2
+ { (exit 1); exit 1; }; }
+fi
+
+# Be sure to have absolute paths.
+# Accepted values for exec_prefix and prefix: anything starting with a
+# slash, a backslash or a dollar sign, a drive-letter path (X:/ or X:\),
+# the literal NONE, or the empty string.  Any other value is reported on
+# stderr and configure exits with status 1.
+for ac_var in exec_prefix prefix
+do
+ # Indirect read: ac_val receives the value of the variable named by $ac_var.
+ eval ac_val=$`echo $ac_var`
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* | NONE | '' ) ;;
+ *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# Be sure to have absolute paths.
+# Same check for the fine-tuning directory variables.  Unlike the
+# prefix/exec_prefix check, NONE and the empty string are not accepted
+# here: the value must start with a slash, a backslash, a dollar sign or
+# a drive letter followed by a slash or backslash.
+for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
+ localstatedir libdir includedir oldincludedir infodir mandir
+do
+ # Indirect read: ac_val receives the value of the variable named by $ac_var.
+ eval ac_val=$`echo $ac_var`
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* ) ;;
+ *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+# Cross-compilation guess, made only when a host alias was given:
+#   - no build alias      -> cross_compiling=maybe, plus a warning on stderr
+#   - build alias differs -> cross_compiling=yes
+# When both aliases are equal, cross_compiling is left untouched here.
+if test "x$host_alias" != x; then
+ if test "x$build_alias" = x; then
+ cross_compiling=maybe
+ echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+ If a cross compiler is detected then cross compile mode will be used." >&2
+ elif test "x$build_alias" != "x$host_alias"; then
+ cross_compiling=yes
+ fi
+fi
+
+# Prefix for tool names: "<host_alias>-" when a host alias was given,
+# otherwise empty.  The compiler searches below prepend it to gcc/cc/cl.
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+# With silent=yes, file descriptor 6 (where the "checking for ..." progress
+# messages are written) is pointed at /dev/null.
+test "$silent" = yes && exec 6>/dev/null
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then its parent.
+ ac_confdir=`(dirname "$0") 2>/dev/null ||
+$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$0" : 'X\(//\)[^/]' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$0" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ srcdir=$ac_confdir
+ if test ! -r $srcdir/$ac_unique_file; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r $srcdir/$ac_unique_file; then
+ if test "$ac_srcdir_defaulted" = yes; then
+ { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2
+ { (exit 1); exit 1; }; }
+ else
+ { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+ { (exit 1); exit 1; }; }
+ fi
+fi
+(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null ||
+ { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2
+ { (exit 1); exit 1; }; }
+srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'`
+ac_env_build_alias_set=${build_alias+set}
+ac_env_build_alias_value=$build_alias
+ac_cv_env_build_alias_set=${build_alias+set}
+ac_cv_env_build_alias_value=$build_alias
+ac_env_host_alias_set=${host_alias+set}
+ac_env_host_alias_value=$host_alias
+ac_cv_env_host_alias_set=${host_alias+set}
+ac_cv_env_host_alias_value=$host_alias
+ac_env_target_alias_set=${target_alias+set}
+ac_env_target_alias_value=$target_alias
+ac_cv_env_target_alias_set=${target_alias+set}
+ac_cv_env_target_alias_value=$target_alias
+ac_env_CC_set=${CC+set}
+ac_env_CC_value=$CC
+ac_cv_env_CC_set=${CC+set}
+ac_cv_env_CC_value=$CC
+ac_env_CFLAGS_set=${CFLAGS+set}
+ac_env_CFLAGS_value=$CFLAGS
+ac_cv_env_CFLAGS_set=${CFLAGS+set}
+ac_cv_env_CFLAGS_value=$CFLAGS
+ac_env_LDFLAGS_set=${LDFLAGS+set}
+ac_env_LDFLAGS_value=$LDFLAGS
+ac_cv_env_LDFLAGS_set=${LDFLAGS+set}
+ac_cv_env_LDFLAGS_value=$LDFLAGS
+ac_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_env_CPPFLAGS_value=$CPPFLAGS
+ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_cv_env_CPPFLAGS_value=$CPPFLAGS
+ac_env_CPP_set=${CPP+set}
+ac_env_CPP_value=$CPP
+ac_cv_env_CPP_set=${CPP+set}
+ac_cv_env_CPP_value=$CPP
+
+#
+# Report the --help message.
+#
+# This first part is printed only for the long form (ac_init_help=long).
+if test "$ac_init_help" = "long"; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ # Unquoted here-document delimiter: $0 is expanded, which is why the
+ # backquotes in the text are backslash-escaped.
+ cat <<_ACEOF
+\`configure' configures this package to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print \`checking...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for \`--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or \`..']
+
+_ACEOF
+
+ # Unquoted delimiter again: $ac_default_prefix is expanded, while \$HOME
+ # is escaped so that it is printed literally.
+ cat <<_ACEOF
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --datadir=DIR read-only architecture-independent data [PREFIX/share]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --infodir=DIR info documentation [PREFIX/info]
+ --mandir=DIR man documentation [PREFIX/man]
+_ACEOF
+
+ # Quoted delimiter (\_ACEOF): the text below is printed verbatim.
+ cat <<\_ACEOF
+
+System types:
+ --build=BUILD configure for building on BUILD [guessed]
+ --host=HOST cross-compile to build programs to run on HOST [BUILD]
+ --target=TARGET configure for building compilers for TARGET [HOST]
+_ACEOF
+fi
+
+# Shown for every form of --help (any non-empty ac_init_help).
+# The here-document delimiter is quoted, so the text is printed verbatim
+# and the backquote in `configure' needs no escaping.
+if test -n "$ac_init_help"; then
+
+ cat <<\_ACEOF
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ CPPFLAGS C/C++ preprocessor flags, e.g. -I<include dir> if you have
+ headers in a nonstandard directory <include dir>
+ CPP C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+_ACEOF
+fi
+
+# --help=recursive: visit every existing directory listed in
+# $ac_subdirs_all, work out where its sources live relative to it, and run
+# the configure script found there with a help option.
+if test "$ac_init_help" = "recursive"; then
+ # If there are subdirs, report their specific --help.
+ # Remember the starting directory; the loop returns to it after each visit.
+ ac_popdir=`pwd`
+ # The leading `:' word is skipped immediately (presumably it is there so
+ # that the word list is never empty).
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+ test -d $ac_dir || continue
+ ac_builddir=.
+
+if test "$ac_dir" != .; then
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+ ac_dir_suffix= ac_top_builddir=
+fi
+
+# Derive ac_srcdir (sources of this subdirectory) and ac_top_srcdir
+# (top-level sources) from $srcdir, depending on whether $srcdir is `.',
+# an absolute path or a relative path.
+case $srcdir in
+ .) # No --srcdir option. We are building in place.
+ ac_srcdir=.
+ if test -z "$ac_top_builddir"; then
+ ac_top_srcdir=.
+ else
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+ fi ;;
+ [\\/]* | ?:[\\/]* ) # Absolute path.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir ;;
+ *) # Relative path.
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+
+# Do not use `cd foo && pwd` to compute absolute paths, because
+# the directories may not exist.
+# Each of the four case blocks below builds one ac_abs_* name by string
+# concatenation: `.' maps to the base directory, absolute names are kept,
+# and relative names are appended to the base directory.
+case `pwd` in
+.) ac_abs_builddir="$ac_dir";;
+*)
+ case "$ac_dir" in
+ .) ac_abs_builddir=`pwd`;;
+ [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
+ *) ac_abs_builddir=`pwd`/"$ac_dir";;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_builddir=${ac_top_builddir}.;;
+*)
+ case ${ac_top_builddir}. in
+ .) ac_abs_top_builddir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
+ *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_srcdir=$ac_srcdir;;
+*)
+ case $ac_srcdir in
+ .) ac_abs_srcdir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
+ *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_srcdir=$ac_top_srcdir;;
+*)
+ case $ac_top_srcdir in
+ .) ac_abs_top_srcdir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
+ *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
+ esac;;
+esac
+
+ # Enter the subdirectory and try, in order: configure.gnu, configure,
+ # then $ac_configure when only configure.ac/configure.in exists.
+ # A warning is printed on stderr when none of them is present.
+ cd $ac_dir
+ # Check for guested configure; otherwise get Cygnus style configure.
+ if test -f $ac_srcdir/configure.gnu; then
+ echo
+ $SHELL $ac_srcdir/configure.gnu --help=recursive
+ elif test -f $ac_srcdir/configure; then
+ echo
+ $SHELL $ac_srcdir/configure --help=recursive
+ elif test -f $ac_srcdir/configure.ac ||
+ test -f $ac_srcdir/configure.in; then
+ echo
+ $ac_configure --help
+ else
+ echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+ fi
+ # Back to the directory saved before the loop.
+ cd $ac_popdir
+ done
+fi
+
+# Every requested form of --help has been printed by now; stop with
+# status 0 before any configuration work is done.
+test -n "$ac_init_help" && exit 0
+# $ac_init_version is executed as a command: the version option above sets
+# it to `:' (always succeeds).  In that case the copyright notice is printed
+# verbatim (quoted here-document delimiter) and configure exits with 0.
+if $ac_init_version; then
+ cat <<\_ACEOF
+
+Copyright (C) 2003 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+ exit 0
+fi
+exec 5>config.log
+cat >&5 <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by $as_me, which was
+generated by GNU Autoconf 2.59. Invocation command line was
+
+ $ $0 $@
+
+_ACEOF
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
+
+/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+hostinfo = `(hostinfo) 2>/dev/null || echo unknown`
+/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
+/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ echo "PATH: $as_dir"
+done
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+#
+# Pass 1 collects every kept argument, single-quoted, in ac_configure_args0.
+# Pass 2 rebuilds the same list in ac_configure_args1 and, in parallel, the
+# final ac_configure_args from which repeated options are dropped.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_sep=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+ # `for ac_arg' without a word list iterates over the positional parameters.
+ for ac_arg
+ do
+ case $ac_arg in
+ -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ continue ;;
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ case $ac_pass in
+ 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
+ 2)
+ ac_configure_args1="$ac_configure_args1 '$ac_arg'"
+ if test $ac_must_keep_next = true; then
+ ac_must_keep_next=false # Got value, back to normal.
+ else
+ # Self-contained options (VAR=value, --enable-*, --with-*, ...) are
+ # skipped when the same word occurs again later in the pass-1 list,
+ # so only the last occurrence is recorded.  Any other dash option
+ # is assumed to take the next word as its value, which is then
+ # kept unconditionally.
+ case $ac_arg in
+ *=* | --config-cache | -C | -disable-* | --disable-* \
+ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+ | -with-* | --with-* | -without-* | --without-* | --x)
+ case "$ac_configure_args0 " in
+ "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+ esac
+ ;;
+ -* ) ac_must_keep_next=true ;;
+ esac
+ fi
+ ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
+ # Get rid of the leading space.
+ ac_sep=" "
+ ;;
+ esac
+ done
+done
+# The two scratch lists are no longer needed: unset them with $as_unset,
+# or, if that fails while the variable is still set, empty and export it.
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Be sure not to use single quotes in there, as some shells,
+# such as our DU 5.0 friend, will then `close' the trap.
+#
+# Exit trap (condition 0).  It saves the exit status, then appends to
+# file descriptor 5: the *_cv_* cache variables, the variables named in
+# $ac_subst_vars, those named in $ac_subst_files (if any), the non-empty
+# lines of confdefs.h (if it is non-empty), the signal number when one was
+# caught, and the exit status.  Finally it removes core, *.core, conftest*,
+# confdefs*, conf$$* and $ac_clean_files and exits with the saved status.
+# Wherever a literal single quote is needed inside the trap text it is
+# spelled '"'"' (close quote, double-quoted quote, reopen quote).
+trap 'exit_status=$?
+ # Save into config.log some information that might help in debugging.
+ {
+ echo
+
+ cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+ echo
+ # The following way of writing the cache mishandles newlines in values,
+{
+ (set) 2>&1 |
+ case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ sed -n \
+ "s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
+ ;;
+ *)
+ sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+ ;;
+ esac;
+}
+ echo
+
+ cat <<\_ASBOX
+## ----------------- ##
+## Output variables. ##
+## ----------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_vars
+ do
+ eval ac_val=$`echo $ac_var`
+ echo "$ac_var='"'"'$ac_val'"'"'"
+ done | sort
+ echo
+
+ if test -n "$ac_subst_files"; then
+ cat <<\_ASBOX
+## ------------- ##
+## Output files. ##
+## ------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_files
+ do
+ eval ac_val=$`echo $ac_var`
+ echo "$ac_var='"'"'$ac_val'"'"'"
+ done | sort
+ echo
+ fi
+
+ if test -s confdefs.h; then
+ cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+ echo
+ sed "/^$/d" confdefs.h | sort
+ echo
+ fi
+ test "$ac_signal" != 0 &&
+ echo "$as_me: caught signal $ac_signal"
+ echo "$as_me: exit $exit_status"
+ } >&5
+ rm -f core *.core &&
+ rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
+ exit $exit_status
+ ' 0
+# Signals 1, 2, 13 and 15 record their own number in ac_signal and exit
+# with status 1.  The signal number is spliced into the trap text when the
+# trap is installed, not when it fires.
+for ac_signal in 1 2 13 15; do
+ trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
+done
+# 0 means "no signal caught"; the exit trap tests for exactly this value.
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -rf conftest* confdefs.h
+# AIX cpp loses on an empty file, so make sure it contains at least a newline.
+echo >confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer explicitly selected file to automatically selected ones.
+# When CONFIG_SITE is empty, it defaults to share/config.site and
+# etc/config.site under $prefix, or under $ac_default_prefix when prefix
+# is still NONE.
+if test -z "$CONFIG_SITE"; then
+ if test "x$prefix" != xNONE; then
+ CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+ else
+ CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+ fi
+fi
+# CONFIG_SITE is a whitespace-separated list.  Every readable file in it is
+# announced (fd 5 and fd 6), copied into the log with a "| " prefix on each
+# line, and then sourced into this shell.
+for ac_site_file in $CONFIG_SITE; do
+ if test -r "$ac_site_file"; then
+ { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+echo "$as_me: loading site script $ac_site_file" >&6;}
+ sed 's/^/| /' "$ac_site_file" >&5
+ . "$ac_site_file"
+ fi
+done
+
+if test -r "$cache_file"; then
+ # Some versions of bash will fail to source /dev/null (special
+ # files actually), so we avoid doing that.
+ if test -f "$cache_file"; then
+ { echo "$as_me:$LINENO: loading cache $cache_file" >&5
+echo "$as_me: loading cache $cache_file" >&6;}
+ case $cache_file in
+ [\\/]* | ?:[\\/]* ) . $cache_file;;
+ *) . ./$cache_file;;
+ esac
+ fi
+else
+ { echo "$as_me:$LINENO: creating cache $cache_file" >&5
+echo "$as_me: creating cache $cache_file" >&6;}
+ >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+# The variable names are taken from the ac_env_<name>_set entries in the
+# output of `set'.  For each name the cached state (ac_cv_env_*) is
+# compared with the state recorded for this run (ac_env_*).
+ac_cache_corrupted=false
+for ac_var in `(set) 2>&1 |
+ sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do
+ eval ac_old_set=\$ac_cv_env_${ac_var}_set
+ eval ac_new_set=\$ac_env_${ac_var}_set
+ eval ac_old_val="\$ac_cv_env_${ac_var}_value"
+ eval ac_new_val="\$ac_env_${ac_var}_value"
+ # "old,new" set-state pairs:
+ #   set,  -> was set before, is unset now      (error)
+ #   ,set  -> was unset before, is set now      (error)
+ #   ,     -> unset both times                  (fine)
+ #   other -> set both times; the values must be identical
+ case $ac_old_set,$ac_new_set in
+ set,)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,set)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,);;
+ *)
+ if test "x$ac_old_val" != "x$ac_new_val"; then
+ { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
+echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+ { echo "$as_me:$LINENO: former value: $ac_old_val" >&5
+echo "$as_me: former value: $ac_old_val" >&2;}
+ { echo "$as_me:$LINENO: current value: $ac_new_val" >&5
+echo "$as_me: current value: $ac_new_val" >&2;}
+ ac_cache_corrupted=:
+ fi;;
+ esac
+ # Pass precious variables to config.status.
+ # A variable that is set in this run is appended to ac_configure_args as
+ # 'NAME=value' (embedded single quotes escaped) unless that exact word is
+ # already present.
+ if test "$ac_new_set" = set; then
+ case $ac_new_val in
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+ *) ac_arg=$ac_var=$ac_new_val ;;
+ esac
+ case " $ac_configure_args " in
+ *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+ esac
+ fi
+done
+# ac_cache_corrupted holds a command name (false or :), so it can be run
+# directly as the condition.  Any mismatch found above is fatal.
+if $ac_cache_corrupted; then
+ { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
+echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+ { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
+echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ac_config_headers="$ac_config_headers config.h"
+
+SO_NAME=3
+VERSION=3.0.0
+
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+# First candidate, tried only when a tool prefix is set:
+# ${ac_tool_prefix}gcc.  The result is kept in ac_cv_prog_CC and copied
+# to CC.
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+# An ac_cv_prog_CC that is already set is reused and reported as (cached).
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+# Walk PATH (split on $PATH_SEPARATOR; an empty entry means `.') and stop
+# at the first executable match, trying each known executable extension.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+# Report the outcome on fd 5 (log) and fd 6 (progress output).
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+# Second candidate: plain gcc, tried when ac_cv_prog_CC is still empty.
+# This search uses its own variables (ac_ct_CC / ac_cv_prog_ac_ct_CC) and
+# its result is copied to CC; otherwise CC takes the value of
+# ac_cv_prog_CC.
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+# An ac_cv_prog_ac_ct_CC that is already set is reused and reported as (cached).
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+# Walk PATH and stop at the first executable named gcc (with any of the
+# known executable extensions).
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+# Report the outcome on fd 5 (log) and fd 6 (progress output).
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ CC=$ac_ct_CC
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+# Third and fourth candidates, tried only while CC is still empty:
+# ${ac_tool_prefix}cc (when a tool prefix is set), then plain cc.  The
+# structure mirrors the two gcc searches: prefixed name into
+# ac_cv_prog_CC, unprefixed name into ac_cv_prog_ac_ct_CC.
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+# An ac_cv_prog_CC that is already set is reused and reported as (cached).
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+# Walk PATH and stop at the first executable match.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+# Report the outcome on fd 5 (log) and fd 6 (progress output).
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+# Unprefixed cc, tried when ac_cv_prog_CC is still empty.
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+# An ac_cv_prog_ac_ct_CC that is already set is reused and reported as (cached).
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+# Walk PATH and stop at the first executable match.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+# Report the outcome on fd 5 (log) and fd 6 (progress output).
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ CC=$ac_ct_CC
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+fi
+# Another search for cc while CC is still empty.  This one skips the file
+# /usr/ucb/cc: a match at exactly that path sets ac_prog_rejected=yes and
+# the search goes on with the remaining candidates.
+if test -z "$CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+# An ac_cv_prog_CC that is already set is reused and reported as (cached).
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ # The words of ac_cv_prog_CC become the positional parameters (the dummy
+ # first word guards against an empty value and is shifted away).
+ set dummy $ac_cv_prog_CC
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set CC to just the basename; use the full file name.
+ shift
+ ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+# Report the outcome on fd 5 (log) and fd 6 (progress output).
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in cl
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ test -n "$CC" && break
+ done
+fi
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in cl
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ test -n "$ac_ct_CC" && break
+done
+
+ CC=$ac_ct_CC
+fi
+
+fi
+
+
+# Every lookup above failed if CC is still empty: write the error to fd 5
+# and to stderr, then terminate configure with exit status 1.
+test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+
+# Provide some information about the compiler.
+# The first word of $ac_compile is taken as the compiler command and is run
+# with --version, -v and -V in turn, stdin redirected from /dev/null and all
+# output sent to fd 5. Each exit status is written to fd 5 as well; nothing
+# in this section branches on it.
+echo "$as_me:$LINENO:" \
+ "checking for C compiler version" >&5
+ac_compiler=`set X $ac_compile; echo $2`
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5
+ (eval $ac_compiler --version </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5
+ (eval $ac_compiler -v </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5
+ (eval $ac_compiler -V </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and gives us a first guess
+# at exeext.
+echo "$as_me:$LINENO: checking for C compiler default output file name" >&5
+echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6
+ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5
+ (eval $ac_link_default) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # Find the output, starting from the most likely. This scheme is
+# not robust to junk in `.', hence go to wildcards (a.*) only as a last
+# resort.
+
+# Be careful to initialize this variable, since it used to be cached.
+# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile.
+ac_cv_exeext=
+# b.out is created by i960 compilers.
+for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out
+do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj )
+ ;;
+ conftest.$ac_ext )
+ # This is the source file.
+ ;;
+ [ab].out )
+ # We found the default executable, but exeext='' is most
+ # certainly right.
+ break;;
+ *.* )
+ ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ # FIXME: I believe we export ac_cv_exeext for Libtool,
+ # but it would be cool to find out if it's true. Does anybody
+ # maintain Libtool? --akim.
+ export ac_cv_exeext
+ break;;
+ * )
+ break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: C compiler cannot create executables
+See \`config.log' for more details." >&5
+echo "$as_me: error: C compiler cannot create executables
+See \`config.log' for more details." >&2;}
+ { (exit 77); exit 77; }; }
+fi
+
+ac_exeext=$ac_cv_exeext
+echo "$as_me:$LINENO: result: $ac_file" >&5
+echo "${ECHO_T}$ac_file" >&6
+
+# Check the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+# Outcomes: the probe runs successfully (cross_compiling becomes "no"); the
+# probe fails while cross_compiling is "maybe" (it becomes "yes"); the probe
+# fails otherwise (configure exits with status 1). When cross_compiling is
+# already "yes" the probe is not run at all.
+echo "$as_me:$LINENO: checking whether the C compiler works" >&5
+echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6
+# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
+# If not cross compiling, check that we can run a simple program.
+if test "$cross_compiling" != yes; then
+ # $ac_file still holds the file name the default-output-name loop above
+ # stopped on; it is expanded only when $ac_try is eval'ed.
+ if { ac_try='./$ac_file'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { echo "$as_me:$LINENO: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ fi
+fi
+# Every path that reaches this point reports "yes", including the ones where
+# the probe was skipped or cross_compiling was just switched to "yes".
+echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+
+rm -f a.out a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+# Check the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
+echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6
+echo "$as_me:$LINENO: result: $cross_compiling" >&5
+echo "${ECHO_T}$cross_compiling" >&6
+
+# Determine the suffix of executables: link the test program with $ac_link
+# and inspect which conftest* file appeared. The result is stored in
+# ac_cv_exeext and then copied to EXEEXT and ac_exeext. A link failure is
+# fatal (exit status 1).
+echo "$as_me:$LINENO: checking for suffix of executables" >&5
+echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+ test -f "$ac_file" || continue
+ # Sources, objects and other by-products are skipped. The first remaining
+ # name containing a dot yields the suffix (everything from its first dot
+ # onwards); a name without a dot ends the search and leaves ac_cv_exeext
+ # untouched.
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;;
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ export ac_cv_exeext
+ break;;
+ * ) break;;
+ esac
+done
+else
+ { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+# Delete the linked probe, then report the suffix.
+rm -f conftest$ac_cv_exeext
+echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
+echo "${ECHO_T}$ac_cv_exeext" >&6
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+echo "$as_me:$LINENO: checking for suffix of object files" >&5
+echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6
+if test "${ac_cv_objext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;;
+ *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+ break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
+echo "${ECHO_T}$ac_cv_objext" >&6
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
+echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_compiler_gnu=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_compiler_gnu=no
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
+echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6
+GCC=`test $ac_compiler_gnu = yes && echo yes`
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+CFLAGS="-g"
+echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
+echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6
+if test "${ac_cv_prog_cc_g+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_cv_prog_cc_g=no
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_g" >&6
+if test "$ac_test_CFLAGS" = set; then
+ CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+ if test "$GCC" = yes; then
+ CFLAGS="-g -O2"
+ else
+ CFLAGS="-g"
+ fi
+else
+ if test "$GCC" = yes; then
+ CFLAGS="-O2"
+ else
+ CFLAGS=
+ fi
+fi
+echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5
+echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6
+if test "${ac_cv_prog_cc_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_prog_cc_stdc=no
+ac_save_CC=$CC
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
+ function prototypes and stuff, but not '\xHH' hex character constants.
+ These don't provoke an error unfortunately, instead are silently treated
+ as 'x'. The following induces an error, until -std1 is added to get
+ proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
+ array size at least. It's necessary to write '\x00'==0 to get something
+ that's true only with -std1. */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+# Don't try gcc -ansi; that turns off useful extensions and
+# breaks some systems' header files.
+# AIX -qlanglvl=ansi
+# Ultrix and OSF/1 -std1
+# HP-UX 10.20 and later -Ae
+# HP-UX older versions -Aa -D_HPUX_SOURCE
+# SVR4 -Xc -D__EXTENSIONS__
+for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_stdc=$ac_arg
+break
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.err conftest.$ac_objext
+done
+rm -f conftest.$ac_ext conftest.$ac_objext
+CC=$ac_save_CC
+
+fi
+
+case "x$ac_cv_prog_cc_stdc" in
+ x|xno)
+ echo "$as_me:$LINENO: result: none needed" >&5
+echo "${ECHO_T}none needed" >&6 ;;
+ *)
+ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6
+ CC="$CC $ac_cv_prog_cc_stdc" ;;
+esac
+
+# Some people use a C++ compiler to compile C. Since we use `exit',
+# in C++ we need to declare it. In case someone uses the same compiler
+# for both compiling C and C++ we need to have the C++ compiler decide
+# the declaration of exit, since it's the most demanding environment.
+cat >conftest.$ac_ext <<_ACEOF
+#ifndef __cplusplus
+ choke me
+#endif
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ for ac_declaration in \
+ '' \
+ 'extern "C" void std::exit (int) throw (); using std::exit;' \
+ 'extern "C" void std::exit (int); using std::exit;' \
+ 'extern "C" void exit (int) throw ();' \
+ 'extern "C" void exit (int);' \
+ 'void exit (int);'
+do
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_declaration
+#include <stdlib.h>
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+continue
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_declaration
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ break
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+done
+rm -f conftest*
+if test -n "$ac_declaration"; then
+ echo '#ifdef __cplusplus' >>confdefs.h
+ echo $ac_declaration >>confdefs.h
+ echo '#endif' >>confdefs.h
+fi
+
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test $GCC != "yes"
+then
+ { { echo "$as_me:$LINENO: error: Need GNU C compiler" >&5
+echo "$as_me: error: Need GNU C compiler" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+
+echo "$as_me:$LINENO: checking for malloc in -lc" >&5
+echo $ECHO_N "checking for malloc in -lc... $ECHO_C" >&6
+if test "${ac_cv_lib_c_malloc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any gcc2 internal prototype to avoid an error. */
+#ifdef __cplusplus
+extern "C"
+#endif
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+char malloc ();
+int
+main ()
+{
+malloc ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_c_malloc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_cv_lib_c_malloc=no
+fi
+rm -f conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+echo "$as_me:$LINENO: result: $ac_cv_lib_c_malloc" >&5
+echo "${ECHO_T}$ac_cv_lib_c_malloc" >&6
+if test $ac_cv_lib_c_malloc = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBC 1
+_ACEOF
+
+ LIBS="-lc $LIBS"
+
+fi
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5
+echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+ if test "${ac_cv_prog_CPP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # Double quotes because CPP needs to be expanded
+ for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+ do
+ ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether non-existent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ break
+fi
+
+ done
+ ac_cv_prog_CPP=$CPP
+
+fi
+ CPP=$ac_cv_prog_CPP
+else
+ ac_cv_prog_CPP=$CPP
+fi
+echo "$as_me:$LINENO: result: $CPP" >&5
+echo "${ECHO_T}$CPP" >&6
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether non-existent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ :
+else
+ { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&5
+echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+echo "$as_me:$LINENO: checking for egrep" >&5
+echo $ECHO_N "checking for egrep... $ECHO_C" >&6
+if test "${ac_cv_prog_egrep+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if echo a | (grep -E '(a|b)') >/dev/null 2>&1
+ then ac_cv_prog_egrep='grep -E'
+ else ac_cv_prog_egrep='egrep'
+ fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5
+echo "${ECHO_T}$ac_cv_prog_egrep" >&6
+ EGREP=$ac_cv_prog_egrep
+
+
+echo "$as_me:$LINENO: checking for ANSI C header files" >&5
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6
+if test "${ac_cv_header_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_header_stdc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_cv_header_stdc=no
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ctype.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ exit(2);
+ exit (0);
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_header_stdc=no
+fi
+rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6
+if test $ac_cv_header_stdc = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+
+
+
+
+
+
+
+
+
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_Header=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+eval "$as_ac_Header=no"
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+
+
+
+for ac_header in getopt.h stdio.h stdlib.h memory.h string.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+else
+ # Is the header compilable?
+echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_header_compiler=no
+fi
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6
+
+# Is the header present?
+echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ (
+ cat <<\_ASBOX
+## ------------------------------------------ ##
+## Report this to the AC_PACKAGE_NAME lists. ##
+## ------------------------------------------ ##
+_ASBOX
+ ) |
+ sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+if test -z "$HAVE_stdio.h"
+then
+ { { echo "$as_me:$LINENO: error: Need stdio.h!" >&5
+echo "$as_me: error: Need stdio.h!" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+if test -z "$HAVE_stdlib.h"
+then
+ { { echo "$as_me:$LINENO: error: Need stdlib.h!" >&5
+echo "$as_me: error: Need stdlib.h!" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+if test -z "$HAVE_stdlib.h"
+then
+ { { echo "$as_me:$LINENO: error: Need memory.h!" >&5
+echo "$as_me: error: Need memory.h!" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+if test -z "$HAVE_string.h"
+then
+ { { echo "$as_me:$LINENO: error: Need string.h" >&5
+echo "$as_me: error: Need string.h" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+# Locate the directory holding the auxiliary build scripts.  $srcdir and its
+# two parent directories are searched in order; the first one containing
+# install-sh, install.sh or shtool wins.  The directory is recorded in
+# ac_aux_dir and the matching install command line in ac_install_sh.
+ac_aux_dir=
+for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
+ if test -f $ac_dir/install-sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install-sh -c"
+ break
+ elif test -f $ac_dir/install.sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install.sh -c"
+ break
+ elif test -f $ac_dir/shtool; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/shtool install -c"
+ break
+ fi
+done
+# ac_aux_dir is still empty when none of the three directories matched: abort.
+if test -z "$ac_aux_dir"; then
+ { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." >&5
+echo "$as_me: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+# The helper scripts are run through $SHELL from the directory found above.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"
+ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure.
+
+# Make sure we can run config.sub.
+$ac_config_sub sun4 >/dev/null 2>&1 ||
+ { { echo "$as_me:$LINENO: error: cannot run $ac_config_sub" >&5
+echo "$as_me: error: cannot run $ac_config_sub" >&2;}
+ { (exit 1); exit 1; }; }
+
+# Determine the canonical build system type.  Unless ac_cv_build is already
+# set (cached), start from $build_alias, fall back to the output of
+# config.guess when that is empty, and canonicalize the result with
+# config.sub.  An empty guess or a config.sub failure aborts configure.
+echo "$as_me:$LINENO: checking build system type" >&5
+echo $ECHO_N "checking build system type... $ECHO_C" >&6
+if test "${ac_cv_build+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_build_alias=$build_alias
+test -z "$ac_cv_build_alias" &&
+ ac_cv_build_alias=`$ac_config_guess`
+test -z "$ac_cv_build_alias" &&
+ { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5
+echo "$as_me: error: cannot guess build type; you must specify one" >&2;}
+ { (exit 1); exit 1; }; }
+ac_cv_build=`$ac_config_sub $ac_cv_build_alias` ||
+ { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_build_alias failed" >&5
+echo "$as_me: error: $ac_config_sub $ac_cv_build_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_build" >&5
+echo "${ECHO_T}$ac_cv_build" >&6
+# Split the dash-separated value into its first field (cpu), second field
+# (vendor) and everything after the second dash (os).
+build=$ac_cv_build
+build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+
+# Determine the canonical host system type.  Unless ac_cv_host is already
+# set (cached), start from $host_alias, fall back to the build alias when
+# that is empty, and canonicalize the result with config.sub.  A config.sub
+# failure aborts configure.
+echo "$as_me:$LINENO: checking host system type" >&5
+echo $ECHO_N "checking host system type... $ECHO_C" >&6
+if test "${ac_cv_host+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_host_alias=$host_alias
+test -z "$ac_cv_host_alias" &&
+ ac_cv_host_alias=$ac_cv_build_alias
+ac_cv_host=`$ac_config_sub $ac_cv_host_alias` ||
+ { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_host_alias failed" >&5
+echo "$as_me: error: $ac_config_sub $ac_cv_host_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_host" >&5
+echo "${ECHO_T}$ac_cv_host" >&6
+# Split the dash-separated value into its first field (cpu), second field
+# (vendor) and everything after the second dash (os).
+host=$ac_cv_host
+host_cpu=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+host_vendor=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+host_os=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+
+# Determine the canonical target system type.  Unless ac_cv_target is
+# already set (cached), start from $target_alias, fall back to the host
+# alias when that is empty, and canonicalize the result with config.sub.
+# A config.sub failure aborts configure.
+echo "$as_me:$LINENO: checking target system type" >&5
+echo $ECHO_N "checking target system type... $ECHO_C" >&6
+if test "${ac_cv_target+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_target_alias=$target_alias
+test "x$ac_cv_target_alias" = "x" &&
+ ac_cv_target_alias=$ac_cv_host_alias
+ac_cv_target=`$ac_config_sub $ac_cv_target_alias` ||
+ { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_target_alias failed" >&5
+echo "$as_me: error: $ac_config_sub $ac_cv_target_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_target" >&5
+echo "${ECHO_T}$ac_cv_target" >&6
+# Split the dash-separated value into its first field (cpu), second field
+# (vendor) and everything after the second dash (os).
+target=$ac_cv_target
+target_cpu=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+target_vendor=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+target_os=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+
+# The aliases save the names the user supplied, while $host etc.
+# will get canonicalized.
+test -n "$target_alias" &&
+ test "$program_prefix$program_suffix$program_transform_name" = \
+ NONENONEs,x,x, &&
+ program_prefix=${target_alias}-
+# Choose the CPU-specific compiler options (ARCH_OPTION) and the list of
+# machine-specific object files (MLIBS) from the canonical target CPU:
+#   i386..i686 : -march=<cpu> plus the MMX/SSE/SSE2 objects
+#   powerpc*   : -fno-common -faltivec plus the *_av objects
+#   otherwise  : empty MLIBS
+# NOTE(review): the fallback branch does not assign ARCH_OPTION, so any value
+# already present in the environment is kept -- confirm this is intended.
+case $target_cpu in
+i386|i486|i586|i686)
+ ARCH_OPTION="-march=$target_cpu"
+ MLIBS="viterbi27_mmx.o mmxbfly27.o viterbi27_sse.o ssebfly27.o viterbi27_sse2.o sse2bfly27.o \
+ viterbi29_mmx.o mmxbfly29.o viterbi29_sse.o ssebfly29.o viterbi29_sse2.o sse2bfly29.o \
+ viterbi39_sse2.o viterbi39_sse.o viterbi39_mmx.o \
+ viterbi615_mmx.o viterbi615_sse.o viterbi615_sse2.o \
+ dotprod_mmx.o dotprod_mmx_assist.o \
+ dotprod_sse2.o dotprod_sse2_assist.o \
+ peakval_mmx.o peakval_mmx_assist.o \
+ peakval_sse.o peakval_sse_assist.o \
+ peakval_sse2.o peakval_sse2_assist.o \
+ sumsq.o sumsq_port.o \
+ sumsq_sse2.o sumsq_sse2_assist.o \
+ sumsq_mmx.o sumsq_mmx_assist.o \
+ cpu_features.o cpu_mode_x86.o"
+ ;;
+powerpc*)
+ ARCH_OPTION="-fno-common -faltivec"
+ MLIBS="viterbi27_av.o viterbi29_av.o viterbi39_av.o viterbi615_av.o \
+ encode_rs_av.o \
+ dotprod_av.o sumsq_av.o peakval_av.o cpu_mode_ppc.o"
+ ;;
+*)
+ MLIBS=
+esac
+# Choose the shared-library file name (SH_LIB) and the REBIND command from
+# the canonical target OS: darwin* gets libfec.dylib and an empty REBIND,
+# everything else gets libfec.so and ldconfig.
+# NOTE(review): REBIND is presumably run after installing the library --
+# confirm against makefile.in.
+case $target_os in
+darwin*)
+ SH_LIB=libfec.dylib
+ REBIND=""
+ ;;
+*)
+ SH_LIB=libfec.so
+ REBIND=ldconfig
+ ;;
+esac
+
+
+
+
+
+
+
+
+
+for ac_func in getopt_long memset memmove
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
+if eval "test \"\${$as_ac_var+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any gcc2 internal prototype to avoid an error. */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
+choke me
+#else
+char (*f) () = $ac_func;
+#endif
+#ifdef __cplusplus
+}
+#endif
+
+int
+main ()
+{
+return f != $ac_func;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag"
+ || test ! -s conftest.err'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+eval "$as_ac_var=no"
+fi
+rm -f conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+ ac_config_files="$ac_config_files makefile"
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems. If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+{
+ (set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ "s/'/'\\\\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+ ;;
+ esac;
+} |
+ sed '
+ t clear
+ : clear
+ s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+ t end
+ /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+ : end' >>confcache
+if diff $cache_file confcache >/dev/null 2>&1; then :; else
+ if test -w $cache_file; then
+ test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file"
+ cat confcache >$cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=/{
+s/:*\$(srcdir):*/:/;
+s/:*\${srcdir}:*/:/;
+s/:*@srcdir@:*/:/;
+s/^\([^=]*=[ ]*\):*/\1/;
+s/:*$//;
+s/^[^=]*=[ ]*$//;
+}'
+fi
+
+DEFS=-DHAVE_CONFIG_H
+
+ac_libobjs=
+ac_ltlibobjs=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+ # 1. Remove the extension, and $U if already installed.
+ ac_i=`echo "$ac_i" |
+ sed 's/\$U\././;s/\.o$//;s/\.obj$//'`
+ # 2. Add them.
+ ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext"
+ ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+: ${CONFIG_STATUS=./config.status}
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
+echo "$as_me: creating $CONFIG_STATUS" >&6;}
+cat >$CONFIG_STATUS <<_ACEOF
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+SHELL=\${CONFIG_SHELL-$SHELL}
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+ set -o posix
+fi
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+
+# Work around bugs in pre-3.0 UWIN ksh.
+$as_unset ENV MAIL MAILPATH
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+for as_var in \
+ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+ LC_TELEPHONE LC_TIME
+do
+ if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
+ eval $as_var=C; export $as_var
+ else
+ $as_unset $as_var
+ fi
+done
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)$' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+ /^X\/\(\/\/\)$/{ s//\1/; q; }
+ /^X\/\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conf$$.sh
+ echo "exit 0" >>conf$$.sh
+ chmod +x conf$$.sh
+ if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conf$$.sh
+fi
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" || {
+ # Find who we are. Look in the path if we contain no path at all
+ # relative or not.
+ case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+ ;;
+ esac
+ # We did not find ourselves, most probably we were run as `sh COMMAND'
+ # in which case we are not to be found in the path.
+ if test "x$as_myself" = x; then
+ as_myself=$0
+ fi
+ if test ! -f "$as_myself"; then
+ { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
+echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ case $CONFIG_SHELL in
+ '')
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for as_base in sh bash ksh sh5; do
+ case $as_dir in
+ /*)
+ if ("$as_dir/$as_base" -c '
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
+ $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
+ $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
+ CONFIG_SHELL=$as_dir/$as_base
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+ fi;;
+ esac
+ done
+done
+;;
+ esac
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line before each line; the second 'sed' does the real
+ # work. The second script uses 'N' to pair each line-number line
+ # with the numbered line, and appends trailing '-' during
+ # substitution so that $LINENO is not a special case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
+ sed '=' <$as_myself |
+ sed '
+ N
+ s,$,-,
+ : loop
+ s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+ t loop
+ s,-$,,
+ s,^['$as_cr_digits']*\n,,
+ ' >$as_me.lineno &&
+ chmod +x $as_me.lineno ||
+ { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
+echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensible to this).
+ . ./$as_me.lineno
+ # Exit status is that of the last command.
+ exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+ *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T=' ' ;;
+ *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+ *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ # We could just check for DJGPP; but this test a) works b) is more generic
+ # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+ if test -f conf$$.exe; then
+ # Don't use ln at all; we don't have any links
+ as_ln_s='cp -p'
+ else
+ as_ln_s='ln -s'
+ fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p=:
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+as_executable_p="test -f"
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" $as_nl"
+
+# CDPATH.
+$as_unset CDPATH
+
+exec 6>&1
+
+# Open the log real soon, to keep \$[0] and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling. Logging --version etc. is OK.
+exec 5>>config.log
+{
+ echo
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+} >&5
+cat >&5 <<_CSEOF
+
+This file was extended by $as_me, which was
+generated by GNU Autoconf 2.59. Invocation command line was
+
+ CONFIG_FILES = $CONFIG_FILES
+ CONFIG_HEADERS = $CONFIG_HEADERS
+ CONFIG_LINKS = $CONFIG_LINKS
+ CONFIG_COMMANDS = $CONFIG_COMMANDS
+ $ $0 $@
+
+_CSEOF
+echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
+echo >&5
+_ACEOF
+
+# Files that config.status was made for.
+if test -n "$ac_config_files"; then
+ echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_headers"; then
+ echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_links"; then
+ echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_commands"; then
+ echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+ac_cs_usage="\
+\`$as_me' instantiates files from templates according to the
+current configuration.
+
+Usage: $0 [OPTIONS] [FILE]...
+
+ -h, --help print this help, then exit
+ -V, --version print version number, then exit
+ -q, --quiet do not print progress messages
+ -d, --debug don't remove temporary files
+ --recheck update $as_me by reconfiguring in the same conditions
+ --file=FILE[:TEMPLATE]
+ instantiate the configuration file FILE
+ --header=FILE[:TEMPLATE]
+ instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Report bugs to <bug-autoconf@gnu.org>."
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+ac_cs_version="\\
+config.status
+configured by $0, generated by GNU Autoconf 2.59,
+ with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
+
+Copyright (C) 2003 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+srcdir=$srcdir
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+# If no file are specified by the user, then we need to provide default
+# value. By we need to know if files were specified by the user.
+ac_need_defaults=:
+while test $# != 0
+do
+ case $1 in
+ --*=*)
+ ac_option=`expr "x$1" : 'x\([^=]*\)='`
+ ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
+ ac_shift=:
+ ;;
+ -*)
+ ac_option=$1
+ ac_optarg=$2
+ ac_shift=shift
+ ;;
+ *) # This is not an option, so the user has probably given explicit
+ # arguments.
+ ac_option=$1
+ ac_need_defaults=false;;
+ esac
+
+ case $ac_option in
+ # Handling of the options.
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ ac_cs_recheck=: ;;
+ --version | --vers* | -V )
+ echo "$ac_cs_version"; exit 0 ;;
+ --he | --h)
+ # Conflict between --help and --header
+ { { echo "$as_me:$LINENO: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&2;}
+ { (exit 1); exit 1; }; };;
+ --help | --hel | -h )
+ echo "$ac_cs_usage"; exit 0 ;;
+ --debug | --d* | -d )
+ debug=: ;;
+ --file | --fil | --fi | --f )
+ $ac_shift
+ CONFIG_FILES="$CONFIG_FILES $ac_optarg"
+ ac_need_defaults=false;;
+ --header | --heade | --head | --hea )
+ $ac_shift
+ CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
+ ac_need_defaults=false;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil | --si | --s)
+ ac_cs_silent=: ;;
+
+ # This is an error.
+ -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2;}
+ { (exit 1); exit 1; }; } ;;
+
+ *) ac_config_targets="$ac_config_targets $1" ;;
+
+ esac
+ shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+ exec 6>/dev/null
+ ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+if \$ac_cs_recheck; then
+ echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6
+ exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+fi
+
+_ACEOF
+
+
+
+
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_config_target in $ac_config_targets
+do
+ case "$ac_config_target" in
+ # Handling of arguments.
+ "makefile" ) CONFIG_FILES="$CONFIG_FILES makefile" ;;
+ "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
+ *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+ test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+ test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+fi
+
+# Have a temporary directory for convenience. Make it in the build tree
+# simply because there is no reason to put it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Create a temporary directory, and hook for its removal unless debugging.
+$debug ||
+{
+ trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
+ trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+
+# Create a (secure) tmp directory for tmp files.
+
+{
+ tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
+ test -n "$tmp" && test -d "$tmp"
+} ||
+{
+ tmp=./confstat$$-$RANDOM
+ (umask 077 && mkdir $tmp)
+} ||
+{
+ echo "$me: cannot create a temporary directory in ." >&2
+ { (exit 1); exit 1; }
+}
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+
+#
+# CONFIG_FILES section.
+#
+
+# No need to generate the scripts if there are no CONFIG_FILES.
+# This happens for instance when ./config.status config.h
+if test -n "\$CONFIG_FILES"; then
+ # Protect against being on the right side of a sed subst in config.status.
+ sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g;
+ s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF
+s,@SHELL@,$SHELL,;t t
+s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t
+s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t
+s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t
+s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t
+s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t
+s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t
+s,@exec_prefix@,$exec_prefix,;t t
+s,@prefix@,$prefix,;t t
+s,@program_transform_name@,$program_transform_name,;t t
+s,@bindir@,$bindir,;t t
+s,@sbindir@,$sbindir,;t t
+s,@libexecdir@,$libexecdir,;t t
+s,@datadir@,$datadir,;t t
+s,@sysconfdir@,$sysconfdir,;t t
+s,@sharedstatedir@,$sharedstatedir,;t t
+s,@localstatedir@,$localstatedir,;t t
+s,@libdir@,$libdir,;t t
+s,@includedir@,$includedir,;t t
+s,@oldincludedir@,$oldincludedir,;t t
+s,@infodir@,$infodir,;t t
+s,@mandir@,$mandir,;t t
+s,@build_alias@,$build_alias,;t t
+s,@host_alias@,$host_alias,;t t
+s,@target_alias@,$target_alias,;t t
+s,@DEFS@,$DEFS,;t t
+s,@ECHO_C@,$ECHO_C,;t t
+s,@ECHO_N@,$ECHO_N,;t t
+s,@ECHO_T@,$ECHO_T,;t t
+s,@LIBS@,$LIBS,;t t
+s,@SO_NAME@,$SO_NAME,;t t
+s,@VERSION@,$VERSION,;t t
+s,@CC@,$CC,;t t
+s,@CFLAGS@,$CFLAGS,;t t
+s,@LDFLAGS@,$LDFLAGS,;t t
+s,@CPPFLAGS@,$CPPFLAGS,;t t
+s,@ac_ct_CC@,$ac_ct_CC,;t t
+s,@EXEEXT@,$EXEEXT,;t t
+s,@OBJEXT@,$OBJEXT,;t t
+s,@CPP@,$CPP,;t t
+s,@EGREP@,$EGREP,;t t
+s,@build@,$build,;t t
+s,@build_cpu@,$build_cpu,;t t
+s,@build_vendor@,$build_vendor,;t t
+s,@build_os@,$build_os,;t t
+s,@host@,$host,;t t
+s,@host_cpu@,$host_cpu,;t t
+s,@host_vendor@,$host_vendor,;t t
+s,@host_os@,$host_os,;t t
+s,@target@,$target,;t t
+s,@target_cpu@,$target_cpu,;t t
+s,@target_vendor@,$target_vendor,;t t
+s,@target_os@,$target_os,;t t
+s,@SH_LIB@,$SH_LIB,;t t
+s,@REBIND@,$REBIND,;t t
+s,@MLIBS@,$MLIBS,;t t
+s,@ARCH_OPTION@,$ARCH_OPTION,;t t
+s,@LIBOBJS@,$LIBOBJS,;t t
+s,@LTLIBOBJS@,$LTLIBOBJS,;t t
+CEOF
+
+_ACEOF
+
+ cat >>$CONFIG_STATUS <<\_ACEOF
+ # Split the substitutions into bite-sized pieces for seds with
+ # small command number limits, like on Digital OSF/1 and HP-UX.
+ ac_max_sed_lines=48
+ ac_sed_frag=1 # Number of current file.
+ ac_beg=1 # First line for current file.
+ ac_end=$ac_max_sed_lines # Line after last line for current file.
+ ac_more_lines=:
+ ac_sed_cmds=
+ while $ac_more_lines; do
+ if test $ac_beg -gt 1; then
+ sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+ else
+ sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+ fi
+ if test ! -s $tmp/subs.frag; then
+ ac_more_lines=false
+ else
+ # The purpose of the label and of the branching condition is to
+ # speed up the sed processing (if there are no `@' at all, there
+ # is no need to browse any of the substitutions).
+ # These are the two extra sed commands mentioned above.
+ (echo ':t
+ /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
+ else
+ ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
+ fi
+ ac_sed_frag=`expr $ac_sed_frag + 1`
+ ac_beg=$ac_end
+ ac_end=`expr $ac_end + $ac_max_sed_lines`
+ fi
+ done
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds=cat
+ fi
+fi # test -n "$CONFIG_FILES"
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case $ac_file in
+ - | *:- | *:-:* ) # input from stdin
+ cat >$tmp/stdin
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ * ) ac_file_in=$ac_file.in ;;
+ esac
+
+ # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ { if $as_mkdir_p; then
+ mkdir -p "$ac_dir"
+ else
+ as_dir="$ac_dir"
+ as_dirs=
+ while test ! -d "$as_dir"; do
+ as_dirs="$as_dir $as_dirs"
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ done
+ test ! -n "$as_dirs" || mkdir $as_dirs
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+ { (exit 1); exit 1; }; }; }
+
+ ac_builddir=.
+
+if test "$ac_dir" != .; then
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+ ac_dir_suffix= ac_top_builddir=
+fi
+
+case $srcdir in
+ .) # No --srcdir option. We are building in place.
+ ac_srcdir=.
+ if test -z "$ac_top_builddir"; then
+ ac_top_srcdir=.
+ else
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+ fi ;;
+ [\\/]* | ?:[\\/]* ) # Absolute path.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir ;;
+ *) # Relative path.
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+
+# Do not use `cd foo && pwd` to compute absolute paths, because
+# the directories may not exist.
+case `pwd` in
+.) ac_abs_builddir="$ac_dir";;
+*)
+ case "$ac_dir" in
+ .) ac_abs_builddir=`pwd`;;
+ [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
+ *) ac_abs_builddir=`pwd`/"$ac_dir";;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_builddir=${ac_top_builddir}.;;
+*)
+ case ${ac_top_builddir}. in
+ .) ac_abs_top_builddir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
+ *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_srcdir=$ac_srcdir;;
+*)
+ case $ac_srcdir in
+ .) ac_abs_srcdir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
+ *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
+ esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_srcdir=$ac_top_srcdir;;
+*)
+ case $ac_top_srcdir in
+ .) ac_abs_top_srcdir=$ac_abs_builddir;;
+ [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
+ *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
+ esac;;
+esac
+
+
+
+ if test x"$ac_file" != x-; then
+ { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+ rm -f "$ac_file"
+ fi
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ if test x"$ac_file" = x-; then
+ configure_input=
+ else
+ configure_input="$ac_file. "
+ fi
+ configure_input=$configure_input"Generated from `echo $ac_file_in |
+ sed 's,.*/,,'` by configure."
+
+ # First look for the input files in the build tree, otherwise in the
+ # src tree.
+ ac_file_inputs=`IFS=:
+ for f in $ac_file_in; do
+ case $f in
+ -) echo $tmp/stdin ;;
+ [\\/$]*)
+ # Absolute (can't be DOS-style, as IFS=:)
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ echo "$f";;
+ *) # Relative
+ if test -f "$f"; then
+ # Build tree
+ echo "$f"
+ elif test -f "$srcdir/$f"; then
+ # Source tree
+ echo "$srcdir/$f"
+ else
+ # /dev/null tree
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ fi;;
+ esac
+ done` || { (exit 1); exit 1; }
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+ sed "$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s,@configure_input@,$configure_input,;t t
+s,@srcdir@,$ac_srcdir,;t t
+s,@abs_srcdir@,$ac_abs_srcdir,;t t
+s,@top_srcdir@,$ac_top_srcdir,;t t
+s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
+s,@builddir@,$ac_builddir,;t t
+s,@abs_builddir@,$ac_abs_builddir,;t t
+s,@top_builddir@,$ac_top_builddir,;t t
+s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
+" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
+ rm -f $tmp/stdin
+ if test x"$ac_file" != x-; then
+ mv $tmp/out $ac_file
+ else
+ cat $tmp/out
+ rm -f $tmp/out
+ fi
+
+done
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+#
+# CONFIG_HEADER section.
+#
+
+# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
+# NAME is the cpp macro being defined and VALUE is the value it is being given.
+#
+# ac_d sets the value in "#define NAME VALUE" lines.
+ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)'
+ac_dB='[ ].*$,\1#\2'
+ac_dC=' '
+ac_dD=',;t'
+# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
+ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ac_uB='$,\1#\2define\3'
+ac_uC=' '
+ac_uD=',;t'
+
+for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case $ac_file in
+ - | *:- | *:-:* ) # input from stdin
+ cat >$tmp/stdin
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ * ) ac_file_in=$ac_file.in ;;
+ esac
+
+ test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+
+ # First look for the input files in the build tree, otherwise in the
+ # src tree.
+ ac_file_inputs=`IFS=:
+ for f in $ac_file_in; do
+ case $f in
+ -) echo $tmp/stdin ;;
+ [\\/$]*)
+ # Absolute (can't be DOS-style, as IFS=:)
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ # Do quote $f, to prevent DOS paths from being IFS'd.
+ echo "$f";;
+ *) # Relative
+ if test -f "$f"; then
+ # Build tree
+ echo "$f"
+ elif test -f "$srcdir/$f"; then
+ # Source tree
+ echo "$srcdir/$f"
+ else
+ # /dev/null tree
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ fi;;
+ esac
+ done` || { (exit 1); exit 1; }
+ # Remove the trailing spaces.
+ sed 's/[ ]*$//' $ac_file_inputs >$tmp/in
+
+_ACEOF
+
+# Transform confdefs.h into two sed scripts, `conftest.defines' and
+# `conftest.undefs', that substitutes the proper values into
+# config.h.in to produce config.h. The first handles `#define'
+# templates, and the second `#undef' templates.
+# And first: Protect against being on the right side of a sed subst in
+# config.status. Protect against being in an unquoted here document
+# in config.status.
+rm -f conftest.defines conftest.undefs
+# Using a here document instead of a string reduces the quoting nightmare.
+# Putting comments in sed scripts is not portable.
+#
+# `end' is used to avoid that the second main sed command (meant for
+# 0-ary CPP macros) applies to n-ary macro definitions.
+# See the Autoconf documentation for `clear'.
+cat >confdef2sed.sed <<\_ACEOF
+s/[\\&,]/\\&/g
+s,[\\$`],\\&,g
+t clear
+: clear
+s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp
+t end
+s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp
+: end
+_ACEOF
+# If some macros were called several times there might be several times
+# the same #defines, which is useless. Nevertheless, we may not want to
+# sort them, since we want the *last* AC-DEFINE to be honored.
+uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines
+sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs
+rm -f confdef2sed.sed
+
+# This sed command replaces #undef with comments. This is necessary, for
+# example, in the case of _POSIX_SOURCE, which is predefined and required
+# on some systems where configure will not decide to define it.
+cat >>conftest.undefs <<\_ACEOF
+s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */,
+_ACEOF
+
+# Break up conftest.defines because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS
+echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS
+echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS
+echo ' :' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.defines >/dev/null
+do
+ # Write a limited-size here document to $tmp/defines.sed.
+ echo ' cat >$tmp/defines.sed <<CEOF' >>$CONFIG_STATUS
+ # Speed up: don't consider the non `#define' lines.
+ echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS
+ # Work around the forget-to-reset-the-flag bug.
+ echo 't clr' >>$CONFIG_STATUS
+ echo ': clr' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $tmp/defines.sed $tmp/in >$tmp/out
+ rm -f $tmp/in
+ mv $tmp/out $tmp/in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail
+ rm -f conftest.defines
+ mv conftest.tail conftest.defines
+done
+rm -f conftest.defines
+echo ' fi # grep' >>$CONFIG_STATUS
+echo >>$CONFIG_STATUS
+
+# Break up conftest.undefs because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #undef templates' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.undefs >/dev/null
+do
+ # Write a limited-size here document to $tmp/undefs.sed.
+ echo ' cat >$tmp/undefs.sed <<CEOF' >>$CONFIG_STATUS
+ # Speed up: don't consider the non `#undef'
+ echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS
+ # Work around the forget-to-reset-the-flag bug.
+ echo 't clr' >>$CONFIG_STATUS
+ echo ': clr' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $tmp/undefs.sed $tmp/in >$tmp/out
+ rm -f $tmp/in
+ mv $tmp/out $tmp/in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail
+ rm -f conftest.undefs
+ mv conftest.tail conftest.undefs
+done
+rm -f conftest.undefs
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ if test x"$ac_file" = x-; then
+ echo "/* Generated by configure. */" >$tmp/config.h
+ else
+ echo "/* $ac_file. Generated by configure. */" >$tmp/config.h
+ fi
+ cat $tmp/in >>$tmp/config.h
+ rm -f $tmp/in
+ if test x"$ac_file" != x-; then
+ if diff $ac_file $tmp/config.h >/dev/null 2>&1; then
+ { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
+echo "$as_me: $ac_file is unchanged" >&6;}
+ else
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ { if $as_mkdir_p; then
+ mkdir -p "$ac_dir"
+ else
+ as_dir="$ac_dir"
+ as_dirs=
+ while test ! -d "$as_dir"; do
+ as_dirs="$as_dir $as_dirs"
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ done
+ test ! -n "$as_dirs" || mkdir $as_dirs
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+ { (exit 1); exit 1; }; }; }
+
+ rm -f $ac_file
+ mv $tmp/config.h $ac_file
+ fi
+ else
+ cat $tmp/config.h
+ rm -f $tmp/config.h
+ fi
+done
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+{ (exit 0); exit 0; }
+_ACEOF
+chmod +x $CONFIG_STATUS
+ac_clean_files=$ac_clean_files_save
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded. So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status. When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+ ac_cs_success=:
+ ac_config_status_args=
+ test "$silent" = yes &&
+ ac_config_status_args="$ac_config_status_args --quiet"
+ exec 5>/dev/null
+ $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+ exec 5>>config.log
+ # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+ # would make configure fail if this is the last instruction.
+ $ac_cs_success || { (exit 1); exit 1; }
+fi
+
diff --git a/configure.in b/configure.in
new file mode 100644
index 0000000..4e4110b
--- /dev/null
+++ b/configure.in
@@ -0,0 +1,83 @@
+dnl Process this file with autoconf to produce a configure script.
+AC_INIT(viterbi27.c)
+AC_CONFIG_HEADER(config.h)
+SO_NAME=3
+VERSION=3.0.0
+AC_SUBST(SO_NAME)
+AC_SUBST(VERSION)
+
+dnl Checks for programs. GCC is quoted so the test stays well-formed when it is empty.
+AC_PROG_CC
+if test "$GCC" != "yes"
+then
+ AC_MSG_ERROR([Need GNU C compiler])
+fi
+dnl Checks for libraries.
+AC_CHECK_LIB(c, malloc)
+
+dnl Checks for header files; each result is cached as ac_cv_header_NAME_h (yes or no).
+AC_CHECK_HEADERS(getopt.h stdio.h stdlib.h memory.h string.h)
+if test "$ac_cv_header_stdio_h" != yes
+then
+ AC_MSG_ERROR([Need stdio.h!])
+fi
+if test "$ac_cv_header_stdlib_h" != yes
+then
+ AC_MSG_ERROR([Need stdlib.h!])
+fi
+if test "$ac_cv_header_memory_h" != yes
+then
+ AC_MSG_ERROR([Need memory.h!])
+fi
+if test "$ac_cv_header_string_h" != yes
+then
+ AC_MSG_ERROR([Need string.h])
+fi
+dnl Pick the SIMD object files and extra compiler flags for the target CPU.
+AC_CANONICAL_SYSTEM
+case $target_cpu in
+i386|i486|i586|i686)
+ ARCH_OPTION="-march=$target_cpu"
+ MLIBS="viterbi27_mmx.o mmxbfly27.o viterbi27_sse.o ssebfly27.o viterbi27_sse2.o sse2bfly27.o \
+ viterbi29_mmx.o mmxbfly29.o viterbi29_sse.o ssebfly29.o viterbi29_sse2.o sse2bfly29.o \
+ viterbi39_sse2.o viterbi39_sse.o viterbi39_mmx.o \
+ viterbi615_mmx.o viterbi615_sse.o viterbi615_sse2.o \
+ dotprod_mmx.o dotprod_mmx_assist.o \
+ dotprod_sse2.o dotprod_sse2_assist.o \
+ peakval_mmx.o peakval_mmx_assist.o \
+ peakval_sse.o peakval_sse_assist.o \
+ peakval_sse2.o peakval_sse2_assist.o \
+ sumsq.o sumsq_port.o \
+ sumsq_sse2.o sumsq_sse2_assist.o \
+ sumsq_mmx.o sumsq_mmx_assist.o \
+ cpu_features.o cpu_mode_x86.o"
+ ;;
+powerpc*)
+ ARCH_OPTION="-fno-common -faltivec"
+ MLIBS="viterbi27_av.o viterbi29_av.o viterbi39_av.o viterbi615_av.o \
+ encode_rs_av.o \
+ dotprod_av.o sumsq_av.o peakval_av.o cpu_mode_ppc.o"
+ ;;
+*)
+ MLIBS=
+esac
+case $target_os in
+darwin*)
+ SH_LIB=libfec.dylib
+ REBIND=""
+ ;;
+*)
+ SH_LIB=libfec.so
+ REBIND=ldconfig
+ ;;
+esac
+AC_SUBST(SH_LIB)
+AC_SUBST(REBIND)
+AC_SUBST(MLIBS)
+AC_SUBST(ARCH_OPTION)
+
+
+dnl Checks for library functions.
+AC_CHECK_FUNCS(getopt_long memset memmove)
+
+AC_OUTPUT(makefile)
diff --git a/cpu_features.s b/cpu_features.s
new file mode 100644
index 0000000..ef4ba4e
--- /dev/null
+++ b/cpu_features.s
@@ -0,0 +1,15 @@
+.text # int cpu_features(void): run CPUID with EAX=1 and return the EDX result in %eax
+.global cpu_features
+ .type cpu_features,@function
+cpu_features:
+ pushl %ebx # cpuid overwrites ebx/ecx/edx, so save all three first
+ pushl %ecx
+ pushl %edx
+ movl $1,%eax # select CPUID leaf 1
+ cpuid
+ movl %edx,%eax # EDX becomes the return value (bits 23/25/26 are tested by find_cpu_mode)
+ popl %edx # restore in reverse order of the pushes
+ popl %ecx
+ popl %ebx
+ ret
+ \ No newline at end of file
diff --git a/cpu_mode_ppc.c b/cpu_mode_ppc.c
new file mode 100644
index 0000000..0071558
--- /dev/null
+++ b/cpu_mode_ppc.c
@@ -0,0 +1,40 @@
+/* Determine CPU support for SIMD on Power PC
+ * Copyright 2004 Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include "fec.h"
+#ifdef __VEC__
+#include <sys/sysctl.h>
+#endif
+
+/* Printable names of the SIMD instruction sets; find_cpu_mode() indexes this table with Cpu_mode */
+char *Cpu_modes[] = {"Unknown","Portable C","x86 Multi Media Extensions (MMX)",
+ "x86 Streaming SIMD Extensions (SSE)",
+ "x86 Streaming SIMD Extensions 2 (SSE2)",
+ "PowerPC G4/G5 Altivec/Velocity Engine"};
+
+enum cpu_mode Cpu_mode; /* Selected mode; find_cpu_mode() only assigns it while it is UNKNOWN */
+
+void find_cpu_mode(void){
+  /* Detection runs once; an already-decided Cpu_mode is left untouched. */
+  if(Cpu_mode != UNKNOWN)
+    return;
+
+#ifndef __VEC__
+  /* __VEC__ is not defined: only the portable C code is selected */
+  Cpu_mode = PORT;
+#else
+  {
+    /* Ask the OS, through the CTL_HW/HW_VECTORUNIT sysctl, whether Altivec exists */
+    int mib[2] = { CTL_HW, HW_VECTORUNIT };
+    int vector_unit = 0;
+    size_t len = sizeof(vector_unit);
+    if(sysctl(mib, 2, &vector_unit, &len, NULL, 0) == 0 && vector_unit)
+      Cpu_mode = ALTIVEC;
+    else
+      Cpu_mode = PORT; /* query failed or reported no vector unit */
+  }
+#endif
+
+  fprintf(stderr,"SIMD CPU detect: %s\n",Cpu_modes[Cpu_mode]);
+}
diff --git a/cpu_mode_x86.c b/cpu_mode_x86.c
new file mode 100644
index 0000000..322018e
--- /dev/null
+++ b/cpu_mode_x86.c
@@ -0,0 +1,33 @@
+/* Determine CPU support for SIMD
+ * Copyright 2004 Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include "fec.h"
+
+/* Printable names of the SIMD instruction sets; find_cpu_mode() indexes this table with Cpu_mode */
+char *Cpu_modes[] = {"Unknown","Portable C","x86 Multi Media Extensions (MMX)",
+ "x86 Streaming SIMD Extensions (SSE)",
+ "x86 Streaming SIMD Extensions 2 (SSE2)",
+ "PowerPC G4/G5 Altivec/Velocity Engine"};
+
+enum cpu_mode Cpu_mode; /* Selected mode; find_cpu_mode() only assigns it while it is UNKNOWN */
+
+void find_cpu_mode(void){
+  /* Probe the CPU once; later calls keep the Cpu_mode already chosen. */
+  int features;
+  if(Cpu_mode == UNKNOWN){
+    /* Feature word handed back by the cpu_features() helper */
+    features = cpu_features();
+
+    /* Take the most capable instruction set whose feature bit is set */
+    if(features & (1<<26))
+      Cpu_mode = SSE2; /* bit 26: SSE2 is present */
+    else if(features & (1<<25))
+      Cpu_mode = SSE; /* bit 25: SSE is present */
+    else if(features & (1<<23))
+      Cpu_mode = MMX; /* bit 23: MMX is present */
+    else
+      Cpu_mode = PORT; /* no SIMD at all: portable C */
+    fprintf(stderr,"SIMD CPU detect: %s\n",Cpu_modes[Cpu_mode]);
+  }
+}
diff --git a/decode_rs.c b/decode_rs.c
new file mode 100644
index 0000000..d7f97b3
--- /dev/null
+++ b/decode_rs.c
@@ -0,0 +1,262 @@
+/* Reed-Solomon decoder
+ * Copyright 2002 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include <string.h>
+
+/* NULL is supplied by <string.h> (included above); redefining it here can clash with libc's spelling */
+#define min(a,b) ((a) < (b) ? (a) : (b)) /* evaluates each argument twice: pass side-effect-free expressions */
+
+#ifdef FIXED
+#include "fixed.h"
+#elif defined(BIGSYM)
+#include "int.h"
+#else
+#include "char.h"
+#endif
+
+int DECODE_RS( /* returns the number of error/erasure locations found (0 if none), or -1 on failure */
+#ifdef FIXED
+data_t *data, int *eras_pos, int no_eras,int pad){
+#else
+void *p,data_t *data, int *eras_pos, int no_eras){
+  struct rs *rs = (struct rs *)p;
+#endif
+  int deg_lambda, el, deg_omega;
+  int i, j, r,k;
+  data_t u,q,tmp,num1,num2,den,discr_r;
+  data_t lambda[NROOTS+1], s[NROOTS]; /* Err+Eras Locator poly
+                                       * and syndrome poly */
+  data_t b[NROOTS+1], t[NROOTS+1], omega[NROOTS+1];
+  data_t root[NROOTS], reg[NROOTS+1], loc[NROOTS];
+  int syn_error, count;
+
+#ifdef FIXED
+  /* Check pad parameter for validity */
+  if(pad < 0 || pad >= NN)
+    return -1;
+#endif
+  if(no_eras > NROOTS) return -1; /* lambda[] holds only NROOTS+1 terms; more erasures would overrun it */
+  /* form the syndromes; i.e., evaluate data(x) at roots of g(x) */
+  for(i=0;i<NROOTS;i++)
+    s[i] = data[0];
+
+  for(j=1;j<NN-PAD;j++){
+    for(i=0;i<NROOTS;i++){
+      if(s[i] == 0){
+        s[i] = data[j];
+      } else {
+        s[i] = data[j] ^ ALPHA_TO[MODNN(INDEX_OF[s[i]] + (FCR+i)*PRIM)];
+      }
+    }
+  }
+
+  /* Convert syndromes to index form, checking for nonzero condition */
+  syn_error = 0;
+  for(i=0;i<NROOTS;i++){
+    syn_error |= s[i];
+    s[i] = INDEX_OF[s[i]];
+  }
+
+  if (!syn_error) {
+    /* if syndrome is zero, data[] is a codeword and there are no
+     * errors to correct. So return data[] unmodified
+     */
+    count = 0;
+    goto finish;
+  }
+  memset(&lambda[1],0,NROOTS*sizeof(lambda[0]));
+  lambda[0] = 1;
+
+  if (no_eras > 0) {
+    /* Init lambda to be the erasure locator polynomial */
+    lambda[1] = ALPHA_TO[MODNN(PRIM*(NN-1-eras_pos[0]))];
+    for (i = 1; i < no_eras; i++) {
+      u = MODNN(PRIM*(NN-1-eras_pos[i]));
+      for (j = i+1; j > 0; j--) {
+        tmp = INDEX_OF[lambda[j - 1]];
+        if(tmp != A0)
+          lambda[j] ^= ALPHA_TO[MODNN(u + tmp)];
+      }
+    }
+
+#if DEBUG >= 1
+    /* Test code that verifies the erasure locator polynomial just constructed
+       Needed only for decoder debugging. */
+
+    /* find roots of the erasure location polynomial */
+    for(i=1;i<=no_eras;i++)
+      reg[i] = INDEX_OF[lambda[i]];
+
+    count = 0;
+    for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) {
+      q = 1;
+      for (j = 1; j <= no_eras; j++)
+        if (reg[j] != A0) {
+          reg[j] = MODNN(reg[j] + j);
+          q ^= ALPHA_TO[reg[j]];
+        }
+      if (q != 0)
+        continue;
+      /* store root and error location number indices */
+      root[count] = i;
+      loc[count] = k;
+      count++;
+    }
+    if (count != no_eras) {
+      printf("count = %d no_eras = %d\n lambda(x) is WRONG\n",count,no_eras);
+      count = -1;
+      goto finish;
+    }
+#if DEBUG >= 2
+    printf("\n Erasure positions as determined by roots of Eras Loc Poly:\n");
+    for (i = 0; i < count; i++)
+      printf("%d ", loc[i]);
+    printf("\n");
+#endif
+#endif
+  }
+  for(i=0;i<NROOTS+1;i++)
+    b[i] = INDEX_OF[lambda[i]];
+
+  /*
+   * Begin Berlekamp-Massey algorithm to determine error+erasure
+   * locator polynomial
+   */
+  r = no_eras;
+  el = no_eras;
+  while (++r <= NROOTS) { /* r is the step number */
+    /* Compute discrepancy at the r-th step in poly-form */
+    discr_r = 0;
+    for (i = 0; i < r; i++){
+      if ((lambda[i] != 0) && (s[r-i-1] != A0)) {
+        discr_r ^= ALPHA_TO[MODNN(INDEX_OF[lambda[i]] + s[r-i-1])];
+      }
+    }
+    discr_r = INDEX_OF[discr_r]; /* Index form */
+    if (discr_r == A0) {
+      /* 2 lines below: B(x) <-- x*B(x) */
+      memmove(&b[1],b,NROOTS*sizeof(b[0]));
+      b[0] = A0;
+    } else {
+      /* 7 lines below: T(x) <-- lambda(x) - discr_r*x*b(x) */
+      t[0] = lambda[0];
+      for (i = 0 ; i < NROOTS; i++) {
+        if(b[i] != A0)
+          t[i+1] = lambda[i+1] ^ ALPHA_TO[MODNN(discr_r + b[i])];
+        else
+          t[i+1] = lambda[i+1];
+      }
+      if (2 * el <= r + no_eras - 1) {
+        el = r + no_eras - el;
+        /*
+         * 2 lines below: B(x) <-- inv(discr_r) *
+         * lambda(x)
+         */
+        for (i = 0; i <= NROOTS; i++)
+          b[i] = (lambda[i] == 0) ? A0 : MODNN(INDEX_OF[lambda[i]] - discr_r + NN);
+      } else {
+        /* 2 lines below: B(x) <-- x*B(x) */
+        memmove(&b[1],b,NROOTS*sizeof(b[0]));
+        b[0] = A0;
+      }
+      memcpy(lambda,t,(NROOTS+1)*sizeof(t[0]));
+    }
+  }
+
+  /* Convert lambda to index form and compute deg(lambda(x)) */
+  deg_lambda = 0;
+  for(i=0;i<NROOTS+1;i++){
+    lambda[i] = INDEX_OF[lambda[i]];
+    if(lambda[i] != A0)
+      deg_lambda = i;
+  }
+  /* Find roots of the error+erasure locator polynomial by Chien search */
+  memcpy(&reg[1],&lambda[1],NROOTS*sizeof(reg[0]));
+  count = 0; /* Number of roots of lambda(x) */
+  for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) {
+    q = 1; /* constant term: lambda[0] is always 0 in index form, i.e. 1 in poly form */
+    for (j = deg_lambda; j > 0; j--){
+      if (reg[j] != A0) {
+        reg[j] = MODNN(reg[j] + j);
+        q ^= ALPHA_TO[reg[j]];
+      }
+    }
+    if (q != 0)
+      continue; /* Not a root */
+    /* store root (index-form) and error location number */
+#if DEBUG>=2
+    printf("count %d root %d loc %d\n",count,i,k);
+#endif
+    root[count] = i;
+    loc[count] = k;
+    /* If we've already found max possible roots,
+     * abort the search to save time
+     */
+    if(++count == deg_lambda)
+      break;
+  }
+  if (deg_lambda != count) {
+    /*
+     * deg(lambda) unequal to number of roots => uncorrectable
+     * error detected
+     */
+    count = -1;
+    goto finish;
+  }
+  /*
+   * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo
+   * x**NROOTS). in index form. Also find deg(omega).
+   */
+  deg_omega = deg_lambda-1;
+  for (i = 0; i <= deg_omega;i++){
+    tmp = 0;
+    for(j=i;j >= 0; j--){
+      if ((s[i - j] != A0) && (lambda[j] != A0))
+        tmp ^= ALPHA_TO[MODNN(s[i - j] + lambda[j])];
+    }
+    omega[i] = INDEX_OF[tmp];
+  }
+
+  /*
+   * Compute error values in poly-form. num1 = omega(inv(X(l))), num2 =
+   * inv(X(l))**(FCR-1) and den = lambda_pr(inv(X(l))) all in poly-form
+   */
+  for (j = count-1; j >=0; j--) {
+    num1 = 0;
+    for (i = deg_omega; i >= 0; i--) {
+      if (omega[i] != A0)
+        num1 ^= ALPHA_TO[MODNN(omega[i] + i * root[j])];
+    }
+    num2 = ALPHA_TO[MODNN(root[j] * (FCR - 1) + NN)];
+    den = 0;
+
+    /* lambda[i+1] for i even is the formal derivative lambda_pr of lambda[i] */
+    for (i = min(deg_lambda,NROOTS-1) & ~1; i >= 0; i -=2) {
+      if(lambda[i+1] != A0)
+        den ^= ALPHA_TO[MODNN(lambda[i+1] + i * root[j])];
+    }
+#if DEBUG >= 1
+    if (den == 0) {
+      printf("\n ERROR: denominator = 0\n");
+      count = -1;
+      goto finish;
+    }
+#endif
+    /* Apply error to data; locations below PAD are skipped rather than written */
+    if (num1 != 0 && loc[j] >= PAD) {
+      data[loc[j]-PAD] ^= ALPHA_TO[MODNN(INDEX_OF[num1] + INDEX_OF[num2] + NN - INDEX_OF[den])];
+    }
+  }
+ finish:
+  if(eras_pos != NULL){ /* report the locations found back to the caller; nothing is copied when count <= 0 */
+    for(i=0;i<count;i++)
+      eras_pos[i] = loc[i];
+  }
+  return count;
+}
diff --git a/decode_rs.h b/decode_rs.h
new file mode 100644
index 0000000..c165cf3
--- /dev/null
+++ b/decode_rs.h
@@ -0,0 +1,298 @@
+/* The guts of the Reed-Solomon decoder, meant to be #included
+ * into a function body with the following typedefs, macros and variables supplied
+ * according to the code parameters:
+
+ * data_t - a typedef for the data symbol
+ * data_t data[] - array of NN data and parity symbols to be corrected in place
+ * retval - an integer lvalue into which the decoder's return code is written
+ * NROOTS - the number of roots in the RS code generator polynomial,
+ * which is the same as the number of parity symbols in a block.
+ Integer variable or literal.
+ * NN - the total number of symbols in a RS block. Integer variable or literal.
+ * PAD - the number of pad symbols in a block. Integer variable or literal.
+ * ALPHA_TO - The address of an array of NN elements to convert Galois field
+ * elements in index (log) form to polynomial form. Read only.
+ * INDEX_OF - The address of an array of NN elements to convert Galois field
+ * elements in polynomial form to index (log) form. Read only.
+ * MODNN - a function to reduce its argument modulo NN. May be inline or a macro.
+ * FCR - An integer literal or variable specifying the first consecutive root of the
+ * Reed-Solomon generator polynomial. Integer variable or literal.
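+ * PRIM - The primitive root of the generator poly. Integer variable or literal.
+ * IPRIM - Step added (modulo NN) to the error location number on each iteration of
+ * the root search below. Integer variable or literal. Used by the code but not
+ * checked by the #if tests that follow.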
+ * DEBUG - If set to 1 or more, do various internal consistency checking. Leave this
+ * undefined for production code
+
+ * The memset(), memmove(), and memcpy() functions are used. The appropriate header
+ * file declaring these functions (usually <string.h>) must be included by the calling
+ * program.
+ */
+
+
+#if !defined(NROOTS)
+#error "NROOTS not defined"
+#endif
+
+#if !defined(NN)
+#error "NN not defined"
+#endif
+
+#if !defined(PAD)
+#error "PAD not defined"
+#endif
+
+#if !defined(ALPHA_TO)
+#error "ALPHA_TO not defined"
+#endif
+
+#if !defined(INDEX_OF)
+#error "INDEX_OF not defined"
+#endif
+
+#if !defined(MODNN)
+#error "MODNN not defined"
+#endif
+
+#if !defined(FCR)
+#error "FCR not defined"
+#endif
+
+#if !defined(PRIM)
+#error "PRIM not defined"
+#endif
+
+#if !defined(NULL)
+#define NULL ((void *)0)
+#endif
+
+#undef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#undef A0
+#define A0 (NN)
+
+{
+  /* Errors-and-erasures Reed-Solomon decoder body.
+   *
+   * Stages: syndrome computation, erasure locator initialization,
+   * Berlekamp-Massey, root search, evaluator polynomial, then the error
+   * values are XORed into data[] in place.
+   *
+   * On exit retval holds the number of locator roots found (0 when the
+   * syndromes are all zero) or -1 when the block cannot be corrected or the
+   * erasure arguments are invalid. When eras_pos is non-NULL its first
+   * retval entries are overwritten with the located positions.
+   */
+  int deg_lambda, el, deg_omega;
+  int i, j, r,k;
+  data_t u,q,tmp,num1,num2,den,discr_r;
+  data_t lambda[NROOTS+1], s[NROOTS]; /* Err+Eras Locator poly
+                                       * and syndrome poly */
+  data_t b[NROOTS+1], t[NROOTS+1], omega[NROOTS+1];
+  data_t root[NROOTS], reg[NROOTS+1], loc[NROOTS];
+  int syn_error, count;
+
+  /* form the syndromes; i.e., evaluate data(x) at roots of g(x) */
+  for(i=0;i<NROOTS;i++)
+    s[i] = data[0];
+
+  for(j=1;j<NN-PAD;j++){
+    for(i=0;i<NROOTS;i++){
+      if(s[i] == 0){
+        s[i] = data[j];
+      } else {
+        s[i] = data[j] ^ ALPHA_TO[MODNN(INDEX_OF[s[i]] + (FCR+i)*PRIM)];
+      }
+    }
+  }
+
+  /* Convert syndromes to index form, checking for nonzero condition */
+  syn_error = 0;
+  for(i=0;i<NROOTS;i++){
+    syn_error |= s[i];
+    s[i] = INDEX_OF[s[i]];
+  }
+
+  if (!syn_error) {
+    /* if syndrome is zero, data[] is a codeword and there are no
+     * errors to correct. So return data[] unmodified
+     */
+    count = 0;
+    goto finish;
+  }
+
+  /* Reject erasure arguments that cannot be processed safely: the erasure
+   * locator built below writes lambda[no_eras], and lambda[] has only
+   * NROOTS+1 entries; eras_pos[] is read whenever no_eras > 0.
+   * The check sits after the zero-syndrome exit so that a clean block is
+   * still reported as clean regardless of the erasure arguments.
+   */
+  if (no_eras < 0 || no_eras > NROOTS || (no_eras > 0 && eras_pos == NULL)) {
+    count = -1;
+    goto finish;
+  }
+
+  memset(&lambda[1],0,NROOTS*sizeof(lambda[0]));
+  lambda[0] = 1;
+
+  if (no_eras > 0) {
+    /* Init lambda to be the erasure locator polynomial */
+    lambda[1] = ALPHA_TO[MODNN(PRIM*(NN-1-eras_pos[0]))];
+    for (i = 1; i < no_eras; i++) {
+      u = MODNN(PRIM*(NN-1-eras_pos[i]));
+      for (j = i+1; j > 0; j--) {
+        tmp = INDEX_OF[lambda[j - 1]];
+        if(tmp != A0)
+          lambda[j] ^= ALPHA_TO[MODNN(u + tmp)];
+      }
+    }
+
+#if DEBUG >= 1
+    /* Test code that verifies the erasure locator polynomial just constructed
+       Needed only for decoder debugging. */
+
+    /* find roots of the erasure location polynomial */
+    for(i=1;i<=no_eras;i++)
+      reg[i] = INDEX_OF[lambda[i]];
+
+    count = 0;
+    for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) {
+      q = 1;
+      for (j = 1; j <= no_eras; j++)
+        if (reg[j] != A0) {
+          reg[j] = MODNN(reg[j] + j);
+          q ^= ALPHA_TO[reg[j]];
+        }
+      if (q != 0)
+        continue;
+      /* store root and error location number indices */
+      root[count] = i;
+      loc[count] = k;
+      count++;
+    }
+    if (count != no_eras) {
+      printf("count = %d no_eras = %d\n lambda(x) is WRONG\n",count,no_eras);
+      count = -1;
+      goto finish;
+    }
+#if DEBUG >= 2
+    printf("\n Erasure positions as determined by roots of Eras Loc Poly:\n");
+    for (i = 0; i < count; i++)
+      printf("%d ", loc[i]);
+    printf("\n");
+#endif
+#endif
+  }
+  /* b(x) starts as lambda(x) converted to index form */
+  for(i=0;i<NROOTS+1;i++)
+    b[i] = INDEX_OF[lambda[i]];
+
+  /*
+   * Begin Berlekamp-Massey algorithm to determine error+erasure
+   * locator polynomial
+   */
+  r = no_eras;
+  el = no_eras;
+  while (++r <= NROOTS) { /* r is the step number */
+    /* Compute discrepancy at the r-th step in poly-form */
+    discr_r = 0;
+    for (i = 0; i < r; i++){
+      if ((lambda[i] != 0) && (s[r-i-1] != A0)) {
+        discr_r ^= ALPHA_TO[MODNN(INDEX_OF[lambda[i]] + s[r-i-1])];
+      }
+    }
+    discr_r = INDEX_OF[discr_r]; /* Index form */
+    if (discr_r == A0) {
+      /* 2 lines below: B(x) <-- x*B(x) */
+      memmove(&b[1],b,NROOTS*sizeof(b[0]));
+      b[0] = A0;
+    } else {
+      /* 7 lines below: T(x) <-- lambda(x) - discr_r*x*b(x) */
+      t[0] = lambda[0];
+      for (i = 0 ; i < NROOTS; i++) {
+        if(b[i] != A0)
+          t[i+1] = lambda[i+1] ^ ALPHA_TO[MODNN(discr_r + b[i])];
+        else
+          t[i+1] = lambda[i+1];
+      }
+      if (2 * el <= r + no_eras - 1) {
+        el = r + no_eras - el;
+        /*
+         * 2 lines below: B(x) <-- inv(discr_r) *
+         * lambda(x)
+         */
+        for (i = 0; i <= NROOTS; i++)
+          b[i] = (lambda[i] == 0) ? A0 : MODNN(INDEX_OF[lambda[i]] - discr_r + NN);
+      } else {
+        /* 2 lines below: B(x) <-- x*B(x) */
+        memmove(&b[1],b,NROOTS*sizeof(b[0]));
+        b[0] = A0;
+      }
+      memcpy(lambda,t,(NROOTS+1)*sizeof(t[0]));
+    }
+  }
+
+  /* Convert lambda to index form and compute deg(lambda(x)) */
+  deg_lambda = 0;
+  for(i=0;i<NROOTS+1;i++){
+    lambda[i] = INDEX_OF[lambda[i]];
+    if(lambda[i] != A0)
+      deg_lambda = i;
+  }
+  /* Find roots of the error+erasure locator polynomial by Chien search */
+  memcpy(&reg[1],&lambda[1],NROOTS*sizeof(reg[0]));
+  count = 0; /* Number of roots of lambda(x) */
+  for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) {
+    q = 1; /* lambda[0] is always 0 */
+    for (j = deg_lambda; j > 0; j--){
+      if (reg[j] != A0) {
+        reg[j] = MODNN(reg[j] + j);
+        q ^= ALPHA_TO[reg[j]];
+      }
+    }
+    if (q != 0)
+      continue; /* Not a root */
+    /* store root (index-form) and error location number */
+#if DEBUG>=2
+    printf("count %d root %d loc %d\n",count,i,k);
+#endif
+    root[count] = i;
+    loc[count] = k;
+    /* If we've already found max possible roots,
+     * abort the search to save time
+     */
+    if(++count == deg_lambda)
+      break;
+  }
+  if (deg_lambda != count) {
+    /*
+     * deg(lambda) unequal to number of roots => uncorrectable
+     * error detected
+     */
+    count = -1;
+    goto finish;
+  }
+  /*
+   * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo
+   * x**NROOTS). in index form. Also find deg(omega).
+   */
+  deg_omega = deg_lambda-1;
+  for (i = 0; i <= deg_omega;i++){
+    tmp = 0;
+    for(j=i;j >= 0; j--){
+      if ((s[i - j] != A0) && (lambda[j] != A0))
+        tmp ^= ALPHA_TO[MODNN(s[i - j] + lambda[j])];
+    }
+    omega[i] = INDEX_OF[tmp];
+  }
+
+  /*
+   * Compute error values in poly-form. num1 = omega(inv(X(l))), num2 =
+   * inv(X(l))**(FCR-1) and den = lambda_pr(inv(X(l))) all in poly-form
+   */
+  for (j = count-1; j >=0; j--) {
+    num1 = 0;
+    for (i = deg_omega; i >= 0; i--) {
+      if (omega[i] != A0)
+        num1 ^= ALPHA_TO[MODNN(omega[i] + i * root[j])];
+    }
+    num2 = ALPHA_TO[MODNN(root[j] * (FCR - 1) + NN)];
+    den = 0;
+
+    /* lambda[i+1] for i even is the formal derivative lambda_pr of lambda[i] */
+    for (i = MIN(deg_lambda,NROOTS-1) & ~1; i >= 0; i -=2) {
+      if(lambda[i+1] != A0)
+        den ^= ALPHA_TO[MODNN(lambda[i+1] + i * root[j])];
+    }
+#if DEBUG >= 1
+    if (den == 0) {
+      printf("\n ERROR: denominator = 0\n");
+      count = -1;
+      goto finish;
+    }
+#endif
+    /* Apply error to data; locations inside the pad region are skipped */
+    if (num1 != 0 && loc[j] >= PAD) {
+      data[loc[j]-PAD] ^= ALPHA_TO[MODNN(INDEX_OF[num1] + INDEX_OF[num2] + NN - INDEX_OF[den])];
+    }
+  }
+ finish:
+  /* Report the located positions to the caller; nothing is copied when
+   * count is 0 or -1.
+   */
+  if(eras_pos != NULL){
+    for(i=0;i<count;i++)
+      eras_pos[i] = loc[i];
+  }
+  retval = count;
+}
diff --git a/decode_rs_8.c b/decode_rs_8.c
new file mode 100644
index 0000000..995b0d9
--- /dev/null
+++ b/decode_rs_8.c
@@ -0,0 +1,24 @@
+/* General purpose Reed-Solomon decoder for 8-bit symbols or less
+ * Copyright 2003 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include <string.h>
+
+#include "fixed.h"
+
+/* Decode one block of the fixed-parameter 8-bit Reed-Solomon code in place.
+ *
+ * data     - received symbols, corrected in place by the included decoder body
+ * eras_pos - erasure positions on input (may be NULL); overwritten with the
+ *            located positions on output when non-NULL
+ * no_eras  - number of entries supplied in eras_pos
+ * pad      - number of pad symbols in the block; must be in 0..222
+ *
+ * Returns -1 if pad is out of range; otherwise the value the decoder body in
+ * decode_rs.h stores in retval.
+ */
+int decode_rs_8(data_t *data, int *eras_pos, int no_eras, int pad){
+ int retval;
+
+ if(pad < 0 || pad > 222){
+ return -1;
+ }
+
+/* The decoder body is textually included; it writes its result to retval */
+#include "decode_rs.h"
+
+ return retval;
+}
diff --git a/decode_rs_ccsds.c b/decode_rs_ccsds.c
new file mode 100644
index 0000000..0e246b4
--- /dev/null
+++ b/decode_rs_ccsds.c
@@ -0,0 +1,26 @@
+/* This function wraps around the fixed 8-bit decoder, performing the
+ * basis transformations necessary to meet the CCSDS standard
+ *
+ * Copyright 2002, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include "ccsds.h"
+#include "fec.h"
+
+/* Decode a CCSDS dual-basis Reed-Solomon block in place.
+ *
+ * The NN-pad received symbols are mapped through Tal1tab[] into a local
+ * buffer, decoded with decode_rs_8(), and mapped back through Taltab[] only
+ * when the decoder reports that it corrected something (return value > 0).
+ *
+ * Returns the result of decode_rs_8(), or -1 if pad is negative.
+ */
+int decode_rs_ccsds(data_t *data,int *eras_pos,int no_eras,int pad){
+  int i,r;
+  data_t cdata[NN];
+
+  /* A negative pad would make the copy loop below run past the end of
+   * cdata[]. decode_rs_8() rejects it too, but only after the copy.
+   */
+  if(pad < 0)
+    return -1;
+
+  /* Convert data from dual basis to conventional */
+  for(i=0;i<NN-pad;i++)
+    cdata[i] = Tal1tab[data[i]];
+
+  r = decode_rs_8(cdata,eras_pos,no_eras,pad);
+
+  if(r > 0){
+    /* Convert from conventional to dual basis */
+    for(i=0;i<NN-pad;i++)
+      data[i] = Taltab[cdata[i]];
+  }
+  return r;
+}
diff --git a/decode_rs_char.c b/decode_rs_char.c
new file mode 100644
index 0000000..7105233
--- /dev/null
+++ b/decode_rs_char.c
@@ -0,0 +1,22 @@
+/* General purpose Reed-Solomon decoder for 8-bit symbols or less
+ * Copyright 2003 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include <string.h>
+
+#include "char.h"
+#include "rs-common.h"
+
+/* Decode one Reed-Solomon block in place using the codec handle p.
+ *
+ * p        - opaque handle, cast to struct rs *
+ * data     - received symbols, corrected in place by the included decoder body
+ * eras_pos - erasure positions in / located positions out (may be NULL)
+ * no_eras  - number of entries supplied in eras_pos
+ *
+ * Returns the value the decoder body in decode_rs.h stores in retval.
+ */
+int decode_rs_char(void *p, data_t *data, int *eras_pos, int no_eras){
+ int retval;
+ /* NOTE(review): rs is not named in this file's own text; presumably the
+  * code-parameter macros from char.h expand to fields of rs - confirm. */
+ struct rs *rs = (struct rs *)p;
+
+#include "decode_rs.h"
+
+ return retval;
+}
diff --git a/decode_rs_int.c b/decode_rs_int.c
new file mode 100644
index 0000000..1ef1a1f
--- /dev/null
+++ b/decode_rs_int.c
@@ -0,0 +1,22 @@
+/* General purpose Reed-Solomon decoder
+ * Copyright 2003 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include <string.h>
+
+#include "int.h"
+#include "rs-common.h"
+
+/* Decode one Reed-Solomon block in place using the codec handle p.
+ *
+ * p        - opaque handle, cast to struct rs *
+ * data     - received symbols, corrected in place by the included decoder body
+ * eras_pos - erasure positions in / located positions out (may be NULL)
+ * no_eras  - number of entries supplied in eras_pos
+ *
+ * Returns the value the decoder body in decode_rs.h stores in retval.
+ */
+int decode_rs_int(void *p, data_t *data, int *eras_pos, int no_eras){
+ int retval;
+ /* NOTE(review): rs is not named in this file's own text; presumably the
+  * code-parameter macros from int.h expand to fields of rs - confirm. */
+ struct rs *rs = (struct rs *)p;
+
+#include "decode_rs.h"
+
+ return retval;
+}
diff --git a/dotprod.c b/dotprod.c
new file mode 100644
index 0000000..b3be913
--- /dev/null
+++ b/dotprod.c
@@ -0,0 +1,94 @@
+/* 16-bit signed integer dot product
+ * Switch to appropriate versions
+ * Copyright 2004 Phil Karn
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdlib.h>
+#include "fec.h"
+
+void *initdp_port(signed short coeffs[],int len);
+long dotprod_port(void *p,signed short *b);
+void freedp_port(void *p);
+
+#ifdef __i386__
+void *initdp_mmx(signed short coeffs[],int len);
+void *initdp_sse2(signed short coeffs[],int len);
+long dotprod_mmx(void *p,signed short *b);
+long dotprod_sse2(void *p,signed short *b);
+void freedp_mmx(void *p);
+void freedp_sse2(void *p);
+#endif
+
+#ifdef __VEC__
+void *initdp_av(signed short coeffs[],int len);
+long dotprod_av(void *p,signed short *b);
+void freedp_av(void *p);
+#endif
+
+/* Create and return a descriptor for use with the dot product function.
+ *
+ * Calls find_cpu_mode() and then dispatches on Cpu_mode to the matching
+ * initdp_*() implementation. PORT and any unrecognized mode use the portable
+ * C version. The MMX/SSE/SSE2 cases are compiled only when __i386__ is
+ * defined and the ALTIVEC case only when __VEC__ is defined.
+ */
+void *initdp(signed short coeffs[],int len){
+ find_cpu_mode();
+
+ switch(Cpu_mode){
+ case PORT:
+ default:
+ return initdp_port(coeffs,len);
+#ifdef __i386__
+ case MMX:
+ case SSE:
+ return initdp_mmx(coeffs,len);
+ case SSE2:
+ return initdp_sse2(coeffs,len);
+#endif
+
+#ifdef __VEC__
+ case ALTIVEC:
+ return initdp_av(coeffs,len);
+#endif
+ }
+}
+
+
+/* Free a dot product descriptor created earlier.
+ *
+ * Dispatches on Cpu_mode to the freedp_*() routine that matches the
+ * initdp_*() routine chosen by initdp(); each implementation has its own
+ * descriptor layout, so the pairing must be exact. PORT and any unrecognized
+ * mode use the portable C version, mirroring initdp() and dotprod().
+ */
+void freedp(void *p){
+  switch(Cpu_mode){
+  case PORT:
+  default:
+    freedp_port(p);
+    break;
+#ifdef __i386__
+  case MMX:
+  case SSE:
+    freedp_mmx(p);
+    break;
+  case SSE2:
+    freedp_sse2(p);
+    break;
+#endif
+#ifdef __VEC__
+  case ALTIVEC:
+    freedp_av(p);
+    break;
+#endif
+  }
+}
+
+/* Compute a dot product given a descriptor and an input array
+ * The length is taken from the descriptor
+ *
+ * Dispatches on Cpu_mode to the matching dotprod_*() implementation; PORT and
+ * any unrecognized mode use the portable C version. Unlike initdp(), this
+ * function does not call find_cpu_mode(); it reads Cpu_mode as it stands.
+ */
+long dotprod(void *p,signed short a[]){
+ switch(Cpu_mode){
+ case PORT:
+ default:
+ return dotprod_port(p,a);
+#ifdef __i386__
+ case MMX:
+ case SSE:
+ return dotprod_mmx(p,a);
+ case SSE2:
+ return dotprod_sse2(p,a);
+#endif
+
+#ifdef __VEC__
+ case ALTIVEC:
+ return dotprod_av(p,a);
+#endif
+ }
+}
+
+
diff --git a/dotprod.h b/dotprod.h
new file mode 100644
index 0000000..6b62b70
--- /dev/null
+++ b/dotprod.h
@@ -0,0 +1,15 @@
+/* Internal definitions for dotproduct function */
+
+struct dotprod {
+ int len; /* Number of coefficients */
+
+ /* On a MMX or SSE machine, these hold 4 copies of the coefficients,
+ * preshifted by 0,1,2,3 words to meet all possible input data
+ * alignments (see Intel ap559 on MMX dot products).
+ *
+ * SSE2 is similar, but with 8 words at a time
+ *
+ * On a non-MMX machine, only one copy is present
+ */
+ signed short *coeffs[8];
+};
diff --git a/dotprod_av.c b/dotprod_av.c
new file mode 100644
index 0000000..1f70471
--- /dev/null
+++ b/dotprod_av.c
@@ -0,0 +1,93 @@
+/* 16-bit signed integer dot product
+ * Altivec-assisted version
+ * Copyright 2004 Phil Karn
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdlib.h>
+#include "fec.h"
+
+struct dotprod {
+ int len; /* Number of coefficients */
+
+ /* On an Altivec machine, these hold 8 copies of the coefficients,
+ * preshifted by 0,1,..7 words to meet all possible input data
+ */
+ signed short *coeffs[8];
+};
+
+/* Create and return a descriptor for use with the dot product function.
+ *
+ * coeffs - len filter coefficients; they are copied, so the caller keeps
+ *          ownership of the array
+ * len    - number of coefficients
+ *
+ * Returns NULL if len is not positive or if any allocation fails; in the
+ * latter case everything allocated so far is released first.
+ */
+void *initdp_av(signed short coeffs[],int len){
+  struct dotprod *dp;
+  int i,j;
+
+  if(len <= 0)
+    return NULL;
+
+  dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
+  if(dp == NULL)
+    return NULL;
+  dp->len = len;
+
+  /* Make 8 copies of coefficients, one for each data alignment,
+   * each aligned to 16-byte boundary
+   */
+  for(i=0;i<8;i++){
+    dp->coeffs[i] = calloc(1+(len+i-1)/8,sizeof(vector signed short));
+    if(dp->coeffs[i] == NULL){
+      /* Undo the copies already made so nothing leaks */
+      while(i-- > 0)
+        free(dp->coeffs[i]);
+      free(dp);
+      return NULL;
+    }
+    /* Copy i is preceded by i zero words (calloc zero-fills the buffer) */
+    for(j=0;j<len;j++)
+      dp->coeffs[i][j+i] = coeffs[j];
+  }
+  return (void *)dp;
+}
+
+
+/* Free a dot product descriptor created earlier by initdp_av().
+ * A NULL descriptor (which initdp_av() returns for an empty filter) is ignored.
+ */
+void freedp_av(void *p){
+  struct dotprod *dp = (struct dotprod *)p;
+  int i;
+
+  if(dp == NULL)
+    return;
+  /* free(NULL) is a no-op, so unused slots need no separate test */
+  for(i=0;i<8;i++)
+    free(dp->coeffs[i]);
+  free(dp);
+}
+
+/* Compute a dot product given a descriptor and an input array
+ * The length is taken from the descriptor
+ *
+ * p - descriptor from initdp_av(); dereferenced without a NULL check
+ * a - input samples
+ *
+ * Returns element 3 of the vector produced by the final vec_sums() call.
+ */
+long dotprod_av(void *p,signed short a[]){
+ struct dotprod *dp = (struct dotprod *)p;
+ int al;
+ vector signed short *ar,*d;
+ vector signed int sums0,sums1,sums2,sums3;
+ union { vector signed int v; signed int w[4];} s;
+ int nblocks;
+
+ /* round ar down to beginning of 16-byte block containing 0th element of
+ * input buffer. Then set d to one of 8 sets of shifted coefficients
+ */
+ /* NOTE(review): the pointer is cast through int, which presumably assumes
+  * pointers fit in an int on the target - confirm for 64-bit builds. */
+ ar = (vector signed short *)((int)a & ~15);
+ al = ((int)a & 15)/sizeof(signed short);
+ d = (vector signed short *)dp->coeffs[al];
+
+ /* Number of 8-word vectors covering the al leading pad words plus len samples */
+ nblocks = (dp->len+al-1)/8+1;
+
+ /* Sum into four vectors each holding four 32-bit partial sums */
+ sums3 = sums2 = sums1 = sums0 = (vector signed int)(0);
+ while(nblocks >= 4){
+ sums0 = vec_msums(ar[nblocks-1],d[nblocks-1],sums0);
+ sums1 = vec_msums(ar[nblocks-2],d[nblocks-2],sums1);
+ sums2 = vec_msums(ar[nblocks-3],d[nblocks-3],sums2);
+ sums3 = vec_msums(ar[nblocks-4],d[nblocks-4],sums3);
+ nblocks -= 4;
+ }
+ sums0 = vec_adds(sums0,sums1);
+ sums2 = vec_adds(sums2,sums3);
+ sums0 = vec_adds(sums0,sums2);
+ /* Handle the 0..3 vectors left over from the unrolled loop */
+ while(nblocks-- > 0){
+ sums0 = vec_msums(ar[nblocks],d[nblocks],sums0);
+ }
+ /* Sum 4 partial sums into final result */
+ s.v = vec_sums(sums0,(vector signed int)(0));
+
+ return s.w[3];
+}
+
+
diff --git a/dotprod_mmx.c b/dotprod_mmx.c
new file mode 100644
index 0000000..c516afe
--- /dev/null
+++ b/dotprod_mmx.c
@@ -0,0 +1,81 @@
+/* 16-bit signed integer dot product
+ * MMX assisted version; also for SSE
+ *
+ * Copyright 2004 Phil Karn
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdlib.h>
+#include "fec.h"
+
+struct dotprod {
+ int len; /* Number of coefficients */
+
+ /* On a MMX or SSE machine, these hold 4 copies of the coefficients,
+ * preshifted by 0,1,2,3 words to meet all possible input data
+ * alignments (see Intel ap559 on MMX dot products).
+ */
+ signed short *coeffs[4];
+};
+long dotprod_mmx_assist(signed short *a,signed short *b,int cnt);
+
+/* Create and return a descriptor for use with the dot product function.
+ *
+ * coeffs - len filter coefficients; they are copied, so the caller keeps
+ *          ownership of the array
+ * len    - number of coefficients
+ *
+ * Returns NULL if len is not positive or if any allocation fails; in the
+ * latter case everything allocated so far is released first.
+ */
+void *initdp_mmx(signed short coeffs[],int len){
+  struct dotprod *dp;
+  int i,j;
+
+  if(len <= 0)
+    return NULL;
+
+  dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
+  if(dp == NULL)
+    return NULL;
+  dp->len = len;
+
+  /* Make 4 copies of coefficients, one for each data alignment */
+  for(i=0;i<4;i++){
+    dp->coeffs[i] = (signed short *)calloc(1+(len+i-1)/4,
+                                           4*sizeof(signed short));
+    if(dp->coeffs[i] == NULL){
+      /* Undo the copies already made so nothing leaks */
+      while(i-- > 0)
+        free(dp->coeffs[i]);
+      free(dp);
+      return NULL;
+    }
+    /* Copy i is preceded by i zero words (calloc zero-fills the buffer) */
+    for(j=0;j<len;j++)
+      dp->coeffs[i][j+i] = coeffs[j];
+  }
+  return (void *)dp;
+}
+
+
+/* Free a dot product descriptor created earlier by initdp_mmx().
+ * A NULL descriptor (which initdp_mmx() returns for an empty filter) is ignored.
+ */
+void freedp_mmx(void *p){
+  struct dotprod *dp = (struct dotprod *)p;
+  int i;
+
+  if(dp == NULL)
+    return;
+  /* free(NULL) is a no-op, so unused slots need no separate test */
+  for(i=0;i<4;i++)
+    free(dp->coeffs[i]);
+  free(dp);
+}
+
+/* Compute a dot product given a descriptor and an input array
+ * The length is taken from the descriptor
+ *
+ * p - descriptor from initdp_mmx(); dereferenced without a NULL check
+ * a - input samples
+ *
+ * Returns the value computed by dotprod_mmx_assist().
+ */
+long dotprod_mmx(void *p,signed short a[]){
+ struct dotprod *dp = (struct dotprod *)p;
+ int al;
+ signed short *ar;
+
+ /* Round input data address down to 8 byte boundary
+ * NB: depending on the alignment of a[], memory
+ * before a[] will be accessed. The contents don't matter since they'll
+ * be multiplied by zero coefficients. I can't conceive of any
+ * situation where this could cause a segfault since memory protection
+ * in the x86 machines is done on much larger boundaries
+ */
+ /* NOTE(review): the cast through int presumably relies on 32-bit pointers;
+  * dotprod.c only references this routine under __i386__ - confirm. */
+ ar = (signed short *)((int)a & ~7);
+
+ /* Choose one of 4 sets of pre-shifted coefficients. al is both the
+ * index into dp->coeffs[] and the number of 0 words padded onto
+ * that coefficients array for alignment purposes
+ */
+ al = a - ar;
+
+ /* Call assembler routine to do the work, passing number of 4-word blocks */
+ return dotprod_mmx_assist(ar,dp->coeffs[al],(dp->len+al-1)/4+1);
+}
+
diff --git a/dotprod_mmx_assist.s b/dotprod_mmx_assist.s
new file mode 100644
index 0000000..25deffd
--- /dev/null
+++ b/dotprod_mmx_assist.s
@@ -0,0 +1,83 @@
+# SIMD MMX dot product
+# Equivalent to the following C code:
+# long dotprod(signed short *a,signed short *b,int cnt)
+# {
+# long sum = 0;
+# cnt *= 4;
+# while(cnt--)
+# sum += *a++ * *b++;
+# return sum;
+# }
+# a and b should also be 64-bit aligned, or speed will suffer greatly
+# Copyright 1999, Phil Karn KA9Q
+# May be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ .text
+ .global dotprod_mmx_assist
+ .type dotprod_mmx_assist,@function
+dotprod_mmx_assist:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %edi
+ pushl %ecx
+ pushl %ebx
+ movl 8(%ebp),%esi # a
+ movl 12(%ebp),%edi # b
+ movl 16(%ebp),%ecx # cnt
+ pxor %mm0,%mm0 # clear running sum (in two 32-bit halves)
+
+# MMX dot product loop unrolled 4 times, crunching 16 terms per loop
+ .align 16
+.Loop1: subl $4,%ecx
+ jl .Loop1Done
+
+ movq (%esi),%mm1 # mm1 = a[3],a[2],a[1],a[0]
+ pmaddwd (%edi),%mm1 # mm1 = b[3]*a[3]+b[2]*a[2],b[1]*a[1]+b[0]*a[0]
+ paddd %mm1,%mm0
+
+ movq 8(%esi),%mm1
+ pmaddwd 8(%edi),%mm1
+ paddd %mm1,%mm0
+
+ movq 16(%esi),%mm1
+ pmaddwd 16(%edi),%mm1
+ paddd %mm1,%mm0
+
+ movq 24(%esi),%mm1
+ addl $32,%esi
+ pmaddwd 24(%edi),%mm1
+ addl $32,%edi
+ paddd %mm1,%mm0
+
+ jmp .Loop1
+.Loop1Done:
+
+ addl $4,%ecx
+
+# MMX dot product loop, not unrolled, crunching 4 terms per loop
+# This could be redone as Duff's Device on the unrolled loop above
+.Loop2: subl $1,%ecx
+ jl .Loop2Done
+
+ movq (%esi),%mm1
+ addl $8,%esi
+ pmaddwd (%edi),%mm1
+ addl $8,%edi
+ paddd %mm1,%mm0
+ jmp .Loop2
+.Loop2Done:
+
+ movd %mm0,%ebx # right-hand word to ebx
+ punpckhdq %mm0,%mm0 # left-hand word to right side of %mm0
+ movd %mm0,%eax
+ addl %ebx,%eax # running sum now in %eax
+ emms # done with MMX
+
+ popl %ebx
+ popl %ecx
+ popl %edi
+ popl %esi
+ movl %ebp,%esp
+ popl %ebp
+ ret
diff --git a/dotprod_port.c b/dotprod_port.c
new file mode 100644
index 0000000..ef635ec
--- /dev/null
+++ b/dotprod_port.c
@@ -0,0 +1,58 @@
+/* 16-bit signed integer dot product
+ * Portable C version
+ * Copyright 2004 Phil Karn
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdlib.h>
+#include "fec.h"
+
+struct dotprod {
+ int len; /* Number of coefficients */
+
+ signed short *coeffs;
+};
+
+/* Create and return a descriptor for use with the dot product function.
+ *
+ * coeffs - len filter coefficients; they are copied, so the caller keeps
+ *          ownership of the array
+ * len    - number of coefficients
+ *
+ * Returns NULL if len is not positive or if memory cannot be allocated.
+ */
+void *initdp_port(signed short coeffs[],int len){
+  struct dotprod *dp;
+  int j;
+
+  if(len <= 0)
+    return NULL;
+
+  dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
+  if(dp == NULL)
+    return NULL;
+  dp->len = len;
+
+  /* Just one copy of the coefficients for the C version */
+  dp->coeffs = (signed short *)calloc(len,sizeof(signed short));
+  if(dp->coeffs == NULL){
+    free(dp);
+    return NULL;
+  }
+  for(j=0;j<len;j++)
+    dp->coeffs[j] = coeffs[j];
+  return (void *)dp;
+}
+
+
+/* Free a dot product descriptor created earlier by initdp_port().
+ * A NULL descriptor (which initdp_port() returns for an empty filter) is ignored.
+ */
+void freedp_port(void *p){
+  struct dotprod *dp = (struct dotprod *)p;
+
+  if(dp == NULL)
+    return;
+  free(dp->coeffs); /* free(NULL) is a no-op */
+  free(dp);
+}
+
+/* Portable dot product: multiply each of the descriptor's coefficients by
+ * the corresponding input sample and return the accumulated sum.
+ * The number of terms comes from the descriptor, not from the caller.
+ */
+long dotprod_port(void *p,signed short a[]){
+  struct dotprod *desc = (struct dotprod *)p;
+  signed short *sample = a;
+  signed short *coeff = desc->coeffs;
+  long sum = 0;
+  int remaining;
+
+  /* Walk both arrays front to back, one product per step */
+  for(remaining = desc->len; remaining > 0; remaining--){
+    sum += (long)*sample * *coeff;
+    sample++;
+    coeff++;
+  }
+  return sum;
+}
+
+
diff --git a/dotprod_sse2.c b/dotprod_sse2.c
new file mode 100644
index 0000000..1fddd18
--- /dev/null
+++ b/dotprod_sse2.c
@@ -0,0 +1,72 @@
+/* 16-bit signed integer dot product
+ * SSE2 version
+ * Copyright 2004 Phil Karn
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#define _XOPEN_SOURCE 600
+#include <stdlib.h>
+#include <memory.h>
+#include "fec.h"
+
+struct dotprod {
+ int len; /* Number of coefficients */
+
+ /* On a SSE2 machine, these hold 8 copies of the coefficients,
+ * preshifted by 0,1,..7 words to meet all possible input data
+ * alignments (see Intel ap559 on MMX dot products).
+ */
+ signed short *coeffs[8];
+};
+
+long dotprod_sse2_assist(signed short *a,signed short *b,int cnt);
+
+/* Create and return a descriptor for use with the dot product function.
+ *
+ * coeffs - len filter coefficients; they are copied, so the caller keeps
+ *          ownership of the array
+ * len    - number of coefficients
+ *
+ * Returns NULL if len is not positive or if any allocation fails; in the
+ * latter case everything allocated so far is released first.
+ */
+void *initdp_sse2(signed short coeffs[],int len){
+  struct dotprod *dp;
+  int i,j,blksize;
+
+  if(len <= 0)
+    return NULL;
+
+  dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
+  if(dp == NULL)
+    return NULL;
+  dp->len = len;
+
+  /* Make 8 copies of coefficients, one for each data alignment,
+   * each aligned to 16-byte boundary
+   */
+  for(i=0;i<8;i++){
+    blksize = (1+(len+i-1)/8) * 8*sizeof(signed short);
+    /* posix_memalign() reports failure through a nonzero return value */
+    if(posix_memalign((void **)&dp->coeffs[i],16,blksize) != 0){
+      /* Undo the copies already made so nothing leaks */
+      while(i-- > 0)
+        free(dp->coeffs[i]);
+      free(dp);
+      return NULL;
+    }
+    /* Zero the block so the i leading and any trailing pad words are 0 */
+    memset(dp->coeffs[i],0,blksize);
+    for(j=0;j<len;j++)
+      dp->coeffs[i][j+i] = coeffs[j];
+  }
+  return (void *)dp;
+}
+
+
+/* Free a dot product descriptor created earlier by initdp_sse2().
+ * A NULL descriptor (which initdp_sse2() returns for an empty filter) is ignored.
+ */
+void freedp_sse2(void *p){
+  struct dotprod *dp = (struct dotprod *)p;
+  int i;
+
+  if(dp == NULL)
+    return;
+  /* free(NULL) is a no-op, so unused slots need no separate test */
+  for(i=0;i<8;i++)
+    free(dp->coeffs[i]);
+  free(dp);
+}
+
+/* Compute a dot product given a descriptor and an input array
+ * The length is taken from the descriptor
+ *
+ * p - descriptor from initdp_sse2(); dereferenced without a NULL check
+ * a - input samples
+ *
+ * Returns the value computed by dotprod_sse2_assist().
+ */
+long dotprod_sse2(void *p,signed short a[]){
+ struct dotprod *dp = (struct dotprod *)p;
+ int al;
+ signed short *ar;
+
+ /* Round the input address down to a 16-byte boundary; al is the number of
+  * words skipped and selects the coefficient copy with that many leading
+  * zero words.
+  * NOTE(review): the cast through int presumably relies on 32-bit pointers;
+  * dotprod.c only references this routine under __i386__ - confirm. */
+ ar = (signed short *)((int)a & ~15);
+ al = a - ar;
+
+ /* Call assembler routine to do the work, passing number of 8-word blocks */
+ return dotprod_sse2_assist(ar,dp->coeffs[al],(dp->len+al-1)/8+1);
+}
diff --git a/dotprod_sse2_assist.s b/dotprod_sse2_assist.s
new file mode 100644
index 0000000..47348fa
--- /dev/null
+++ b/dotprod_sse2_assist.s
@@ -0,0 +1,85 @@
+# SIMD SSE2 dot product
+# Equivalent to the following C code:
+# long dotprod(signed short *a,signed short *b,int cnt)
+# {
+# long sum = 0;
+# cnt *= 8;
+# while(cnt--)
+# sum += *a++ * *b++;
+# return sum;
+# }
+# a and b must be 128-bit aligned
+# Copyright 2001, Phil Karn KA9Q
+# May be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ .text
+ .global dotprod_sse2_assist
+ .type dotprod_sse2_assist,@function
+dotprod_sse2_assist:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %edi
+ pushl %ecx
+ pushl %ebx
+ movl 8(%ebp),%esi # a
+ movl 12(%ebp),%edi # b
+ movl 16(%ebp),%ecx # cnt
+ pxor %xmm0,%xmm0 # clear running sum (in two 32-bit halves)
+
+# SSE2 dot product loop unrolled 4 times, crunching 32 terms per loop
+ .align 16
+.Loop1: subl $4,%ecx
+ jl .Loop1Done
+
+ movdqa (%esi),%xmm1
+ pmaddwd (%edi),%xmm1
+ paddd %xmm1,%xmm0
+
+ movdqa 16(%esi),%xmm1
+ pmaddwd 16(%edi),%xmm1
+ paddd %xmm1,%xmm0
+
+ movdqa 32(%esi),%xmm1
+ pmaddwd 32(%edi),%xmm1
+ paddd %xmm1,%xmm0
+
+ movdqa 48(%esi),%xmm1
+ addl $64,%esi
+ pmaddwd 48(%edi),%xmm1
+ addl $64,%edi
+ paddd %xmm1,%xmm0
+
+ jmp .Loop1
+.Loop1Done:
+
+ addl $4,%ecx
+
+# SSE2 dot product loop, not unrolled, crunching 8 terms per loop
+# This could be redone as Duff's Device on the unrolled loop above
+.Loop2: subl $1,%ecx
+ jl .Loop2Done
+
+ movdqa (%esi),%xmm1
+ addl $16,%esi
+ pmaddwd (%edi),%xmm1
+ addl $16,%edi
+ paddd %xmm1,%xmm0
+ jmp .Loop2
+.Loop2Done:
+
+ movdqa %xmm0,%xmm1
+ psrldq $8,%xmm0
+ paddd %xmm1,%xmm0
+ movd %xmm0,%eax # right-hand word to eax
+ psrldq $4,%xmm0
+ movd %xmm0,%ebx
+ addl %ebx,%eax
+
+ popl %ebx
+ popl %ecx
+ popl %edi
+ popl %esi
+ movl %ebp,%esp
+ popl %ebp
+ ret
diff --git a/dsp.3 b/dsp.3
new file mode 100644
index 0000000..e9794da
--- /dev/null
+++ b/dsp.3
@@ -0,0 +1,63 @@
+.TH DSP 3
+.SH NAME
+initdp, freedp, dotprod, sumsq, peakval -\ SIMD-assisted
+digital signal processing primitives
+.SH SYNOPSIS
+.nf
+.ft
+#include "fec.h"
+
+void *initdp(signed short *coeffs,int len);
+long dotprod(void *p,signed short *a);
+void freedp(void *p);
+
+unsigned long long sumsq(signed short *in,int cnt);
+
+int peakval(signed short *b,int cnt);
+
+.SH DESCRIPTION
+These functions provide several basic primitives useful in digital
+signal processing (DSP), especially in modems. The \fBinitdp\fR,
+\fBdotprod\fR and \fBfreedp\fR functions implement an integer dot
+product useful in correlation and filtering operations on signed
+16-bit integers. \fBsumsq\fR computes the sum
+of the squares of an array of signed 16-bit integers,
+useful for measuring the energy of a signal. \fBpeakval\fR returns the
+absolute value of the largest magnitude element in the input array,
+useful for scaling a signal's amplitude.
+
+Each function uses IA32 or PowerPC Altivec instructions when
+available; otherwise, a portable C version is used.
+
+.SH USAGE
+To create a FIR filter or correlator, call \fBinitdp\fR with the
+coefficients in \fBcoeffs\fR and their number in \fBlen\fR. This
+creates the appropriate data structures and returns a handle.
+
+To compute a dot product, pass the handle from \fBinitdp\fR and the
+input array to \fBdotprod\fR. No length field is needed as the number
+of samples will be taken from the \fBlen\fR parameter originally given
+to \fBinitdp\fR. There must be at least as many samples in the input
+array as there were coefficients passed to \fBinitdp\fR.
+
+When the filter or correlator is no longer needed, the data structures
+may be freed by passing the handle to \fBfreedp\fR.
+
+The user is responsible for scaling the inputs to \fBinitdp\fR and
+\fBdotprod\fR, as the 32-bit result from \fBdotprod\fR will silently
+wrap around in the event of overflow.
+
+To compute the sum of the squares of an array of signed 16-bit
+integers, use \fBsumsq\fR. This returns a 64 bit sum.
+
+\fBpeakval\fR computes the absolute value of each 16-bit element in
+the input array and returns the largest.
+
+.SH RETURN VALUES
+
+\fBinitdp\fR returns a handle that points to a control block, or NULL in
+the event of an error (such as a memory allocation failure). \fBsumsq\fR
+and \fBpeakval\fR have no error returns.
+
+.SH AUTHOR and COPYRIGHT
+Phil Karn, KA9Q (karn@ka9q.net)
diff --git a/dtest.c b/dtest.c
new file mode 100644
index 0000000..394cb03
--- /dev/null
+++ b/dtest.c
@@ -0,0 +1,99 @@
+/* Test dot-product function */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include "config.h"
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+#include "fec.h"
+
+#if HAVE_GETOPT_LONG
+/* Long-option table for getopt_long(); fields are name, has_arg, flag, val.
+ * Each entry maps to the short option of the same letter in main().
+ */
+struct option Options[] = {
+  {"force-altivec",0,NULL,'a'},
+  {"force-port",0,NULL,'p'},
+  {"force-mmx",0,NULL,'m'},
+  {"force-sse",0,NULL,'s'},
+  {"force-sse2",0,NULL,'t'},
+  /* has_arg is 1 (argument required): main() reads optarg for 'n', matching
+   * the "n:" in its short-option string */
+  {"trials",1,NULL,'n'},
+  {NULL},
+};
+#endif
+
+/* Test driver: compares dotprod() against dotprod_port() on random data.
+ *
+ * Options -a/-p/-m/-s/-t assign Cpu_mode; -n sets the number of trials
+ * (default 1000). Each trial builds a random coefficient set, creates one
+ * descriptor with initdp() and one with initdp_port(), fills the input buffer
+ * with random() values, and counts a mismatch between the two results as an
+ * error. The error count is printed at the end; the exit status is always 0.
+ */
+int main(int argc,char *argv[]){
+ short coeffs[512];
+ short input[2048];
+ int trials=1000,d;
+ int errors = 0;
+
+#if HAVE_GETOPT_LONG
+ while((d = getopt_long(argc,argv,"apmstn:",Options,NULL)) != EOF){
+#else
+ while((d = getopt(argc,argv,"apmstn:")) != EOF){
+#endif
+ switch(d){
+ case 'a':
+ Cpu_mode = ALTIVEC;
+ break;
+ case 'p':
+ Cpu_mode = PORT;
+ break;
+ case 'm':
+ Cpu_mode = MMX;
+ break;
+ case 's':
+ Cpu_mode = SSE;
+ break;
+ case 't':
+ Cpu_mode = SSE2;
+ break;
+ case 'n':
+ trials = atoi(optarg);
+ break;
+ }
+ }
+
+ while(trials--){
+ long port_result;
+ long simd_result;
+ int ntaps;
+ int i;
+ int csum = 0;
+ int offset;
+ void *dp_simd,*dp_port;
+
+ /* Generate set of coefficients
+ * limit sum of absolute values to 32767 to avoid overflow
+ */
+ memset(coeffs,0,sizeof(coeffs));
+ for(i=0;i<512;i++){
+ double gv;
+
+ gv = normal_rand(0.,100.);
+ if(csum + fabs(gv) > 32767)
+ break;
+ coeffs[i] = gv;
+ csum += fabs(gv);
+ }
+ /* Number of coefficients actually generated before the limit was hit */
+ ntaps = i;
+
+ /* Compare results to portable C version for a bunch of random data buffers and offsets */
+ /* NOTE(review): neither descriptor is released before the next trial, so
+  * each iteration appears to leak both - confirm and free them. */
+ dp_simd = initdp(coeffs,ntaps);
+ dp_port = initdp_port(coeffs,ntaps);
+
+ for(i=0;i<2048;i++)
+ input[i] = random();
+
+ /* offset is 0..511, so offset plus at most 512 taps stays inside input[] */
+ offset = random() & 511;
+
+ simd_result = dotprod(dp_simd,input+offset);
+ port_result = dotprod_port(dp_port,input+offset);
+ if(simd_result != port_result){
+ errors++;
+ }
+ }
+ printf("dtest: %d errors\n",errors);
+ exit(0);
+}
diff --git a/encode_rs.c b/encode_rs.c
new file mode 100644
index 0000000..0649094
--- /dev/null
+++ b/encode_rs.c
@@ -0,0 +1,52 @@
+/* Reed-Solomon encoder
+ * Copyright 2002, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <string.h>
+
+#ifdef FIXED
+#include "fixed.h"
+#elif defined(BIGSYM)
+#include "int.h"
+#else
+#include "char.h"
+#endif
+
+/* Compute the NROOTS parity symbols for one block of data.
+ *
+ * The signature depends on the build: with FIXED defined the function takes
+ * (data, bb, pad); otherwise it takes a handle p (cast to struct rs *)
+ * followed by (data, bb).
+ *
+ * data - NN-NROOTS-PAD input symbols; read only
+ * bb   - output buffer of NROOTS symbols; zeroed and then filled with parity
+ *
+ * With FIXED defined, an out-of-range pad makes the function return without
+ * writing bb.
+ */
+void ENCODE_RS(
+#ifdef FIXED
+data_t *data, data_t *bb,int pad){
+#else
+void *p,data_t *data, data_t *bb){
+ struct rs *rs = (struct rs *)p;
+#endif
+ int i, j;
+ data_t feedback;
+
+#ifdef FIXED
+ /* Check pad parameter for validity */
+ if(pad < 0 || pad >= NN)
+ return;
+#endif
+
+ memset(bb,0,NROOTS*sizeof(data_t));
+
+ /* bb[] acts as a shift register: one data symbol is fed in per iteration */
+ for(i=0;i<NN-NROOTS-PAD;i++){
+ /* Feedback is the incoming symbol XOR the register head, in index form */
+ feedback = INDEX_OF[data[i] ^ bb[0]];
+ if(feedback != A0){ /* feedback term is non-zero */
+#ifdef UNNORMALIZED
+ /* This line is unnecessary when GENPOLY[NROOTS] is unity, as it must
+ * always be for the polynomials constructed by init_rs()
+ */
+ feedback = MODNN(NN - GENPOLY[NROOTS] + feedback);
+#endif
+ for(j=1;j<NROOTS;j++)
+ bb[j] ^= ALPHA_TO[MODNN(feedback + GENPOLY[NROOTS-j])];
+ }
+ /* Shift */
+ memmove(&bb[0],&bb[1],sizeof(data_t)*(NROOTS-1));
+ /* The vacated last slot receives the GENPOLY[0] term, or zero */
+ if(feedback != A0)
+ bb[NROOTS-1] = ALPHA_TO[MODNN(feedback + GENPOLY[0])];
+ else
+ bb[NROOTS-1] = 0;
+ }
+}
diff --git a/encode_rs.h b/encode_rs.h
new file mode 100644
index 0000000..2c157f9
--- /dev/null
+++ b/encode_rs.h
@@ -0,0 +1,58 @@
+/* The guts of the Reed-Solomon encoder, meant to be #included
+ * into a function body with the following typedefs, macros and variables supplied
+ * according to the code parameters:
+
+ * data_t - a typedef for the data symbol
+ * data_t data[] - array of NN-NROOTS-PAD elements of type data_t to be encoded
+ * data_t parity[] - an array of NROOTS elements of type data_t to be written with parity symbols
+ * NROOTS - the number of roots in the RS code generator polynomial,
+ * which is the same as the number of parity symbols in a block.
+ Integer variable or literal.
+ *
+ * NN - the total number of symbols in a RS block. Integer variable or literal.
+ * PAD - the number of pad symbols in a block. Integer variable or literal.
+ * ALPHA_TO - The address of an array of NN+1 elements to convert Galois field
+ * elements in index (log) form to polynomial form. Read only.
+ * INDEX_OF - The address of an array of NN+1 elements to convert Galois field
+ * elements in polynomial form to index (log) form. Read only.
+ * MODNN - a function to reduce its argument modulo NN. May be inline or a macro.
+ * GENPOLY - an array of NROOTS+1 elements containing the generator polynomial in index form
+
+ * The memset() and memmove() functions are used. The appropriate header
+ * file declaring these functions (usually <string.h>) must be included by the calling
+ * program.
+
+ * Copyright 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+
+#undef A0
+#define A0 (NN) /* Index-form value reserved to represent the zero element */
+
+{
+  int n, k;
+  data_t fb; /* feedback term, index (log) form */
+
+  memset(parity,0,sizeof(data_t)*NROOTS);
+
+  for(n=0;n<NN-NROOTS-PAD;n++){
+    fb = INDEX_OF[data[n] ^ parity[0]];
+    if(fb != A0){ /* non-zero feedback term */
+#ifdef UNNORMALIZED
+      /* Not needed when GENPOLY[NROOTS] is unity, and init_rs() always
+       * constructs the polynomial that way
+       */
+      fb = MODNN(NN - GENPOLY[NROOTS] + fb);
+#endif
+      k = NROOTS;
+      while(--k >= 1)
+        parity[k] ^= ALPHA_TO[MODNN(fb + GENPOLY[NROOTS-k])];
+    }
+    /* Shift down one symbol; the vacated last slot gets the new term */
+    memmove(&parity[0],&parity[1],sizeof(data_t)*(NROOTS-1));
+    if(fb == A0)
+      parity[NROOTS-1] = 0;
+    else
+      parity[NROOTS-1] = ALPHA_TO[MODNN(fb + GENPOLY[0])];
+  }
+}
diff --git a/encode_rs_8.c b/encode_rs_8.c
new file mode 100644
index 0000000..5aaecca
--- /dev/null
+++ b/encode_rs_8.c
@@ -0,0 +1,109 @@
+/* Reed-Solomon encoder
+ * Copyright 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <string.h>
+#include "fixed.h"
+#ifdef __VEC__
+#include <sys/sysctl.h>
+#endif
+
+
+static enum {UNKNOWN=0,MMX,SSE,SSE2,ALTIVEC,PORT} cpu_mode; /* UNKNOWN until the first encode_rs_8() call */
+
+/* Forward declarations for the back ends selected below */
+static void encode_rs_8_c(data_t *data, data_t *parity,int pad);
+#if __vec__
+static void encode_rs_8_av(data_t *data, data_t *parity,int pad);
+#endif
+#if __i386__
+int cpu_features(void);
+#endif
+
+/* Encode one block with the fixed-parameter code from fixed.h: the encoder
+ * reads NN-NROOTS-pad symbols from data[] and writes NROOTS symbols to
+ * parity[]. The first call probes the CPU and caches the answer in
+ * cpu_mode; later calls go straight to the dispatch at the bottom.
+ * pad is handed to the back end as is; this function does not validate it.
+ */
+void encode_rs_8(data_t *data, data_t *parity,int pad){
+  if(cpu_mode == UNKNOWN){
+#ifdef __i386__
+    /* Figure out what kind of CPU we have, best instruction set first */
+    int f = cpu_features();
+
+    if(f & (1<<26))
+      cpu_mode = SSE2; /* SSE2 is present */
+    else if(f & (1<<25))
+      cpu_mode = SSE; /* SSE is present */
+    else if(f & (1<<23))
+      cpu_mode = MMX; /* MMX is present */
+    else
+      cpu_mode = PORT; /* No SIMD at all */
+#elif __VEC__
+    /* Ask the OS if we have Altivec support */
+    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
+    int hasVectorUnit = 0;
+    size_t length = sizeof(hasVectorUnit);
+    int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
+
+    cpu_mode = (error == 0 && hasVectorUnit) ? ALTIVEC : PORT;
+#else
+    cpu_mode = PORT;
+#endif
+  }
+
+  /* Only the Altivec build has a dedicated encoder to dispatch to */
+#if __vec__
+  if(cpu_mode == ALTIVEC){
+    encode_rs_8_av(data,parity,pad);
+    return;
+  }
+#endif
+
+  /* MMX, SSE, SSE2 and PORT all end up in the portable C encoder */
+  encode_rs_8_c(data,parity,pad);
+}
+
+#if __vec__ /* PowerPC G4/G5 Altivec instructions are available */
+/* NOTE(review): this guard tests lowercase __vec__, while the CPU probe in encode_rs_8() tests __VEC__ - confirm which is intended */
+static vector unsigned char reverse = (vector unsigned char)(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
+static vector unsigned char shift_right = (vector unsigned char)(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
+
+/* Lookup table for feedback multiplications
+ * These are the low half of the coefficients. Since the generator polynomial is
+ * palindromic, we form the other half by reversing this one
+ */
+extern static union { vector unsigned char v; unsigned char c[16]; } table[256];
+/* NOTE(review): the declaration above names both extern and static; C permits only one storage-class specifier, so it needs fixing before this section can build */
+static void encode_rs_8_av(data_t *data, data_t *parity,int pad){
+ union { vector unsigned char v[2]; unsigned char c[32]; } shift_register;
+ int i;
+ /* The 32-byte shift register can be addressed as two vectors (v) or as bytes (c); start it at zero */
+ shift_register.v[0] = (vector unsigned char)(0);
+ shift_register.v[1] = (vector unsigned char)(0);
+
+ for(i=0;i<NN-NROOTS-pad;i++){
+ vector unsigned char feedback0,feedback1;
+ unsigned char f;
+ /* Feedback byte: the incoming symbol XORed with the last register byte; it selects a table row */
+ f = data[i] ^ shift_register.c[31];
+ feedback1 = table[f].v;
+ feedback0 = vec_perm(feedback1,feedback1,reverse);
+
+ /* Shift right one byte */
+ shift_register.v[1] = vec_perm(shift_register.v[0],shift_register.v[1],shift_right) ^ feedback1;
+ shift_register.v[0] = vec_sro(shift_register.v[0],(vector unsigned char)(8)) ^ feedback0;
+ shift_register.c[0] = f;
+ }
+ for(i=0;i<NROOTS;i++) /* copy the first NROOTS register bytes out to parity[] in reverse order */
+ parity[NROOTS-i-1] = shift_register.c[i];
+}
+#endif
+
+/* Portable C version: the function body is the shared encoder text in encode_rs.h */
+static void encode_rs_8_c(data_t *data, data_t *parity,int pad){
+ /* encode_rs.h refers to data and parity by name, and to pad through the PAD macro from fixed.h */
+#include "encode_rs.h"
+
+}
diff --git a/encode_rs_av.c b/encode_rs_av.c
new file mode 100644
index 0000000..32e528f
--- /dev/null
+++ b/encode_rs_av.c
@@ -0,0 +1,61 @@
+/* Fast Reed-Solomon encoder for (255,223) CCSDS code on PowerPC G4/G5 using Altivec instructions
+ * Copyright 2004, Phil Karn KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <string.h>
+#include "fixed.h"
+
+/* Lookup table for feedback multiplications
+ * These are the low half of the coefficients. Since the generator polynomial is
+ * palindromic, we form it by reversing these on the fly
+ */
+static union { vector unsigned char v; unsigned char c[16]; } table[256];
+
+static vector unsigned char reverse = (vector unsigned char)(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
+static vector unsigned char shift_right = (vector unsigned char)(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
+
+extern data_t CCSDS_alpha_to[];
+extern data_t CCSDS_index_of[];
+extern data_t CCSDS_poly[];
+
+void rs_init_av(){
+  int sym,pos;
+  /* Row 0 is the all-zero row */
+  for(pos=0;pos<16;pos++)
+    table[0].c[pos] = 0;
+  /* The PowerPC is big-endian, so the low-order byte of each vector contains the highest order term in the polynomial */
+  for(sym=1;sym<256;sym++){
+    for(pos=0;pos<16;pos++)
+      table[sym].c[15-pos] = CCSDS_alpha_to[MODNN(CCSDS_poly[pos+1] + CCSDS_index_of[sym])];
+  }
+#if 0
+  for(sym=0;sym<256;sym++){
+    printf("table[%3d] = %3vu\n",sym,table[sym].v);
+  }
+#endif
+}
+
+void encode_rs_av(unsigned char *data,unsigned char *parity,int pad){
+ union { vector unsigned char v[2]; unsigned char c[32]; } shift_register;
+ int i;
+ /* Clear the 32-byte shift register, viewed here as two vectors; table[] must already be filled by rs_init_av() */
+ shift_register.v[0] = (vector unsigned char)(0);
+ shift_register.v[1] = (vector unsigned char)(0);
+ /* One pass per data symbol; pad is used as given, with no range check in this function */
+ for(i=0;i<NN-NROOTS-pad;i++){
+ vector unsigned char feedback0,feedback1;
+ unsigned char f;
+ /* Feedback byte: the incoming symbol XORed with the last register byte; it selects a table row */
+ f = data[i] ^ shift_register.c[31];
+ feedback1 = table[f].v;
+ feedback0 = vec_perm(feedback1,feedback1,reverse);
+ /* feedback0 is feedback1 permuted through the reverse pattern, giving the other half of the coefficients */
+ /* Shift right one byte */
+ shift_register.v[1] = vec_perm(shift_register.v[0],shift_register.v[1],shift_right) ^ feedback1;
+ shift_register.v[0] = vec_sro(shift_register.v[0],(vector unsigned char)(8)) ^ feedback0;
+ shift_register.c[0] = f;
+ }
+ for(i=0;i<NROOTS;i++) /* copy the first NROOTS register bytes out to parity[] in reverse order */
+ parity[NROOTS-i-1] = shift_register.c[i];
+}
diff --git a/encode_rs_ccsds.c b/encode_rs_ccsds.c
new file mode 100644
index 0000000..5a2ec70
--- /dev/null
+++ b/encode_rs_ccsds.c
@@ -0,0 +1,24 @@
+/* This function wraps around the fixed 8-bit encoder, performing the
+ * basis transformations necessary to meet the CCSDS standard
+ *
+ * Copyright 2002, Phil Karn, KA9Q
+ * fixed bug Aug 2007
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include "ccsds.h"
+#include "fec.h"
+
+void encode_rs_ccsds(data_t *data,data_t *parity,int pad){
+  int i;
+  data_t cdata[NN-NROOTS];
+  /* Reject a pad outside 0..NN-1 and leave parity[] unwritten: a negative pad would run the loop below past cdata[] */
+  if(pad < 0 || pad >= NN)
+    return;
+  /* Convert data from dual basis to conventional */
+  for(i=0;i<NN-NROOTS-pad;i++)
+    cdata[i] = Tal1tab[data[i]];
+  encode_rs_8(cdata,parity,pad); /* conventional-basis encoder fills parity[] */
+  /* Convert parity from conventional to dual basis */
+  for(i=0;i<NROOTS;i++)
+    parity[i] = Taltab[parity[i]];
+}
diff --git a/encode_rs_char.c b/encode_rs_char.c
new file mode 100644
index 0000000..a9bf2b8
--- /dev/null
+++ b/encode_rs_char.c
@@ -0,0 +1,15 @@
+/* Reed-Solomon encoder
+ * Copyright 2002, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <string.h>
+
+#include "char.h"
+#include "rs-common.h"
+
+void encode_rs_char(void *p,data_t *data, data_t *parity){
+ struct rs *rs = (struct rs *)p;
+ /* Body is the shared encoder text; it uses data and parity by name (rs is presumably read by the char.h macros - not visible here) */
+#include "encode_rs.h"
+
+}
diff --git a/encode_rs_int.c b/encode_rs_int.c
new file mode 100644
index 0000000..3c9ce78
--- /dev/null
+++ b/encode_rs_int.c
@@ -0,0 +1,15 @@
+/* Reed-Solomon encoder
+ * Copyright 2003, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <string.h>
+
+#include "int.h"
+#include "rs-common.h"
+
+void encode_rs_int(void *p,data_t *data, data_t *parity){
+ struct rs *rs = (struct rs *)p;
+ /* Body is the shared encoder text; it uses data and parity by name (rs is presumably read by the int.h macros - not visible here) */
+#include "encode_rs.h"
+
+}
diff --git a/exercise.c b/exercise.c
new file mode 100644
index 0000000..8ae008c
--- /dev/null
+++ b/exercise.c
@@ -0,0 +1,122 @@
+/* Exercise an RS codec a specified number of times using random
+ * data and error patterns
+ *
+ * Copyright 2002 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#define FLAG_ERASURE 1 /* Randomly flag 50% of errors as erasures */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef FIXED
+#include "fixed.h"
+#define EXERCISE exercise_8
+#elif defined(CCSDS)
+#include "fixed.h"
+#include "ccsds.h"
+#define EXERCISE exercise_ccsds
+#elif defined(BIGSYM)
+#include "int.h"
+#define EXERCISE exercise_int
+#else
+#include "char.h"
+#define EXERCISE exercise_char
+#endif
+
+#ifdef FIXED
+#define PRINTPARM printf("(255,223):");
+#elif defined(CCSDS)
+#define PRINTPARM printf("CCSDS (255,223):");
+#else
+#define PRINTPARM printf("(%d,%d):",rs->nn,rs->nn-rs->nroots);
+#endif
+
+/* Exercise the RS codec passed as an argument: run `trials` rounds and return how many discrepancies were reported */
+int EXERCISE(
+#if !defined(CCSDS) && !defined(FIXED)
+void *p,
+#endif
+int trials){
+#if !defined(CCSDS) && !defined(FIXED)
+ struct rs *rs = (struct rs *)p;
+#endif
+ data_t block[NN],tblock[NN]; /* clean codeword and the copy that gets corrupted, then decoded in place */
+ int i;
+ int errors;
+ int errlocs[NN]; /* set to 1 at every position where an error was injected */
+ int derrlocs[NROOTS]; /* erasure positions given to the decoder; read back afterwards as its reported locations */
+ int derrors;
+ int errval,errloc;
+ int erasures;
+ int decoder_errors = 0;
+ /* Every trial sweeps the injected error count from 0 up to NROOTS/2 */
+ while(trials-- != 0){
+ /* Test up to the error correction capacity of the code */
+ for(errors=0;errors <= NROOTS/2;errors++){
+
+ /* Load block with random data and encode */
+ for(i=0;i<NN-NROOTS;i++)
+ block[i] = random() & NN;
+ /* Parity is written directly after the NN-NROOTS data symbols */
+#if defined(CCSDS) || defined(FIXED)
+ ENCODE_RS(&block[0],&block[NN-NROOTS],0);
+#else
+ ENCODE_RS(rs,&block[0],&block[NN-NROOTS]);
+#endif
+
+ /* Make temp copy, seed with errors */
+ memcpy(tblock,block,sizeof(tblock));
+ memset(errlocs,0,sizeof(errlocs));
+ memset(derrlocs,0,sizeof(derrlocs));
+ erasures=0;
+ for(i=0;i<errors;i++){
+ do {
+ errval = random() & NN;
+ } while(errval == 0); /* Error value must be nonzero */
+
+ do {
+ errloc = random() % NN;
+ } while(errlocs[errloc] != 0); /* Must not choose the same location twice */
+
+ errlocs[errloc] = 1;
+ /* With FLAG_ERASURE set, a coin flip decides whether this position is also passed as an erasure */
+#if FLAG_ERASURE
+ if(random() & 1) /* 50-50 chance */
+ derrlocs[erasures++] = errloc;
+#endif
+ tblock[errloc] ^= errval;
+ }
+
+ /* Decode the errored block */
+#if defined(CCSDS) || defined(FIXED)
+ derrors = DECODE_RS(tblock,derrlocs,erasures,0);
+#else
+ derrors = DECODE_RS(rs,tblock,derrlocs,erasures);
+#endif
+ /* Three checks follow: the reported count, each reported location, and the corrected data itself */
+ if(derrors != errors){
+ PRINTPARM
+ printf(" decoder says %d errors, true number is %d\n",derrors,errors);
+ decoder_errors++;
+ }
+ for(i=0;i<derrors;i++){
+ if(errlocs[derrlocs[i]] == 0){
+ PRINTPARM
+ printf(" decoder indicates error in location %d without error\n",derrlocs[i]);
+ decoder_errors++;
+ }
+ }
+ if(memcmp(tblock,block,sizeof(tblock)) != 0){
+ PRINTPARM
+ printf(" uncorrected errors! output ^ input:");
+ decoder_errors++;
+ for(i=0;i<NN;i++)
+ printf(" %02x",tblock[i] ^ block[i]);
+ printf("\n");
+ }
+ }
+ }
+ return decoder_errors;
+}
diff --git a/fec.c b/fec.c
new file mode 100644
index 0000000..35960c3
--- /dev/null
+++ b/fec.c
@@ -0,0 +1,66 @@
+/* Utility routines for FEC support
+ * Copyright 2004, Phil Karn, KA9Q
+ */
+
+#include <stdio.h>
+#include "fec.h"
+
+unsigned char Partab[256];
+int P_init;
+
+/* Fill Partab[]: entry i becomes 1 when i has an odd number of 1 bits and
+ * 0 when it has an even number, then set P_init to mark the table ready.
+ * Needed only on non-ia32 machines
+ */
+void partab_init(void){
+  int value;
+
+  for(value=0;value<256;value++){
+    int bits;
+    int odd = 0;
+
+    /* Flip odd once for every set bit, low bit first */
+    for(bits=value;bits != 0;bits >>= 1)
+      odd ^= bits & 1;
+    Partab[value] = odd;
+  }
+  /* parityb() tests this flag before it reads the table */
+  P_init = 1;
+}
+
+/* Lookup table giving count of 1 bits for integers 0-255: index by the value, read the number of set bits */
+int Bitcnt[] = {
+ 0, 1, 1, 2, 1, 2, 2, 3,
+ 1, 2, 2, 3, 2, 3, 3, 4,
+ 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7,
+ 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7,
+ 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7,
+ 4, 5, 5, 6, 5, 6, 6, 7,
+ 5, 6, 6, 7, 6, 7, 7, 8,
+};
+
diff --git a/fec.h b/fec.h
new file mode 100644
index 0000000..08e8454
--- /dev/null
+++ b/fec.h
@@ -0,0 +1,347 @@
+/* User include file for libfec
+ * Copyright 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+#ifndef _FEC_H_
+#define _FEC_H_
+
+/* r=1/2 k=7 convolutional encoder polynomials
+ * The NASA-DSN convention is to use V27POLYA inverted, then V27POLYB
+ * The CCSDS/NASA-GSFC convention is to use V27POLYB, then V27POLYA inverted
+ */
+#define V27POLYA 0x6d
+#define V27POLYB 0x4f
+
+void *create_viterbi27(int len);
+void set_viterbi27_polynomial(int polys[2]);
+int init_viterbi27(void *vp,int starting_state);
+int update_viterbi27_blk(void *vp,unsigned char sym[],int npairs);
+int chainback_viterbi27(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi27(void *vp);
+
+#ifdef __VEC__
+void *create_viterbi27_av(int len);
+void set_viterbi27_polynomial_av(int polys[2]);
+int init_viterbi27_av(void *p,int starting_state);
+int chainback_viterbi27_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi27_av(void *p);
+int update_viterbi27_blk_av(void *p,unsigned char *syms,int nbits);
+#endif
+
+#ifdef __i386__
+void *create_viterbi27_mmx(int len);
+void set_viterbi27_polynomial_mmx(int polys[2]);
+int init_viterbi27_mmx(void *p,int starting_state);
+int chainback_viterbi27_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi27_mmx(void *p);
+int update_viterbi27_blk_mmx(void *p,unsigned char *syms,int nbits);
+
+void *create_viterbi27_sse(int len);
+void set_viterbi27_polynomial_sse(int polys[2]);
+int init_viterbi27_sse(void *p,int starting_state);
+int chainback_viterbi27_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi27_sse(void *p);
+int update_viterbi27_blk_sse(void *p,unsigned char *syms,int nbits);
+
+void *create_viterbi27_sse2(int len);
+void set_viterbi27_polynomial_sse2(int polys[2]);
+int init_viterbi27_sse2(void *p,int starting_state);
+int chainback_viterbi27_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi27_sse2(void *p);
+int update_viterbi27_blk_sse2(void *p,unsigned char *syms,int nbits);
+#endif
+
+void *create_viterbi27_port(int len);
+void set_viterbi27_polynomial_port(int polys[2]);
+int init_viterbi27_port(void *p,int starting_state);
+int chainback_viterbi27_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi27_port(void *p);
+int update_viterbi27_blk_port(void *p,unsigned char *syms,int nbits);
+
+/* r=1/2 k=9 convolutional encoder polynomials */
+#define V29POLYA 0x1af
+#define V29POLYB 0x11d
+
+void *create_viterbi29(int len);
+void set_viterbi29_polynomial(int polys[2]);
+int init_viterbi29(void *vp,int starting_state);
+int update_viterbi29_blk(void *vp,unsigned char syms[],int nbits);
+int chainback_viterbi29(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi29(void *vp);
+
+#ifdef __VEC__
+void *create_viterbi29_av(int len);
+void set_viterbi29_polynomial_av(int polys[2]);
+int init_viterbi29_av(void *p,int starting_state);
+int chainback_viterbi29_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi29_av(void *p);
+int update_viterbi29_blk_av(void *p,unsigned char *syms,int nbits);
+#endif
+
+#ifdef __i386__
+void *create_viterbi29_mmx(int len);
+void set_viterbi29_polynomial_mmx(int polys[2]);
+int init_viterbi29_mmx(void *p,int starting_state);
+int chainback_viterbi29_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi29_mmx(void *p);
+int update_viterbi29_blk_mmx(void *p,unsigned char *syms,int nbits);
+
+void *create_viterbi29_sse(int len);
+void set_viterbi29_polynomial_sse(int polys[2]);
+int init_viterbi29_sse(void *p,int starting_state);
+int chainback_viterbi29_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi29_sse(void *p);
+int update_viterbi29_blk_sse(void *p,unsigned char *syms,int nbits);
+
+void *create_viterbi29_sse2(int len);
+void set_viterbi29_polynomial_sse2(int polys[2]);
+int init_viterbi29_sse2(void *p,int starting_state);
+int chainback_viterbi29_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi29_sse2(void *p);
+int update_viterbi29_blk_sse2(void *p,unsigned char *syms,int nbits);
+#endif
+
+void *create_viterbi29_port(int len);
+void set_viterbi29_polynomial_port(int polys[2]);
+int init_viterbi29_port(void *p,int starting_state);
+int chainback_viterbi29_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi29_port(void *p);
+int update_viterbi29_blk_port(void *p,unsigned char *syms,int nbits);
+
+/* r=1/3 k=9 convolutional encoder polynomials */
+#define V39POLYA 0x1ed
+#define V39POLYB 0x19b
+#define V39POLYC 0x127
+
+void *create_viterbi39(int len);
+void set_viterbi39_polynomial(int polys[3]);
+int init_viterbi39(void *vp,int starting_state);
+int update_viterbi39_blk(void *vp,unsigned char syms[],int nbits);
+int chainback_viterbi39(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi39(void *vp);
+
+#ifdef __VEC__
+void *create_viterbi39_av(int len);
+void set_viterbi39_polynomial_av(int polys[3]);
+int init_viterbi39_av(void *p,int starting_state);
+int chainback_viterbi39_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi39_av(void *p);
+int update_viterbi39_blk_av(void *p,unsigned char *syms,int nbits);
+#endif
+
+#ifdef __i386__
+void *create_viterbi39_mmx(int len);
+void set_viterbi39_polynomial_mmx(int polys[3]);
+int init_viterbi39_mmx(void *p,int starting_state);
+int chainback_viterbi39_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi39_mmx(void *p);
+int update_viterbi39_blk_mmx(void *p,unsigned char *syms,int nbits);
+
+void *create_viterbi39_sse(int len);
+void set_viterbi39_polynomial_sse(int polys[3]);
+int init_viterbi39_sse(void *p,int starting_state);
+int chainback_viterbi39_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi39_sse(void *p);
+int update_viterbi39_blk_sse(void *p,unsigned char *syms,int nbits);
+
+void *create_viterbi39_sse2(int len);
+void set_viterbi39_polynomial_sse2(int polys[3]);
+int init_viterbi39_sse2(void *p,int starting_state);
+int chainback_viterbi39_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi39_sse2(void *p);
+int update_viterbi39_blk_sse2(void *p,unsigned char *syms,int nbits);
+#endif
+
+void *create_viterbi39_port(int len);
+void set_viterbi39_polynomial_port(int polys[3]);
+int init_viterbi39_port(void *p,int starting_state);
+int chainback_viterbi39_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi39_port(void *p);
+int update_viterbi39_blk_port(void *p,unsigned char *syms,int nbits);
+
+
+/* r=1/6 k=15 Cassini convolutional encoder polynomials without symbol inversion
+ * dfree = 56
+ * These bits may be left-right flipped from some textbook representations;
+ * here I have the bits entering the shift register from the right (low) end
+ *
+ * Some other spacecraft use the same code, but with the polynomials in a different order.
+ * E.g., Mars Pathfinder and STEREO swap POLYC and POLYD. All use alternate symbol inversion,
+ * so use set_viterbi615_polynomial() as appropriate.
+ */
+#define V615POLYA 042631
+#define V615POLYB 047245
+#define V615POLYC 056507
+#define V615POLYD 073363
+#define V615POLYE 077267
+#define V615POLYF 064537
+
+void *create_viterbi615(int len);
+void set_viterbi615_polynomial(int polys[6]);
+int init_viterbi615(void *vp,int starting_state);
+int update_viterbi615_blk(void *vp,unsigned char *syms,int nbits);
+int chainback_viterbi615(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi615(void *vp);
+
+#ifdef __VEC__
+void *create_viterbi615_av(int len);
+void set_viterbi615_polynomial_av(int polys[6]);
+int init_viterbi615_av(void *p,int starting_state);
+int chainback_viterbi615_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi615_av(void *p);
+int update_viterbi615_blk_av(void *p,unsigned char *syms,int nbits);
+#endif
+
+#ifdef __i386__
+void *create_viterbi615_mmx(int len);
+void set_viterbi615_polynomial_mmx(int polys[6]);
+int init_viterbi615_mmx(void *p,int starting_state);
+int chainback_viterbi615_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi615_mmx(void *p);
+int update_viterbi615_blk_mmx(void *p,unsigned char *syms,int nbits);
+
+void *create_viterbi615_sse(int len);
+void set_viterbi615_polynomial_sse(int polys[6]);
+int init_viterbi615_sse(void *p,int starting_state);
+int chainback_viterbi615_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi615_sse(void *p);
+int update_viterbi615_blk_sse(void *p,unsigned char *syms,int nbits);
+
+void *create_viterbi615_sse2(int len);
+void set_viterbi615_polynomial_sse2(int polys[6]);
+int init_viterbi615_sse2(void *p,int starting_state);
+int chainback_viterbi615_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi615_sse2(void *p);
+int update_viterbi615_blk_sse2(void *p,unsigned char *syms,int nbits);
+
+#endif
+
+void *create_viterbi615_port(int len);
+void set_viterbi615_polynomial_port(int polys[6]);
+int init_viterbi615_port(void *p,int starting_state);
+int chainback_viterbi615_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi615_port(void *p);
+int update_viterbi615_blk_port(void *p,unsigned char *syms,int nbits);
+
+
+/* General purpose RS codec, 8-bit symbols */
+void encode_rs_char(void *rs,unsigned char *data,unsigned char *parity);
+int decode_rs_char(void *rs,unsigned char *data,int *eras_pos,
+ int no_eras);
+void *init_rs_char(int symsize,int gfpoly,
+ int fcr,int prim,int nroots,
+ int pad);
+void free_rs_char(void *rs);
+
+/* General purpose RS codec, integer symbols */
+void encode_rs_int(void *rs,int *data,int *parity);
+int decode_rs_int(void *rs,int *data,int *eras_pos,int no_eras);
+void *init_rs_int(int symsize,int gfpoly,int fcr,
+ int prim,int nroots,int pad);
+void free_rs_int(void *rs);
+
+/* CCSDS standard (255,223) RS codec with conventional (*not* dual-basis)
+ * symbol representation
+ */
+void encode_rs_8(unsigned char *data,unsigned char *parity,int pad);
+int decode_rs_8(unsigned char *data,int *eras_pos,int no_eras,int pad);
+
+/* CCSDS standard (255,223) RS codec with dual-basis symbol representation */
+void encode_rs_ccsds(unsigned char *data,unsigned char *parity,int pad);
+int decode_rs_ccsds(unsigned char *data,int *eras_pos,int no_eras,int pad);
+
+/* Tables to map from conventional->dual (Taltab) and
+ * dual->conventional (Tal1tab) bases
+ */
+extern unsigned char Taltab[],Tal1tab[];
+
+
+/* CPU SIMD instruction set available */
+extern enum cpu_mode {UNKNOWN=0,PORT,MMX,SSE,SSE2,ALTIVEC} Cpu_mode;
+void find_cpu_mode(void); /* Call this once at startup to set Cpu_mode */
+
+/* Determine parity of argument: 1 = odd, 0 = even. Uses the CPU parity flag on i386, a lookup table elsewhere */
+#ifdef __i386__
+static inline int parityb(unsigned char x){
+ __asm__ __volatile__ ("test %1,%1;setpo %0" : "=g"(x) : "r" (x)); /* test sets the flags from x; setpo then stores 1 in x for odd parity, 0 for even */
+ return x;
+}
+#else
+void partab_init();
+/* Table-driven variant: Partab[] is built on the first call, when P_init is still zero */
+static inline int parityb(unsigned char x){
+ extern unsigned char Partab[256];
+ extern int P_init;
+ if(!P_init){
+ partab_init();
+ }
+ return Partab[x];
+}
+#endif
+
+
+static inline int parity(int x){
+  /* XOR the upper bits down onto the low byte; parityb() sees only that byte */
+  int folded = x ^ (x >> 16);
+
+  return parityb(folded ^ (folded >> 8));
+}
+
+/* Useful utilities for simulation */
+double normal_rand(double mean, double std_dev);
+unsigned char addnoise(int sym,double amp,double gain,double offset,int clip);
+
+extern int Bitcnt[];
+
+/* Dot product functions */
+void *initdp(signed short coeffs[],int len);
+void freedp(void *dp);
+long dotprod(void *dp,signed short a[]);
+
+void *initdp_port(signed short coeffs[],int len);
+void freedp_port(void *dp);
+long dotprod_port(void *dp,signed short a[]);
+
+#ifdef __i386__
+void *initdp_mmx(signed short coeffs[],int len);
+void freedp_mmx(void *dp);
+long dotprod_mmx(void *dp,signed short a[]);
+
+void *initdp_sse(signed short coeffs[],int len);
+void freedp_sse(void *dp);
+long dotprod_sse(void *dp,signed short a[]);
+
+void *initdp_sse2(signed short coeffs[],int len);
+void freedp_sse2(void *dp);
+long dotprod_sse2(void *dp,signed short a[]);
+#endif
+
+#ifdef __VEC__
+void *initdp_av(signed short coeffs[],int len);
+void freedp_av(void *dp);
+long dotprod_av(void *dp,signed short a[]);
+#endif
+
+/* Sum of squares - accepts signed shorts, produces unsigned long long */
+unsigned long long sumsq(signed short *in,int cnt);
+unsigned long long sumsq_port(signed short *in,int cnt);
+
+#ifdef __i386__
+unsigned long long sumsq_mmx(signed short *in,int cnt);
+unsigned long long sumsq_sse(signed short *in,int cnt);
+unsigned long long sumsq_sse2(signed short *in,int cnt);
+#endif
+#ifdef __VEC__
+unsigned long long sumsq_av(signed short *in,int cnt);
+#endif
+
+
+/* Low-level data structures and routines */
+
+int cpu_features(void);
+
+#endif /* _FEC_H_ */
+
+
+
diff --git a/fixed.h b/fixed.h
new file mode 100644
index 0000000..0ff27b2
--- /dev/null
+++ b/fixed.h
@@ -0,0 +1,33 @@
+/* Stuff specific to the CCSDS (255,223) RS codec
+ * (255,223) code over GF(256). Note: the conventional basis is still
+ * used; the dual-basis mappings are performed in [en|de]code_rs_ccsds.c
+ *
+ * Copyright 2003 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+typedef unsigned char data_t;
+
+static inline int mod255(int x){
+  int rest;
+  for(;x >= 255;x = (rest >> 8) + (rest & 255)) /* fold bits 8 and up back onto the low byte */
+    rest = x - 255;
+  /* Inputs below 255, negative ones included, come back unchanged */
+  return x;
+}
+#define MODNN(x) mod255(x)
+
+extern data_t CCSDS_alpha_to[];
+extern data_t CCSDS_index_of[];
+extern data_t CCSDS_poly[];
+
+#define MM 8
+#define NN 255
+#define ALPHA_TO CCSDS_alpha_to
+#define INDEX_OF CCSDS_index_of
+#define GENPOLY CCSDS_poly
+#define NROOTS 32
+#define FCR 112
+#define PRIM 11
+#define IPRIM 116
+#define PAD pad
+
diff --git a/gen_ccsds.c b/gen_ccsds.c
new file mode 100644
index 0000000..e1e2e26
--- /dev/null
+++ b/gen_ccsds.c
@@ -0,0 +1,39 @@
+/* Generate tables for CCSDS code
+ * Copyright 2002 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "char.h"
+#include "rs-common.h"
+#include "fec.h"
+
+int main(void){
+  struct rs *rs;
+  int i;
+  /* Print the CCSDS (255,223) field and generator tables as C source on stdout */
+  rs = init_rs_char(8,0x187,112,11,32,0); /* CCSDS standard */
+  assert(rs != NULL);
+  printf("unsigned char CCSDS_alpha_to[] = {");
+  for(i=0;i<256;i++){
+    if((i % 16) == 0)
+      printf("\n");
+    printf("0x%02x,",rs->alpha_to[i]);
+  }
+  printf("\n};\n\nunsigned char CCSDS_index_of[] = {");
+  for(i=0;i<256;i++){
+    if((i % 16) == 0)
+      printf("\n");
+    printf("%3d,",rs->index_of[i]);
+  }
+  printf("\n};\n\nunsigned char CCSDS_poly[] = {");
+  for(i=0;i<33;i++){
+    if((i % 16) == 0)
+      printf("\n");
+    /* Tables are emitted as unsigned char: fixed.h declares them extern data_t, and entries exceed 0x7f */
+    printf("%3d,",rs->genpoly[i]);
+  }
+  printf("\n};\n");
+  exit(0);
+}
diff --git a/gen_ccsds_tal.c b/gen_ccsds_tal.c
new file mode 100644
index 0000000..fc75503
--- /dev/null
+++ b/gen_ccsds_tal.c
@@ -0,0 +1,53 @@
+/* Conversion lookup tables from conventional alpha to Berlekamp's
+ * dual-basis representation. Used in the CCSDS version only.
+ * taltab[] -- convert conventional to dual basis
+ * tal1tab[] -- convert dual basis to conventional
+
+ * Note: the actual RS encoder/decoder works with the conventional basis.
+ * So data is converted from dual to conventional basis before either
+ * encoding or decoding and then converted back.
+ *
+ * Copyright 2002 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#define DTYPE unsigned char
+DTYPE Taltab[256],Tal1tab[256];
+
+static DTYPE tal[] = { 0x8d, 0xef, 0xec, 0x86, 0xfa, 0x99, 0xaf, 0x7b };
+
+/* Generate conversion lookup tables between conventional alpha representation
+ * (@**7, @**6, ...@**0)
+ * and Berlekamp's dual basis representation
+ * (l0, l1, ...l7)
+ */
+int main(void){
+  int i,j,k;
+  /* Build Taltab[] from the tal[] matrix and Tal1tab[] as its reverse lookup, then print both as C source */
+  for(i=0;i<256;i++){/* For each value of input */
+    Taltab[i] = 0;
+    for(j=0;j<8;j++) /* for each column of matrix */
+      for(k=0;k<8;k++){ /* for each row of matrix */
+        if(i & (1<<k))
+          Taltab[i] ^= tal[7-k] & (1<<j);
+      }
+    Tal1tab[Taltab[i]] = i;
+  }
+  printf("unsigned char Taltab[] = {"); /* no "\n" here: the loop starts every row of 16, the first included, with one */
+  for(i=0;i<256;i++){
+    if((i % 16) == 0)
+      printf("\n");
+    printf("0x%02x,",Taltab[i]);
+  }
+  printf("\n};\n\nunsigned char Tal1tab[] = {");
+  for(i=0;i<256;i++){
+    if((i % 16) == 0)
+      printf("\n");
+    printf("0x%02x,",Tal1tab[i]);
+  }
+  printf("\n};\n");
+  exit(0);
+}
+
diff --git a/init_rs.c b/init_rs.c
new file mode 100644
index 0000000..ef1cf47
--- /dev/null
+++ b/init_rs.c
@@ -0,0 +1,39 @@
+/* Initialize a RS codec
+ *
+ * Copyright 2002 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdlib.h>
+#include "fec.h"
+
+#if !defined(NULL)
+#define NULL ((void *)0)
+#endif
+
+#include "rs-common.h"
+
+void free_rs(void *p){ /* Release a codec control block and its three tables */
+  struct rs *rs = (struct rs *)p;
+  if(rs == NULL) return; /* like free(), do nothing for NULL (e.g. the result of a failed init) */
+  free(rs->alpha_to);
+  free(rs->index_of);
+  free(rs->genpoly);
+  free(rs);
+}
+
+/* Initialize a Reed-Solomon codec
+ * symsize = symbol size, bits
+ * gfpoly = Field generator polynomial coefficients
+ * fcr = first root of RS code generator polynomial, index form
+ * prim = primitive element to generate polynomial roots
+ * nroots = RS code generator polynomial degree (number of roots)
+ * pad = padding bytes at front of shortened block
+ */
+void *init_rs_common(int symsize,int gfpoly,int fcr,int prim,
+ int nroots,int pad){
+ struct rs *rs;
+ /* init_rs.h assigns rs: it starts as NULL and is reset to NULL on each failure path visible there (the end of that file was not reviewed) */
+#include "init_rs.h"
+ /* Callers must therefore check the result for NULL before using it */
+ return rs;
+}
diff --git a/init_rs.h b/init_rs.h
new file mode 100644
index 0000000..2b2ae98
--- /dev/null
+++ b/init_rs.h
@@ -0,0 +1,106 @@
+/* Common code for initializing a Reed-Solomon control block (char or int symbols)
+ * Copyright 2004 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#undef NULL
+#define NULL ((void *)0)
+
+{
+  int i, j, sr, root, iprim, rem;
+
+  rs = NULL;
+  /* Check parameter ranges. symsize is also capped so that (1<<symsize)
+   * stays representable in an int; a larger shift is undefined behavior. */
+  if(symsize < 0 || symsize > (int)(8*sizeof(data_t)) ||
+     symsize >= (int)(8*sizeof(int))-1)
+    goto done;
+  if(fcr < 0 || fcr >= (1<<symsize))
+    goto done;
+  if(prim <= 0 || prim >= (1<<symsize))
+    goto done;
+  if(nroots < 0 || nroots >= (1<<symsize))
+    goto done; /* Can't have more roots than symbol values! */
+  if(pad < 0 || pad >= ((1<<symsize) -1 - nroots))
+    goto done; /* Too much padding */
+
+  rs = (struct rs *)calloc(1,sizeof(struct rs));
+  if(rs == NULL)
+    goto done;
+  /* Null the table pointers explicitly so "fail" can free all of them */
+  rs->alpha_to = rs->index_of = rs->genpoly = NULL;
+
+  rs->mm = symsize;
+  rs->nn = (1<<symsize)-1;
+  rs->pad = pad;
+
+  /* calloc() refuses element counts whose byte size would overflow */
+  rs->alpha_to = (data_t *)calloc(rs->nn+1,sizeof(data_t));
+  if(rs->alpha_to == NULL)
+    goto fail;
+  rs->index_of = (data_t *)calloc(rs->nn+1,sizeof(data_t));
+  if(rs->index_of == NULL)
+    goto fail;
+
+  /* Generate Galois field lookup tables */
+  rs->index_of[0] = A0; /* log(zero) = -inf */
+  rs->alpha_to[A0] = 0; /* alpha**-inf = 0 */
+  sr = 1;
+  for(i=0;i<rs->nn;i++){
+    rs->index_of[sr] = i;
+    rs->alpha_to[i] = sr;
+    sr <<= 1;
+    if(sr & (1<<symsize))
+      sr ^= gfpoly;
+    sr &= rs->nn;
+  }
+  if(sr != 1)
+    goto fail; /* field generator polynomial is not primitive! */
+
+  /* Form RS code generator polynomial from its roots */
+  rs->genpoly = (data_t *)calloc(nroots+1,sizeof(data_t));
+  if(rs->genpoly == NULL)
+    goto fail;
+  rs->fcr = fcr;
+  rs->prim = prim;
+  rs->nroots = nroots;
+
+  /* Find prim-th root of 1, used in decoding: (1 + k*nn)/prim for the
+   * smallest k >= 0 that divides evenly. Quotient and remainder are stepped
+   * separately so no intermediate overflows, and the search gives up after
+   * prim steps: none exists when prim and nn share a factor. */
+  iprim = 1 / prim;
+  rem = 1 % prim;
+  for(i=0;rem != 0 && i < prim;i++){
+    iprim += rs->nn / prim;
+    rem += rs->nn % prim;
+    if(rem >= prim){ rem -= prim; iprim++; }
+  }
+  if(rem != 0)
+    goto fail; /* prim is not invertible modulo nn */
+  rs->iprim = iprim;
+
+  rs->genpoly[0] = 1;
+  for (i = 0,root=fcr*prim; i < nroots; i++,root += prim) {
+    rs->genpoly[i+1] = 1;
+    /* Multiply rs->genpoly[] by (x + @**root) */
+    for (j = i; j > 0; j--){
+      if (rs->genpoly[j] != 0)
+        rs->genpoly[j] = rs->genpoly[j-1] ^ rs->alpha_to[modnn(rs,rs->index_of[rs->genpoly[j]] + root)];
+      else
+        rs->genpoly[j] = rs->genpoly[j-1];
+    }
+    /* rs->genpoly[0] can never be zero */
+    rs->genpoly[0] = rs->alpha_to[modnn(rs,rs->index_of[rs->genpoly[0]] + root)];
+  }
+  /* convert rs->genpoly[] to index form for quicker encoding */
+  for (i = 0; i <= nroots; i++)
+    rs->genpoly[i] = rs->index_of[rs->genpoly[i]];
+  goto done;
+ fail: /* single error exit: drop whatever has been allocated so far */
+  free(rs->alpha_to);
+  free(rs->index_of);
+  free(rs->genpoly);
+  free(rs);
+  rs = NULL;
+ done:;
+}
diff --git a/init_rs_char.c b/init_rs_char.c
new file mode 100644
index 0000000..a51099a
--- /dev/null
+++ b/init_rs_char.c
@@ -0,0 +1,35 @@
+/* Initialize a RS codec
+ *
+ * Copyright 2002 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdlib.h>
+
+#include "char.h"
+#include "rs-common.h"
+
+void free_rs_char(void *p){ /* Release a codec handle; p == NULL is accepted and ignored */
+  struct rs *rs = (struct rs *)p;
+  if(rs == NULL) return; /* init returns NULL on failure; behave like free(NULL) */
+  free(rs->alpha_to); /* antilog table */
+  free(rs->index_of); /* log table */
+  free(rs->genpoly); /* generator polynomial */
+  free(rs);
+}
+
+/* Initialize a Reed-Solomon codec
+ * symsize = symbol size, bits
+ * gfpoly = Field generator polynomial coefficients
+ * fcr = first root of RS code generator polynomial, index form
+ * prim = primitive element to generate polynomial roots
+ * nroots = RS code generator polynomial degree (number of roots)
+ * pad = padding bytes at front of shortened block
+ * Returns the new codec, or NULL on bad parameters, non-primitive gfpoly or failed allocation */
+void *init_rs_char(int symsize,int gfpoly,int fcr,int prim,
+ int nroots,int pad){
+ struct rs *rs; /* assigned by the included body below */
+
+#include "init_rs.h" /* shared body: range checks, field tables, generator polynomial */
+
+ return rs; /* release with free_rs_char() */
+}
diff --git a/init_rs_int.c b/init_rs_int.c
new file mode 100644
index 0000000..a6036c2
--- /dev/null
+++ b/init_rs_int.c
@@ -0,0 +1,35 @@
+/* Initialize a RS codec
+ *
+ * Copyright 2002 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdlib.h>
+
+#include "int.h"
+#include "rs-common.h"
+
+void free_rs_int(void *p){ /* Release a codec handle; p == NULL is accepted and ignored */
+  struct rs *rs = (struct rs *)p;
+  if(rs == NULL) return; /* init returns NULL on failure; behave like free(NULL) */
+  free(rs->alpha_to); /* antilog table */
+  free(rs->index_of); /* log table */
+  free(rs->genpoly); /* generator polynomial */
+  free(rs);
+}
+
+/* Initialize a Reed-Solomon codec
+ * symsize = symbol size, bits
+ * gfpoly = Field generator polynomial coefficients
+ * fcr = first root of RS code generator polynomial, index form
+ * prim = primitive element to generate polynomial roots
+ * nroots = RS code generator polynomial degree (number of roots)
+ * pad = padding bytes at front of shortened block
+ * Returns the new codec, or NULL on bad parameters, non-primitive gfpoly or failed allocation */
+void *init_rs_int(int symsize,int gfpoly,int fcr,int prim,
+ int nroots,int pad){
+ struct rs *rs; /* assigned by the included body below */
+
+#include "init_rs.h" /* shared body: range checks, field tables, generator polynomial */
+
+ return rs; /* release with free_rs_int() */
+}
diff --git a/install-sh b/install-sh
new file mode 100755
index 0000000..e9de238
--- /dev/null
+++ b/install-sh
@@ -0,0 +1,251 @@
+#!/bin/sh
+#
+# install - install a program, script, or datafile
+# This comes from X11R5 (mit/util/scripts/install.sh).
+#
+# Copyright 1991 by the Massachusetts Institute of Technology
+#
+# Permission to use, copy, modify, distribute, and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice appear in all copies and that both that
+# copyright notice and this permission notice appear in supporting
+# documentation, and that the name of M.I.T. not be used in advertising or
+# publicity pertaining to distribution of the software without specific,
+# written prior permission. M.I.T. makes no representations about the
+# suitability of this software for any purpose. It is provided "as is"
+# without express or implied warranty.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch. It can only install one file at a time, a restriction
+# shared with many OS's install programs.
+
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+transformbasename=""    # suffix given with -b=
+transformarg=""         # sed expression given with -t=; cleared so an inherited environment value is never used
+instcmd="$mvprog"       # default is to move the source; -c switches to copy
+chmodcmd="$chmodprog 0755"
+chowncmd=""
+chgrpcmd=""
+stripcmd=""
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=""
+dst=""
+dir_arg=""              # non-empty once -d has been seen
+
+while [ x"$1" != x ]; do # stops at the first empty or missing argument
+ case $1 in
+ -c) instcmd="$cpprog" # copy the source instead of moving it
+ shift
+ continue;;
+
+ -d) dir_arg=true # the operand names a directory to create
+ shift
+ continue;;
+
+ -m) chmodcmd="$chmodprog $2" # mode argument replaces the 0755 default
+ shift
+ shift
+ continue;;
+
+ -o) chowncmd="$chownprog $2" # owner argument for chown
+ shift
+ shift
+ continue;;
+
+ -g) chgrpcmd="$chgrpprog $2" # group argument for chgrp
+ shift
+ shift
+ continue;;
+
+ -s) stripcmd="$stripprog" # strip the installed file
+ shift
+ continue;;
+
+ -t=*) transformarg=`echo $1 | sed 's/-t=//'` # sed expression applied to the destination base name
+ shift
+ continue;;
+
+ -b=*) transformbasename=`echo $1 | sed 's/-b=//'` # suffix removed before, and re-appended after, the -t transform
+ shift
+ continue;;
+
+ *) if [ x"$src" = x ] # first operand is the source, any later one becomes the destination
+ then
+ src=$1
+ else
+ # this colon is to work around a 386BSD /bin/sh bug
+ :
+ dst=$1
+ fi
+ shift
+ continue;;
+ esac
+done
+
+if [ x"$src" = x ]
+then
+    echo "install: no input file specified"
+    exit 1
+else
+    true
+fi
+
+if [ x"$dir_arg" != x ]; then
+    dst=$src    # with -d the lone operand names the directory to create
+    src=""
+
+    if [ -d $dst ]; then
+        instcmd=:    # already present: nothing to create, and leave its mode alone
+        chmodcmd=""
+    else
+        instcmd="$mkdirprog"    # honor MKDIRPROG here too, as the parent-directory loop does
+    fi
+else
+
+# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+# might cause directories to be created, which would be especially bad
+# if $src (and thus $dsttmp) contains '*'.
+
+    if [ -f $src -o -d $src ]
+    then
+        true
+    else
+        echo "install: $src does not exist"
+        exit 1
+    fi
+
+    if [ x"$dst" = x ]
+    then
+        echo "install: no destination specified"
+        exit 1
+    else
+        true
+    fi
+
+# If destination is a directory, append the input filename; if your system
+# does not like double slashes in filenames, you may need to add some logic
+
+    if [ -d $dst ]
+    then
+        dst="$dst"/`basename $src`
+    else
+        true
+    fi
+fi
+
+## this sed command emulates the dirname command ("." when $dst has no slash)
+dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+# Make sure that the destination directory exists.
+# this part is taken from Noah Friedman's mkinstalldirs script
+
+# Skip lots of stat calls in the usual case.
+if [ ! -d "$dstdir" ]; then
+defaultIFS='
+'
+IFS="${IFS-${defaultIFS}}"
+
+oIFS="${IFS}"
+# Some sh's can't handle IFS=/ for some reason, so '/' is mapped to '%' and split on that.
+IFS='%'
+set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` # positional parameters become the path components
+IFS="${oIFS}"
+
+pathcomp=''
+
+while [ $# -ne 0 ] ; do
+ pathcomp="${pathcomp}${1}"
+ shift
+
+ if [ ! -d "${pathcomp}" ] ;
+ then
+ $mkdirprog "${pathcomp}" # NOTE: not routed through $doit, so it runs even when DOITPROG is set
+ else
+ true
+ fi
+
+ pathcomp="${pathcomp}/"
+done
+fi
+
+if [ x"$dir_arg" != x ]
+then
+ $doit $instcmd $dst && # instcmd is ":" when the directory already exists
+
+ if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
+ if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
+ if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
+ if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
+else
+
+# If we're going to rename the final executable, determine the name now.
+
+ if [ x"$transformarg" = x ]
+ then
+ dstfile=`basename $dst`
+ else
+ dstfile=`basename $dst $transformbasename |
+ sed $transformarg`$transformbasename
+ fi
+
+# don't allow the sed command to completely eliminate the filename
+
+ if [ x"$dstfile" = x ]
+ then
+ dstfile=`basename $dst`
+ else
+ true
+ fi
+
+# Make a temp file name in the proper directory.
+
+ dsttmp=$dstdir/#inst.$$#
+
+# Move or copy the file name to the temp name
+
+ $doit $instcmd $src $dsttmp &&
+
+ trap "rm -f ${dsttmp}" 0 && # on shell exit (trap 0) remove the temp file if it is still there
+
+# and set any options; do chmod last to preserve setuid bits
+
+# If any of these fail, we abort the whole thing. If we want to
+# ignore errors from any of these, just make sure not to ignore
+# errors from the above "$doit $instcmd $src $dsttmp" command.
+
+ if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
+ if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
+ if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
+ if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
+
+# Now rename the file to the real destination.
+
+ $doit $rmcmd -f $dstdir/$dstfile && # rmcmd already carries -f; the second flag is redundant
+ $doit $mvcmd $dsttmp $dstdir/$dstfile
+
+fi && # the exit 0 below runs only when every step above succeeded
+
+
+exit 0
diff --git a/int.h b/int.h
new file mode 100644
index 0000000..46e865d
--- /dev/null
+++ b/int.h
@@ -0,0 +1,22 @@
+/* Stuff specific to the general (integer) version of the Reed-Solomon codecs
+ *
+ * Copyright 2003, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+typedef unsigned int data_t; /* one code symbol; this build stores symbols as unsigned int */
+
+#define MODNN(x) modnn(rs,x) /* like every macro below, expects a variable named rs in scope */
+
+#define MM (rs->mm) /* symbol size in bits (init_rs.h stores symsize here) */
+#define NN (rs->nn) /* (1<<symsize)-1 as set in init_rs.h */
+#define ALPHA_TO (rs->alpha_to) /* antilog table: index form to symbol value */
+#define INDEX_OF (rs->index_of) /* log table: symbol value to index form */
+#define GENPOLY (rs->genpoly) /* generator polynomial, kept in index form */
+#define NROOTS (rs->nroots) /* number of generator polynomial roots */
+#define FCR (rs->fcr) /* first root of the generator polynomial, index form */
+#define PRIM (rs->prim) /* primitive element used to generate the roots */
+#define IPRIM (rs->iprim) /* prim-th root of 1, index form */
+#define PAD (rs->pad) /* padding bytes at front of a shortened block */
+#define A0 (NN) /* index-form code for zero, i.e. log(0) */
+
+
diff --git a/lesser.txt b/lesser.txt
new file mode 100644
index 0000000..b1e3f5a
--- /dev/null
+++ b/lesser.txt
@@ -0,0 +1,504 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/makefile.in b/makefile.in
new file mode 100644
index 0000000..53fdfcb
--- /dev/null
+++ b/makefile.in
@@ -0,0 +1,242 @@
+# Makefile prototype for configure
+# Copyright 2004 Phil Karn, KA9Q
+# May be used under the terms of the GNU Lesser General Public License (LGPL)
+
+# @configure_input@
+# Paths and compiler; the @...@ placeholders are filled in by configure
+srcdir = @srcdir@
+prefix = @prefix@
+exec_prefix=@exec_prefix@
+VPATH = @srcdir@
+CC=@CC@
+# Objects that make up the library: the portable objects listed here plus
+# whatever configure substitutes for @MLIBS@
+LIBS=@MLIBS@ fec.o sim.o viterbi27.o viterbi27_port.o viterbi29.o viterbi29_port.o \
+ viterbi39.o viterbi39_port.o \
+ viterbi615.o viterbi615_port.o encode_rs_char.o encode_rs_int.o encode_rs_8.o \
+ decode_rs_char.o decode_rs_int.o decode_rs_8.o \
+ init_rs_char.o init_rs_int.o ccsds_tab.o \
+ encode_rs_ccsds.o decode_rs_ccsds.o ccsds_tal.o \
+ dotprod.o dotprod_port.o \
+ peakval.o peakval_port.o \
+ sumsq.o sumsq_port.o
+
+# Flags used by the implicit .c -> .o rule and by the explicit rules below
+# that reference $(CFLAGS)
+CFLAGS=@CFLAGS@ -I. -Wall @ARCH_OPTION@
+
+# Name of the shared library target (substituted by configure)
+SHARED_LIB=@SH_LIB@
+
+# Default target: static library plus the configured shared library
+all: libfec.a $(SHARED_LIB)
+
+# Build every test program, then run them: first with explicit arguments
+# (the "Correctness tests" pass), then the Viterbi tests again with no
+# arguments (the "Speed tests" pass).  make stops at the first command that
+# exits with a non-zero status.
+test: vtest27 vtest29 vtest39 vtest615 rstest dtest sumsq_test peaktest
+ @echo "Correctness tests:"
+ ./vtest27 -e 3.0 -n 1000 -v
+ ./vtest29 -e 2.5 -n 1000 -v
+ ./vtest39 -e 2.5 -n 1000 -v
+ ./vtest615 -e 1.0 -n 100 -v
+ ./rstest
+ ./dtest
+ ./sumsq_test
+ ./peaktest
+ @echo "Speed tests:"
+ ./vtest27
+ ./vtest29
+ ./vtest39
+ ./vtest615
+
+# Install the libraries, the public header fec.h and the section-3 man pages
+# into the directories chosen by configure.  @REBIND@ is replaced by
+# configure with a post-install command.
+install: all
+ mkdir -p @libdir@
+ install -m 644 -p $(SHARED_LIB) libfec.a @libdir@
+# (cd @libdir@;ln -f -s $(SHARED_LIB) libfec.so)
+ @REBIND@
+ mkdir -p @includedir@
+ install -m 644 -p fec.h @includedir@
+ mkdir -m 0755 -p @mandir@/man3
+ install -m 644 -p simd-viterbi.3 rs.3 dsp.3 @mandir@/man3
+
+# Link rules for the test programs.  Each one links its own object against
+# the static library; the rules that need the math library add -lm.
+# Note that these recipes call gcc directly rather than $(CC).
+peaktest: peaktest.o libfec.a
+ gcc -g -o $@ $^
+
+sumsq_test: sumsq_test.o libfec.a
+ gcc -g -o $@ $^
+
+dtest: dtest.o libfec.a
+ gcc -g -o $@ $^ -lm
+
+vtest27: vtest27.o libfec.a
+ gcc -g -o $@ $^ -lm
+
+vtest29: vtest29.o libfec.a
+ gcc -g -o $@ $^ -lm
+
+vtest39: vtest39.o libfec.a
+ gcc -g -o $@ $^ -lm
+
+vtest615: vtest615.o libfec.a
+ gcc -g -o $@ $^ -lm
+
+rstest: rstest.o libfec.a
+ gcc -g -o $@ $^
+
+# Not part of the "test" target; must be requested explicitly
+rs_speedtest: rs_speedtest.o libfec.a
+ gcc -g -o $@ $^
+
+# for some reason, the test programs without args segfault on the PPC with -O2 optimization. Dunno why - compiler bug?
+# Hence these explicit rules: they compile the Viterbi test drivers with -g
+# only and do not pass $(CFLAGS).
+vtest27.o: vtest27.c fec.h
+ gcc -g -c $<
+
+vtest29.o: vtest29.c fec.h
+ gcc -g -c $<
+
+vtest39.o: vtest39.c fec.h
+ gcc -g -c $<
+
+vtest615.o: vtest615.c fec.h
+ gcc -g -c $<
+
+# Static library: archive every object in $(LIBS), then index the archive
+libfec.a: $(LIBS)
+ ar rv $@ $^
+ ranlib libfec.a
+
+# for Darwin
+# (the install name is set to the bare target file name)
+libfec.dylib: $(LIBS)
+ $(CC) -dynamiclib -install_name $@ -o $@ $^
+
+# for Linux et al
+# (the soname is set to the bare target file name; this recipe calls gcc
+# directly rather than $(CC))
+libfec.so: $(LIBS)
+ gcc -shared -Xlinker -soname=$@ -o $@ -Wl,-whole-archive $^ -Wl,-no-whole-archive -lc
+
+# Per-object dependency rules.  Rules without a recipe only add
+# prerequisites; the object is then built by make's implicit .c -> .o rule.
+# Several viterbi*.o targets are listed twice in this file (here and again
+# further down); make merges the prerequisites of such duplicate rules.
+dotprod.o: dotprod.c fec.h
+
+dotprod_port.o: dotprod_port.c fec.h
+
+viterbi27.o: viterbi27.c fec.h
+
+viterbi27_port.o: viterbi27_port.c fec.h
+
+viterbi29.o: viterbi29.c fec.h
+
+viterbi39.o: viterbi39.c fec.h
+
+viterbi39_port.o: viterbi39_port.c fec.h
+
+viterbi39_sse2.o: viterbi39_sse2.c fec.h
+
+viterbi39_sse.o: viterbi39_sse.c fec.h
+
+viterbi39_mmx.o: viterbi39_mmx.c fec.h
+
+# Reed-Solomon codecs: one object per symbol representation
+encode_rs_char.o: encode_rs_char.c char.h rs-common.h
+
+encode_rs_int.o: encode_rs_int.c int.h rs-common.h
+
+encode_rs_8.o: encode_rs_8.c fixed.h
+
+encode_rs_av.o: encode_rs_av.c fixed.h
+
+decode_rs_char.o: decode_rs_char.c char.h rs-common.h
+
+decode_rs_int.o: decode_rs_int.c int.h rs-common.h
+
+decode_rs_8.o: decode_rs_8.c fixed.h
+
+init_rs_char.o: init_rs_char.c char.h rs-common.h
+
+init_rs_int.o: init_rs_int.c int.h rs-common.h
+
+# ccsds_tab.c is generated at build time by running the gen_ccsds helper
+ccsds_tab.o: ccsds_tab.c
+
+ccsds_tab.c: gen_ccsds
+ ./gen_ccsds > ccsds_tab.c
+
+gen_ccsds: gen_ccsds.o init_rs_char.o
+ gcc -o $@ $^
+
+gen_ccsds.o: gen_ccsds.c
+ gcc $(CFLAGS) -c -o $@ $<
+
+# ccsds_tal.c is generated the same way by gen_ccsds_tal; there is no
+# explicit rule for building gen_ccsds_tal itself in this file
+ccsds_tal.o: ccsds_tal.c
+
+ccsds_tal.c: gen_ccsds_tal
+ ./gen_ccsds_tal > ccsds_tal.c
+
+# exercise.c is compiled four times, with a different -D selector each time
+# (none, BIGSYM, FIXED, CCSDS)
+exercise_char.o: exercise.c
+ gcc $(CFLAGS) -c -o $@ $<
+
+exercise_int.o: exercise.c
+ gcc -DBIGSYM=1 $(CFLAGS) -c -o $@ $<
+
+exercise_8.o: exercise.c
+ gcc -DFIXED=1 $(CFLAGS) -c -o $@ $<
+
+exercise_ccsds.o: exercise.c
+ gcc -DCCSDS=1 $(CFLAGS) -c -o $@ $<
+
+# Viterbi decoders.  The _mmx/_sse/_sse2 variants get an explicit recipe so
+# the matching -m<isa> option is passed to gcc.
+viterbi27.o: viterbi27.c fec.h
+
+viterbi27_port.o: viterbi27_port.c fec.h
+
+viterbi27_av.o: viterbi27_av.c fec.h
+
+viterbi27_mmx.o: viterbi27_mmx.c fec.h
+ gcc $(CFLAGS) -mmmx -c -o $@ $<
+
+viterbi27_sse.o: viterbi27_sse.c fec.h
+ gcc $(CFLAGS) -msse -c -o $@ $<
+
+viterbi27_sse2.o: viterbi27_sse2.c fec.h
+ gcc $(CFLAGS) -msse2 -c -o $@ $<
+
+viterbi29.o: viterbi29.c fec.h
+
+viterbi29_port.o: viterbi29_port.c fec.h
+
+viterbi29_av.o: viterbi29_av.c fec.h
+
+viterbi29_mmx.o: viterbi29_mmx.c fec.h
+ gcc $(CFLAGS) -mmmx -c -o $@ $<
+
+viterbi29_sse.o: viterbi29_sse.c fec.h
+ gcc $(CFLAGS) -msse -c -o $@ $<
+
+viterbi29_sse2.o: viterbi29_sse2.c fec.h
+ gcc $(CFLAGS) -msse2 -c -o $@ $<
+
+viterbi39.o: viterbi39.c fec.h
+
+viterbi39_port.o: viterbi39_port.c fec.h
+
+viterbi39_av.o: viterbi39_av.c fec.h
+
+viterbi39_mmx.o: viterbi39_mmx.c fec.h
+ gcc $(CFLAGS) -mmmx -c -o $@ $<
+
+viterbi39_sse.o: viterbi39_sse.c fec.h
+ gcc $(CFLAGS) -msse -c -o $@ $<
+
+viterbi39_sse2.o: viterbi39_sse2.c fec.h
+ gcc $(CFLAGS) -msse2 -c -o $@ $<
+
+viterbi615.o: viterbi615.c fec.h
+
+viterbi615_port.o: viterbi615_port.c fec.h
+
+viterbi615_av.o: viterbi615_av.c fec.h
+
+viterbi615_mmx.o: viterbi615_mmx.c fec.h
+ gcc $(CFLAGS) -mmmx -c -o $@ $<
+
+viterbi615_sse.o: viterbi615_sse.c fec.h
+ gcc $(CFLAGS) -msse -c -o $@ $<
+
+viterbi615_sse2.o: viterbi615_sse2.c fec.h
+ gcc $(CFLAGS) -msse2 -c -o $@ $<
+
+# CPU feature detection, one file per architecture family
+cpu_mode_x86.o: cpu_mode_x86.c fec.h
+
+cpu_mode_ppc.o: cpu_mode_ppc.c fec.h
+
+
+# Remove objects, libraries, test programs, the generated CCSDS tables and
+# their generator programs, plus autoconf's cache directory
+clean:
+ rm -f *.o $(SHARED_LIB) *.a rs_speedtest peaktest sumsq_test dtest vtest27 vtest29 vtest39 vtest615 rstest ccsds_tab.c ccsds_tal.c gen_ccsds gen_ccsds_tal core
+ rm -rf autom4te.cache
+
+# Additionally remove the files written by configure, including the
+# generated makefile itself
+distclean: clean
+ rm -f config.log config.cache config.status config.h makefile
+
diff --git a/mmxbfly27.s b/mmxbfly27.s
new file mode 100644
index 0000000..4abbf48
--- /dev/null
+++ b/mmxbfly27.s
@@ -0,0 +1,148 @@
+/* Intel SIMD MMX implementation of Viterbi ACS butterflies
+ for 64-state (k=7) convolutional code
+ Copyright 2004 Phil Karn, KA9Q
+ This code may be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ;
+*/
+ # MMX (64-bit SIMD) version
+ # requires Pentium-MMX, Pentium-II or better
+
+ # These are offsets into struct v27, defined in viterbi27_mmx.c
+ .set DP,128
+ .set OLDMETRICS,132
+ .set NEWMETRICS,136
+ .text
+ .global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2
+ .type update_viterbi27_blk_mmx,@function
+ .align 16
+
+# int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits)
+#
+# Stack arguments (cdecl): 8(%ebp) = vp, 12(%ebp) = syms, 16(%ebp) = nbits.
+# For each of the nbits bits, two symbol bytes are consumed from syms,
+# 32 butterflies are computed (4 macro invocations of 8 each), 64 bytes of
+# decisions are written at vp->dp and the old/new metric pointers are swapped.
+# On exit the updated dp and metric pointers are stored back into *vp.
+# Returns 0 in %eax, or -1 if vp is NULL.
+update_viterbi27_blk_mmx:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %edi
+ pushl %edx
+ pushl %ebx
+
+ movl 8(%ebp),%edx # edx = vp
+ testl %edx,%edx
+ jnz 0f
+ # The immediate needs a '$': without it "movl -1,%eax" is a load from
+ # absolute address -1, which faults instead of returning -1
+ movl $-1,%eax
+ jmp err
+0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
+ movl NEWMETRICS(%edx),%edi # edi -> new metrics
+ movl DP(%edx),%edx # edx -> decisions
+
+1: movl 16(%ebp),%eax # eax = nbits
+ decl %eax
+ jl 2f # passed zero, we're done
+ movl %eax,16(%ebp)
+
+ movl 12(%ebp),%ebx # ebx = syms
+ movw (%ebx),%ax # ax = second symbol : first symbol
+ addl $2,%ebx
+ movl %ebx,12(%ebp) # syms now points at the next symbol pair
+
+ movb %ah,%bl # bl = second symbol
+ andl $255,%eax # eax = first symbol, zero-extended
+ andl $255,%ebx # ebx = second symbol, zero-extended
+
+ # shift into first array index dimension slot
+ shll $5,%eax
+ shll $5,%ebx
+
+ # each invocation of this macro will do 8 butterflies in parallel
+ .MACRO butterfly GROUP
+ # Compute branch metrics
+ movq (Mettab27_1+8*\GROUP)(%eax),%mm3
+ movq fifteens,%mm0
+
+ paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3
+ paddb ones,%mm3 # emulate pavgb - this may not be necessary
+ psrlq $1,%mm3
+ pand %mm0,%mm3
+
+ movq (8*\GROUP)(%esi),%mm6 # Incoming path metric, high bit = 0
+ movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1
+ movq %mm6,%mm1
+ movq %mm2,%mm7
+
+ paddb %mm3,%mm6
+ paddb %mm3,%mm2
+ pxor %mm0,%mm3 # invert branch metric
+ paddb %mm3,%mm7 # path metric for inverted symbols
+ paddb %mm3,%mm1
+
+ # live registers 1 2 6 7
+ # Compare mm6 and mm7; mm1 and mm2
+ pxor %mm3,%mm3
+ movq %mm6,%mm4
+ movq %mm1,%mm5
+ psubb %mm7,%mm4 # mm4 = mm6 - mm7
+ psubb %mm2,%mm5 # mm5 = mm1 - mm2
+ pcmpgtb %mm3,%mm4 # mm4 = first set of decisions (ff = 1 better)
+ pcmpgtb %mm3,%mm5 # mm5 = second set of decisions
+
+ # live registers 1 2 4 5 6 7
+ # select survivors
+ movq %mm4,%mm0
+ pand %mm4,%mm7
+ movq %mm5,%mm3
+ pand %mm5,%mm2
+ pandn %mm6,%mm0
+ pandn %mm1,%mm3
+ por %mm0,%mm7 # mm7 = first set of survivors
+ por %mm3,%mm2 # mm2 = second set of survivors
+
+ # live registers 2 4 5 7
+ # interleave & store decisions in mm4, mm5
+ # interleave & store new path metrics (survivors) in mm2, mm7
+ movq %mm4,%mm3
+ movq %mm7,%mm0
+ punpckhbw %mm5,%mm4
+ punpcklbw %mm5,%mm3
+ punpcklbw %mm2,%mm7 # interleave second 8 new metrics
+ punpckhbw %mm2,%mm0 # interleave first 8 new metrics
+ movq %mm4,(16*\GROUP+8)(%edx)
+ movq %mm3,(16*\GROUP)(%edx)
+ movq %mm7,(16*\GROUP)(%edi)
+ movq %mm0,(16*\GROUP+8)(%edi)
+
+ .endm
+
+# invoke macro 4 times for a total of 32 butterflies
+ butterfly GROUP=0
+ butterfly GROUP=1
+ butterfly GROUP=2
+ butterfly GROUP=3
+
+ addl $64,%edx # bump decision pointer
+
+ # swap metrics
+ movl %esi,%eax
+ movl %edi,%esi
+ movl %eax,%edi
+ jmp 1b
+
+2: emms
+ movl 8(%ebp),%ebx # ebx = vp
+ # stash metric pointers
+ movl %esi,OLDMETRICS(%ebx)
+ movl %edi,NEWMETRICS(%ebx)
+ movl %edx,DP(%ebx) # stash incremented value of vp->dp
+ xorl %eax,%eax # return 0
+err: popl %ebx
+ popl %edx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+ .data
+ .align 8
+fifteens:
+ .byte 15,15,15,15,15,15,15,15
+
+ .align 8
+ones: .byte 1,1,1,1,1,1,1,1
diff --git a/mmxbfly29.s b/mmxbfly29.s
new file mode 100644
index 0000000..e37cab8
--- /dev/null
+++ b/mmxbfly29.s
@@ -0,0 +1,161 @@
+/* Intel SIMD MMX implementation of Viterbi ACS butterflies
+ for 256-state (k=9) convolutional code
+ Copyright 2004 Phil Karn, KA9Q
+ This code may be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ void update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits);
+*/
+
+ # These are offsets into struct v29, defined in viterbi29.h
+ .set DP,512
+ .set OLDMETRICS,516
+ .set NEWMETRICS,520
+ .text
+ .global update_viterbi29_blk_mmx,Mettab29_1,Mettab29_2
+ .type update_viterbi29_blk_mmx,@function
+ .align 16
+
+ # MMX (64-bit SIMD) version
+ # requires Pentium-MMX, Pentium-II or better
+
+# update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits)
+#
+# Stack arguments (cdecl): 8(%ebp) = vp, 12(%ebp) = syms, 16(%ebp) = nbits.
+# For each of the nbits bits, two symbol bytes are consumed from syms,
+# 128 butterflies are computed (16 macro invocations of 8 each), 256 bytes of
+# decisions are written at vp->dp and the old/new metric pointers are swapped.
+# On exit the updated dp and metric pointers are stored back into *vp.
+# %eax is 0 on the normal path and -1 if vp is NULL.
+update_viterbi29_blk_mmx:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %edi
+ pushl %edx
+ pushl %ebx
+
+ movl 8(%ebp),%edx # edx = vp
+ testl %edx,%edx
+ jnz 0f
+ # The immediate needs a '$': without it "movl -1,%eax" is a load from
+ # absolute address -1, which faults instead of returning -1
+ movl $-1,%eax
+ jmp err
+0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
+ movl NEWMETRICS(%edx),%edi # edi -> new metrics
+ movl DP(%edx),%edx # edx -> decisions
+
+1: movl 16(%ebp),%eax # eax = nbits
+ decl %eax
+ jl 2f # passed zero, we're done
+ movl %eax,16(%ebp)
+
+ movl 12(%ebp),%ebx # ebx = syms
+ movw (%ebx),%ax # ax = second symbol : first symbol
+ addl $2,%ebx
+ movl %ebx,12(%ebp) # syms now points at the next symbol pair
+
+ movb %ah,%bl # bl = second symbol
+ andl $255,%eax # eax = first symbol, zero-extended
+ andl $255,%ebx # ebx = second symbol, zero-extended
+
+ # shift into first array index dimension slot
+ shll $7,%eax
+ shll $7,%ebx
+
+ # each invocation of this macro will do 8 butterflies in parallel
+ .MACRO butterfly GROUP
+ # Compute branch metrics
+ movq (Mettab29_1+8*\GROUP)(%eax),%mm3
+ movq fifteens,%mm0
+ paddb (Mettab29_2+8*\GROUP)(%ebx),%mm3
+ paddb ones,%mm3 # emulate pavgb - this may not be necessary
+ psrlq $1,%mm3
+ pand %mm0,%mm3
+
+ movq (8*\GROUP)(%esi),%mm6 # Incoming path metric, high bit = 0
+ movq ((8*\GROUP)+128)(%esi),%mm2 # Incoming path metric, high bit = 1
+ movq %mm6,%mm1
+ movq %mm2,%mm7
+
+ paddb %mm3,%mm6
+ paddb %mm3,%mm2
+ pxor %mm0,%mm3 # invert branch metric
+ paddb %mm3,%mm7 # path metric for inverted symbols
+ paddb %mm3,%mm1
+
+ # live registers 1 2 6 7
+ # Compare mm6 and mm7; mm1 and mm2
+ pxor %mm3,%mm3
+ movq %mm6,%mm4
+ movq %mm1,%mm5
+ psubb %mm7,%mm4 # mm4 = mm6 - mm7
+ psubb %mm2,%mm5 # mm5 = mm1 - mm2
+ pcmpgtb %mm3,%mm4 # mm4 = first set of decisions (ff = 1 better)
+ pcmpgtb %mm3,%mm5 # mm5 = second set of decisions
+
+ # live registers 1 2 4 5 6 7
+ # select survivors
+ movq %mm4,%mm0
+ pand %mm4,%mm7
+ movq %mm5,%mm3
+ pand %mm5,%mm2
+ pandn %mm6,%mm0
+ pandn %mm1,%mm3
+ por %mm0,%mm7 # mm7 = first set of survivors
+ por %mm3,%mm2 # mm2 = second set of survivors
+
+ # live registers 2 4 5 7
+ # interleave & store decisions in mm4, mm5
+ # interleave & store new path metrics (survivors) in mm2, mm7
+ movq %mm4,%mm3
+ movq %mm7,%mm0
+ punpckhbw %mm5,%mm4
+ punpcklbw %mm5,%mm3
+ punpcklbw %mm2,%mm7 # interleave second 8 new metrics
+ punpckhbw %mm2,%mm0 # interleave first 8 new metrics
+ movq %mm4,(16*\GROUP+8)(%edx)
+ movq %mm3,(16*\GROUP)(%edx)
+ movq %mm7,(16*\GROUP)(%edi)
+ movq %mm0,(16*\GROUP+8)(%edi)
+
+ .endm
+
+# invoke macro 16 times for a total of 128 butterflies
+ butterfly GROUP=0
+ butterfly GROUP=1
+ butterfly GROUP=2
+ butterfly GROUP=3
+ butterfly GROUP=4
+ butterfly GROUP=5
+ butterfly GROUP=6
+ butterfly GROUP=7
+ butterfly GROUP=8
+ butterfly GROUP=9
+ butterfly GROUP=10
+ butterfly GROUP=11
+ butterfly GROUP=12
+ butterfly GROUP=13
+ butterfly GROUP=14
+ butterfly GROUP=15
+
+ addl $256,%edx # bump decision pointer
+
+ # swap metrics
+ movl %esi,%eax
+ movl %edi,%esi
+ movl %eax,%edi
+ jmp 1b
+
+2: emms
+ movl 8(%ebp),%ebx # ebx = vp
+ # stash metric pointers
+ movl %esi,OLDMETRICS(%ebx)
+ movl %edi,NEWMETRICS(%ebx)
+ movl %edx,DP(%ebx) # stash incremented value of vp->dp
+ xorl %eax,%eax # eax = 0 on the normal path
+err: popl %ebx
+ popl %edx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+ .data
+ .align 8
+fifteens:
+ .byte 15,15,15,15,15,15,15,15
+
+ .align 8
+ones: .byte 1,1,1,1,1,1,1,1
diff --git a/peak_mmx_assist.s b/peak_mmx_assist.s
new file mode 100644
index 0000000..dae831f
--- /dev/null
+++ b/peak_mmx_assist.s
@@ -0,0 +1,70 @@
+# MMX assist routines for peakval
+# Copyright 2001 Phil Karn, KA9Q
+# May be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ .text
+
+# Find peak absolute value in signed 16-bit input samples
+# int peakval_mmx(signed short *in,int cnt);
+#
+# Samples are examined four at a time (one 8-byte load per pass); once fewer
+# than four remain the loop stops, so a trailing cnt%4 samples are not looked
+# at here.  The result is returned in %eax.
+# A sample of -32768 negates to itself (0x8000) and, being negative in the
+# signed compare, never replaces the running peak.
+# NOTE(review): peakval_mmx.c in this commit also defines peakval_mmx, and
+# peakval_mmx_assist.s carries this same code under the _assist name, so this
+# file looks like a stale copy - confirm before linking it.
+ .global peakval_mmx
+ .type peakval_mmx,@function
+ .align 16
+peakval_mmx:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %ecx
+ pushl %ebx
+
+ movl 8(%ebp),%esi # esi = in
+ movl 12(%ebp),%ecx # ecx = cnt
+
+ pxor %mm7,%mm7 # clear peak
+
+1: subl $4,%ecx # fewer than 4 samples left?
+ jl 2f
+ movq (%esi),%mm0 # load 4 samples
+ movq %mm0,%mm1
+ psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive
+ pxor %mm1,%mm0 # complement negatives
+ psubw %mm1,%mm0 # add 1 to negatives
+ movq %mm7,%mm6 # copy previous peak
+ pcmpgtw %mm0,%mm6 # ff == old peak greater
+ pand %mm6,%mm7 # select old peaks that are greater
+ pandn %mm0,%mm6 # select new values that are greater
+ por %mm6,%mm7 # mm7 = per-lane maximum so far
+
+ addl $8,%esi
+ jmp 1b
+
+ # Reduce the four 16-bit lanes of mm7 to a single value in eax:
+ # extract each lane in turn and keep the larger one
+2: movd %mm7,%eax
+ psrlq $16,%mm7
+ andl $0xffff,%eax # eax = lane 0
+
+ movd %mm7,%edx
+ psrlq $16,%mm7
+ andl $0xffff,%edx # edx = lane 1
+ cmpl %edx,%eax
+ jnl 3f
+ movl %edx,%eax
+3:
+ movd %mm7,%edx
+ psrlq $16,%mm7
+ andl $0xffff,%edx # edx = lane 2
+ cmpl %edx,%eax
+ jnl 4f
+ movl %edx,%eax
+4:
+ movd %mm7,%edx
+ andl $0xffff,%edx # edx = lane 3
+ cmpl %edx,%eax
+ jnl 5f
+ movl %edx,%eax
+5:
+ emms
+ popl %ebx
+ popl %ecx
+ popl %esi
+ popl %ebp
+ ret
+
diff --git a/peak_sse2_assist.s b/peak_sse2_assist.s
new file mode 100644
index 0000000..1dee3a8
--- /dev/null
+++ b/peak_sse2_assist.s
@@ -0,0 +1,51 @@
+# SSE2 assist routines for peakval
+# Copyright 2001 Phil Karn, KA9Q
+# May be used under the terms of the GNU Public License (GPL)
+
+ .text
+
+# Find peak absolute value in signed 16-bit input samples
+# int peakval_sse2(signed short *in,int cnt);
+#
+# Samples are examined eight at a time (one 16-byte movaps load per pass);
+# once fewer than eight remain the loop stops, so a trailing cnt%8 samples
+# are not looked at here.  The result is returned in %eax.
+# NOTE(review): peakval_sse2.c in this commit also defines peakval_sse2, and
+# peakval_sse2_assist.s carries this same code under the _assist name, so
+# this file looks like a stale copy - confirm before linking it.
+ .global peakval_sse2
+ .type peakval_sse2,@function
+ .align 16
+peakval_sse2:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %ecx
+
+ movl 8(%ebp),%esi # esi = in
+ movl 12(%ebp),%ecx # ecx = cnt
+
+ pxor %xmm7,%xmm7 # clear peak
+
+1: subl $8,%ecx # fewer than 8 samples left?
+ jl 2f
+ movaps (%esi),%xmm0 # load 8 samples
+ movaps %xmm0,%xmm1
+ psraw $15,%xmm1 # xmm1 = 1's if negative, 0's if positive
+ pxor %xmm1,%xmm0 # complement negatives
+ psubw %xmm1,%xmm0 # add 1 to negatives
+ pmaxsw %xmm0,%xmm7 # store peak
+
+ addl $16,%esi
+ jmp 1b
+
+ # Horizontal reduction: fold the upper half onto the lower half three
+ # times (8 bytes, then 32 bits, then 16 bits), taking the signed max
+2: movaps %xmm7,%xmm0
+ psrldq $8,%xmm0
+ pmaxsw %xmm0,%xmm7
+ movaps %xmm7,%xmm0
+ psrlq $32,%xmm0
+ pmaxsw %xmm0,%xmm7
+ movaps %xmm7,%xmm0
+ psrlq $16,%xmm0
+ pmaxsw %xmm0,%xmm7 # max value in low word of %xmm7
+
+ movd %xmm7,%eax
+ andl $0xffff,%eax # keep only the low 16 bits
+
+ popl %ecx
+ popl %esi
+ popl %ebp
+ ret
diff --git a/peak_sse_assist.s b/peak_sse_assist.s
new file mode 100644
index 0000000..ea6fce8
--- /dev/null
+++ b/peak_sse_assist.s
@@ -0,0 +1,49 @@
+# SSE assist routines for peakval
+# Copyright 2001 Phil Karn, KA9Q
+# May be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ .text
+
+# Find peak absolute value in signed 16-bit input samples
+# int peakval_sse(signed short *in,int cnt);
+#
+# Samples are examined four at a time in MMX registers, using pmaxsw for the
+# running maximum; once fewer than four remain the loop stops, so a trailing
+# cnt%4 samples are not looked at here.  The result is returned in %eax.
+# NOTE(review): peakval_sse.c in this commit also defines peakval_sse, and
+# peakval_sse_assist.s carries this same code under the _assist name, so this
+# file looks like a stale copy - confirm before linking it.
+ .global peakval_sse
+ .type peakval_sse,@function
+ .align 16
+peakval_sse:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %ecx
+
+ movl 8(%ebp),%esi # esi = in
+ movl 12(%ebp),%ecx # ecx = cnt
+
+ pxor %mm7,%mm7 # clear peak
+
+1: subl $4,%ecx # fewer than 4 samples left?
+ jl 2f
+ movq (%esi),%mm0 # load 4 samples
+ movq %mm0,%mm1
+ psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive
+ pxor %mm1,%mm0 # complement negatives
+ psubw %mm1,%mm0 # add 1 to negatives
+ pmaxsw %mm0,%mm7 # store peak
+
+ addl $8,%esi
+ jmp 1b
+
+ # Horizontal reduction: fold the upper half onto the lower half twice
+ # (32 bits, then 16 bits), taking the signed max each time
+2: movq %mm7,%mm0
+ psrlq $32,%mm0
+ pmaxsw %mm0,%mm7
+ movq %mm7,%mm0
+ psrlq $16,%mm0
+ pmaxsw %mm0,%mm7 # max value in low word of %mm7
+
+ movd %mm7,%eax
+ andl $0xffff,%eax # keep only the low 16 bits
+
+ emms
+ popl %ecx
+ popl %esi
+ popl %ebp
+ ret
diff --git a/peaktest.c b/peaktest.c
new file mode 100644
index 0000000..fa4b280
--- /dev/null
+++ b/peaktest.c
@@ -0,0 +1,38 @@
+/* Verify correctness of the peak routine
+ * Copyright 2004 Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+/* These values should trigger leading/trailing array fragment handling */
+#define NSAMP 200002
+#define OFFSET 1
+
+int peakval(signed short *,int);
+int peakval_port(signed short *,int);
+
+/* Fill a buffer with pseudo-random samples, then check that the CPU-specific
+ * peakval() agrees with the portable reference peakval_port() on the same
+ * (deliberately offset) slice of the buffer.
+ * Prints "OK" and exits 0 when they agree; prints both values and exits 1
+ * on a mismatch, so that "make test" stops on a failure.
+ */
+int main(){
+  int i,s;
+  int result,rresult;
+  signed short samples[NSAMP];
+
+  srandom(time(NULL));
+
+  for(i=0;i<NSAMP;i++){
+    /* NOTE(review): with the 0x0fff mask s can never equal 0x8000, so this
+     * loop always runs exactly once and every sample is non-negative; the
+     * guard only matters if the mask is widened to 16 bits - confirm intent.
+     */
+    do {
+      s = random() & 0x0fff;
+    } while(s == 0x8000);
+    samples[i] = s;
+  }
+  samples[5] = 25000; /* larger than any masked random sample */
+
+  rresult = peakval_port(&samples[OFFSET],NSAMP-OFFSET);
+  result = peakval(&samples[OFFSET],NSAMP-OFFSET);
+  if(result != rresult){
+    printf("peak mismatch: %d != %d\n",result,rresult);
+    exit(1); /* report the failure through the exit status too */
+  }
+  printf("OK\n");
+  exit(0);
+}
diff --git a/peakval.c b/peakval.c
new file mode 100644
index 0000000..811a3a9
--- /dev/null
+++ b/peakval.c
@@ -0,0 +1,39 @@
+/* Switch to appropriate version of peakval routine
+ * Copyright 2004, Phil Karn, KA9Q
+ */
+
+#include <stdlib.h>
+#include "fec.h"
+
+int peakval_port(signed short *b,int cnt);
+#ifdef __i386__
+int peakval_mmx(signed short *b,int cnt);
+int peakval_sse(signed short *b,int cnt);
+int peakval_sse2(signed short *b,int cnt);
+#endif
+
+#ifdef __VEC__
+int peakval_av(signed short *b,int cnt);
+#endif
+
+/* Return the peak absolute value of the cnt samples at b, using the
+ * implementation that matches the CPU mode selected by find_cpu_mode().
+ * Any mode without a version compiled in falls back to the portable routine.
+ */
+int peakval(signed short *b,int cnt){
+  find_cpu_mode();
+
+#ifdef __i386__
+  if(Cpu_mode == MMX)
+    return peakval_mmx(b,cnt);
+  if(Cpu_mode == SSE)
+    return peakval_sse(b,cnt);
+  if(Cpu_mode == SSE2)
+    return peakval_sse2(b,cnt);
+#endif
+#ifdef __VEC__
+  if(Cpu_mode == ALTIVEC)
+    return peakval_av(b,cnt);
+#endif
+  /* PORT, or a mode for which nothing above was compiled in */
+  return peakval_port(b,cnt);
+}
diff --git a/peakval_av.c b/peakval_av.c
new file mode 100644
index 0000000..ae54c10
--- /dev/null
+++ b/peakval_av.c
@@ -0,0 +1,61 @@
+/* Return the largest absolute value of a vector of signed shorts
+
+ * This is the Altivec SIMD version.
+
+ * Copyright 2004 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+#include "fec.h"
+
+/* Altivec peak search over the cnt signed shorts starting at in.
+ * Tracks a per-lane running minimum and maximum, then combines them and
+ * reduces across lanes; the value read back from lane 7 is returned.
+ * NOTE(review): peakval.c declares this function as returning int, while
+ * this definition returns signed short - confirm which one is intended.
+ */
+signed short peakval_av(signed short *in,int cnt){
+ vector signed short x;
+ int pad;
+ /* Scratch union: c[15] carries the shift count handed to vec_sro via cv;
+  * hv and s[] are used at the end to read the result back out */
+ union { vector signed char cv; vector signed short hv; signed short s[8]; signed char c[16];} s;
+ vector signed short smallest,largest;
+
+ /* Both accumulators start at zero */
+ smallest = (vector signed short)(0);
+ largest = (vector signed short)(0);
+ /* pad = byte offset of in within its 16-byte block */
+ if((pad = (int)in & 15)!=0){
+ /* Load unaligned leading word */
+ x = vec_perm(vec_ld(0,in),(vector signed short)(0),vec_lvsl(0,in));
+ if(cnt < 8){ /* Shift right to chop stuff beyond end of short block */
+ s.c[15] = (8-cnt)<<4;
+ x = vec_sro(x,s.cv);
+ }
+ smallest = vec_min(smallest,x);
+ largest = vec_max(largest,x);
+ /* Skip the 8-pad/2 samples that preceded the next 16-byte boundary */
+ in += 8-pad/2;
+ cnt -= 8-pad/2;
+ }
+ /* Everything is now aligned, rip through most of the block */
+ while(cnt >= 8){
+ x = vec_ld(0,in);
+ smallest = vec_min(smallest,x);
+ largest = vec_max(largest,x);
+ in += 8;
+ cnt -= 8;
+ }
+ /* Handle trailing fragment, if any */
+ if(cnt > 0){
+ x = vec_ld(0,in);
+ s.c[15] = (8-cnt)<<4;
+ x = vec_sro(x,s.cv);
+ smallest = vec_min(smallest,x);
+ largest = vec_max(largest,x);
+ }
+ /* Combine and extract result */
+ largest = vec_max(largest,vec_abs(smallest));
+
+ /* Fold the vector onto itself three times so the overall maximum ends
+  * up in the last lane */
+ s.c[15] = 64; /* Shift right four 16-bit words */
+ largest = vec_max(largest,vec_sro(largest,s.cv));
+
+ s.c[15] = 32; /* Shift right two 16-bit words */
+ largest = vec_max(largest,vec_sro(largest,s.cv));
+
+ s.c[15] = 16; /* Shift right one 16-bit word */
+ largest = vec_max(largest,vec_sro(largest,s.cv));
+
+ s.hv = largest;
+ return s.s[7];
+}
diff --git a/peakval_mmx.c b/peakval_mmx.c
new file mode 100644
index 0000000..436fe88
--- /dev/null
+++ b/peakval_mmx.c
@@ -0,0 +1,34 @@
+/* Wrapper for the MMX version of peakval
+ * Copyright 2004 Phil Karn, KA9Q
+ */
+
+#include <stdlib.h>
+
+int peakval_mmx_assist(signed short *,int);
+
+/* MMX wrapper: return the largest absolute value among the cnt samples at b.
+ * The samples before the first 8-byte boundary and the cnt%4 samples left
+ * over at the end are handled in C; the aligned middle part is passed to
+ * peakval_mmx_assist().
+ */
+int peakval_mmx(signed short *b,int cnt){
+  int best = 0;
+  int mag;
+  int i,tail;
+
+  /* Scalar pass until b reaches an 8-byte boundary (or samples run out) */
+  for(; ((int)b & 7) != 0 && cnt != 0; b++, cnt--){
+    mag = abs(*b);
+    best = (mag > best) ? mag : best;
+  }
+
+  /* Vector pass over the aligned section */
+  mag = peakval_mmx_assist(b,cnt);
+  best = (mag > best) ? mag : best;
+
+  /* Scalar pass over whatever is left after the whole groups of four */
+  b += cnt & ~3;
+  tail = cnt & 3;
+  for(i = 0; i < tail; i++){
+    mag = abs(b[i]);
+    best = (mag > best) ? mag : best;
+  }
+  return best;
+}
diff --git a/peakval_mmx_assist.s b/peakval_mmx_assist.s
new file mode 100644
index 0000000..553cb79
--- /dev/null
+++ b/peakval_mmx_assist.s
@@ -0,0 +1,70 @@
+# MMX assist routines for peakval
+# Copyright 2001 Phil Karn, KA9Q
+# May be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ .text
+
+# Find peak absolute value in signed 16-bit input samples
+# int peakval_mmx_assist(signed short *in,int cnt);
+#
+# Samples are examined four at a time (one 8-byte load per pass); once fewer
+# than four remain the loop stops, so a trailing cnt%4 samples are not looked
+# at here.  The result is returned in %eax.
+# A sample of -32768 negates to itself (0x8000) and, being negative in the
+# signed compare, never replaces the running peak.
+ .global peakval_mmx_assist
+ .type peakval_mmx_assist,@function
+ .align 16
+peakval_mmx_assist:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %ecx
+ pushl %ebx
+
+ movl 8(%ebp),%esi # esi = in
+ movl 12(%ebp),%ecx # ecx = cnt
+
+ pxor %mm7,%mm7 # clear peak
+
+1: subl $4,%ecx # fewer than 4 samples left?
+ jl 2f
+ movq (%esi),%mm0 # load 4 samples
+ movq %mm0,%mm1
+ psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive
+ pxor %mm1,%mm0 # complement negatives
+ psubw %mm1,%mm0 # add 1 to negatives
+ movq %mm7,%mm6 # copy previous peak
+ pcmpgtw %mm0,%mm6 # ff == old peak greater
+ pand %mm6,%mm7 # select old peaks that are greater
+ pandn %mm0,%mm6 # select new values that are greater
+ por %mm6,%mm7 # mm7 = per-lane maximum so far
+
+ addl $8,%esi
+ jmp 1b
+
+ # Reduce the four 16-bit lanes of mm7 to a single value in eax:
+ # extract each lane in turn and keep the larger one
+2: movd %mm7,%eax
+ psrlq $16,%mm7
+ andl $0xffff,%eax # eax = lane 0
+
+ movd %mm7,%edx
+ psrlq $16,%mm7
+ andl $0xffff,%edx # edx = lane 1
+ cmpl %edx,%eax
+ jnl 3f
+ movl %edx,%eax
+3:
+ movd %mm7,%edx
+ psrlq $16,%mm7
+ andl $0xffff,%edx # edx = lane 2
+ cmpl %edx,%eax
+ jnl 4f
+ movl %edx,%eax
+4:
+ movd %mm7,%edx
+ andl $0xffff,%edx # edx = lane 3
+ cmpl %edx,%eax
+ jnl 5f
+ movl %edx,%eax
+5:
+ emms
+ popl %ebx
+ popl %ecx
+ popl %esi
+ popl %ebp
+ ret
+
diff --git a/peakval_port.c b/peakval_port.c
new file mode 100644
index 0000000..07ab316
--- /dev/null
+++ b/peakval_port.c
@@ -0,0 +1,16 @@
+/* Portable C version of peakval
+ * Copyright 2004 Phil Karn, KA9Q
+ */
+#include <stdlib.h>
+#include "fec.h"
+/* Scan len samples starting at b and return the largest absolute value
+ * found; returns 0 when len is zero or negative.
+ */
+int peakval_port(signed short *b,int len){
+  const signed short *p = b;
+  int remaining = len;
+  int best = 0;
+
+  while(remaining > 0){
+    int mag = abs(*p);
+    if(best < mag)
+      best = mag;
+    p++;
+    remaining--;
+  }
+  return best;
+}
diff --git a/peakval_sse.c b/peakval_sse.c
new file mode 100644
index 0000000..9868b7f
--- /dev/null
+++ b/peakval_sse.c
@@ -0,0 +1,35 @@
+/* IA-32 SSE version of peakval
+ * Copyright 2004 Phil Karn, KA9Q
+ */
+
+#include <stdlib.h>
+#include "fec.h"
+
+int peakval_sse_assist(signed short *,int);
+
+/* SSE wrapper: return the largest absolute value among the cnt samples at b.
+ * The samples before the first 8-byte boundary and the cnt%4 samples left
+ * over at the end are handled in C; the aligned middle part is passed to
+ * peakval_sse_assist().
+ */
+int peakval_sse(signed short *b,int cnt){
+  int best = 0;
+  int mag;
+  int i,tail;
+
+  /* Scalar pass until b reaches an 8-byte boundary (or samples run out) */
+  for(; ((int)b & 7) != 0 && cnt != 0; b++, cnt--){
+    mag = abs(*b);
+    best = (mag > best) ? mag : best;
+  }
+
+  /* Vector pass over the aligned section */
+  mag = peakval_sse_assist(b,cnt);
+  best = (mag > best) ? mag : best;
+
+  /* Scalar pass over whatever is left after the whole groups of four */
+  b += cnt & ~3;
+  tail = cnt & 3;
+  for(i = 0; i < tail; i++){
+    mag = abs(b[i]);
+    best = (mag > best) ? mag : best;
+  }
+  return best;
+}
diff --git a/peakval_sse2.c b/peakval_sse2.c
new file mode 100644
index 0000000..79d9059
--- /dev/null
+++ b/peakval_sse2.c
@@ -0,0 +1,34 @@
+/* IA-32 SSE2 version of peakval
+ * Copyright 2004 Phil Karn, KA9Q
+ */
+#include <stdlib.h>
+#include "fec.h"
+
+int peakval_sse2_assist(signed short *,int);
+
+/* SSE2 wrapper: return the largest absolute value among the cnt samples at b.
+ * The samples before the first 16-byte boundary and the cnt%8 samples left
+ * over at the end are handled in C; the aligned middle part is passed to
+ * peakval_sse2_assist().
+ */
+int peakval_sse2(signed short *b,int cnt){
+  int best = 0;
+  int mag;
+  int i,tail;
+
+  /* Scalar pass until b reaches a 16-byte boundary (or samples run out) */
+  for(; ((int)b & 15) != 0 && cnt != 0; b++, cnt--){
+    mag = abs(*b);
+    best = (mag > best) ? mag : best;
+  }
+
+  /* Vector pass over the aligned section */
+  mag = peakval_sse2_assist(b,cnt);
+  best = (mag > best) ? mag : best;
+
+  /* Scalar pass over whatever is left after the whole groups of eight */
+  b += cnt & ~7;
+  tail = cnt & 7;
+  for(i = 0; i < tail; i++){
+    mag = abs(b[i]);
+    best = (mag > best) ? mag : best;
+  }
+  return best;
+}
diff --git a/peakval_sse2_assist.s b/peakval_sse2_assist.s
new file mode 100644
index 0000000..c7a58e7
--- /dev/null
+++ b/peakval_sse2_assist.s
@@ -0,0 +1,51 @@
+# SSE2 assist routines for peakval
+# Copyright 2001 Phil Karn, KA9Q
+# May be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ .text
+
+# Find peak absolute value in signed 16-bit input samples
+# int peakval_sse2_assist(signed short *in,int cnt);
+#
+# Samples are examined eight at a time (one 16-byte movaps load per pass);
+# once fewer than eight remain the loop stops, so a trailing cnt%8 samples
+# are not looked at here.  The result is returned in %eax.
+ .global peakval_sse2_assist
+ .type peakval_sse2_assist,@function
+ .align 16
+peakval_sse2_assist:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %ecx
+
+ movl 8(%ebp),%esi # esi = in
+ movl 12(%ebp),%ecx # ecx = cnt
+
+ pxor %xmm7,%xmm7 # clear peak
+
+1: subl $8,%ecx # fewer than 8 samples left?
+ jl 2f
+ movaps (%esi),%xmm0 # load 8 samples
+ movaps %xmm0,%xmm1
+ psraw $15,%xmm1 # xmm1 = 1's if negative, 0's if positive
+ pxor %xmm1,%xmm0 # complement negatives
+ psubw %xmm1,%xmm0 # add 1 to negatives
+ pmaxsw %xmm0,%xmm7 # store peak
+
+ addl $16,%esi
+ jmp 1b
+
+ # Horizontal reduction: fold the upper half onto the lower half three
+ # times (8 bytes, then 32 bits, then 16 bits), taking the signed max
+2: movaps %xmm7,%xmm0
+ psrldq $8,%xmm0
+ pmaxsw %xmm0,%xmm7
+ movaps %xmm7,%xmm0
+ psrlq $32,%xmm0
+ pmaxsw %xmm0,%xmm7
+ movaps %xmm7,%xmm0
+ psrlq $16,%xmm0
+ pmaxsw %xmm0,%xmm7 # max value in low word of %xmm7
+
+ movd %xmm7,%eax
+ andl $0xffff,%eax # keep only the low 16 bits
+
+ popl %ecx
+ popl %esi
+ popl %ebp
+ ret
diff --git a/peakval_sse_assist.s b/peakval_sse_assist.s
new file mode 100644
index 0000000..827c800
--- /dev/null
+++ b/peakval_sse_assist.s
@@ -0,0 +1,49 @@
+# SSE assist routines for peakval
+# Copyright 2001 Phil Karn, KA9Q
+# May be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ .text
+
+# Find peak absolute value in signed 16-bit input samples, 4 samples (one 8-byte load) per iteration
+# int peakval_sse_assist(signed short *in,int cnt); uses the 64-bit %mm registers; the trailing cnt%4 samples are not examined
+ .global peakval_sse_assist
+ .type peakval_sse_assist,@function
+ .align 16
+peakval_sse_assist:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %ecx
+
+ movl 8(%ebp),%esi # esi = in
+ movl 12(%ebp),%ecx # ecx = cnt
+
+ pxor %mm7,%mm7 # clear peak
+
+1: subl $4,%ecx # one group of 4 samples per pass
+ jl 2f # fewer than 4 left: stop
+ movq (%esi),%mm0 # load 4 samples
+ movq %mm0,%mm1
+ psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive
+ pxor %mm1,%mm0 # complement negatives
+ psubw %mm1,%mm0 # add 1 to negatives
+ pmaxsw %mm0,%mm7 # store peak
+
+ addl $8,%esi # advance 8 bytes = 4 samples
+ jmp 1b
+
+2: movq %mm7,%mm0 # reduce the 4 per-lane peaks to one
+ psrlq $32,%mm0 # words 2,3 down to words 0,1
+ pmaxsw %mm0,%mm7
+ movq %mm7,%mm0
+ psrlq $16,%mm0 # word 1 down to word 0
+ pmaxsw %mm0,%mm7 # max value in low word of %mm7
+
+ movd %mm7,%eax
+ andl $0xffff,%eax # return only the low 16 bits, zero-extended
+
+ emms # clear MMX state before returning to the caller
+ popl %ecx
+ popl %esi
+ popl %ebp
+ ret
diff --git a/rs-common.h b/rs-common.h
new file mode 100644
index 0000000..e64eb39
--- /dev/null
+++ b/rs-common.h
@@ -0,0 +1,26 @@
+/* Stuff common to all the general-purpose Reed-Solomon codecs
+ * Copyright 2004 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+/* Reed-Solomon codec control block; data_t is not defined here and must be defined by the including file */
+struct rs {
+ int mm; /* Bits per symbol */
+ int nn; /* Symbols per block (= (1<<mm)-1) */
+ data_t *alpha_to; /* log lookup table -- NOTE(review): the name suggests index -> element (antilog); label may be swapped with the next one, confirm against init_rs.h */
+ data_t *index_of; /* Antilog lookup table -- NOTE(review): the name suggests element -> index (log); confirm against init_rs.h */
+ data_t *genpoly; /* Generator polynomial */
+ int nroots; /* Number of generator roots = number of parity symbols */
+ int fcr; /* First consecutive root, index form */
+ int prim; /* Primitive element, index form */
+ int iprim; /* prim-th root of 1, index form */
+ int pad; /* Padding bytes in shortened block (presumably the pad count given to init_rs_*, i.e. leading zero symbols -- confirm) */
+};
+
+static inline int modnn(struct rs *rs,int x){ /* Reduce x to the range below rs->nn, congruent modulo nn, without a divide */
+ while (x >= rs->nn) { /* Values already below nn (including negative ones) are returned unchanged */
+ x -= rs->nn;
+ x = (x >> rs->mm) + (x & rs->nn); /* Fold the bits above mm back in: with nn = (1<<mm)-1, 2^mm is congruent to 1 mod nn */
+ }
+ return x;
+}
diff --git a/rs.3 b/rs.3
new file mode 100644
index 0000000..5d71503
--- /dev/null
+++ b/rs.3
@@ -0,0 +1,198 @@
+.TH REED-SOLOMON 3
+.SH NAME
+init_rs_int, encode_rs_int, decode_rs_int, free_rs_int,
+init_rs_char, encode_rs_char, decode_rs_char, free_rs_char,
+encode_rs_8, decode_rs_8, encode_rs_ccsds, decode_rs_ccsds
+\- Reed-Solomon encoding/decoding
+.SH SYNOPSIS
+.nf
+.ft B
+#include "fec.h"
+
+void *init_rs_int(int symsize,int gfpoly,int fcr,int prim,
+ int nroots,int pad);
+
+void encode_rs_int(void *rs,int *data,int *parity);
+
+int decode_rs_int(void *rs,int *data,int *eras_pos,int no_eras);
+
+void free_rs_int(void *rs);
+
+
+void *init_rs_char(int symsize,int gfpoly,int fcr,int prim,
+ int nroots,int pad);
+
+void encode_rs_char(void *rs,unsigned char *data,
+ unsigned char *parity);
+
+int decode_rs_char(void *rs,unsigned char *data,int *eras_pos,
+ int no_eras);
+
+void free_rs_char(void *rs);
+
+
+void encode_rs_8(unsigned char *data,unsigned char *parity,
+ int pad);
+
+int decode_rs_8(unsigned char *data,int *eras_pos,int no_eras,
+ int pad);
+
+
+void encode_rs_ccsds(unsigned char *data,unsigned char *parity,
+ int pad);
+
+int decode_rs_ccsds(unsigned char *data,int *eras_pos,int no_eras,
+ int pad);
+
+unsigned char Taltab[256];
+unsigned char Tal1tab[256];
+
+.fi
+
+.SH DESCRIPTION
+These functions implement Reed-Solomon error control encoding and
+decoding. For optimal performance in a variety of applications, three
+sets of functions are supplied. To access these functions, add "-lfec"
+to your linker command line.
+
+The functions with names ending in \fB_int\fR handle data in integer arrays,
+permitting arbitrarily large codewords limited only by machine
+resources.
+
+The functions with names ending in \fB_char\fR take unsigned char arrays and can
+handle codes with symbols of 8 bits or less (i.e., with codewords of
+255 symbols or less).
+
+\fBencode_rs_8\fR and \fBdecode_rs_8\fR implement a specific
+(255,223) code with 8-bit symbols specified by the CCSDS:
+a field generator of 1 + X + X^2 + X^7 + X^8 and a code
+generator with first consecutive root = 112 and a primitive element of
+11. These functions use the conventional
+polynomial form, \fInot\fR the dual-basis specified in
+the CCSDS standard, to represent symbols. This code may be
+shortened by giving a non-zero \fBpad\fR value to produce a
+(255-\fBpad\fR,223-\fBpad\fR) code. The padding will consist of the
+specified number of zeroes at the front of the full codeword.
+
+For full CCSDS compatibility, \fBencode_rs_ccsds\fR and
+\fBdecode_rs_ccsds\fR are provided. These functions use two lookup
+tables, \fBTaltab\fR to convert from conventional to dual-basis, and
+\fBTal1tab\fR to perform the inverse mapping from dual-basis to
+conventional form, before and after calls to \fBencode_rs_8\fR
+and \fBdecode_rs_8\fR.
+
+The \fB_8\fR and \fB_ccsds\fR functions do not require initialization.
+
+To use the general purpose RS encoder or decoder (i.e.,
+the \fB_char\fR or \fB_int\fR versions), the user must first
+call \fBinit_rs_int\fR or \fBinit_rs_char\fR as appropriate. The
+arguments are as follows:
+
+\fBsymsize\fR gives the symbol size in bits, up to 8 for \fBinit_rs_char\fR
+or 32 for \fBinit_rs_int\fR on a machine with 32-bit ints (though such a
+huge code would exhaust memory limits on a 32-bit machine). The resulting
+Reed-Solomon code word will have 2^\fBsymsize\fR - 1 symbols,
+each containing \fBsymsize\fR bits. The codeword may be shortened with the
+\fBpad\fR parameter described below.
+
+\fBgfpoly\fR gives the extended Galois field generator polynomial coefficients,
+with the 0th coefficient in the low order bit. The polynomial
+\fImust\fR be primitive; if not, the call will fail and NULL will be
+returned.
+
+\fBfcr\fR gives, in index form, the first consecutive root of the
+Reed Solomon code generator polynomial.
+
+\fBprim\fR gives, in index form, the primitive element in the Galois field
+used to generate the Reed Solomon code generator polynomial.
+
+\fBnroots\fR gives the number of roots in the Reed Solomon code
+generator polynomial. This equals the number of parity symbols
+per code block.
+
+\fBpad\fR gives the number of leading symbols in the codeword
+that are implicitly padded to zero in a shortened code block.
+
+The resulting Reed-Solomon code has parameters (N,K), where
+N = 2^\fBsymsize\fR - \fBpad\fR - 1 and K = N-\fBnroots\fR.
+
+The \fBencode_rs_char\fR and \fBencode_rs_int\fR functions accept
+the pointer returned by \fBinit_rs_char\fR or
+\fBinit_rs_int\fR, respectively, to
+encode a block of data using the specified code.
+The input data array is expected to
+contain K symbols (of \fBsymsize\fR bits each, right justified
+in each char or int) and \fBnroots\fR parity symbols will be placed
+into the \fBparity\fR array, right justified.
+
+The \fBdecode_\fR functions correct
+the errors in a Reed-Solomon codeword of N symbols up to the capability of the code.
+An optional list of "erased" symbol indices may be given in the \fBeras_pos\fR
+array to assist the decoder; this parameter may be NULL if no erasures
+are given. The number of erased symbols must be given in the \fBno_eras\fR
+parameter.
+
+To maximize performance, the encode and decode functions perform no
+"sanity checking" of their inputs. Decoder failure may result if
+\fBeras_pos\fR contains duplicate entries, and both encoder and
+decoder will fail if an input symbol exceeds its allowable range.
+(Symbol range overflow cannot occur with the \fB_8\fR or
+\fB_ccsds\fR functions,
+or with the \fB_char\fR functions when 8-bit symbols are specified.)
+
+The decoder corrects the symbols "in place", returning the number
+of symbols in error. If the codeword is uncorrectable, -1 is returned
+and the data block is unchanged. If \fBeras_pos\fR is non-null, it is
+used to return a list of corrected symbol positions, in no particular
+order. This means that the
+array passed through this parameter \fImust\fR have at least \fBnroots\fR
+elements to prevent a possible buffer overflow.
+
+The \fBfree_rs_int\fR and \fBfree_rs_char\fR functions free the internal
+space allocated by the \fBinit_rs_int\fR and \fBinit_rs_char\fR functions,
+respectively.
+
+The functions \fBencode_rs_8\fR and \fBdecode_rs_8\fR do not have
+corresponding \fBinit\fR and \fBfree\fR, nor do they take the
+\fBrs\fR argument accepted by the other functions as their parameters
+are statically compiled. These functions implement a code
+equivalent to calling
+
+\fBinit_rs_char\fR(8,0x187,112,11,32,pad);
+
+and using the resulting pointer with \fBencode_rs_char\fR and
+\fBdecode_rs_char\fR.
+
+.SH RETURN VALUES
+\fBinit_rs_int\fR and \fBinit_rs_char\fR return a pointer to an internal
+control structure that must be passed to the corresponding encode, decode
+and free functions. These functions return NULL on error.
+
+The \fBdecode_\fR functions return a count of corrected
+symbols, or -1 if the block was uncorrectable.
+
+.SH AUTHOR
+Phil Karn, KA9Q (karn@ka9q.net), based heavily on earlier work by Robert
+Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari Thirumoorthy
+(harit@spectra.eng.hawaii.edu). Extra improvements suggested by Detmar
+Welz (dwelz@web.de).
+
+.SH COPYRIGHT
+Copyright 2004, Phil Karn, KA9Q. May be used under the terms of the
+GNU Lesser General Public License (LGPL).
+
+.SH SEE ALSO
+CCSDS 101.0-B-6: Telemetry Channel Coding.
+http://www.ccsds.org/documents/101x0b6.pdf
+
+.SH NOTE
+CCSDS chose the "dual basis" symbol representation because it
+simplified the implementation of a Reed-Solomon encoder in dedicated
+hardware. However, this approach holds no advantages for a software
+implementation on a general purpose computer, so use of the dual basis
+is recommended only if compatibility with the CCSDS standard is needed,
+e.g., to decode data from an existing spacecraft using the CCSDS
+standard. If you just want a fast (255,223) RS codec without needing
+to interoperate with a CCSDS standard code, use \fBencode_rs_8\fR
+and \fBdecode_rs_8\fR.
+
diff --git a/rs_speedtest.c b/rs_speedtest.c
new file mode 100644
index 0000000..225f160
--- /dev/null
+++ b/rs_speedtest.c
@@ -0,0 +1,54 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include "fec.h"
+
+int main(){ /* Time decode_rs_char() and then decode_rs_8() over `trials` decodes of a (255,223) block and print the rates */
+ unsigned char block[255]; /* 223 data symbols followed by 32 parity symbols */
+ int i;
+ void *rs; /* Handle for the general-purpose char codec */
+ struct rusage start,finish;
+ double extime; /* User CPU time spent in the timed loop, in seconds */
+ int trials = 10000;
+
+ for(i=0;i<223;i++)
+ block[i] = 0x01;
+
+ if((rs = init_rs_char(8,0x187,112,11,32,0)) == NULL){ fprintf(stderr,"init_rs_char failed!\n"); exit(1); } /* Do not hand a NULL codec to the calls below */
+ encode_rs_char(rs,block,&block[223]);
+
+ getrusage(RUSAGE_SELF,&start);
+ for(i=0;i<trials;i++){
+#if 0
+ block[0] ^= 0xff; /* Introduce an error */
+ block[2] ^= 0xff; /* Introduce an error */
+#endif
+ decode_rs_char(rs,block,NULL,0);
+ }
+ getrusage(RUSAGE_SELF,&finish);
+ extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec);
+
+ printf("Execution time for %d Reed-Solomon blocks using general decoder: %.2f sec\n",trials,extime);
+ printf("decoder speed: %g bits/s\n",trials*223*8/extime);
+ free_rs_char(rs); /* Release the general codec; the _8 calls below take no rs handle */
+
+ encode_rs_8(block,&block[223],0);
+ getrusage(RUSAGE_SELF,&start);
+ for(i=0;i<trials;i++){
+#if 0
+ block[0] ^= 0xff; /* Introduce an error */
+ block[2] ^= 0xff; /* Introduce an error */
+#endif
+ decode_rs_8(block,NULL,0,0);
+ }
+ getrusage(RUSAGE_SELF,&finish);
+ extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec);
+ printf("Execution time for %d Reed-Solomon blocks using CCSDS decoder: %.2f sec\n",trials,extime);
+ printf("decoder speed: %g bits/s\n",trials*223*8/extime);
+
+ exit(0);
+}
+
diff --git a/rstest.c b/rstest.c
new file mode 100644
index 0000000..539b40a
--- /dev/null
+++ b/rstest.c
@@ -0,0 +1,296 @@
+/* Test the Reed-Solomon codecs
+ * for various block sizes and with random data and random error patterns
+ *
+ * Copyright 2002 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <time.h>
+#include "fec.h"
+
+
+struct etab { /* One code configuration to exercise; fields are passed to init_rs_char()/init_rs_int() */
+ int symsize; /* Bits per symbol; 0 marks the end of Tab[] */
+ int genpoly; /* Field generator polynomial (passed as the gfpoly argument) */
+ int fcs; /* Passed as the fcr argument (first consecutive root) */
+ int prim; /* Primitive element, index form */
+ int nroots; /* Number of generator roots = parity symbols */
+ int ntrials; /* Not read by any function in this file */
+} Tab[] = {
+ {2, 0x7, 1, 1, 1, 10 },
+ {3, 0xb, 1, 1, 2, 10 },
+ {4, 0x13, 1, 1, 4, 10 },
+ {5, 0x25, 1, 1, 6, 10 },
+ {6, 0x43, 1, 1, 8, 10 },
+ {7, 0x89, 1, 1, 10, 10 },
+ {8, 0x11d, 1, 1, 32, 10 },
+ {8, 0x187, 112,11, 32, 10 }, /* Duplicates CCSDS codec */
+ {9, 0x211, 1, 1, 32, 10 },
+ {10,0x409, 1, 1, 32, 10 },
+ {11,0x805, 1, 1, 32, 10 },
+ {12,0x1053, 1, 1, 32, 5 },
+ {13,0x201b, 1, 1, 32, 2 },
+ {14,0x4443, 1, 1, 32, 1 },
+ {15,0x8003, 1, 1, 32, 1 },
+ {16,0x1100b, 1, 1, 32, 1 },
+ {0, 0, 0, 0, 0}, /* Terminator: main() stops at symsize == 0 */
+};
+
+int exercise_char(struct etab *e);
+int exercise_int(struct etab *e);
+int exercise_8(void);
+
+int main(){ /* Run every codec exercise; the exit status is nonzero if any of them reported a problem */
+ int i,failed = 0; /* failed becomes 1 once any exercise returns nonzero (error count or -1) */
+
+ srandom(time(NULL));
+
+ printf("Testing fixed CCSDS encoder...\n");
+ failed |= (exercise_8() != 0);
+ for(i=0;Tab[i].symsize != 0;i++){ /* Tab[] ends at the entry with symsize 0 */
+ int nn,kk;
+
+ nn = (1<<Tab[i].symsize) - 1;
+ kk = nn - Tab[i].nroots;
+ printf("Testing (%d,%d) code...\n",nn,kk);
+ if(Tab[i].symsize <= 8)
+ failed |= (exercise_char(&Tab[i]) != 0); /* Symbols of up to 8 bits use the char codec */
+ else
+ failed |= (exercise_int(&Tab[i]) != 0);
+ }
+ exit(failed ? EXIT_FAILURE : EXIT_SUCCESS); /* Previously always 0, hiding failures from scripts and make */
+}
+
+int exercise_8(void){ /* Exercise encode_rs_8()/decode_rs_8() with 0..16 injected errors; returns the number of mismatches detected */
+ int nn = 255;
+ unsigned char block[nn],tblock[nn]; /* block = reference codeword, tblock = corrupted copy handed to the decoder */
+ int errlocs[nn],derrlocs[nn]; /* errlocs[i] != 0 marks an injected error at i; derrlocs = erasure list in, decoder-reported locations out */
+ int i;
+ int errors;
+ int derrors,kk;
+ int errval,errloc;
+ int erasures;
+ int decoder_errors = 0;
+
+ /* Compute code parameters */
+ kk = 223;
+
+
+ /* Test up to the error correction capacity of the code */
+ for(errors=0;errors<=(nn-kk)/2;errors++){
+
+ /* Load block with random data and encode */
+ for(i=0;i<kk;i++)
+ block[i] = random() & nn;
+ memcpy(tblock,block,sizeof(block)); /* Overwritten again after encoding, before tblock is read */
+ encode_rs_8(block,&block[kk],0);
+
+ /* Make temp copy, seed with errors */
+ memcpy(tblock,block,sizeof(block));
+ memset(errlocs,0,sizeof(errlocs));
+ memset(derrlocs,0,sizeof(derrlocs));
+ erasures=0;
+ for(i=0;i<errors;i++){
+ do {
+ errval = random() & nn;
+ } while(errval == 0); /* Error value must be nonzero */
+
+ do {
+ errloc = random() % nn;
+ } while(errlocs[errloc] != 0); /* Must not choose the same location twice */
+
+ errlocs[errloc] = 1;
+
+#if FLAG_ERASURE /* Not defined in this file; evaluates to 0 unless fec.h or the build defines it */
+ if(random() & 1) /* 50-50 chance */
+ derrlocs[erasures++] = errloc;
+#endif
+ tblock[errloc] ^= errval;
+ }
+
+ /* Decode the errored block */
+ derrors = decode_rs_8(tblock,derrlocs,erasures,0);
+
+ if(derrors != errors){ /* Reported count must equal the number injected */
+ printf("(%d,%d) decoder says %d errors, true number is %d\n",nn,kk,derrors,errors);
+ decoder_errors++;
+ }
+ for(i=0;i<derrors;i++){ /* Every reported location must be one that was corrupted */
+ if(errlocs[derrlocs[i]] == 0){
+ printf("(%d,%d) decoder indicates error in location %d without error\n",nn,kk,derrlocs[i]);
+ decoder_errors++;
+ }
+ }
+ if(memcmp(tblock,block,sizeof(tblock)) != 0){ /* Decoded block must match the original codeword */
+ printf("(%d,%d) decoder uncorrected errors! output ^ input:",nn,kk);
+ decoder_errors++;
+ for(i=0;i<nn;i++)
+ printf(" %02x",tblock[i] ^ block[i]);
+ printf("\n");
+ }
+ }
+ return decoder_errors;
+}
+
+
+int exercise_char(struct etab *e){ /* Exercise the char codec for code *e; returns the number of mismatches detected, or -1 if it cannot run */
+ int nn = (1<<e->symsize) - 1;
+ unsigned char block[nn],tblock[nn]; /* block = reference codeword, tblock = corrupted copy handed to the decoder */
+ int errlocs[nn],derrlocs[nn]; /* errlocs[i] != 0 marks an injected error at i; derrlocs = erasure list in, decoder-reported locations out */
+ int i;
+ int errors;
+ int derrors,kk;
+ int errval,errloc;
+ int erasures;
+ int decoder_errors = 0;
+ void *rs;
+
+ if(e->symsize > 8) /* Symbols must fit in an unsigned char */
+ return -1;
+
+ /* Compute code parameters */
+ kk = nn - e->nroots;
+
+ rs = init_rs_char(e->symsize,e->genpoly,e->fcs,e->prim,e->nroots,0);
+ if(rs == NULL){
+ printf("init_rs_char failed!\n");
+ return -1;
+ }
+ /* Test up to the error correction capacity of the code */
+ for(errors=0;errors <= e->nroots/2;errors++){
+
+ /* Load block with random data and encode */
+ for(i=0;i<kk;i++)
+ block[i] = random() & nn;
+ memcpy(tblock,block,sizeof(block)); /* Overwritten again after encoding, before tblock is read */
+ encode_rs_char(rs,block,&block[kk]);
+
+ /* Make temp copy, seed with errors */
+ memcpy(tblock,block,sizeof(block));
+ memset(errlocs,0,sizeof(errlocs));
+ memset(derrlocs,0,sizeof(derrlocs));
+ erasures=0;
+ for(i=0;i<errors;i++){
+ do {
+ errval = random() & nn;
+ } while(errval == 0); /* Error value must be nonzero */
+
+ do {
+ errloc = random() % nn;
+ } while(errlocs[errloc] != 0); /* Must not choose the same location twice */
+
+ errlocs[errloc] = 1;
+
+#if FLAG_ERASURE
+ if(random() & 1) /* 50-50 chance */
+ derrlocs[erasures++] = errloc;
+#endif
+ tblock[errloc] ^= errval;
+ }
+
+ /* Decode the errored block */
+ derrors = decode_rs_char(rs,tblock,derrlocs,erasures);
+
+ if(derrors != errors){ /* Reported count must equal the number injected */
+ printf("(%d,%d) decoder says %d errors, true number is %d\n",nn,kk,derrors,errors);
+ decoder_errors++;
+ }
+ for(i=0;i<derrors;i++){ /* Every reported location must be one that was corrupted */
+ if(errlocs[derrlocs[i]] == 0){
+ printf("(%d,%d) decoder indicates error in location %d without error\n",nn,kk,derrlocs[i]);
+ decoder_errors++;
+ }
+ }
+ if(memcmp(tblock,block,sizeof(tblock)) != 0){ /* Decoded block must match the original codeword */
+ printf("(%d,%d) decoder uncorrected errors! output ^ input:",nn,kk);
+ decoder_errors++;
+ for(i=0;i<nn;i++)
+ printf(" %02x",tblock[i] ^ block[i]);
+ printf("\n");
+ }
+ }
+
+ free_rs_char(rs);
+ return decoder_errors; /* Was "return 0", which discarded the count; now matches exercise_8() */
+}
+
+int exercise_int(struct etab *e){ /* Exercise the int codec for code *e; returns the number of mismatches detected, or -1 if init fails */
+ int nn = (1<<e->symsize) - 1;
+ int block[nn],tblock[nn]; /* block = reference codeword, tblock = corrupted copy handed to the decoder */
+ int errlocs[nn],derrlocs[nn]; /* errlocs[i] != 0 marks an injected error at i; derrlocs = erasure list in, decoder-reported locations out */
+ int i;
+ int errors;
+ int derrors,kk;
+ int errval,errloc;
+ int erasures;
+ int decoder_errors = 0;
+ void *rs;
+
+ /* Compute code parameters */
+ kk = nn - e->nroots;
+
+ rs = init_rs_int(e->symsize,e->genpoly,e->fcs,e->prim,e->nroots,0);
+ if(rs == NULL){
+ printf("init_rs_int failed!\n");
+ return -1;
+ }
+ /* Test up to the error correction capacity of the code */
+ for(errors=0;errors <= e->nroots/2;errors++){
+
+ /* Load block with random data and encode */
+ for(i=0;i<kk;i++)
+ block[i] = random() & nn;
+ memcpy(tblock,block,sizeof(block)); /* Overwritten again after encoding, before tblock is read */
+ encode_rs_int(rs,block,&block[kk]);
+
+ /* Make temp copy, seed with errors */
+ memcpy(tblock,block,sizeof(block));
+ memset(errlocs,0,sizeof(errlocs));
+ memset(derrlocs,0,sizeof(derrlocs));
+ erasures=0;
+ for(i=0;i<errors;i++){
+ do {
+ errval = random() & nn;
+ } while(errval == 0); /* Error value must be nonzero */
+
+ do {
+ errloc = random() % nn;
+ } while(errlocs[errloc] != 0); /* Must not choose the same location twice */
+
+ errlocs[errloc] = 1;
+
+#if FLAG_ERASURE
+ if(random() & 1) /* 50-50 chance */
+ derrlocs[erasures++] = errloc;
+#endif
+ tblock[errloc] ^= errval;
+ }
+
+ /* Decode the errored block */
+ derrors = decode_rs_int(rs,tblock,derrlocs,erasures);
+
+ if(derrors != errors){ /* Reported count must equal the number injected */
+ printf("(%d,%d) decoder says %d errors, true number is %d\n",nn,kk,derrors,errors);
+ decoder_errors++;
+ }
+ for(i=0;i<derrors;i++){ /* Every reported location must be one that was corrupted */
+ if(errlocs[derrlocs[i]] == 0){
+ printf("(%d,%d) decoder indicates error in location %d without error\n",nn,kk,derrlocs[i]);
+ decoder_errors++;
+ }
+ }
+ if(memcmp(tblock,block,sizeof(tblock)) != 0){ /* Decoded block must match the original codeword */
+ printf("(%d,%d) decoder uncorrected errors! output ^ input:",nn,kk);
+ decoder_errors++;
+ for(i=0;i<nn;i++)
+ printf(" %02x",tblock[i] ^ block[i]);
+ printf("\n");
+ }
+ }
+
+ free_rs_int(rs);
+ return decoder_errors; /* Was "return 0", which discarded the count; now matches exercise_8() */
+}
diff --git a/sim.c b/sim.c
new file mode 100644
index 0000000..151b04c
--- /dev/null
+++ b/sim.c
@@ -0,0 +1,43 @@
+#include <math.h>
+#include <stdlib.h>
+#include "fec.h"
+
+#define MAX_RANDOM 0x7fffffff
+
+/* Generate gaussian random double with specified mean and std_dev (polar form of the Box-Muller transform, driven by random()) */
+double normal_rand(double mean, double std_dev)
+{
+ double fac,rsq,v1,v2;
+ static double gset; /* Second deviate of the last generated pair, kept for the next call */
+ static int iset; /* Nonzero when gset holds an unused deviate */
+
+ if(iset){
+ /* Already got one */
+ iset = 0;
+ return mean + std_dev*gset;
+ }
+ /* Generate two evenly distributed numbers between -1 and +1
+ * that are inside the unit circle
+ */
+ do {
+ v1 = 2.0 * (double)random() / MAX_RANDOM - 1;
+ v2 = 2.0 * (double)random() / MAX_RANDOM - 1;
+ rsq = v1*v1 + v2*v2;
+ } while(rsq >= 1.0 || rsq == 0.0); /* Reject points outside the circle, and the origin where log(rsq) is undefined */
+ fac = sqrt(-2.0*log(rsq)/rsq);
+ gset = v1*fac; /* Save one unit-variance deviate for the next call (state is shared by all callers) */
+ iset++;
+ return mean + std_dev*v2*fac;
+}
+
+unsigned char addnoise(int sym,double amp,double gain,double offset,int clip){ /* Noisy sample for one binary symbol: offset + gain*(+/-amp + unit-variance gaussian), clipped to 0..clip */
+ int sample;
+
+ sample = offset + gain*normal_rand(sym?amp:-amp,1.0); /* Nonzero sym uses mean +amp, zero uses -amp; the double result is converted to int */
+ /* Clip to the range 0..clip (the result only fits the unsigned char return type if clip <= 255) */
+ if(sample < 0)
+ sample = 0;
+ else if(sample > clip)
+ sample = clip;
+ return sample;
+}
diff --git a/simd-viterbi.3 b/simd-viterbi.3
new file mode 100644
index 0000000..4c67593
--- /dev/null
+++ b/simd-viterbi.3
@@ -0,0 +1,247 @@
+.TH SIMD-VITERBI 3
+.SH NAME
+create_viterbi27, set_viterbi27_polynomial, init_viterbi27, update_viterbi27_blk,
+chainback_viterbi27, delete_viterbi27,
+create_viterbi29, set_viterbi29_polynomial, init_viterbi29, update_viterbi29_blk,
+chainback_viterbi29, delete_viterbi29,
+create_viterbi39, set_viterbi39_polynomial, init_viterbi39, update_viterbi39_blk,
+chainback_viterbi39, delete_viterbi39,
+create_viterbi615, set_viterbi615_polynomial, init_viterbi615, update_viterbi615_blk,
+chainback_viterbi615, delete_viterbi615 \- IA32 SIMD-assisted Viterbi decoders
+.SH SYNOPSIS
+.nf
+.ft B
+#include "fec.h"
+void *create_viterbi27(int blocklen);
+void set_viterbi27_polynomial(int polys[2]);
+int init_viterbi27(void *vp,int starting_state);
+int update_viterbi27_blk(void *vp,unsigned char syms[],int nbits);
+int chainback_viterbi27(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi27(void *vp);
+.fi
+.sp
+.nf
+.ft B
+void *create_viterbi29(int blocklen);
+void set_viterbi29_polynomial(int polys[2]);
+int init_viterbi29(void *vp,int starting_state);
+int update_viterbi29_blk(void *vp,unsigned char syms[],int nbits);
+int chainback_viterbi29(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi29(void *vp);
+.fi
+.sp
+.nf
+.ft B
+void *create_viterbi39(int blocklen);
+void set_viterbi39_polynomial(int polys[3]);
+int init_viterbi39(void *vp,int starting_state);
+int update_viterbi39_blk(void *vp,unsigned char syms[],int nbits);
+int chainback_viterbi39(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi39(void *vp);
+.fi
+.sp
+.nf
+.ft B
+void *create_viterbi615(int blocklen);
+void set_viterbi615_polynomial(int polys[6]);
+int init_viterbi615(void *vp,int starting_state);
+int update_viterbi615_blk(void *vp,unsigned char syms[],int nbits);
+int chainback_viterbi615(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
+void delete_viterbi615(void *vp);
+.fi
+.SH DESCRIPTION
+These functions implement high performance Viterbi decoders for four
+convolutional codes: a rate 1/2 constraint length 7 (k=7) code
+("viterbi27"), a rate 1/2 k=9 code ("viterbi29"),
+a rate 1/3 k=9 code ("viterbi39") and a rate 1/6 k=15 code ("viterbi615").
+The decoders use the Intel IA32 or PowerPC SIMD instruction sets, if available, to improve
+decoding speed.
+
+On the IA32 there are three different SIMD instruction sets. The first
+and most common is MMX, introduced on later Intel Pentiums and then on
+the Intel Pentium II and most Intel clones (AMD K6, Transmeta Crusoe,
+etc). SSE was introduced on the Pentium III and later implemented in
+the AMD Athlon 4 (AMD calls it "3D Now! Professional"). Most
+recently, SSE2 was introduced in the Intel Pentium 4, and has been
+adopted by more recent AMD CPUs. The presence of SSE2 implies the
+existence of SSE, which in turn implies MMX.
+
+Altivec is the PowerPC SIMD instruction set. It is roughly comparable
+to SSE2. Altivec was introduced to the general public in the Apple
+Macintosh G4; it is also present in the G5. Altivec is actually a
+Motorola trademark; Apple calls it "Velocity Engine" and IBM calls it
+"VMX". All refer to the same thing.
+
+When built for the IA32 or PPC architectures, the functions
+automatically use the most powerful SIMD instruction set available. If
+no SIMD instructions are available, or if the library is built for a
+non-IA32, non-PPC machine, a portable C version is executed
+instead.
+
+.SH USAGE
+Four versions of each function are provided, one for each code.
+In the following discussion, change "viterbi" to "viterbi27", "viterbi29", "viterbi39"
+or "viterbi615" as desired.
+
+Before Viterbi decoding can begin, an instance must first be created with
+\fBcreate_viterbi()\fR. This function creates and returns a pointer to
+an internal control structure
+containing the path metrics and the branch
+decisions. \fBcreate_viterbi()\fR takes one argument that gives the
+length of the data block in bits. You \fImust not\fR attempt to
+decode a block longer than the length given to \fBcreate_viterbi()\fR.
+
+Before decoding a new frame,
+\fBinit_viterbi()\fR must be called to reset the decoder state.
+It accepts the instance pointer returned by
+\fBcreate_viterbi()\fR and the initial starting state of the
+convolutional encoder (usually 0). If the initial starting state is unknown or
+incorrect, the decoder will still function but the decoded data may be
+incorrect at the start of the block.
+
+Blocks of received symbols are processed with calls to
+\fBupdate_viterbi_blk()\fR. The \fBnbits\fR parameter specifies the
+number of \fIdata bits\fR (not channel symbols) represented by the
+\fBsyms\fR buffer. (For rate 1/2 codes, the number of symbols in
+\fBsyms\fR is twice \fInbits\fR, and so on.)
+Each symbol is expected to range
+from 0 through 255, with 0 corresponding to a "strong 0" and 255
+corresponding to a "strong 1". The caller is responsible for
+determining the proper pairing of input symbols (commonly known as
+decoder symbol phasing).
+
+At the end of the block, the data is recovered with a call to
+\fBchainback_viterbi()\fR. The arguments are the pointer to the
+decoder instance, a pointer to a user-supplied buffer into which the
+decoded data is to be written, the number of data bits (not bytes)
+that are to be decoded, and the terminal state of the convolutional
+encoder at the end of the frame (usually 0). If the terminal state is
+incorrect or unknown, the decoded data bits at the end of the frame
+may be unreliable. The decoded data is written in big-endian order,
+i.e., the first bit in the frame is written into the high order bit of
+the first byte in the buffer. If the frame is not an integral number
+of bytes long, the low order bits of the last byte in the frame will
+be unused.
+
+Note that the decoders assume the use of a tail, i.e., the encoding
+and transmission of a sufficient number of padding bits beyond the end
+of the user data to force the convolutional encoder into the known
+terminal state given to \fBchainback_viterbi()\fR. The tail is
+always one bit less than the constraint length of the code, so the k=7
+code uses 6 tail bits (12 tail symbols), the k=9 codes use 8 tail bits
+(16 tail symbols at rate 1/2, 24 at rate 1/3) and the k=15 code uses
+14 tail bits (84 tail symbols).
+
+The tail bits are not included in the length arguments to
+\fBcreate_viterbi()\fR and \fBchainback_viterbi()\fR. For example, if
+the block contains 1000 user bits, then this would be the length
+parameter given to \fBcreate_viterbi27()\fR and
+\fBchainback_viterbi27()\fR, and \fBupdate_viterbi27_blk()\fR would be called
+with a total of 2012 symbols - the last 12 encoded symbols
+representing the tail bits.
+
+After the call to \fBchainback_viterbi()\fR, the decoder may be reset
+with a call to \fBinit_viterbi()\fR and another block can be decoded.
+Alternatively, \fBdelete_viterbi()\fR can be called to free all resources
+used by the Viterbi decoder.
+
+The \fBset_viterbi_polynomial()\fR function allows use of other than the default
+code generator polynomials. Although only one set of polynomials is generally
+used with each code, there are different conventions as to their order and
+symbol polarity, and this function simplifies their use.
+
+The default polynomials for the viterbi27 routines
+are those of the NASA-JPL convention \fIwithout\fR symbol inversion.
+The NASA-JPL convention normally inverts the first symbol.
+The CCSDS/NASA-GSFC convention swaps the two symbols and inverts the second.
+.sp
+To set the NASA-JPL convention with symbol inversion:
+.sp
+.nf
+.ft B
+int polys[2] = { -V27POLYA,V27POLYB };
+set_viterbi27_polynomial(polys);
+.ft R
+.fi
+.sp
+and to set the CCSDS convention with symbol inversion:
+.sp
+.nf
+.ft B
+int polys[2] = { V27POLYB,-V27POLYA };
+set_viterbi27_polynomial(polys);
+.ft R
+.fi
+.sp
+The default polynomials for the viterbi615 routines
+are those used by the Cassini spacecraft \fIwithout\fR
+symbol inversion. Mars Pathfinder (MPF) and STEREO
+swap the third and fourth polynomials.
+Both conventions invert the
+first, third and fifth symbols. Refer to fec.h for the polynomial constant definitions.
+.sp
+To set the Cassini convention with symbol inversion, do the following:
+
+.nf
+.ft B
+int polys[6] = { -V615POLYA,V615POLYB,-V615POLYC,V615POLYD,-V615POLYE,V615POLYF };
+set_viterbi615_polynomial(polys);
+.ft R
+.fi
+.sp
+and to set the MPF/STEREO convention with symbol inversion:
+.sp
+.nf
+.ft B
+int polys[6] = { -V615POLYA,V615POLYB,-V615POLYD,V615POLYC,-V615POLYE,V615POLYF };
+set_viterbi615_polynomial(polys);
+.ft R
+.fi
+
+For performance reasons, calling this function changes the code
+generator polynomials for \fIall\fR instances of the corresponding Viterbi decoder,
+including those already created.
+
+.SH ERROR PERFORMANCE
+These decoders have all been extensively tested and found to provide
+performance consistent with that expected for soft-decision Viterbi
+decoding with 8-bit symbols.
+
+Due to internal differences, the implementations
+vary slightly in error performance. In
+general, the portable C versions exhibit the best error performance
+because they use full-sized branch metrics, and the MMX versions
+exhibit the worst because they use 8-bit branch metrics with modulo
+comparisons. The SSE, SSE2 and Altivec implementations of the r=1/2 k=7 and
+r=1/2 k=9 codes use unsigned
+8-bit branch metrics, and are almost as good as the C versions. The
+r=1/3 k=9 and r=1/6 k=15 codes are implemented with 16-bit path metrics in all SIMD
+versions.
+
+.SH DIRECT ACCESS TO SPECIFIC FUNCTION VERSIONS
+Calling the functions listed above automatically calls the appropriate
+version of the function depending on the CPU type and available SIMD
+instructions. A particular version can also be called directly by
+appending the appropriate suffix to the function name. The available
+suffixes are "_mmx", "_sse", "_sse2", "_av" and "_port", for the MMX,
+SSE, SSE2, Altivec and portable versions, respectively. For example,
+the SSE2 version of the update_viterbi27_blk() function can be invoked
+as update_viterbi27_blk_sse2().
+
+Naturally, the _av functions are only available on the PowerPC and the
+_mmx, _sse and _sse2 versions are only available on IA-32. Calling
+a SIMD-enabled function on a CPU that doesn't support the appropriate
+set of instructions will result in an illegal instruction exception.
+
+.SH RETURN VALUES
+\fBcreate_viterbi\fR returns a pointer to the structure containing
+the decoder state.
+The other functions return -1 on error, 0 otherwise.
+
+.SH AUTHOR & COPYRIGHT
+Phil Karn, KA9Q (karn@ka9q.net)
+
+.SH LICENSE
+This software may be used under the terms of the GNU Lesser General Public License (LGPL).
+
+
diff --git a/sqtest.c b/sqtest.c
new file mode 100644
index 0000000..b2abb09
--- /dev/null
+++ b/sqtest.c
@@ -0,0 +1,42 @@
+/* Verify correctness of the sum-of-square routines */
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+/* These values should trigger leading/trailing array fragment handling */
+#define NSAMP 200002
+#define OFFSET 1
+
+long long sumsq_wq(signed short *in,int cnt);
+long long sumsq_wq_ref(signed short *in,int cnt);
+
+int main(){ /* Compare sumsq_wq() against the plain C reference on random samples and print OK or the mismatch */
+ int i;
+ long long result,rresult; /* result = routine under test, rresult = reference */
+ signed short samples[NSAMP];
+
+ srandom(time(NULL));
+
+ for(i=0;i<NSAMP;i++)
+ samples[i] = random() & 0xffff;
+
+ rresult = sumsq_wq_ref(&samples[OFFSET],NSAMP-OFFSET); /* Was sumsq_wq(), which compared the routine with itself */
+ result = sumsq_wq(&samples[OFFSET],NSAMP-OFFSET);
+ if(result == rresult){
+ printf("OK\n");
+ } else {
+ printf("sum mismatch: %lld != %lld\n",result,rresult);
+ }
+ exit(0);
+}
+
+long long sumsq_wq_ref(signed short *in,int cnt){ /* Reference implementation: sum of in[i]*in[i] over the first cnt samples (0 if cnt <= 0) */
+ long long sum = 0;
+ int i;
+
+ for(i=0;i<cnt;i++){
+ sum += (long)in[i] * in[i]; /* Each square is computed in long, then accumulated in long long */
+ }
+ return sum;
+}
+
diff --git a/sse2bfly27.s b/sse2bfly27.s
new file mode 100644
index 0000000..27422a2
--- /dev/null
+++ b/sse2bfly27.s
@@ -0,0 +1,202 @@
+/* Intel SIMD (SSE2) implementations of Viterbi ACS butterflies
+ for 64-state (k=7) convolutional code
+ Copyright 2003 Phil Karn, KA9Q
+ This code may be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ void update_viterbi27_blk_sse2(struct v27 *vp,unsigned char syms[],int nbits) ;
+*/
+ # SSE2 (128-bit integer SIMD) version
+ # Requires Pentium 4 or better
+
+ # These are offsets into struct v27, defined in viterbi27.h
+ .set DP,128
+ .set OLDMETRICS,132
+ .set NEWMETRICS,136
+ .text
+ .global update_viterbi27_blk_sse2,Branchtab27_sse2
+ .type update_viterbi27_blk_sse2,@function
+ .align 16
+
+update_viterbi27_blk_sse2:
+ pushl %ebp # set up a frame; the arguments are read from 8(%ebp) upward
+ movl %esp,%ebp
+ pushl %esi # save the registers this routine overwrites
+ pushl %edi
+ pushl %edx
+ pushl %ebx
+ # Reject a NULL decoder pointer: return -1 via the common exit at err
+ movl 8(%ebp),%edx # edx = vp
+ testl %edx,%edx
+ jnz 0f
+ movl $-1,%eax # immediate -1; without the $ this is a load from address -1
+ jmp err
+0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
+ movl NEWMETRICS(%edx),%edi # edi -> new metrics
+ movl DP(%edx),%edx # edx -> decisions
+
+1: movl 16(%ebp),%eax # eax = nbits
+ decl %eax
+ jl 2f # passed zero, we're done
+ movl %eax,16(%ebp)
+
+ xorl %eax,%eax
+ movl 12(%ebp),%ebx # ebx = syms
+ movb (%ebx),%al
+ movd %eax,%xmm6 # xmm6[0] = first symbol
+ movb 1(%ebx),%al
+ movd %eax,%xmm5 # xmm5[0] = second symbol
+ addl $2,%ebx
+ movl %ebx,12(%ebp)
+
+ punpcklbw %xmm6,%xmm6 # xmm6[1] = xmm6[0]
+ punpcklbw %xmm5,%xmm5
+ pshuflw $0,%xmm6,%xmm6 # copy low word to low 3
+ pshuflw $0,%xmm5,%xmm5
+ punpcklqdq %xmm6,%xmm6 # propagate to all 16
+ punpcklqdq %xmm5,%xmm5
+ # xmm6 now contains first symbol in each byte, xmm5 the second
+
+ movdqa thirtyones,%xmm7
+
+ # each invocation of this macro does 16 butterflies in parallel
+ .MACRO butterfly GROUP
+ # compute branch metrics
+ movdqa Branchtab27_sse2+(16*\GROUP),%xmm4
+ movdqa Branchtab27_sse2+32+(16*\GROUP),%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+
+ # compute 5-bit branch metric in xmm4 by adding the individual symbol metrics
+ # This is okay for this
+ # code because the worst-case metric spread (at high Eb/No) is only 120,
+ # well within the range of our unsigned 8-bit path metrics, and even within
+ # the range of signed 8-bit path metrics
+ pavgb %xmm3,%xmm4
+ psrlw $3,%xmm4
+
+ pand %xmm7,%xmm4
+
+ movdqa (16*\GROUP)(%esi),%xmm0 # Incoming path metric, high bit = 0
+ movdqa ((16*\GROUP)+32)(%esi),%xmm3 # Incoming path metric, high bit = 1
+ movdqa %xmm0,%xmm2
+ movdqa %xmm3,%xmm1
+ paddusb %xmm4,%xmm0 # note use of saturating arithmetic
+ paddusb %xmm4,%xmm3 # this shouldn't be necessary, but why not?
+
+ # negate branch metrics
+ pxor %xmm7,%xmm4
+ paddusb %xmm4,%xmm1
+ paddusb %xmm4,%xmm2
+
+ # Find survivors, leave in mm0,2
+ pminub %xmm1,%xmm0
+ pminub %xmm3,%xmm2
+ # get decisions, leave in mm1,3
+ pcmpeqb %xmm0,%xmm1
+ pcmpeqb %xmm2,%xmm3
+
+ # interleave and store new branch metrics in mm0,2
+ movdqa %xmm0,%xmm4
+ punpckhbw %xmm2,%xmm0 # interleave second 16 new metrics
+ punpcklbw %xmm2,%xmm4 # interleave first 16 new metrics
+ movdqa %xmm0,(32*\GROUP+16)(%edi)
+ movdqa %xmm4,(32*\GROUP)(%edi)
+
+ # interleave decisions & store
+ movdqa %xmm1,%xmm4
+ punpckhbw %xmm3,%xmm1
+ punpcklbw %xmm3,%xmm4
+ # work around bug in gas due to Intel doc error
+ .byte 0x66,0x0f,0xd7,0xd9 # pmovmskb %xmm1,%ebx
+ shll $16,%ebx
+ .byte 0x66,0x0f,0xd7,0xc4 # pmovmskb %xmm4,%eax
+ orl %eax,%ebx
+ movl %ebx,(4*\GROUP)(%edx)
+ .endm
+
+ # invoke macro 2 times for a total of 32 butterflies
+ butterfly GROUP=0
+ butterfly GROUP=1
+
+ addl $8,%edx # bump decision pointer
+
+ # See if we have to normalize. This requires an explanation. We don't want
+ # our path metrics to exceed 255 on the *next* iteration. Since the
+ # largest branch metric is 30, that means we don't want any to exceed 225
+ # on *this* iteration. Rather than look them all, we just pick an arbitrary one
+ # (the first) and see if it exceeds 225-120=105, where 120 is the experimentally-
+ # determined worst-case metric spread for this code and branch metrics in the range 0-30.
+
+ # This is extremely conservative, and empirical testing at a variety of Eb/Nos might
+ # show that a higher threshold could be used without affecting BER performance
+ movl (%edi),%eax # extract first output metric
+ andl $255,%eax
+ cmp $105,%eax
+ jle done # No, no need to normalize
+
+ # Normalize by finding smallest metric and subtracting it
+ # from all metrics. We can't just pick an arbitrary small constant because
+ # the minimum metric might be zero!
+ movdqa (%edi),%xmm0
+ movdqa %xmm0,%xmm4
+ movdqa 16(%edi),%xmm1
+ pminub %xmm1,%xmm4
+ movdqa 32(%edi),%xmm2
+ pminub %xmm2,%xmm4
+ movdqa 48(%edi),%xmm3
+ pminub %xmm3,%xmm4
+
+ # crunch down to single lowest metric
+ movdqa %xmm4,%xmm5
+ psrldq $8,%xmm5 # the count to psrldq is bytes, not bits!
+ pminub %xmm5,%xmm4
+ movdqa %xmm4,%xmm5
+ psrlq $32,%xmm5
+ pminub %xmm5,%xmm4
+ movdqa %xmm4,%xmm5
+ psrlq $16,%xmm5
+ pminub %xmm5,%xmm4
+ movdqa %xmm4,%xmm5
+ psrlq $8,%xmm5
+ pminub %xmm5,%xmm4 # now in lowest byte of %xmm4
+
+ punpcklbw %xmm4,%xmm4 # lowest 2 bytes
+ pshuflw $0,%xmm4,%xmm4 # lowest 8 bytes
+ punpcklqdq %xmm4,%xmm4 # all 16 bytes
+
+ # xmm4 now contains lowest metric in all 16 bytes
+ # subtract it from every output metric
+ psubusb %xmm4,%xmm0
+ psubusb %xmm4,%xmm1
+ psubusb %xmm4,%xmm2
+ psubusb %xmm4,%xmm3
+ movdqa %xmm0,(%edi)
+ movdqa %xmm1,16(%edi)
+ movdqa %xmm2,32(%edi)
+ movdqa %xmm3,48(%edi)
+
+done:
+ # swap metrics
+ movl %esi,%eax
+ movl %edi,%esi
+ movl %eax,%edi
+ jmp 1b
+
+2: movl 8(%ebp),%ebx # ebx = vp
+ # stash metric pointers
+ movl %esi,OLDMETRICS(%ebx)
+ movl %edi,NEWMETRICS(%ebx)
+ movl %edx,DP(%ebx) # stash incremented value of vp->dp
+ xorl %eax,%eax
+err: popl %ebx
+ popl %edx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+ .data
+ .align 16
+
+thirtyones:
+ .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31
diff --git a/sse2bfly29.s b/sse2bfly29.s
new file mode 100644
index 0000000..0fa1742
--- /dev/null
+++ b/sse2bfly29.s
@@ -0,0 +1,245 @@
+/* Intel SIMD SSE2 implementation of Viterbi ACS butterflies
+ for 256-state (k=9) convolutional code
+ Copyright 2004 Phil Karn, KA9Q
+ This code may be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ void update_viterbi29_blk_sse2(struct v29 *vp,unsigned char *syms,int nbits) ;
+*/
+
+ # SSE2 (128-bit integer SIMD) version
+ # Requires Pentium 4 or better
+ # These are offsets into struct v29, defined in viterbi29.h
+ .set DP,512
+ .set OLDMETRICS,516
+ .set NEWMETRICS,520
+
+ .text
+ .global update_viterbi29_blk_sse2,Branchtab29_sse2
+ .type update_viterbi29_blk_sse2,@function
+ .align 16
+
+update_viterbi29_blk_sse2:
+ pushl %ebp # set up a frame; the arguments are read from 8(%ebp) upward
+ movl %esp,%ebp
+ pushl %esi # save the registers this routine overwrites
+ pushl %edi
+ pushl %edx
+ pushl %ebx
+ # Reject a NULL decoder pointer: return -1 via the common exit at err
+ movl 8(%ebp),%edx # edx = vp
+ testl %edx,%edx
+ jnz 0f
+ movl $-1,%eax # immediate -1; without the $ this is a load from address -1
+ jmp err
+0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
+ movl NEWMETRICS(%edx),%edi # edi -> new metrics
+ movl DP(%edx),%edx # edx -> decisions
+
+1: movl 16(%ebp),%eax # eax = nbits
+ decl %eax
+ jl 2f # passed zero, we're done
+ movl %eax,16(%ebp)
+
+ xorl %eax,%eax
+ movl 12(%ebp),%ebx # ebx = syms
+ movb (%ebx),%al
+ movd %eax,%xmm6 # xmm6[0] = first symbol
+ movb 1(%ebx),%al
+ movd %eax,%xmm5 # xmm5[0] = second symbol
+ addl $2,%ebx
+ movl %ebx,12(%ebp)
+
+ punpcklbw %xmm6,%xmm6 # xmm6[1] = xmm6[0]
+ punpcklbw %xmm5,%xmm5
+ movdqa thirtyones,%xmm7
+ pshuflw $0,%xmm6,%xmm6 # copy low word to low 3
+ pshuflw $0,%xmm5,%xmm5
+ punpcklqdq %xmm6,%xmm6 # propagate to all 16
+ punpcklqdq %xmm5,%xmm5
+ # xmm6 now contains first symbol in each byte, xmm5 the second
+
+ movdqa thirtyones,%xmm7
+
+ # each invocation of this macro does 16 butterflies in parallel
+ .MACRO butterfly GROUP
+ # compute branch metrics
+ movdqa Branchtab29_sse2+(16*\GROUP),%xmm4
+ movdqa Branchtab29_sse2+128+(16*\GROUP),%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pavgb %xmm3,%xmm4
+ psrlw $3,%xmm4
+
+ pand %xmm7,%xmm4 # xmm4 contains branch metrics
+
+ movdqa (16*\GROUP)(%esi),%xmm0 # Incoming path metric, high bit = 0
+ movdqa ((16*\GROUP)+128)(%esi),%xmm3 # Incoming path metric, high bit = 1
+ movdqa %xmm0,%xmm2
+ movdqa %xmm3,%xmm1
+ paddusb %xmm4,%xmm0
+ paddusb %xmm4,%xmm3
+
+ # invert branch metrics
+ pxor %xmm7,%xmm4
+
+ paddusb %xmm4,%xmm1
+ paddusb %xmm4,%xmm2
+
+ # Find survivors, leave in mm0,2
+ pminub %xmm1,%xmm0
+ pminub %xmm3,%xmm2
+ # get decisions, leave in mm1,3
+ pcmpeqb %xmm0,%xmm1
+ pcmpeqb %xmm2,%xmm3
+
+ # interleave and store new branch metrics in mm0,2
+ movdqa %xmm0,%xmm4
+ punpckhbw %xmm2,%xmm0 # interleave second 16 new metrics
+ punpcklbw %xmm2,%xmm4 # interleave first 16 new metrics
+ movdqa %xmm0,(32*\GROUP+16)(%edi)
+ movdqa %xmm4,(32*\GROUP)(%edi)
+
+ # interleave decisions & store
+ movdqa %xmm1,%xmm4
+ punpckhbw %xmm3,%xmm1
+ punpcklbw %xmm3,%xmm4
+ # work around bug in gas due to Intel doc error
+ .byte 0x66,0x0f,0xd7,0xd9 # pmovmskb %xmm1,%ebx
+ shll $16,%ebx
+ .byte 0x66,0x0f,0xd7,0xc4 # pmovmskb %xmm4,%eax
+ orl %eax,%ebx
+ movl %ebx,(4*\GROUP)(%edx)
+ .endm
+
+ # invoke macro 8 times for a total of 128 butterflies
+ butterfly GROUP=0
+ butterfly GROUP=1
+ butterfly GROUP=2
+ butterfly GROUP=3
+ butterfly GROUP=4
+ butterfly GROUP=5
+ butterfly GROUP=6
+ butterfly GROUP=7
+
+ addl $32,%edx # bump decision pointer
+
+ # see if we have to normalize
+ movl (%edi),%eax # extract first output metric
+ andl $255,%eax
+ cmp $50,%eax # is it greater than 50?
+ movl $0,%eax
+ jle done # No, no need to normalize
+
+ # Normalize by finding smallest metric and subtracting it
+ # from all metrics
+ movdqa (%edi),%xmm0
+ pminub 16(%edi),%xmm0
+ pminub 32(%edi),%xmm0
+ pminub 48(%edi),%xmm0
+ pminub 64(%edi),%xmm0
+ pminub 80(%edi),%xmm0
+ pminub 96(%edi),%xmm0
+ pminub 112(%edi),%xmm0
+ pminub 128(%edi),%xmm0
+ pminub 144(%edi),%xmm0
+ pminub 160(%edi),%xmm0
+ pminub 176(%edi),%xmm0
+ pminub 192(%edi),%xmm0
+ pminub 208(%edi),%xmm0
+ pminub 224(%edi),%xmm0
+ pminub 240(%edi),%xmm0
+
+ # crunch down to single lowest metric
+ movdqa %xmm0,%xmm1
+ psrldq $8,%xmm0 # the count to psrldq is bytes, not bits!
+ pminub %xmm1,%xmm0
+ movdqa %xmm0,%xmm1
+ psrlq $32,%xmm0
+ pminub %xmm1,%xmm0
+ movdqa %xmm0,%xmm1
+ psrlq $16,%xmm0
+ pminub %xmm1,%xmm0
+ movdqa %xmm0,%xmm1
+ psrlq $8,%xmm0
+ pminub %xmm1,%xmm0
+
+ punpcklbw %xmm0,%xmm0 # lowest 2 bytes
+ pshuflw $0,%xmm0,%xmm0 # lowest 8 bytes
+ punpcklqdq %xmm0,%xmm0 # all 16 bytes
+
+ # xmm0 now contains lowest metric in all 16 bytes
+ # subtract it from every output metric
+ movdqa (%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,(%edi)
+ movdqa 16(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,16(%edi)
+ movdqa 32(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,32(%edi)
+ movdqa 48(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,48(%edi)
+ movdqa 64(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,64(%edi)
+ movdqa 80(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,80(%edi)
+ movdqa 96(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,96(%edi)
+ movdqa 112(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,112(%edi)
+ movdqa 128(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,128(%edi)
+ movdqa 144(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,144(%edi)
+ movdqa 160(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,160(%edi)
+ movdqa 176(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,176(%edi)
+ movdqa 192(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,192(%edi)
+ movdqa 208(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,208(%edi)
+ movdqa 224(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,224(%edi)
+ movdqa 240(%edi),%xmm1
+ psubusb %xmm0,%xmm1
+ movdqa %xmm1,240(%edi)
+
+done:
+ # swap metrics
+ movl %esi,%eax
+ movl %edi,%esi
+ movl %eax,%edi
+ jmp 1b
+
+2: movl 8(%ebp),%ebx # ebx = vp
+ # stash metric pointers
+ movl %esi,OLDMETRICS(%ebx)
+ movl %edi,NEWMETRICS(%ebx)
+ movl %edx,DP(%ebx) # stash incremented value of vp->dp
+ xorl %eax,%eax
+err: popl %ebx
+ popl %edx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+ .data
+ .align 16
+thirtyones:
+ .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31
+
diff --git a/ssebfly27.s b/ssebfly27.s
new file mode 100644
index 0000000..7f445da
--- /dev/null
+++ b/ssebfly27.s
@@ -0,0 +1,205 @@
+/* Intel SIMD (SSE) implementation of Viterbi ACS butterflies
+ for 64-state (k=7) convolutional code
+ Copyright 2001 Phil Karn, KA9Q
+ This code may be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ int update_viterbi27_blk_sse(struct v27 *vp,unsigned char syms[],int nbits) ;
+*/
+
+ # SSE (64-bit integer SIMD) version
+ # Requires Pentium III or better
+
+ # These are offsets into struct v27, defined in viterbi27.h
+ .set DP,128
+ .set OLDMETRICS,132
+ .set NEWMETRICS,136
+.text
+.global update_viterbi27_blk_sse,Branchtab27_sse
+ .type update_viterbi27_blk_sse,@function
+ .align 16
+
+update_viterbi27_blk_sse:
+ pushl %ebp # set up a frame; the arguments are read from 8(%ebp) upward
+ movl %esp,%ebp
+ pushl %esi # save the registers this routine overwrites
+ pushl %edi
+ pushl %edx
+ pushl %ebx
+ # Reject a NULL decoder pointer: return -1 via the common exit at err
+ movl 8(%ebp),%edx # edx = vp
+ testl %edx,%edx
+ jnz 0f
+ movl $-1,%eax # immediate -1; without the $ this is a load from address -1
+ jmp err
+0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
+ movl NEWMETRICS(%edx),%edi # edi -> new metrics
+ movl DP(%edx),%edx # edx -> decisions
+
+1: movl 16(%ebp),%eax # eax = nbits
+ decl %eax
+ jl 2f # passed zero, we're done
+ movl %eax,16(%ebp)
+
+ xorl %eax,%eax
+ movl 12(%ebp),%ebx # %ebx = syms
+ movb (%ebx),%al
+ movd %eax,%mm6 # mm6[0] = first symbol
+ movb 1(%ebx),%al
+ movd %eax,%mm5 # mm5[0] = second symbol
+ addl $2,%ebx
+ movl %ebx,12(%ebp)
+
+ punpcklbw %mm6,%mm6 # mm6[1] = mm6[0]
+ punpcklbw %mm5,%mm5
+ movq thirtyones,%mm7
+
+ pshufw $0,%mm6,%mm6 # copy low word to upper 3
+ pshufw $0,%mm5,%mm5
+ # mm6 now contains first symbol in each byte, mm5 the second
+
+ # each invocation of this macro does 8 butterflies in parallel
+ .MACRO butterfly GROUP
+ # compute branch metrics
+ movq Branchtab27_sse+(8*\GROUP),%mm4
+ movq Branchtab27_sse+32+(8*\GROUP),%mm3
+ pxor %mm6,%mm4
+ pxor %mm5,%mm3
+ pavgb %mm3,%mm4 # mm4 contains branch metrics
+ psrlw $3,%mm4
+ pand %mm7,%mm4
+
+ movq (8*\GROUP)(%esi),%mm0 # Incoming path metric, high bit = 0
+ movq ((8*\GROUP)+32)(%esi),%mm3 # Incoming path metric, high bit = 1
+ movq %mm0,%mm2
+ movq %mm3,%mm1
+ paddusb %mm4,%mm0
+ paddusb %mm4,%mm3
+
+ # invert branch metrics. This works only because they're 5 bits
+ pxor %mm7,%mm4
+
+ paddusb %mm4,%mm1
+ paddusb %mm4,%mm2
+
+ # Find survivors, leave in mm0,2
+ pminub %mm1,%mm0
+ pminub %mm3,%mm2
+ # get decisions, leave in mm1,3
+ pcmpeqb %mm0,%mm1
+ pcmpeqb %mm2,%mm3
+
+ # interleave and store new branch metrics in mm0,2
+ movq %mm0,%mm4
+ punpckhbw %mm2,%mm0 # interleave second 8 new metrics
+ punpcklbw %mm2,%mm4 # interleave first 8 new metrics
+ movq %mm0,(16*\GROUP+8)(%edi)
+ movq %mm4,(16*\GROUP)(%edi)
+
+ # interleave decisions, accumulate into %ebx
+ movq %mm1,%mm4
+ punpckhbw %mm3,%mm1
+ punpcklbw %mm3,%mm4
+ # Due to an error in the Intel instruction set ref (the register
+ # fields are swapped), gas assembles pmovmskb incorrectly
+ # See http://mail.gnu.org/pipermail/bug-gnu-utils/2000-August/002341.html
+ .byte 0x0f,0xd7,0xc1 # pmovmskb %mm1,%eax
+ shll $((16*\GROUP+8)&31),%eax
+ orl %eax,%ebx
+ .byte 0x0f,0xd7,0xc4 # pmovmskb %mm4,%eax
+ shll $((16*\GROUP)&31),%eax
+ orl %eax,%ebx
+ .endm
+
+ # invoke macro 4 times for a total of 32 butterflies
+ xorl %ebx,%ebx # clear decisions
+ butterfly GROUP=0
+ butterfly GROUP=1
+ movl %ebx,(%edx) # stash first 32 decisions
+ xorl %ebx,%ebx
+ butterfly GROUP=2
+ butterfly GROUP=3
+ movl %ebx,4(%edx) # stash second 32 decisions
+
+ addl $8,%edx # bump decision pointer
+
+ # see if we have to normalize
+ movl (%edi),%eax # extract first output metric
+ andl $255,%eax
+ cmpl $150,%eax # is it greater than 150?
+ movl $0,%eax
+ jle done # No, no need to normalize
+
+ # Normalize by finding smallest metric and subtracting it
+ # from all metrics
+ movq (%edi),%mm0
+ pminub 8(%edi),%mm0
+ pminub 16(%edi),%mm0
+ pminub 24(%edi),%mm0
+ pminub 32(%edi),%mm0
+ pminub 40(%edi),%mm0
+ pminub 48(%edi),%mm0
+ pminub 56(%edi),%mm0
+ # mm0 contains 8 smallest metrics
+ # crunch down to single lowest metric
+ movq %mm0,%mm1
+ psrlq $32,%mm0
+ pminub %mm1,%mm0
+ movq %mm0,%mm1
+ psrlq $16,%mm0
+ pminub %mm1,%mm0
+ movq %mm0,%mm1
+ psrlq $8,%mm0
+ pminub %mm1,%mm0
+ punpcklbw %mm0,%mm0 # expand to all 8 bytes
+ pshufw $0,%mm0,%mm0
+
+ # mm0 now contains lowest metric in all 8 bytes
+ # subtract it from every output metric
+ # Trashes %mm7
+ .macro PSUBUSBM REG,MEM
+ movq \MEM,%mm7
+ psubusb \REG,%mm7
+ movq %mm7,\MEM
+ .endm
+
+ PSUBUSBM %mm0,(%edi)
+ PSUBUSBM %mm0,8(%edi)
+ PSUBUSBM %mm0,16(%edi)
+ PSUBUSBM %mm0,24(%edi)
+ PSUBUSBM %mm0,32(%edi)
+ PSUBUSBM %mm0,40(%edi)
+ PSUBUSBM %mm0,48(%edi)
+ PSUBUSBM %mm0,56(%edi)
+
+ movd %mm0,%eax
+ and $0xff,%eax
+
+done: # swap metrics
+ movl %esi,%eax
+ movl %edi,%esi
+ movl %eax,%edi
+ jmp 1b
+
+2: emms
+ movl 8(%ebp),%ebx # ebx = vp
+ # stash metric pointers
+ movl %esi,OLDMETRICS(%ebx)
+ movl %edi,NEWMETRICS(%ebx)
+ movl %edx,DP(%ebx) # stash incremented value of vp->dp
+ xorl %eax,%eax
+err: popl %ebx
+ popl %edx
+ popl %edi
+ popl %esi
+ popl %ebp
+
+ ret
+
+ .data
+
+ .align 16
+thirtyones:
+ .byte 31,31,31,31,31,31,31,31
+
+
+
diff --git a/ssebfly29.s b/ssebfly29.s
new file mode 100644
index 0000000..d7d2149
--- /dev/null
+++ b/ssebfly29.s
@@ -0,0 +1,271 @@
+/* Intel SIMD SSE implementation of Viterbi ACS butterflies
+ for 256-state (k=9) convolutional code
+ Copyright 2004 Phil Karn, KA9Q
+ This code may be used under the terms of the GNU Lesser General Public License (LGPL)
+
+ void update_viterbi29_blk_sse(struct v29 *vp,unsigned char syms[],int nbits);
+*/
+ # SSE (64-bit integer SIMD) version
+ # Requires Pentium III or better
+ # These are offsets into struct v29, defined in viterbi29.h
+ .set DP,512
+ .set OLDMETRICS,516
+ .set NEWMETRICS,520
+ .text
+ .global update_viterbi29_blk_sse,Branchtab29_sse
+ .type update_viterbi29_blk_sse,@function
+ .align 16
+
+update_viterbi29_blk_sse:
+ pushl %ebp # set up a frame; the arguments are read from 8(%ebp) upward
+ movl %esp,%ebp
+ pushl %esi # save the registers this routine overwrites
+ pushl %edi
+ pushl %edx
+ pushl %ebx
+ # Reject a NULL decoder pointer: return -1 via the common exit at err
+ movl 8(%ebp),%edx # edx = vp
+ testl %edx,%edx
+ jnz 0f
+ movl $-1,%eax # immediate -1; without the $ this is a load from address -1
+ jmp err
+0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
+ movl NEWMETRICS(%edx),%edi # edi -> new metrics
+ movl DP(%edx),%edx # edx -> decisions
+
+1: movl 16(%ebp),%eax # eax = nbits
+ decl %eax
+ jl 2f # passed zero, we're done
+ movl %eax,16(%ebp)
+
+ xorl %eax,%eax
+ movl 12(%ebp),%ebx # ebx = syms
+ movb (%ebx),%al
+ movd %eax,%mm6 # mm6[0] = first symbol
+ movb 1(%ebx),%al
+ movd %eax,%mm5 # mm5[0] = second symbol
+ addl $2,%ebx
+ movl %ebx,12(%ebp)
+
+ punpcklbw %mm6,%mm6 # mm6[1] = mm6[0]
+ punpcklbw %mm5,%mm5
+
+ movq thirtyones,%mm7
+ pshufw $0,%mm6,%mm6 # copy low word to upper 3
+ pshufw $0,%mm5,%mm5
+ # mm6 now contains first symbol in each byte, mm5 the second
+
+ # each invocation of this macro does 8 butterflies in parallel
+ .MACRO butterfly GROUP
+ # compute branch metrics
+ movq Branchtab29_sse+(8*\GROUP),%mm4
+ movq Branchtab29_sse+128+(8*\GROUP),%mm3
+ pxor %mm6,%mm4
+ pxor %mm5,%mm3
+ pavgb %mm3,%mm4 # mm4 contains branch metrics
+ psrlw $3,%mm4
+ pand %mm7,%mm4
+
+ movq (8*\GROUP)(%esi),%mm0 # Incoming path metric, high bit = 0
+ movq ((8*\GROUP)+128)(%esi),%mm3 # Incoming path metric, high bit = 1
+ movq %mm0,%mm2
+ movq %mm3,%mm1
+ paddusb %mm4,%mm0
+ paddusb %mm4,%mm3
+
+ # invert branch metrics. This works only because they're 5 bits
+ pxor %mm7,%mm4
+
+ paddusb %mm4,%mm1
+ paddusb %mm4,%mm2
+
+ # Find survivors, leave in mm0,2
+ pminub %mm1,%mm0
+ pminub %mm3,%mm2
+ # get decisions, leave in mm1,3
+ pcmpeqb %mm0,%mm1
+ pcmpeqb %mm2,%mm3
+
+ # interleave and store new branch metrics in mm0,2
+ movq %mm0,%mm4
+ punpckhbw %mm2,%mm0 # interleave second 8 new metrics
+ punpcklbw %mm2,%mm4 # interleave first 8 new metrics
+ movq %mm0,(16*\GROUP+8)(%edi)
+ movq %mm4,(16*\GROUP)(%edi)
+
+ # interleave decisions, accumulate into %ebx
+ movq %mm1,%mm4
+ punpckhbw %mm3,%mm1
+ punpcklbw %mm3,%mm4
+ # Due to an error in the Intel instruction set ref (the register
+ # fields are swapped), gas assembles pmovmskb incorrectly
+ # See http://mail.gnu.org/pipermail/bug-gnu-utils/2000-August/002341.html
+ .byte 0x0f,0xd7,0xc1 # pmovmskb %mm1,%eax
+ shll $((16*\GROUP+8)&31),%eax
+ orl %eax,%ebx
+ .byte 0x0f,0xd7,0xc4 # pmovmskb %mm4,%eax
+ shll $((16*\GROUP)&31),%eax
+ orl %eax,%ebx
+ .endm
+
+ # invoke macro 16 times for a total of 128 butterflies
+ xorl %ebx,%ebx # clear decisions
+ butterfly GROUP=0
+ butterfly GROUP=1
+ movl %ebx,(%edx) # stash first 32 decisions
+ xorl %ebx,%ebx
+ butterfly GROUP=2
+ butterfly GROUP=3
+ movl %ebx,4(%edx) # stash second 32 decisions
+ xorl %ebx,%ebx # clear decisions
+ butterfly GROUP=4
+ butterfly GROUP=5
+ movl %ebx,8(%edx) # stash first 32 decisions
+ xorl %ebx,%ebx
+ butterfly GROUP=6
+ butterfly GROUP=7
+ movl %ebx,12(%edx) # stash second 32 decisions
+ xorl %ebx,%ebx # clear decisions
+ butterfly GROUP=8
+ butterfly GROUP=9
+ movl %ebx,16(%edx) # stash first 32 decisions
+ xorl %ebx,%ebx
+ butterfly GROUP=10
+ butterfly GROUP=11
+ movl %ebx,20(%edx) # stash second 32 decisions
+ xorl %ebx,%ebx # clear decisions
+ butterfly GROUP=12
+ butterfly GROUP=13
+ movl %ebx,24(%edx) # stash first 32 decisions
+ xorl %ebx,%ebx
+ butterfly GROUP=14
+ butterfly GROUP=15
+ movl %ebx,28(%edx) # stash second 32 decisions
+
+ addl $32,%edx # bump decision pointer
+
+ # see if we have to normalize
+ movl (%edi),%eax # extract first output metric
+ andl $255,%eax
+ cmp $50,%eax # is it greater than 50?
+ movl $0,%eax
+ jle done # No, no need to normalize
+
+ # Normalize by finding smallest metric and subtracting it
+ # from all metrics
+ movq (%edi),%mm0
+ pminub 8(%edi),%mm0
+ pminub 16(%edi),%mm0
+ pminub 24(%edi),%mm0
+ pminub 32(%edi),%mm0
+ pminub 40(%edi),%mm0
+ pminub 48(%edi),%mm0
+ pminub 56(%edi),%mm0
+ pminub 64(%edi),%mm0
+ pminub 72(%edi),%mm0
+ pminub 80(%edi),%mm0
+ pminub 88(%edi),%mm0
+ pminub 96(%edi),%mm0
+ pminub 104(%edi),%mm0
+ pminub 112(%edi),%mm0
+ pminub 120(%edi),%mm0
+ pminub 128(%edi),%mm0
+ pminub 136(%edi),%mm0
+ pminub 144(%edi),%mm0
+ pminub 152(%edi),%mm0
+ pminub 160(%edi),%mm0
+ pminub 168(%edi),%mm0
+ pminub 176(%edi),%mm0
+ pminub 184(%edi),%mm0
+ pminub 192(%edi),%mm0
+ pminub 200(%edi),%mm0
+ pminub 208(%edi),%mm0
+ pminub 216(%edi),%mm0
+ pminub 224(%edi),%mm0
+ pminub 232(%edi),%mm0
+ pminub 240(%edi),%mm0
+ pminub 248(%edi),%mm0
+ # mm0 contains 8 smallest metrics
+ # crunch down to single lowest metric
+ movq %mm0,%mm1
+ psrlq $32,%mm0
+ pminub %mm1,%mm0
+ movq %mm0,%mm1
+ psrlq $16,%mm0
+ pminub %mm1,%mm0
+ movq %mm0,%mm1
+ psrlq $8,%mm0
+ pminub %mm1,%mm0
+ movq 8(%edi),%mm1 # reload
+ punpcklbw %mm0,%mm0 # expand to all 8 bytes
+ pshufw $0,%mm0,%mm0
+
+ # mm0 now contains lowest metric in all 8 bytes
+ # subtract it from every output metric
+ # Trashes %mm7
+ .macro PSUBUSBM REG,MEM
+ movq \MEM,%mm7
+ psubusb \REG,%mm7
+ movq %mm7,\MEM
+ .endm
+
+ PSUBUSBM %mm0,(%edi)
+ PSUBUSBM %mm0,8(%edi)
+ PSUBUSBM %mm0,16(%edi)
+ PSUBUSBM %mm0,24(%edi)
+ PSUBUSBM %mm0,32(%edi)
+ PSUBUSBM %mm0,40(%edi)
+ PSUBUSBM %mm0,48(%edi)
+ PSUBUSBM %mm0,56(%edi)
+ PSUBUSBM %mm0,64(%edi)
+ PSUBUSBM %mm0,72(%edi)
+ PSUBUSBM %mm0,80(%edi)
+ PSUBUSBM %mm0,88(%edi)
+ PSUBUSBM %mm0,96(%edi)
+ PSUBUSBM %mm0,104(%edi)
+ PSUBUSBM %mm0,112(%edi)
+ PSUBUSBM %mm0,120(%edi)
+ PSUBUSBM %mm0,128(%edi)
+ PSUBUSBM %mm0,136(%edi)
+ PSUBUSBM %mm0,144(%edi)
+ PSUBUSBM %mm0,152(%edi)
+ PSUBUSBM %mm0,160(%edi)
+ PSUBUSBM %mm0,168(%edi)
+ PSUBUSBM %mm0,176(%edi)
+ PSUBUSBM %mm0,184(%edi)
+ PSUBUSBM %mm0,192(%edi)
+ PSUBUSBM %mm0,200(%edi)
+ PSUBUSBM %mm0,208(%edi)
+ PSUBUSBM %mm0,216(%edi)
+ PSUBUSBM %mm0,224(%edi)
+ PSUBUSBM %mm0,232(%edi)
+ PSUBUSBM %mm0,240(%edi)
+ PSUBUSBM %mm0,248(%edi)
+
+done:
+ # swap metrics
+ movl %esi,%eax
+ movl %edi,%esi
+ movl %eax,%edi
+ jmp 1b
+
+2: emms
+ movl 8(%ebp),%ebx # ebx = vp
+ # stash metric pointers
+ movl %esi,OLDMETRICS(%ebx)
+ movl %edi,NEWMETRICS(%ebx)
+ movl %edx,DP(%ebx) # stash incremented value of vp->dp
+ xorl %eax,%eax
+err: popl %ebx
+ popl %edx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+ .data
+ .align 8
+thirtyones:
+ .byte 31,31,31,31,31,31,31,31
+
+
diff --git a/sumsq.c b/sumsq.c
new file mode 100644
index 0000000..9ed6a39
--- /dev/null
+++ b/sumsq.c
@@ -0,0 +1,40 @@
+/* Compute the sum of the squares of a vector of signed shorts
+
+ * Copyright 2004 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+#include <stdlib.h>
+#include "fec.h"
+
+unsigned long long sumsq_port(signed short *,int);
+
+#ifdef __i386__
+unsigned long long sumsq_mmx(signed short *,int);
+unsigned long long sumsq_sse(signed short *,int);
+unsigned long long sumsq_sse2(signed short *,int);
+#endif
+
+#ifdef __VEC__
+unsigned long long sumsq_av(signed short *,int);
+#endif
+
+unsigned long long sumsq(signed short *in,int cnt){
+ /* Pick the sum-of-squares implementation matching Cpu_mode.
+  * Any mode without a SIMD routine compiled into this build ends up
+  * at the portable C version at the bottom.
+  */
+#ifdef __i386__
+ if(Cpu_mode == SSE || Cpu_mode == MMX)
+  return sumsq_mmx(in,cnt); /* plain SSE shares the MMX routine */
+ if(Cpu_mode == SSE2)
+  return sumsq_sse2(in,cnt);
+#endif
+
+#ifdef __VEC__
+ if(Cpu_mode == ALTIVEC)
+  return sumsq_av(in,cnt);
+#endif
+
+ return sumsq_port(in,cnt);
+}
diff --git a/sumsq_av.c b/sumsq_av.c
new file mode 100644
index 0000000..53c6acf
--- /dev/null
+++ b/sumsq_av.c
@@ -0,0 +1,78 @@
+/* Compute the sum of the squares of a vector of signed shorts
+
+ * This is the Altivec SIMD version. It's a little hairy because Altivec
+ * does not do 64-bit operations directly, so we have to accumulate separate
+ * 32-bit sums and carries
+
+ * Copyright 2004 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+#include "fec.h"
+
+unsigned long long sumsq_av(signed short *in,int cnt){
+ long long sum; /* final 64-bit result, built from a low word and a carry word */
+ vector signed short x; /* eight samples at a time */
+ vector unsigned int sums,carries,s1,s2; /* four 32-bit partial sums and their carry counts */
+ int pad; /* byte offset of in within its 16-byte block */
+ union { vector unsigned char cv; vector unsigned int iv; unsigned int w[4]; unsigned char c[16];} s; /* scratch: shift-count vector and word extraction */
+ /* Only s.c[15] is ever written before s.cv is passed to vec_sro() */
+ carries = sums = (vector unsigned int)(0);
+ if((pad = (int)in & 15)!=0){
+ /* Load unaligned leading word */
+ x = vec_perm(vec_ld(0,in),(vector signed short)(0),vec_lvsl(0,in)); /* presumably left-justifies the samples from in onward and zero-fills the rest -- confirm against the vec_lvsl/vec_perm documentation */
+ if(cnt < 8){ /* Shift right to chop stuff beyond end of short block */
+ s.c[15] = (8-cnt)<<4; /* same encoding as the 64/32 shifts further down: 8-cnt halfwords */
+ x = vec_sro(x,s.cv);
+ }
+ sums = (vector unsigned int)vec_msum(x,x,(vector signed int)(0)); /* first contribution, so there is no carry to record yet */
+ in += 8-pad/2; /* skip the 8-pad/2 samples up to the next 16-byte boundary */
+ cnt -= 8-pad/2; /* may go negative for a short block; both tests below then fail */
+ }
+ /* Everything is now aligned, rip through most of the block */
+ while(cnt >= 8){
+ x = vec_ld(0,in);
+ /* A single vec_msum cannot overflow, but we have to sum it with
+ * the earlier terms separately to handle the carries
+ * The cast to unsigned is OK because squares are always positive
+ */
+ s1 = (vector unsigned int)vec_msum(x,x,(vector signed int)(0));
+ carries = vec_add(carries,vec_addc(sums,s1)); /* record the carry-out of the add on the next line */
+ sums = vec_add(sums,s1);
+ in += 8;
+ cnt -= 8;
+ }
+ /* Handle trailing fragment, if any */
+ if(cnt > 0){
+ x = vec_ld(0,in);
+ s.c[15] = (8-cnt)<<4; /* shift by 8-cnt halfwords so only the first cnt samples remain */
+ x = vec_sro(x,s.cv);
+ s1 = (vector unsigned int)vec_msum(x,x,(vector signed int)(0));
+ carries = vec_add(carries,vec_addc(sums,s1));
+ sums = vec_add(sums,s1);
+ }
+ /* Combine 4 sub-sums and carries */
+ s.c[15] = 64; /* Shift right two 32-bit words */
+ s1 = vec_sro(sums,s.cv);
+ s2 = vec_sro(carries,s.cv);
+ carries = vec_add(carries,vec_addc(sums,s1)); /* carry produced by folding the sums */
+ sums = vec_add(sums,s1);
+ carries = vec_add(carries,s2); /* plus the carries that were folded in */
+ /* Second fold: words 2 and 3 now hold the pairwise totals */
+ s.c[15] = 32; /* Shift right one 32-bit word */
+ s1 = vec_sro(sums,s.cv);
+ s2 = vec_sro(carries,s.cv);
+ carries = vec_add(carries,vec_addc(sums,s1));
+ sums = vec_add(sums,s1);
+ carries = vec_add(carries,s2);
+
+ /* Extract sum and carries from right-hand words and combine into result */
+ s.iv = sums;
+ sum = s.w[3]; /* low 32 bits of the total */
+
+ s.iv = carries;
+ sum += (long long)s.w[3] << 32; /* accumulated carries form the high 32 bits */
+
+ return sum;
+}
+
diff --git a/sumsq_mmx.c b/sumsq_mmx.c
new file mode 100644
index 0000000..e766831
--- /dev/null
+++ b/sumsq_mmx.c
@@ -0,0 +1,35 @@
+/* Compute the sum of the squares of a vector of signed shorts
+
+ * MMX-assisted version (also used on SSE)
+
+ * The SSE2 and MMX assist routines both operate on multiples of
+ * 8 words; they differ only in their alignment requirements (8 bytes
+ * for MMX, 16 bytes for SSE2)
+
+ * Copyright 2004 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+long long sumsq_mmx_assist(signed short *,int);
+
+long long sumsq_mmx(signed short *in,int cnt){
+ /* Sum of the squares of cnt signed 16-bit samples starting at in.
+  * Scalar code handles the samples before the first 8-byte boundary and
+  * the final leftovers; sumsq_mmx_assist() handles the aligned middle
+  * in blocks of 8 samples.
+  */
+ long long sum = 0;
+
+ /* An empty or negative count sums to 0, matching sumsq_port(). Without
+  * this guard a negative cnt would push the pointer arithmetic below
+  * outside the caller's buffer.
+  */
+ if(cnt <= 0)
+  return 0;
+
+ /* Handle stuff before the next 8-byte boundary */
+ while(((int)in & 7) != 0 && cnt != 0){
+  sum += (long)in[0] * in[0];
+  in++;
+  cnt--;
+ }
+ /* The assist routine stops once fewer than 8 samples remain */
+ sum += sumsq_mmx_assist(in,cnt);
+ in += cnt & ~7;
+ cnt &= 7;
+
+ /* Handle up to 7 words at end */
+ while(cnt != 0){
+  sum += (long)in[0] * in[0];
+  in++;
+  cnt--;
+ }
+ return sum;
+}
diff --git a/sumsq_mmx_assist.s b/sumsq_mmx_assist.s
new file mode 100644
index 0000000..b3bac66
--- /dev/null
+++ b/sumsq_mmx_assist.s
@@ -0,0 +1,83 @@
+# MMX assist routines for sumsq
+# Copyright 2001 Phil Karn, KA9Q
+# May be used under the terms of the GNU Public License (GPL)
+
+ .text
+
+# Evaluate sum of squares of signed 16-bit input samples
+# long long sumsq_mmx_assist(signed short *in,int cnt);
+ .global sumsq_mmx_assist
+ .type sumsq_mmx_assist,@function
+ .align 16
+sumsq_mmx_assist:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %ecx
+ pushl %ebx
+
+ movl 8(%ebp),%esi # esi = in
+ movl 12(%ebp),%ecx # ecx = cnt
+ xor %eax,%eax # edx:eax = 64-bit running sum, returned to the caller
+ xor %edx,%edx
+
+ # Each pmaddwd dword can be as large as 2 * 32768**2 = 2**31 (two samples
+ # of -32768), so adding two of them with paddd could wrap around. Fold
+ # each of the four dwords into the 64-bit total separately instead.
+1: subl $8,%ecx
+ jl 2f # fewer than 8 samples left: done
+ movq (%esi),%mm0 # S0 S1 S2 S3
+ pmaddwd %mm0,%mm0 # (S0^2+S1^2) (S2^2+S3^2)
+ movq 8(%esi),%mm6 # S4 S5 S6 S7
+ pmaddwd %mm6,%mm6 # (S4^2+S5^2) (S6^2+S7^2)
+ movd %mm0,%ebx # S0^2+S1^2
+ addl %ebx,%eax
+ adcl $0,%edx
+ psrlq $32,%mm0
+ movd %mm0,%ebx # S2^2+S3^2
+ addl %ebx,%eax
+ adcl $0,%edx
+ movd %mm6,%ebx # S4^2+S5^2
+ addl %ebx,%eax
+ adcl $0,%edx
+ psrlq $32,%mm6
+ movd %mm6,%ebx # S6^2+S7^2
+ addl %ebx,%eax
+ adcl $0,%edx
+ addl $16,%esi # advance past the 8 samples just summed
+ jmp 1b
+
+2: emms
+ popl %ebx
+ popl %ecx
+ popl %esi
+ popl %ebp
+ ret
+
+# Evaluate sum of squares of signed 16-bit input samples
+# long sumsq_wd_mmx_assist(signed short *in,int cnt);
+# Quick version, only safe for small numbers of small input values...
+ .global sumsq_wd_mmx_assist
+ .type sumsq_wd_mmx_assist,@function
+ .align 16
+sumsq_wd_mmx_assist:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ # The sum is kept as two 32-bit lanes in mm2 with no carry handling
+ movl 8(%ebp),%esi # esi = in
+ movl 12(%ebp),%ecx # ecx = cnt
+ pxor %mm2,%mm2 # zero sum
+ # Process 8 samples per pass; stop when fewer than 8 remain
+1: subl $8,%ecx
+ jl 2f
+ movq (%esi),%mm0 # S0 S1 S2 S3
+ pmaddwd %mm0,%mm0 # (S0*S0+S1*S1) (S2*S2+S3*S3)
+ movq 8(%esi),%mm1 # S4 S5 S6 S7
+ pmaddwd %mm1,%mm1 # (S4*S4+S5*S5) (S6*S6+S7*S7)
+ paddd %mm1,%mm2
+ paddd %mm0,%mm2 # accumulate
+
+ addl $16,%esi # advance past the 8 samples just summed
+ jmp 1b
+
+2: movd %mm2,%eax # even sum
+ psrlq $32,%mm2
+ movd %mm2,%edx # odd sum
+ addl %edx,%eax # 32-bit result in eax; any overflow is discarded
+ emms
+ popl %esi
+ popl %ebp
+ ret
diff --git a/sumsq_port.c b/sumsq_port.c
new file mode 100644
index 0000000..6d0b4c1
--- /dev/null
+++ b/sumsq_port.c
@@ -0,0 +1,16 @@
+/* Compute the sum of the squares of a vector of signed shorts
+
+ * Portable C version
+ * Copyright 2004 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+unsigned long long sumsq_port(signed short *in,int cnt){
+ long long acc = 0; /* signed accumulator, converted on return */
+ int k = 0,s;
+ /* Portable fallback: square every sample as an int and add it up */
+ while(k < cnt){
+  s = in[k++];
+  acc += s * s;
+ }
+ return acc;
+}
diff --git a/sumsq_sse2.c b/sumsq_sse2.c
new file mode 100644
index 0000000..b05d2e9
--- /dev/null
+++ b/sumsq_sse2.c
@@ -0,0 +1,33 @@
+/* Compute the sum of the squares of a vector of signed shorts
+
+ * The SSE2 and MMX assist routines both operate on multiples of
+ * 8 words; they differ only in their alignment requirements (8 bytes
+ * for MMX, 16 bytes for SSE2)
+
+ * Copyright 2004 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+
+long long sumsq_sse2_assist(signed short *,int);
+
+long long sumsq_sse2(signed short *in,int cnt){
+ /* Sum of the squares of cnt signed 16-bit samples starting at in.
+  * Scalar code handles the samples before the first 16-byte boundary and
+  * the final leftovers; sumsq_sse2_assist() handles the aligned middle
+  * in blocks of 8 samples.
+  */
+ long long sum = 0;
+
+ /* An empty or negative count sums to 0, matching sumsq_port(). Without
+  * this guard a negative cnt would push the pointer arithmetic below
+  * outside the caller's buffer.
+  */
+ if(cnt <= 0)
+  return 0;
+
+ /* Handle stuff before the next 16-byte boundary */
+ while(((int)in & 15) != 0 && cnt != 0){
+  sum += (long)in[0] * in[0];
+  in++;
+  cnt--;
+ }
+ /* The assist routine stops once fewer than 8 samples remain */
+ sum += sumsq_sse2_assist(in,cnt);
+ in += cnt & ~7;
+ cnt &= 7;
+
+ /* Handle up to 7 trailing words */
+ while(cnt != 0){
+  sum += (long)in[0] * in[0];
+  in++;
+  cnt--;
+ }
+ return sum;
+}
diff --git a/sumsq_sse2_assist.s b/sumsq_sse2_assist.s
new file mode 100644
index 0000000..d1c4ee7
--- /dev/null
+++ b/sumsq_sse2_assist.s
@@ -0,0 +1,49 @@
+# SSE2 assist routines for sumsq
+# Copyright 2001 Phil Karn, KA9Q
+# May be used under the terms of the GNU Public License (GPL)
+
+ .text
+# Evaluate sum of squares of signed 16-bit input samples
+# long long sumsq_sse2_assist(signed short *in,int cnt);
+# Arguments are read from the stack at 8(%ebp) (in) and 12(%ebp) (cnt).
+# Only whole groups of 8 samples are processed; a remainder of cnt modulo 8
+# is left for the caller. The input is loaded with movaps, so "in" must be
+# 16-byte aligned. The 64-bit result is returned in %edx:%eax.
+ .global sumsq_sse2_assist
+ .type sumsq_sse2_assist,@function
+ .align 16
+sumsq_sse2_assist:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %ecx
+
+ movl 8(%ebp),%esi # esi = in
+ movl 12(%ebp),%ecx # ecx = cnt
+ pxor %xmm2,%xmm2 # zero sum
+ movaps low,%xmm3 # load mask
+
+1: subl $8,%ecx # is there another full group of 8 samples?
+ jl 2f # no, go combine the partial sums
+ movaps (%esi),%xmm0 # S0 S1 S2 S3 S4 S5 S6 S7
+ pmaddwd %xmm0,%xmm0 # (S0*S0+S1*S1) (S2*S2+S3*S3) (S4*S4+S5*S5) (S6*S6+S7*S7)
+ movaps %xmm0,%xmm1
+ pand %xmm3,%xmm1 # (S0*S0+S1*S1) 0 (S4*S4+S5*S5) 0
+ paddq %xmm1,%xmm2 # sum even-numbered dwords
+ psrlq $32,%xmm0 # (S2*S2+S3*S3) 0 (S6*S6+S7*S7) 0
+ paddq %xmm0,%xmm2 # sum odd-numbered dwords
+ addl $16,%esi # advance input pointer by 8 samples (16 bytes)
+ jmp 1b
+
+2: movaps %xmm2,%xmm0
+ psrldq $8,%xmm0 # bring the upper 64-bit lane down
+ paddq %xmm2,%xmm0 # combine 64-bit sums
+
+ movd %xmm0,%eax # low 32 bits of sum
+ psrldq $4,%xmm0
+ movd %xmm0,%edx # high 32 bits of sum
+
+ popl %ecx
+ popl %esi
+ popl %ebp
+ ret
+
+ .data
+ .align 16
+low: .byte 255,255,255,255,0,0,0,0,255,255,255,255,0,0,0,0
diff --git a/sumsq_test.c b/sumsq_test.c
new file mode 100644
index 0000000..4debd47
--- /dev/null
+++ b/sumsq_test.c
@@ -0,0 +1,101 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <time.h>
+#include "config.h"
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+#include "fec.h"
+
+#if HAVE_GETOPT_LONG
+struct option Options[] = {
+ {"frame-length",1,NULL,'l'},
+ {"frame-count",1,NULL,'n'},
+ {"verbose",0,NULL,'v'},
+ {"force-altivec",0,NULL,'a'},
+ {"force-port",0,NULL,'p'},
+ {"force-mmx",0,NULL,'m'},
+ {"force-sse",0,NULL,'s'},
+ {"force-sse2",0,NULL,'t'},
+ {NULL},
+};
+#endif
+
+int Verbose = 0;
+
+/* Test driver for sumsq().
+ * Default mode: for each trial, fill the buffer with random samples, pick a
+ * random offset (0-7) and length, and compare sumsq() against sumsq_port(),
+ * printing any mismatch.
+ * -T: timing mode, just call sumsq() on the whole (zeroed) buffer each trial.
+ * -l sets the buffer length in samples, -n the number of trials, and
+ * -a/-p/-m/-s/-t force a particular Cpu_mode.
+ */
+int main(int argc,char *argv[]){
+  signed short *buf;
+  int i,d,trial,trials=10000;
+  int bufsize = 2048;
+  long long port_sum,simd_sum;
+  time_t t;
+  int timetrials=0;
+
+  find_cpu_mode();
+  time(&t);
+  srandom(t);
+
+#if HAVE_GETOPT_LONG
+  while((d = getopt_long(argc,argv,"vapmstl:n:T",Options,NULL)) != EOF){
+#else
+  while((d = getopt(argc,argv,"vapmstl:n:T")) != EOF){
+#endif
+    switch(d){
+    case 'a':
+      Cpu_mode = ALTIVEC;
+      break;
+    case 'p':
+      Cpu_mode = PORT;
+      break;
+    case 'm':
+      Cpu_mode = MMX;
+      break;
+    case 's':
+      Cpu_mode = SSE;
+      break;
+    case 't':
+      Cpu_mode = SSE2;
+      break;
+    case 'l':
+      bufsize = atoi(optarg);
+      break;
+    case 'n':
+      trials = atoi(optarg);
+      break;
+    case 'v':
+      Verbose++;
+      break;
+    case 'T':
+      timetrials++;
+      break;
+    default:
+      break;
+    }
+  }
+
+  /* random() % bufsize below would divide by zero for an empty buffer,
+   * and a negative size makes no sense, so reject both up front.
+   */
+  if(bufsize <= 0){
+    fprintf(stderr,"frame length must be positive\n");
+    exit(1);
+  }
+  buf = (signed short *)calloc(bufsize,sizeof(signed short));
+  if(buf == NULL){
+    fprintf(stderr,"can't allocate buffer of %d samples\n",bufsize);
+    exit(1);
+  }
+  if(timetrials){
+    for(trial=0;trial<trials;trial++){
+      (void)sumsq(buf,bufsize);
+    }
+  } else {
+    for(trial=0;trial<trials;trial++){
+      int length,offset;
+
+      /* Random misalignment and length; skip degenerate combinations */
+      offset = random() & 7;
+      length = (random() % bufsize) - offset;
+      if(length <= 0)
+        continue;
+      for(i=0;i<bufsize;i++)
+        buf[i] = random();
+
+      port_sum = sumsq_port(buf+offset,length);
+      simd_sum = sumsq(buf+offset,length);
+      if(port_sum != simd_sum){
+        printf("offset %d len %d port_sum = %lld simd_sum = %lld ",offset,length,port_sum,simd_sum);
+
+        printf("ERROR! diff = %lld\n",simd_sum-port_sum);
+      }
+    }
+  }
+  free(buf);
+  exit(0);
+}
diff --git a/viterbi27.c b/viterbi27.c
new file mode 100644
index 0000000..554da92
--- /dev/null
+++ b/viterbi27.c
@@ -0,0 +1,161 @@
+/* K=7 r=1/2 Viterbi decoder with optional Intel or PowerPC SIMD
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "fec.h"
+
+/* Create a new K=7 decoder instance.
+ * Runs find_cpu_mode() first, then hands off to the implementation that
+ * matches Cpu_mode; anything not compiled into this build uses the
+ * portable C version.
+ */
+void *create_viterbi27(int len){
+  find_cpu_mode();
+
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return create_viterbi27_av(len);
+#endif
+#ifdef __i386__
+  case MMX:
+    return create_viterbi27_mmx(len);
+  case SSE:
+    return create_viterbi27_sse(len);
+  case SSE2:
+    return create_viterbi27_sse2(len);
+#endif
+  default:
+    break;
+  }
+  /* PORT, or a mode with no matching code in this build */
+  return create_viterbi27_port(len);
+}
+
+/* Install a pair of generator polynomials in the implementation selected
+ * by Cpu_mode (portable C version when no SIMD case applies).
+ */
+void set_viterbi27_polynomial(int polys[2]){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    set_viterbi27_polynomial_av(polys);
+    return;
+#endif
+#ifdef __i386__
+  case MMX:
+    set_viterbi27_polynomial_mmx(polys);
+    return;
+  case SSE:
+    set_viterbi27_polynomial_sse(polys);
+    return;
+  case SSE2:
+    set_viterbi27_polynomial_sse2(polys);
+    return;
+#endif
+  default:
+    break;
+  }
+  set_viterbi27_polynomial_port(polys);
+}
+
+/* Reset decoder p for the start of a new frame, via the implementation
+ * selected by Cpu_mode.  Returns whatever that implementation returns.
+ */
+int init_viterbi27(void *p,int starting_state){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return init_viterbi27_av(p,starting_state);
+#endif
+#ifdef __i386__
+  case MMX:
+    return init_viterbi27_mmx(p,starting_state);
+  case SSE:
+    return init_viterbi27_sse(p,starting_state);
+  case SSE2:
+    return init_viterbi27_sse2(p,starting_state);
+#endif
+  default:
+    break;
+  }
+  return init_viterbi27_port(p,starting_state);
+}
+
+/* Viterbi chainback: forward the request to the implementation selected
+ * by Cpu_mode and return its result.
+ */
+int chainback_viterbi27(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return chainback_viterbi27_av(p,data,nbits,endstate);
+#endif
+#ifdef __i386__
+  case MMX:
+    return chainback_viterbi27_mmx(p,data,nbits,endstate);
+  case SSE:
+    return chainback_viterbi27_sse(p,data,nbits,endstate);
+  case SSE2:
+    return chainback_viterbi27_sse2(p,data,nbits,endstate);
+#endif
+  default:
+    break;
+  }
+  return chainback_viterbi27_port(p,data,nbits,endstate);
+}
+
+/* Release a decoder instance through the implementation selected by
+ * Cpu_mode (portable C version when no SIMD case applies).
+ */
+void delete_viterbi27(void *p){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    delete_viterbi27_av(p);
+    return;
+#endif
+#ifdef __i386__
+  case MMX:
+    delete_viterbi27_mmx(p);
+    return;
+  case SSE:
+    delete_viterbi27_sse(p);
+    return;
+  case SSE2:
+    delete_viterbi27_sse2(p);
+    return;
+#endif
+  default:
+    break;
+  }
+  delete_viterbi27_port(p);
+}
+
+/* Feed a block of demodulated symbols to the decoder.
+ * nbits counts decoded data bits, not symbols.
+ * Returns -1 when p is NULL and 0 otherwise; the value returned by the
+ * selected implementation is not passed on.
+ */
+int update_viterbi27_blk(void *p,unsigned char syms[],int nbits){
+  if(!p)
+    return -1;
+
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    update_viterbi27_blk_av(p,syms,nbits);
+    break;
+#endif
+#ifdef __i386__
+  case MMX:
+    update_viterbi27_blk_mmx(p,syms,nbits);
+    break;
+  case SSE:
+    update_viterbi27_blk_sse(p,syms,nbits);
+    break;
+  case SSE2:
+    update_viterbi27_blk_sse2(p,syms,nbits);
+    break;
+#endif
+  default:
+    /* PORT, or a mode with no matching code in this build */
+    update_viterbi27_blk_port(p,syms,nbits);
+    break;
+  }
+  return 0;
+}
diff --git a/viterbi27_av.c b/viterbi27_av.c
new file mode 100644
index 0000000..98d7344
--- /dev/null
+++ b/viterbi27_av.c
@@ -0,0 +1,210 @@
+/* K=7 r=1/2 Viterbi decoder for PowerPC G4/G5 Altivec instructions
+ * Feb 2004, Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include <memory.h>
+#include <stdlib.h>
+#include "fec.h"
+
+typedef union { long long p; unsigned char c[64]; vector bool char v[4]; } decision_t;
+typedef union { long long p; unsigned char c[64]; vector unsigned char v[4]; } metric_t;
+
+static union branchtab27 { unsigned char c[32]; vector unsigned char v[2];} Branchtab27[2];
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder
+ * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s!
+ */
+struct v27 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Prepare decoder p for a new frame: select metric buffer 1 as the current
+ * one, rewind the decision pointer, give every state a metric of 63 and
+ * the given starting state a metric of 0.
+ * Returns -1 if p is NULL, else 0.
+ */
+int init_viterbi27_av(void *p,int starting_state){
+  struct v27 *vp = p;
+  int n;
+
+  if(vp == NULL)
+    return -1;
+
+  vp->old_metrics = &vp->metrics1;
+  vp->new_metrics = &vp->metrics2;
+  vp->dp = vp->decisions;
+
+  for(n = 0; n < 4; n++)
+    vp->old_metrics->v[n] = (vector unsigned char)(63);
+  vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */
+  return 0;
+}
+
+/* Fill both branch tables from the two polynomials.  Each of the 32 entries
+ * is 255 or 0 according to the parity of (2*state) masked by |poly|,
+ * inverted when the polynomial is given as a negative number.
+ */
+void set_viterbi27_polynomial_av(int polys[2]){
+  int k;
+
+  for(k = 0; k < 2; k++){
+    int negated = polys[k] < 0;
+    int taps = abs(polys[k]);
+    int state;
+
+    for(state = 0; state < 32; state++)
+      Branchtab27[k].c[state] = (negated ^ parity((2*state) & taps)) ? 255 : 0;
+  }
+  Init++;
+}
+
+/* Allocate and initialize a decoder with room for len+6 decisions.
+ * The branch tables are built from V27POLYA/V27POLYB on first use if no
+ * polynomial has been set yet.  Returns NULL when allocation fails.
+ */
+void *create_viterbi27_av(int len){
+  struct v27 *vp;
+
+  if(Init == 0){
+    int default_polys[2] = { V27POLYA,V27POLYB };
+
+    set_viterbi27_polynomial_av(default_polys);
+  }
+
+  vp = (struct v27 *)malloc(sizeof *vp);
+  if(vp == NULL)
+    return NULL;
+
+  vp->decisions = (decision_t *)malloc((len+6)*sizeof *vp->decisions);
+  if(vp->decisions == NULL){
+    free(vp);
+    return NULL;
+  }
+  init_viterbi27_av(vp,0);
+  return vp;
+}
+
+/* Viterbi chainback.
+ * Walks the stored decisions backwards from endstate and writes nbits
+ * decoded bits, packed 8 per byte, into data.
+ * Returns -1 if p is NULL, else 0.
+ */
+int chainback_viterbi27_av(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v27 *vp = p;
+  decision_t *d;
+
+  if(p == NULL)
+    return -1;
+
+  /* Read the decision pointer only after the NULL check above */
+  d = (decision_t *)vp->decisions;
+
+  /* Make room beyond the end of the encoder register so we can
+   * accumulate a full byte of decoded data
+   */
+  endstate %= 64;
+  endstate <<= 2;
+
+  /* The store into data[] only needs to be done every 8 bits.
+   * But this avoids a conditional branch, and the writes will
+   * combine in the cache anyway
+   */
+  d += 6; /* Look past tail */
+  while(nbits-- != 0){
+    int k;
+
+    /* One decision byte per state; its low bit is the decoded bit */
+    k = d[nbits].c[endstate>>2] & 1;
+    data[nbits>>3] = endstate = (endstate >> 1) | (k << 7);
+  }
+  return 0;
+}
+
+/* Free a decoder and its decision buffer; a NULL p is ignored */
+void delete_viterbi27_av(void *p){
+  struct v27 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
+
+/* Process received symbols
+ * Runs nbits trellis steps, consuming two bytes of syms per step.
+ * Each step writes one decision_t at vp->dp, computes the new path
+ * metrics from the old ones, and then swaps the two metric buffers.
+ * Returns -1 if p is NULL, else 0.
+ */
+int update_viterbi27_blk_av(void *p,unsigned char *syms,int nbits){
+ struct v27 *vp = p;
+ decision_t *d;
+
+ if(p == NULL)
+ return -1;
+ d = (decision_t *)vp->dp;
+ while(nbits--){
+ vector unsigned char survivor0,survivor1,sym0v,sym1v;
+ vector bool char decision0,decision1;
+ vector unsigned char metric,m_metric,m0,m1,m2,m3;
+ void *tmp;
+
+ /* sym0v.0 = syms[0]; sym0v.1 = syms[1]
+ * (load + permute so that syms need not be vector aligned) */
+ sym0v = vec_perm(vec_ld(0,syms),vec_ld(1,syms),vec_lvsl(0,syms));
+
+ sym1v = vec_splat(sym0v,1); /* Splat syms[1] across sym1v */
+ sym0v = vec_splat(sym0v,0); /* Splat syms[0] across sym0v */
+ syms += 2;
+
+ /* Do the 32 butterflies as two interleaved groups of 16 each to keep the pipes full */
+
+ /* Form first set of 16 branch metrics: average of the two symbol/table
+ * XORs, shifted right by 3; m_metric is 31 minus that value */
+ metric = vec_avg(vec_xor(Branchtab27[0].v[0],sym0v),vec_xor(Branchtab27[1].v[0],sym1v));
+ metric = vec_sr(metric,(vector unsigned char)(3));
+ m_metric = vec_sub((vector unsigned char)(31),metric);
+
+ /* Form first set of path metrics (old states 0-15 in v[0], 32-47 in v[2]) */
+ m0 = vec_adds(vp->old_metrics->v[0],metric);
+ m3 = vec_adds(vp->old_metrics->v[2],metric);
+ m1 = vec_adds(vp->old_metrics->v[2],m_metric);
+ m2 = vec_adds(vp->old_metrics->v[0],m_metric);
+
+ /* Form second set of 16 branch metrics */
+ metric = vec_avg(vec_xor(Branchtab27[0].v[1],sym0v),vec_xor(Branchtab27[1].v[1],sym1v));
+ metric = vec_sr(metric,(vector unsigned char)(3));
+ m_metric = vec_sub((vector unsigned char)(31),metric);
+
+ /* Compare and select first set: the smaller metric survives */
+ decision0 = vec_cmpgt(m0,m1);
+ decision1 = vec_cmpgt(m2,m3);
+ survivor0 = vec_min(m0,m1);
+ survivor1 = vec_min(m2,m3);
+
+ /* Compute second set of path metrics (old states 16-31 in v[1], 48-63 in v[3]) */
+ m0 = vec_adds(vp->old_metrics->v[1],metric);
+ m3 = vec_adds(vp->old_metrics->v[3],metric);
+ m1 = vec_adds(vp->old_metrics->v[3],m_metric);
+ m2 = vec_adds(vp->old_metrics->v[1],m_metric);
+
+ /* Interleave and store first decisions and survivors */
+ d->v[0] = vec_mergeh(decision0,decision1);
+ d->v[1] = vec_mergel(decision0,decision1);
+ vp->new_metrics->v[0] = vec_mergeh(survivor0,survivor1);
+ vp->new_metrics->v[1] = vec_mergel(survivor0,survivor1);
+
+ /* Compare and select second set */
+ decision0 = vec_cmpgt(m0,m1);
+ decision1 = vec_cmpgt(m2,m3);
+ survivor0 = vec_min(m0,m1);
+ survivor1 = vec_min(m2,m3);
+
+ /* Interleave and store second set of decisions and survivors */
+ d->v[2] = vec_mergeh(decision0,decision1);
+ d->v[3] = vec_mergel(decision0,decision1);
+ vp->new_metrics->v[2] = vec_mergeh(survivor0,survivor1);
+ vp->new_metrics->v[3] = vec_mergel(survivor0,survivor1);
+
+ /* renormalize if necessary (only the metric of state 0 is tested) */
+ if(vp->new_metrics->c[0] >= 105){
+ vector unsigned char scale0,scale1;
+
+ /* Find smallest metric and splat */
+ scale0 = vec_min(vp->new_metrics->v[0],vp->new_metrics->v[1]);
+ scale1 = vec_min(vp->new_metrics->v[2],vp->new_metrics->v[3]);
+ scale0 = vec_min(scale0,scale1);
+ scale0 = vec_min(scale0,vec_sld(scale0,scale0,8));
+ scale0 = vec_min(scale0,vec_sld(scale0,scale0,4));
+ scale0 = vec_min(scale0,vec_sld(scale0,scale0,2));
+ scale0 = vec_min(scale0,vec_sld(scale0,scale0,1));
+
+ /* Now subtract from all metrics */
+ vp->new_metrics->v[0] = vec_subs(vp->new_metrics->v[0],scale0);
+ vp->new_metrics->v[1] = vec_subs(vp->new_metrics->v[1],scale0);
+ vp->new_metrics->v[2] = vec_subs(vp->new_metrics->v[2],scale0);
+ vp->new_metrics->v[3] = vec_subs(vp->new_metrics->v[3],scale0);
+ }
+ d++;
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ vp->dp = d; /* remember where the next block's decisions go */
+
+ return 0;
+}
+
diff --git a/viterbi27_mmx.c b/viterbi27_mmx.c
new file mode 100644
index 0000000..a6d5125
--- /dev/null
+++ b/viterbi27_mmx.c
@@ -0,0 +1,115 @@
+/* K=7 r=1/2 Viterbi decoder for MMX
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <mmintrin.h>
+#include "fec.h"
+
+typedef union { char c[64]; __m64 v[8];} decision_t;
+typedef union { unsigned char c[64]; __m64 v[8];} metric_t;
+
+unsigned char Mettab27_1[256][32] __attribute__ ((aligned(16)));
+unsigned char Mettab27_2[256][32] __attribute__ ((aligned(16)));
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder
+ * Don't change this without also changing references in mmxbfly27.s!
+ */
+struct v27 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Prepare decoder p for a new frame: every state metric in buffer 1 is set
+ * to 63, the starting state to 0, and the decision pointer is rewound.
+ * Returns -1 if p is NULL, else 0.
+ */
+int init_viterbi27_mmx(void *p,int starting_state){
+  struct v27 *vp = (struct v27 *)p;
+
+  if(vp == NULL)
+    return -1;
+
+  memset(vp->metrics1.c,63,sizeof vp->metrics1.c);
+  vp->metrics1.c[starting_state & 63] = 0; /* Bias known start state */
+
+  vp->old_metrics = &vp->metrics1;
+  vp->new_metrics = &vp->metrics2;
+  vp->dp = vp->decisions;
+  return 0;
+}
+
+/* Build the two 256x32 metric tables.  For each state the expected encoder
+ * bit is the parity of (2*state) masked by |poly|, inverted for a negative
+ * polynomial; the table entry is the received symbol value (or 255 minus
+ * it when the expected bit is 1) divided by 16.
+ */
+void set_viterbi27_polynomial_mmx(int polys[2]){
+  int state;
+
+  for(state = 0; state < 32; state++){
+    /* The expected bits depend only on the state, not on the symbol */
+    int bit_a = parity((2*state) & abs(polys[0])) ^ (polys[0] < 0);
+    int bit_b = parity((2*state) & abs(polys[1])) ^ (polys[1] < 0);
+    int symbol;
+
+    for(symbol = 0; symbol < 256; symbol++){
+      int inverted = 255 - symbol;
+
+      Mettab27_1[symbol][state] = (bit_a ? inverted : symbol) / 16;
+      Mettab27_2[symbol][state] = (bit_b ? inverted : symbol) / 16;
+    }
+  }
+  Init++;
+}
+
+
+/* Allocate and initialize a decoder with room for len+6 decisions.
+ * The metric tables are built from V27POLYA/V27POLYB on first use if no
+ * polynomial has been set yet.  Returns NULL when allocation fails.
+ */
+void *create_viterbi27_mmx(int len){
+  struct v27 *vp;
+  int default_polys[2] = { V27POLYA, V27POLYB };
+
+  if(!Init)
+    set_viterbi27_polynomial_mmx(default_polys);
+
+  vp = (struct v27 *)malloc(sizeof *vp);
+  if(vp == NULL)
+    return NULL;
+
+  vp->decisions = (decision_t *)malloc((len+6)*sizeof *vp->decisions);
+  if(vp->decisions == NULL){
+    free(vp);
+    return NULL;
+  }
+  init_viterbi27_mmx(vp,0);
+  return vp;
+}
+
+/* Viterbi chainback.
+ * Walks the stored decisions backwards from endstate and writes nbits
+ * decoded bits, packed 8 per byte, into data.
+ * Returns -1 if p is NULL, else 0.
+ */
+int chainback_viterbi27_mmx(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+
+  struct v27 *vp = (struct v27 *)p;
+  decision_t *d;
+
+  if(p == NULL)
+    return -1;
+  d = (decision_t *)vp->decisions;
+
+  /* Make room beyond the end of the encoder register so we can
+   * accumulate a full byte of decoded data.  The 6-bit state lives in
+   * bits 2-7 of endstate from here on, which is what the endstate>>2
+   * lookups and the (endstate >> 1) | (k << 7) update below rely on.
+   */
+  endstate &= 63;
+  endstate <<= 2;
+
+  d += 6; /* Look past tail */
+  while(nbits-- != 0){
+    int k;
+
+    /* One decision byte per state; its low bit is the decoded bit */
+    k = d[nbits].c[endstate>>2] & 1;
+    data[nbits>>3] = endstate = (endstate >> 1) | (k << 7);
+  }
+  return 0;
+}
+
+/* Free a decoder and its decision buffer; a NULL p is ignored */
+void delete_viterbi27_mmx(void *p){
+  struct v27 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
diff --git a/viterbi27_port.c b/viterbi27_port.c
new file mode 100644
index 0000000..7cac2b3
--- /dev/null
+++ b/viterbi27_port.c
@@ -0,0 +1,191 @@
+/* K=7 r=1/2 Viterbi decoder in portable C
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <limits.h>
+#include "fec.h"
+
+
+typedef union { unsigned int w[64]; } metric_t;
+typedef union { unsigned long w[2];} decision_t;
+static union branchtab27 { unsigned char c[32]; } Branchtab27[2] __attribute__ ((aligned(16)));
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder
+ * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s!
+ */
+struct v27 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Prepare decoder p for a new frame: every state metric in buffer 1 is set
+ * to 63, the starting state to 0, and the decision pointer is rewound.
+ * Returns -1 if p is NULL, else 0.
+ */
+int init_viterbi27_port(void *p,int starting_state){
+  struct v27 *vp = p;
+  unsigned int *m;
+
+  if(vp == NULL)
+    return -1;
+
+  for(m = vp->metrics1.w; m != vp->metrics1.w + 64; m++)
+    *m = 63;
+  vp->metrics1.w[starting_state & 63] = 0; /* Bias known start state */
+
+  vp->old_metrics = &vp->metrics1;
+  vp->new_metrics = &vp->metrics2;
+  vp->dp = vp->decisions;
+  return 0;
+}
+
+/* Fill both branch tables from the two polynomials.  Each of the 32 entries
+ * is 255 or 0 according to the parity of (2*state) masked by |poly|,
+ * inverted when the polynomial is given as a negative number.
+ */
+void set_viterbi27_polynomial_port(int polys[2]){
+  int k;
+
+  for(k = 0; k < 2; k++){
+    int negated = polys[k] < 0;
+    int taps = abs(polys[k]);
+    int state;
+
+    for(state = 0; state < 32; state++)
+      Branchtab27[k].c[state] = (negated ^ parity((2*state) & taps)) ? 255 : 0;
+  }
+  Init++;
+}
+
+/* Allocate and initialize a decoder with room for len+6 decisions.
+ * The branch tables are built from V27POLYA/V27POLYB on first use if no
+ * polynomial has been set yet.  Returns NULL when allocation fails.
+ */
+void *create_viterbi27_port(int len){
+  struct v27 *vp;
+
+  if(Init == 0){
+    int default_polys[2] = { V27POLYA, V27POLYB };
+
+    set_viterbi27_polynomial_port(default_polys);
+  }
+
+  vp = malloc(sizeof *vp);
+  if(vp == NULL)
+    return NULL;
+
+  vp->decisions = malloc((len+6)*sizeof *vp->decisions);
+  if(vp->decisions == NULL){
+    free(vp);
+    return NULL;
+  }
+  init_viterbi27_port(vp,0);
+
+  return vp;
+}
+
+/* Viterbi chainback.
+ * Walks the stored decisions backwards from endstate and writes nbits
+ * decoded bits, packed 8 per byte, into data.
+ * Returns -1 if p is NULL, else 0.
+ */
+int chainback_viterbi27_port(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v27 *vp = p;
+  decision_t *d;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Keep the 6-bit state in bits 2-7 so that a whole byte of decoded
+   * data can accumulate below it
+   */
+  endstate = (endstate % 64) << 2;
+
+  d = vp->decisions + 6; /* Look past tail */
+
+  /* data[] is stored on every bit rather than every 8th; that saves a
+   * conditional branch and the writes combine in the cache anyway
+   */
+  while(nbits != 0){
+    unsigned int state;
+    int k;
+
+    nbits--;
+    state = endstate >> 2;
+    /* Decisions are packed one bit per state, 32 per word */
+    k = (d[nbits].w[state/32] >> (state%32)) & 1;
+    endstate = (endstate >> 1) | (k << 7);
+    data[nbits>>3] = endstate;
+  }
+  return 0;
+}
+
+/* Free a decoder and its decision buffer; a NULL p is ignored */
+void delete_viterbi27_port(void *p){
+  struct v27 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
+
+/* C-language butterfly
+ * BFLY(i) reads the old metrics of states i and i+32 and writes the new
+ * metrics of states 2*i and 2*i+1, OR-ing one decision bit for each into
+ * d->w[].  It expects vp, d, sym0 and sym1 to be in scope at the point of
+ * use, and the decision words to have been zeroed beforehand.
+ * metric is the sum of the two symbol/table XORs (0..510); the opposite
+ * branch uses 510 - metric.  The metric comparison is done on the signed
+ * difference m0-m1, and the smaller metric is kept.  For the second new
+ * state the two branch metrics are exchanged by adjusting m0 and m1 in
+ * place.
+ */
+#define BFLY(i) {\
+unsigned int metric,m0,m1,decision;\
+ metric = (Branchtab27[0].c[i] ^ sym0) + (Branchtab27[1].c[i] ^ sym1);\
+ m0 = vp->old_metrics->w[i] + metric;\
+ m1 = vp->old_metrics->w[i+32] + (510 - metric);\
+ decision = (signed int)(m0-m1) > 0;\
+ vp->new_metrics->w[2*i] = decision ? m1 : m0;\
+ d->w[i/16] |= decision << ((2*i)&31);\
+ m0 -= (metric+metric-510);\
+ m1 += (metric+metric-510);\
+ decision = (signed int)(m0-m1) > 0;\
+ vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\
+ d->w[i/16] |= decision << ((2*i+1)&31);\
+}
+
+/* Update decoder with a block of demodulated symbols.
+ * nbits is the number of decoded data bits, not the number of symbols;
+ * two symbols are consumed per bit.  Each step clears one decision_t,
+ * runs the 32 butterflies into it and swaps the metric buffers.
+ * Returns -1 if p is NULL, else 0.
+ */
+int update_viterbi27_blk_port(void *p,unsigned char *syms,int nbits){
+  struct v27 *vp = p;
+  decision_t *d;
+
+  if(vp == NULL)
+    return -1;
+
+  d = (decision_t *)vp->dp;
+  while(nbits--){
+    unsigned char sym0,sym1;
+    metric_t *swap;
+    int b;
+
+    d->w[0] = d->w[1] = 0;
+    sym0 = syms[0];
+    sym1 = syms[1];
+    syms += 2;
+
+    /* Butterflies 0 through 31, in ascending order */
+    for(b = 0; b < 32; b++)
+      BFLY(b);
+
+    d++;
+    /* Swap pointers to old and new metrics */
+    swap = vp->old_metrics;
+    vp->old_metrics = vp->new_metrics;
+    vp->new_metrics = swap;
+  }
+  vp->dp = d;
+  return 0;
+}
diff --git a/viterbi27_sse.c b/viterbi27_sse.c
new file mode 100644
index 0000000..cd1f287
--- /dev/null
+++ b/viterbi27_sse.c
@@ -0,0 +1,113 @@
+/* K=7 r=1/2 Viterbi decoder for SSE
+ * Feb 2004, Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <xmmintrin.h>
+#include "fec.h"
+
+typedef union { unsigned char c[64]; } metric_t;
+typedef union { unsigned long w[2]; unsigned char c[8]; __m64 v[1];} decision_t;
+union branchtab27 { unsigned char c[32]; __m64 v[4];} Branchtab27_sse[2];
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder
+ * Don't change this without also changing references in ssebfly27.s!
+ */
+struct v27 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Allocate and initialize an SSE decoder with room for len+6 decisions.
+ * The branch tables are built from V27POLYA/V27POLYB on first use if no
+ * polynomial has been set yet.  Returns NULL when allocation fails.
+ */
+void *create_viterbi27_sse(int len){
+  struct v27 *vp;
+
+  if(!Init){
+    int polys[2] = { V27POLYA, V27POLYB };
+
+    set_viterbi27_polynomial_sse(polys);
+  }
+  if((vp = malloc(sizeof(struct v27))) == NULL)
+    return NULL;
+  if((vp->decisions = malloc((len+6)*sizeof(decision_t))) == NULL){
+    free(vp);
+    return NULL;
+  }
+  /* Initialize with the SSE routine directly, as the other create
+   * functions do with theirs.  The generic init_viterbi27() dispatches
+   * on Cpu_mode and could run another implementation's initializer on
+   * this struct, whose layout it does not match.
+   */
+  init_viterbi27_sse(vp,0);
+  return vp;
+}
+
+/* Fill both branch tables from the two polynomials.  Each of the 32 entries
+ * is 255 or 0 according to the parity of (2*state) masked by |poly|,
+ * inverted when the polynomial is given as a negative number.
+ */
+void set_viterbi27_polynomial_sse(int polys[2]){
+  int k;
+
+  for(k = 0; k < 2; k++){
+    int negated = polys[k] < 0;
+    int taps = abs(polys[k]);
+    int state;
+
+    for(state = 0; state < 32; state++)
+      Branchtab27_sse[k].c[state] = (negated ^ parity((2*state) & taps)) ? 255 : 0;
+  }
+  Init++;
+}
+
+/* Prepare decoder p for a new frame: every state metric in buffer 1 is set
+ * to 63, the starting state to 0, and the decision pointer is rewound.
+ * Returns -1 if p is NULL, else 0.
+ */
+int init_viterbi27_sse(void *p,int starting_state){
+  struct v27 *vp = p;
+  int n;
+
+  if(vp == NULL)
+    return -1;
+
+  vp->old_metrics = &vp->metrics1;
+  vp->new_metrics = &vp->metrics2;
+  vp->dp = vp->decisions;
+
+  for(n = 64; n-- > 0; )
+    vp->old_metrics->c[n] = 63;
+  vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */
+  return 0;
+}
+
+/* Viterbi chainback.
+ * Walks the stored decisions backwards from endstate and writes nbits
+ * decoded bits, packed 8 per byte, into data.
+ * Returns -1 if p is NULL, else 0.
+ */
+int chainback_viterbi27_sse(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v27 *vp = p;
+  decision_t *d;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Keep the 6-bit state in bits 2-7 so that a whole byte of decoded
+   * data can accumulate below it
+   */
+  endstate = (endstate % 64) << 2;
+
+  d = vp->decisions + 6; /* Look past tail */
+
+  /* data[] is stored on every bit rather than every 8th; that saves a
+   * conditional branch and the writes combine in the cache anyway
+   */
+  while(nbits != 0){
+    unsigned int state;
+    int k;
+
+    nbits--;
+    state = endstate >> 2;
+    /* Decisions are packed one bit per state, 8 per byte */
+    k = (d[nbits].c[state/8] >> (state%8)) & 1;
+    endstate = (endstate >> 1) | (k << 7);
+    data[nbits>>3] = endstate;
+  }
+  return 0;
+}
+
+/* Free a decoder and its decision buffer; a NULL p is ignored */
+void delete_viterbi27_sse(void *p){
+  struct v27 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
diff --git a/viterbi27_sse2.c b/viterbi27_sse2.c
new file mode 100644
index 0000000..bc01710
--- /dev/null
+++ b/viterbi27_sse2.c
@@ -0,0 +1,180 @@
+/* K=7 r=1/2 Viterbi decoder for SSE2
+ * Feb 2004, Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <xmmintrin.h>
+#include "fec.h"
+
+typedef union { unsigned char c[64]; __m128i v[4]; } metric_t;
+typedef union { unsigned long w[2]; unsigned char c[8]; unsigned short s[4]; __m64 v[1];} decision_t;
+union branchtab27 { unsigned char c[32]; __m128i v[2];} Branchtab27_sse2[2];
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder
+ * Don't change this without also changing references in sse2bfly27.s!
+ */
+struct v27 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Prepare decoder p for a new frame: every state metric in buffer 1 is set
+ * to 63, the starting state to 0, and the decision pointer is rewound.
+ * Returns -1 if p is NULL, else 0.
+ */
+int init_viterbi27_sse2(void *p,int starting_state){
+  struct v27 *vp = p;
+
+  if(vp == NULL)
+    return -1;
+
+  memset(vp->metrics1.c,63,sizeof vp->metrics1.c);
+  vp->metrics1.c[starting_state & 63] = 0; /* Bias known start state */
+
+  vp->old_metrics = &vp->metrics1;
+  vp->new_metrics = &vp->metrics2;
+  vp->dp = vp->decisions;
+  return 0;
+}
+
+/* Fill both branch tables from the two polynomials.  Each of the 32 entries
+ * is 255 or 0 according to the parity of (2*state) masked by |poly|,
+ * inverted when the polynomial is given as a negative number.
+ */
+void set_viterbi27_polynomial_sse2(int polys[2]){
+  int k;
+
+  for(k = 0; k < 2; k++){
+    int negated = polys[k] < 0;
+    int taps = abs(polys[k]);
+    int state;
+
+    for(state = 0; state < 32; state++)
+      Branchtab27_sse2[k].c[state] = (negated ^ parity((2*state) & taps)) ? 255 : 0;
+  }
+  Init++;
+}
+
+
+/* Allocate and initialize an SSE2 decoder with room for len+6 decisions.
+ * The branch tables are built from V27POLYA/V27POLYB on first use if no
+ * polynomial has been set yet.  Returns NULL when allocation fails.
+ */
+void *create_viterbi27_sse2(int len){
+  void *mem;
+  struct v27 *vp;
+
+  if(Init == 0){
+    int default_polys[2] = { V27POLYA, V27POLYB };
+
+    set_viterbi27_polynomial_sse2(default_polys);
+  }
+
+  /* The decoder state holds __m128i members, so ask for memory aligned
+   * to sizeof(__m128i) instead of relying on malloc()'s alignment
+   */
+  if(posix_memalign(&mem, sizeof(__m128i),sizeof(struct v27)) != 0)
+    return NULL;
+  vp = (struct v27 *)mem;
+
+  vp->decisions = (decision_t *)malloc((len+6)*sizeof(decision_t));
+  if(vp->decisions == NULL){
+    free(vp);
+    return NULL;
+  }
+  init_viterbi27_sse2(vp,0);
+
+  return vp;
+}
+
+/* Viterbi chainback.
+ * Walks the stored decisions backwards from endstate and writes nbits
+ * decoded bits, packed 8 per byte, into data.
+ * Returns -1 if p is NULL, else 0.
+ */
+int chainback_viterbi27_sse2(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v27 *vp = p;
+  decision_t *d;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Keep the 6-bit state in bits 2-7 so that a whole byte of decoded
+   * data can accumulate below it
+   */
+  endstate = (endstate % 64) << 2;
+
+  d = vp->decisions + 6; /* Look past tail */
+
+  /* data[] is stored on every bit rather than every 8th; that saves a
+   * conditional branch and the writes combine in the cache anyway
+   */
+  while(nbits != 0){
+    unsigned int state;
+    int k;
+
+    nbits--;
+    state = endstate >> 2;
+    /* Decisions are packed one bit per state, 8 per byte */
+    k = (d[nbits].c[state/8] >> (state%8)) & 1;
+    endstate = (endstate >> 1) | (k << 7);
+    data[nbits>>3] = endstate;
+  }
+  return 0;
+}
+
+/* Free a decoder and its decision buffer; a NULL p is ignored */
+void delete_viterbi27_sse2(void *p){
+  struct v27 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
+
+
+#if 0
+/* This code is turned off because it's slower than my hand-crafted assembler in sse2bfly27.s. But it does work. */
+void update_viterbi27_blk_sse2(void *p,unsigned char *syms,int nbits){
+ struct v27 *vp = p;
+ decision_t *d;
+
+ if(p == NULL)
+ return;
+ d = (decision_t *)vp->dp;
+ while(nbits--){
+ __m128i sym0v,sym1v;
+ void *tmp;
+ int i;
+
+ /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
+ sym0v = _mm_set1_epi8(syms[0]);
+ sym1v = _mm_set1_epi8(syms[1]);
+ syms += 2;
+
+ for(i=0;i<2;i++){
+ __m128i decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+ /* Form branch metrics */
+ metric = _mm_avg_epu8(_mm_xor_si128(Branchtab27_sse2[0].v[i],sym0v),_mm_xor_si128(Branchtab27_sse2[1].v[i],sym1v));
+ /* There's no packed bytes right shift in SSE2, so we use the word version and mask
+ * (I'm *really* starting to like Altivec...)
+ */
+ metric = _mm_srli_epi16(metric,3);
+ metric = _mm_and_si128(metric,_mm_set1_epi8(31));
+ m_metric = _mm_sub_epi8(_mm_set1_epi8(31),metric);
+
+ /* Add branch metrics to path metrics */
+ m0 = _mm_add_epi8(vp->old_metrics->v[i],metric);
+ m3 = _mm_add_epi8(vp->old_metrics->v[2+i],metric);
+ m1 = _mm_add_epi8(vp->old_metrics->v[2+i],m_metric);
+ m2 = _mm_add_epi8(vp->old_metrics->v[i],m_metric);
+
+ /* Compare and select, using modulo arithmetic */
+ decision0 = _mm_cmpgt_epi8(_mm_sub_epi8(m0,m1),_mm_setzero_si128());
+ decision1 = _mm_cmpgt_epi8(_mm_sub_epi8(m2,m3),_mm_setzero_si128());
+ survivor0 = _mm_or_si128(_mm_and_si128(decision0,m1),_mm_andnot_si128(decision0,m0));
+ survivor1 = _mm_or_si128(_mm_and_si128(decision1,m3),_mm_andnot_si128(decision1,m2));
+
+ /* Pack each set of decisions into 16 bits */
+ d->s[2*i] = _mm_movemask_epi8(_mm_unpacklo_epi8(decision0,decision1));
+ d->s[2*i+1] = _mm_movemask_epi8(_mm_unpackhi_epi8(decision0,decision1));
+
+ /* Store surviving metrics */
+ vp->new_metrics->v[2*i] = _mm_unpacklo_epi8(survivor0,survivor1);
+ vp->new_metrics->v[2*i+1] = _mm_unpackhi_epi8(survivor0,survivor1);
+ }
+ d++;
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ vp->dp = d;
+}
+#endif
diff --git a/viterbi29.c b/viterbi29.c
new file mode 100644
index 0000000..80cbb33
--- /dev/null
+++ b/viterbi29.c
@@ -0,0 +1,152 @@
+/* Switch to K=9 r=1/2 Viterbi decoder with optional Intel or PowerPC SIMD
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "fec.h"
+
+/* Create a new K=9 decoder instance.
+ * Runs find_cpu_mode() first, then hands off to the implementation that
+ * matches Cpu_mode; anything not compiled into this build uses the
+ * portable C version.
+ */
+void *create_viterbi29(int len){
+  find_cpu_mode();
+
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return create_viterbi29_av(len);
+#endif
+#ifdef __i386__
+  case MMX:
+    return create_viterbi29_mmx(len);
+  case SSE:
+    return create_viterbi29_sse(len);
+  case SSE2:
+    return create_viterbi29_sse2(len);
+#endif
+  default:
+    break;
+  }
+  /* PORT, or a mode with no matching code in this build */
+  return create_viterbi29_port(len);
+}
+
+/* Install a pair of generator polynomials in the implementation selected
+ * by Cpu_mode (portable C version when no SIMD case applies).
+ */
+void set_viterbi29_polynomial(int polys[2]){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    set_viterbi29_polynomial_av(polys);
+    return;
+#endif
+#ifdef __i386__
+  case MMX:
+    set_viterbi29_polynomial_mmx(polys);
+    return;
+  case SSE:
+    set_viterbi29_polynomial_sse(polys);
+    return;
+  case SSE2:
+    set_viterbi29_polynomial_sse2(polys);
+    return;
+#endif
+  default:
+    break;
+  }
+  set_viterbi29_polynomial_port(polys);
+}
+
+/* Reset decoder p for the start of a new frame, via the implementation
+ * selected by Cpu_mode.  Returns whatever that implementation returns.
+ */
+int init_viterbi29(void *p,int starting_state){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return init_viterbi29_av(p,starting_state);
+#endif
+#ifdef __i386__
+  case MMX:
+    return init_viterbi29_mmx(p,starting_state);
+  case SSE:
+    return init_viterbi29_sse(p,starting_state);
+  case SSE2:
+    return init_viterbi29_sse2(p,starting_state);
+#endif
+  default:
+    break;
+  }
+  return init_viterbi29_port(p,starting_state);
+}
+
+/* Viterbi chainback: forward the request to the implementation selected
+ * by Cpu_mode and return its result.
+ */
+int chainback_viterbi29(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return chainback_viterbi29_av(p,data,nbits,endstate);
+#endif
+#ifdef __i386__
+  case MMX:
+    return chainback_viterbi29_mmx(p,data,nbits,endstate);
+  case SSE:
+    return chainback_viterbi29_sse(p,data,nbits,endstate);
+  case SSE2:
+    return chainback_viterbi29_sse2(p,data,nbits,endstate);
+#endif
+  default:
+    break;
+  }
+  return chainback_viterbi29_port(p,data,nbits,endstate);
+}
+
+/* Release a decoder instance through the implementation selected by
+ * Cpu_mode (portable C version when no SIMD case applies).
+ */
+void delete_viterbi29(void *p){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    delete_viterbi29_av(p);
+    return;
+#endif
+#ifdef __i386__
+  case MMX:
+    delete_viterbi29_mmx(p);
+    return;
+  case SSE:
+    delete_viterbi29_sse(p);
+    return;
+  case SSE2:
+    delete_viterbi29_sse2(p);
+    return;
+#endif
+  default:
+    break;
+  }
+  delete_viterbi29_port(p);
+}
+
+/* Feed a block of demodulated symbols to the decoder.
+ * nbits counts decoded data bits, not symbols.
+ * Returns the result of the implementation selected by Cpu_mode.
+ */
+int update_viterbi29_blk(void *p,unsigned char syms[],int nbits){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return update_viterbi29_blk_av(p,syms,nbits);
+#endif
+#ifdef __i386__
+  case MMX:
+    return update_viterbi29_blk_mmx(p,syms,nbits);
+  case SSE:
+    return update_viterbi29_blk_sse(p,syms,nbits);
+  case SSE2:
+    return update_viterbi29_blk_sse2(p,syms,nbits);
+#endif
+  default:
+    break;
+  }
+  /* PORT, or a mode with no matching code in this build */
+  return update_viterbi29_blk_port(p,syms,nbits);
+}
diff --git a/viterbi29_av.c b/viterbi29_av.c
new file mode 100644
index 0000000..31c8d27
--- /dev/null
+++ b/viterbi29_av.c
@@ -0,0 +1,190 @@
+/* K=9 r=1/2 Viterbi decoder for PowerPC G4/G5 Altivec
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <sys/sysctl.h>
+#include "fec.h"
+
+/* Decisions for one decoded bit: one byte per state (256 states), also
+ * addressable as 16 vectors. Chainback only looks at bit 0 of each byte.
+ */
+typedef union { unsigned char c[256]; vector bool char v[16]; } decision_t;
+/* Path metrics: one unsigned byte per state, also addressable as 16 vectors */
+typedef union { unsigned char c[256]; vector unsigned char v[16]; } metric_t;
+
+/* Branch symbol tables, one per polynomial; each entry is 0 or 255.
+ * Filled in by set_viterbi29_polynomial_av().
+ */
+static union branchtab29 { unsigned char c[128]; vector unsigned char v[8]; } Branchtab29[2];
+/* Nonzero once Branchtab29 has been filled in */
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder */
+struct v29 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Initialize Viterbi decoder for start of new frame
+ * Resets the metric buffers and the decision pointer of the instance p.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi29_av(void *p,int starting_state){
+  struct v29 *vp;
+  int n;
+
+  if(p == NULL)
+    return -1;
+  vp = p;
+
+  /* Give every state the same nonzero starting metric */
+  n = 0;
+  while(n < 16){
+    vp->metrics1.v[n] = (vector unsigned char)(63);
+    n++;
+  }
+
+  vp->dp = vp->decisions;
+  vp->new_metrics = &vp->metrics2;
+  vp->old_metrics = &vp->metrics1;
+  /* Bias known start state */
+  vp->metrics1.c[starting_state & 255] = 0;
+  return 0;
+}
+
+/* Build the branch symbol tables from a pair of generator polynomials.
+ * The magnitude of each polynomial is used as the tap mask and a negative
+ * sign inverts the resulting symbol. Marks the tables as initialized.
+ */
+void set_viterbi29_polynomial_av(int polys[2]){
+  int which;
+
+  for(which=0;which < 2;which++){
+    int invert = polys[which] < 0;
+    int taps = abs(polys[which]);
+    int state;
+
+    for(state=0;state < 128;state++)
+      Branchtab29[which].c[state] = (invert ^ parity((2*state) & taps)) ? 255 : 0;
+  }
+  Init++;
+}
+
+/* Create a new instance of a Viterbi decoder
+ * Allocates the decoder state plus len+8 decision_t entries, installing
+ * the default polynomials first if none have been set.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi29_av(int len){
+  struct v29 *vp;
+
+  if(Init == 0){
+    int polys[2] = { V29POLYA,V29POLYB };
+
+    set_viterbi29_polynomial_av(polys);
+  }
+
+  vp = (struct v29 *)malloc(sizeof(struct v29));
+  if(vp == NULL)
+    return NULL;
+
+  vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t));
+  if(vp->decisions != NULL){
+    init_viterbi29_av(vp,0);
+    return vp;
+  }
+  /* Decision buffer could not be allocated; release the state block */
+  free(vp);
+  return NULL;
+}
+
+/* Viterbi chainback
+ * Walks the stored decisions from the last bit to the first, rebuilding
+ * the encoder state one bit at a time and writing it into data[].
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int chainback_viterbi29_av(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v29 *vp = p;
+  decision_t *past_tail;
+  unsigned int state;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Look past tail */
+  past_tail = vp->decisions + 8;
+  /* Only the low 8 bits of the state are meaningful */
+  state = endstate & 255;
+
+  /* data[] is rewritten on every bit rather than once per byte; the last
+   * store into each byte (at a multiple of 8) carries the complete value
+   */
+  while(nbits != 0){
+    int bit;
+
+    nbits--;
+    bit = past_tail[nbits].c[state] & 1;
+    state = (state >> 1) | (bit << 7);
+    data[nbits>>3] = state;
+  }
+  return 0;
+}
+
+
+/* Delete instance of a Viterbi decoder
+ * Frees the decision buffer and the state block; a NULL p is ignored.
+ */
+void delete_viterbi29_av(void *p){
+  struct v29 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
+
+
+/* Update decoder with a block of demodulated symbols (Altivec version).
+ * Consumes two symbols from syms[] per decoded bit and stores one
+ * decision_t per bit, starting at vp->dp. nbits is the number of decoded
+ * data bits, not symbols. Returns 0, or -1 if p is NULL.
+ */
+int update_viterbi29_blk_av(void *p,unsigned char *syms,int nbits){
+ struct v29 *vp = p;
+ decision_t *d;
+ int i;
+
+ if(p == NULL)
+ return -1;
+ d = (decision_t *)vp->dp;
+
+ while(nbits--){
+ vector unsigned char sym1v,sym2v;
+ void *tmp;
+
+ /* All this seems necessary just to load a byte into all elements of a vector! */
+ sym1v = vec_perm(vec_ld(0,syms),vec_ld(1,syms),vec_lvsl(0,syms)); /* sym1v.0 = syms[0]; sym1v.1 = syms[1] */
+ sym2v = vec_splat(sym1v,1); /* Splat syms[1] across sym2v */
+ sym1v = vec_splat(sym1v,0); /* Splat syms[0] across sym1v */
+ syms += 2;
+
+ /* Each pass handles 16 butterflies: old metric vectors i and 8+i
+ * produce new metric vectors 2*i and 2*i+1
+ */
+ for(i=0;i<8;i++){
+ vector bool char decision0,decision1;
+ vector unsigned char metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+ /* Form branch metrics */
+ metric = vec_avg(vec_xor(Branchtab29[0].v[i],sym1v),vec_xor(Branchtab29[1].v[i],sym2v));
+ metric = vec_sr(metric,(vector unsigned char)(3));
+ /* Metric of the complementary branch */
+ m_metric = (vector unsigned char)(31) - metric;
+
+ /* Add branch metrics to path metrics */
+ m0 = vec_adds(vp->old_metrics->v[i],metric);
+ m3 = vec_adds(vp->old_metrics->v[8+i],metric);
+ m1 = vec_adds(vp->old_metrics->v[8+i],m_metric);
+ m2 = vec_adds(vp->old_metrics->v[i],m_metric);
+
+ /* Compare and select first set */
+ decision0 = vec_cmpgt(m0,m1);
+ decision1 = vec_cmpgt(m2,m3);
+ survivor0 = vec_min(m0,m1);
+ survivor1 = vec_min(m2,m3);
+
+ /* Interleave and store decisions and survivors */
+ d->v[2*i] = vec_mergeh(decision0,decision1);
+ d->v[2*i+1] = vec_mergel(decision0,decision1);
+ vp->new_metrics->v[2*i] = vec_mergeh(survivor0,survivor1);
+ vp->new_metrics->v[2*i+1] = vec_mergel(survivor0,survivor1);
+ }
+ d++;
+ /* renormalize if necessary; only the metric of state 0 is tested */
+ if(vp->new_metrics->c[0] >= 50){
+ int i; /* shadows the outer i */
+ vector unsigned char scale0,scale1;
+
+ /* Find smallest metric and splat */
+ scale0 = vp->new_metrics->v[0];
+ scale1 = vp->new_metrics->v[1];
+ for(i=2;i<16;i+=2){
+ scale0 = vec_min(scale0,vp->new_metrics->v[i]);
+ scale1 = vec_min(scale1,vp->new_metrics->v[i+1]);
+ }
+ scale0 = vec_min(scale0,scale1);
+ /* Fold the vector onto itself (8, 4, 2, 1 bytes) so every element holds the minimum */
+ scale0 = vec_min(scale0,vec_sld(scale0,scale0,8));
+ scale0 = vec_min(scale0,vec_sld(scale0,scale0,4));
+ scale0 = vec_min(scale0,vec_sld(scale0,scale0,2));
+ scale0 = vec_min(scale0,vec_sld(scale0,scale0,1));
+
+ /* Now subtract from all metrics */
+ for(i=0;i<16;i++)
+ vp->new_metrics->v[i] = vec_subs(vp->new_metrics->v[i],scale0);
+ }
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ /* Remember where the next block's decisions go */
+ vp->dp = d;
+ return 0;
+}
diff --git a/viterbi29_mmx.c b/viterbi29_mmx.c
new file mode 100644
index 0000000..563f40a
--- /dev/null
+++ b/viterbi29_mmx.c
@@ -0,0 +1,118 @@
+/* K=9 r=1/2 Viterbi decoder for MMX
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <mmintrin.h>
+#include "fec.h"
+
+/* Decisions for one decoded bit: one byte per state, also addressable as
+ * 32 MMX words. Chainback only looks at bit 0 of each byte.
+ */
+typedef union { char c[256]; __m64 v[32];} decision_t;
+/* Path metrics: one unsigned byte per state */
+typedef union { unsigned char c[256]; __m64 v[32];} metric_t;
+
+/* Branch metric tables indexed [symbol][state], one per polynomial;
+ * filled in by set_viterbi29_polynomial_mmx().
+ * NOTE(review): not static - presumably referenced from mmxbfly29.s; confirm.
+ */
+unsigned char Mettab29_1[256][128] __attribute__ ((aligned(8)));
+unsigned char Mettab29_2[256][128] __attribute__ ((aligned(8)));
+/* Nonzero once the metric tables have been filled in */
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder
+ * Don't change this without also changing references in mmxbfly29.s!
+ */
+struct v29 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Create a new instance of a Viterbi decoder
+ * Allocates the decoder state plus len+8 decision_t entries, installing
+ * the default polynomials first if none have been set.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi29_mmx(int len){
+  struct v29 *vp;
+
+  if(Init == 0){
+    int polys[2] = {V29POLYA,V29POLYB};
+
+    set_viterbi29_polynomial_mmx(polys);
+  }
+  if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL)
+    return NULL;
+
+  if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){
+    free(vp);
+    return NULL;
+  }
+  /* Call the MMX initializer directly. The generic init_viterbi29()
+   * dispatches on Cpu_mode and could run another variant's initializer,
+   * whose struct v29 layout differs from the one allocated here.
+   */
+  init_viterbi29_mmx(vp,0);
+  return vp;
+}
+
+/* Build the branch metric tables from a pair of generator polynomials.
+ * The magnitude of each polynomial is used as the tap mask and a negative
+ * sign inverts the expected symbol. Each table entry is the received
+ * symbol value (or 255 minus it, when the expected symbol is 1) divided
+ * by 16. Marks the tables as initialized.
+ */
+void set_viterbi29_polynomial_mmx(int polys[2]){
+  int state;
+
+  for(state=0;state < 128;state++){
+    /* The expected symbols depend only on the state, so work them out
+     * once per state instead of once per received symbol value
+     */
+    int sym1 = parity((2*state) & abs(polys[0])) ^ (polys[0] < 0);
+    int sym2 = parity((2*state) & abs(polys[1])) ^ (polys[1] < 0);
+    int symbol;
+
+    for(symbol = 0;symbol < 256;symbol++){
+      Mettab29_1[symbol][state] = (sym1 ? (255-symbol):symbol) / 16;
+      Mettab29_2[symbol][state] = (sym2 ? (255-symbol):symbol) / 16;
+    }
+  }
+  Init++;
+}
+
+/* Initialize Viterbi decoder for start of new frame
+ * Resets the metric buffers and the decision pointer of the instance p.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi29_mmx(void *p,int starting_state){
+  struct v29 *vp = p;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Give every state the same nonzero starting metric */
+  memset(vp->metrics1.c,63,sizeof(vp->metrics1.c));
+
+  vp->dp = vp->decisions;
+  vp->new_metrics = &vp->metrics2;
+  vp->old_metrics = &vp->metrics1;
+  /* Bias known start state */
+  vp->metrics1.c[starting_state & 255] = 0;
+  return 0;
+}
+
+/* Viterbi chainback
+ * Walks the stored decisions from the last bit to the first, rebuilding
+ * the encoder state one bit at a time and writing it into data[].
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int chainback_viterbi29_mmx(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v29 *vp = (struct v29 *)p;
+  decision_t *past_tail;
+  unsigned int state;
+
+  if(vp == NULL)
+    return -1;
+
+  state = endstate % 256;
+  /* Look past tail */
+  past_tail = vp->decisions + 8;
+
+  while(nbits != 0){
+    int bit;
+
+    nbits--;
+    /* One decision byte per state; only its low bit is used */
+    bit = past_tail[nbits].c[state] & 1;
+    state = (state >> 1) | (bit << 7);
+    data[nbits>>3] = state;
+  }
+  return 0;
+}
+
+/* Delete instance of a Viterbi decoder
+ * Frees the decision buffer and the state block; a NULL p is ignored.
+ */
+void delete_viterbi29_mmx(void *p){
+  struct v29 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
diff --git a/viterbi29_port.c b/viterbi29_port.c
new file mode 100644
index 0000000..292dce8
--- /dev/null
+++ b/viterbi29_port.c
@@ -0,0 +1,166 @@
+/* K=9 r=1/2 Viterbi decoder in portable C
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "fec.h"
+
+/* Path metrics: one unsigned int per state (256 states) */
+typedef union { unsigned int w[256]; } metric_t;
+/* Decisions for one decoded bit: one bit per state, packed 32 to a word
+ * (only the low 32 bits of each word are ever used)
+ */
+typedef union { unsigned long w[8];} decision_t;
+
+/* Branch symbol tables, one per polynomial; each entry is 0 or 255.
+ * Filled in by set_viterbi29_polynomial_port().
+ */
+static union { unsigned char c[128]; } Branchtab29[2];
+/* Nonzero once Branchtab29 has been filled in */
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder */
+struct v29 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Initialize Viterbi decoder for start of new frame
+ * Resets the metric buffers and the decision pointer of the instance p.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi29_port(void *p,int starting_state){
+  struct v29 *vp = p;
+  unsigned int *m,*end;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Give every state the same nonzero starting metric */
+  end = vp->metrics1.w + 256;
+  for(m = vp->metrics1.w;m != end;m++)
+    *m = 63;
+
+  vp->dp = vp->decisions;
+  vp->new_metrics = &vp->metrics2;
+  vp->old_metrics = &vp->metrics1;
+  /* Bias known start state */
+  vp->metrics1.w[starting_state & 255] = 0;
+  return 0;
+}
+
+/* Build the branch symbol tables from a pair of generator polynomials.
+ * The magnitude of each polynomial is used as the tap mask and a negative
+ * sign inverts the resulting symbol. Marks the tables as initialized.
+ */
+void set_viterbi29_polynomial_port(int polys[2]){
+  int which;
+
+  for(which=0;which < 2;which++){
+    int invert = polys[which] < 0;
+    int taps = abs(polys[which]);
+    int state;
+
+    for(state=0;state < 128;state++)
+      Branchtab29[which].c[state] = (invert ^ parity((2*state) & taps)) ? 255 : 0;
+  }
+  Init++;
+}
+
+
+/* Create a new instance of a Viterbi decoder
+ * Allocates the decoder state plus len+8 decision_t entries, installing
+ * the default polynomials first if none have been set.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi29_port(int len){
+  struct v29 *vp;
+
+  if(Init == 0){
+    int polys[2] = {V29POLYA,V29POLYB};
+
+    set_viterbi29_polynomial_port(polys);
+  }
+
+  vp = (struct v29 *)malloc(sizeof(struct v29));
+  if(vp == NULL)
+    return NULL;
+
+  vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t));
+  if(vp->decisions != NULL){
+    init_viterbi29_port(vp,0);
+    return vp;
+  }
+  /* Decision buffer could not be allocated; release the state block */
+  free(vp);
+  return NULL;
+}
+
+
+/* Viterbi chainback
+ * Walks the stored decisions from the last bit to the first, rebuilding
+ * the encoder state one bit at a time and writing it into data[].
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int chainback_viterbi29_port(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v29 *vp = p;
+  decision_t *past_tail;
+  unsigned int state;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Look past tail */
+  past_tail = vp->decisions + 8;
+  /* Only the low 8 bits of the state are meaningful */
+  state = endstate & 255;
+
+  /* data[] is rewritten on every bit rather than once per byte; the last
+   * store into each byte (at a multiple of 8) carries the complete value
+   */
+  while(nbits != 0){
+    int bit;
+
+    nbits--;
+    /* Decisions are packed one bit per state, 32 states to a word */
+    bit = (past_tail[nbits].w[state/32] >> (state%32)) & 1;
+    state = (state >> 1) | (bit << 7);
+    data[nbits>>3] = state;
+  }
+  return 0;
+}
+
+
+/* Delete instance of a Viterbi decoder
+ * Frees the decision buffer and the state block; a NULL p is ignored.
+ */
+void delete_viterbi29_port(void *p){
+  struct v29 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
+
+/* C-language butterfly */
+#define BFLY(i) {\
+unsigned int metric,m0,m1,decision;\
+ metric = (Branchtab29[0].c[i] ^ sym0) + (Branchtab29[1].c[i] ^ sym1);\
+ m0 = vp->old_metrics->w[i] + metric;\
+ m1 = vp->old_metrics->w[i+128] + (510 - metric);\
+ decision = (signed int)(m0-m1) > 0;\
+ vp->new_metrics->w[2*i] = decision ? m1 : m0;\
+ d->w[i/16] |= decision << ((2*i)&31);\
+ m0 -= (metric+metric-510);\
+ m1 += (metric+metric-510);\
+ decision = (signed int)(m0-m1) > 0;\
+ vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\
+ d->w[i/16] |= decision << ((2*i+1)&31);\
+}
+
+/* Update decoder with a block of demodulated symbols.
+ * nbits counts decoded data bits, NOT symbols; two symbols are consumed
+ * and one decision_t is written per bit.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+
+int update_viterbi29_blk_port(void *p,unsigned char *syms,int nbits){
+  struct v29 *vp = p;
+  decision_t *d;
+
+  if(vp == NULL)
+    return -1;
+
+  for(d = (decision_t *)vp->dp;nbits != 0;nbits--,d++){
+    metric_t *swap;
+    unsigned char sym0,sym1;
+    int i;
+
+    /* BFLY only ORs bits in, so start from cleared decision words */
+    for(i=7;i>=0;i--)
+      d->w[i] = 0;
+
+    sym0 = syms[0];
+    sym1 = syms[1];
+    syms += 2;
+
+    for(i=0;i<128;i++)
+      BFLY(i);
+
+    /* Swap pointers to old and new metrics */
+    swap = vp->old_metrics;
+    vp->old_metrics = vp->new_metrics;
+    vp->new_metrics = swap;
+  }
+  vp->dp = d;
+  return 0;
+}
diff --git a/viterbi29_sse.c b/viterbi29_sse.c
new file mode 100644
index 0000000..4a92e5f
--- /dev/null
+++ b/viterbi29_sse.c
@@ -0,0 +1,114 @@
+/* K=9 r=1/2 Viterbi decoder for SSE
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <xmmintrin.h>
+#include "fec.h"
+
+/* Path metrics: one unsigned byte per state, also addressable as 32 MMX words */
+typedef union { unsigned char w[256]; __m64 v[32];} metric_t;
+/* Decisions for one decoded bit: chainback reads them through c[] as one
+ * bit per state, 8 states to a byte
+ */
+typedef union { unsigned long w[8]; unsigned char c[32]; __m64 v[4];} decision_t;
+
+/* Branch symbol tables, one per polynomial; each entry is 0 or 255.
+ * Filled in by set_viterbi29_polynomial_sse().
+ * NOTE(review): not static - presumably referenced from the assembly butterfly; confirm.
+ */
+union branchtab29 { unsigned char c[128]; } Branchtab29_sse[2];
+/* Nonzero once Branchtab29_sse has been filled in */
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder
+ * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s!
+ */
+struct v29 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Create a new instance of a Viterbi decoder
+ * Allocates the decoder state plus len+8 decision_t entries, installing
+ * the default polynomials first if none have been set.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi29_sse(int len){
+  struct v29 *vp;
+
+  if(!Init){
+    int polys[2] = { V29POLYA,V29POLYB };
+
+    set_viterbi29_polynomial_sse(polys);
+  }
+  if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL)
+    return NULL;
+  if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){
+    free(vp);
+    return NULL;
+  }
+  /* Call the SSE initializer directly. The generic init_viterbi29()
+   * dispatches on Cpu_mode and could run another variant's initializer,
+   * whose struct v29 layout and starting metric differ from this one.
+   */
+  init_viterbi29_sse(vp,0);
+  return vp;
+}
+
+/* Build the branch symbol tables from a pair of generator polynomials.
+ * The magnitude of each polynomial is used as the tap mask and a negative
+ * sign inverts the resulting symbol. Marks the tables as initialized.
+ */
+void set_viterbi29_polynomial_sse(int polys[2]){
+  int which;
+
+  for(which=0;which < 2;which++){
+    int invert = polys[which] < 0;
+    int taps = abs(polys[which]);
+    int state;
+
+    for(state=0;state < 128;state++)
+      Branchtab29_sse[which].c[state] = (invert ^ parity((2*state) & taps)) ? 255 : 0;
+  }
+  Init++;
+}
+
+/* Initialize Viterbi decoder for start of new frame
+ * Resets the metric buffers and the decision pointer of the instance p.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi29_sse(void *p,int starting_state){
+  struct v29 *vp = p;
+  int n;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Give every state the same nonzero starting metric */
+  n = 256;
+  do {
+    n--;
+    vp->metrics1.w[n] = 200;
+  } while(n != 0);
+
+  vp->dp = vp->decisions;
+  vp->new_metrics = &vp->metrics2;
+  vp->old_metrics = &vp->metrics1;
+  /* Bias known start state */
+  vp->metrics1.w[starting_state & 255] = 0;
+  return 0;
+}
+
+/* Viterbi chainback
+ * Walks the stored decisions from the last bit to the first, rebuilding
+ * the encoder state one bit at a time and writing it into data[].
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int chainback_viterbi29_sse(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v29 *vp = p;
+  decision_t *past_tail;
+  unsigned int state;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Look past tail */
+  past_tail = vp->decisions + 8;
+  /* Only the low 8 bits of the state are meaningful */
+  state = endstate & 255;
+
+  /* data[] is rewritten on every bit rather than once per byte; the last
+   * store into each byte (at a multiple of 8) carries the complete value
+   */
+  while(nbits != 0){
+    int bit;
+
+    nbits--;
+    /* Decisions are packed one bit per state, 8 states to a byte */
+    bit = (past_tail[nbits].c[state/8] >> (state%8)) & 1;
+    state = (state >> 1) | (bit << 7);
+    data[nbits>>3] = state;
+  }
+  return 0;
+}
+
+
+/* Delete instance of a Viterbi decoder
+ * Frees the decision buffer and the state block; a NULL p is ignored.
+ */
+void delete_viterbi29_sse(void *p){
+  struct v29 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
diff --git a/viterbi29_sse2.c b/viterbi29_sse2.c
new file mode 100644
index 0000000..4c7336c
--- /dev/null
+++ b/viterbi29_sse2.c
@@ -0,0 +1,119 @@
+/* K=9 r=1/2 Viterbi decoder for SSE2
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <emmintrin.h>
+#include "fec.h"
+
+/* Path metrics: one unsigned byte per state, also addressable as 16 SSE2 words */
+typedef union { unsigned char c[256]; __m128i v[16];} metric_t;
+/* Decisions for one decoded bit: chainback reads them through c[] as one
+ * bit per state, 8 states to a byte
+ */
+typedef union { unsigned long w[8]; unsigned char c[32];} decision_t;
+
+/* Branch symbol tables, one per polynomial; each entry is 0 or 255.
+ * Filled in by set_viterbi29_polynomial_sse2().
+ * NOTE(review): not static - presumably referenced from sse2bfly29.s; confirm.
+ */
+union branchtab29 { unsigned char c[128]; } Branchtab29_sse2[2];
+/* Nonzero once Branchtab29_sse2 has been filled in */
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder
+ * Don't change this without also changing references in sse2bfly29.s!
+ */
+struct v29 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Initialize Viterbi decoder for start of new frame
+ * Resets the metric buffers and the decision pointer of the instance p.
+ * Returns 0 on success, -1 if p is NULL (matching the other variants).
+ */
+int init_viterbi29_sse2(void *p,int starting_state){
+  struct v29 *vp = p;
+  int i;
+
+  if(p == NULL)
+    return -1;
+  /* Give every state the same nonzero starting metric */
+  for(i=0;i<256;i++)
+    vp->metrics1.c[i] = 63;
+
+  vp->old_metrics = &vp->metrics1;
+  vp->new_metrics = &vp->metrics2;
+  vp->dp = vp->decisions;
+  vp->old_metrics->c[starting_state & 255] = 0; /* Bias known start state */
+  return 0;
+}
+
+/* Build the branch symbol tables from a pair of generator polynomials.
+ * The magnitude of each polynomial is used as the tap mask and a negative
+ * sign inverts the resulting symbol. Marks the tables as initialized.
+ */
+void set_viterbi29_polynomial_sse2(int polys[2]){
+  int which;
+
+  for(which=0;which < 2;which++){
+    int invert = polys[which] < 0;
+    int taps = abs(polys[which]);
+    int state;
+
+    for(state=0;state < 128;state++)
+      Branchtab29_sse2[which].c[state] = (invert ^ parity((2*state) & taps)) ? 255 : 0;
+  }
+  Init++;
+}
+
+
+/* Create a new instance of a Viterbi decoder
+ * Allocates a 16-byte-aligned decoder state plus len+8 decision_t
+ * entries, installing the default polynomials first if none have been set.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi29_sse2(int len){
+  void *p;
+  struct v29 *vp;
+
+  if(!Init){
+    int polys[2] = {V29POLYA,V29POLYB};
+
+    /* Fill in the SSE2 table directly. The generic
+     * set_viterbi29_polynomial() dispatches on Cpu_mode and would leave
+     * Branchtab29_sse2 (and Init) untouched in any other mode.
+     */
+    set_viterbi29_polynomial_sse2(polys);
+  }
+  /* Ordinary malloc() only returns 8-byte alignment, we need 16 */
+  if(posix_memalign(&p, sizeof(__m128i),sizeof(struct v29)))
+    return NULL;
+  vp = (struct v29 *)p;
+  if((p = malloc((len+8)*sizeof(decision_t))) == NULL){
+    free(vp);
+    return NULL;
+  }
+  vp->decisions = (decision_t *)p;
+  init_viterbi29_sse2(vp,0);
+  return vp;
+}
+
+
+/* Viterbi chainback
+ * Walks the stored decisions from the last bit to the first, rebuilding
+ * the encoder state one bit at a time and writing it into data[].
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int chainback_viterbi29_sse2(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v29 *vp = p;
+  decision_t *past_tail;
+  unsigned int state;
+
+  if(vp == NULL)
+    return -1;
+
+  /* Look past tail */
+  past_tail = vp->decisions + 8;
+  /* Only the low 8 bits of the state are meaningful */
+  state = endstate & 255;
+
+  /* data[] is rewritten on every bit rather than once per byte; the last
+   * store into each byte (at a multiple of 8) carries the complete value
+   */
+  while(nbits != 0){
+    int bit;
+
+    nbits--;
+    /* Decisions are packed one bit per state, 8 states to a byte */
+    bit = (past_tail[nbits].c[state/8] >> (state%8)) & 1;
+    state = (state >> 1) | (bit << 7);
+    data[nbits>>3] = state;
+  }
+  return 0;
+}
+
+
+/* Delete instance of a Viterbi decoder
+ * Frees the decision buffer and the state block; a NULL p is ignored.
+ */
+void delete_viterbi29_sse2(void *p){
+  struct v29 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
diff --git a/viterbi39.c b/viterbi39.c
new file mode 100644
index 0000000..ac28c2c
--- /dev/null
+++ b/viterbi39.c
@@ -0,0 +1,153 @@
+/* Switch to K=9 r=1/3 Viterbi decoder with optional Intel or PowerPC SIMD
+ * Copyright Aug 2006, Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "fec.h"
+
+/* Create a new instance of a Viterbi decoder
+ * Calls find_cpu_mode() and then forwards to the variant selected by
+ * Cpu_mode, falling back to the portable C version.
+ */
+void *create_viterbi39(int len){
+  find_cpu_mode();
+
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return create_viterbi39_av(len);
+#endif
+#ifdef __i386__
+  case MMX:
+    return create_viterbi39_mmx(len);
+  case SSE:
+    return create_viterbi39_sse(len);
+  case SSE2:
+    return create_viterbi39_sse2(len);
+#endif
+  case PORT:
+  default:
+    break;
+  }
+  return create_viterbi39_port(len);
+}
+
+/* Install a set of three generator polynomials.
+ * Forwards to the variant selected by Cpu_mode, falling back to the
+ * portable C version.
+ */
+void set_viterbi39_polynomial(int polys[3]){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    set_viterbi39_polynomial_av(polys);
+    return;
+#endif
+#ifdef __i386__
+  case MMX:
+    set_viterbi39_polynomial_mmx(polys);
+    return;
+  case SSE:
+    set_viterbi39_polynomial_sse(polys);
+    return;
+  case SSE2:
+    set_viterbi39_polynomial_sse2(polys);
+    return;
+#endif
+  case PORT:
+  default:
+    break;
+  }
+  set_viterbi39_polynomial_port(polys);
+}
+
+
+/* Initialize Viterbi decoder for start of new frame
+ * Forwards to the variant selected by Cpu_mode, falling back to the
+ * portable C version.
+ */
+int init_viterbi39(void *p,int starting_state){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return init_viterbi39_av(p,starting_state);
+#endif
+#ifdef __i386__
+  case MMX:
+    return init_viterbi39_mmx(p,starting_state);
+  case SSE:
+    return init_viterbi39_sse(p,starting_state);
+  case SSE2:
+    return init_viterbi39_sse2(p,starting_state);
+#endif
+  case PORT:
+  default:
+    break;
+  }
+  return init_viterbi39_port(p,starting_state);
+}
+
+/* Viterbi chainback
+ * Forwards to the implementation selected by Cpu_mode; any mode not
+ * handled below is served by the portable C version.
+ */
+int chainback_viterbi39(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return chainback_viterbi39_av(p,data,nbits,endstate);
+#endif
+#ifdef __i386__
+  case MMX:
+    return chainback_viterbi39_mmx(p,data,nbits,endstate);
+  case SSE:
+    return chainback_viterbi39_sse(p,data,nbits,endstate);
+  case SSE2:
+    return chainback_viterbi39_sse2(p,data,nbits,endstate);
+#endif
+  case PORT:
+  default:
+    break;
+  }
+  /* Portable fallback */
+  return chainback_viterbi39_port(p,data,nbits,endstate);
+}
+
+/* Delete instance of a Viterbi decoder
+ * Forwards to the variant selected by Cpu_mode; the portable version
+ * handles every mode not listed below.
+ */
+void delete_viterbi39(void *p){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    delete_viterbi39_av(p);
+    return;
+#endif
+#ifdef __i386__
+  case MMX:
+    delete_viterbi39_mmx(p);
+    return;
+  case SSE:
+    delete_viterbi39_sse(p);
+    return;
+  case SSE2:
+    delete_viterbi39_sse2(p);
+    return;
+#endif
+  case PORT:
+  default:
+    break;
+  }
+  delete_viterbi39_port(p);
+}
+
+/* Update decoder with a block of demodulated symbols.
+ * nbits counts decoded data bits, NOT symbols.
+ * Forwards to the variant selected by Cpu_mode, falling back to the
+ * portable C version.
+ */
+int update_viterbi39_blk(void *p,unsigned char syms[],int nbits){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    return update_viterbi39_blk_av(p,syms,nbits);
+#endif
+#ifdef __i386__
+  case MMX:
+    return update_viterbi39_blk_mmx(p,syms,nbits);
+  case SSE:
+    return update_viterbi39_blk_sse(p,syms,nbits);
+  case SSE2:
+    return update_viterbi39_blk_sse2(p,syms,nbits);
+#endif
+  case PORT:
+  default:
+    break;
+  }
+  return update_viterbi39_blk_port(p,syms,nbits);
+}
diff --git a/viterbi39_av.c b/viterbi39_av.c
new file mode 100644
index 0000000..2deed51
--- /dev/null
+++ b/viterbi39_av.c
@@ -0,0 +1,251 @@
+/* K=9 r=1/3 Viterbi decoder for PowerPC G4/G5 Altivec vector instructions
+ * 8-bit offset-binary soft decision samples
+ * Copyright Aug 2006, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <limits.h>
+#include "fec.h"
+
+/* Decisions for one decoded bit: 256 bits held in two vectors, stored in the
+ * interleaved order that chainback_viterbi39_av() undoes
+ */
+typedef union { unsigned char c[2][16]; vector unsigned char v[2]; } decision_t;
+/* Path metrics: one unsigned short per state, also addressable as 32 vectors */
+typedef union { unsigned short s[256]; vector unsigned short v[32]; } metric_t;
+
+/* Branch symbol tables, one per polynomial; each entry is 0 or 255.
+ * Filled in by set_viterbi39_polynomial_av().
+ */
+static union branchtab39 { unsigned short s[128]; vector unsigned short v[16];} Branchtab39[3];
+/* Nonzero once Branchtab39 has been filled in */
+static int Init = 0;
+
+/* State info for instance of Viterbi decoder */
+struct v39 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ void *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ void *decisions; /* Beginning of decisions for block */
+};
+
+/* Initialize Viterbi decoder for start of new frame
+ * Resets the metric buffers and the decision pointer of the instance p.
+ * Returns 0 on success, -1 if p is NULL (matching init_viterbi39_mmx).
+ */
+int init_viterbi39_av(void *p,int starting_state){
+  struct v39 *vp = p;
+  int i;
+
+  if(p == NULL)
+    return -1;
+  /* Give every state the same nonzero starting metric */
+  for(i=0;i<32;i++)
+    vp->metrics1.v[i] = (vector unsigned short)(1000);
+
+  vp->old_metrics = &vp->metrics1;
+  vp->new_metrics = &vp->metrics2;
+  vp->dp = vp->decisions;
+  vp->old_metrics->s[starting_state & 255] = 0; /* Bias known start state */
+  return 0;
+}
+
+/* Build the branch symbol tables from three generator polynomials.
+ * The magnitude of each polynomial is used as the tap mask and a negative
+ * sign inverts the resulting symbol. Marks the tables as initialized.
+ */
+void set_viterbi39_polynomial_av(int polys[3]){
+  int which;
+
+  for(which=0;which < 3;which++){
+    int invert = polys[which] < 0;
+    int taps = abs(polys[which]);
+    int state;
+
+    for(state=0;state < 128;state++)
+      Branchtab39[which].s[state] = (invert ^ parity((2*state) & taps)) ? 255 : 0;
+  }
+  Init++;
+}
+
+/* Create a new instance of a Viterbi decoder
+ * Allocates the decoder state plus len+8 decision_t entries, installing
+ * the default polynomials first if none have been set.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi39_av(int len){
+  struct v39 *vp;
+
+  if(!Init){
+    int polys[3] = { V39POLYA, V39POLYB, V39POLYC };
+
+    set_viterbi39_polynomial_av(polys);
+  }
+  /* Check both allocations before touching the memory */
+  if((vp = (struct v39 *)malloc(sizeof(struct v39))) == NULL)
+    return NULL;
+  if((vp->decisions = malloc(sizeof(decision_t)*(len+8))) == NULL){
+    free(vp);
+    return NULL;
+  }
+  init_viterbi39_av(vp,0);
+  return vp;
+}
+
+/* Viterbi chainback
+ * Walks the stored decisions from the last bit to the first, rebuilding
+ * the encoder state one bit at a time and writing it into data[].
+ * Returns the current path metric of endstate, or -1 if p is NULL.
+ */
+int chainback_viterbi39_av(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v39 *vp = p;
+  decision_t *d;
+  int path_metric;
+
+  /* Metrics are unsigned shorts, so -1 cannot be mistaken for one */
+  if(p == NULL)
+    return -1;
+  d = (decision_t *)vp->decisions;
+
+  /* Make room beyond the end of the encoder register so we can
+   * accumulate a full byte of decoded data
+   */
+  endstate %= 256;
+
+  path_metric = vp->old_metrics->s[endstate];
+
+  /* The store into data[] only needs to be done every 8 bits.
+   * But this avoids a conditional branch, and the writes will
+   * combine in the cache anyway
+   */
+  d += 8; /* Look past tail */
+  while(nbits-- != 0){
+    int k;
+
+    /* Undo the interleaved packing used by update_viterbi39_blk_av():
+     * bit 7 of the state picks the vector, bits 0-3 the byte within it,
+     * and bits 4-6 the bit within that byte, counted from the msb
+     */
+    k = (d[nbits].c[endstate >> 7][endstate & 15] & (0x80 >> ((endstate>>4)&7)) ) ? 1 : 0;
+    endstate = (k << 7) | (endstate >> 1);
+    data[nbits>>3] = endstate;
+  }
+  return path_metric;
+}
+
+/* Delete instance of a Viterbi decoder
+ * Frees the decision buffer and the state block; a NULL p is ignored.
+ */
+void delete_viterbi39_av(void *p){
+  struct v39 *vp = p;
+
+  if(vp == NULL)
+    return;
+  free(vp->decisions);
+  free(vp);
+}
+
+/* Update decoder with a block of demodulated symbols (Altivec version).
+ * Consumes three symbols from syms[] per decoded bit and stores one
+ * decision_t per bit, starting at vp->dp. nbits is the number of decoded
+ * data bits, not symbols.
+ * Returns the sum of the scale values subtracted from the metrics by the
+ * renormalizations performed during this call.
+ * NOTE(review): p is dereferenced without a NULL check.
+ */
+int update_viterbi39_blk_av(void *p,unsigned char *syms,int nbits){
+ struct v39 *vp = p;
+ decision_t *d = (decision_t *)vp->dp;
+ int path_metric = 0;
+ vector unsigned char decisions = (vector unsigned char)(0);
+
+ while(nbits--){
+ vector unsigned short symv,sym0v,sym1v,sym2v;
+ vector unsigned char s;
+ void *tmp;
+ int i;
+
+ /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
+ s = (vector unsigned char)vec_perm(vec_ld(0,syms),vec_ld(5,syms),vec_lvsl(0,syms));
+
+ symv = (vector unsigned short)vec_mergeh((vector unsigned char)(0),s); /* Unsigned byte->word unpack */
+ sym0v = vec_splat(symv,0);
+ sym1v = vec_splat(symv,1);
+ sym2v = vec_splat(symv,2);
+ syms += 3;
+
+ /* Each pass handles 8 butterflies: old metric vectors i and 16+i
+ * produce new metric vectors 2*i and 2*i+1
+ */
+ for(i=0;i<16;i++){
+ vector bool short decision0,decision1;
+ vector unsigned short metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+ /* Form branch metrics
+ * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
+ * the XOR operations constitute conditional negation.
+ * the metrics are in the range 0-765
+ */
+ m0 = vec_add(vec_xor(Branchtab39[0].v[i],sym0v),vec_xor(Branchtab39[1].v[i],sym1v));
+ m1 = vec_xor(Branchtab39[2].v[i],sym2v);
+ metric = vec_add(m0,m1);
+ m_metric = vec_sub((vector unsigned short)(765),metric);
+
+ /* Add branch metrics to path metrics */
+ m0 = vec_adds(vp->old_metrics->v[i],metric);
+ m3 = vec_adds(vp->old_metrics->v[16+i],metric);
+ m1 = vec_adds(vp->old_metrics->v[16+i],m_metric);
+ m2 = vec_adds(vp->old_metrics->v[i],m_metric);
+
+ /* Compare and select */
+ decision0 = vec_cmpgt(m0,m1);
+ decision1 = vec_cmpgt(m2,m3);
+ survivor0 = vec_min(m0,m1);
+ survivor1 = vec_min(m2,m3);
+
+ /* Store decisions and survivors.
+ * To save space without SSE2's handy PMOVMSKB instruction, we pack and store them in
+ * a funny interleaved fashion that we undo in the chainback function.
+ */
+ decisions = vec_add(decisions,decisions); /* Shift each byte 1 bit to the left */
+
+ /* Booleans are either 0xff or 0x00. Subtracting 0x00 leaves the lsb zero; subtracting
+ * 0xff is equivalent to adding 1, which sets the lsb.
+ */
+ decisions = vec_sub(decisions,(vector unsigned char)vec_pack(vec_mergeh(decision0,decision1),vec_mergel(decision0,decision1)));
+
+ vp->new_metrics->v[2*i] = vec_mergeh(survivor0,survivor1);
+ vp->new_metrics->v[2*i+1] = vec_mergel(survivor0,survivor1);
+
+ if((i % 8) == 7){
+ /* We've accumulated a total of 128 decisions, stash and start again */
+ d->v[i>>3] = decisions; /* No need to clear, the new bits will replace the old */
+ }
+ }
+#if 0
+ /* Experimentally determine metric spread
+ * The results are fixed for a given code and input symbol size
+ */
+ {
+ int i;
+ vector unsigned short min_metric;
+ vector unsigned short max_metric;
+ union { vector unsigned short v; unsigned short s[8];} t;
+ int minimum,maximum;
+ static int max_spread = 0;
+
+ min_metric = max_metric = vp->new_metrics->v[0];
+ for(i=1;i<32;i++){
+ min_metric = vec_min(min_metric,vp->new_metrics->v[i]);
+ max_metric = vec_max(max_metric,vp->new_metrics->v[i]);
+ }
+ min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,8));
+ max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,8));
+ min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,4));
+ max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,4));
+ min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,2));
+ max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,2));
+
+ t.v = min_metric;
+ minimum = t.s[0];
+ t.v = max_metric;
+ maximum = t.s[0];
+ if(maximum-minimum > max_spread){
+ max_spread = maximum-minimum;
+ printf("metric spread = %d\n",max_spread);
+ }
+ }
+#endif
+
+ /* Renormalize if necessary. This deserves some explanation.
+ * The maximum possible spread, found by experiment, for 8 bit symbols is about 3825
+ * So by looking at one arbitrary metric we can tell if any of them have possibly saturated.
+ * However, this is very conservative. Large spreads occur only at very high Eb/No, where
+ * saturating a bad path metric doesn't do much to increase its chances of being erroneously chosen as a survivor.
+
+ * At more interesting (low) Eb/No ratios, the spreads are much smaller so our chances of saturating a metric
+ * by not normalizing when we should are extremely low. So either way, the risk to performance is small.
+
+ * All this is borne out by experiment.
+ */
+ if(vp->new_metrics->s[0] >= USHRT_MAX-5000){
+ vector unsigned short scale;
+ union { vector unsigned short v; unsigned short s[8];} t;
+
+ /* Find smallest metric and splat */
+ scale = vp->new_metrics->v[0];
+ for(i=1;i<32;i++)
+ scale = vec_min(scale,vp->new_metrics->v[i]);
+
+ /* Fold the vector onto itself (8, 4, 2 bytes) so every element holds the minimum */
+ scale = vec_min(scale,vec_sld(scale,scale,8));
+ scale = vec_min(scale,vec_sld(scale,scale,4));
+ scale = vec_min(scale,vec_sld(scale,scale,2));
+
+ /* Subtract it from all metrics
+ * Work backwards to try to improve the cache hit ratio, assuming LRU
+ */
+ for(i=31;i>=0;i--)
+ vp->new_metrics->v[i] = vec_subs(vp->new_metrics->v[i],scale);
+ /* Accumulate the amount removed so the caller can account for it */
+ t.v = scale;
+ path_metric += t.s[0];
+ }
+ d++;
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ /* Remember where the next block's decisions go */
+ vp->dp = d;
+ return path_metric;
+}
diff --git a/viterbi39_mmx.c b/viterbi39_mmx.c
new file mode 100644
index 0000000..875391a
--- /dev/null
+++ b/viterbi39_mmx.c
@@ -0,0 +1,185 @@
+/* K=9 r=1/3 Viterbi decoder for x86 MMX
+ * Aug 2006, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <mmintrin.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "fec.h"
+
+typedef union { unsigned char c[256]; __m64 v[32];} decision_t; /* Decisions for one data bit: one byte per state, 256 states */
+typedef union { unsigned short s[256]; __m64 v[64];} metric_t; /* One 16-bit path metric per state, also addressable as 64 MMX words */
+
+static union branchtab39 { unsigned short s[128]; __m64 v[32];} Branchtab39[3]; /* One table per polynomial; entries are 0 or 255, filled in by set_viterbi39_polynomial_mmx() */
+static int Init = 0; /* Incremented each time Branchtab39 is filled in; zero means "not yet" */
+
+/* State info for instance of Viterbi decoder
+ * Allocated by create_viterbi39_mmx() and reset for each frame by
+ * init_viterbi39_mmx().
+ */
+struct v39 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ void *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ void *decisions; /* Beginning of decisions for block */
+};
+
+/* Reset a decoder instance so it is ready for the start of a new frame.
+ * Every path metric starts at 1000 except the known starting state, which
+ * starts at 0. Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi39_mmx(void *p,int starting_state){
+  struct v39 *dec = p;
+  unsigned short *m;
+  int n;
+
+  if(dec == NULL)
+    return -1;
+
+  dec->old_metrics = &dec->metrics1;
+  dec->new_metrics = &dec->metrics2;
+  dec->dp = dec->decisions;
+
+  m = dec->metrics1.s;
+  for(n = 255; n >= 0; n--)
+    m[n] = 1000;
+  m[starting_state & 255] = 0; /* Bias known start state */
+  return 0;
+}
+
+/* Load the three generator polynomials into the branch tables.
+ * polys[i] < 0 requests an inverted symbol; its magnitude gives the taps.
+ * Each table entry becomes 255 or 0 so the decoder can XOR it with a
+ * received symbol. Marks the tables as initialized.
+ */
+void set_viterbi39_polynomial_mmx(int polys[3]){
+  int state,i;
+
+  for(i=0;i<3;i++){
+    /* Mask with the magnitude, as the portable decoder does: ANDing with a
+     * negative value would select sign-extension bits instead of the taps
+     */
+    int taps = abs(polys[i]);
+    int invert = polys[i] < 0;
+
+    for(state=0;state < 128;state++)
+      Branchtab39[i].s[state] = (invert ^ parity((2*state) & taps)) ? 255:0;
+  }
+  Init++;
+}
+
+/* Allocate and initialize a decoder instance with room for len+8 decision
+ * records. Loads the default V39POLY* polynomials if none have been set yet.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi39_mmx(int len){
+  struct v39 *dec;
+
+  if(Init == 0){
+    int polys[3] = { V39POLYA,V39POLYB,V39POLYC };
+    set_viterbi39_polynomial_mmx(polys);
+  }
+
+  dec = (struct v39 *)malloc(sizeof(struct v39));
+  if(dec == NULL)
+    return NULL;
+
+  dec->decisions = malloc((len+8)*sizeof(decision_t));
+  if(dec->decisions == NULL){
+    free(dec);
+    return NULL;
+  }
+  init_viterbi39_mmx(dec,0);
+  return dec;
+}
+
+
+
+/* Viterbi chainback: walk the stored decisions backwards from endstate and
+ * write the decoded bits into data[].
+ * Returns the path metric stored for endstate, or -1 if p is NULL.
+ */
+int chainback_viterbi39_mmx(
+  void *p,
+  unsigned char *data, /* Decoded output data */
+  unsigned int nbits, /* Number of data bits */
+  unsigned int endstate){ /* Terminal encoder state */
+  struct v39 *dec = p;
+  decision_t *dtab;
+  int final_metric;
+
+  if(dec == NULL)
+    return -1;
+
+  endstate &= 255;
+  final_metric = dec->old_metrics->s[endstate];
+
+  dtab = (decision_t *)dec->decisions;
+  dtab += 8; /* Look past tail */
+
+  /* data[] is rewritten on every bit even though only every 8th store
+   * matters; this avoids a conditional branch and the writes combine in
+   * the cache anyway
+   */
+  while(nbits != 0){
+    unsigned int bit;
+
+    nbits--;
+    bit = dtab[nbits].c[endstate] & 1;
+    endstate = (bit << 7) | (endstate >> 1);
+    data[nbits>>3] = endstate;
+  }
+  return final_metric;
+}
+
+/* Release a decoder instance together with its decision memory.
+ * A NULL pointer is ignored.
+ */
+void delete_viterbi39_mmx(void *p){
+  struct v39 *dec = p;
+
+  if(dec == NULL)
+    return;
+  free(dec->decisions);
+  free(dec);
+}
+
+
+int update_viterbi39_blk_mmx(void *p,unsigned char *syms,int nbits){ /* Update the decoder with a block of symbols.
+ * Three symbols are read from syms[] and one decision_t is written for each
+ * of the nbits data bits. Returns -1 if p is NULL, otherwise the sum of the
+ * 65536 corrections added by the wraparound hack near the end of the loop.
+ */
+ struct v39 *vp = p;
+ decision_t *d;
+ int path_metric = 0;
+
+ if(p == NULL)
+ return -1;
+
+ d = (decision_t *)vp->dp;
+
+ while(nbits--){
+ __m64 sym0v,sym1v,sym2v;
+ void *tmp;
+ int i;
+
+ /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
+ sym0v = _mm_set1_pi16(syms[0]);
+ sym1v = _mm_set1_pi16(syms[1]);
+ sym2v = _mm_set1_pi16(syms[2]);
+ syms += 3;
+
+ for(i=0;i<32;i++){
+ __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+ /* Form branch metrics
+ * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
+ * the XOR operations constitute conditional negation.
+ * Three such terms are summed, so metric and m_metric (765-metric) are in the range 0-765
+ */
+ m0 = _mm_add_pi16(_mm_xor_si64(Branchtab39[0].v[i],sym0v),_mm_xor_si64(Branchtab39[1].v[i],sym1v));
+ metric = _mm_add_pi16(_mm_xor_si64(Branchtab39[2].v[i],sym2v),m0);
+ m_metric = _mm_sub_pi16(_mm_set1_pi16(765),metric);
+
+ /* Add branch metrics to path metrics
+ * m0/m1 are the two candidates for survivor0, m2/m3 the two for survivor1
+ */
+ m0 = _mm_add_pi16(vp->old_metrics->v[i],metric);
+ m3 = _mm_add_pi16(vp->old_metrics->v[32+i],metric);
+ m1 = _mm_add_pi16(vp->old_metrics->v[32+i],m_metric);
+ m2 = _mm_add_pi16(vp->old_metrics->v[i],m_metric);
+
+ /* Compare and select
+ * There's no packed min instruction in MMX, so we use modulo arithmetic
+ * to form the decisions and then do the select the hard way
+ */
+ decision0 = _mm_cmpgt_pi16(_mm_sub_pi16(m0,m1),_mm_setzero_si64());
+ decision1 = _mm_cmpgt_pi16(_mm_sub_pi16(m2,m3),_mm_setzero_si64());
+ survivor0 = _mm_or_si64(_mm_and_si64(decision0,m1),_mm_andnot_si64(decision0,m0));
+ survivor1 = _mm_or_si64(_mm_and_si64(decision1,m3),_mm_andnot_si64(decision1,m2));
+
+ /* Merge decisions and store as bytes */
+ d->v[i] = _mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64()));
+
+ /* Store surviving metrics */
+ vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1);
+ vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1);
+ }
+ if(vp->new_metrics->s[0] < vp->old_metrics->s[0])
+ path_metric += 65536; /* Hack: wraparound probably occurred */
+ d++;
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ vp->dp = d; /* Remember where the next block's decisions go */
+ _mm_empty();
+ return path_metric;
+}
diff --git a/viterbi39_port.c b/viterbi39_port.c
new file mode 100644
index 0000000..5685c90
--- /dev/null
+++ b/viterbi39_port.c
@@ -0,0 +1,168 @@
+/* K=9 r=1/3 Viterbi decoder in portable C
+ * Copyright Aug 2006, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "fec.h"
+
+typedef union { unsigned int w[256]; } metric_t; /* One path metric per state, 256 states */
+typedef union { unsigned long w[8];} decision_t; /* Decisions for one data bit, bit-packed; BFLY() and chainback use 32 bits of each word */
+
+static union { unsigned char c[128]; } Branchtab39[3]; /* One table per polynomial; entries are 0 or 255, filled in by set_viterbi39_polynomial_port() */
+static int Init = 0; /* Incremented each time Branchtab39 is filled in; zero means "not yet" */
+
+/* State info for instance of Viterbi decoder
+ * Allocated by create_viterbi39_port() and reset for each frame by
+ * init_viterbi39_port().
+ */
+struct v39 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Reset a decoder instance so it is ready for the start of a new frame.
+ * Every path metric starts at 63 except the known starting state, which
+ * starts at 0. Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi39_port(void *p,int starting_state){
+  struct v39 *dec = p;
+  unsigned int *m;
+  int n;
+
+  if(dec == NULL)
+    return -1;
+
+  dec->old_metrics = &dec->metrics1;
+  dec->new_metrics = &dec->metrics2;
+  dec->dp = dec->decisions;
+
+  m = dec->metrics1.w;
+  for(n = 255; n >= 0; n--)
+    m[n] = 63;
+  m[starting_state & 255] = 0; /* Bias known start state */
+  return 0;
+}
+
+/* Load the three generator polynomials into the branch tables.
+ * A negative polynomial inverts the symbol; its magnitude gives the taps.
+ * Each table entry becomes 255 or 0. Marks the tables as initialized.
+ */
+void set_viterbi39_polynomial_port(int polys[3]){
+  int which,state;
+
+  for(which = 0; which < 3; which++){
+    int invert = polys[which] < 0;
+    int taps = abs(polys[which]);
+
+    for(state = 0; state < 128; state++){
+      if(invert ^ parity((2*state) & taps))
+        Branchtab39[which].c[state] = 255;
+      else
+        Branchtab39[which].c[state] = 0;
+    }
+  }
+  Init++;
+}
+
+/* Allocate and initialize a decoder instance with room for len+8 decision
+ * records. Loads the default V39POLY* polynomials if none have been set yet.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi39_port(int len){
+  struct v39 *dec;
+
+  if(Init == 0){
+    int polys[3] = {V39POLYA,V39POLYB,V39POLYC};
+    set_viterbi39_polynomial_port(polys);
+  }
+
+  dec = (struct v39 *)malloc(sizeof(struct v39));
+  if(dec == NULL)
+    return NULL;
+
+  dec->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t));
+  if(dec->decisions == NULL){
+    free(dec);
+    return NULL;
+  }
+  init_viterbi39_port(dec,0);
+  return dec;
+}
+
+
+/* Viterbi chainback: walk the stored decisions backwards from endstate and
+ * write the decoded bits into data[].
+ * Returns 0, or -1 if p is NULL.
+ */
+int chainback_viterbi39_port(
+  void *p,
+  unsigned char *data, /* Decoded output data */
+  unsigned int nbits, /* Number of data bits */
+  unsigned int endstate){ /* Terminal encoder state */
+  struct v39 *dec = p;
+  decision_t *dtab;
+
+  if(dec == NULL)
+    return -1;
+
+  dtab = dec->decisions + 8; /* Look past tail */
+  endstate &= 255;
+
+  /* data[] is rewritten on every bit even though only every 8th store
+   * matters; this avoids a conditional branch and the writes combine in
+   * the cache anyway
+   */
+  while(nbits != 0){
+    unsigned long word;
+    unsigned int bit;
+
+    nbits--;
+    word = dtab[nbits].w[endstate/32];
+    bit = (word >> (endstate%32)) & 1;
+    endstate = (endstate >> 1) | (bit << 7);
+    data[nbits>>3] = endstate;
+  }
+  return 0;
+}
+
+
+/* Release a decoder instance together with its decision memory.
+ * A NULL pointer is ignored.
+ */
+void delete_viterbi39_port(void *p){
+  struct v39 *dec = p;
+
+  if(dec == NULL)
+    return;
+  free(dec->decisions);
+  free(dec);
+}
+
+/* C-language butterfly
+ * Handles old states i and i+128, which both feed new states 2*i and 2*i+1.
+ * metric is the branch metric: the three symbols XORed with the branch table
+ * entries and summed (0-765); the complementary branch uses 765-metric.
+ * For each new state the smaller candidate metric is stored, and the
+ * decision bit (1 when the candidate from old state i+128 was kept) is ORed
+ * into bit (new state % 32) of d->w[new state / 32].
+ * Expects vp, d, sym0, sym1 and sym2 to be in scope at the point of use.
+ */
+#define BFLY(i) {\
+unsigned int metric,m0,m1,decision;\
+ metric = (Branchtab39[0].c[i] ^ sym0) + (Branchtab39[1].c[i] ^ sym1) + \
+ (Branchtab39[2].c[i] ^ sym2);\
+ m0 = vp->old_metrics->w[i] + metric;\
+ m1 = vp->old_metrics->w[i+128] + (765 - metric);\
+ decision = (signed int)(m0-m1) > 0;\
+ vp->new_metrics->w[2*i] = decision ? m1 : m0;\
+ d->w[i/16] |= decision << ((2*i)&31);\
+ m0 -= (metric+metric-765);\
+ m1 += (metric+metric-765);\
+ decision = (signed int)(m0-m1) > 0;\
+ vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\
+ d->w[i/16] |= decision << ((2*i+1)&31);\
+}
+
+/* Run the decoder over a block of demodulated symbols.
+ * nbits counts decoded data bits, not symbols: three symbols are consumed
+ * from syms[] and one decision record is produced per data bit.
+ * Returns 0, or -1 if p is NULL.
+ */
+
+int update_viterbi39_blk_port(void *p,unsigned char *syms,int nbits){
+  struct v39 *vp = p; /* BFLY() refers to vp, d, sym0, sym1 and sym2 by name */
+  decision_t *d;
+
+  if(vp == NULL)
+    return -1;
+
+  for(d = (decision_t *)vp->dp; nbits-- != 0; d++){
+    unsigned char sym0 = syms[0];
+    unsigned char sym1 = syms[1];
+    unsigned char sym2 = syms[2];
+    metric_t *spare;
+    int k;
+
+    syms += 3;
+
+    /* BFLY() only ORs bits in, so start from a clean record */
+    for(k = 7; k >= 0; k--)
+      d->w[k] = 0;
+
+    for(k = 0; k < 128; k++)
+      BFLY(k);
+
+    /* Swap pointers to old and new metrics */
+    spare = vp->old_metrics;
+    vp->old_metrics = vp->new_metrics;
+    vp->new_metrics = spare;
+  }
+  vp->dp = d;
+  return 0;
+}
diff --git a/viterbi39_sse.c b/viterbi39_sse.c
new file mode 100644
index 0000000..c2f2865
--- /dev/null
+++ b/viterbi39_sse.c
@@ -0,0 +1,201 @@
+/* K=9 r=1/3 Viterbi decoder for x86 SSE
+ * Copyright Aug 2006, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <xmmintrin.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <limits.h>
+#include "fec.h"
+
+typedef union { unsigned long w[8]; unsigned char c[32];} decision_t; /* Decisions for one data bit, bit-packed; this file reads and writes them through c[] */
+typedef union { signed short s[256]; __m64 v[64];} metric_t; /* One signed 16-bit path metric per state, also addressable as 64 MMX words */
+
+static union branchtab39 { unsigned short s[128]; __m64 v[32];} Branchtab39[3]; /* One table per polynomial; entries are 0 or 255, filled in by set_viterbi39_polynomial_sse() */
+static int Init = 0; /* Incremented each time Branchtab39 is filled in; zero means "not yet" */
+
+/* State info for instance of Viterbi decoder
+ * Allocated by create_viterbi39_sse() and reset for each frame by
+ * init_viterbi39_sse().
+ */
+struct v39 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ void *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ void *decisions; /* Beginning of decisions for block */
+};
+
+/* Reset a decoder instance so it is ready for the start of a new frame.
+ * Metrics are signed and biased by SHRT_MIN: every state starts at
+ * SHRT_MIN+1000 except the known starting state, which starts at SHRT_MIN.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi39_sse(void *p,int starting_state){
+  struct v39 *dec = p;
+  signed short *m;
+  int n;
+
+  if(dec == NULL)
+    return -1;
+
+  dec->old_metrics = &dec->metrics1;
+  dec->new_metrics = &dec->metrics2;
+  dec->dp = dec->decisions;
+
+  m = dec->metrics1.s;
+  for(n = 255; n >= 0; n--)
+    m[n] = (SHRT_MIN+1000);
+  m[starting_state & 255] = SHRT_MIN; /* Bias known start state */
+  return 0;
+}
+
+/* Allocate and initialize a decoder instance with room for len+8 decision
+ * records. Loads the default V39POLY* polynomials if none have been set yet.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi39_sse(int len){
+  struct v39 *dec;
+
+  if(Init == 0){
+    int polys[3] = { V39POLYA, V39POLYB, V39POLYC };
+
+    set_viterbi39_polynomial_sse(polys);
+  }
+
+  dec = (struct v39 *)malloc(sizeof(struct v39));
+  if(dec == NULL)
+    return NULL;
+
+  dec->decisions = malloc((len+8)*sizeof(decision_t));
+  if(dec->decisions == NULL){
+    free(dec);
+    return NULL;
+  }
+  init_viterbi39_sse(dec,0);
+  return dec;
+}
+
+/* Load the three generator polynomials into the branch tables.
+ * polys[i] < 0 requests an inverted symbol; its magnitude gives the taps.
+ * Each table entry becomes 255 or 0 so the decoder can XOR it with a
+ * received symbol. Marks the tables as initialized.
+ */
+void set_viterbi39_polynomial_sse(int polys[3]){
+  int state,i;
+
+  for(i=0;i<3;i++){
+    /* Mask with the magnitude, as the portable decoder does: ANDing with a
+     * negative value would select sign-extension bits instead of the taps
+     */
+    int taps = abs(polys[i]);
+    int invert = polys[i] < 0;
+
+    for(state=0;state < 128;state++)
+      Branchtab39[i].s[state] = (invert ^ parity((2*state) & taps)) ? 255:0;
+  }
+  Init++;
+}
+
+/* Viterbi chainback: walk the stored decisions backwards from endstate and
+ * write the decoded bits into data[].
+ * Returns the path metric stored for endstate with the SHRT_MIN bias
+ * removed, or -1 if p is NULL.
+ */
+int chainback_viterbi39_sse(
+  void *p,
+  unsigned char *data, /* Decoded output data */
+  unsigned int nbits, /* Number of data bits */
+  unsigned int endstate){ /* Terminal encoder state */
+  struct v39 *dec = p;
+  decision_t *dtab;
+  int final_metric;
+
+  if(dec == NULL)
+    return -1;
+
+  endstate &= 255;
+  final_metric = dec->old_metrics->s[endstate];
+
+  dtab = (decision_t *)dec->decisions;
+  dtab += 8; /* Look past tail */
+
+  /* data[] is rewritten on every bit even though only every 8th store
+   * matters; this avoids a conditional branch and the writes combine in
+   * the cache anyway
+   */
+  while(nbits != 0){
+    unsigned int packed,bit;
+
+    nbits--;
+    /* Decisions are packed eight states to a byte */
+    packed = dtab[nbits].c[endstate/8];
+    bit = (packed >> (endstate%8)) & 1;
+    endstate = (bit << 7) | (endstate >> 1);
+    data[nbits>>3] = endstate;
+  }
+  return final_metric - SHRT_MIN;
+}
+
+/* Release a decoder instance together with its decision memory.
+ * A NULL pointer is ignored.
+ */
+void delete_viterbi39_sse(void *p){
+  struct v39 *dec = p;
+
+  if(dec == NULL)
+    return;
+  free(dec->decisions);
+  free(dec);
+}
+
+
+int update_viterbi39_blk_sse(void *p,unsigned char *syms,int nbits){ /* Update the decoder with a block of symbols.
+ * Three symbols are read from syms[] and one decision_t is written for each
+ * of the nbits data bits. Returns -1 if p is NULL, otherwise the sum of the
+ * amounts subtracted from the metrics by renormalization during this call.
+ */
+ struct v39 *vp = p;
+ decision_t *d;
+ int path_metric = 0;
+
+ if(p == NULL)
+ return -1;
+ d = (decision_t *)vp->dp;
+ while(nbits--){
+ __m64 sym0v,sym1v,sym2v;
+ void *tmp;
+ int i;
+
+ /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
+ sym0v = _mm_set1_pi16(syms[0]);
+ sym1v = _mm_set1_pi16(syms[1]);
+ sym2v = _mm_set1_pi16(syms[2]);
+ syms += 3;
+
+ for(i=0;i<32;i++){
+ __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+ /* Form branch metrics
+ * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
+ * the XOR operations constitute conditional negation.
+ * metric and m_metric (-metric) are in the range 0-765
+ */
+ m0 = _mm_add_pi16(_mm_xor_si64(Branchtab39[0].v[i],sym0v),_mm_xor_si64(Branchtab39[1].v[i],sym1v));
+ metric = _mm_add_pi16(_mm_xor_si64(Branchtab39[2].v[i],sym2v),m0);
+ m_metric = _mm_sub_pi16(_mm_set1_pi16(765),metric);
+
+ /* Add branch metrics to path metrics
+ * Saturating adds; m0/m1 are the two candidates for survivor0, m2/m3 the two for survivor1
+ */
+ m0 = _mm_adds_pi16(vp->old_metrics->v[i],metric);
+ m3 = _mm_adds_pi16(vp->old_metrics->v[32+i],metric);
+ m1 = _mm_adds_pi16(vp->old_metrics->v[32+i],m_metric);
+ m2 = _mm_adds_pi16(vp->old_metrics->v[i],m_metric);
+
+ /* Compare and select */
+ survivor0 = _mm_min_pi16(m0,m1);
+ survivor1 = _mm_min_pi16(m2,m3);
+ decision0 = _mm_cmpeq_pi16(survivor0,m1);
+ decision1 = _mm_cmpeq_pi16(survivor1,m3);
+
+ /* Pack decisions into 8 bits and store */
+ d->c[i] = _mm_movemask_pi8(_mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64())));
+
+ /* Store surviving metrics */
+ vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1);
+ vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1);
+ }
+ /* See if we need to renormalize
+ * Max metric spread for this code with 0-255 branch metrics is 12750
+ * Only metric 0 is tested; it stands in for all the others
+ */
+ if(vp->new_metrics->s[0] >= SHRT_MAX-5000){
+ int i,adjust;
+ __m64 adjustv;
+ union { __m64 v; signed short w[4]; } t;
+
+ /* Find smallest metric and set adjustv to bring it down to SHRT_MIN */
+ adjustv = vp->new_metrics->v[0];
+ for(i=1;i<64;i++)
+ adjustv = _mm_min_pi16(adjustv,vp->new_metrics->v[i]);
+
+ adjustv = _mm_min_pi16(adjustv,_mm_srli_si64(adjustv,32));
+ adjustv = _mm_min_pi16(adjustv,_mm_srli_si64(adjustv,16));
+ t.v = adjustv;
+ adjust = t.w[0] - SHRT_MIN;
+ path_metric += adjust;
+ adjustv = _mm_set1_pi16(adjust);
+
+ for(i=0;i<64;i++)
+ vp->new_metrics->v[i] = _mm_sub_pi16(vp->new_metrics->v[i],adjustv);
+ }
+ d++;
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ vp->dp = d; /* Remember where the next block's decisions go */
+ _mm_empty();
+ return path_metric;
+}
diff --git a/viterbi39_sse2.c b/viterbi39_sse2.c
new file mode 100644
index 0000000..f13794e
--- /dev/null
+++ b/viterbi39_sse2.c
@@ -0,0 +1,200 @@
+/* K=9 r=1/3 Viterbi decoder for x86 SSE2
+ * Copyright Mar 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <emmintrin.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <limits.h>
+#include "fec.h"
+
+typedef union { unsigned long w[8]; unsigned short s[16];} decision_t; /* Decisions for one data bit, bit-packed; update_viterbi39_blk_sse2() writes them 16 states at a time through s[] */
+typedef union { signed short s[256]; __m128i v[32];} metric_t; /* One signed 16-bit path metric per state, also addressable as 32 SSE2 vectors */
+
+static union branchtab39 { unsigned short s[128]; __m128i v[16];} Branchtab39[3]; /* One table per polynomial; entries are 0 or 255, filled in by set_viterbi39_polynomial_sse2() */
+static int Init = 0; /* Incremented each time Branchtab39 is filled in; zero means "not yet" */
+
+/* State info for instance of Viterbi decoder
+ * Allocated (16-byte aligned) by create_viterbi39_sse2() and reset for each
+ * frame by init_viterbi39_sse2().
+ */
+struct v39 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ void *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ void *decisions; /* Beginning of decisions for block */
+};
+
+/* Initialize Viterbi decoder for start of new frame.
+ * Metrics are signed and biased by SHRT_MIN: every state starts at
+ * SHRT_MIN+1000 except the known starting state, which starts at SHRT_MIN.
+ * Returns 0 on success, -1 if p is NULL (same convention as the MMX, SSE
+ * and portable variants).
+ */
+int init_viterbi39_sse2(void *p,int starting_state){
+  struct v39 *vp = p;
+  int i;
+
+  if(p == NULL)
+    return -1;
+  for(i=0;i<256;i++)
+    vp->metrics1.s[i] = (SHRT_MIN+1000);
+
+  vp->old_metrics = &vp->metrics1;
+  vp->new_metrics = &vp->metrics2;
+  vp->dp = vp->decisions;
+  vp->old_metrics->s[starting_state & 255] = SHRT_MIN; /* Bias known start state */
+  return 0;
+}
+
+/* Allocate and initialize a decoder instance with room for len+8 decision
+ * records. Loads the default V39POLY* polynomials if none have been set yet.
+ * Returns NULL if either allocation fails.
+ */
+void *create_viterbi39_sse2(int len){
+  void *mem;
+  struct v39 *dec;
+
+  if(Init == 0){
+    int polys[3] = { V39POLYA, V39POLYB, V39POLYC };
+
+    set_viterbi39_polynomial_sse2(polys);
+  }
+
+  /* The metric buffers are accessed as __m128i, so the instance must be
+   * aligned to sizeof(__m128i); ordinary malloc() may not guarantee that
+   */
+  if(posix_memalign(&mem, sizeof(__m128i),sizeof(struct v39)) != 0)
+    return NULL;
+  dec = (struct v39 *)mem;
+
+  mem = malloc((len+8)*sizeof(decision_t));
+  if(mem == NULL){
+    free(dec);
+    return NULL;
+  }
+  dec->decisions = (decision_t *)mem;
+  init_viterbi39_sse2(dec,0);
+  return dec;
+}
+
+/* Load the three generator polynomials into the branch tables.
+ * polys[i] < 0 requests an inverted symbol; its magnitude gives the taps.
+ * Each table entry becomes 255 or 0 so the decoder can XOR it with a
+ * received symbol. Marks the tables as initialized.
+ */
+void set_viterbi39_polynomial_sse2(int polys[3]){
+  int state,i;
+
+  for(i=0;i<3;i++){
+    /* Mask with the magnitude, as the portable decoder does: ANDing with a
+     * negative value would select sign-extension bits instead of the taps
+     */
+    int taps = abs(polys[i]);
+    int invert = polys[i] < 0;
+
+    for(state=0;state < 128;state++)
+      Branchtab39[i].s[state] = (invert ^ parity((2*state) & taps)) ? 255:0;
+  }
+  Init++;
+}
+
+/* Viterbi chainback: walk the stored decisions backwards from endstate and
+ * write the decoded bits into data[].
+ * Returns the (still SHRT_MIN-biased) path metric stored for endstate, or
+ * -1 if p is NULL.
+ */
+int chainback_viterbi39_sse2(
+  void *p,
+  unsigned char *data, /* Decoded output data */
+  unsigned int nbits, /* Number of data bits */
+  unsigned int endstate){ /* Terminal encoder state */
+  struct v39 *vp = p;
+  decision_t *d;
+  int path_metric;
+
+  if(p == NULL)
+    return -1;
+
+  d = (decision_t *)vp->decisions;
+  endstate %= 256;
+
+  path_metric = vp->old_metrics->s[endstate];
+
+  /* The store into data[] only needs to be done every 8 bits.
+   * But this avoids a conditional branch, and the writes will
+   * combine in the cache anyway
+   */
+  d += 8; /* Look past tail */
+  while(nbits-- != 0){
+    int k;
+
+    /* Read through the same 16-bit view that update_viterbi39_blk_sse2()
+     * writes. Indexing w[endstate/32] is only equivalent when unsigned long
+     * is 32 bits and picks the wrong bits where it is 64 bits wide.
+     */
+    k = (d[nbits].s[endstate/16] >> (endstate%16)) & 1;
+    endstate = (k << 7) | (endstate >> 1);
+    data[nbits>>3] = endstate;
+  }
+  return path_metric;
+}
+
+/* Release a decoder instance together with its decision memory.
+ * A NULL pointer is ignored.
+ */
+void delete_viterbi39_sse2(void *p){
+  struct v39 *dec = p;
+
+  if(dec == NULL)
+    return;
+  free(dec->decisions);
+  free(dec);
+}
+
+
+int update_viterbi39_blk_sse2(void *p,unsigned char *syms,int nbits){ /* Update the decoder with a block of symbols.
+ * Three symbols are read from syms[] and one decision_t is written for each
+ * of the nbits data bits. Returns the sum of the amounts subtracted from the
+ * metrics by renormalization during this call.
+ * NOTE(review): p is dereferenced without a NULL check here, unlike the
+ * MMX/SSE/portable variants.
+ */
+ struct v39 *vp = p;
+ decision_t *d = (decision_t *)vp->dp;
+ int path_metric = 0;
+
+ while(nbits--){
+ __m128i sym0v,sym1v,sym2v;
+ void *tmp;
+ int i;
+
+ /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
+ sym0v = _mm_set1_epi16(syms[0]);
+ sym1v = _mm_set1_epi16(syms[1]);
+ sym2v = _mm_set1_epi16(syms[2]);
+ syms += 3;
+
+ /* SSE2 doesn't support saturated adds on unsigned shorts, so we have to use signed shorts */
+ for(i=0;i<16;i++){
+ __m128i decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+ /* Form branch metrics
+ * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
+ * the XOR operations constitute conditional negation.
+ * metric and m_metric (-metric) are in the range 0-765
+ */
+ m0 = _mm_add_epi16(_mm_xor_si128(Branchtab39[0].v[i],sym0v),_mm_xor_si128(Branchtab39[1].v[i],sym1v));
+ metric = _mm_add_epi16(_mm_xor_si128(Branchtab39[2].v[i],sym2v),m0);
+ m_metric = _mm_sub_epi16(_mm_set1_epi16(765),metric);
+
+ /* Add branch metrics to path metrics
+ * Saturating adds; m0/m1 are the two candidates for survivor0, m2/m3 the two for survivor1
+ */
+ m0 = _mm_adds_epi16(vp->old_metrics->v[i],metric);
+ m3 = _mm_adds_epi16(vp->old_metrics->v[16+i],metric);
+ m1 = _mm_adds_epi16(vp->old_metrics->v[16+i],m_metric);
+ m2 = _mm_adds_epi16(vp->old_metrics->v[i],m_metric);
+
+ /* Compare and select */
+ survivor0 = _mm_min_epi16(m0,m1);
+ survivor1 = _mm_min_epi16(m2,m3);
+ decision0 = _mm_cmpeq_epi16(survivor0,m1);
+ decision1 = _mm_cmpeq_epi16(survivor1,m3);
+
+ /* Pack each set of decisions into 8 8-bit bytes, then interleave them and compress into 16 bits */
+ d->s[i] = _mm_movemask_epi8(_mm_unpacklo_epi8(_mm_packs_epi16(decision0,_mm_setzero_si128()),_mm_packs_epi16(decision1,_mm_setzero_si128())));
+
+ /* Store surviving metrics */
+ vp->new_metrics->v[2*i] = _mm_unpacklo_epi16(survivor0,survivor1);
+ vp->new_metrics->v[2*i+1] = _mm_unpackhi_epi16(survivor0,survivor1);
+ }
+ /* See if we need to renormalize
+ * Only metric 0 is tested; it stands in for all the others
+ */
+ if(vp->new_metrics->s[0] >= SHRT_MAX-5000){
+ int i,adjust;
+ __m128i adjustv;
+ union { __m128i v; signed short w[8]; } t;
+
+ /* Find smallest metric and set adjustv to bring it down to SHRT_MIN */
+ adjustv = vp->new_metrics->v[0];
+ for(i=1;i<32;i++)
+ adjustv = _mm_min_epi16(adjustv,vp->new_metrics->v[i]);
+
+ adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,8));
+ adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,4));
+ adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,2));
+ t.v = adjustv;
+ adjust = t.w[0] - SHRT_MIN;
+ path_metric += adjust;
+ adjustv = _mm_set1_epi16(adjust);
+
+ /* We cannot use a saturated subtract, because we often have to adjust by more than SHRT_MAX
+ * This is okay since it can't overflow anyway
+ */
+ for(i=0;i<32;i++)
+ vp->new_metrics->v[i] = _mm_sub_epi16(vp->new_metrics->v[i],adjustv);
+ }
+ d++;
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ vp->dp = d; /* Remember where the next block's decisions go */
+ return path_metric;
+}
+
+
diff --git a/viterbi615.c b/viterbi615.c
new file mode 100644
index 0000000..6dda51f
--- /dev/null
+++ b/viterbi615.c
@@ -0,0 +1,155 @@
+/* K=15 r=1/6 Viterbi decoder with optional Intel or PowerPC SIMD
+ * Copyright Feb 2004, Phil Karn, KA9Q
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "fec.h"
+
+/* Create a new decoder instance using the implementation that matches the
+ * detected CPU. Any mode without a SIMD version compiled in gets the
+ * portable C decoder.
+ */
+void *create_viterbi615(int len){
+
+  find_cpu_mode();
+
+#ifdef __VEC__
+  if(Cpu_mode == ALTIVEC)
+    return create_viterbi615_av(len);
+#endif
+#ifdef __i386__
+  if(Cpu_mode == MMX)
+    return create_viterbi615_mmx(len);
+  if(Cpu_mode == SSE)
+    return create_viterbi615_sse(len);
+  if(Cpu_mode == SSE2)
+    return create_viterbi615_sse2(len);
+#endif
+  /* PORT and anything unrecognized */
+  return create_viterbi615_port(len);
+}
+
+/* Install a set of six generator polynomials in the decoder implementation
+ * selected for this CPU.
+ */
+void set_viterbi615_polynomial(int polys[6]){
+
+  /* Resolve the CPU mode first, exactly as create_viterbi615() does.
+   * Otherwise polynomials set before the first create call are dispatched
+   * on a Cpu_mode that has not been determined yet and can land in a
+   * different implementation's table than the one create later selects.
+   */
+  find_cpu_mode();
+
+  switch(Cpu_mode){
+  case PORT:
+  default:
+    set_viterbi615_polynomial_port(polys);
+    break;
+#ifdef __VEC__
+  case ALTIVEC:
+    set_viterbi615_polynomial_av(polys);
+    break;
+#endif
+#ifdef __i386__
+  case MMX:
+    set_viterbi615_polynomial_mmx(polys);
+    break;
+  case SSE:
+    set_viterbi615_polynomial_sse(polys);
+    break;
+  case SSE2:
+    set_viterbi615_polynomial_sse2(polys);
+    break;
+#endif
+  }
+}
+
+/* Reset a decoder for the start of a new frame by forwarding to the
+ * implementation selected by Cpu_mode; returns whatever that returns.
+ */
+int init_viterbi615(void *p,int starting_state){
+#ifdef __VEC__
+  if(Cpu_mode == ALTIVEC)
+    return init_viterbi615_av(p,starting_state);
+#endif
+#ifdef __i386__
+  if(Cpu_mode == MMX)
+    return init_viterbi615_mmx(p,starting_state);
+  if(Cpu_mode == SSE)
+    return init_viterbi615_sse(p,starting_state);
+  if(Cpu_mode == SSE2)
+    return init_viterbi615_sse2(p,starting_state);
+#endif
+  /* PORT and anything unrecognized */
+  return init_viterbi615_port(p,starting_state);
+}
+
+/* Viterbi chainback: forwards to the implementation selected by Cpu_mode
+ * and returns whatever that returns.
+ */
+int chainback_viterbi615(
+  void *p,
+  unsigned char *data, /* Decoded output data */
+  unsigned int nbits, /* Number of data bits */
+  unsigned int endstate){ /* Terminal encoder state */
+
+#ifdef __VEC__
+  if(Cpu_mode == ALTIVEC)
+    return chainback_viterbi615_av(p,data,nbits,endstate);
+#endif
+#ifdef __i386__
+  if(Cpu_mode == MMX)
+    return chainback_viterbi615_mmx(p,data,nbits,endstate);
+  if(Cpu_mode == SSE)
+    return chainback_viterbi615_sse(p,data,nbits,endstate);
+  if(Cpu_mode == SSE2)
+    return chainback_viterbi615_sse2(p,data,nbits,endstate);
+#endif
+  /* PORT and anything unrecognized */
+  return chainback_viterbi615_port(p,data,nbits,endstate);
+}
+
+/* Destroy a decoder instance by forwarding to the implementation selected
+ * by Cpu_mode.
+ */
+void delete_viterbi615(void *p){
+  switch(Cpu_mode){
+#ifdef __VEC__
+  case ALTIVEC:
+    delete_viterbi615_av(p);
+    return;
+#endif
+#ifdef __i386__
+  case MMX:
+    delete_viterbi615_mmx(p);
+    return;
+  case SSE:
+    delete_viterbi615_sse(p);
+    return;
+  case SSE2:
+    delete_viterbi615_sse2(p);
+    return;
+#endif
+  default: /* PORT and anything unrecognized */
+    delete_viterbi615_port(p);
+    return;
+  }
+}
+
+/* Feed a block of demodulated symbols to the decoder by forwarding to the
+ * implementation selected by Cpu_mode.
+ * nbits counts decoded data bits, not symbols.
+ */
+int update_viterbi615_blk(void *p,unsigned char syms[],int nbits){
+#ifdef __VEC__
+  if(Cpu_mode == ALTIVEC)
+    return update_viterbi615_blk_av(p,syms,nbits);
+#endif
+#ifdef __i386__
+  if(Cpu_mode == MMX)
+    return update_viterbi615_blk_mmx(p,syms,nbits);
+  if(Cpu_mode == SSE)
+    return update_viterbi615_blk_sse(p,syms,nbits);
+  if(Cpu_mode == SSE2)
+    return update_viterbi615_blk_sse2(p,syms,nbits);
+#endif
+  /* PORT and anything unrecognized */
+  return update_viterbi615_blk_port(p,syms,nbits);
+}
+
diff --git a/viterbi615_av.c b/viterbi615_av.c
new file mode 100644
index 0000000..4a6ce9c
--- /dev/null
+++ b/viterbi615_av.c
@@ -0,0 +1,257 @@
+/* K=15 r=1/6 Viterbi decoder for PowerPC G4/G5 Altivec vector instructions
+ * 8-bit offset-binary soft decision samples
+ * Copyright Mar 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <limits.h>
+#include "fec.h"
+
+typedef union { unsigned char c[128][16]; vector unsigned char v[128]; } decision_t; /* Decisions for one data bit: 128 vectors of 16 bytes, 8 decision bits packed in each byte */
+typedef union { unsigned short s[16384]; vector unsigned short v[2048]; } metric_t; /* One 16-bit path metric per state (16384 states), also addressable as 2048 vectors */
+
+static union branchtab615 { unsigned short s[8192]; vector unsigned short v[1024];} Branchtab615[6]; /* One table per polynomial; entries are 0 or 255, filled in by set_viterbi615_polynomial_av() */
+static int Init = 0; /* Incremented each time Branchtab615 is filled in; zero means "not yet" */
+
+/* State info for instance of Viterbi decoder
+ * Allocated by create_viterbi615_av() and reset for each frame by
+ * init_viterbi615_av().
+ */
+struct v615 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ void *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ void *decisions; /* Beginning of decisions for block */
+};
+
+/* Reset a decoder instance so it is ready for the start of a new frame.
+ * Every path metric starts at 5000 except the known starting state, which
+ * starts at 0. Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi615_av(void *p,int starting_state){
+  struct v615 *dec = p;
+  vector unsigned short fill;
+  int n;
+
+  if(dec == NULL)
+    return -1;
+
+  dec->old_metrics = &dec->metrics1;
+  dec->new_metrics = &dec->metrics2;
+  dec->dp = dec->decisions;
+
+  fill = (vector unsigned short)(5000);
+  for(n = 2047; n >= 0; n--)
+    dec->metrics1.v[n] = fill;
+
+  dec->metrics1.s[starting_state & 16383] = 0; /* Bias known start state */
+  return 0;
+}
+
+/* Create a new instance of a Viterbi decoder with room for len+14 decision
+ * records. Loads the default V615POLY* polynomials if none have been set yet.
+ * Returns NULL if either allocation fails, as the other implementations do;
+ * previously a failed malloc() was dereferenced.
+ */
+void *create_viterbi615_av(int len){
+  struct v615 *vp;
+
+  if(!Init){
+    int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF };
+    set_viterbi615_polynomial_av(polys);
+  }
+  if((vp = (struct v615 *)malloc(sizeof(struct v615))) == NULL)
+    return NULL;
+  if((vp->decisions = malloc(sizeof(decision_t)*(len+14))) == NULL){
+    free(vp);
+    return NULL;
+  }
+  init_viterbi615_av(vp,0);
+  return vp;
+}
+
+/* Load the six generator polynomials into the branch tables.
+ * A negative polynomial inverts the symbol; its magnitude gives the taps.
+ * Each table entry becomes 255 or 0. Marks the tables as initialized.
+ */
+void set_viterbi615_polynomial_av(int polys[6]){
+  int which,state;
+
+  for(which = 0; which < 6; which++){
+    int invert = polys[which] < 0;
+    int taps = abs(polys[which]);
+
+    for(state = 0; state < 8192; state++){
+      if(invert ^ parity((2*state) & taps))
+        Branchtab615[which].s[state] = 255;
+      else
+        Branchtab615[which].s[state] = 0;
+    }
+  }
+  Init++;
+}
+
+
+/* Viterbi chainback: walk the stored decisions backwards from endstate and
+ * write the decoded bits into data[].
+ * Returns the path metric stored for endstate, or -1 if p is NULL (the NULL
+ * check matches the other implementations; previously p was dereferenced
+ * unconditionally).
+ */
+int chainback_viterbi615_av(
+  void *p,
+  unsigned char *data, /* Decoded output data */
+  unsigned int nbits, /* Number of data bits */
+  unsigned int endstate){ /* Terminal encoder state */
+  struct v615 *vp = p;
+  decision_t *d;
+  int path_metric;
+
+  if(p == NULL)
+    return -1;
+
+  d = (decision_t *)vp->decisions;
+  endstate %= 16384;
+
+  path_metric = vp->old_metrics->s[endstate];
+
+  /* The store into data[] only needs to be done every 8 bits.
+   * But this avoids a conditional branch, and the writes will
+   * combine in the cache anyway
+   */
+  d += 14; /* Look past tail */
+  while(nbits-- != 0){
+    int k;
+
+    /* Undo the interleaved packing used by update_viterbi615_blk_av():
+     * endstate>>7 selects the stored vector, endstate&15 the byte in it,
+     * and (endstate>>4)&7 the bit, counted from the most significant end
+     */
+    k = (d[nbits].c[endstate >> 7][endstate & 15] & (0x80 >> ((endstate>>4)&7)) ) ? 1 : 0;
+    endstate = (k << 13) | (endstate >> 1);
+    data[nbits>>3] = endstate >> 6; /* Top 8 bits of the 14-bit state */
+  }
+  return path_metric;
+}
+
+/* Release a decoder instance together with its decision memory.
+ * A NULL pointer is ignored.
+ */
+void delete_viterbi615_av(void *p){
+  struct v615 *dec = p;
+
+  if(dec == NULL)
+    return;
+  free(dec->decisions);
+  free(dec);
+}
+
+int update_viterbi615_blk_av(void *p,unsigned char *syms,int nbits){ /* Update the decoder with a block of symbols.
+ * Six symbols are read from syms[] and one decision_t is written for each of
+ * the nbits data bits. Returns the sum of the amounts subtracted from the
+ * metrics by renormalization during this call.
+ * NOTE(review): p is dereferenced without a NULL check here.
+ */
+ struct v615 *vp = p;
+ decision_t *d = (decision_t *)vp->dp;
+ int path_metric = 0;
+ vector unsigned char decisions = (vector unsigned char)(0);
+
+ while(nbits--){
+ vector unsigned short symv,sym0v,sym1v,sym2v,sym3v,sym4v,sym5v;
+ vector unsigned char s;
+ void *tmp;
+ int i;
+
+ /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
+ s = (vector unsigned char)vec_perm(vec_ld(0,syms),vec_ld(5,syms),vec_lvsl(0,syms));
+
+ symv = (vector unsigned short)vec_mergeh((vector unsigned char)(0),s); /* Unsigned byte->word unpack */
+ sym0v = vec_splat(symv,0);
+ sym1v = vec_splat(symv,1);
+ sym2v = vec_splat(symv,2);
+ sym3v = vec_splat(symv,3);
+ sym4v = vec_splat(symv,4);
+ sym5v = vec_splat(symv,5);
+ syms += 6;
+
+ for(i=0;i<1024;i++){
+ vector bool short decision0,decision1;
+ vector unsigned short metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+ /* Form branch metrics
+ * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
+ * the XOR operations constitute conditional negation.
+ * metric and m_metric (-metric) are in the range 0-1530
+ */
+ m0 = vec_add(vec_xor(Branchtab615[0].v[i],sym0v),vec_xor(Branchtab615[1].v[i],sym1v));
+ m1 = vec_add(vec_xor(Branchtab615[2].v[i],sym2v),vec_xor(Branchtab615[3].v[i],sym3v));
+ m2 = vec_add(vec_xor(Branchtab615[4].v[i],sym4v),vec_xor(Branchtab615[5].v[i],sym5v));
+ metric = vec_add(m0,m1);
+ metric = vec_add(metric,m2);
+ m_metric = vec_sub((vector unsigned short)(1530),metric);
+
+ /* Add branch metrics to path metrics
+ * Saturating adds; m0/m1 are the two candidates for survivor0, m2/m3 the two for survivor1
+ */
+ m0 = vec_adds(vp->old_metrics->v[i],metric);
+ m3 = vec_adds(vp->old_metrics->v[1024+i],metric);
+ m1 = vec_adds(vp->old_metrics->v[1024+i],m_metric);
+ m2 = vec_adds(vp->old_metrics->v[i],m_metric);
+
+ /* Compare and select */
+ decision0 = vec_cmpgt(m0,m1);
+ decision1 = vec_cmpgt(m2,m3);
+ survivor0 = vec_min(m0,m1);
+ survivor1 = vec_min(m2,m3);
+
+ /* Store decisions and survivors.
+ * To save space without SSE2's handy PMOVMSKB instruction, we pack and store them in
+ * a funny interleaved fashion that we undo in the chainback function.
+ */
+ decisions = vec_add(decisions,decisions); /* Shift each byte 1 bit to the left */
+
+ /* Booleans are either 0xff or 0x00. Subtracting 0x00 leaves the lsb zero; subtracting
+ * 0xff is equivalent to adding 1, which sets the lsb.
+ */
+ decisions = vec_sub(decisions,(vector unsigned char)vec_pack(vec_mergeh(decision0,decision1),vec_mergel(decision0,decision1)));
+
+ vp->new_metrics->v[2*i] = vec_mergeh(survivor0,survivor1);
+ vp->new_metrics->v[2*i+1] = vec_mergel(survivor0,survivor1);
+
+ if((i % 8) == 7){
+ /* We've accumulated a total of 128 decisions, stash and start again */
+ d->v[i>>3] = decisions; /* No need to clear, the new bits will replace the old */
+ }
+ }
+#if 0
+ /* Experimentally determine metric spread
+ * The results are fixed for a given code and input symbol size
+ */
+ {
+ int i;
+ vector unsigned short min_metric;
+ vector unsigned short max_metric;
+ union { vector unsigned short v; unsigned short s[8];} t;
+ int minimum,maximum;
+ static int max_spread = 0;
+
+ min_metric = max_metric = vp->new_metrics->v[0];
+ for(i=1;i<2048;i++){
+ min_metric = vec_min(min_metric,vp->new_metrics->v[i]);
+ max_metric = vec_max(max_metric,vp->new_metrics->v[i]);
+ }
+ min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,8));
+ max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,8));
+ min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,4));
+ max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,4));
+ min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,2));
+ max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,2));
+
+ t.v = min_metric;
+ minimum = t.s[0];
+ t.v = max_metric;
+ maximum = t.s[0];
+ if(maximum-minimum > max_spread){
+ max_spread = maximum-minimum;
+ printf("metric spread = %d\n",max_spread);
+ }
+ }
+#endif
+
+ /* Renormalize if necessary. This deserves some explanation.
+
+ * The maximum possible spread, found by experiment, for 4-bit symbols is 405; for 8 bit symbols, it's 12750.
+ * So by looking at one arbitrary metric we can tell if any of them have possibly saturated.
+ * However, this is very conservative. Large spreads occur only at very high Eb/No, where
+ * saturating a bad path metric doesn't do much to increase its chances of being erroneously chosen as a survivor.
+
+ * At more interesting (low) Eb/No ratios, the spreads are much smaller so our chances of saturating a metric
+ * by not normalizing when we should are extremely low. So either way, the risk to performance is small.
+
+ * All this is borne out by experiment.
+ */
+ if(vp->new_metrics->s[0] >= USHRT_MAX-12750){
+ vector unsigned short scale;
+ union { vector unsigned short v; unsigned short s[8];} t;
+
+ /* Find smallest metric and splat */
+ scale = vp->new_metrics->v[0];
+ for(i=1;i<2048;i++)
+ scale = vec_min(scale,vp->new_metrics->v[i]);
+
+ scale = vec_min(scale,vec_sld(scale,scale,8));
+ scale = vec_min(scale,vec_sld(scale,scale,4));
+ scale = vec_min(scale,vec_sld(scale,scale,2));
+
+ /* Subtract it from all metrics
+ * Work backwards to try to improve the cache hit ratio, assuming LRU
+ */
+ for(i=2047;i>=0;i--)
+ vp->new_metrics->v[i] = vec_subs(vp->new_metrics->v[i],scale);
+ t.v = scale;
+ path_metric += t.s[0];
+ }
+ d++;
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ vp->dp = d; /* Remember where the next block's decisions go */
+ return path_metric;
+}
diff --git a/viterbi615_mmx.c b/viterbi615_mmx.c
new file mode 100644
index 0000000..89a56f7
--- /dev/null
+++ b/viterbi615_mmx.c
@@ -0,0 +1,183 @@
+/* K=15 r=1/6 Viterbi decoder for x86 MMX
+ * Mar 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <mmintrin.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "fec.h"
+
+typedef union { unsigned char c[16384]; __m64 v[2048];} decision_t; /* decisions for one bit time: one byte per state (16384 states), also viewable as 2048 MMX words */
+typedef union { unsigned short s[16384]; __m64 v[4096];} metric_t; /* one 16-bit path metric per state, also viewable as 4096 MMX words */
+
+static union branchtab615 { unsigned short s[8192]; __m64 v[2048];} Branchtab615[6]; /* one table per polynomial; entries are 0 or 255, filled by set_viterbi615_polynomial_mmx() */
+static int Init = 0; /* nonzero once the branch tables have been filled */
+
+/* State info for instance of Viterbi decoder */
+struct v615 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ void *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ void *decisions; /* Beginning of decisions for block */
+};
+
+/* Reset a decoder instance for the start of a new frame.
+ * Every path metric is set to 5000 except the one for starting_state
+ * (taken modulo 16384), which is set to 0.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi615_mmx(void *p,int starting_state){
+  struct v615 *dec = p;
+  int state = 0;
+
+  if(dec == NULL)
+    return -1;
+
+  /* Give every state the same large metric... */
+  while(state < 16384){
+    dec->metrics1.s[state] = 5000;
+    state++;
+  }
+  /* ...then bias the known start state */
+  dec->metrics1.s[starting_state & 16383] = 0;
+
+  dec->old_metrics = &dec->metrics1;
+  dec->new_metrics = &dec->metrics2;
+  dec->dp = dec->decisions; /* rewind to the first decision of the block */
+  return 0;
+}
+
+/* Allocate and initialize a decoder with room for len+14 decision records.
+ * On the first call (Init still 0) the branch tables are built from the V615POLY* polynomials.
+ * Returns the new instance, or NULL if either allocation fails.
+ */
+void *create_viterbi615_mmx(int len){
+  struct v615 *dec;
+
+  if(Init == 0){
+    int default_polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF };
+    set_viterbi615_polynomial_mmx(default_polys);
+  }
+
+  dec = (struct v615 *)malloc(sizeof(struct v615));
+  if(dec == NULL)
+    return NULL;
+
+  dec->decisions = malloc((len+14)*sizeof(decision_t));
+  if(dec->decisions == NULL){
+    free(dec); /* don't leak the state block when the decision buffer can't be had */
+    return NULL;
+  }
+  init_viterbi615_mmx(dec,0);
+  return dec;
+}
+
+/* Fill the six branch tables from polys[].
+ * Entry [n][state] becomes 255 when (polys[n] < 0) XOR parity((2*state) & |polys[n]|) is nonzero, else 0.
+ * Bumps Init so create_viterbi615_mmx() knows the tables are ready.
+ */
+void set_viterbi615_polynomial_mmx(int polys[6]){
+  int n;
+  int state;
+
+  for(n=0;n<6;n++){
+    for(state=0;state < 8192;state++){
+      int flag = (polys[n] < 0) ^ parity((2*state) & abs(polys[n]));
+
+      Branchtab615[n].s[state] = flag ? 255 : 0;
+    }
+  }
+  Init++;
+}
+
+/* Viterbi chainback
+ * Walks the stored decisions backwards from endstate and writes the decoded
+ * bytes into data[]. Returns 0, or -1 if p is NULL.
+ */
+int chainback_viterbi615_mmx(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v615 *dec = p;
+  decision_t *dtab;
+
+  if(dec == NULL)
+    return -1;
+
+  /* Look past the 14 tail entries */
+  dtab = (decision_t *)dec->decisions + 14;
+  endstate &= 16383;
+
+  /* data[] is stored on every bit rather than every 8th one; that saves a
+   * conditional branch and the repeated writes combine in the cache
+   */
+  for(;nbits != 0;nbits--){
+    unsigned int idx = nbits-1;
+    int bit = dtab[idx].c[endstate] & 1;
+
+    endstate = (bit << 13) | (endstate >> 1);
+    data[idx>>3] = endstate >> 6;
+  }
+  return 0;
+}
+
+/* Free a decoder instance together with its decision buffer; a NULL p is ignored */
+void delete_viterbi615_mmx(void *p){
+  struct v615 *dec = p;
+
+  if(dec == NULL)
+    return;
+  free(dec->decisions);
+  free(dec);
+}
+
+/* Update the decoder with a block of demodulated symbols.
+ * p is the decoder instance; syms supplies six symbols per bit (syms[0..5] are read
+ * and the pointer advances by 6 each pass); nbits is the number of passes.
+ * One decision_t is written per bit starting at vp->dp, and vp->dp is advanced past them.
+ * No metric renormalization is done here; the compare below relies on modulo arithmetic.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int update_viterbi615_blk_mmx(void *p,unsigned char *syms,int nbits){
+ struct v615 *vp = p;
+ decision_t *d;
+
+ if(p == NULL)
+ return -1;
+
+ d = (decision_t *)vp->dp;
+
+ while(nbits--){
+ __m64 sym0v,sym1v,sym2v,sym3v,sym4v,sym5v;
+ void *tmp;
+ int i;
+
+ /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
+ sym0v = _mm_set1_pi16(syms[0]);
+ sym1v = _mm_set1_pi16(syms[1]);
+ sym2v = _mm_set1_pi16(syms[2]);
+ sym3v = _mm_set1_pi16(syms[3]);
+ sym4v = _mm_set1_pi16(syms[4]);
+ sym5v = _mm_set1_pi16(syms[5]);
+ syms += 6;
+
+ for(i=0;i<2048;i++){ /* each pass handles four butterflies (four 16-bit lanes) */
+ __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+ /* Form branch metrics
+ * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
+ * the XOR operations constitute conditional negation.
+ * metric and m_metric (-metric) are in the range 0-1530
+ */
+ m0 = _mm_add_pi16(_mm_xor_si64(Branchtab615[0].v[i],sym0v),_mm_xor_si64(Branchtab615[1].v[i],sym1v));
+ m1 = _mm_add_pi16(_mm_xor_si64(Branchtab615[2].v[i],sym2v),_mm_xor_si64(Branchtab615[3].v[i],sym3v));
+ m2 = _mm_add_pi16(_mm_xor_si64(Branchtab615[4].v[i],sym4v),_mm_xor_si64(Branchtab615[5].v[i],sym5v));
+ metric = _mm_add_pi16(m0,_mm_add_pi16(m1,m2));
+ m_metric = _mm_sub_pi16(_mm_set1_pi16(1530),metric);
+
+ /* Add branch metrics to path metrics */
+ m0 = _mm_add_pi16(vp->old_metrics->v[i],metric);
+ m3 = _mm_add_pi16(vp->old_metrics->v[2048+i],metric);
+ m1 = _mm_add_pi16(vp->old_metrics->v[2048+i],m_metric);
+ m2 = _mm_add_pi16(vp->old_metrics->v[i],m_metric);
+
+ /* Compare and select
+ * There's no packed min instruction in MMX, so we use modulo arithmetic
+ * to form the decisions and then do the select the hard way
+ */
+ decision0 = _mm_cmpgt_pi16(_mm_sub_pi16(m0,m1),_mm_setzero_si64());
+ decision1 = _mm_cmpgt_pi16(_mm_sub_pi16(m2,m3),_mm_setzero_si64());
+ survivor0 = _mm_or_si64(_mm_and_si64(decision0,m1),_mm_andnot_si64(decision0,m0));
+ survivor1 = _mm_or_si64(_mm_and_si64(decision1,m3),_mm_andnot_si64(decision1,m2));
+
+ /* Merge decisions and store as bytes */
+ d->v[i] = _mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64()));
+
+ /* Store surviving metrics */
+ vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1);
+ vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1);
+ }
+ d++;
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ vp->dp = d;
+ _mm_empty(); /* leave MMX state before returning */
+ return 0;
+}
diff --git a/viterbi615_port.c b/viterbi615_port.c
new file mode 100644
index 0000000..89bdd80
--- /dev/null
+++ b/viterbi615_port.c
@@ -0,0 +1,156 @@
+/* K=15 r=1/6 Viterbi decoder in portable C
+ * Copyright Mar 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <limits.h>
+#include "fec.h"
+
+typedef union { unsigned long w[512]; unsigned char c[2048];} decision_t; /* decisions for one bit time, one bit per state, read and written through c[]; NOTE(review): w[512] spans the same 2048 bytes only where unsigned long is 32 bits */
+typedef union { unsigned long w[16384]; } metric_t; /* one path metric per state (16384 states) */
+
+static union branchtab615 { unsigned long w[8192]; } Branchtab615[6] __attribute__ ((aligned(16))); /* one table per polynomial; entries are 0 or 255, filled by set_viterbi615_polynomial_port() */
+static int Init = 0; /* nonzero once the branch tables have been filled */
+
+/* State info for instance of Viterbi decoder */
+struct v615 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ decision_t *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ decision_t *decisions; /* Beginning of decisions for block */
+};
+
+/* Create a new instance of a Viterbi decoder
+ * len is the number of data bits per frame; room is allocated for len+14 decision records.
+ * On the first call (Init still 0) the branch tables are built from the V615POLY* polynomials.
+ * Returns the new instance, or NULL if either allocation fails.
+ */
+void *create_viterbi615_port(int len){
+  struct v615 *vp;
+
+  if(!Init){
+    int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF };
+    set_viterbi615_polynomial_port(polys);
+  }
+  if((vp = malloc(sizeof *vp)) == NULL)
+    return NULL;
+  if((vp->decisions = malloc((len+14)*sizeof(decision_t))) == NULL){
+    free(vp);
+    return NULL;
+  }
+  /* Use the portable initializer directly, as the other create routines do with
+   * their own variants. The generic init_viterbi615() is not guaranteed to pick
+   * the routine that matches this file's struct v615 layout.
+   */
+  init_viterbi615_port(vp,0);
+  return vp;
+}
+
+/* Build the six branch tables from polys[].
+ * Entry [n][state] is 255 when (polys[n] < 0) XOR parity((2*state) & |polys[n]|) is nonzero, else 0.
+ * Increments Init so create_viterbi615_port() will not rebuild them.
+ */
+void set_viterbi615_polynomial_port(int polys[6]){
+  int n;
+
+  for(n=0;n<6;n++){
+    int state = 0;
+
+    while(state < 8192){
+      int flag = (polys[n] < 0) ^ parity((2*state) & abs(polys[n]));
+
+      Branchtab615[n].w[state] = flag ? 255 : 0;
+      state++;
+    }
+  }
+  Init++;
+}
+
+/* Prepare a decoder instance for a new frame.
+ * All path metrics start at 1000 except the one for starting_state
+ * (taken modulo 16384), which starts at 0.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi615_port(void *p,int starting_state){
+  struct v615 *dec = p;
+  int state;
+
+  if(dec == NULL)
+    return -1;
+
+  dec->old_metrics = &dec->metrics1;
+  dec->new_metrics = &dec->metrics2;
+  dec->dp = dec->decisions; /* rewind to the first decision of the block */
+
+  for(state=0;state<16384;state++)
+    dec->old_metrics->w[state] = 1000;
+  dec->old_metrics->w[starting_state & 16383] = 0; /* Bias known start state */
+  return 0;
+}
+
+/* Viterbi chainback
+ * Follows the stored decision bits backwards from endstate and writes the
+ * decoded bytes into data[]. Returns 0, or -1 if p is NULL.
+ */
+int chainback_viterbi615_port(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v615 *dec = p;
+  decision_t *dtab;
+
+  if(dec == NULL)
+    return -1;
+
+  dtab = (decision_t *)dec->decisions + 14; /* Look past tail */
+  endstate &= 16383;
+
+  /* Storing into data[] on every bit (instead of every 8th) avoids a
+   * conditional branch; the writes combine in the cache anyway
+   */
+  for(;nbits != 0;nbits--){
+    unsigned int idx = nbits-1;
+    unsigned char packed = dtab[idx].c[endstate >> 3]; /* byte holding this state's decision bit */
+    int bit = (packed >> (endstate & 7)) & 1;
+
+    endstate = (bit << 13) | (endstate >> 1);
+    data[idx>>3] = endstate >> 6;
+  }
+  return 0;
+}
+
+/* Dispose of a decoder instance and its decision buffer; does nothing when p is NULL */
+void delete_viterbi615_port(void *p){
+  struct v615 *dec = p;
+
+  if(dec == NULL)
+    return;
+  free(dec->decisions);
+  free(dec);
+}
+
+/* C-language butterfly
+ * BFLY(i) forms the branch metric for butterfly i from syms[0..5] and the six branch tables,
+ * extends old states i and i+8192 into new states 2*i and 2*i+1, and for each new state
+ * keeps m1 (resp. m3) when the signed difference m0-m1 (resp. m2-m3) is >= 0, else m0 (resp. m2).
+ * The two decision bits are ORed into d->c[], so the decision record must be zeroed beforehand.
+ * The macro expects vp, d and syms to be in scope at the point of use and evaluates i several times.
+ */
+#define BFLY(i) {\
+unsigned long metric,m0,m1,m2,m3,decision0,decision1;\
+ metric = ((Branchtab615[0].w[i] ^ syms[0]) + (Branchtab615[1].w[i] ^ syms[1])\
+ +(Branchtab615[2].w[i] ^ syms[2]) + (Branchtab615[3].w[i] ^ syms[3])\
+ +(Branchtab615[4].w[i] ^ syms[4]) + (Branchtab615[5].w[i] ^ syms[5]));\
+ m0 = vp->old_metrics->w[i] + metric;\
+ m1 = vp->old_metrics->w[i+8192] + (1530 - metric);\
+ m2 = vp->old_metrics->w[i] + (1530-metric);\
+ m3 = vp->old_metrics->w[i+8192] + metric;\
+ decision0 = (signed long)(m0-m1) >= 0;\
+ decision1 = (signed long)(m2-m3) >= 0;\
+ vp->new_metrics->w[2*i] = decision0 ? m1 : m0;\
+ vp->new_metrics->w[2*i+1] = decision1 ? m3 : m2;\
+ d->c[i/4] |= ((decision0|(decision1<<1)) << ((2*i)&7));\
+}
+/* Run the decoder over a block of demodulated symbols.
+ * nbits counts decoded data bits, not symbols: six symbols are consumed per bit.
+ * One decision record is filled per bit starting at the instance's current
+ * decision pointer, which is advanced accordingly.
+ * Returns 0, or -1 if p is NULL.
+ */
+
+int update_viterbi615_blk_port(void *p,unsigned char *syms,int nbits){
+  struct v615 *vp = p; /* BFLY refers to this by name */
+  decision_t *d;       /* BFLY refers to this by name */
+
+  if(vp == NULL)
+    return -1;
+
+  for(d = (decision_t *)vp->dp;nbits != 0;nbits--){
+    metric_t *swap;
+    int bfly;
+
+    /* BFLY ORs its decision bits in, so start from a clean record */
+    memset(d,0,sizeof(decision_t));
+    for(bfly=0;bfly<8192;bfly++)
+      BFLY(bfly);
+
+    syms += 6;
+    d++;
+
+    /* The metrics just written become the input for the next bit */
+    swap = vp->old_metrics;
+    vp->old_metrics = vp->new_metrics;
+    vp->new_metrics = swap;
+  }
+  vp->dp = d;
+  return 0;
+}
+
diff --git a/viterbi615_sse.c b/viterbi615_sse.c
new file mode 100644
index 0000000..de0f8af
--- /dev/null
+++ b/viterbi615_sse.c
@@ -0,0 +1,201 @@
+/* K=15 r=1/6 Viterbi decoder for x86 SSE
+ * Copyright Mar 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <xmmintrin.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <limits.h>
+#include "fec.h"
+
+typedef union { unsigned long w[512]; unsigned char c[2048];} decision_t; /* decisions for one bit time, one bit per state, read and written through c[]; NOTE(review): w[512] spans the same 2048 bytes only where unsigned long is 32 bits */
+typedef union { signed short s[16384]; __m64 v[4096];} metric_t; /* one signed 16-bit path metric per state, also viewable as 4096 MMX words */
+
+static union branchtab615 { unsigned short s[8192]; __m64 v[2048];} Branchtab615[6]; /* one table per polynomial; entries are 0 or 255, filled by set_viterbi615_polynomial_sse() */
+static int Init = 0; /* nonzero once the branch tables have been filled */
+
+/* State info for instance of Viterbi decoder */
+struct v615 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ void *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ void *decisions; /* Beginning of decisions for block */
+};
+
+/* Reset a decoder instance for the start of a new frame.
+ * Path metrics are signed here: every state starts at SHRT_MIN+5000 except
+ * starting_state (taken modulo 16384), which starts at SHRT_MIN.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi615_sse(void *p,int starting_state){
+  struct v615 *dec = p;
+  int state = 0;
+
+  if(dec == NULL)
+    return -1;
+
+  while(state < 16384){
+    dec->metrics1.s[state] = (SHRT_MIN+5000);
+    state++;
+  }
+  dec->metrics1.s[starting_state & 16383] = SHRT_MIN; /* Bias known start state */
+
+  dec->old_metrics = &dec->metrics1;
+  dec->new_metrics = &dec->metrics2;
+  dec->dp = dec->decisions; /* rewind to the first decision of the block */
+  return 0;
+}
+
+/* Allocate and initialize a decoder with room for len+14 decision records.
+ * On the first call (Init still 0) the branch tables are built from the V615POLY* polynomials.
+ * Returns the new instance, or NULL if either allocation fails.
+ */
+void *create_viterbi615_sse(int len){
+  struct v615 *dec;
+
+  if(Init == 0){
+    int default_polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF };
+    set_viterbi615_polynomial_sse(default_polys);
+  }
+
+  dec = (struct v615 *)malloc(sizeof(struct v615));
+  if(dec == NULL)
+    return NULL;
+
+  dec->decisions = malloc((len+14)*sizeof(decision_t));
+  if(dec->decisions == NULL){
+    free(dec); /* give back the state block if the decision buffer can't be had */
+    return NULL;
+  }
+  init_viterbi615_sse(dec,0);
+  return dec;
+}
+
+/* Fill the six branch tables from polys[].
+ * Entry [n][state] becomes 255 when (polys[n] < 0) XOR parity((2*state) & |polys[n]|) is nonzero, else 0.
+ * Bumps Init so create_viterbi615_sse() knows the tables are ready.
+ */
+void set_viterbi615_polynomial_sse(int polys[6]){
+  int n;
+  int state;
+
+  for(n=0;n<6;n++){
+    for(state=0;state < 8192;state++){
+      int flag = (polys[n] < 0) ^ parity((2*state) & abs(polys[n]));
+
+      Branchtab615[n].s[state] = flag ? 255 : 0;
+    }
+  }
+  Init++;
+}
+
+/* Viterbi chainback
+ * Follows the stored decision bits backwards from endstate and writes the
+ * decoded bytes into data[]. Returns 0, or -1 if p is NULL.
+ */
+int chainback_viterbi615_sse(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v615 *dec = p;
+  decision_t *dtab;
+
+  if(dec == NULL)
+    return -1;
+
+  dtab = (decision_t *)dec->decisions + 14; /* Look past tail */
+  endstate &= 16383;
+
+  /* Storing into data[] on every bit (instead of every 8th) avoids a
+   * conditional branch; the writes combine in the cache anyway
+   */
+  for(;nbits != 0;nbits--){
+    unsigned int idx = nbits-1;
+    unsigned char packed = dtab[idx].c[endstate >> 3]; /* decisions are fetched bytewise */
+    int bit = (packed >> (endstate & 7)) & 1;
+
+    endstate = (bit << 13) | (endstate >> 1);
+    data[idx>>3] = endstate >> 6;
+  }
+  return 0;
+}
+
+/* Free a decoder instance together with its decision buffer; a NULL p is ignored */
+void delete_viterbi615_sse(void *p){
+  struct v615 *dec = p;
+
+  if(dec == NULL)
+    return;
+  free(dec->decisions);
+  free(dec);
+}
+
+/* Update the decoder with a block of demodulated symbols.
+ * p is the decoder instance; syms supplies six symbols per bit (syms[0..5] are read
+ * and the pointer advances by 6 each pass); nbits is the number of passes.
+ * One decision_t is written per bit starting at vp->dp, and vp->dp is advanced past them.
+ * Path metrics are signed and accumulated with saturating adds; they are renormalized
+ * whenever the metric of state 0 reaches SHRT_MAX-12750.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int update_viterbi615_blk_sse(void *p,unsigned char *syms,int nbits){
+ struct v615 *vp = p;
+ decision_t *d;
+
+ if(p == NULL)
+ return -1;
+ d = (decision_t *)vp->dp;
+ while(nbits--){
+ __m64 sym0v,sym1v,sym2v,sym3v,sym4v,sym5v;
+ void *tmp;
+ int i;
+
+ /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
+ sym0v = _mm_set1_pi16(syms[0]);
+ sym1v = _mm_set1_pi16(syms[1]);
+ sym2v = _mm_set1_pi16(syms[2]);
+ sym3v = _mm_set1_pi16(syms[3]);
+ sym4v = _mm_set1_pi16(syms[4]);
+ sym5v = _mm_set1_pi16(syms[5]);
+ syms += 6;
+
+ for(i=0;i<2048;i++){ /* each pass handles four butterflies (four 16-bit lanes) */
+ __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+ /* Form branch metrics
+ * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
+ * the XOR operations constitute conditional negation.
+ * metric and m_metric (-metric) are in the range 0-1530
+ */
+ m0 = _mm_add_pi16(_mm_xor_si64(Branchtab615[0].v[i],sym0v),_mm_xor_si64(Branchtab615[1].v[i],sym1v));
+ m1 = _mm_add_pi16(_mm_xor_si64(Branchtab615[2].v[i],sym2v),_mm_xor_si64(Branchtab615[3].v[i],sym3v));
+ m2 = _mm_add_pi16(_mm_xor_si64(Branchtab615[4].v[i],sym4v),_mm_xor_si64(Branchtab615[5].v[i],sym5v));
+ metric = _mm_add_pi16(m0,_mm_add_pi16(m1,m2));
+ m_metric = _mm_sub_pi16(_mm_set1_pi16(1530),metric);
+
+ /* Add branch metrics to path metrics */
+ m0 = _mm_adds_pi16(vp->old_metrics->v[i],metric);
+ m3 = _mm_adds_pi16(vp->old_metrics->v[2048+i],metric);
+ m1 = _mm_adds_pi16(vp->old_metrics->v[2048+i],m_metric);
+ m2 = _mm_adds_pi16(vp->old_metrics->v[i],m_metric);
+
+ /* Compare and select */
+ survivor0 = _mm_min_pi16(m0,m1);
+ survivor1 = _mm_min_pi16(m2,m3);
+ decision0 = _mm_cmpeq_pi16(survivor0,m1);
+ decision1 = _mm_cmpeq_pi16(survivor1,m3);
+
+ /* Pack decisions into 8 bits and store */
+ d->c[i] = _mm_movemask_pi8(_mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64())));
+
+ /* Store surviving metrics */
+ vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1);
+ vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1);
+ }
+ /* See if we need to renormalize
+ * Max metric spread for this code with 0-255 branch metrics is 12750
+ */
+ if(vp->new_metrics->s[0] >= SHRT_MAX-12750){
+ int i,adjust;
+ __m64 adjustv;
+ union { __m64 v; signed short w[4]; } t;
+
+ /* Find smallest metric and set adjustv to bring it down to SHRT_MIN */
+ adjustv = vp->new_metrics->v[0];
+ for(i=1;i<4096;i++)
+ adjustv = _mm_min_pi16(adjustv,vp->new_metrics->v[i]);
+
+ adjustv = _mm_min_pi16(adjustv,_mm_srli_si64(adjustv,32));
+ adjustv = _mm_min_pi16(adjustv,_mm_srli_si64(adjustv,16));
+ t.v = adjustv;
+ adjust = t.w[0] - SHRT_MIN; /* lane 0 now holds the overall minimum */
+ adjustv = _mm_set1_pi16(adjust);
+
+ for(i=0;i<4096;i++)
+ vp->new_metrics->v[i] = _mm_sub_pi16(vp->new_metrics->v[i],adjustv);
+ }
+ d++;
+ /* Swap pointers to old and new metrics */
+ tmp = vp->old_metrics;
+ vp->old_metrics = vp->new_metrics;
+ vp->new_metrics = tmp;
+ }
+ vp->dp = d;
+ _mm_empty(); /* leave MMX state before returning */
+ return 0;
+}
diff --git a/viterbi615_sse2.c b/viterbi615_sse2.c
new file mode 100644
index 0000000..7f711e5
--- /dev/null
+++ b/viterbi615_sse2.c
@@ -0,0 +1,204 @@
+/* K=15 r=1/6 Viterbi decoder for x86 SSE2
+ * Copyright Mar 2004, Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser General Public License (LGPL)
+ */
+#include <emmintrin.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <limits.h>
+#include "fec.h"
+
+typedef union { unsigned long w[512]; unsigned short s[1024];} decision_t; /* decisions for one bit time, one bit per state; the update routine writes them as 1024 16-bit words through s[]. NOTE(review): w[512] overlays the same 2048 bytes only where unsigned long is 32 bits */
+typedef union { signed short s[16384]; __m128i v[2048];} metric_t; /* one signed 16-bit path metric per state, also viewable as 2048 SSE2 vectors */
+
+static union branchtab615 { unsigned short s[8192]; __m128i v[1024];} Branchtab615[6]; /* one table per polynomial; entries are 0 or 255, filled by set_viterbi615_polynomial_sse2() */
+static int Init = 0; /* nonzero once the branch tables have been filled */
+
+/* State info for instance of Viterbi decoder */
+struct v615 {
+ metric_t metrics1; /* path metric buffer 1 */
+ metric_t metrics2; /* path metric buffer 2 */
+ void *dp; /* Pointer to current decision */
+ metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
+ void *decisions; /* Beginning of decisions for block */
+};
+
+/* Prepare a decoder instance for a new frame.
+ * Path metrics are signed here: every state starts at SHRT_MIN+5000 except
+ * starting_state (taken modulo 16384), which starts at SHRT_MIN.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_viterbi615_sse2(void *p,int starting_state){
+  struct v615 *dec = p;
+  int state;
+
+  if(dec == NULL)
+    return -1;
+
+  dec->old_metrics = &dec->metrics1;
+  dec->new_metrics = &dec->metrics2;
+  dec->dp = dec->decisions; /* rewind to the first decision of the block */
+
+  for(state=0;state<16384;state++)
+    dec->old_metrics->s[state] = (SHRT_MIN+5000);
+  dec->old_metrics->s[starting_state & 16383] = SHRT_MIN; /* Bias known start state */
+  return 0;
+}
+
+/* Allocate and initialize a decoder with room for len+14 decision records.
+ * On the first call (Init still 0) the branch tables are built from the V615POLY* polynomials.
+ * The state block is obtained with posix_memalign() so it is aligned to sizeof(__m128i).
+ * Returns the new instance, or NULL if either allocation fails.
+ */
+void *create_viterbi615_sse2(int len){
+  void *raw;
+  struct v615 *dec;
+
+  if(Init == 0){
+    int default_polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF };
+    set_viterbi615_polynomial_sse2(default_polys);
+  }
+
+  /* Plain malloc() only promises 8-byte alignment; the metric vectors need 16 */
+  if(posix_memalign(&raw, sizeof(__m128i),sizeof(struct v615)) != 0)
+    return NULL;
+  dec = (struct v615 *)raw;
+
+  raw = malloc((len+14)*sizeof(decision_t));
+  if(raw == NULL){
+    free(dec); /* give back the state block if the decision buffer can't be had */
+    return NULL;
+  }
+  dec->decisions = (decision_t *)raw;
+  init_viterbi615_sse2(dec,0);
+  return dec;
+}
+
+/* Build the six branch tables from polys[].
+ * Entry [n][state] is 255 when (polys[n] < 0) XOR parity((2*state) & |polys[n]|) is nonzero, else 0.
+ * Increments Init so create_viterbi615_sse2() will not rebuild them.
+ */
+void set_viterbi615_polynomial_sse2(int polys[6]){
+  int n;
+
+  for(n=0;n<6;n++){
+    int state = 0;
+
+    while(state < 8192){
+      int flag = (polys[n] < 0) ^ parity((2*state) & abs(polys[n]));
+
+      Branchtab615[n].s[state] = flag ? 255 : 0;
+      state++;
+    }
+  }
+  Init++;
+}
+
+/* Viterbi chainback
+ * Follows the stored decision bits backwards from endstate and writes the
+ * decoded bytes into data[]. Returns 0, or -1 if p is NULL (matching the
+ * other chainback routines).
+ */
+int chainback_viterbi615_sse2(
+      void *p,
+      unsigned char *data, /* Decoded output data */
+      unsigned int nbits, /* Number of data bits */
+      unsigned int endstate){ /* Terminal encoder state */
+  struct v615 *vp = p;
+  decision_t *d;
+
+  if(p == NULL)
+    return -1;
+  d = (decision_t *)vp->decisions;
+
+  endstate %= 16384;
+
+  /* The store into data[] only needs to be done every 8 bits.
+   * But this avoids a conditional branch, and the writes will
+   * combine in the cache anyway
+   */
+  d += 14; /* Look past tail */
+  while(nbits-- != 0){
+    int k;
+
+    /* Read the decision through the same 16-bit words the update routine
+     * stores (d->s[i] holds the bits for states 16*i .. 16*i+15). Indexing
+     * the unsigned long view with /32 is only right when long is 32 bits.
+     */
+    k = (d[nbits].s[endstate/16] >> (endstate%16)) & 1;
+    endstate = (k << 13) | (endstate >> 1);
+    data[nbits>>3] = endstate >> 6;
+  }
+  return 0;
+}
+
+/* Dispose of a decoder instance and its decision buffer; does nothing when p is NULL */
+void delete_viterbi615_sse2(void *p){
+  struct v615 *dec = p;
+
+  if(dec == NULL)
+    return;
+  free(dec->decisions);
+  free(dec);
+}
+
+
+/* Update the decoder with a block of demodulated symbols.
+ * p is the decoder instance; syms supplies six symbols per bit (syms[0..5] are read
+ * and the pointer advances by 6 each pass); nbits is the number of passes.
+ * One decision_t is written per bit starting at vp->dp, and vp->dp is advanced past them.
+ * Path metrics are signed and accumulated with saturating adds; they are renormalized
+ * whenever the metric of state 0 reaches SHRT_MAX-12750.
+ * Returns 0 on success, -1 if p is NULL (matching the other update routines).
+ */
+int update_viterbi615_blk_sse2(void *p,unsigned char *syms,int nbits){
+  struct v615 *vp = p;
+  decision_t *d;
+
+  if(p == NULL)
+    return -1;
+  d = (decision_t *)vp->dp;
+
+  while(nbits--){
+    __m128i sym0v,sym1v,sym2v,sym3v,sym4v,sym5v;
+    void *tmp;
+    int i;
+
+    /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
+    sym0v = _mm_set1_epi16(syms[0]);
+    sym1v = _mm_set1_epi16(syms[1]);
+    sym2v = _mm_set1_epi16(syms[2]);
+    sym3v = _mm_set1_epi16(syms[3]);
+    sym4v = _mm_set1_epi16(syms[4]);
+    sym5v = _mm_set1_epi16(syms[5]);
+    syms += 6;
+
+    /* SSE2 doesn't support saturated adds on unsigned shorts, so we have to use signed shorts */
+    for(i=0;i<1024;i++){
+      __m128i decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
+
+      /* Form branch metrics
+       * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
+       * the XOR operations constitute conditional negation.
+       * metric and m_metric (-metric) are in the range 0-1530
+       */
+      m0 = _mm_add_epi16(_mm_xor_si128(Branchtab615[0].v[i],sym0v),_mm_xor_si128(Branchtab615[1].v[i],sym1v));
+      m1 = _mm_add_epi16(_mm_xor_si128(Branchtab615[2].v[i],sym2v),_mm_xor_si128(Branchtab615[3].v[i],sym3v));
+      m2 = _mm_add_epi16(_mm_xor_si128(Branchtab615[4].v[i],sym4v),_mm_xor_si128(Branchtab615[5].v[i],sym5v));
+      metric = _mm_add_epi16(m0,_mm_add_epi16(m1,m2));
+      m_metric = _mm_sub_epi16(_mm_set1_epi16(1530),metric);
+
+      /* Add branch metrics to path metrics */
+      m0 = _mm_adds_epi16(vp->old_metrics->v[i],metric);
+      m3 = _mm_adds_epi16(vp->old_metrics->v[1024+i],metric);
+      m1 = _mm_adds_epi16(vp->old_metrics->v[1024+i],m_metric);
+      m2 = _mm_adds_epi16(vp->old_metrics->v[i],m_metric);
+
+      /* Compare and select */
+      survivor0 = _mm_min_epi16(m0,m1);
+      survivor1 = _mm_min_epi16(m2,m3);
+      decision0 = _mm_cmpeq_epi16(survivor0,m1);
+      decision1 = _mm_cmpeq_epi16(survivor1,m3);
+
+      /* Pack each set of decisions into 8 8-bit bytes, then interleave them and compress into 16 bits */
+      d->s[i] = _mm_movemask_epi8(_mm_unpacklo_epi8(_mm_packs_epi16(decision0,_mm_setzero_si128()),_mm_packs_epi16(decision1,_mm_setzero_si128())));
+
+      /* Store surviving metrics */
+      vp->new_metrics->v[2*i] = _mm_unpacklo_epi16(survivor0,survivor1);
+      vp->new_metrics->v[2*i+1] = _mm_unpackhi_epi16(survivor0,survivor1);
+    }
+    /* See if we need to renormalize
+     * Max metric spread for this code with 0-255 branch metrics is 12750
+     */
+    if(vp->new_metrics->s[0] >= SHRT_MAX-12750){
+      int i,adjust;
+      __m128i adjustv;
+      union { __m128i v; signed short w[8]; } t;
+
+      /* Find smallest metric and set adjustv to bring it down to SHRT_MIN */
+      adjustv = vp->new_metrics->v[0];
+      for(i=1;i<2048;i++)
+        adjustv = _mm_min_epi16(adjustv,vp->new_metrics->v[i]);
+
+      adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,8));
+      adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,4));
+      adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,2));
+      t.v = adjustv;
+      adjust = t.w[0] - SHRT_MIN; /* lane 0 now holds the overall minimum */
+      adjustv = _mm_set1_epi16(adjust);
+
+      /* We cannot use a saturated subtract, because we often have to adjust by more than SHRT_MAX
+       * This is okay since it can't overflow anyway
+       */
+      for(i=0;i<2048;i++)
+        vp->new_metrics->v[i] = _mm_sub_epi16(vp->new_metrics->v[i],adjustv);
+    }
+    d++;
+    /* Swap pointers to old and new metrics */
+    tmp = vp->old_metrics;
+    vp->old_metrics = vp->new_metrics;
+    vp->new_metrics = tmp;
+  }
+  vp->dp = d;
+  return 0;
+}
+
+
diff --git a/vtest27.c b/vtest27.c
new file mode 100644
index 0000000..7256483
--- /dev/null
+++ b/vtest27.c
@@ -0,0 +1,184 @@
+/* Test viterbi decoder speeds */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <math.h>
+#include <memory.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+#include "fec.h"
+
+#if HAVE_GETOPT_LONG /* the long-option table is only compiled when getopt_long() is available */
+struct option Options[] = { /* each entry: long name, takes-argument flag, flag pointer (unused), short option letter handled in main() */
+ {"frame-length",1,NULL,'l'},
+ {"frame-count",1,NULL,'n'},
+ {"ebn0",1,NULL,'e'},
+ {"gain",1,NULL,'g'},
+ {"verbose",0,NULL,'v'},
+ {"force-altivec",0,NULL,'a'},
+ {"force-port",0,NULL,'p'},
+ {"force-mmx",0,NULL,'m'},
+ {"force-sse",0,NULL,'s'},
+ {"force-sse2",0,NULL,'t'},
+ {NULL}, /* end-of-table marker */
+};
+#endif
+
+#define RATE (1./2.) /* code rate; main() uses it to convert Eb/No to Es/No */
+#define MAXBYTES 10000 /* size of the per-frame byte buffers in main(); frames are capped at 8*MAXBYTES bits */
+
+double Gain = 32.0; /* set by -g and passed to addnoise() */
+int Verbose = 0; /* incremented once per -v */
+
+/* Test driver for the K=7 rate 1/2 Viterbi decoder.
+ * With -e <Eb/No in dB>: encodes `trials` frames of random data, passes the symbols through
+ * addnoise(), decodes them and reports bit and frame error counts.
+ * Without -e: times `trials` decodes of a constant symbol buffer and reports the speed.
+ * Options: -l frame length in bits, -n frame count, -e Eb/No, -g gain, -v verbose (repeatable),
+ * -a/-p/-m/-s/-t set Cpu_mode to ALTIVEC/PORT/MMX/SSE/SSE2.
+ */
+int main(int argc,char *argv[]){
+  int i,d,tr;
+  int sr=0,trials = 10000,errcnt,framebits=2048;
+  long long int tot_errs=0;
+  /* One spare byte: the 6 tail bits are stored too, so with framebits close to
+   * 8*MAXBYTES the index i/8 reaches MAXBYTES
+   */
+  unsigned char bits[MAXBYTES+1];
+  unsigned char data[MAXBYTES];
+  unsigned char xordata[MAXBYTES];
+  unsigned char symbols[8*2*(MAXBYTES+6)];
+  void *vp;
+  extern char *optarg;
+  struct rusage start,finish;
+  double extime;
+  double gain,esn0,ebn0;
+  time_t t;
+  int badframes=0;
+
+  time(&t);
+  srandom(t);
+  ebn0 = -100; /* sentinel: no -e given, so run the timing test */
+#if HAVE_GETOPT_LONG
+  while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){
+#else
+  while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){
+#endif
+    switch(d){
+    case 'a':
+      Cpu_mode = ALTIVEC;
+      break;
+    case 'p':
+      Cpu_mode = PORT;
+      break;
+    case 'm':
+      Cpu_mode = MMX;
+      break;
+    case 's':
+      Cpu_mode = SSE;
+      break;
+    case 't':
+      Cpu_mode = SSE2;
+      break;
+    case 'l':
+      framebits = atoi(optarg);
+      break;
+    case 'n':
+      trials = atoi(optarg);
+      break;
+    case 'e':
+      ebn0 = atof(optarg);
+      break;
+    case 'g':
+      Gain = atof(optarg);
+      break;
+    case 'v':
+      Verbose++;
+      break;
+    }
+  }
+  if(framebits > 8*MAXBYTES){
+    fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8);
+    framebits = MAXBYTES*8;
+  }
+  if((vp = create_viterbi27(framebits)) == NULL){
+    printf("create_viterbi27 failed\n");
+    exit(1);
+  }
+  if(ebn0 != -100){
+    esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */
+    /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing
+     * only half the noise power, and the sqrt() converts power to
+     * voltage.
+     */
+    gain = 1./sqrt(0.5/pow(10.,esn0/10.));
+
+    printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+
+    for(tr=0;tr<trials;tr++){
+      /* Encode a frame of random data */
+      for(i=0;i<framebits+6;i++){
+        int bit = (i < framebits) ? (random() & 1) : 0;
+
+        /* Keep only the low 16 bits of the shift register: nothing above them
+         * is used for this code, and masking stops the signed shift from
+         * eventually overflowing
+         */
+        sr = ((sr << 1) | bit) & 0xffff;
+        bits[i/8] = sr & 0xff;
+        symbols[2*i+0] = addnoise(parity(sr & V27POLYA),gain,Gain,127.5,255);
+        symbols[2*i+1] = addnoise(parity(sr & V27POLYB),gain,Gain,127.5,255);
+      }
+      /* Decode it and make sure we get the right answer */
+      /* Initialize Viterbi decoder */
+      init_viterbi27(vp,0);
+
+      /* Decode block */
+      update_viterbi27_blk(vp,symbols,framebits+6);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi27(vp,data,framebits,0);
+      errcnt = 0;
+      for(i=0;i<framebits/8;i++){
+        int e = Bitcnt[xordata[i] = data[i] ^ bits[i]];
+        errcnt += e;
+        tot_errs += e;
+      }
+      if(errcnt != 0)
+        badframes++;
+      if(Verbose > 1 && errcnt != 0){
+        printf("frame %d, %d errors: ",tr,errcnt);
+        for(i=0;i<framebits/8;i++){
+          printf("%02x",xordata[i]);
+        }
+        printf("\n");
+      }
+      if(Verbose)
+        printf("BER %lld/%lld (%10.3g) FER %d/%d (%10.3g)\r",
+               tot_errs,(long long)framebits*(tr+1),tot_errs/((double)framebits*(tr+1)),
+               badframes,tr+1,(double)badframes/(tr+1));
+      fflush(stdout);
+    }
+    if(Verbose > 1)
+      printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+    else if(Verbose == 0)
+      /* tr equals trials once the loop has finished, so the frame count here is
+       * trials (not tr+1), in line with the BER denominator
+       */
+      printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n",
+             tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials),
+             badframes,trials,(double)badframes/trials);
+    else
+      printf("\n");
+
+  } else {
+    /* Do time trials */
+    memset(symbols,127,sizeof(symbols));
+    printf("Starting time trials\n");
+    getrusage(RUSAGE_SELF,&start);
+    for(tr=0;tr < trials;tr++){
+      /* Initialize Viterbi decoder */
+      init_viterbi27(vp,0);
+
+      /* Decode block */
+      update_viterbi27_blk(vp,symbols,framebits);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi27(vp,data,framebits,0);
+    }
+    getrusage(RUSAGE_SELF,&finish);
+    extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec);
+    printf("Execution time for %d %d-bit frames: %.2f sec\n",trials,
+           framebits,extime);
+    /* Multiply in double so a large trials*framebits cannot overflow int */
+    printf("decoder speed: %g bits/s\n",(double)trials*framebits/extime);
+  }
+  exit(0);
+}
diff --git a/vtest29.c b/vtest29.c
new file mode 100644
index 0000000..8471b54
--- /dev/null
+++ b/vtest29.c
@@ -0,0 +1,185 @@
+/* Test viterbi decoder speeds */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <math.h>
+#include <memory.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+#include "fec.h"
+
+#if HAVE_GETOPT_LONG /* the long-option table is only compiled when getopt_long() is available */
+struct option Options[] = { /* each entry: long name, takes-argument flag, flag pointer (unused), short option letter handled in main() */
+ {"frame-length",1,NULL,'l'},
+ {"frame-count",1,NULL,'n'},
+ {"ebn0",1,NULL,'e'},
+ {"gain",1,NULL,'g'},
+ {"verbose",0,NULL,'v'},
+ {"force-altivec",0,NULL,'a'},
+ {"force-port",0,NULL,'p'},
+ {"force-mmx",0,NULL,'m'},
+ {"force-sse",0,NULL,'s'},
+ {"force-sse2",0,NULL,'t'},
+ {NULL}, /* end-of-table marker */
+};
+#endif
+
+#define RATE (1./2.) /* code rate; main() uses it to convert Eb/No to Es/No */
+#define MAXBYTES 10000 /* size of the per-frame byte buffers in main(); frames are capped at 8*MAXBYTES bits */
+
+double Gain = 32.0; /* set by -g and passed to addnoise() */
+int Verbose = 0; /* incremented once per -v */
+
+/* Test driver for the K=9 rate 1/2 Viterbi decoder.
+ * With -e <Eb/No in dB>: encodes `trials` frames of random data, passes the symbols through
+ * addnoise(), decodes them and reports bit and frame error counts.
+ * Without -e: times `trials` decodes of a constant symbol buffer and reports the speed.
+ * Options: -l frame length in bits, -n frame count, -e Eb/No, -g gain, -v verbose (repeatable),
+ * -a/-p/-m/-s/-t set Cpu_mode to ALTIVEC/PORT/MMX/SSE/SSE2.
+ */
+int main(int argc,char *argv[]){
+  int i,d,tr;
+  int sr=0,trials = 10000,errcnt,framebits=2048;
+  long long tot_errs=0;
+  /* One spare byte: the 8 tail bits are stored too, so with framebits close to
+   * 8*MAXBYTES the index i/8 reaches MAXBYTES
+   */
+  unsigned char bits[MAXBYTES+1];
+  unsigned char data[MAXBYTES];
+  unsigned char xordata[MAXBYTES];
+  unsigned char symbols[8*2*(MAXBYTES+8)];
+  void *vp;
+  extern char *optarg;
+  struct rusage start,finish;
+  double extime;
+  double gain,esn0,ebn0;
+  time_t t;
+  int badframes=0;
+
+  time(&t);
+  srandom(t);
+  ebn0 = -100; /* sentinel: no -e given, so run the timing test */
+#if HAVE_GETOPT_LONG
+  while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){
+#else
+  while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){
+#endif
+    switch(d){
+    case 'a':
+      Cpu_mode = ALTIVEC;
+      break;
+    case 'p':
+      Cpu_mode = PORT;
+      break;
+    case 'm':
+      Cpu_mode = MMX;
+      break;
+    case 's':
+      Cpu_mode = SSE;
+      break;
+    case 't':
+      Cpu_mode = SSE2;
+      break;
+    case 'l':
+      framebits = atoi(optarg);
+      break;
+    case 'n':
+      trials = atoi(optarg);
+      break;
+    case 'e':
+      ebn0 = atof(optarg);
+      break;
+    case 'g':
+      Gain = atof(optarg);
+      break;
+    case 'v':
+      Verbose++;
+      break;
+    }
+  }
+  if(framebits > 8*MAXBYTES){
+    fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8);
+    framebits = MAXBYTES*8;
+  }
+  if((vp = create_viterbi29(framebits)) == NULL){
+    printf("create_viterbi29 failed\n");
+    exit(1);
+  }
+  if(ebn0 != -100){
+    esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */
+    /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing
+     * only half the noise power, and the sqrt() converts power to
+     * voltage.
+     */
+    gain = 1./sqrt(0.5/pow(10.,esn0/10.));
+
+    printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+
+    for(tr=0;tr<trials;tr++){
+      /* Encode a frame of random data */
+      for(i=0;i<framebits+8;i++){
+        int bit = (i < framebits) ? (random() & 1) : 0;
+
+        /* Keep only the low 16 bits of the shift register: nothing above them
+         * is used for this code, and masking stops the signed shift from
+         * eventually overflowing
+         */
+        sr = ((sr << 1) | bit) & 0xffff;
+        bits[i/8] = sr & 0xff;
+        symbols[2*i+0] = addnoise(parity(sr & V29POLYA),gain,Gain,127.5,255);
+        symbols[2*i+1] = addnoise(parity(sr & V29POLYB),gain,Gain,127.5,255);
+      }
+      /* Decode it and make sure we get the right answer */
+      /* Initialize Viterbi decoder */
+      init_viterbi29(vp,0);
+
+      /* Decode block */
+      update_viterbi29_blk(vp,symbols,framebits+8);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi29(vp,data,framebits,0);
+      errcnt = 0;
+      for(i=0;i<framebits/8;i++){
+        int e = Bitcnt[xordata[i] = data[i] ^ bits[i]];
+        errcnt += e;
+        tot_errs += e;
+      }
+      if(errcnt != 0)
+        badframes++;
+      if(Verbose > 1 && errcnt != 0){
+        printf("frame %d, %d errors: ",tr,errcnt);
+        for(i=0;i<framebits/8;i++){
+          printf("%02x",xordata[i]);
+        }
+        printf("\n");
+      }
+      if(Verbose)
+        printf("BER %lld/%lld (%10.3g) FER %d/%d (%10.3g)\r",
+               tot_errs,(long long)framebits*(tr+1),tot_errs/((double)framebits*(tr+1)),
+               badframes,tr+1,(double)badframes/(tr+1));
+      fflush(stdout);
+    }
+    if(Verbose > 1)
+      printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+    else if(Verbose == 0)
+      /* tr equals trials once the loop has finished, so the frame count here is
+       * trials (not tr+1), in line with the BER denominator
+       */
+      printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n",
+             tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials),
+             badframes,trials,(double)badframes/trials);
+    else
+      printf("\n");
+  } else {
+    /* Do time trials */
+    memset(symbols,127,sizeof(symbols));
+    printf("Starting time trials\n");
+    getrusage(RUSAGE_SELF,&start);
+    for(tr=0;tr < trials;tr++){
+      /* Initialize Viterbi decoder */
+      init_viterbi29(vp,0);
+
+      /* Decode block */
+      update_viterbi29_blk(vp,symbols,framebits);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi29(vp,data,framebits,0);
+    }
+    getrusage(RUSAGE_SELF,&finish);
+    extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec);
+    printf("Execution time for %d %d-bit frames: %.2f sec\n",trials,
+           framebits,extime);
+    /* Multiply in double so a large trials*framebits cannot overflow int */
+    printf("decoder speed: %g bits/s\n",(double)trials*framebits/extime);
+  }
+  exit(0);
+}
+
+
diff --git a/vtest39.c b/vtest39.c
new file mode 100644
index 0000000..76723b2
--- /dev/null
+++ b/vtest39.c
@@ -0,0 +1,186 @@
+/* Test viterbi decoder speeds */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <math.h>
+#include <memory.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+#include "fec.h"
+
+#if HAVE_GETOPT_LONG
+struct option Options[] = { /* long-option table handed to getopt_long() in main(); the last field is the short option each name maps to */
+ {"frame-length",1,NULL,'l'}, /* takes a value: frame length in bits (framebits) */
+ {"frame-count",1,NULL,'n'}, /* takes a value: number of frames to run (trials) */
+ {"ebn0",1,NULL,'e'}, /* takes a value: Eb/N0 in dB; giving it selects the error-rate test */
+ {"gain",1,NULL,'g'}, /* takes a value: overrides Gain */
+ {"verbose",0,NULL,'v'}, /* flag: each occurrence increments Verbose */
+ {"force-altivec",0,NULL,'a'}, /* flag: Cpu_mode = ALTIVEC */
+ {"force-port",0,NULL,'p'}, /* flag: Cpu_mode = PORT */
+ {"force-mmx",0,NULL,'m'}, /* flag: Cpu_mode = MMX */
+ {"force-sse",0,NULL,'s'}, /* flag: Cpu_mode = SSE */
+ {"force-sse2",0,NULL,'t'}, /* flag: Cpu_mode = SSE2 */
+ {NULL}, /* terminator: name is NULL, remaining fields default to zero */
+};
+#endif
+
+#define RATE (1./3.) /* code rate; main() uses it to convert Eb/N0 to Es/N0 */
+#define MAXBYTES 10000 /* size in bytes of the per-frame buffers; main() caps framebits at 8*MAXBYTES */
+
+double Gain = 32.0; /* default for -g; passed as the third argument of addnoise() */
+int Verbose = 0; /* verbosity level, incremented once per -v */
+
+/* Test driver for the viterbi39 decoder. With -e <Eb/N0 in dB> it encodes random
+ * frames, adds noise, decodes them and reports bit and frame error counts;
+ * without -e it times the decoder on a constant symbol buffer. */
+int main(int argc,char *argv[]){
+  int i,d,tr;
+  unsigned int sr=0; /* encoder shift register; unsigned so that shifting it indefinitely is well defined */
+  int trials = 10000,errcnt,framebits=2048;
+  long long tot_errs=0;
+  unsigned char bits[MAXBYTES];
+  unsigned char data[MAXBYTES];
+  unsigned char xordata[MAXBYTES];
+  unsigned char symbols[8*3*(MAXBYTES+8)];
+  void *vp;
+  extern char *optarg;
+  struct rusage start,finish;
+  double extime;
+  double gain,esn0,ebn0;
+  int badframes=0;
+
+  srandom((unsigned)time(NULL));
+  ebn0 = -100; /* sentinel meaning "-e not given": run the time trials instead */
+#if HAVE_GETOPT_LONG
+  while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){
+#else
+  while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){
+#endif
+    switch(d){
+    case 'a':
+      Cpu_mode = ALTIVEC;
+      break;
+    case 'p':
+      Cpu_mode = PORT;
+      break;
+    case 'm':
+      Cpu_mode = MMX;
+      break;
+    case 's':
+      Cpu_mode = SSE;
+      break;
+    case 't':
+      Cpu_mode = SSE2;
+      break;
+    case 'l':
+      framebits = atoi(optarg);
+      break;
+    case 'n':
+      trials = atoi(optarg);
+      break;
+    case 'e':
+      ebn0 = atof(optarg);
+      break;
+    case 'g':
+      Gain = atof(optarg);
+      break;
+    case 'v':
+      Verbose++;
+      break;
+    }
+  }
+  if(framebits > 8*MAXBYTES){
+    fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8);
+    framebits = MAXBYTES*8;
+  }
+  if((vp = create_viterbi39(framebits)) == NULL){
+    printf("create_viterbi39 failed\n");
+    exit(1);
+  }
+  if(ebn0 != -100){
+    esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */
+    /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing
+     * only half the noise power, and the sqrt() converts power to
+     * voltage.
+     */
+    gain = 1./sqrt(0.5/pow(10.,esn0/10.));
+    printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+
+    for(tr=0;tr<trials;tr++){
+      /* Encode a frame of random data followed by 8 zero tail bits */
+      for(i=0;i<framebits+8;i++){
+        int bit = (i < framebits) ? (random() & 1) : 0;
+        sr = (sr << 1) | bit;
+        if(i < framebits) bits[i/8] = sr & 0xff; /* tail bits are never compared; storing them would write past bits[] at the maximum frame size */
+        symbols[3*i+0] = addnoise(parity(sr & V39POLYA),gain,Gain,127.5,255);
+        symbols[3*i+1] = addnoise(parity(sr & V39POLYB),gain,Gain,127.5,255);
+        symbols[3*i+2] = addnoise(parity(sr & V39POLYC),gain,Gain,127.5,255);
+      }
+      /* Decode it and make sure we get the right answer */
+      /* Initialize Viterbi decoder */
+      init_viterbi39(vp,0);
+
+      /* Decode block (data bits plus the 8 tail bits) */
+      update_viterbi39_blk(vp,symbols,framebits+8);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi39(vp,data,framebits,0);
+      errcnt = 0;
+      for(i=0;i<framebits/8;i++){ /* count differing bits, one whole byte at a time */
+        int e = Bitcnt[xordata[i] = data[i] ^ bits[i]];
+        errcnt += e;
+        tot_errs += e;
+      }
+      if(errcnt != 0)
+        badframes++;
+      if(Verbose > 1 && errcnt != 0){
+        printf("frame %d, %d errors: ",tr,errcnt);
+        for(i=0;i<framebits/8;i++){
+          printf("%02x",xordata[i]);
+        }
+        printf("\n");
+      }
+      if(Verbose)
+        printf("BER %lld/%lld (%10.3g) FER %d/%d (%10.3g)\r",
+               tot_errs,(long long)framebits*(tr+1),tot_errs/((double)framebits*(tr+1)),
+               badframes,tr+1,(double)badframes/(tr+1));
+      fflush(stdout);
+    }
+    if(Verbose > 1)
+      printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+    else if(Verbose == 0)
+      printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n",
+             tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials),
+             badframes,trials,(double)badframes/trials); /* tr == trials after the loop, so tr+1 would overcount the frames */
+    else
+      printf("\n");
+  } else {
+    /* Do time trials on a constant symbol buffer */
+    memset(symbols,127,sizeof(symbols));
+    printf("Starting time trials\n");
+    getrusage(RUSAGE_SELF,&start);
+    for(tr=0;tr < trials;tr++){
+      /* Initialize Viterbi decoder */
+      init_viterbi39(vp,0);
+
+      /* Decode block */
+      update_viterbi39_blk(vp,symbols,framebits);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi39(vp,data,framebits,0);
+    }
+    getrusage(RUSAGE_SELF,&finish);
+    extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); /* user CPU seconds */
+    printf("Execution time for %d %d-bit frames: %.2f sec\n",trials,
+           framebits,extime);
+    printf("decoder speed: %g bits/s\n",(double)trials*framebits/extime); /* multiply in double so the product cannot overflow int */
+  }
+  exit(0);
+}
+
+
diff --git a/vtest615.c b/vtest615.c
new file mode 100644
index 0000000..4bd8c4f
--- /dev/null
+++ b/vtest615.c
@@ -0,0 +1,191 @@
+/* Test viterbi decoder speeds */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <math.h>
+#include <memory.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+#include "fec.h"
+
+#if HAVE_GETOPT_LONG
+struct option Options[] = { /* long-option table handed to getopt_long() in main(); the last field is the short option each name maps to */
+ {"frame-length",1,NULL,'l'}, /* takes a value: frame length in bits (framebits) */
+ {"frame-count",1,NULL,'n'}, /* takes a value: number of frames to run (trials) */
+ {"ebn0",1,NULL,'e'}, /* takes a value: Eb/N0 in dB; giving it selects the error-rate test */
+ {"gain",1,NULL,'g'}, /* takes a value: overrides Gain */
+ {"verbose",0,NULL,'v'}, /* flag: each occurrence increments Verbose */
+ {"force-altivec",0,NULL,'a'}, /* flag: Cpu_mode = ALTIVEC */
+ {"force-port",0,NULL,'p'}, /* flag: Cpu_mode = PORT */
+ {"force-mmx",0,NULL,'m'}, /* flag: Cpu_mode = MMX */
+ {"force-sse",0,NULL,'s'}, /* flag: Cpu_mode = SSE */
+ {"force-sse2",0,NULL,'t'}, /* flag: Cpu_mode = SSE2 */
+ {NULL}, /* terminator: name is NULL, remaining fields default to zero */
+};
+#endif
+
+#define RATE (1./6.) /* code rate; main() uses it to convert Eb/N0 to Es/N0 */
+#define MAXBYTES 10000 /* size in bytes of the per-frame buffers; main() caps framebits at 8*MAXBYTES */
+#define OFFSET (127.5) /* fourth argument of every addnoise() call in main() */
+#define CLIP 255 /* fifth argument of every addnoise() call in main() */
+
+double Gain = 24.0; /* default for -g; passed as the third argument of addnoise() */
+int Verbose = 0; /* verbosity level, incremented once per -v */
+
+/* Test driver for the viterbi615 decoder. With -e <Eb/N0 in dB> it encodes random
+ * frames, adds noise, decodes them and reports bit and frame error counts;
+ * without -e it times the decoder on a constant symbol buffer. */
+int main(int argc,char *argv[]){
+  int i,d,tr;
+  unsigned int sr=0; /* encoder shift register; unsigned so that shifting it indefinitely is well defined */
+  int trials = 10,errcnt,framebits=2048;
+  long long tot_errs=0; /* running bit-error total; wide enough not to overflow on long runs */
+  unsigned char bits[MAXBYTES];
+  unsigned char data[MAXBYTES];
+  unsigned char xordata[MAXBYTES];
+  unsigned char symbols[8*6*(MAXBYTES+14)];
+  void *vp;
+  extern char *optarg;
+  struct rusage start,finish;
+  double extime;
+  double gain,esn0,ebn0;
+  int badframes=0;
+
+  srandom((unsigned)time(NULL));
+  ebn0 = -100; /* sentinel meaning "-e not given": run the time trials instead */
+#if HAVE_GETOPT_LONG
+  while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){
+#else
+  while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){
+#endif
+    switch(d){
+    case 'a':
+      Cpu_mode = ALTIVEC;
+      break;
+    case 'p':
+      Cpu_mode = PORT;
+      break;
+    case 'm':
+      Cpu_mode = MMX;
+      break;
+    case 's':
+      Cpu_mode = SSE;
+      break;
+    case 't':
+      Cpu_mode = SSE2;
+      break;
+    case 'l':
+      framebits = atoi(optarg);
+      break;
+    case 'n':
+      trials = atoi(optarg);
+      break;
+    case 'e':
+      ebn0 = atof(optarg);
+      break;
+    case 'g':
+      Gain = atof(optarg);
+      break;
+    case 'v':
+      Verbose++;
+      break;
+    }
+  }
+  if(framebits > 8*MAXBYTES){
+    fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8);
+    framebits = MAXBYTES*8;
+  }
+  if((vp = create_viterbi615(framebits)) == NULL){
+    printf("create_viterbi615 failed\n");
+    exit(1);
+  }
+  if(ebn0 != -100){
+    esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */
+    /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing
+     * only half the noise power, and the sqrt() converts power to
+     * voltage.
+     */
+    gain = 1./sqrt(0.5/pow(10.,esn0/10.));
+    printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+
+    for(tr=0;tr<trials;tr++){
+      /* Encode a frame of random data followed by 14 zero tail bits */
+      for(i=0;i<framebits+14;i++){
+        int bit = (i < framebits) ? (random() & 1) : 0;
+        sr = (sr << 1) | bit;
+        if(i < framebits) bits[i/8] = sr & 0xff; /* tail bits are never compared; storing them would write past bits[] at the maximum frame size */
+        symbols[6*i+0] = addnoise(parity(sr & V615POLYA),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+1] = addnoise(parity(sr & V615POLYB),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+2] = addnoise(parity(sr & V615POLYC),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+3] = addnoise(parity(sr & V615POLYD),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+4] = addnoise(parity(sr & V615POLYE),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+5] = addnoise(parity(sr & V615POLYF),gain,Gain,OFFSET,CLIP);
+      }
+      /* Decode it and make sure we get the right answer */
+      /* Initialize Viterbi decoder */
+      init_viterbi615(vp,0);
+
+      /* Decode block (data bits plus the 14 tail bits) */
+      update_viterbi615_blk(vp,symbols,framebits+14);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi615(vp,data,framebits,0);
+      errcnt = 0;
+      for(i=0;i<framebits/8;i++){ /* count differing bits, one whole byte at a time */
+        int e = Bitcnt[xordata[i] = data[i] ^ bits[i]];
+        errcnt += e;
+        tot_errs += e;
+      }
+      if(errcnt != 0)
+        badframes++;
+      if(Verbose > 1 && errcnt != 0){
+        printf("frame %d, %d errors: ",tr,errcnt);
+        for(i=0;i<framebits/8;i++){
+          printf("%02x",xordata[i]);
+        }
+        printf("\n");
+      }
+      if(Verbose)
+        printf("BER %lld/%lld (%10.3g) FER %d/%d (%10.3g)\r",
+               tot_errs,(long long)framebits*(tr+1),tot_errs/((double)framebits*(tr+1)),
+               badframes,(tr+1),(double)badframes/(tr+1));
+      fflush(stdout);
+
+    }
+
+    if(Verbose > 1)
+      printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+    else if(Verbose == 0)
+      printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n",
+             tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials),
+             badframes,trials,(double)badframes/trials); /* tr == trials after the loop, so tr+1 would overcount the frames */
+    else
+      printf("\n");
+  } else {
+    /* Do time trials on a constant symbol buffer */
+    memset(symbols,127,sizeof(symbols));
+    printf("Starting time trials\n");
+    getrusage(RUSAGE_SELF,&start);
+    for(tr=0;tr < trials;tr++){
+      /* Initialize Viterbi decoder */
+      init_viterbi615(vp,0);
+
+      /* Decode block */
+      update_viterbi615_blk(vp,symbols,framebits+14);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi615(vp,data,framebits,0);
+    }
+    getrusage(RUSAGE_SELF,&finish);
+    extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); /* user CPU seconds */
+    printf("Execution time for %d %d-bit frames: %.2f sec\n",trials,
+           framebits,extime);
+    printf("decoder speed: %g bits/s\n",(double)trials*framebits/extime); /* multiply in double so the product cannot overflow int */
+  }
+  exit(0);
+}