diff options
author | android-build-team Robot <android-build-team-robot@google.com> | 2018-01-18 08:24:39 +0000 |
---|---|---|
committer | android-build-team Robot <android-build-team-robot@google.com> | 2018-01-18 08:24:39 +0000 |
commit | 68e936ed49dfff9cd0e5c0bca9f57509778fedb1 (patch) | |
tree | 9bf3a00fa4593bf19a63b5a8b6ee6ec33b2fe477 | |
parent | 16f3e7749963deb2c99e73263bd4b5c50c37f6dd (diff) | |
parent | 2a2f59be31eda0c290bf1c6e17dda9a1fc1b25fb (diff) | |
download | flac-pie-cuttlefish-testing.tar.gz |
Snap for 4551672 from 2a2f59be31eda0c290bf1c6e17dda9a1fc1b25fb to pi-releaseandroid-wear-9.0.0_r9android-wear-9.0.0_r8android-wear-9.0.0_r7android-wear-9.0.0_r6android-wear-9.0.0_r5android-wear-9.0.0_r4android-wear-9.0.0_r34android-wear-9.0.0_r33android-wear-9.0.0_r32android-wear-9.0.0_r31android-wear-9.0.0_r30android-wear-9.0.0_r3android-wear-9.0.0_r29android-wear-9.0.0_r28android-wear-9.0.0_r27android-wear-9.0.0_r26android-wear-9.0.0_r25android-wear-9.0.0_r24android-wear-9.0.0_r23android-wear-9.0.0_r22android-wear-9.0.0_r21android-wear-9.0.0_r20android-wear-9.0.0_r2android-wear-9.0.0_r19android-wear-9.0.0_r18android-wear-9.0.0_r17android-wear-9.0.0_r16android-wear-9.0.0_r15android-wear-9.0.0_r14android-wear-9.0.0_r13android-wear-9.0.0_r12android-wear-9.0.0_r11android-wear-9.0.0_r10android-wear-9.0.0_r1android-vts-9.0_r9android-vts-9.0_r8android-vts-9.0_r7android-vts-9.0_r6android-vts-9.0_r5android-vts-9.0_r4android-vts-9.0_r19android-vts-9.0_r18android-vts-9.0_r17android-vts-9.0_r16android-vts-9.0_r15android-vts-9.0_r14android-vts-9.0_r13android-vts-9.0_r12android-vts-9.0_r11android-vts-9.0_r10android-security-9.0.0_r76android-security-9.0.0_r75android-security-9.0.0_r74android-security-9.0.0_r73android-security-9.0.0_r72android-security-9.0.0_r71android-security-9.0.0_r70android-security-9.0.0_r69android-security-9.0.0_r68android-security-9.0.0_r67android-security-9.0.0_r66android-security-9.0.0_r65android-security-9.0.0_r64android-security-9.0.0_r63android-security-9.0.0_r62android-cts-9.0_r9android-cts-9.0_r8android-cts-9.0_r7android-cts-9.0_r6android-cts-9.0_r5android-cts-9.0_r4android-cts-9.0_r3android-cts-9.0_r20android-cts-9.0_r2android-cts-9.0_r19android-cts-9.0_r18android-cts-9.0_r17android-cts-9.0_r16android-cts-9.0_r15android-cts-9.0_r14android-cts-9.0_r13android-cts-9.0_r12android-cts-9.0_r11android-cts-9.0_r10android-cts-9.0_r1android-9.0.0_r9android-9.0.0_r8android-9.0.0_r7android-9.0.0_r61android-9.0.0_r60android-9.0.0_r6android-9.0.0_r59android-9.0.0_r58android-9.0.0_r57android-9.0.0_r56android-9.0.0_r55android-9.0.0_r54android-9.0.0_r53android-9.0.0_r52android-9.0.0_r51android-9.0.0_r50android-9.0.0_r5android-9.0.0_r49android-9.0.0_r48android-9.0.0_r3android-9.0.0_r2android-9.0.0_r18android-9.0.0_r17android-9.0.0_r10android-9.0.0_r1security-pi-releasepie-vts-releasepie-security-releasepie-s2-releasepie-release-2pie-releasepie-r2-s2-releasepie-r2-s1-releasepie-r2-releasepie-platform-releasepie-gsipie-cuttlefish-testingpie-cts-release
Change-Id: I6b15465587e6d1d3a90f7989455c46e33ae7d013
84 files changed, 13474 insertions, 915 deletions
diff --git a/README.version b/README.version index 0f07f97e..7f4074eb 100644 --- a/README.version +++ b/README.version @@ -1,3 +1,3 @@ -URL: http://downloads.xiph.org/releases/flac/flac-1.2.1.tar.gz -Version: 1.2.1 +URL: https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz +Version: 1.3.2 BugComponent: 42195 @@ -1,50 +1,83 @@ -/* config.h. Generated by configure. */ -/* config.h.in. Generated from configure.in by autoheader. */ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define if building universal (internal helper macro) */ +/* #undef AC_APPLE_UNIVERSAL_BUILD */ + +/* Target processor is big endian. */ +#define CPU_IS_BIG_ENDIAN 0 + +/* Target processor is little endian. */ +#define CPU_IS_LITTLE_ENDIAN 1 + +/* Set FLAC__BYTES_PER_WORD to 8 (4 is the default) */ +#define ENABLE_64_BIT_WORDS 0 /* define to align allocated memory on 32-byte boundaries */ #define FLAC__ALIGN_MALLOC_DATA 1 /* define if building for ia32/i386 */ -/* #define FLAC__CPU_IA32 1 */ +/* #undef FLAC__CPU_IA32 */ /* define if building for PowerPC */ /* #undef FLAC__CPU_PPC */ +/* define if building for PowerPC with SPE ABI */ +/* #undef FLAC__CPU_PPC_SPE */ + /* define if building for SPARC */ /* #undef FLAC__CPU_SPARC */ -/* define if you are compiling for PowerPC and have the 'as' assembler */ -/* #undef FLAC__HAS_AS */ +/* define if building for x86_64 */ +/* #undef FLAC__CPU_X86_64 */ /* define if you have docbook-to-man or docbook2man */ -#define FLAC__HAS_DOCBOOK_TO_MAN 1 - -/* define if you are compiling for PowerPC and have the 'gas' assembler */ -/* #define FLAC__HAS_GAS 0 */ +/* #undef FLAC__HAS_DOCBOOK_TO_MAN */ /* define if you are compiling for x86 and have the NASM assembler */ -/* #define FLAC__HAS_NASM 0 */ +/* #undef FLAC__HAS_NASM */ /* define if you have the ogg library */ -/* #undef FLAC__HAS_OGG */ +#define FLAC__HAS_OGG 0 + +/* Set to 1 if <x86intrin.h> is available. */ +#define FLAC__HAS_X86INTRIN 1 /* define to disable use of assembly code */ #define FLAC__NO_ASM 1 -/* define if your operating system supports SSE instructions */ -/* #undef FLAC__SSE_OS */ - /* define if building for Darwin / MacOS X */ /* #undef FLAC__SYS_DARWIN */ /* define if building for Linux */ -/* #undef FLAC__SYS_LINUX */ - -/* define to enable use of 3Dnow! instructions */ -/* #define FLAC__USE_3DNOW 0 */ +#define FLAC__SYS_LINUX 1 /* define to enable use of Altivec instructions */ -/* #define FLAC__USE_ALTIVEC 0 */ +/* #define FLAC__USE_ALTIVEC 1 */ + +/* define to enable use of AVX instructions */ +/* #define FLAC__USE_AVX 1 */ + +/* Compiler has the __builtin_bswap16 intrinsic */ +#define HAVE_BSWAP16 1 + +/* Compiler has the __builtin_bswap32 intrinsic */ +#define HAVE_BSWAP32 1 + +/* Define to 1 if you have the <byteswap.h> header file. */ +#define HAVE_BYTESWAP_H 1 + +/* define if you have clock_gettime */ +#define HAVE_CLOCK_GETTIME 1 + +/* Define to 1 if you have the <cpuid.h> header file. */ +/* #undef HAVE_CPUID_H */ + +/* Define to 1 if C++ supports variable-length arrays. */ +#define HAVE_CXX_VARARRAYS 1 + +/* Define to 1 if C supports variable-length arrays. */ +#define HAVE_C_VARARRAYS 1 /* Define to 1 if you have the <dlfcn.h> header file. */ #define HAVE_DLFCN_H 1 @@ -55,14 +88,17 @@ /* Define to 1 if you have the `getopt_long' function. */ #define HAVE_GETOPT_LONG 1 -/* Define if you have the iconv() function. */ +/* Define if you have the iconv() function and it works. */ #define HAVE_ICONV 1 /* Define to 1 if you have the <inttypes.h> header file. */ #define HAVE_INTTYPES_H 1 /* Define if you have <langinfo.h> and nl_langinfo(CODESET). */ -/* #define HAVE_LANGINFO_CODESET 0 */ +#define HAVE_LANGINFO_CODESET 1 + +/* lround support */ +#define HAVE_LROUND 1 /* Define to 1 if you have the <memory.h> header file. */ #define HAVE_MEMORY_H 1 @@ -82,37 +118,65 @@ /* Define to 1 if you have the <string.h> header file. */ #define HAVE_STRING_H 1 +/* Define to 1 if you have the <sys/ioctl.h> header file. */ +#define HAVE_SYS_IOCTL_H 1 + +/* Define to 1 if you have the <sys/param.h> header file. */ +#define HAVE_SYS_PARAM_H 1 + /* Define to 1 if you have the <sys/stat.h> header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the <sys/types.h> header file. */ #define HAVE_SYS_TYPES_H 1 +/* Define to 1 if you have the <termios.h> header file. */ +#define HAVE_TERMIOS_H 1 + +/* Define to 1 if typeof works with your compiler. */ +#define HAVE_TYPEOF 1 + /* Define to 1 if you have the <unistd.h> header file. */ #define HAVE_UNISTD_H 1 +/* Define to 1 if you have the <x86intrin.h> header file. */ +/* #undef HAVE_X86INTRIN_H */ + /* Define as const if the declaration of iconv() needs const. */ #define ICONV_CONST +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + /* Name of package */ #define PACKAGE "flac" /* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" +#define PACKAGE_BUGREPORT "flac-dev@xiph.org" /* Define to the full name of this package. */ -#define PACKAGE_NAME "" +#define PACKAGE_NAME "flac" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "" +#define PACKAGE_STRING "flac 1.3.2" /* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "" +#define PACKAGE_TARNAME "flac" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "https://www.xiph.org/flac/" /* Define to the version of this package. */ -#define PACKAGE_VERSION "" +#define PACKAGE_VERSION "1.3.2" -/* The size of a `void*', as computed by sizeof. */ +/* The size of `off_t', as computed by sizeof. */ +#if __LP64__ +#define SIZEOF_OFF_T 8 +#else +#define SIZEOF_OFF_T 4 +#endif + +/* The size of `void*', as computed by sizeof. */ #if __LP64__ #define SIZEOF_VOIDP 8 #else @@ -122,18 +186,63 @@ /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif + + /* Version number of package */ -#define VERSION "1.3.1" +#define VERSION "1.3.2" -/* Define to 1 if your processor stores words with the most significant byte - first (like Motorola and SPARC, unlike Intel and VAX). */ -/* #undef WORDS_BIGENDIAN */ +/* Target processor is big endian. */ +#define WORDS_BIGENDIAN 0 + +/* Enable large inode numbers on Mac OS X 10.5. */ +#ifndef _DARWIN_USE_64_BIT_INODE +# define _DARWIN_USE_64_BIT_INODE 1 +#endif /* Number of bits in a file offset, on hosts where this is settable. */ -/* #define _FILE_OFFSET_BITS 64 */ +/* #undef _FILE_OFFSET_BITS */ /* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ /* #undef _LARGEFILE_SOURCE */ /* Define for large files, on AIX-style hosts. */ /* #undef _LARGE_FILES */ + +/* Define to 1 if on MINIX. */ +/* #undef _MINIX */ + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define to 1 if you need to in order for `stat' and other things to work. */ +/* #undef _POSIX_SOURCE */ + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + +/* Define to __typeof__ if your compiler spells it that way. */ +/* #undef typeof */ diff --git a/include/FLAC/all.h b/include/FLAC/all.h new file mode 100644 index 00000000..11d47d79 --- /dev/null +++ b/include/FLAC/all.h @@ -0,0 +1,371 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FLAC__ALL_H +#define FLAC__ALL_H + +#include "export.h" + +#include "assert.h" +#include "callback.h" +#include "format.h" +#include "metadata.h" +#include "ordinals.h" +#include "stream_decoder.h" +#include "stream_encoder.h" + +/** \mainpage + * + * \section intro Introduction + * + * This is the documentation for the FLAC C and C++ APIs. It is + * highly interconnected; this introduction should give you a top + * level idea of the structure and how to find the information you + * need. As a prerequisite you should have at least a basic + * knowledge of the FLAC format, documented + * <A HREF="../format.html">here</A>. + * + * \section c_api FLAC C API + * + * The FLAC C API is the interface to libFLAC, a set of structures + * describing the components of FLAC streams, and functions for + * encoding and decoding streams, as well as manipulating FLAC + * metadata in files. The public include files will be installed + * in your include area (for example /usr/include/FLAC/...). + * + * By writing a little code and linking against libFLAC, it is + * relatively easy to add FLAC support to another program. The + * library is licensed under <A HREF="../license.html">Xiph's BSD license</A>. + * Complete source code of libFLAC as well as the command-line + * encoder and plugins is available and is a useful source of + * examples. + * + * Aside from encoders and decoders, libFLAC provides a powerful + * metadata interface for manipulating metadata in FLAC files. It + * allows the user to add, delete, and modify FLAC metadata blocks + * and it can automatically take advantage of PADDING blocks to avoid + * rewriting the entire FLAC file when changing the size of the + * metadata. + * + * libFLAC usually only requires the standard C library and C math + * library. In particular, threading is not used so there is no + * dependency on a thread library. However, libFLAC does not use + * global variables and should be thread-safe. + * + * libFLAC also supports encoding to and decoding from Ogg FLAC. + * However the metadata editing interfaces currently have limited + * read-only support for Ogg FLAC files. + * + * \section cpp_api FLAC C++ API + * + * The FLAC C++ API is a set of classes that encapsulate the + * structures and functions in libFLAC. They provide slightly more + * functionality with respect to metadata but are otherwise + * equivalent. For the most part, they share the same usage as + * their counterparts in libFLAC, and the FLAC C API documentation + * can be used as a supplement. The public include files + * for the C++ API will be installed in your include area (for + * example /usr/include/FLAC++/...). + * + * libFLAC++ is also licensed under + * <A HREF="../license.html">Xiph's BSD license</A>. + * + * \section getting_started Getting Started + * + * A good starting point for learning the API is to browse through + * the <A HREF="modules.html">modules</A>. Modules are logical + * groupings of related functions or classes, which correspond roughly + * to header files or sections of header files. Each module includes a + * detailed description of the general usage of its functions or + * classes. + * + * From there you can go on to look at the documentation of + * individual functions. You can see different views of the individual + * functions through the links in top bar across this page. + * + * If you prefer a more hands-on approach, you can jump right to some + * <A HREF="../documentation_example_code.html">example code</A>. + * + * \section porting_guide Porting Guide + * + * Starting with FLAC 1.1.3 a \link porting Porting Guide \endlink + * has been introduced which gives detailed instructions on how to + * port your code to newer versions of FLAC. + * + * \section embedded_developers Embedded Developers + * + * libFLAC has grown larger over time as more functionality has been + * included, but much of it may be unnecessary for a particular embedded + * implementation. Unused parts may be pruned by some simple editing of + * src/libFLAC/Makefile.am. In general, the decoders, encoders, and + * metadata interface are all independent from each other. + * + * It is easiest to just describe the dependencies: + * + * - All modules depend on the \link flac_format Format \endlink module. + * - The decoders and encoders depend on the bitbuffer. + * - The decoder is independent of the encoder. The encoder uses the + * decoder because of the verify feature, but this can be removed if + * not needed. + * - Parts of the metadata interface require the stream decoder (but not + * the encoder). + * - Ogg support is selectable through the compile time macro + * \c FLAC__HAS_OGG. + * + * For example, if your application only requires the stream decoder, no + * encoder, and no metadata interface, you can remove the stream encoder + * and the metadata interface, which will greatly reduce the size of the + * library. + * + * Also, there are several places in the libFLAC code with comments marked + * with "OPT:" where a #define can be changed to enable code that might be + * faster on a specific platform. Experimenting with these can yield faster + * binaries. + */ + +/** \defgroup porting Porting Guide for New Versions + * + * This module describes differences in the library interfaces from + * version to version. It assists in the porting of code that uses + * the libraries to newer versions of FLAC. + * + * One simple facility for making porting easier that has been added + * in FLAC 1.1.3 is a set of \c #defines in \c export.h of each + * library's includes (e.g. \c include/FLAC/export.h). The + * \c #defines mirror the libraries' + * <A HREF="http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning">libtool version numbers</A>, + * e.g. in libFLAC there are \c FLAC_API_VERSION_CURRENT, + * \c FLAC_API_VERSION_REVISION, and \c FLAC_API_VERSION_AGE. + * These can be used to support multiple versions of an API during the + * transition phase, e.g. + * + * \code + * #if !defined(FLAC_API_VERSION_CURRENT) || FLAC_API_VERSION_CURRENT <= 7 + * legacy code + * #else + * new code + * #endif + * \endcode + * + * The source will work for multiple versions and the legacy code can + * easily be removed when the transition is complete. + * + * Another available symbol is FLAC_API_SUPPORTS_OGG_FLAC (defined in + * include/FLAC/export.h), which can be used to determine whether or not + * the library has been compiled with support for Ogg FLAC. This is + * simpler than trying to call an Ogg init function and catching the + * error. + */ + +/** \defgroup porting_1_1_2_to_1_1_3 Porting from FLAC 1.1.2 to 1.1.3 + * \ingroup porting + * + * \brief + * This module describes porting from FLAC 1.1.2 to FLAC 1.1.3. + * + * The main change between the APIs in 1.1.2 and 1.1.3 is that they have + * been simplified. First, libOggFLAC has been merged into libFLAC and + * libOggFLAC++ has been merged into libFLAC++. Second, both the three + * decoding layers and three encoding layers have been merged into a + * single stream decoder and stream encoder. That is, the functionality + * of FLAC__SeekableStreamDecoder and FLAC__FileDecoder has been merged + * into FLAC__StreamDecoder, and FLAC__SeekableStreamEncoder and + * FLAC__FileEncoder into FLAC__StreamEncoder. Only the + * FLAC__StreamDecoder and FLAC__StreamEncoder remain. What this means + * is there is now a single API that can be used to encode or decode + * streams to/from native FLAC or Ogg FLAC and the single API can work + * on both seekable and non-seekable streams. + * + * Instead of creating an encoder or decoder of a certain layer, now the + * client will always create a FLAC__StreamEncoder or + * FLAC__StreamDecoder. The old layers are now differentiated by the + * initialization function. For example, for the decoder, + * FLAC__stream_decoder_init() has been replaced by + * FLAC__stream_decoder_init_stream(). This init function takes + * callbacks for the I/O, and the seeking callbacks are optional. This + * allows the client to use the same object for seekable and + * non-seekable streams. For decoding a FLAC file directly, the client + * can use FLAC__stream_decoder_init_file() and pass just a filename + * and fewer callbacks; most of the other callbacks are supplied + * internally. For situations where fopen()ing by filename is not + * possible (e.g. Unicode filenames on Windows) the client can instead + * open the file itself and supply the FILE* to + * FLAC__stream_decoder_init_FILE(). The init functions now returns a + * FLAC__StreamDecoderInitStatus instead of FLAC__StreamDecoderState. + * Since the callbacks and client data are now passed to the init + * function, the FLAC__stream_decoder_set_*_callback() functions and + * FLAC__stream_decoder_set_client_data() are no longer needed. The + * rest of the calls to the decoder are the same as before. + * + * There are counterpart init functions for Ogg FLAC, e.g. + * FLAC__stream_decoder_init_ogg_stream(). All the rest of the calls + * and callbacks are the same as for native FLAC. + * + * As an example, in FLAC 1.1.2 a seekable stream decoder would have + * been set up like so: + * + * \code + * FLAC__SeekableStreamDecoder *decoder = FLAC__seekable_stream_decoder_new(); + * if(decoder == NULL) do_something; + * FLAC__seekable_stream_decoder_set_md5_checking(decoder, true); + * [... other settings ...] + * FLAC__seekable_stream_decoder_set_read_callback(decoder, my_read_callback); + * FLAC__seekable_stream_decoder_set_seek_callback(decoder, my_seek_callback); + * FLAC__seekable_stream_decoder_set_tell_callback(decoder, my_tell_callback); + * FLAC__seekable_stream_decoder_set_length_callback(decoder, my_length_callback); + * FLAC__seekable_stream_decoder_set_eof_callback(decoder, my_eof_callback); + * FLAC__seekable_stream_decoder_set_write_callback(decoder, my_write_callback); + * FLAC__seekable_stream_decoder_set_metadata_callback(decoder, my_metadata_callback); + * FLAC__seekable_stream_decoder_set_error_callback(decoder, my_error_callback); + * FLAC__seekable_stream_decoder_set_client_data(decoder, my_client_data); + * if(FLAC__seekable_stream_decoder_init(decoder) != FLAC__SEEKABLE_STREAM_DECODER_OK) do_something; + * \endcode + * + * In FLAC 1.1.3 it is like this: + * + * \code + * FLAC__StreamDecoder *decoder = FLAC__stream_decoder_new(); + * if(decoder == NULL) do_something; + * FLAC__stream_decoder_set_md5_checking(decoder, true); + * [... other settings ...] + * if(FLAC__stream_decoder_init_stream( + * decoder, + * my_read_callback, + * my_seek_callback, // or NULL + * my_tell_callback, // or NULL + * my_length_callback, // or NULL + * my_eof_callback, // or NULL + * my_write_callback, + * my_metadata_callback, // or NULL + * my_error_callback, + * my_client_data + * ) != FLAC__STREAM_DECODER_INIT_STATUS_OK) do_something; + * \endcode + * + * or you could do; + * + * \code + * [...] + * FILE *file = fopen("somefile.flac","rb"); + * if(file == NULL) do_somthing; + * if(FLAC__stream_decoder_init_FILE( + * decoder, + * file, + * my_write_callback, + * my_metadata_callback, // or NULL + * my_error_callback, + * my_client_data + * ) != FLAC__STREAM_DECODER_INIT_STATUS_OK) do_something; + * \endcode + * + * or just: + * + * \code + * [...] + * if(FLAC__stream_decoder_init_file( + * decoder, + * "somefile.flac", + * my_write_callback, + * my_metadata_callback, // or NULL + * my_error_callback, + * my_client_data + * ) != FLAC__STREAM_DECODER_INIT_STATUS_OK) do_something; + * \endcode + * + * Another small change to the decoder is in how it handles unparseable + * streams. Before, when the decoder found an unparseable stream + * (reserved for when the decoder encounters a stream from a future + * encoder that it can't parse), it changed the state to + * \c FLAC__STREAM_DECODER_UNPARSEABLE_STREAM. Now the decoder instead + * drops sync and calls the error callback with a new error code + * \c FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM. This is + * more robust. If your error callback does not discriminate on the the + * error state, your code does not need to be changed. + * + * The encoder now has a new setting: + * FLAC__stream_encoder_set_apodization(). This is for setting the + * method used to window the data before LPC analysis. You only need to + * add a call to this function if the default is not suitable. There + * are also two new convenience functions that may be useful: + * FLAC__metadata_object_cuesheet_calculate_cddb_id() and + * FLAC__metadata_get_cuesheet(). + * + * The \a bytes parameter to FLAC__StreamDecoderReadCallback, + * FLAC__StreamEncoderReadCallback, and FLAC__StreamEncoderWriteCallback + * is now \c size_t instead of \c unsigned. + */ + +/** \defgroup porting_1_1_3_to_1_1_4 Porting from FLAC 1.1.3 to 1.1.4 + * \ingroup porting + * + * \brief + * This module describes porting from FLAC 1.1.3 to FLAC 1.1.4. + * + * There were no changes to any of the interfaces from 1.1.3 to 1.1.4. + * There was a slight change in the implementation of + * FLAC__stream_encoder_set_metadata(); the function now makes a copy + * of the \a metadata array of pointers so the client no longer needs + * to maintain it after the call. The objects themselves that are + * pointed to by the array are still not copied though and must be + * maintained until the call to FLAC__stream_encoder_finish(). + */ + +/** \defgroup porting_1_1_4_to_1_2_0 Porting from FLAC 1.1.4 to 1.2.0 + * \ingroup porting + * + * \brief + * This module describes porting from FLAC 1.1.4 to FLAC 1.2.0. + * + * There were only very minor changes to the interfaces from 1.1.4 to 1.2.0. + * In libFLAC, \c FLAC__format_sample_rate_is_subset() was added. + * In libFLAC++, \c FLAC::Decoder::Stream::get_decode_position() was added. + * + * Finally, value of the constant \c FLAC__FRAME_HEADER_RESERVED_LEN + * has changed to reflect the conversion of one of the reserved bits + * into active use. It used to be \c 2 and now is \c 1. However the + * FLAC frame header length has not changed, so to skip the proper + * number of bits, use \c FLAC__FRAME_HEADER_RESERVED_LEN + + * \c FLAC__FRAME_HEADER_BLOCKING_STRATEGY_LEN + */ + +/** \defgroup flac FLAC C API + * + * The FLAC C API is the interface to libFLAC, a set of structures + * describing the components of FLAC streams, and functions for + * encoding and decoding streams, as well as manipulating FLAC + * metadata in files. + * + * You should start with the format components as all other modules + * are dependent on it. + */ + +#endif diff --git a/include/FLAC/assert.h b/include/FLAC/assert.h index dc9bcef4..b546fd07 100644 --- a/include/FLAC/assert.h +++ b/include/FLAC/assert.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2001-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/include/FLAC/callback.h b/include/FLAC/callback.h index ce8787ff..f942dd25 100644 --- a/include/FLAC/callback.h +++ b/include/FLAC/callback.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2004-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/include/FLAC/export.h b/include/FLAC/export.h index 9cc9e137..d52f0bbb 100644 --- a/include/FLAC/export.h +++ b/include/FLAC/export.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/include/FLAC/format.h b/include/FLAC/format.h index 7424565b..c087d4a7 100644 --- a/include/FLAC/format.h +++ b/include/FLAC/format.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -512,8 +512,8 @@ typedef enum { FLAC__METADATA_TYPE_UNDEFINED = 7, /**< marker to denote beginning of undefined type range; this number will increase as new metadata types are added */ - FLAC__MAX_METADATA_TYPE = FLAC__MAX_METADATA_TYPE_CODE, - /**< No type will ever be greater than this. There is not enough room in the protocol block. */ + FLAC__MAX_METADATA_TYPE = FLAC__MAX_METADATA_TYPE_CODE, + /**< No type will ever be greater than this. There is not enough room in the protocol block. */ } FLAC__MetadataType; /** Maps a FLAC__MetadataType to a C string. diff --git a/include/FLAC/metadata.h b/include/FLAC/metadata.h index b8ad8b4d..4e18cd68 100644 --- a/include/FLAC/metadata.h +++ b/include/FLAC/metadata.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2001-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/include/FLAC/ordinals.h b/include/FLAC/ordinals.h index b1e1acfc..ea52ea63 100644 --- a/include/FLAC/ordinals.h +++ b/include/FLAC/ordinals.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/include/FLAC/stream_decoder.h b/include/FLAC/stream_decoder.h index 9bfdd1f5..39c958db 100644 --- a/include/FLAC/stream_decoder.h +++ b/include/FLAC/stream_decoder.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -228,7 +228,7 @@ typedef enum { */ FLAC__STREAM_DECODER_ABORTED, - /**< The decoder was aborted by the read callback. */ + /**< The decoder was aborted by the read or write callback. */ FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR, /**< An error occurred allocating memory. The decoder is in an invalid diff --git a/include/FLAC/stream_encoder.h b/include/FLAC/stream_encoder.h index efc213ae..40a2fd36 100644 --- a/include/FLAC/stream_encoder.h +++ b/include/FLAC/stream_encoder.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/include/share/alloc.h b/include/share/alloc.h index 3b707491..914de9ba 100644 --- a/include/share/alloc.h +++ b/include/share/alloc.h @@ -1,6 +1,6 @@ /* alloc - Convenience routines for safely allocating memory * Copyright (C) 2007-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -52,7 +52,7 @@ # ifndef SIZE_T_MAX # ifdef _MSC_VER # ifdef _WIN64 -# define SIZE_T_MAX 0xffffffffffffffffui64 +# define SIZE_T_MAX FLAC__U64L(0xffffffffffffffff) # else # define SIZE_T_MAX 0xffffffff # endif diff --git a/include/share/compat.h b/include/share/compat.h index 671b1107..2083f3a2 100644 --- a/include/share/compat.h +++ b/include/share/compat.h @@ -1,5 +1,5 @@ /* libFLAC - Free Lossless Audio Codec library - * Copyright (C) 2012-2014 Xiph.org Foundation + * Copyright (C) 2012-2016 Xiph.org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -117,7 +117,9 @@ #endif #if defined _MSC_VER -# if _MSC_VER >= 1600 +# if _MSC_VER >= 1800 +# include <inttypes.h> +# elif _MSC_VER >= 1600 /* Visual Studio 2010 has decent C99 support */ # include <stdint.h> # define PRIu64 "llu" @@ -144,23 +146,26 @@ #ifdef _WIN32 /* All char* strings are in UTF-8 format. Added to support Unicode files on Windows */ -#include "share/win_utf8_io.h" +#include "share/win_utf8_io.h" #define flac_printf printf_utf8 #define flac_fprintf fprintf_utf8 #define flac_vfprintf vfprintf_utf8 -#define flac_fopen fopen_utf8 -#define flac_chmod chmod_utf8 -#define flac_utime utime_utf8 -#define flac_unlink unlink_utf8 -#define flac_rename rename_utf8 -#define flac_stat _stat64_utf8 + +#include "share/windows_unicode_filenames.h" +#define flac_fopen flac_internal_fopen_utf8 +#define flac_chmod flac_internal_chmod_utf8 +#define flac_utime flac_internal_utime_utf8 +#define flac_unlink flac_internal_unlink_utf8 +#define flac_rename flac_internal_rename_utf8 +#define flac_stat flac_internal_stat64_utf8 #else #define flac_printf printf #define flac_fprintf fprintf #define flac_vfprintf vfprintf + #define flac_fopen fopen #define flac_chmod chmod #define flac_utime utime diff --git a/include/share/endswap.h b/include/share/endswap.h index 4fde4c15..9088a747 100644 --- a/include/share/endswap.h +++ b/include/share/endswap.h @@ -1,5 +1,5 @@ /* libFLAC - Free Lossless Audio Codec library - * Copyright (C) 2012-2014 Xiph.org Foundation + * Copyright (C) 2012-2016 Xiph.org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -43,11 +43,15 @@ static inline unsigned short __builtin_bswap16(unsigned short a) #define ENDSWAP_16(x) (__builtin_bswap16 (x)) #define ENDSWAP_32(x) (__builtin_bswap32 (x)) +#define ENDSWAP_64(x) (__builtin_bswap64 (x)) -#elif defined _MSC_VER /* Windows. Apparently in <stdlib.h>. */ +#elif defined _MSC_VER /* Windows */ + +#include <stdlib.h> #define ENDSWAP_16(x) (_byteswap_ushort (x)) #define ENDSWAP_32(x) (_byteswap_ulong (x)) +#define ENDSWAP_64(x) (_byteswap_uint64 (x)) #elif defined HAVE_BYTESWAP_H /* Linux */ @@ -55,16 +59,18 @@ static inline unsigned short __builtin_bswap16(unsigned short a) #define ENDSWAP_16(x) (bswap_16 (x)) #define ENDSWAP_32(x) (bswap_32 (x)) +#define ENDSWAP_64(x) (bswap_64 (x)) #else #define ENDSWAP_16(x) ((((x) >> 8) & 0xFF) | (((x) & 0xFF) << 8)) #define ENDSWAP_32(x) ((((x) >> 24) & 0xFF) | (((x) >> 8) & 0xFF00) | (((x) & 0xFF00) << 8) | (((x) & 0xFF) << 24)) +#define ENDSWAP_64(x) ((ENDSWAP_32(((x) >> 32) & 0xFFFFFFFF)) | (ENDSWAP_32((x) & 0xFFFFFFFF) << 32)) #endif -/* Host to little-endian byte swapping. */ +/* Host to little-endian byte swapping (for MD5 calculation) */ #if CPU_IS_BIG_ENDIAN #define H2LE_16(x) ENDSWAP_16 (x) diff --git a/include/share/getopt.h b/include/share/getopt.h new file mode 100644 index 00000000..66aced0f --- /dev/null +++ b/include/share/getopt.h @@ -0,0 +1,184 @@ +/* + NOTE: + I cannot get the vanilla getopt code to work (i.e. compile only what + is needed and not duplicate symbols found in the standard library) + on all the platforms that FLAC supports. In particular the gating + of code with the ELIDE_CODE #define is not accurate enough on systems + that are POSIX but not glibc. If someone has a patch that works on + GNU/Linux, Darwin, AND Solaris please submit it on the project page: + https://sourceforge.net/p/flac/patches/ + + In the meantime I have munged the global symbols and removed gates + around code, while at the same time trying to touch the original as + little as possible. +*/ +/* Declarations for getopt. + Copyright (C) 1989,90,91,92,93,94,96,97,98 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#ifndef SHARE__GETOPT_H +#define SHARE__GETOPT_H + +/*[JEC] was:#ifndef __need_getopt*/ +/*[JEC] was:# define _GETOPT_H 1*/ +/*[JEC] was:#endif*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `share__getopt' to the caller. + When `share__getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *share__optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `share__getopt'. + + On entry to `share__getopt', zero means this is the first call; initialize. + + When `share__getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `share__optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int share__optind; + +/* Callers store zero here to inhibit the error message `share__getopt' prints + for unrecognized options. */ + +extern int share__opterr; + +/* Set to an option character which was unrecognized. */ + +extern int share__optopt; + +/*[JEC] was:#ifndef __need_getopt */ +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to share__getopt_long or share__getopt_long_only is a vector + of `struct share__option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + share__no_argument (or 0) if the option does not take an argument, + share__required_argument (or 1) if the option requires an argument, + share__optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `share__optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `share__getopt' + returns the contents of the `val' field. */ + +struct share__option +{ +# if defined __STDC__ && __STDC__ + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct share__option'. */ + +# define share__no_argument 0 +# define share__required_argument 1 +# define share__optional_argument 2 +/*[JEC] was:#endif*/ /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `share__optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `share__optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `share__getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `share__getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `share__getopt'. */ + +/*[JEC] was:#if defined __STDC__ && __STDC__*/ +/*[JEC] was:# ifdef __GNU_LIBRARY__*/ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int share__getopt (int argc, char *const *argv, const char *shortopts); +/*[JEC] was:# else*/ /* not __GNU_LIBRARY__ */ +/*[JEC] was:extern int getopt ();*/ +/*[JEC] was:# endif*/ /* __GNU_LIBRARY__ */ + +/*[JEC] was:# ifndef __need_getopt*/ +extern int share__getopt_long (int argc, char *const *argv, const char *shortopts, + const struct share__option *longopts, int *longind); +extern int share__getopt_long_only (int argc, char *const *argv, + const char *shortopts, + const struct share__option *longopts, int *longind); + +/* Internal only. Users should not call this directly. */ +extern int share___getopt_internal (int argc, char *const *argv, + const char *shortopts, + const struct share__option *longopts, int *longind, + int long_only); +/*[JEC] was:# endif*/ +/*[JEC] was:#else*/ /* not __STDC__ */ +/*[JEC] was:extern int getopt ();*/ +/*[JEC] was:# ifndef __need_getopt*/ +/*[JEC] was:extern int getopt_long ();*/ +/*[JEC] was:extern int getopt_long_only ();*/ + +/*[JEC] was:extern int _getopt_internal ();*/ +/*[JEC] was:# endif*/ +/*[JEC] was:#endif*/ /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. */ +/*[JEC] was:#undef __need_getopt*/ + +#endif /* getopt.h */ diff --git a/include/share/grabbag.h b/include/share/grabbag.h new file mode 100644 index 00000000..92ec998b --- /dev/null +++ b/include/share/grabbag.h @@ -0,0 +1,30 @@ +/* grabbag - Convenience lib for various routines common to several tools + * Copyright (C) 2002-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef SHARE__GRABBAG_H +#define SHARE__GRABBAG_H + +/* These can't be included by themselves, only from within grabbag.h */ +#include "grabbag/cuesheet.h" +#include "grabbag/file.h" +#include "grabbag/picture.h" +#include "grabbag/replaygain.h" +#include "grabbag/seektable.h" + +#endif diff --git a/include/share/grabbag/cuesheet.h b/include/share/grabbag/cuesheet.h new file mode 100644 index 00000000..b465ae6a --- /dev/null +++ b/include/share/grabbag/cuesheet.h @@ -0,0 +1,43 @@ +/* grabbag - Convenience lib for various routines common to several tools + * Copyright (C) 2002-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */ + +#ifndef GRABBAG__CUESHEET_H +#define GRABBAG__CUESHEET_H + +#include <stdio.h> +#include "FLAC/metadata.h" + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned grabbag__cuesheet_msf_to_frame(unsigned minutes, unsigned seconds, unsigned frames); +void grabbag__cuesheet_frame_to_msf(unsigned frame, unsigned *minutes, unsigned *seconds, unsigned *frames); + +FLAC__StreamMetadata *grabbag__cuesheet_parse(FILE *file, const char **error_message, unsigned *last_line_read, unsigned sample_rate, FLAC__bool is_cdda, FLAC__uint64 lead_out_offset); + +void grabbag__cuesheet_emit(FILE *file, const FLAC__StreamMetadata *cuesheet, const char *file_reference); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/share/grabbag/file.h b/include/share/grabbag/file.h new file mode 100644 index 00000000..b3f41484 --- /dev/null +++ b/include/share/grabbag/file.h @@ -0,0 +1,65 @@ +/* grabbag - Convenience lib for various routines common to several tools + * Copyright (C) 2002-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Convenience routines for manipulating files */ + +/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */ + +#ifndef GRABAG__FILE_H +#define GRABAG__FILE_H + +/* needed because of off_t */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <sys/types.h> /* for off_t */ +#include <stdio.h> /* for FILE */ +#include "FLAC/ordinals.h" +#include "share/compat.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void grabbag__file_copy_metadata(const char *srcpath, const char *destpath); +FLAC__off_t grabbag__file_get_filesize(const char *srcpath); +const char *grabbag__file_get_basename(const char *srcpath); + +/* read_only == false means "make file writable by user" + * read_only == true means "make file read-only for everyone" + */ +FLAC__bool grabbag__file_change_stats(const char *filename, FLAC__bool read_only); + +/* returns true iff stat() succeeds for both files and they have the same device and inode. */ +/* on windows, uses GetFileInformationByHandle() to compare */ +FLAC__bool grabbag__file_are_same(const char *f1, const char *f2); + +/* attempts to make writable before unlinking */ +FLAC__bool grabbag__file_remove_file(const char *filename); + +/* these will forcibly set stdin/stdout to binary mode (for OSes that require it) */ +FILE *grabbag__file_get_binary_stdin(void); +FILE *grabbag__file_get_binary_stdout(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/share/grabbag/picture.h b/include/share/grabbag/picture.h new file mode 100644 index 00000000..ea308f1e --- /dev/null +++ b/include/share/grabbag/picture.h @@ -0,0 +1,54 @@ +/* grabbag - Convenience lib for various routines common to several tools + * Copyright (C) 2006-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */ + +#ifndef GRABBAG__PICTURE_H +#define GRABBAG__PICTURE_H + +#include "FLAC/metadata.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* spec should be of the form "[TYPE]|MIME_TYPE|[DESCRIPTION]|[WIDTHxHEIGHTxDEPTH[/COLORS]]|FILE", e.g. + * "|image/jpeg|||cover.jpg" + * "4|image/jpeg||300x300x24|backcover.jpg" + * "|image/png|description|300x300x24/71|cover.png" + * "-->|image/gif||300x300x24/71|http://blah.blah.blah/cover.gif" + * + * empty type means default to FLAC__STREAM_METADATA_PICTURE_TYPE_FRONT_COVER + * empty resolution spec means to get from the file (cannot get used with "-->" linked images) + * spec and error_message must not be NULL + */ +FLAC__StreamMetadata *grabbag__picture_parse_specification(const char *spec, const char **error_message); + +typedef struct PictureResolution +{ uint32_t width, height, depth, colors ; +} PictureResolution ; + +FLAC__StreamMetadata *grabbag__picture_from_specification(int type, const char *mime_type, const char * description, + const PictureResolution * res, const char * filepath, const char **error_message); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/share/grabbag/replaygain.h b/include/share/grabbag/replaygain.h new file mode 100644 index 00000000..faa32720 --- /dev/null +++ b/include/share/grabbag/replaygain.h @@ -0,0 +1,73 @@ +/* grabbag - Convenience lib for various routines common to several tools + * Copyright (C) 2002-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * This wraps the replaygain_analysis lib, which is LGPL. This wrapper + * allows analysis of different input resolutions by automatically + * scaling the input signal + */ + +/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */ + +#ifndef GRABBAG__REPLAYGAIN_H +#define GRABBAG__REPLAYGAIN_H + +#include "FLAC/metadata.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const unsigned GRABBAG__REPLAYGAIN_MAX_TAG_SPACE_REQUIRED; + +extern const FLAC__byte * const GRABBAG__REPLAYGAIN_TAG_REFERENCE_LOUDNESS; /* = "REPLAYGAIN_REFERENCE_LOUDNESS" */ +extern const FLAC__byte * const GRABBAG__REPLAYGAIN_TAG_TITLE_GAIN; /* = "REPLAYGAIN_TRACK_GAIN" */ +extern const FLAC__byte * const GRABBAG__REPLAYGAIN_TAG_TITLE_PEAK; /* = "REPLAYGAIN_TRACK_PEAK" */ +extern const FLAC__byte * const GRABBAG__REPLAYGAIN_TAG_ALBUM_GAIN; /* = "REPLAYGAIN_ALBUM_GAIN" */ +extern const FLAC__byte * const GRABBAG__REPLAYGAIN_TAG_ALBUM_PEAK; /* = "REPLAYGAIN_ALBUM_PEAK" */ + +FLAC__bool grabbag__replaygain_is_valid_sample_frequency(unsigned sample_frequency); + +FLAC__bool grabbag__replaygain_init(unsigned sample_frequency); + +/* 'bps' must be valid for FLAC, i.e. >=4 and <= 32 */ +FLAC__bool grabbag__replaygain_analyze(const FLAC__int32 * const input[], FLAC__bool is_stereo, unsigned bps, unsigned samples); + +void grabbag__replaygain_get_album(float *gain, float *peak); +void grabbag__replaygain_get_title(float *gain, float *peak); + +/* These three functions return an error string on error, or NULL if successful */ +const char *grabbag__replaygain_analyze_file(const char *filename, float *title_gain, float *title_peak); +const char *grabbag__replaygain_store_to_vorbiscomment(FLAC__StreamMetadata *block, float album_gain, float album_peak, float title_gain, float title_peak); +const char *grabbag__replaygain_store_to_vorbiscomment_reference(FLAC__StreamMetadata *block); +const char *grabbag__replaygain_store_to_vorbiscomment_album(FLAC__StreamMetadata *block, float album_gain, float album_peak); +const char *grabbag__replaygain_store_to_vorbiscomment_title(FLAC__StreamMetadata *block, float title_gain, float title_peak); +const char *grabbag__replaygain_store_to_file(const char *filename, float album_gain, float album_peak, float title_gain, float title_peak, FLAC__bool preserve_modtime); +const char *grabbag__replaygain_store_to_file_reference(const char *filename, FLAC__bool preserve_modtime); +const char *grabbag__replaygain_store_to_file_album(const char *filename, float album_gain, float album_peak, FLAC__bool preserve_modtime); +const char *grabbag__replaygain_store_to_file_title(const char *filename, float title_gain, float title_peak, FLAC__bool preserve_modtime); + +FLAC__bool grabbag__replaygain_load_from_vorbiscomment(const FLAC__StreamMetadata *block, FLAC__bool album_mode, FLAC__bool strict, double *reference, double *gain, double *peak); +double grabbag__replaygain_compute_scale_factor(double peak, double gain, double preamp, FLAC__bool prevent_clipping); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/share/grabbag/seektable.h b/include/share/grabbag/seektable.h new file mode 100644 index 00000000..ac294a3e --- /dev/null +++ b/include/share/grabbag/seektable.h @@ -0,0 +1,39 @@ +/* grabbag - Convenience lib for various routines common to several tools + * Copyright (C) 2002-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Convenience routines for working with seek tables */ + +/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */ + +#ifndef GRABAG__SEEKTABLE_H +#define GRABAG__SEEKTABLE_H + +#include "FLAC/format.h" + +#ifdef __cplusplus +extern "C" { +#endif + +FLAC__bool grabbag__seektable_convert_specification_to_template(const char *spec, FLAC__bool only_explicit_placeholders, FLAC__uint64 total_samples_to_encode, unsigned sample_rate, FLAC__StreamMetadata *seektable_template, FLAC__bool *spec_has_real_points); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/share/macros.h b/include/share/macros.h new file mode 100644 index 00000000..20b3ea56 --- /dev/null +++ b/include/share/macros.h @@ -0,0 +1,45 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2013-2016 Xiph.org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <errno.h> + +/* FLAC_CHECK_RETURN : Check the return value of the provided function and + * print an error message if it fails (ie returns a value < 0). + * + * Ideally, a library should not print anything, but this macro is only used + * for things that extremely unlikely to fail, like `chown` to a previoulsy + * saved `uid`. + */ + +#define FLAC_CHECK_RETURN(x) \ + { if ((x) < 0) \ + fprintf (stderr, "%s : %s\n", #x, strerror (errno)) ; \ + } diff --git a/include/share/private.h b/include/share/private.h index 3a500d3c..f7e3b853 100644 --- a/include/share/private.h +++ b/include/share/private.h @@ -1,5 +1,5 @@ /* libFLAC - Free Lossless Audio Codec library - * Copyright (C) 2013-2014 Xiph.org Foundation + * Copyright (C) 2013-2016 Xiph.org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/include/share/replaygain_analysis.h b/include/share/replaygain_analysis.h new file mode 100644 index 00000000..f06a9b29 --- /dev/null +++ b/include/share/replaygain_analysis.h @@ -0,0 +1,59 @@ +/* + * ReplayGainAnalysis - analyzes input samples and give the recommended dB change + * Copyright (C) 2001 David Robinson and Glen Sawyer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * concept and filter values by David Robinson (David@Robinson.org) + * -- blame him if you think the idea is flawed + * coding by Glen Sawyer (glensawyer@hotmail.com) 442 N 700 E, Provo, UT 84606 USA + * -- blame him if you think this runs too slowly, or the coding is otherwise flawed + * minor cosmetic tweaks to integrate with FLAC by Josh Coalson + * + * For an explanation of the concepts and the basic algorithms involved, go to: + * http://www.replaygain.org/ + */ + +#ifndef GAIN_ANALYSIS_H +#define GAIN_ANALYSIS_H + +#include <stddef.h> + +#define GAIN_NOT_ENOUGH_SAMPLES -24601 +#define GAIN_ANALYSIS_ERROR 0 +#define GAIN_ANALYSIS_OK 1 + +#define INIT_GAIN_ANALYSIS_ERROR 0 +#define INIT_GAIN_ANALYSIS_OK 1 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef float flac_float_t; /* Type used for filtering */ + +extern flac_float_t ReplayGainReferenceLoudness; /* in dB SPL, currently == 89.0 */ + +int InitGainAnalysis ( long samplefreq ); +int ValidGainFrequency ( long samplefreq ); +int AnalyzeSamples ( const flac_float_t* left_samples, const flac_float_t* right_samples, size_t num_samples, int num_channels ); +flac_float_t GetTitleGain ( void ); +flac_float_t GetAlbumGain ( void ); + +#ifdef __cplusplus +} +#endif + +#endif /* GAIN_ANALYSIS_H */ diff --git a/include/share/replaygain_synthesis.h b/include/share/replaygain_synthesis.h new file mode 100644 index 00000000..5f4c3ffb --- /dev/null +++ b/include/share/replaygain_synthesis.h @@ -0,0 +1,52 @@ +/* replaygain_synthesis - Routines for applying ReplayGain to a signal + * Copyright (C) 2002-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef FLAC__SHARE__REPLAYGAIN_SYNTHESIS_H +#define FLAC__SHARE__REPLAYGAIN_SYNTHESIS_H + +#include <stdlib.h> /* for size_t */ +#include "FLAC/format.h" + +#define FLAC_SHARE__MAX_SUPPORTED_CHANNELS FLAC__MAX_CHANNELS + +typedef enum { + NOISE_SHAPING_NONE = 0, + NOISE_SHAPING_LOW = 1, + NOISE_SHAPING_MEDIUM = 2, + NOISE_SHAPING_HIGH = 3 +} NoiseShaping; + +typedef struct { + const float* FilterCoeff; + FLAC__uint64 Mask; + double Add; + float Dither; + float ErrorHistory [FLAC_SHARE__MAX_SUPPORTED_CHANNELS] [16]; /* 16th order Noise shaping */ + float DitherHistory [FLAC_SHARE__MAX_SUPPORTED_CHANNELS] [16]; + int LastRandomNumber [FLAC_SHARE__MAX_SUPPORTED_CHANNELS]; + unsigned LastHistoryIndex; + NoiseShaping ShapingType; +} DitherContext; + +void FLAC__replaygain_synthesis__init_dither_context(DitherContext *dither, int bits, int shapingtype); + +/* scale = (float) pow(10., (double)replaygain * 0.05); */ +size_t FLAC__replaygain_synthesis__apply_gain(FLAC__byte *data_out, FLAC__bool little_endian_data_out, FLAC__bool unsigned_data_out, const FLAC__int32 * const input[], unsigned wide_samples, unsigned channels, const unsigned source_bps, const unsigned target_bps, const double scale, const FLAC__bool hard_limit, FLAC__bool do_dithering, DitherContext *dither_context); + +#endif diff --git a/include/share/safe_str.h b/include/share/safe_str.h new file mode 100644 index 00000000..eb974c51 --- /dev/null +++ b/include/share/safe_str.h @@ -0,0 +1,69 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2013-2016 Xiph.org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Safe string handling functions to replace things like strcpy, strncpy, + * strcat, strncat etc. + * All of these functions guarantee a correctly NUL terminated string but + * the string may be truncated if the destination buffer was too short. + */ + +#ifndef FLAC__SHARE_SAFE_STR_H +#define FLAC__SHARE_SAFE_STR_H + +static inline char * +safe_strncat(char *dest, const char *src, size_t dest_size) +{ + char * ret; + + if (dest_size < 1) + return dest; + + ret = strncat(dest, src, dest_size - strlen (dest)); + dest [dest_size - 1] = 0; + + return ret; +} + +static inline char * +safe_strncpy(char *dest, const char *src, size_t dest_size) +{ + char * ret; + + if (dest_size < 1) + return dest; + + ret = strncpy(dest, src, dest_size); + dest [dest_size - 1] = 0; + + return ret; +} + +#endif /* FLAC__SHARE_SAFE_STR_H */ diff --git a/include/share/utf8.h b/include/share/utf8.h new file mode 100644 index 00000000..7d6650d6 --- /dev/null +++ b/include/share/utf8.h @@ -0,0 +1,25 @@ +#ifndef SHARE__UTF8_H +#define SHARE__UTF8_H + +/* + * Convert a string between UTF-8 and the locale's charset. + * Invalid bytes are replaced by '#', and characters that are + * not available in the target encoding are replaced by '?'. + * + * If the locale's charset is not set explicitly then it is + * obtained using nl_langinfo(CODESET), where available, the + * environment variable CHARSET, or assumed to be US-ASCII. + * + * Return value of conversion functions: + * + * -1 : memory allocation failed + * 0 : data was converted exactly + * 1 : valid data was converted approximately (using '?') + * 2 : input was invalid (but still converted, using '#') + * 3 : unknown encoding (but still converted, using '?') + */ + +int utf8_encode(const char *from, char **to); +int utf8_decode(const char *from, char **to); + +#endif diff --git a/include/share/win_utf8_io.h b/include/share/win_utf8_io.h new file mode 100644 index 00000000..13fd1186 --- /dev/null +++ b/include/share/win_utf8_io.h @@ -0,0 +1,58 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2013-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _WIN32 + +#ifndef flac__win_utf8_io_h +#define flac__win_utf8_io_h + +#include <stdio.h> +#include <stdarg.h> + +#ifdef __cplusplus +extern "C" { +#endif + +size_t strlen_utf8(const char *str); +int win_get_console_width(void); + +int get_utf8_argv(int *argc, char ***argv); + +int printf_utf8(const char *format, ...); +int fprintf_utf8(FILE *stream, const char *format, ...); +int vfprintf_utf8(FILE *stream, const char *format, va_list argptr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif +#endif diff --git a/include/share/windows_unicode_filenames.h b/include/share/windows_unicode_filenames.h new file mode 100644 index 00000000..86820ca1 --- /dev/null +++ b/include/share/windows_unicode_filenames.h @@ -0,0 +1,67 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2013-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _WIN32 + +#ifndef flac__windows_unicode_filenames_h +#define flac__windows_unicode_filenames_h + +#include <stdio.h> +#include <sys/stat.h> +#include <sys/utime.h> +#include "FLAC/ordinals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void flac_internal_set_utf8_filenames(FLAC__bool flag); +FLAC__bool flac_internal_get_utf8_filenames(void); +#define flac_set_utf8_filenames flac_internal_set_utf8_filenames +#define flac_get_utf8_filenames flac_internal_get_utf8_filenames + +FILE* flac_internal_fopen_utf8(const char *filename, const char *mode); +int flac_internal_stat64_utf8(const char *path, struct __stat64 *buffer); +int flac_internal_chmod_utf8(const char *filename, int pmode); +int flac_internal_utime_utf8(const char *filename, struct utimbuf *times); +int flac_internal_unlink_utf8(const char *filename); +int flac_internal_rename_utf8(const char *oldname, const char *newname); + +#include <windows.h> +HANDLE WINAPI flac_internal_CreateFile_utf8(const char *lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes, DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLE hTemplateFile); +#define CreateFile_utf8 flac_internal_CreateFile_utf8 + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif +#endif diff --git a/libFLAC/Android.bp b/libFLAC/Android.bp index eb6198cf..d1b51f10 100644 --- a/libFLAC/Android.bp +++ b/libFLAC/Android.bp @@ -41,6 +41,7 @@ cc_library_static { "-funroll-loops", "-finline-functions", "-Werror", + "-Wno-unused-parameter", ], arch: { diff --git a/libFLAC/bitmath.c b/libFLAC/bitmath.c index 5b58ca99..b3d797d3 100644 --- a/libFLAC/bitmath.c +++ b/libFLAC/bitmath.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2001-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -60,50 +60,14 @@ * silog2( 9) = 5 * silog2( 10) = 5 */ -unsigned FLAC__bitmath_silog2(int v) +unsigned FLAC__bitmath_silog2(FLAC__int64 v) { - while(1) { - if(v == 0) { - return 0; - } - else if(v > 0) { - unsigned l = 0; - while(v) { - l++; - v >>= 1; - } - return l+1; - } - else if(v == -1) { - return 2; - } - else { - v++; - v = -v; - } - } -} + if(v == 0) + return 0; -unsigned FLAC__bitmath_silog2_wide(FLAC__int64 v) -{ - while(1) { - if(v == 0) { - return 0; - } - else if(v > 0) { - unsigned l = 0; - while(v) { - l++; - v >>= 1; - } - return l+1; - } - else if(v == -1) { - return 2; - } - else { - v++; - v = -v; - } - } + if(v == -1) + return 2; + + v = (v < 0) ? (-(v+1)) : v; + return FLAC__bitmath_ilog2_wide(v)+2; } diff --git a/libFLAC/bitreader.c b/libFLAC/bitreader.c index 67d0f371..ab62d414 100644 --- a/libFLAC/bitreader.c +++ b/libFLAC/bitreader.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -45,18 +45,43 @@ #include "share/endswap.h" /* Things should be fastest when this matches the machine word size */ -/* WATCHOUT: if you change this you must also change the following #defines down to FLAC__clz_uint32 below to match */ -/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */ +/* WATCHOUT: if you change this you must also change the following #defines down to COUNT_ZERO_MSBS2 below to match */ +/* WATCHOUT: there are a few places where the code will not work unless brword is >= 32 bits wide */ /* also, some sections currently only have fast versions for 4 or 8 bytes per word */ -#define FLAC__BYTES_PER_WORD 4 /* sizeof uint32_t */ -#define FLAC__BITS_PER_WORD (8 * FLAC__BYTES_PER_WORD) + +#if (ENABLE_64_BIT_WORDS == 0) + +typedef FLAC__uint32 brword; +#define FLAC__BYTES_PER_WORD 4 /* sizeof brword */ +#define FLAC__BITS_PER_WORD 32 #define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff) -/* SWAP_BE_WORD_TO_HOST swaps bytes in a uint32_t (which is always big-endian) if necessary to match host byte order */ +/* SWAP_BE_WORD_TO_HOST swaps bytes in a brword (which is always big-endian) if necessary to match host byte order */ #if WORDS_BIGENDIAN #define SWAP_BE_WORD_TO_HOST(x) (x) #else #define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x) #endif +/* counts the # of zero MSBs in a word */ +#define COUNT_ZERO_MSBS(word) FLAC__clz_uint32(word) +#define COUNT_ZERO_MSBS2(word) FLAC__clz2_uint32(word) + +#else + +typedef FLAC__uint64 brword; +#define FLAC__BYTES_PER_WORD 8 /* sizeof brword */ +#define FLAC__BITS_PER_WORD 64 +#define FLAC__WORD_ALL_ONES ((FLAC__uint64)FLAC__U64L(0xffffffffffffffff)) +/* SWAP_BE_WORD_TO_HOST swaps bytes in a brword (which is always big-endian) if necessary to match host byte order */ +#if WORDS_BIGENDIAN +#define SWAP_BE_WORD_TO_HOST(x) (x) +#else +#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_64(x) +#endif +/* counts the # of zero MSBs in a word */ +#define COUNT_ZERO_MSBS(word) FLAC__clz_uint64(word) +#define COUNT_ZERO_MSBS2(word) FLAC__clz2_uint64(word) + +#endif /* * This should be at least twice as large as the largest number of words @@ -77,7 +102,7 @@ static const unsigned FLAC__BITREADER_DEFAULT_CAPACITY = 65536u / FLAC__BITS_PER struct FLAC__BitReader { /* any partially-consumed word at the head will stay right-justified as bits are consumed from the left */ /* any incomplete word at the tail will be left-justified, and bytes from the read callback are added on the right */ - uint32_t *buffer; + brword *buffer; unsigned capacity; /* in words */ unsigned words; /* # of completed words in buffer */ unsigned bytes; /* # of bytes in incomplete word at buffer[words] */ @@ -89,7 +114,7 @@ struct FLAC__BitReader { void *client_data; }; -static inline void crc16_update_word_(FLAC__BitReader *br, uint32_t word) +static inline void crc16_update_word_(FLAC__BitReader *br, brword word) { register unsigned crc = br->read_crc16; #if FLAC__BYTES_PER_WORD == 4 @@ -142,7 +167,7 @@ static FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br) return false; /* no space left, buffer is too small; see note for FLAC__BITREADER_DEFAULT_CAPACITY */ target = ((FLAC__byte*)(br->buffer+br->words)) + br->bytes; - /* before reading, if the existing reader looks like this (say uint32_t is 32 bits wide) + /* before reading, if the existing reader looks like this (say brword is 32 bits wide) * bitstream : 11 22 33 44 55 br->words=1 br->bytes=1 (partial tail word is left-justified) * buffer[BE]: 11 22 33 44 55 ?? ?? ?? (shown layed out as bytes sequentially in memory) * buffer[LE]: 44 33 22 11 ?? ?? ?? 55 (?? being don't-care) @@ -175,7 +200,7 @@ static FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br) */ #if WORDS_BIGENDIAN #else - end = (br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes + (FLAC__BYTES_PER_WORD-1)) / FLAC__BYTES_PER_WORD; + end = (br->words*FLAC__BYTES_PER_WORD + br->bytes + (unsigned)bytes + (FLAC__BYTES_PER_WORD-1)) / FLAC__BYTES_PER_WORD; for(start = br->words; start < end; start++) br->buffer[start] = SWAP_BE_WORD_TO_HOST(br->buffer[start]); #endif @@ -186,7 +211,7 @@ static FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br) * buffer[LE]: 44 33 22 11 88 77 66 55 CC BB AA 99 ?? FF EE DD * finally we'll update the reader values: */ - end = br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes; + end = br->words*FLAC__BYTES_PER_WORD + br->bytes + (unsigned)bytes; br->words = end / FLAC__BYTES_PER_WORD; br->bytes = end % FLAC__BYTES_PER_WORD; @@ -236,7 +261,7 @@ FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__BitReaderReadCallback br->words = br->bytes = 0; br->consumed_words = br->consumed_bits = 0; br->capacity = FLAC__BITREADER_DEFAULT_CAPACITY; - br->buffer = malloc(sizeof(uint32_t) * br->capacity); + br->buffer = malloc(sizeof(brword) * br->capacity); if(br->buffer == 0) return false; br->read_callback = rcb; @@ -281,7 +306,7 @@ void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out) if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits)) fprintf(out, "."); else - fprintf(out, "%01u", br->buffer[i] & (1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0); + fprintf(out, "%01u", br->buffer[i] & ((brword)1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0); fprintf(out, "\n"); } if(br->bytes > 0) { @@ -290,7 +315,7 @@ void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out) if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits)) fprintf(out, "."); else - fprintf(out, "%01u", br->buffer[i] & (1 << (br->bytes*8-j-1)) ? 1:0); + fprintf(out, "%01u", br->buffer[i] & ((brword)1 << (br->bytes*8-j-1)) ? 1:0); fprintf(out, "\n"); } } @@ -315,7 +340,7 @@ FLAC__uint16 FLAC__bitreader_get_read_crc16(FLAC__BitReader *br) /* CRC any tail bytes in a partially-consumed word */ if(br->consumed_bits) { - const uint32_t tail = br->buffer[br->consumed_words]; + const brword tail = br->buffer[br->consumed_words]; for( ; br->crc16_align < br->consumed_bits; br->crc16_align += 8) br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)((tail >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff), br->read_crc16); } @@ -363,33 +388,34 @@ FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *va if(br->consumed_bits) { /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */ const unsigned n = FLAC__BITS_PER_WORD - br->consumed_bits; - const uint32_t word = br->buffer[br->consumed_words]; + const brword word = br->buffer[br->consumed_words]; if(bits < n) { - *val = (word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (n-bits); + *val = (FLAC__uint32)((word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (n-bits)); /* The result has <= 32 non-zero bits */ br->consumed_bits += bits; return true; } - *val = word & (FLAC__WORD_ALL_ONES >> br->consumed_bits); + /* (FLAC__BITS_PER_WORD - br->consumed_bits <= bits) ==> (FLAC__WORD_ALL_ONES >> br->consumed_bits) has no more than 'bits' non-zero bits */ + *val = (FLAC__uint32)(word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)); bits -= n; crc16_update_word_(br, word); br->consumed_words++; br->consumed_bits = 0; if(bits) { /* if there are still bits left to read, there have to be less than 32 so they will all be in the next word */ *val <<= bits; - *val |= (br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits)); + *val |= (FLAC__uint32)(br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits)); br->consumed_bits = bits; } return true; } - else { - const uint32_t word = br->buffer[br->consumed_words]; + else { /* br->consumed_bits == 0 */ + const brword word = br->buffer[br->consumed_words]; if(bits < FLAC__BITS_PER_WORD) { - *val = word >> (FLAC__BITS_PER_WORD-bits); + *val = (FLAC__uint32)(word >> (FLAC__BITS_PER_WORD-bits)); br->consumed_bits = bits; return true; } - /* at this point 'bits' must be == FLAC__BITS_PER_WORD; because of previous assertions, it can't be larger */ - *val = word; + /* at this point bits == FLAC__BITS_PER_WORD == 32; because of previous assertions, it can't be larger */ + *val = (FLAC__uint32)word; crc16_update_word_(br, word); br->consumed_words++; return true; @@ -404,12 +430,12 @@ FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *va if(br->consumed_bits) { /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */ FLAC__ASSERT(br->consumed_bits + bits <= br->bytes*8); - *val = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (FLAC__BITS_PER_WORD-br->consumed_bits-bits); + *val = (FLAC__uint32)((br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (FLAC__BITS_PER_WORD-br->consumed_bits-bits)); br->consumed_bits += bits; return true; } else { - *val = br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits); + *val = (FLAC__uint32)(br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits)); br->consumed_bits += bits; return true; } @@ -565,7 +591,7 @@ FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, F /* step 2: read whole words in chunks */ while(nvals >= FLAC__BYTES_PER_WORD) { if(br->consumed_words < br->words) { - const uint32_t word = br->buffer[br->consumed_words++]; + const brword word = br->buffer[br->consumed_words++]; #if FLAC__BYTES_PER_WORD == 4 val[0] = (FLAC__byte)(word >> 24); val[1] = (FLAC__byte)(word >> 16); @@ -630,9 +656,9 @@ FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *va *val = 0; while(1) { while(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */ - uint32_t b = br->buffer[br->consumed_words] << br->consumed_bits; + brword b = br->buffer[br->consumed_words] << br->consumed_bits; if(b) { - i = FLAC__clz_uint32(b); + i = COUNT_ZERO_MSBS(b); *val += i; i++; br->consumed_bits += i; @@ -660,9 +686,9 @@ FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *va */ if(br->bytes*8 > br->consumed_bits) { const unsigned end = br->bytes * 8; - uint32_t b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits; + brword b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits; if(b) { - i = FLAC__clz_uint32(b); + i = COUNT_ZERO_MSBS(b); *val += i; i++; br->consumed_bits += i; @@ -717,7 +743,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ * bitreader functions that use them, and before returning */ unsigned cwords, words, lsbs, msbs, x, y; unsigned ucbits; /* keep track of the number of unconsumed bits in word */ - uint32_t b; + brword b; int *val, *end; FLAC__ASSERT(0 != br); @@ -758,7 +784,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ while(val < end) { /* read the unary MSBs and end bit */ - x = y = FLAC__clz2_uint32(b); + x = y = COUNT_ZERO_MSBS2(b); if(x == FLAC__BITS_PER_WORD) { x = ucbits; do { @@ -767,7 +793,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ if (cwords >= words) goto incomplete_msbs; b = br->buffer[cwords]; - y = FLAC__clz2_uint32(b); + y = COUNT_ZERO_MSBS2(b); x += y; } while(y == FLAC__BITS_PER_WORD); } @@ -777,7 +803,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ msbs = x; /* read the binary LSBs */ - x = b >> (FLAC__BITS_PER_WORD - parameter); + x = (FLAC__uint32)(b >> (FLAC__BITS_PER_WORD - parameter)); /* parameter < 32, so we can cast to 32-bit unsigned */ if(parameter <= ucbits) { ucbits -= parameter; b <<= parameter; @@ -788,7 +814,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ goto incomplete_lsbs; b = br->buffer[cwords]; ucbits += FLAC__BITS_PER_WORD - parameter; - x |= b >> ucbits; + x |= (FLAC__uint32)(b >> ucbits); b <<= FLAC__BITS_PER_WORD - ucbits; } lsbs = x; diff --git a/libFLAC/bitwriter.c b/libFLAC/bitwriter.c index dcdd93e2..402b1c49 100644 --- a/libFLAC/bitwriter.c +++ b/libFLAC/bitwriter.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -46,33 +46,50 @@ /* Things should be fastest when this matches the machine word size */ /* WATCHOUT: if you change this you must also change the following #defines down to SWAP_BE_WORD_TO_HOST below to match */ -/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */ -#define FLAC__BYTES_PER_WORD 4 +/* WATCHOUT: there are a few places where the code will not work unless bwword is >= 32 bits wide */ + +#if (ENABLE_64_BIT_WORDS == 0) + +typedef FLAC__uint32 bwword; +#define FLAC__BYTES_PER_WORD 4 /* sizeof bwword */ #define FLAC__BITS_PER_WORD 32 -#define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff) -/* SWAP_BE_WORD_TO_HOST swaps bytes in a uint32_t (which is always big-endian) if necessary to match host byte order */ +/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */ #if WORDS_BIGENDIAN #define SWAP_BE_WORD_TO_HOST(x) (x) #else #define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x) #endif +#else + +typedef FLAC__uint64 bwword; +#define FLAC__BYTES_PER_WORD 8 /* sizeof bwword */ +#define FLAC__BITS_PER_WORD 64 +/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */ +#if WORDS_BIGENDIAN +#define SWAP_BE_WORD_TO_HOST(x) (x) +#else +#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_64(x) +#endif + +#endif + /* * The default capacity here doesn't matter too much. The buffer always grows * to hold whatever is written to it. Usually the encoder will stop adding at * a frame or metadata block, then write that out and clear the buffer for the * next one. */ -static const unsigned FLAC__BITWRITER_DEFAULT_CAPACITY = 32768u / sizeof(uint32_t); /* size in words */ +static const unsigned FLAC__BITWRITER_DEFAULT_CAPACITY = 32768u / sizeof(bwword); /* size in words */ /* When growing, increment 4K at a time */ -static const unsigned FLAC__BITWRITER_DEFAULT_INCREMENT = 4096u / sizeof(uint32_t); /* size in words */ +static const unsigned FLAC__BITWRITER_DEFAULT_INCREMENT = 4096u / sizeof(bwword); /* size in words */ #define FLAC__WORDS_TO_BITS(words) ((words) * FLAC__BITS_PER_WORD) #define FLAC__TOTAL_BITS(bw) (FLAC__WORDS_TO_BITS((bw)->words) + (bw)->bits) struct FLAC__BitWriter { - uint32_t *buffer; - uint32_t accum; /* accumulator; bits are right-justified; when full, accum is appended to buffer */ + bwword *buffer; + bwword accum; /* accumulator; bits are right-justified; when full, accum is appended to buffer */ unsigned capacity; /* capacity of buffer in words */ unsigned words; /* # of complete words in buffer */ unsigned bits; /* # of used bits in accum */ @@ -85,7 +102,7 @@ static FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, unsigned bits_to_add) { unsigned new_capacity; - uint32_t *new_buffer; + bwword *new_buffer; FLAC__ASSERT(0 != bw); FLAC__ASSERT(0 != bw->buffer); @@ -107,7 +124,7 @@ FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, unsigned bits_to_add) FLAC__ASSERT(new_capacity > bw->capacity); FLAC__ASSERT(new_capacity >= bw->words + ((bw->bits + bits_to_add + FLAC__BITS_PER_WORD - 1) / FLAC__BITS_PER_WORD)); - new_buffer = safe_realloc_mul_2op_(bw->buffer, sizeof(uint32_t), /*times*/new_capacity); + new_buffer = safe_realloc_mul_2op_(bw->buffer, sizeof(bwword), /*times*/new_capacity); if(new_buffer == 0) return false; bw->buffer = new_buffer; @@ -149,7 +166,7 @@ FLAC__bool FLAC__bitwriter_init(FLAC__BitWriter *bw) bw->words = bw->bits = 0; bw->capacity = FLAC__BITWRITER_DEFAULT_CAPACITY; - bw->buffer = malloc(sizeof(uint32_t) * bw->capacity); + bw->buffer = malloc(sizeof(bwword) * bw->capacity); if(bw->buffer == 0) return false; @@ -184,13 +201,13 @@ void FLAC__bitwriter_dump(const FLAC__BitWriter *bw, FILE *out) for(i = 0; i < bw->words; i++) { fprintf(out, "%08X: ", i); for(j = 0; j < FLAC__BITS_PER_WORD; j++) - fprintf(out, "%01u", bw->buffer[i] & (1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0); + fprintf(out, "%01u", bw->buffer[i] & ((bwword)1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0); fprintf(out, "\n"); } if(bw->bits > 0) { fprintf(out, "%08X: ", i); for(j = 0; j < bw->bits; j++) - fprintf(out, "%01u", bw->accum & (1 << (bw->bits-j-1)) ? 1:0); + fprintf(out, "%01u", bw->accum & ((bwword)1 << (bw->bits-j-1)) ? 1:0); fprintf(out, "\n"); } } @@ -302,20 +319,24 @@ inline FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsigned bit return true; } -inline FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, unsigned bits) +static inline FLAC__bool FLAC__bitwriter_write_raw_uint32_nocheck(FLAC__BitWriter *bw, FLAC__uint32 val, unsigned bits) { register unsigned left; /* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */ FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32); - FLAC__ASSERT(0 != bw); - FLAC__ASSERT(0 != bw->buffer); + if(bw == 0 || bw->buffer == 0) + return false; + + if (bits > 32) + return false; - FLAC__ASSERT(bits <= 32); if(bits == 0) return true; + FLAC__ASSERT((bits == 32) || (val>>bits == 0)); + /* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+bits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */ if(bw->capacity <= bw->words + bits && !bitwriter_grow_(bw, bits)) return false; @@ -330,24 +351,31 @@ inline FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__ui bw->accum <<= left; bw->accum |= val >> (bw->bits = bits - left); bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); - bw->accum = val; + bw->accum = val; /* unused top bits can contain garbage */ } - else { - bw->accum = val; - bw->bits = 0; - bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(val); + else { /* at this point bits == FLAC__BITS_PER_WORD == 32 and bw->bits == 0 */ + bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST((bwword)val); } return true; } +inline FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, unsigned bits) +{ + /* check that unused bits are unset */ + if((bits < 32) && (val>>bits != 0)) + return false; + + return FLAC__bitwriter_write_raw_uint32_nocheck(bw, val, bits); +} + inline FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, unsigned bits) { /* zero-out unused bits */ if(bits < 32) val &= (~(0xffffffff << bits)); - return FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, bits); + return FLAC__bitwriter_write_raw_uint32_nocheck(bw, (FLAC__uint32)val, bits); } inline FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, unsigned bits) @@ -356,7 +384,7 @@ inline FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__ui if(bits > 32) { return FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)(val>>32), bits-32) && - FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, 32); + FLAC__bitwriter_write_raw_uint32_nocheck(bw, (FLAC__uint32)val, 32); } else return FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, bits); @@ -366,13 +394,13 @@ inline FLAC__bool FLAC__bitwriter_write_raw_uint32_little_endian(FLAC__BitWriter { /* this doesn't need to be that fast as currently it is only used for vorbis comments */ - if(!FLAC__bitwriter_write_raw_uint32(bw, val & 0xff, 8)) + if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, val & 0xff, 8)) return false; - if(!FLAC__bitwriter_write_raw_uint32(bw, (val>>8) & 0xff, 8)) + if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, (val>>8) & 0xff, 8)) return false; - if(!FLAC__bitwriter_write_raw_uint32(bw, (val>>16) & 0xff, 8)) + if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, (val>>16) & 0xff, 8)) return false; - if(!FLAC__bitwriter_write_raw_uint32(bw, val>>24, 8)) + if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, val>>24, 8)) return false; return true; @@ -384,7 +412,7 @@ inline FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FL /* this could be faster but currently we don't need it to be since it's only used for writing metadata */ for(i = 0; i < nvals; i++) { - if(!FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)(vals[i]), 8)) + if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, (FLAC__uint32)(vals[i]), 8)) return false; } @@ -394,21 +422,23 @@ inline FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FL FLAC__bool FLAC__bitwriter_write_unary_unsigned(FLAC__BitWriter *bw, unsigned val) { if(val < 32) - return FLAC__bitwriter_write_raw_uint32(bw, 1, ++val); + return FLAC__bitwriter_write_raw_uint32_nocheck(bw, 1, ++val); else return FLAC__bitwriter_write_zeroes(bw, val) && - FLAC__bitwriter_write_raw_uint32(bw, 1, 1); + FLAC__bitwriter_write_raw_uint32_nocheck(bw, 1, 1); } unsigned FLAC__bitwriter_rice_bits(FLAC__int32 val, unsigned parameter) { FLAC__uint32 uval; - FLAC__ASSERT(parameter < sizeof(unsigned)*8); + FLAC__ASSERT(parameter < 32); /* fold signed to unsigned; actual formula is: negative(v)? -2v-1 : 2v */ - uval = (val<<1) ^ (val>>31); + uval = val; + uval <<= 1; + uval ^= (val>>31); return 1 + parameter + (uval >> parameter); } @@ -484,10 +514,12 @@ FLAC__bool FLAC__bitwriter_write_rice_signed(FLAC__BitWriter *bw, FLAC__int32 va FLAC__ASSERT(0 != bw); FLAC__ASSERT(0 != bw->buffer); - FLAC__ASSERT(parameter < 8*sizeof(uval)); + FLAC__ASSERT(parameter < 32); /* fold signed to unsigned; actual formula is: negative(v)? -2v-1 : 2v */ - uval = (val<<1) ^ (val>>31); + uval = val; + uval <<= 1; + uval ^= (val>>31); msbs = uval >> parameter; interesting_bits = 1 + parameter; @@ -505,16 +537,16 @@ FLAC__bool FLAC__bitwriter_write_rice_signed(FLAC__BitWriter *bw, FLAC__int32 va FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FLAC__int32 *vals, unsigned nvals, unsigned parameter) { - const FLAC__uint32 mask1 = FLAC__WORD_ALL_ONES << parameter; /* we val|=mask1 to set the stop bit above it... */ - const FLAC__uint32 mask2 = FLAC__WORD_ALL_ONES >> (31-parameter); /* ...then mask off the bits above the stop bit with val&=mask2*/ + const FLAC__uint32 mask1 = (FLAC__uint32)0xffffffff << parameter; /* we val|=mask1 to set the stop bit above it... */ + const FLAC__uint32 mask2 = (FLAC__uint32)0xffffffff >> (31-parameter); /* ...then mask off the bits above the stop bit with val&=mask2 */ FLAC__uint32 uval; unsigned left; const unsigned lsbits = 1 + parameter; - unsigned msbits; + unsigned msbits, total_bits; FLAC__ASSERT(0 != bw); FLAC__ASSERT(0 != bw->buffer); - FLAC__ASSERT(parameter < 8*sizeof(uint32_t)-1); + FLAC__ASSERT(parameter < 31); /* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */ FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32); @@ -525,19 +557,20 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL uval ^= (*vals>>31); msbits = uval >> parameter; + total_bits = lsbits + msbits; - if(bw->bits && bw->bits + msbits + lsbits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current uint32_t */ - /* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free uint32_t to work in */ - bw->bits = bw->bits + msbits + lsbits; + if(bw->bits && bw->bits + total_bits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */ + /* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free bwword to work in */ + bw->bits += total_bits; uval |= mask1; /* set stop bit */ uval &= mask2; /* mask off unused top bits */ - bw->accum <<= msbits + lsbits; + bw->accum <<= total_bits; bw->accum |= uval; } else { /* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */ /* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */ - if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 uint32_t*/ && !bitwriter_grow_(bw, msbits+lsbits)) + if(bw->capacity <= bw->words + bw->bits + msbits + 1 /* lsbits always fit in 1 bwword */ && !bitwriter_grow_(bw, total_bits)) return false; if(msbits) { @@ -587,7 +620,7 @@ break1: bw->accum <<= left; bw->accum |= uval >> (bw->bits = lsbits - left); bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); - bw->accum = uval; + bw->accum = uval; /* unused top bits can contain garbage */ } } vals++; @@ -727,40 +760,41 @@ FLAC__bool FLAC__bitwriter_write_utf8_uint32(FLAC__BitWriter *bw, FLAC__uint32 v FLAC__ASSERT(0 != bw); FLAC__ASSERT(0 != bw->buffer); - FLAC__ASSERT(!(val & 0x80000000)); /* this version only handles 31 bits */ + if((val & 0x80000000) != 0) /* this version only handles 31 bits */ + return false; if(val < 0x80) { - return FLAC__bitwriter_write_raw_uint32(bw, val, 8); + return FLAC__bitwriter_write_raw_uint32_nocheck(bw, val, 8); } else if(val < 0x800) { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xC0 | (val>>6), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xC0 | (val>>6), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8); } else if(val < 0x10000) { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xE0 | (val>>12), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>6)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xE0 | (val>>12), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>6)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8); } else if(val < 0x200000) { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xF0 | (val>>18), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>12)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>6)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xF0 | (val>>18), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>12)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>6)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8); } else if(val < 0x4000000) { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xF8 | (val>>24), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>18)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>12)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>6)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xF8 | (val>>24), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>18)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>12)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>6)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8); } else { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xFC | (val>>30), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>24)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>18)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>12)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>6)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xFC | (val>>30), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>24)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>18)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>12)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>6)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8); } return ok; @@ -773,49 +807,50 @@ FLAC__bool FLAC__bitwriter_write_utf8_uint64(FLAC__BitWriter *bw, FLAC__uint64 v FLAC__ASSERT(0 != bw); FLAC__ASSERT(0 != bw->buffer); - FLAC__ASSERT(!(val & FLAC__U64L(0xFFFFFFF000000000))); /* this version only handles 36 bits */ + if((val & FLAC__U64L(0xFFFFFFF000000000)) != 0) /* this version only handles 36 bits */ + return false; if(val < 0x80) { - return FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, 8); + return FLAC__bitwriter_write_raw_uint32_nocheck(bw, (FLAC__uint32)val, 8); } else if(val < 0x800) { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xC0 | (FLAC__uint32)(val>>6), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xC0 | (FLAC__uint32)(val>>6), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); } else if(val < 0x10000) { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xE0 | (FLAC__uint32)(val>>12), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xE0 | (FLAC__uint32)(val>>12), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); } else if(val < 0x200000) { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xF0 | (FLAC__uint32)(val>>18), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xF0 | (FLAC__uint32)(val>>18), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); } else if(val < 0x4000000) { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xF8 | (FLAC__uint32)(val>>24), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xF8 | (FLAC__uint32)(val>>24), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); } else if(val < 0x80000000) { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xFC | (FLAC__uint32)(val>>30), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xFC | (FLAC__uint32)(val>>30), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); } else { - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xFE, 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>30)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); - ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xFE, 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>30)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8); + ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8); } return ok; @@ -839,6 +874,7 @@ FLAC__bool FLAC__bitwriter_zero_pad_to_byte_boundary(FLAC__BitWriter *bw) * fix that we add extern declarations here. */ extern FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsigned bits); +extern FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, unsigned bits); extern FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, unsigned bits); extern FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, unsigned bits); extern FLAC__bool FLAC__bitwriter_write_raw_uint32_little_endian(FLAC__BitWriter *bw, FLAC__uint32 val); diff --git a/libFLAC/cpu.c b/libFLAC/cpu.c index 8249500b..b9df19a9 100644 --- a/libFLAC/cpu.c +++ b/libFLAC/cpu.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2001-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -35,73 +35,43 @@ #endif #include "private/cpu.h" +#include "share/compat.h" #include <stdlib.h> #include <memory.h> -#ifdef DEBUG -# include <stdio.h> -#endif - -#if defined FLAC__CPU_IA32 -# include <signal.h> - -static void disable_sse(FLAC__CPUInfo *info) -{ - info->ia32.sse = false; - info->ia32.sse2 = false; - info->ia32.sse3 = false; - info->ia32.ssse3 = false; - info->ia32.sse41 = false; - info->ia32.sse42 = false; -} - -static void disable_avx(FLAC__CPUInfo *info) -{ - info->ia32.avx = false; - info->ia32.avx2 = false; - info->ia32.fma = false; -} - -#elif defined FLAC__CPU_X86_64 -static void disable_avx(FLAC__CPUInfo *info) -{ - info->x86.avx = false; - info->x86.avx2 = false; - info->x86.fma = false; -} +#if defined(_MSC_VER) +# include <intrin.h> /* for __cpuid() and _xgetbv() */ #endif -#if defined (__NetBSD__) || defined(__OpenBSD__) -#include <sys/param.h> -#include <sys/sysctl.h> -#include <machine/cpu.h> +#if defined __GNUC__ && defined HAVE_CPUID_H +# include <cpuid.h> /* for __get_cpuid() and __get_cpuid_max() */ #endif -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) -#include <sys/types.h> -#include <sys/sysctl.h> -#endif +#ifdef DEBUG +#include <stdio.h> -#if defined(__APPLE__) -/* how to get sysctlbyname()? */ +#define dfprintf fprintf +#else +/* This is bad practice, it should be a static void empty function */ +#define dfprintf(file, format, ...) #endif -#ifdef FLAC__CPU_IA32 + +#if defined FLAC__CPU_IA32 /* these are flags in EDX of CPUID AX=00000001 */ static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000; static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000; -static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000; static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000; static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000; #endif +#if FLAC__HAS_X86INTRIN || FLAC__AVX_SUPPORTED /* these are flags in ECX of CPUID AX=00000001 */ static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001; static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200; static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE41 = 0x00080000; static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE42 = 0x00100000; -#if defined FLAC__AVX_SUPPORTED /* these are flags in ECX of CPUID AX=00000001 */ static const unsigned FLAC__CPUINFO_IA32_CPUID_OSXSAVE = 0x08000000; static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX = 0x10000000; @@ -110,384 +80,214 @@ static const unsigned FLAC__CPUINFO_IA32_CPUID_FMA = 0x00001000; static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX2 = 0x00000020; #endif -/* - * Extra stuff needed for detection of OS support for SSE on IA-32 - */ -#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS -# if defined(__linux__) -/* - * If the OS doesn't support SSE, we will get here with a SIGILL. We - * modify the return address to jump over the offending SSE instruction - * and also the operation following it that indicates the instruction - * executed successfully. In this way we use no global variables and - * stay thread-safe. - * - * 3 + 3 + 6: - * 3 bytes for "xorps xmm0,xmm0" - * 3 bytes for estimate of how long the follwing "inc var" instruction is - * 6 bytes extra in case our estimate is wrong - * 12 bytes puts us in the NOP "landing zone" - */ -# include <sys/ucontext.h> - static void sigill_handler_sse_os(int signal, siginfo_t *si, void *uc) - { - (void)signal, (void)si; - ((ucontext_t*)uc)->uc_mcontext.gregs[14/*REG_EIP*/] += 3 + 3 + 6; - } -# elif defined(_MSC_VER) -# include <windows.h> -# endif +#if defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64 +static uint32_t +cpu_xgetbv_x86(void) +{ +#if (defined _MSC_VER || defined __INTEL_COMPILER) && FLAC__HAS_X86INTRIN && FLAC__AVX_SUPPORTED + return (uint32_t)_xgetbv(0); +#elif defined __GNUC__ + uint32_t lo, hi; + asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c" (0)); + return lo; +#else + return 0; +#endif +} #endif - -void FLAC__cpu_info(FLAC__CPUInfo *info) +static void +ia32_cpu_info (FLAC__CPUInfo *info) { -/* - * IA32-specific - */ -#ifdef FLAC__CPU_IA32 - FLAC__bool ia32_fxsr = false; +#if !defined FLAC__CPU_IA32 + (void) info; +#else FLAC__bool ia32_osxsave = false; - (void) ia32_fxsr; (void) ia32_osxsave; /* to avoid warnings about unused variables */ - memset(info, 0, sizeof(*info)); - info->type = FLAC__CPUINFO_TYPE_IA32; -#if !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) + FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx; + +#if !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */ -#ifdef FLAC__HAS_X86INTRIN - if(!FLAC__cpu_have_cpuid_x86()) - return; -#else +#if defined FLAC__HAS_NASM if(!FLAC__cpu_have_cpuid_asm_ia32()) return; #endif - { - /* http://www.sandpile.org/x86/cpuid.htm */ -#ifdef FLAC__HAS_X86INTRIN - FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx; + /* http://www.sandpile.org/x86/cpuid.htm */ + if (FLAC__HAS_X86INTRIN) { FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); - info->ia32.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */ + info->ia32.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E) ? true : false; /* GenuineIntel */ FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); -#else - FLAC__uint32 flags_ecx, flags_edx; + } + else { FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx); -#endif - info->ia32.cmov = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false; - info->ia32.mmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX )? true : false; - ia32_fxsr = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false; - info->ia32.sse = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE )? true : false; - info->ia32.sse2 = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false; - info->ia32.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false; - info->ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false; - info->ia32.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false; - info->ia32.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false; -#if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED - ia32_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false; - info->ia32.avx = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX )? true : false; - info->ia32.fma = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA )? true : false; - FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); - info->ia32.avx2 = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2 )? true : false; -#endif } -#ifdef DEBUG - fprintf(stderr, "CPU info (IA-32):\n"); - fprintf(stderr, " CMOV ....... %c\n", info->ia32.cmov ? 'Y' : 'n'); - fprintf(stderr, " MMX ........ %c\n", info->ia32.mmx ? 'Y' : 'n'); - fprintf(stderr, " SSE ........ %c\n", info->ia32.sse ? 'Y' : 'n'); - fprintf(stderr, " SSE2 ....... %c\n", info->ia32.sse2 ? 'Y' : 'n'); - fprintf(stderr, " SSE3 ....... %c\n", info->ia32.sse3 ? 'Y' : 'n'); - fprintf(stderr, " SSSE3 ...... %c\n", info->ia32.ssse3 ? 'Y' : 'n'); - fprintf(stderr, " SSE41 ...... %c\n", info->ia32.sse41 ? 'Y' : 'n'); - fprintf(stderr, " SSE42 ...... %c\n", info->ia32.sse42 ? 'Y' : 'n'); -# if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED - fprintf(stderr, " AVX ........ %c\n", info->ia32.avx ? 'Y' : 'n'); - fprintf(stderr, " FMA ........ %c\n", info->ia32.fma ? 'Y' : 'n'); - fprintf(stderr, " AVX2 ....... %c\n", info->ia32.avx2 ? 'Y' : 'n'); -# endif -#endif + info->ia32.cmov = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV ) ? true : false; + info->ia32.mmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX ) ? true : false; + info->ia32.sse = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE ) ? true : false; + info->ia32.sse2 = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 ) ? true : false; + info->ia32.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 ) ? true : false; + info->ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3) ? true : false; + info->ia32.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41) ? true : false; + info->ia32.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42) ? true : false; + + if (FLAC__HAS_X86INTRIN && FLAC__AVX_SUPPORTED) { + ia32_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE) ? true : false; + info->ia32.avx = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX ) ? true : false; + info->ia32.fma = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA ) ? true : false; + FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); + info->ia32.avx2 = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2 ) ? true : false; + } - /* - * now have to check for OS support of SSE instructions - */ - if(info->ia32.sse) { -#if defined FLAC__NO_SSE_OS - /* assume user knows better than us; turn it off */ - disable_sse(info); -#elif defined FLAC__SSE_OS - /* assume user knows better than us; leave as detected above */ -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__) - int sse = 0; - size_t len; - /* at least one of these must work: */ - len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse); - len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse" , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */ - if(!sse) - disable_sse(info); -#elif defined(__NetBSD__) || defined (__OpenBSD__) -# if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__) - int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE }; - size_t len = sizeof(val); - if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) - disable_sse(info); - else { /* double-check SSE2 */ - mib[1] = CPU_SSE2; - len = sizeof(val); - if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) { - disable_sse(info); - info->ia32.sse = true; - } - } -# else - disable_sse(info); -# endif -#elif defined(__ANDROID__) || defined(ANDROID) - /* no need to check OS SSE support */ -#elif defined(__linux__) - int sse = 0; - struct sigaction sigill_save; - struct sigaction sigill_sse; - sigill_sse.sa_sigaction = sigill_handler_sse_os; - __sigemptyset(&sigill_sse.sa_mask); - sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */ - if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save)) - { - /* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */ - /* see sigill_handler_sse_os() for an explanation of the following: */ - asm volatile ( - "xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */ - "incl %0\n\t" /* SIGILL handler will jump over this */ - /* landing zone */ - "nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */ - "nop\n\t" - "nop\n\t" - "nop\n\t" - "nop\n\t" - "nop\n\t" - "nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */ - "nop\n\t" - "nop" /* SIGILL jump lands here if "inc" is 1 byte */ - : "=r"(sse) - : "0"(sse) - ); - - sigaction(SIGILL, &sigill_save, NULL); - } - - if(!sse) - disable_sse(info); -#elif defined(_MSC_VER) - __try { - __asm { - xorps xmm0,xmm0 - } - } - __except(EXCEPTION_EXECUTE_HANDLER) { - if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION) - disable_sse(info); - } -#elif defined(__GNUC__) /* MinGW goes here */ - int sse = 0; - /* Based on the idea described in Agner Fog's manual "Optimizing subroutines in assembly language" */ - /* In theory, not guaranteed to detect lack of OS SSE support on some future Intel CPUs, but in practice works (see the aforementioned manual) */ - if (ia32_fxsr) { - struct { - FLAC__uint32 buff[128]; - } __attribute__((aligned(16))) fxsr; - FLAC__uint32 old_val, new_val; - - asm volatile ("fxsave %0" : "=m" (fxsr) : "m" (fxsr)); - old_val = fxsr.buff[50]; - fxsr.buff[50] ^= 0x0013c0de; /* change value in the buffer */ - asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr)); /* try to change SSE register */ - fxsr.buff[50] = old_val; /* restore old value in the buffer */ - asm volatile ("fxsave %0 " : "=m" (fxsr) : "m" (fxsr)); /* old value will be overwritten if SSE register was changed */ - new_val = fxsr.buff[50]; /* == old_val if FXRSTOR didn't change SSE register and (old_val ^ 0x0013c0de) otherwise */ - fxsr.buff[50] = old_val; /* again restore old value in the buffer */ - asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr)); /* restore old values of registers */ - - if ((old_val^new_val) == 0x0013c0de) - sse = 1; - } - if(!sse) - disable_sse(info); -#else - /* no way to test, disable to be safe */ - disable_sse(info); -#endif -#ifdef DEBUG - fprintf(stderr, " SSE OS sup . %c\n", info->ia32.sse ? 'Y' : 'n'); -#endif + dfprintf(stderr, "CPU info (IA-32):\n"); + dfprintf(stderr, " CMOV ....... %c\n", info->ia32.cmov ? 'Y' : 'n'); + dfprintf(stderr, " MMX ........ %c\n", info->ia32.mmx ? 'Y' : 'n'); + dfprintf(stderr, " SSE ........ %c\n", info->ia32.sse ? 'Y' : 'n'); + dfprintf(stderr, " SSE2 ....... %c\n", info->ia32.sse2 ? 'Y' : 'n'); + dfprintf(stderr, " SSE3 ....... %c\n", info->ia32.sse3 ? 'Y' : 'n'); + dfprintf(stderr, " SSSE3 ...... %c\n", info->ia32.ssse3 ? 'Y' : 'n'); + dfprintf(stderr, " SSE41 ...... %c\n", info->ia32.sse41 ? 'Y' : 'n'); + dfprintf(stderr, " SSE42 ...... %c\n", info->ia32.sse42 ? 'Y' : 'n'); + + if (FLAC__HAS_X86INTRIN && FLAC__AVX_SUPPORTED) { + dfprintf(stderr, " AVX ........ %c\n", info->ia32.avx ? 'Y' : 'n'); + dfprintf(stderr, " FMA ........ %c\n", info->ia32.fma ? 'Y' : 'n'); + dfprintf(stderr, " AVX2 ....... %c\n", info->ia32.avx2 ? 'Y' : 'n'); } - else /* info->ia32.sse == false */ - disable_sse(info); /* * now have to check for OS support of AVX instructions */ - if(info->ia32.avx && ia32_osxsave) { - FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86(); - if ((ecr & 0x6) != 0x6) - disable_avx(info); -#ifdef DEBUG - fprintf(stderr, " AVX OS sup . %c\n", info->ia32.avx ? 'Y' : 'n'); -#endif + if (!FLAC__HAS_X86INTRIN || !info->ia32.avx || !ia32_osxsave || (cpu_xgetbv_x86() & 0x6) != 0x6) { + /* no OS AVX support */ + info->ia32.avx = false; + info->ia32.avx2 = false; + info->ia32.fma = false; + } + + if (FLAC__HAS_X86INTRIN && FLAC__AVX_SUPPORTED) { + dfprintf(stderr, " AVX OS sup . %c\n", info->ia32.avx ? 'Y' : 'n'); } - else /* no OS AVX support*/ - disable_avx(info); #else info->use_asm = false; #endif +#endif +} -/* - * x86-64-specific - */ -#elif defined FLAC__CPU_X86_64 +static void +x86_64_cpu_info (FLAC__CPUInfo *info) +{ +#if !defined FLAC__NO_ASM && FLAC__HAS_X86INTRIN FLAC__bool x86_osxsave = false; - (void) x86_osxsave; /* to avoid warnings about unused variables */ - memset(info, 0, sizeof(*info)); - info->type = FLAC__CPUINFO_TYPE_X86_64; -#if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN + FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx; + info->use_asm = true; - { - /* http://www.sandpile.org/x86/cpuid.htm */ - FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx; - FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); - info->x86.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */ - FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); - info->x86.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false; - info->x86.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false; - info->x86.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false; - info->x86.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false; -#if defined FLAC__AVX_SUPPORTED - x86_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false; - info->x86.avx = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX )? true : false; - info->x86.fma = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA )? true : false; + + /* http://www.sandpile.org/x86/cpuid.htm */ + FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); + info->x86.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E) ? true : false; /* GenuineIntel */ + FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); + info->x86.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 ) ? true : false; + info->x86.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3) ? true : false; + info->x86.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41) ? true : false; + info->x86.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42) ? true : false; + + if (FLAC__AVX_SUPPORTED) { + x86_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE) ? true : false; + info->x86.avx = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX ) ? true : false; + info->x86.fma = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA ) ? true : false; FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); - info->x86.avx2 = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2 )? true : false; -#endif + info->x86.avx2 = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2 ) ? true : false; + } + + dfprintf(stderr, "CPU info (x86-64):\n"); + dfprintf(stderr, " SSE3 ....... %c\n", info->x86.sse3 ? 'Y' : 'n'); + dfprintf(stderr, " SSSE3 ...... %c\n", info->x86.ssse3 ? 'Y' : 'n'); + dfprintf(stderr, " SSE41 ...... %c\n", info->x86.sse41 ? 'Y' : 'n'); + dfprintf(stderr, " SSE42 ...... %c\n", info->x86.sse42 ? 'Y' : 'n'); + + if (FLAC__AVX_SUPPORTED) { + dfprintf(stderr, " AVX ........ %c\n", info->x86.avx ? 'Y' : 'n'); + dfprintf(stderr, " FMA ........ %c\n", info->x86.fma ? 'Y' : 'n'); + dfprintf(stderr, " AVX2 ....... %c\n", info->x86.avx2 ? 'Y' : 'n'); } -#ifdef DEBUG - fprintf(stderr, "CPU info (x86-64):\n"); - fprintf(stderr, " SSE3 ....... %c\n", info->x86.sse3 ? 'Y' : 'n'); - fprintf(stderr, " SSSE3 ...... %c\n", info->x86.ssse3 ? 'Y' : 'n'); - fprintf(stderr, " SSE41 ...... %c\n", info->x86.sse41 ? 'Y' : 'n'); - fprintf(stderr, " SSE42 ...... %c\n", info->x86.sse42 ? 'Y' : 'n'); -# if defined FLAC__AVX_SUPPORTED - fprintf(stderr, " AVX ........ %c\n", info->x86.avx ? 'Y' : 'n'); - fprintf(stderr, " FMA ........ %c\n", info->x86.fma ? 'Y' : 'n'); - fprintf(stderr, " AVX2 ....... %c\n", info->x86.avx2 ? 'Y' : 'n'); -# endif -#endif /* * now have to check for OS support of AVX instructions */ - if(info->x86.avx && x86_osxsave) { - FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86(); - if ((ecr & 0x6) != 0x6) - disable_avx(info); -#ifdef DEBUG - fprintf(stderr, " AVX OS sup . %c\n", info->x86.avx ? 'Y' : 'n'); -#endif + if (!info->x86.avx || !x86_osxsave || (cpu_xgetbv_x86() & 0x6) != 0x6) { + /* no OS AVX support */ + info->x86.avx = false; + info->x86.avx2 = false; + info->x86.fma = false; } - else /* no OS AVX support*/ - disable_avx(info); -#else - info->use_asm = false; -#endif -/* - * unknown CPU - */ + if (FLAC__AVX_SUPPORTED) { + dfprintf(stderr, " AVX OS sup . %c\n", info->x86.avx ? 'Y' : 'n'); + } #else - info->type = FLAC__CPUINFO_TYPE_UNKNOWN; - info->use_asm = false; + /* Silence compiler warnings. */ + (void) info; +#if defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64 + if (0) cpu_xgetbv_x86 (); +#endif #endif } -#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN +void FLAC__cpu_info (FLAC__CPUInfo *info) +{ + memset(info, 0, sizeof(*info)); -#if defined _MSC_VER -#include <intrin.h> /* for __cpuid() and _xgetbv() */ -#elif defined __GNUC__ && defined HAVE_CPUID_H -#include <cpuid.h> /* for __get_cpuid() and __get_cpuid_max() */ +#ifdef FLAC__CPU_IA32 + info->type = FLAC__CPUINFO_TYPE_IA32; +#elif defined FLAC__CPU_X86_64 + info->type = FLAC__CPUINFO_TYPE_X86_64; +#else + info->type = FLAC__CPUINFO_TYPE_UNKNOWN; + info->use_asm = false; #endif -FLAC__uint32 FLAC__cpu_have_cpuid_x86(void) -{ -#ifdef FLAC__CPU_X86_64 - return 1; -#else -# if defined _MSC_VER || defined __INTEL_COMPILER /* Do they support CPUs w/o CPUID support (or OSes that work on those CPUs)? */ - FLAC__uint32 flags1, flags2; - __asm { - pushfd - pushfd - pop eax - mov flags1, eax - xor eax, 0x200000 - push eax - popfd - pushfd - pop eax - mov flags2, eax - popfd + switch (info->type) { + case FLAC__CPUINFO_TYPE_IA32: + ia32_cpu_info (info); + break; + case FLAC__CPUINFO_TYPE_X86_64: + x86_64_cpu_info (info); + break; + default: + info->use_asm = false; + break; } - if (((flags1^flags2) & 0x200000) != 0) - return 1; - else - return 0; -# elif defined __GNUC__ && defined HAVE_CPUID_H - if (__get_cpuid_max(0, 0) != 0) - return 1; - else - return 0; -# else - return 0; -# endif -#endif } +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN + void FLAC__cpu_info_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx) { #if defined _MSC_VER || defined __INTEL_COMPILER int cpuinfo[4]; int ext = level & 0x80000000; __cpuid(cpuinfo, ext); - if((unsigned)cpuinfo[0] < level) { - *eax = *ebx = *ecx = *edx = 0; - return; - } -#if defined FLAC__AVX_SUPPORTED - __cpuidex(cpuinfo, level, 0); /* for AVX2 detection */ + if((unsigned)cpuinfo[0] >= level) { +#if FLAC__AVX_SUPPORTED + __cpuidex(cpuinfo, ext, 0); /* for AVX2 detection */ #else - __cpuid(cpuinfo, level); /* some old compilers don't support __cpuidex */ + __cpuid(cpuinfo, ext); /* some old compilers don't support __cpuidex */ #endif - *eax = cpuinfo[0]; *ebx = cpuinfo[1]; *ecx = cpuinfo[2]; *edx = cpuinfo[3]; + + *eax = cpuinfo[0]; *ebx = cpuinfo[1]; *ecx = cpuinfo[2]; *edx = cpuinfo[3]; + + return; + } #elif defined __GNUC__ && defined HAVE_CPUID_H FLAC__uint32 ext = level & 0x80000000; __cpuid(ext, *eax, *ebx, *ecx, *edx); - if (*eax < level) { - *eax = *ebx = *ecx = *edx = 0; + if (*eax >= level) { + __cpuid_count(level, 0, *eax, *ebx, *ecx, *edx); + return; } - __cpuid_count(level, 0, *eax, *ebx, *ecx, *edx); -#else - *eax = *ebx = *ecx = *edx = 0; -#endif -} - -FLAC__uint32 FLAC__cpu_xgetbv_x86(void) -{ -#if (defined _MSC_VER || defined __INTEL_COMPILER) && defined FLAC__AVX_SUPPORTED - return (FLAC__uint32)_xgetbv(0); -#elif defined __GNUC__ - FLAC__uint32 lo, hi; - asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c" (0)); - return lo; -#else - return 0; #endif + *eax = *ebx = *ecx = *edx = 0; } #endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ diff --git a/libFLAC/crc.c b/libFLAC/crc.c index de2cb188..8123c3b6 100644 --- a/libFLAC/crc.c +++ b/libFLAC/crc.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/fixed.c b/libFLAC/fixed.c index 74b31b32..1e2d5b28 100644 --- a/libFLAC/fixed.c +++ b/libFLAC/fixed.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -214,7 +214,7 @@ static FLAC__fixedpoint local__compute_rbps_wide_integerized(FLAC__uint64 err, F #endif #ifndef FLAC__INTEGER_ONLY_LIBRARY -unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]) +unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]) #else unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]) #endif @@ -255,11 +255,11 @@ unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned d FLAC__ASSERT(data_len > 0 || total_error_3 == 0); FLAC__ASSERT(data_len > 0 || total_error_4 == 0); #ifndef FLAC__INTEGER_ONLY_LIBRARY - residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0); - residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0); - residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0); - residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0); - residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0); #else residual_bits_per_sample[0] = (total_error_0 > 0) ? local__compute_rbps_integerized(total_error_0, data_len) : 0; residual_bits_per_sample[1] = (total_error_1 > 0) ? local__compute_rbps_integerized(total_error_1, data_len) : 0; @@ -272,7 +272,7 @@ unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned d } #ifndef FLAC__INTEGER_ONLY_LIBRARY -unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]) +unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]) #else unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]) #endif @@ -317,11 +317,11 @@ unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsig FLAC__ASSERT(data_len > 0 || total_error_3 == 0); FLAC__ASSERT(data_len > 0 || total_error_4 == 0); #ifndef FLAC__INTEGER_ONLY_LIBRARY - residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0); - residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0); - residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0); - residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0); - residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0); #else residual_bits_per_sample[0] = (total_error_0 > 0) ? local__compute_rbps_wide_integerized(total_error_0, data_len) : 0; residual_bits_per_sample[1] = (total_error_1 > 0) ? local__compute_rbps_wide_integerized(total_error_1, data_len) : 0; @@ -349,27 +349,15 @@ void FLAC__fixed_compute_residual(const FLAC__int32 data[], unsigned data_len, u break; case 2: for(i = 0; i < idata_len; i++) -#if 1 /* OPT: may be faster with some compilers on some systems */ - residual[i] = data[i] - (data[i-1] << 1) + data[i-2]; -#else residual[i] = data[i] - 2*data[i-1] + data[i-2]; -#endif break; case 3: for(i = 0; i < idata_len; i++) -#if 1 /* OPT: may be faster with some compilers on some systems */ - residual[i] = data[i] - (((data[i-1]-data[i-2])<<1) + (data[i-1]-data[i-2])) - data[i-3]; -#else residual[i] = data[i] - 3*data[i-1] + 3*data[i-2] - data[i-3]; -#endif break; case 4: for(i = 0; i < idata_len; i++) -#if 1 /* OPT: may be faster with some compilers on some systems */ - residual[i] = data[i] - ((data[i-1]+data[i-3])<<2) + ((data[i-2]<<2) + (data[i-2]<<1)) + data[i-4]; -#else residual[i] = data[i] - 4*data[i-1] + 6*data[i-2] - 4*data[i-3] + data[i-4]; -#endif break; default: FLAC__ASSERT(0); @@ -391,27 +379,15 @@ void FLAC__fixed_restore_signal(const FLAC__int32 residual[], unsigned data_len, break; case 2: for(i = 0; i < idata_len; i++) -#if 1 /* OPT: may be faster with some compilers on some systems */ - data[i] = residual[i] + (data[i-1]<<1) - data[i-2]; -#else data[i] = residual[i] + 2*data[i-1] - data[i-2]; -#endif break; case 3: for(i = 0; i < idata_len; i++) -#if 1 /* OPT: may be faster with some compilers on some systems */ - data[i] = residual[i] + (((data[i-1]-data[i-2])<<1) + (data[i-1]-data[i-2])) + data[i-3]; -#else data[i] = residual[i] + 3*data[i-1] - 3*data[i-2] + data[i-3]; -#endif break; case 4: for(i = 0; i < idata_len; i++) -#if 1 /* OPT: may be faster with some compilers on some systems */ - data[i] = residual[i] + ((data[i-1]+data[i-3])<<2) - ((data[i-2]<<2) + (data[i-2]<<1)) - data[i-4]; -#else data[i] = residual[i] + 4*data[i-1] - 6*data[i-2] + 4*data[i-3] - data[i-4]; -#endif break; default: FLAC__ASSERT(0); diff --git a/libFLAC/fixed_intrin_sse2.c b/libFLAC/fixed_intrin_sse2.c new file mode 100644 index 00000000..6a9b4dd0 --- /dev/null +++ b/libFLAC/fixed_intrin_sse2.c @@ -0,0 +1,255 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/cpu.h" + +#ifndef FLAC__INTEGER_ONLY_LIBRARY +#ifndef FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN +#include "private/fixed.h" +#ifdef FLAC__SSE2_SUPPORTED + +#include <emmintrin.h> /* SSE2 */ +#include <math.h> +#include "private/macros.h" +#include "share/compat.h" +#include "FLAC/assert.h" + +#ifdef FLAC__CPU_IA32 +#define m128i_to_i64(dest, src) _mm_storel_epi64((__m128i*)&dest, src) +#else +#define m128i_to_i64(dest, src) dest = _mm_cvtsi128_si64(src) +#endif + +FLAC__SSE_TARGET("sse2") +unsigned FLAC__fixed_compute_best_predictor_intrin_sse2(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]) +{ + FLAC__uint32 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4; + unsigned i, order; + + __m128i total_err0, total_err1, total_err2; + + { + FLAC__int32 itmp; + __m128i last_error; + + last_error = _mm_cvtsi32_si128(data[-1]); // 0 0 0 le0 + itmp = data[-2]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 0 le0 le1 + itmp -= data[-3]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 le0 le1 le2 + itmp -= data[-3] - data[-4]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // le0 le1 le2 le3 + + total_err0 = total_err1 = _mm_setzero_si128(); + for(i = 0; i < data_len; i++) { + __m128i err0, err1, tmp; + err0 = _mm_cvtsi32_si128(data[i]); // 0 0 0 e0 + err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0)); // e0 e0 e0 e0 +#if 1 /* OPT_SSE */ + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 le0 le1 le2 + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 0 le0 le1 + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 0 0 le0 + err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4 +#else + last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 8)); // le0 le1 le2+le0 le3+le1 + last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 4)); // le0 le1+le0 le2+le0+le1 le3+le1+le2+le0 + err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4 +#endif + tmp = _mm_slli_si128(err0, 12); // e0 0 0 0 + last_error = _mm_srli_si128(err1, 4); // 0 e1 e2 e3 + last_error = _mm_or_si128(last_error, tmp); // e0 e1 e2 e3 + + tmp = _mm_srai_epi32(err0, 31); + err0 = _mm_xor_si128(err0, tmp); + err0 = _mm_sub_epi32(err0, tmp); + tmp = _mm_srai_epi32(err1, 31); + err1 = _mm_xor_si128(err1, tmp); + err1 = _mm_sub_epi32(err1, tmp); + + total_err0 = _mm_add_epi32(total_err0, err0); // 0 0 0 te0 + total_err1 = _mm_add_epi32(total_err1, err1); // te1 te2 te3 te4 + } + } + + total_error_0 = _mm_cvtsi128_si32(total_err0); + total_err2 = total_err1; // te1 te2 te3 te4 + total_err1 = _mm_srli_si128(total_err1, 8); // 0 0 te1 te2 + total_error_4 = _mm_cvtsi128_si32(total_err2); + total_error_2 = _mm_cvtsi128_si32(total_err1); + total_err2 = _mm_srli_si128(total_err2, 4); // 0 te1 te2 te3 + total_err1 = _mm_srli_si128(total_err1, 4); // 0 0 0 te1 + total_error_3 = _mm_cvtsi128_si32(total_err2); + total_error_1 = _mm_cvtsi128_si32(total_err1); + + /* prefer higher order */ + if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4)) + order = 0; + else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4)) + order = 1; + else if(total_error_2 < flac_min(total_error_3, total_error_4)) + order = 2; + else if(total_error_3 < total_error_4) + order = 3; + else + order = 4; + + /* Estimate the expected number of bits per residual signal sample. */ + /* 'total_error*' is linearly related to the variance of the residual */ + /* signal, so we use it directly to compute E(|x|) */ + FLAC__ASSERT(data_len > 0 || total_error_0 == 0); + FLAC__ASSERT(data_len > 0 || total_error_1 == 0); + FLAC__ASSERT(data_len > 0 || total_error_2 == 0); + FLAC__ASSERT(data_len > 0 || total_error_3 == 0); + FLAC__ASSERT(data_len > 0 || total_error_4 == 0); + + residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0); + + return order; +} + +FLAC__SSE_TARGET("sse2") +unsigned FLAC__fixed_compute_best_predictor_wide_intrin_sse2(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]) +{ + FLAC__uint64 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4; + unsigned i, order; + + __m128i total_err0, total_err1, total_err3; + + { + FLAC__int32 itmp; + __m128i last_error, zero = _mm_setzero_si128(); + + last_error = _mm_cvtsi32_si128(data[-1]); // 0 0 0 le0 + itmp = data[-2]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 0 le0 le1 + itmp -= data[-3]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 le0 le1 le2 + itmp -= data[-3] - data[-4]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // le0 le1 le2 le3 + + total_err0 = total_err1 = total_err3 = _mm_setzero_si128(); + for(i = 0; i < data_len; i++) { + __m128i err0, err1, tmp; + err0 = _mm_cvtsi32_si128(data[i]); // 0 0 0 e0 + err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0)); // e0 e0 e0 e0 +#if 1 /* OPT_SSE */ + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 le0 le1 le2 + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 0 le0 le1 + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 0 0 le0 + err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4 +#else + last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 8)); // le0 le1 le2+le0 le3+le1 + last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 4)); // le0 le1+le0 le2+le0+le1 le3+le1+le2+le0 + err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4 +#endif + tmp = _mm_slli_si128(err0, 12); // e0 0 0 0 + last_error = _mm_srli_si128(err1, 4); // 0 e1 e2 e3 + last_error = _mm_or_si128(last_error, tmp); // e0 e1 e2 e3 + + tmp = _mm_srai_epi32(err0, 31); + err0 = _mm_xor_si128(err0, tmp); + err0 = _mm_sub_epi32(err0, tmp); + tmp = _mm_srai_epi32(err1, 31); + err1 = _mm_xor_si128(err1, tmp); + err1 = _mm_sub_epi32(err1, tmp); + + total_err0 = _mm_add_epi64(total_err0, err0); // 0 te0 + err0 = _mm_unpacklo_epi32(err1, zero); // 0 |e3| 0 |e4| + err1 = _mm_unpackhi_epi32(err1, zero); // 0 |e1| 0 |e2| + total_err3 = _mm_add_epi64(total_err3, err0); // te3 te4 + total_err1 = _mm_add_epi64(total_err1, err1); // te1 te2 + } + } + + m128i_to_i64(total_error_0, total_err0); + m128i_to_i64(total_error_4, total_err3); + m128i_to_i64(total_error_2, total_err1); + total_err3 = _mm_srli_si128(total_err3, 8); // 0 te3 + total_err1 = _mm_srli_si128(total_err1, 8); // 0 te1 + m128i_to_i64(total_error_3, total_err3); + m128i_to_i64(total_error_1, total_err1); + + /* prefer higher order */ + if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4)) + order = 0; + else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4)) + order = 1; + else if(total_error_2 < flac_min(total_error_3, total_error_4)) + order = 2; + else if(total_error_3 < total_error_4) + order = 3; + else + order = 4; + + /* Estimate the expected number of bits per residual signal sample. */ + /* 'total_error*' is linearly related to the variance of the residual */ + /* signal, so we use it directly to compute E(|x|) */ + FLAC__ASSERT(data_len > 0 || total_error_0 == 0); + FLAC__ASSERT(data_len > 0 || total_error_1 == 0); + FLAC__ASSERT(data_len > 0 || total_error_2 == 0); + FLAC__ASSERT(data_len > 0 || total_error_3 == 0); + FLAC__ASSERT(data_len > 0 || total_error_4 == 0); + + residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0); + + return order; +} + +#endif /* FLAC__SSE2_SUPPORTED */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ +#endif /* FLAC__NO_ASM */ +#endif /* FLAC__INTEGER_ONLY_LIBRARY */ diff --git a/libFLAC/fixed_intrin_ssse3.c b/libFLAC/fixed_intrin_ssse3.c new file mode 100644 index 00000000..f4d93e8f --- /dev/null +++ b/libFLAC/fixed_intrin_ssse3.c @@ -0,0 +1,243 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/cpu.h" + +#ifndef FLAC__INTEGER_ONLY_LIBRARY +#ifndef FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN +#include "private/fixed.h" +#ifdef FLAC__SSSE3_SUPPORTED + +#include <tmmintrin.h> /* SSSE3 */ +#include <math.h> +#include "private/macros.h" +#include "share/compat.h" +#include "FLAC/assert.h" + +#ifdef FLAC__CPU_IA32 +#define m128i_to_i64(dest, src) _mm_storel_epi64((__m128i*)&dest, src) +#else +#define m128i_to_i64(dest, src) dest = _mm_cvtsi128_si64(src) +#endif + +FLAC__SSE_TARGET("ssse3") +unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]) +{ + FLAC__uint32 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4; + unsigned i, order; + + __m128i total_err0, total_err1, total_err2; + + { + FLAC__int32 itmp; + __m128i last_error; + + last_error = _mm_cvtsi32_si128(data[-1]); // 0 0 0 le0 + itmp = data[-2]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 0 le0 le1 + itmp -= data[-3]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 le0 le1 le2 + itmp -= data[-3] - data[-4]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // le0 le1 le2 le3 + + total_err0 = total_err1 = _mm_setzero_si128(); + for(i = 0; i < data_len; i++) { + __m128i err0, err1; + err0 = _mm_cvtsi32_si128(data[i]); // 0 0 0 e0 + err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0)); // e0 e0 e0 e0 +#if 1 /* OPT_SSE */ + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 le0 le1 le2 + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 0 le0 le1 + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 0 0 le0 + err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4 +#else + last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 8)); // le0 le1 le2+le0 le3+le1 + last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 4)); // le0 le1+le0 le2+le0+le1 le3+le1+le2+le0 + err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4 +#endif + last_error = _mm_alignr_epi8(err0, err1, 4); // e0 e1 e2 e3 + + err0 = _mm_abs_epi32(err0); + err1 = _mm_abs_epi32(err1); + + total_err0 = _mm_add_epi32(total_err0, err0); // 0 0 0 te0 + total_err1 = _mm_add_epi32(total_err1, err1); // te1 te2 te3 te4 + } + } + + total_error_0 = _mm_cvtsi128_si32(total_err0); + total_err2 = total_err1; // te1 te2 te3 te4 + total_err1 = _mm_srli_si128(total_err1, 8); // 0 0 te1 te2 + total_error_4 = _mm_cvtsi128_si32(total_err2); + total_error_2 = _mm_cvtsi128_si32(total_err1); + total_err2 = _mm_srli_si128(total_err2, 4); // 0 te1 te2 te3 + total_err1 = _mm_srli_si128(total_err1, 4); // 0 0 0 te1 + total_error_3 = _mm_cvtsi128_si32(total_err2); + total_error_1 = _mm_cvtsi128_si32(total_err1); + + /* prefer higher order */ + if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4)) + order = 0; + else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4)) + order = 1; + else if(total_error_2 < flac_min(total_error_3, total_error_4)) + order = 2; + else if(total_error_3 < total_error_4) + order = 3; + else + order = 4; + + /* Estimate the expected number of bits per residual signal sample. */ + /* 'total_error*' is linearly related to the variance of the residual */ + /* signal, so we use it directly to compute E(|x|) */ + FLAC__ASSERT(data_len > 0 || total_error_0 == 0); + FLAC__ASSERT(data_len > 0 || total_error_1 == 0); + FLAC__ASSERT(data_len > 0 || total_error_2 == 0); + FLAC__ASSERT(data_len > 0 || total_error_3 == 0); + FLAC__ASSERT(data_len > 0 || total_error_4 == 0); + + residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0); + + return order; +} + +FLAC__SSE_TARGET("ssse3") +unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]) +{ + FLAC__uint64 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4; + unsigned i, order; + + __m128i total_err0, total_err1, total_err3; + + { + FLAC__int32 itmp; + __m128i last_error, zero = _mm_setzero_si128(); + + last_error = _mm_cvtsi32_si128(data[-1]); // 0 0 0 le0 + itmp = data[-2]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 0 le0 le1 + itmp -= data[-3]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 le0 le1 le2 + itmp -= data[-3] - data[-4]; + last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)); + last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // le0 le1 le2 le3 + + total_err0 = total_err1 = total_err3 = _mm_setzero_si128(); + for(i = 0; i < data_len; i++) { + __m128i err0, err1; + err0 = _mm_cvtsi32_si128(data[i]); // 0 0 0 e0 + err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0)); // e0 e0 e0 e0 +#if 1 /* OPT_SSE */ + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 le0 le1 le2 + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 0 le0 le1 + err1 = _mm_sub_epi32(err1, last_error); + last_error = _mm_srli_si128(last_error, 4); // 0 0 0 le0 + err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4 +#else + last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 8)); // le0 le1 le2+le0 le3+le1 + last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 4)); // le0 le1+le0 le2+le0+le1 le3+le1+le2+le0 + err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4 +#endif + last_error = _mm_alignr_epi8(err0, err1, 4); // e0 e1 e2 e3 + + err0 = _mm_abs_epi32(err0); + err1 = _mm_abs_epi32(err1); // |e1| |e2| |e3| |e4| + + total_err0 = _mm_add_epi64(total_err0, err0); // 0 te0 + err0 = _mm_unpacklo_epi32(err1, zero); // 0 |e3| 0 |e4| + err1 = _mm_unpackhi_epi32(err1, zero); // 0 |e1| 0 |e2| + total_err3 = _mm_add_epi64(total_err3, err0); // te3 te4 + total_err1 = _mm_add_epi64(total_err1, err1); // te1 te2 + } + } + + m128i_to_i64(total_error_0, total_err0); + m128i_to_i64(total_error_4, total_err3); + m128i_to_i64(total_error_2, total_err1); + total_err3 = _mm_srli_si128(total_err3, 8); // 0 te3 + total_err1 = _mm_srli_si128(total_err1, 8); // 0 te1 + m128i_to_i64(total_error_3, total_err3); + m128i_to_i64(total_error_1, total_err1); + + /* prefer higher order */ + if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4)) + order = 0; + else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4)) + order = 1; + else if(total_error_2 < flac_min(total_error_3, total_error_4)) + order = 2; + else if(total_error_3 < total_error_4) + order = 3; + else + order = 4; + + /* Estimate the expected number of bits per residual signal sample. */ + /* 'total_error*' is linearly related to the variance of the residual */ + /* signal, so we use it directly to compute E(|x|) */ + FLAC__ASSERT(data_len > 0 || total_error_0 == 0); + FLAC__ASSERT(data_len > 0 || total_error_1 == 0); + FLAC__ASSERT(data_len > 0 || total_error_2 == 0); + FLAC__ASSERT(data_len > 0 || total_error_3 == 0); + FLAC__ASSERT(data_len > 0 || total_error_4 == 0); + + residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0); + residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0); + + return order; +} + +#endif /* FLAC__SSSE3_SUPPORTED */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ +#endif /* FLAC__NO_ASM */ +#endif /* FLAC__INTEGER_ONLY_LIBRARY */ diff --git a/libFLAC/float.c b/libFLAC/float.c index 068069f3..25d1a786 100644 --- a/libFLAC/float.c +++ b/libFLAC/float.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2004-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/format.c b/libFLAC/format.c index 0b1d10b5..214bd09a 100644 --- a/libFLAC/format.c +++ b/libFLAC/format.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -44,10 +44,10 @@ #include "private/format.h" #include "private/macros.h" -/* VERSION should come from configure */ -FLAC_API const char *FLAC__VERSION_STRING = VERSION; +/* PACKAGE_VERSION should come from configure */ +FLAC_API const char *FLAC__VERSION_STRING = PACKAGE_VERSION; -FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC " VERSION " 20141125"; +FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC " PACKAGE_VERSION " 20170101"; FLAC_API const FLAC__byte FLAC__STREAM_SYNC_STRING[4] = { 'f','L','a','C' }; FLAC_API const unsigned FLAC__STREAM_SYNC = 0x664C6143; diff --git a/libFLAC/include/private/all.h b/libFLAC/include/private/all.h new file mode 100644 index 00000000..77159620 --- /dev/null +++ b/libFLAC/include/private/all.h @@ -0,0 +1,50 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FLAC__PRIVATE__ALL_H +#define FLAC__PRIVATE__ALL_H + +#include "bitmath.h" +#include "bitreader.h" +#include "bitwriter.h" +#include "cpu.h" +#include "crc.h" +#include "fixed.h" +#include "float.h" +#include "format.h" +#include "lpc.h" +#include "md5.h" +#include "memory.h" +#include "metadata.h" +#include "stream_encoder_framing.h" + +#endif diff --git a/libFLAC/include/private/bitmath.h b/libFLAC/include/private/bitmath.h index 22d894f8..9c75f85b 100644 --- a/libFLAC/include/private/bitmath.h +++ b/libFLAC/include/private/bitmath.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2001-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -36,69 +36,98 @@ #include "FLAC/ordinals.h" #include "FLAC/assert.h" -/* for CHAR_BIT */ -#include <limits.h> #include "share/compat.h" -#if defined(_MSC_VER) && (_MSC_VER >= 1400) +#if defined(_MSC_VER) #include <intrin.h> /* for _BitScanReverse* */ #endif /* Will never be emitted for MSVC, GCC, Intel compilers */ -static inline unsigned int FLAC__clz_soft_uint32(unsigned int word) +static inline unsigned int FLAC__clz_soft_uint32(FLAC__uint32 word) { - static const unsigned char byte_to_unary_table[] = { - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - - return (word) > 0xffffff ? byte_to_unary_table[(word) >> 24] : - (word) > 0xffff ? byte_to_unary_table[(word) >> 16] + 8 : - (word) > 0xff ? byte_to_unary_table[(word) >> 8] + 16 : - byte_to_unary_table[(word)] + 24; + static const unsigned char byte_to_unary_table[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + return word > 0xffffff ? byte_to_unary_table[word >> 24] : + word > 0xffff ? byte_to_unary_table[word >> 16] + 8 : + word > 0xff ? byte_to_unary_table[word >> 8] + 16 : + byte_to_unary_table[word] + 24; } static inline unsigned int FLAC__clz_uint32(FLAC__uint32 v) { /* Never used with input 0 */ - FLAC__ASSERT(v > 0); + FLAC__ASSERT(v > 0); #if defined(__INTEL_COMPILER) - return _bit_scan_reverse(v) ^ 31U; + return _bit_scan_reverse(v) ^ 31U; #elif defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) /* This will translate either to (bsr ^ 31U), clz , ctlz, cntlz, lzcnt depending on * -march= setting or to a software routine in exotic machines. */ - return __builtin_clz(v); -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) - { - unsigned long idx; - _BitScanReverse(&idx, v); - return idx ^ 31U; - } + return __builtin_clz(v); +#elif defined(_MSC_VER) + { + unsigned long idx; + _BitScanReverse(&idx, v); + return idx ^ 31U; + } +#else + return FLAC__clz_soft_uint32(v); +#endif +} + +/* Used when 64-bit bsr/clz is unavailable; can use 32-bit bsr/clz when possible */ +static inline unsigned int FLAC__clz_soft_uint64(FLAC__uint64 word) +{ + return (FLAC__uint32)(word>>32) ? FLAC__clz_uint32((FLAC__uint32)(word>>32)) : + FLAC__clz_uint32((FLAC__uint32)word) + 32; +} + +static inline unsigned int FLAC__clz_uint64(FLAC__uint64 v) +{ + /* Never used with input 0 */ + FLAC__ASSERT(v > 0); +#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) + return __builtin_clzll(v); +#elif (defined(__INTEL_COMPILER) || defined(_MSC_VER)) && (defined(_M_IA64) || defined(_M_X64)) + { + unsigned long idx; + _BitScanReverse64(&idx, v); + return idx ^ 63U; + } #else - return FLAC__clz_soft_uint32(v); + return FLAC__clz_soft_uint64(v); #endif } -/* This one works with input 0 */ +/* These two functions work with input 0 */ static inline unsigned int FLAC__clz2_uint32(FLAC__uint32 v) { - if (!v) - return 32; - return FLAC__clz_uint32(v); + if (!v) + return 32; + return FLAC__clz_uint32(v); +} + +static inline unsigned int FLAC__clz2_uint64(FLAC__uint64 v) +{ + if (!v) + return 64; + return FLAC__clz_uint64(v); } /* An example of what FLAC__bitmath_ilog2() computes: @@ -126,61 +155,56 @@ static inline unsigned int FLAC__clz2_uint32(FLAC__uint32 v) static inline unsigned FLAC__bitmath_ilog2(FLAC__uint32 v) { - FLAC__ASSERT(v > 0); + FLAC__ASSERT(v > 0); #if defined(__INTEL_COMPILER) - return _bit_scan_reverse(v); -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) - { - unsigned long idx; - _BitScanReverse(&idx, v); - return idx; - } + return _bit_scan_reverse(v); +#elif defined(_MSC_VER) + { + unsigned long idx; + _BitScanReverse(&idx, v); + return idx; + } #else - return sizeof(FLAC__uint32) * CHAR_BIT - 1 - FLAC__clz_uint32(v); + return FLAC__clz_uint32(v) ^ 31U; #endif } - -#ifdef FLAC__INTEGER_ONLY_LIBRARY /* Unused otherwise */ - static inline unsigned FLAC__bitmath_ilog2_wide(FLAC__uint64 v) { - FLAC__ASSERT(v > 0); + FLAC__ASSERT(v > 0); #if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) - return sizeof(FLAC__uint64) * CHAR_BIT - 1 - __builtin_clzll(v); + return __builtin_clzll(v) ^ 63U; /* Sorry, only supported in x64/Itanium.. and both have fast FPU which makes integer-only encoder pointless */ -#elif (defined(_MSC_VER) && (_MSC_VER >= 1400)) && (defined(_M_IA64) || defined(_M_X64)) - { - unsigned long idx; - _BitScanReverse64(&idx, v); - return idx; - } +#elif (defined(__INTEL_COMPILER) || defined(_MSC_VER)) && (defined(_M_IA64) || defined(_M_X64)) + { + unsigned long idx; + _BitScanReverse64(&idx, v); + return idx; + } #else /* Brain-damaged compilers will use the fastest possible way that is, - de Bruijn sequences (http://supertech.csail.mit.edu/papers/debruijn.pdf) - (C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 CC0 (Public domain). + de Bruijn sequences (http://supertech.csail.mit.edu/papers/debruijn.pdf) + (C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 CC0 (Public domain). */ - { - static const unsigned char DEBRUIJN_IDX64[64]={ - 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40, - 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, - 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56, - 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 - }; - v|= v>>1; - v|= v>>2; - v|= v>>4; - v|= v>>8; - v|= v>>16; - v|= v>>32; - v= (v>>1)+1; - return DEBRUIJN_IDX64[v*0x218A392CD3D5DBF>>58&0x3F]; - } + { + static const unsigned char DEBRUIJN_IDX64[64]={ + 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40, + 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, + 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56, + 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 + }; + v|= v>>1; + v|= v>>2; + v|= v>>4; + v|= v>>8; + v|= v>>16; + v|= v>>32; + v= (v>>1)+1; + return DEBRUIJN_IDX64[v*FLAC__U64L(0x218A392CD3D5DBF)>>58&0x3F]; + } #endif } -#endif -unsigned FLAC__bitmath_silog2(int v); -unsigned FLAC__bitmath_silog2_wide(FLAC__int64 v); +unsigned FLAC__bitmath_silog2(FLAC__int64 v); #endif diff --git a/libFLAC/include/private/bitreader.h b/libFLAC/include/private/bitreader.h index 4dea8d03..7c731655 100644 --- a/libFLAC/include/private/bitreader.h +++ b/libFLAC/include/private/bitreader.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/bitwriter.h b/libFLAC/include/private/bitwriter.h index 3b1362d8..ef3ad1b6 100644 --- a/libFLAC/include/private/bitwriter.h +++ b/libFLAC/include/private/bitwriter.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/cpu.h b/libFLAC/include/private/cpu.h index 380f4f07..7c651807 100644 --- a/libFLAC/include/private/cpu.h +++ b/libFLAC/include/private/cpu.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2001-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -39,7 +39,24 @@ #include <config.h> #endif -#if defined FLAC__HAS_X86INTRIN +#ifndef FLAC__CPU_X86_64 + +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +#define FLAC__CPU_X86_64 +#endif + +#endif + +#ifndef FLAC__CPU_IA32 + +#if defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) ||defined( __i386) || defined(_M_IX86) +#define FLAC__CPU_IA32 +#endif + +#endif + + +#if FLAC__HAS_X86INTRIN /* SSE intrinsics support by ICC/MSVC/GCC */ #if defined __INTEL_COMPILER #define FLAC__SSE_TARGET(x) @@ -78,9 +95,11 @@ #define FLAC__SSE2_SUPPORTED 1 #define FLAC__SSSE3_SUPPORTED 1 #define FLAC__SSE4_1_SUPPORTED 1 +#ifdef FLAC__USE_AVX #define FLAC__AVX_SUPPORTED 1 #define FLAC__AVX2_SUPPORTED 1 #define FLAC__FMA_SUPPORTED 1 +#endif #else /* for GCC older than 4.9 */ #define FLAC__SSE_TARGET(x) #ifdef __SSE__ @@ -108,13 +127,17 @@ #endif /* compiler version */ #endif /* intrinsics support */ + +#ifndef FLAC__AVX_SUPPORTED +#define FLAC__AVX_SUPPORTED 0 +#endif + typedef enum { FLAC__CPUINFO_TYPE_IA32, FLAC__CPUINFO_TYPE_X86_64, FLAC__CPUINFO_TYPE_UNKNOWN } FLAC__CPUInfo_Type; -#if defined FLAC__CPU_IA32 typedef struct { FLAC__bool intel; @@ -131,7 +154,7 @@ typedef struct { FLAC__bool avx2; FLAC__bool fma; } FLAC__CPUInfo_IA32; -#elif defined FLAC__CPU_X86_64 + typedef struct { FLAC__bool intel; @@ -143,30 +166,21 @@ typedef struct { FLAC__bool avx2; FLAC__bool fma; } FLAC__CPUInfo_x86; -#endif + typedef struct { FLAC__bool use_asm; FLAC__CPUInfo_Type type; -#if defined FLAC__CPU_IA32 FLAC__CPUInfo_IA32 ia32; -#elif defined FLAC__CPU_X86_64 FLAC__CPUInfo_x86 x86; -#endif } FLAC__CPUInfo; void FLAC__cpu_info(FLAC__CPUInfo *info); -#ifndef FLAC__NO_ASM -# if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32(void); + void FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx); -# endif -# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN -FLAC__uint32 FLAC__cpu_have_cpuid_x86(void); + void FLAC__cpu_info_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx); -FLAC__uint32 FLAC__cpu_xgetbv_x86(void); -# endif -#endif #endif diff --git a/libFLAC/include/private/crc.h b/libFLAC/include/private/crc.h index 29c512ce..294f60ea 100644 --- a/libFLAC/include/private/crc.h +++ b/libFLAC/include/private/crc.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/fixed.h b/libFLAC/include/private/fixed.h index dcc47151..68cdfceb 100644 --- a/libFLAC/include/private/fixed.h +++ b/libFLAC/include/private/fixed.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -54,21 +54,21 @@ * OUT residual_bits_per_sample[0,FLAC__MAX_FIXED_ORDER] */ #ifndef FLAC__INTEGER_ONLY_LIBRARY -unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); -unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); +unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); +unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); # ifndef FLAC__NO_ASM -# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN +# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN # ifdef FLAC__SSE2_SUPPORTED -unsigned FLAC__fixed_compute_best_predictor_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]); -unsigned FLAC__fixed_compute_best_predictor_wide_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]); +unsigned FLAC__fixed_compute_best_predictor_intrin_sse2(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]); +unsigned FLAC__fixed_compute_best_predictor_wide_intrin_sse2(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]); # endif # ifdef FLAC__SSSE3_SUPPORTED -unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); -unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]); +unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); +unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]); # endif # endif # if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM -unsigned FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); +unsigned FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); # endif # endif #else diff --git a/libFLAC/include/private/float.h b/libFLAC/include/private/float.h index ab264324..12ece605 100644 --- a/libFLAC/include/private/float.h +++ b/libFLAC/include/private/float.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2004-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,18 +40,15 @@ #include "FLAC/ordinals.h" /* - * These typedefs make it easier to ensure that integer versions of - * the library really only contain integer operations. All the code - * in libFLAC should use FLAC__float and FLAC__double in place of - * float and double, and be protected by checks of the macro + * All the code in libFLAC that uses float and double + * should be protected by checks of the macro * FLAC__INTEGER_ONLY_LIBRARY. * - * FLAC__real is the basic floating point type used in LPC analysis. */ #ifndef FLAC__INTEGER_ONLY_LIBRARY -typedef double FLAC__double; -typedef float FLAC__float; /* + * FLAC__real is the basic floating point type used in LPC analysis. + * * WATCHOUT: changing FLAC__real will change the signatures of many * functions that have assembly language equivalents and break them. */ diff --git a/libFLAC/include/private/format.h b/libFLAC/include/private/format.h index 2fd34608..5b9cfbd0 100644 --- a/libFLAC/include/private/format.h +++ b/libFLAC/include/private/format.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/lpc.h b/libFLAC/include/private/lpc.h index c4ed085c..6eb02be6 100644 --- a/libFLAC/include/private/lpc.h +++ b/libFLAC/include/private/lpc.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -79,7 +79,7 @@ void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12_old(const FLAC__real void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); # endif # endif -# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN +# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN # ifdef FLAC__SSE_SUPPORTED void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); @@ -114,7 +114,7 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new(const FLAC__real da * in lp_coeff[8][0,8], the LP coefficients for order 8 will be * in lp_coeff[7][0,7], etc. */ -void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[]); +void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], double error[]); /* * FLAC__lpc_quantize_coefficients() @@ -161,7 +161,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx(const FLAC__i void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]); # endif # endif -# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN +# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN # ifdef FLAC__SSE2_SUPPORTED void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]); void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]); @@ -205,7 +205,7 @@ void FLAC__lpc_restore_signal_asm_ia32_mmx(const FLAC__int32 residual[], unsigne void FLAC__lpc_restore_signal_wide_asm_ia32(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); # endif /* FLAC__HAS_NASM */ # endif /* FLAC__CPU_IA32 */ -# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN +# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN # ifdef FLAC__SSE2_SUPPORTED void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); # endif @@ -227,8 +227,8 @@ void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], un * IN total_samples > 0 # of samples in residual signal * RETURN expected bits per sample */ -FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample(FLAC__double lpc_error, unsigned total_samples); -FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(FLAC__double lpc_error, FLAC__double error_scale); +double FLAC__lpc_compute_expected_bits_per_residual_sample(double lpc_error, unsigned total_samples); +double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(double lpc_error, double error_scale); /* * FLAC__lpc_compute_best_order() @@ -243,7 +243,7 @@ FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scal * (includes warmup sample size and quantized LP coefficient) * RETURN [1,max_order] best order */ -unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order); +unsigned FLAC__lpc_compute_best_order(const double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order); #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */ diff --git a/libFLAC/include/private/macros.h b/libFLAC/include/private/macros.h index fd48a7fa..becc59f9 100644 --- a/libFLAC/include/private/macros.h +++ b/libFLAC/include/private/macros.h @@ -1,5 +1,5 @@ /* libFLAC - Free Lossless Audio Codec library - * Copyright (C) 2012-2014 Xiph.org Foundation + * Copyright (C) 2012-2016 Xiph.org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -32,19 +32,19 @@ #ifndef FLAC__PRIVATE__MACROS_H #define FLAC__PRIVATE__MACROS_H -#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 3))) +#if defined(__GNUC__) && (__GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 3)) #define flac_max(a,b) \ - ({ __typeof__ (a) _a = (a); \ - __typeof__ (b) _b = (b); \ - _a > _b ? _a : _b; }) + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? _a : _b; }) #define MIN_PASTE(A,B) A##B #define MIN_IMPL(A,B,L) ({ \ - __typeof__(A) MIN_PASTE(__a,L) = (A); \ - __typeof__(B) MIN_PASTE(__b,L) = (B); \ - MIN_PASTE(__a,L) < MIN_PASTE(__b,L) ? MIN_PASTE(__a,L) : MIN_PASTE(__b,L); \ - }) + __typeof__(A) MIN_PASTE(__a,L) = (A); \ + __typeof__(B) MIN_PASTE(__b,L) = (B); \ + MIN_PASTE(__a,L) < MIN_PASTE(__b,L) ? MIN_PASTE(__a,L) : MIN_PASTE(__b,L); \ + }) #define flac_min(A,B) MIN_IMPL(A,B,__COUNTER__) diff --git a/libFLAC/include/private/memory.h b/libFLAC/include/private/memory.h index c9f27129..f103c531 100644 --- a/libFLAC/include/private/memory.h +++ b/libFLAC/include/private/memory.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2001-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/metadata.h b/libFLAC/include/private/metadata.h index 092764c2..161947fd 100644 --- a/libFLAC/include/private/metadata.h +++ b/libFLAC/include/private/metadata.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2002-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/ogg_decoder_aspect.h b/libFLAC/include/private/ogg_decoder_aspect.h index 439bf8e4..218f44ed 100644 --- a/libFLAC/include/private/ogg_decoder_aspect.h +++ b/libFLAC/include/private/ogg_decoder_aspect.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec * Copyright (C) 2002-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/ogg_encoder_aspect.h b/libFLAC/include/private/ogg_encoder_aspect.h index 709597d4..f55ef322 100644 --- a/libFLAC/include/private/ogg_encoder_aspect.h +++ b/libFLAC/include/private/ogg_encoder_aspect.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec * Copyright (C) 2002-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/ogg_helper.h b/libFLAC/include/private/ogg_helper.h index 87b96ef8..4c1000c8 100644 --- a/libFLAC/include/private/ogg_helper.h +++ b/libFLAC/include/private/ogg_helper.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec * Copyright (C) 2004-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/ogg_mapping.h b/libFLAC/include/private/ogg_mapping.h index d35a46cd..1fa022d0 100644 --- a/libFLAC/include/private/ogg_mapping.h +++ b/libFLAC/include/private/ogg_mapping.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec * Copyright (C) 2004-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/stream_encoder.h b/libFLAC/include/private/stream_encoder.h index 96d31358..ab1721f6 100644 --- a/libFLAC/include/private/stream_encoder.h +++ b/libFLAC/include/private/stream_encoder.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/stream_encoder_framing.h b/libFLAC/include/private/stream_encoder_framing.h index 2b7387a7..f633a9d3 100644 --- a/libFLAC/include/private/stream_encoder_framing.h +++ b/libFLAC/include/private/stream_encoder_framing.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/private/window.h b/libFLAC/include/private/window.h index 52c92621..bfed7740 100644 --- a/libFLAC/include/private/window.h +++ b/libFLAC/include/private/window.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2006-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/protected/all.h b/libFLAC/include/protected/all.h new file mode 100644 index 00000000..9468bd3b --- /dev/null +++ b/libFLAC/include/protected/all.h @@ -0,0 +1,39 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2001-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FLAC__PROTECTED__ALL_H +#define FLAC__PROTECTED__ALL_H + +#include "stream_decoder.h" +#include "stream_encoder.h" + +#endif diff --git a/libFLAC/include/protected/stream_decoder.h b/libFLAC/include/protected/stream_decoder.h index 7be95afd..5c31c161 100644 --- a/libFLAC/include/protected/stream_decoder.h +++ b/libFLAC/include/protected/stream_decoder.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/include/protected/stream_encoder.h b/libFLAC/include/protected/stream_encoder.h index 6917f5d4..8850c6bc 100644 --- a/libFLAC/include/protected/stream_encoder.h +++ b/libFLAC/include/protected/stream_encoder.h @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2001-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/libFLAC.m4 b/libFLAC/libFLAC.m4 new file mode 100644 index 00000000..da7354e9 --- /dev/null +++ b/libFLAC/libFLAC.m4 @@ -0,0 +1,118 @@ +# Configure paths for libFLAC +# "Inspired" by ogg.m4 + +dnl AM_PATH_LIBFLAC([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]) +dnl Test for libFLAC, and define LIBFLAC_CFLAGS, LIBFLAC_LIBS, LIBFLAC_LIBDIR +dnl +AC_DEFUN([AM_PATH_LIBFLAC], +[dnl +dnl Get the cflags and libraries +dnl +AC_ARG_WITH(libFLAC,[ --with-libFLAC=PFX Prefix where libFLAC is installed (optional)], libFLAC_prefix="$withval", libFLAC_prefix="") +AC_ARG_WITH(libFLAC-libraries,[ --with-libFLAC-libraries=DIR Directory where libFLAC library is installed (optional)], libFLAC_libraries="$withval", libFLAC_libraries="") +AC_ARG_WITH(libFLAC-includes,[ --with-libFLAC-includes=DIR Directory where libFLAC header files are installed (optional)], libFLAC_includes="$withval", libFLAC_includes="") +AC_ARG_ENABLE(libFLACtest, [ --disable-libFLACtest Do not try to compile and run a test libFLAC program],, enable_libFLACtest=yes) + + if test "x$libFLAC_libraries" != "x" ; then + LIBFLAC_LIBS="-L$libFLAC_libraries" + elif test "x$libFLAC_prefix" = "xno" || test "x$libFLAC_prefix" = "xyes" ; then + LIBFLAC_LIBS="" + elif test "x$libFLAC_prefix" != "x" ; then + LIBFLAC_LIBS="-L$libFLAC_prefix/lib" + elif test "x$prefix" != "xNONE"; then + LIBFLAC_LIBS="-L$prefix/lib" + fi + + if test "x$libFLAC_prefix" != "xno" ; then + LIBFLAC_LIBS="$LIBFLAC_LIBS -lFLAC $OGG_LIBS -lm" + fi + + if test "x$libFLAC_includes" != "x" ; then + LIBFLAC_CFLAGS="-I$libFLAC_includes" + elif test "x$libFLAC_prefix" != "x" ; then + LIBFLAC_CFLAGS="-I$libFLAC_prefix/include" + elif test "$prefix" != "xNONE"; then + LIBFLAC_CFLAGS="" + fi + + AC_MSG_CHECKING(for libFLAC) + no_libFLAC="" + + + if test "x$enable_libFLACtest" = "xyes" ; then + ac_save_CFLAGS="$CFLAGS" + ac_save_CXXFLAGS="$CXXFLAGS" + ac_save_LIBS="$LIBS" + ac_save_LD_LIBRARY_PATH="$LD_LIBRARY_PATH" + CFLAGS="$CFLAGS $LIBFLAC_CFLAGS" + CXXFLAGS="$CXXFLAGS $LIBFLAC_CFLAGS" + LIBS="$LIBS $LIBFLAC_LIBS" + LD_LIBRARY_PATH="$LIBFLAC_LIBDIR:$LD_LIBRARY_PATH" +dnl +dnl Now check if the installed libFLAC is sufficiently new. +dnl + rm -f conf.libFLACtest + AC_TRY_RUN([ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <FLAC/format.h> + +int main () +{ + system("touch conf.libFLACtest"); + return 0; +} + +],, no_libFLAC=yes,[echo $ac_n "cross compiling; assumed OK... $ac_c"]) + CFLAGS="$ac_save_CFLAGS" + CXXFLAGS="$ac_save_CXXFLAGS" + LIBS="$ac_save_LIBS" + LD_LIBRARY_PATH="$ac_save_LD_LIBRARY_PATH" + fi + + if test "x$no_libFLAC" = "x" ; then + AC_MSG_RESULT(yes) + ifelse([$1], , :, [$1]) + else + AC_MSG_RESULT(no) + if test -f conf.libFLACtest ; then + : + else + echo "*** Could not run libFLAC test program, checking why..." + CFLAGS="$CFLAGS $LIBFLAC_CFLAGS" + CXXFLAGS="$CXXFLAGS $LIBFLAC_CFLAGS" + LIBS="$LIBS $LIBFLAC_LIBS" + LD_LIBRARY_PATH="$LIBFLAC_LIBDIR:$LD_LIBRARY_PATH" + AC_TRY_LINK([ +#include <stdio.h> +#include <FLAC/format.h> +], [ return 0; ], + [ echo "*** The test program compiled, but did not run. This usually means" + echo "*** that the run-time linker is not finding libFLAC or finding the wrong" + echo "*** version of libFLAC. If it is not finding libFLAC, you'll need to set your" + echo "*** LD_LIBRARY_PATH environment variable, or edit /etc/ld.so.conf to point" + echo "*** to the installed location Also, make sure you have run ldconfig if that" + echo "*** is required on your system" + echo "***" + echo "*** If you have an old version installed, it is best to remove it, although" + echo "*** you may also be able to get things to work by modifying LD_LIBRARY_PATH"], + [ echo "*** The test program failed to compile or link. See the file config.log for the" + echo "*** exact error that occured. This usually means libFLAC was incorrectly installed" + echo "*** or that you have moved libFLAC since it was installed. In the latter case, you" + echo "*** may want to edit the libFLAC-config script: $LIBFLAC_CONFIG" ]) + CFLAGS="$ac_save_CFLAGS" + CXXFLAGS="$ac_save_CXXFLAGS" + LIBS="$ac_save_LIBS" + LD_LIBRARY_PATH="$ac_save_LD_LIBRARY_PATH" + fi + LIBFLAC_CFLAGS="" + LIBFLAC_LIBDIR="" + LIBFLAC_LIBS="" + ifelse([$2], , :, [$2]) + fi + AC_SUBST(LIBFLAC_CFLAGS) + AC_SUBST(LIBFLAC_LIBDIR) + AC_SUBST(LIBFLAC_LIBS) + rm -f conf.libFLACtest +]) diff --git a/libFLAC/lpc.c b/libFLAC/lpc.c index 5c6e1750..531247b5 100644 --- a/libFLAC/lpc.c +++ b/libFLAC/lpc.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -51,15 +51,14 @@ #ifndef FLAC__INTEGER_ONLY_LIBRARY -#if !defined(HAVE_LROUND) -#if defined(_MSC_VER) +#if defined(_MSC_VER) && (_MSC_VER < 1800) #include <float.h> -#define copysign _copysign -#elif defined(__GNUC__) -#define copysign __builtin_copysign -#endif static inline long int lround(double x) { - return (long)(x + copysign (0.5, x)); + return (long)(x + _copysign(0.5, x)); +} +#elif !defined(HAVE_LROUND) && defined(__GNUC__) +static inline long int lround(double x) { + return (long)(x + __builtin_copysign(0.5, x)); } /* If this fails, we are in the presence of a mid 90's compiler, move along... */ #endif @@ -120,10 +119,10 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_le } } -void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[]) +void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], double error[]) { unsigned i, j; - FLAC__double r, err, lpc[FLAC__MAX_LPC_ORDER]; + double r, err, lpc[FLAC__MAX_LPC_ORDER]; FLAC__ASSERT(0 != max_order); FLAC__ASSERT(0 < *max_order); @@ -142,7 +141,7 @@ void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_o /* Update LPC coefficients and total error. */ lpc[i]=r; for(j = 0; j < (i>>1); j++) { - FLAC__double tmp = lpc[j]; + double tmp = lpc[j]; lpc[j] += r * lpc[i-1-j]; lpc[i-1-j] += r * tmp; } @@ -167,7 +166,7 @@ void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_o int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order, unsigned precision, FLAC__int32 qlp_coeff[], int *shift) { unsigned i; - FLAC__double cmax; + double cmax; FLAC__int32 qmax, qmin; FLAC__ASSERT(precision > 0); @@ -182,7 +181,7 @@ int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order, /* calc cmax = max( |lp_coeff[i]| ) */ cmax = 0.0; for(i = 0; i < order; i++) { - const FLAC__double d = fabs(lp_coeff[i]); + const double d = fabs(lp_coeff[i]); if(d > cmax) cmax = d; } @@ -207,7 +206,7 @@ int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order, } if(*shift >= 0) { - FLAC__double error = 0.0; + double error = 0.0; FLAC__int32 q; for(i = 0; i < order; i++) { error += lp_coeff[i] * (1 << *shift); @@ -228,12 +227,12 @@ int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order, } } /* negative shift is very rare but due to design flaw, negative shift is - * a NOP in the decoder, so it must be handled specially by scaling down - * coeffs + * not allowed in the decoder, so it must be handled specially by scaling + * down coeffs */ else { const int nshift = -(*shift); - FLAC__double error = 0.0; + double error = 0.0; FLAC__int32 q; #ifdef DEBUG fprintf(stderr,"FLAC__lpc_quantize_coefficients: negative shift=%d order=%u cmax=%f\n", *shift, order, cmax); @@ -546,11 +545,11 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 * f history = data; for(j = 0; j < order; j++) sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history)); - if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) { + if(FLAC__bitmath_silog2(sum >> lp_quantization) > 32) { fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization)); break; } - if(FLAC__bitmath_silog2_wide((FLAC__int64)(*data) - (sum >> lp_quantization)) > 32) { + if(FLAC__bitmath_silog2((FLAC__int64)(*data) - (sum >> lp_quantization)) > 32) { fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%" PRId64 ", residual=%" PRId64 "\n", i, *data, (int64_t)(sum >> lp_quantization), ((FLAC__int64)(*data) - (sum >> lp_quantization))); break; } @@ -1063,11 +1062,11 @@ void FLAC__lpc_restore_signal_wide(const FLAC__int32 * flac_restrict residual, u history = data; for(j = 0; j < order; j++) sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history)); - if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) { + if(FLAC__bitmath_silog2(sum >> lp_quantization) > 32) { fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization)); break; } - if(FLAC__bitmath_silog2_wide((FLAC__int64)(*r) + (sum >> lp_quantization)) > 32) { + if(FLAC__bitmath_silog2((FLAC__int64)(*r) + (sum >> lp_quantization)) > 32) { fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%" PRId64 ", data=%" PRId64 "\n", i, *r, (sum >> lp_quantization), ((FLAC__int64)(*r) + (sum >> lp_quantization))); break; } @@ -1303,21 +1302,21 @@ void FLAC__lpc_restore_signal_wide(const FLAC__int32 * flac_restrict residual, u #ifndef FLAC__INTEGER_ONLY_LIBRARY -FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample(FLAC__double lpc_error, unsigned total_samples) +double FLAC__lpc_compute_expected_bits_per_residual_sample(double lpc_error, unsigned total_samples) { - FLAC__double error_scale; + double error_scale; FLAC__ASSERT(total_samples > 0); - error_scale = 0.5 / (FLAC__double)total_samples; + error_scale = 0.5 / (double)total_samples; return FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error, error_scale); } -FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(FLAC__double lpc_error, FLAC__double error_scale) +double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(double lpc_error, double error_scale) { if(lpc_error > 0.0) { - FLAC__double bps = (FLAC__double)0.5 * log(error_scale * lpc_error) / M_LN2; + double bps = (double)0.5 * log(error_scale * lpc_error) / M_LN2; if(bps >= 0.0) return bps; else @@ -1331,21 +1330,21 @@ FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scal } } -unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order) +unsigned FLAC__lpc_compute_best_order(const double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order) { unsigned order, indx, best_index; /* 'index' the index into lpc_error; index==order-1 since lpc_error[0] is for order==1, lpc_error[1] is for order==2, etc */ - FLAC__double bits, best_bits, error_scale; + double bits, best_bits, error_scale; FLAC__ASSERT(max_order > 0); FLAC__ASSERT(total_samples > 0); - error_scale = 0.5 / (FLAC__double)total_samples; + error_scale = 0.5 / (double)total_samples; best_index = 0; best_bits = (unsigned)(-1); for(indx = 0, order = 1; indx < max_order; indx++, order++) { - bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[indx], error_scale) * (FLAC__double)(total_samples - order) + (FLAC__double)(order * overhead_bits_per_order); + bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[indx], error_scale) * (double)(total_samples - order) + (double)(order * overhead_bits_per_order); if(bits < best_bits) { best_index = indx; best_bits = bits; diff --git a/libFLAC/lpc_intrin_avx2.c b/libFLAC/lpc_intrin_avx2.c new file mode 100644 index 00000000..f9f5ccdb --- /dev/null +++ b/libFLAC/lpc_intrin_avx2.c @@ -0,0 +1,1122 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/cpu.h" + +#ifndef FLAC__INTEGER_ONLY_LIBRARY +#ifndef FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN +#include "private/lpc.h" +#ifdef FLAC__AVX2_SUPPORTED + +#include "FLAC/assert.h" +#include "FLAC/format.h" + +#include <immintrin.h> /* AVX2 */ + +FLAC__SSE_TARGET("avx2") +void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) +{ + int i; + FLAC__int32 sum; + __m128i cnt = _mm_cvtsi32_si128(lp_quantization); + + FLAC__ASSERT(order > 0); + FLAC__ASSERT(order <= 32); + + if(order <= 12) { + if(order > 8) { + if(order > 10) { + if(order == 12) { + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(0xffff & qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(0xffff & qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(0xffff & qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(0xffff & qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(0xffff & qlp_coeff[7 ]); + q8 = _mm256_set1_epi32(0xffff & qlp_coeff[8 ]); + q9 = _mm256_set1_epi32(0xffff & qlp_coeff[9 ]); + q10 = _mm256_set1_epi32(0xffff & qlp_coeff[10]); + q11 = _mm256_set1_epi32(0xffff & qlp_coeff[11]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q11, _mm256_loadu_si256((const __m256i*)(data+i-12))); + mull = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 11 */ + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(0xffff & qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(0xffff & qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(0xffff & qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(0xffff & qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(0xffff & qlp_coeff[7 ]); + q8 = _mm256_set1_epi32(0xffff & qlp_coeff[8 ]); + q9 = _mm256_set1_epi32(0xffff & qlp_coeff[9 ]); + q10 = _mm256_set1_epi32(0xffff & qlp_coeff[10]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); + mull = _mm256_madd_epi16(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + else { + if(order == 10) { + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(0xffff & qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(0xffff & qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(0xffff & qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(0xffff & qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(0xffff & qlp_coeff[7 ]); + q8 = _mm256_set1_epi32(0xffff & qlp_coeff[8 ]); + q9 = _mm256_set1_epi32(0xffff & qlp_coeff[9 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); + mull = _mm256_madd_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 9 */ + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(0xffff & qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(0xffff & qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(0xffff & qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(0xffff & qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(0xffff & qlp_coeff[7 ]); + q8 = _mm256_set1_epi32(0xffff & qlp_coeff[8 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); + mull = _mm256_madd_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + } + else if(order > 4) { + if(order > 6) { + if(order == 8) { + __m256i q0, q1, q2, q3, q4, q5, q6, q7; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(0xffff & qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(0xffff & qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(0xffff & qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(0xffff & qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(0xffff & qlp_coeff[7 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); + mull = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 7 */ + __m256i q0, q1, q2, q3, q4, q5, q6; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(0xffff & qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(0xffff & qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(0xffff & qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(0xffff & qlp_coeff[6 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); + mull = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + else { + if(order == 6) { + __m256i q0, q1, q2, q3, q4, q5; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(0xffff & qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(0xffff & qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(0xffff & qlp_coeff[5 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); + mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 5 */ + __m256i q0, q1, q2, q3, q4; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(0xffff & qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(0xffff & qlp_coeff[4 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); + mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + } + else { + if(order > 2) { + if(order == 4) { + __m256i q0, q1, q2, q3; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(0xffff & qlp_coeff[3 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); + mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 3 */ + __m256i q0, q1, q2; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(0xffff & qlp_coeff[2 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); + mull = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + else { + if(order == 2) { + __m256i q0, q1; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(0xffff & qlp_coeff[1 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); + mull = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 1 */ + __m256i q0; + q0 = _mm256_set1_epi32(0xffff & qlp_coeff[0 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ; + summ = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + } + for(; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 12: sum += qlp_coeff[11] * data[i-12]; + case 11: sum += qlp_coeff[10] * data[i-11]; + case 10: sum += qlp_coeff[ 9] * data[i-10]; + case 9: sum += qlp_coeff[ 8] * data[i- 9]; + case 8: sum += qlp_coeff[ 7] * data[i- 8]; + case 7: sum += qlp_coeff[ 6] * data[i- 7]; + case 6: sum += qlp_coeff[ 5] * data[i- 6]; + case 5: sum += qlp_coeff[ 4] * data[i- 5]; + case 4: sum += qlp_coeff[ 3] * data[i- 4]; + case 3: sum += qlp_coeff[ 2] * data[i- 3]; + case 2: sum += qlp_coeff[ 1] * data[i- 2]; + case 1: sum += qlp_coeff[ 0] * data[i- 1]; + } + residual[i] = data[i] - (sum >> lp_quantization); + } + } + else { /* order > 12 */ + for(i = 0; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 32: sum += qlp_coeff[31] * data[i-32]; + case 31: sum += qlp_coeff[30] * data[i-31]; + case 30: sum += qlp_coeff[29] * data[i-30]; + case 29: sum += qlp_coeff[28] * data[i-29]; + case 28: sum += qlp_coeff[27] * data[i-28]; + case 27: sum += qlp_coeff[26] * data[i-27]; + case 26: sum += qlp_coeff[25] * data[i-26]; + case 25: sum += qlp_coeff[24] * data[i-25]; + case 24: sum += qlp_coeff[23] * data[i-24]; + case 23: sum += qlp_coeff[22] * data[i-23]; + case 22: sum += qlp_coeff[21] * data[i-22]; + case 21: sum += qlp_coeff[20] * data[i-21]; + case 20: sum += qlp_coeff[19] * data[i-20]; + case 19: sum += qlp_coeff[18] * data[i-19]; + case 18: sum += qlp_coeff[17] * data[i-18]; + case 17: sum += qlp_coeff[16] * data[i-17]; + case 16: sum += qlp_coeff[15] * data[i-16]; + case 15: sum += qlp_coeff[14] * data[i-15]; + case 14: sum += qlp_coeff[13] * data[i-14]; + case 13: sum += qlp_coeff[12] * data[i-13]; + sum += qlp_coeff[11] * data[i-12]; + sum += qlp_coeff[10] * data[i-11]; + sum += qlp_coeff[ 9] * data[i-10]; + sum += qlp_coeff[ 8] * data[i- 9]; + sum += qlp_coeff[ 7] * data[i- 8]; + sum += qlp_coeff[ 6] * data[i- 7]; + sum += qlp_coeff[ 5] * data[i- 6]; + sum += qlp_coeff[ 4] * data[i- 5]; + sum += qlp_coeff[ 3] * data[i- 4]; + sum += qlp_coeff[ 2] * data[i- 3]; + sum += qlp_coeff[ 1] * data[i- 2]; + sum += qlp_coeff[ 0] * data[i- 1]; + } + residual[i] = data[i] - (sum >> lp_quantization); + } + } + _mm256_zeroupper(); +} + +FLAC__SSE_TARGET("avx2") +void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) +{ + int i; + FLAC__int32 sum; + __m128i cnt = _mm_cvtsi32_si128(lp_quantization); + + FLAC__ASSERT(order > 0); + FLAC__ASSERT(order <= 32); + + if(order <= 12) { + if(order > 8) { + if(order > 10) { + if(order == 12) { + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(qlp_coeff[7 ]); + q8 = _mm256_set1_epi32(qlp_coeff[8 ]); + q9 = _mm256_set1_epi32(qlp_coeff[9 ]); + q10 = _mm256_set1_epi32(qlp_coeff[10]); + q11 = _mm256_set1_epi32(qlp_coeff[11]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q11, _mm256_loadu_si256((const __m256i*)(data+i-12))); + mull = _mm256_mullo_epi32(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q8, _mm256_loadu_si256((const __m256i*)(data+i-9))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 11 */ + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(qlp_coeff[7 ]); + q8 = _mm256_set1_epi32(qlp_coeff[8 ]); + q9 = _mm256_set1_epi32(qlp_coeff[9 ]); + q10 = _mm256_set1_epi32(qlp_coeff[10]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); + mull = _mm256_mullo_epi32(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q8, _mm256_loadu_si256((const __m256i*)(data+i-9))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + else { + if(order == 10) { + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(qlp_coeff[7 ]); + q8 = _mm256_set1_epi32(qlp_coeff[8 ]); + q9 = _mm256_set1_epi32(qlp_coeff[9 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); + mull = _mm256_mullo_epi32(q8, _mm256_loadu_si256((const __m256i*)(data+i-9))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 9 */ + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(qlp_coeff[7 ]); + q8 = _mm256_set1_epi32(qlp_coeff[8 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q8, _mm256_loadu_si256((const __m256i*)(data+i-9))); + mull = _mm256_mullo_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + } + else if(order > 4) { + if(order > 6) { + if(order == 8) { + __m256i q0, q1, q2, q3, q4, q5, q6, q7; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(qlp_coeff[6 ]); + q7 = _mm256_set1_epi32(qlp_coeff[7 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); + mull = _mm256_mullo_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 7 */ + __m256i q0, q1, q2, q3, q4, q5, q6; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(qlp_coeff[5 ]); + q6 = _mm256_set1_epi32(qlp_coeff[6 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); + mull = _mm256_mullo_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + else { + if(order == 6) { + __m256i q0, q1, q2, q3, q4, q5; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(qlp_coeff[4 ]); + q5 = _mm256_set1_epi32(qlp_coeff[5 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); + mull = _mm256_mullo_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 5 */ + __m256i q0, q1, q2, q3, q4; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(qlp_coeff[3 ]); + q4 = _mm256_set1_epi32(qlp_coeff[4 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); + mull = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + } + else { + if(order > 2) { + if(order == 4) { + __m256i q0, q1, q2, q3; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + q3 = _mm256_set1_epi32(qlp_coeff[3 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); + mull = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 3 */ + __m256i q0, q1, q2; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + q2 = _mm256_set1_epi32(qlp_coeff[2 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); + mull = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + else { + if(order == 2) { + __m256i q0, q1; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + q1 = _mm256_set1_epi32(qlp_coeff[1 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ, mull; + summ = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); + mull = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + else { /* order == 1 */ + __m256i q0; + q0 = _mm256_set1_epi32(qlp_coeff[0 ]); + + for(i = 0; i < (int)data_len-7; i+=8) { + __m256i summ; + summ = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); + summ = _mm256_sra_epi32(summ, cnt); + _mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); + } + } + } + } + for(; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 12: sum += qlp_coeff[11] * data[i-12]; + case 11: sum += qlp_coeff[10] * data[i-11]; + case 10: sum += qlp_coeff[ 9] * data[i-10]; + case 9: sum += qlp_coeff[ 8] * data[i- 9]; + case 8: sum += qlp_coeff[ 7] * data[i- 8]; + case 7: sum += qlp_coeff[ 6] * data[i- 7]; + case 6: sum += qlp_coeff[ 5] * data[i- 6]; + case 5: sum += qlp_coeff[ 4] * data[i- 5]; + case 4: sum += qlp_coeff[ 3] * data[i- 4]; + case 3: sum += qlp_coeff[ 2] * data[i- 3]; + case 2: sum += qlp_coeff[ 1] * data[i- 2]; + case 1: sum += qlp_coeff[ 0] * data[i- 1]; + } + residual[i] = data[i] - (sum >> lp_quantization); + } + } + else { /* order > 12 */ + for(i = 0; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 32: sum += qlp_coeff[31] * data[i-32]; + case 31: sum += qlp_coeff[30] * data[i-31]; + case 30: sum += qlp_coeff[29] * data[i-30]; + case 29: sum += qlp_coeff[28] * data[i-29]; + case 28: sum += qlp_coeff[27] * data[i-28]; + case 27: sum += qlp_coeff[26] * data[i-27]; + case 26: sum += qlp_coeff[25] * data[i-26]; + case 25: sum += qlp_coeff[24] * data[i-25]; + case 24: sum += qlp_coeff[23] * data[i-24]; + case 23: sum += qlp_coeff[22] * data[i-23]; + case 22: sum += qlp_coeff[21] * data[i-22]; + case 21: sum += qlp_coeff[20] * data[i-21]; + case 20: sum += qlp_coeff[19] * data[i-20]; + case 19: sum += qlp_coeff[18] * data[i-19]; + case 18: sum += qlp_coeff[17] * data[i-18]; + case 17: sum += qlp_coeff[16] * data[i-17]; + case 16: sum += qlp_coeff[15] * data[i-16]; + case 15: sum += qlp_coeff[14] * data[i-15]; + case 14: sum += qlp_coeff[13] * data[i-14]; + case 13: sum += qlp_coeff[12] * data[i-13]; + sum += qlp_coeff[11] * data[i-12]; + sum += qlp_coeff[10] * data[i-11]; + sum += qlp_coeff[ 9] * data[i-10]; + sum += qlp_coeff[ 8] * data[i- 9]; + sum += qlp_coeff[ 7] * data[i- 8]; + sum += qlp_coeff[ 6] * data[i- 7]; + sum += qlp_coeff[ 5] * data[i- 6]; + sum += qlp_coeff[ 4] * data[i- 5]; + sum += qlp_coeff[ 3] * data[i- 4]; + sum += qlp_coeff[ 2] * data[i- 3]; + sum += qlp_coeff[ 1] * data[i- 2]; + sum += qlp_coeff[ 0] * data[i- 1]; + } + residual[i] = data[i] - (sum >> lp_quantization); + } + } + _mm256_zeroupper(); +} + +static FLAC__int32 pack_arr[8] = { 0, 2, 4, 6, 1, 3, 5, 7 }; + +FLAC__SSE_TARGET("avx2") +void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) +{ + int i; + FLAC__int64 sum; + __m128i cnt = _mm_cvtsi32_si128(lp_quantization); + __m256i pack = _mm256_loadu_si256((const __m256i *)pack_arr); + + FLAC__ASSERT(order > 0); + FLAC__ASSERT(order <= 32); + FLAC__ASSERT(lp_quantization <= 32); /* there's no _mm256_sra_epi64() so we have to use _mm256_srl_epi64() */ + + if(order <= 12) { + if(order > 8) { + if(order > 10) { + if(order == 12) { + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + q3 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[3 ])); + q4 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[4 ])); + q5 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[5 ])); + q6 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[6 ])); + q7 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[7 ])); + q8 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[8 ])); + q9 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[9 ])); + q10 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[10])); + q11 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[11])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q11, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-12)))); + mull = _mm256_mul_epi32(q10, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-11)))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q9, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-10)))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q8, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q7, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q6, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q5, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q4, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + else { /* order == 11 */ + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + q3 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[3 ])); + q4 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[4 ])); + q5 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[5 ])); + q6 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[6 ])); + q7 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[7 ])); + q8 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[8 ])); + q9 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[9 ])); + q10 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[10])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q10, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-11)))); + mull = _mm256_mul_epi32(q9, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-10)))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q8, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q7, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q6, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q5, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q4, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + } + else { + if(order == 10) { + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + q3 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[3 ])); + q4 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[4 ])); + q5 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[5 ])); + q6 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[6 ])); + q7 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[7 ])); + q8 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[8 ])); + q9 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[9 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q9, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-10)))); + mull = _mm256_mul_epi32(q8, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q7, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q6, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q5, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q4, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + else { /* order == 9 */ + __m256i q0, q1, q2, q3, q4, q5, q6, q7, q8; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + q3 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[3 ])); + q4 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[4 ])); + q5 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[5 ])); + q6 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[6 ])); + q7 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[7 ])); + q8 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[8 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q8, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); + mull = _mm256_mul_epi32(q7, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q6, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q5, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q4, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + } + } + else if(order > 4) { + if(order > 6) { + if(order == 8) { + __m256i q0, q1, q2, q3, q4, q5, q6, q7; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + q3 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[3 ])); + q4 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[4 ])); + q5 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[5 ])); + q6 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[6 ])); + q7 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[7 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q7, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); + mull = _mm256_mul_epi32(q6, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q5, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q4, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + else { /* order == 7 */ + __m256i q0, q1, q2, q3, q4, q5, q6; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + q3 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[3 ])); + q4 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[4 ])); + q5 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[5 ])); + q6 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[6 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q6, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); + mull = _mm256_mul_epi32(q5, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q4, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + } + else { + if(order == 6) { + __m256i q0, q1, q2, q3, q4, q5; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + q3 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[3 ])); + q4 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[4 ])); + q5 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[5 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q5, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); + mull = _mm256_mul_epi32(q4, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + else { /* order == 5 */ + __m256i q0, q1, q2, q3, q4; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + q3 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[3 ])); + q4 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[4 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q4, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); + mull = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + } + } + else { + if(order > 2) { + if(order == 4) { + __m256i q0, q1, q2, q3; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + q3 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[3 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); + mull = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + else { /* order == 3 */ + __m256i q0, q1, q2; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + q2 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[2 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); + mull = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + } + else { + if(order == 2) { + __m256i q0, q1; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + q1 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[1 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ, mull; + summ = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); + mull = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + else { /* order == 1 */ + __m256i q0; + q0 = _mm256_cvtepu32_epi64(_mm_set1_epi32(qlp_coeff[0 ])); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m256i summ; + summ = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); + summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); + } + } + } + } + for(; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 12: sum += qlp_coeff[11] * (FLAC__int64)data[i-12]; + case 11: sum += qlp_coeff[10] * (FLAC__int64)data[i-11]; + case 10: sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10]; + case 9: sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9]; + case 8: sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8]; + case 7: sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7]; + case 6: sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6]; + case 5: sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5]; + case 4: sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4]; + case 3: sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3]; + case 2: sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2]; + case 1: sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1]; + } + residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization); + } + } + else { /* order > 12 */ + for(i = 0; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32]; + case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31]; + case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30]; + case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29]; + case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28]; + case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27]; + case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26]; + case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25]; + case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24]; + case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23]; + case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22]; + case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21]; + case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20]; + case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19]; + case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18]; + case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17]; + case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16]; + case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15]; + case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14]; + case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13]; + sum += qlp_coeff[11] * (FLAC__int64)data[i-12]; + sum += qlp_coeff[10] * (FLAC__int64)data[i-11]; + sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10]; + sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9]; + sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8]; + sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7]; + sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6]; + sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5]; + sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4]; + sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3]; + sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2]; + sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1]; + } + residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization); + } + } + _mm256_zeroupper(); +} + +#endif /* FLAC__AVX2_SUPPORTED */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ +#endif /* FLAC__NO_ASM */ +#endif /* FLAC__INTEGER_ONLY_LIBRARY */ diff --git a/libFLAC/lpc_intrin_sse.c b/libFLAC/lpc_intrin_sse.c new file mode 100644 index 00000000..430e73f0 --- /dev/null +++ b/libFLAC/lpc_intrin_sse.c @@ -0,0 +1,454 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/cpu.h" + +#ifndef FLAC__INTEGER_ONLY_LIBRARY +#ifndef FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN +#include "private/lpc.h" +#ifdef FLAC__SSE_SUPPORTED +#include "FLAC/assert.h" +#include "FLAC/format.h" + +#include <xmmintrin.h> /* SSE */ + +/* new routines: more unaligned loads, less shuffle + * old routines: less unaligned loads, more shuffle + * these *_old routines are equivalent to the ASM routines in ia32/lpc_asm.nasm + */ + +/* new routines: faster on current Intel (starting from Core i aka Nehalem) and all AMD CPUs */ + +FLAC__SSE_TARGET("sse") +void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]) +{ + int i; + int limit = data_len - 4; + __m128 sum0; + + (void) lag; + FLAC__ASSERT(lag <= 4); + FLAC__ASSERT(lag <= data_len); + + sum0 = _mm_setzero_ps(); + + for(i = 0; i <= limit; i++) { + __m128 d, d0; + d0 = _mm_loadu_ps(data+i); + d = d0; d = _mm_shuffle_ps(d, d, 0); + sum0 = _mm_add_ps(sum0, _mm_mul_ps(d0, d)); + } + + { + __m128 d0 = _mm_setzero_ps(); + limit++; if(limit < 0) limit = 0; + + for(i = data_len-1; i >= limit; i--) { + __m128 d; + d = _mm_load_ss(data+i); d = _mm_shuffle_ps(d, d, 0); + d0 = _mm_shuffle_ps(d0, d0, _MM_SHUFFLE(2,1,0,3)); + d0 = _mm_move_ss(d0, d); + sum0 = _mm_add_ps(sum0, _mm_mul_ps(d, d0)); + } + } + + _mm_storeu_ps(autoc, sum0); +} + +FLAC__SSE_TARGET("sse") +void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]) +{ + int i; + int limit = data_len - 8; + __m128 sum0, sum1; + + (void) lag; + FLAC__ASSERT(lag <= 8); + FLAC__ASSERT(lag <= data_len); + + sum0 = _mm_setzero_ps(); + sum1 = _mm_setzero_ps(); + + for(i = 0; i <= limit; i++) { + __m128 d, d0, d1; + d0 = _mm_loadu_ps(data+i); + d1 = _mm_loadu_ps(data+i+4); + d = d0; d = _mm_shuffle_ps(d, d, 0); + sum0 = _mm_add_ps(sum0, _mm_mul_ps(d0, d)); + sum1 = _mm_add_ps(sum1, _mm_mul_ps(d1, d)); + } + + { + __m128 d0 = _mm_setzero_ps(); + __m128 d1 = _mm_setzero_ps(); + limit++; if(limit < 0) limit = 0; + + for(i = data_len-1; i >= limit; i--) { + __m128 d; + d = _mm_load_ss(data+i); d = _mm_shuffle_ps(d, d, 0); + d1 = _mm_shuffle_ps(d1, d1, _MM_SHUFFLE(2,1,0,3)); + d0 = _mm_shuffle_ps(d0, d0, _MM_SHUFFLE(2,1,0,3)); + d1 = _mm_move_ss(d1, d0); + d0 = _mm_move_ss(d0, d); + sum1 = _mm_add_ps(sum1, _mm_mul_ps(d, d1)); + sum0 = _mm_add_ps(sum0, _mm_mul_ps(d, d0)); + } + } + + _mm_storeu_ps(autoc, sum0); + _mm_storeu_ps(autoc+4, sum1); +} + +FLAC__SSE_TARGET("sse") +void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]) +{ + int i; + int limit = data_len - 12; + __m128 sum0, sum1, sum2; + + (void) lag; + FLAC__ASSERT(lag <= 12); + FLAC__ASSERT(lag <= data_len); + + sum0 = _mm_setzero_ps(); + sum1 = _mm_setzero_ps(); + sum2 = _mm_setzero_ps(); + + for(i = 0; i <= limit; i++) { + __m128 d, d0, d1, d2; + d0 = _mm_loadu_ps(data+i); + d1 = _mm_loadu_ps(data+i+4); + d2 = _mm_loadu_ps(data+i+8); + d = d0; d = _mm_shuffle_ps(d, d, 0); + sum0 = _mm_add_ps(sum0, _mm_mul_ps(d0, d)); + sum1 = _mm_add_ps(sum1, _mm_mul_ps(d1, d)); + sum2 = _mm_add_ps(sum2, _mm_mul_ps(d2, d)); + } + + { + __m128 d0 = _mm_setzero_ps(); + __m128 d1 = _mm_setzero_ps(); + __m128 d2 = _mm_setzero_ps(); + limit++; if(limit < 0) limit = 0; + + for(i = data_len-1; i >= limit; i--) { + __m128 d; + d = _mm_load_ss(data+i); d = _mm_shuffle_ps(d, d, 0); + d2 = _mm_shuffle_ps(d2, d2, _MM_SHUFFLE(2,1,0,3)); + d1 = _mm_shuffle_ps(d1, d1, _MM_SHUFFLE(2,1,0,3)); + d0 = _mm_shuffle_ps(d0, d0, _MM_SHUFFLE(2,1,0,3)); + d2 = _mm_move_ss(d2, d1); + d1 = _mm_move_ss(d1, d0); + d0 = _mm_move_ss(d0, d); + sum2 = _mm_add_ps(sum2, _mm_mul_ps(d, d2)); + sum1 = _mm_add_ps(sum1, _mm_mul_ps(d, d1)); + sum0 = _mm_add_ps(sum0, _mm_mul_ps(d, d0)); + } + } + + _mm_storeu_ps(autoc, sum0); + _mm_storeu_ps(autoc+4, sum1); + _mm_storeu_ps(autoc+8, sum2); +} + +FLAC__SSE_TARGET("sse") +void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]) +{ + int i; + int limit = data_len - 16; + __m128 sum0, sum1, sum2, sum3; + + (void) lag; + FLAC__ASSERT(lag <= 16); + FLAC__ASSERT(lag <= data_len); + + sum0 = _mm_setzero_ps(); + sum1 = _mm_setzero_ps(); + sum2 = _mm_setzero_ps(); + sum3 = _mm_setzero_ps(); + + for(i = 0; i <= limit; i++) { + __m128 d, d0, d1, d2, d3; + d0 = _mm_loadu_ps(data+i); + d1 = _mm_loadu_ps(data+i+4); + d2 = _mm_loadu_ps(data+i+8); + d3 = _mm_loadu_ps(data+i+12); + d = d0; d = _mm_shuffle_ps(d, d, 0); + sum0 = _mm_add_ps(sum0, _mm_mul_ps(d0, d)); + sum1 = _mm_add_ps(sum1, _mm_mul_ps(d1, d)); + sum2 = _mm_add_ps(sum2, _mm_mul_ps(d2, d)); + sum3 = _mm_add_ps(sum3, _mm_mul_ps(d3, d)); + } + + { + __m128 d0 = _mm_setzero_ps(); + __m128 d1 = _mm_setzero_ps(); + __m128 d2 = _mm_setzero_ps(); + __m128 d3 = _mm_setzero_ps(); + limit++; if(limit < 0) limit = 0; + + for(i = data_len-1; i >= limit; i--) { + __m128 d; + d = _mm_load_ss(data+i); d = _mm_shuffle_ps(d, d, 0); + d3 = _mm_shuffle_ps(d3, d3, _MM_SHUFFLE(2,1,0,3)); + d2 = _mm_shuffle_ps(d2, d2, _MM_SHUFFLE(2,1,0,3)); + d1 = _mm_shuffle_ps(d1, d1, _MM_SHUFFLE(2,1,0,3)); + d0 = _mm_shuffle_ps(d0, d0, _MM_SHUFFLE(2,1,0,3)); + d3 = _mm_move_ss(d3, d2); + d2 = _mm_move_ss(d2, d1); + d1 = _mm_move_ss(d1, d0); + d0 = _mm_move_ss(d0, d); + sum3 = _mm_add_ps(sum3, _mm_mul_ps(d, d3)); + sum2 = _mm_add_ps(sum2, _mm_mul_ps(d, d2)); + sum1 = _mm_add_ps(sum1, _mm_mul_ps(d, d1)); + sum0 = _mm_add_ps(sum0, _mm_mul_ps(d, d0)); + } + } + + _mm_storeu_ps(autoc, sum0); + _mm_storeu_ps(autoc+4, sum1); + _mm_storeu_ps(autoc+8, sum2); + _mm_storeu_ps(autoc+12,sum3); +} + +/* old routines: faster on older Intel CPUs (up to Core 2) */ + +FLAC__SSE_TARGET("sse") +void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]) +{ + __m128 xmm0, xmm2, xmm5; + + (void) lag; + FLAC__ASSERT(lag > 0); + FLAC__ASSERT(lag <= 4); + FLAC__ASSERT(lag <= data_len); + FLAC__ASSERT(data_len > 0); + + xmm5 = _mm_setzero_ps(); + + xmm0 = _mm_load_ss(data++); + xmm2 = xmm0; + xmm0 = _mm_shuffle_ps(xmm0, xmm0, 0); + + xmm0 = _mm_mul_ps(xmm0, xmm2); + xmm5 = _mm_add_ps(xmm5, xmm0); + + data_len--; + + while(data_len) + { + xmm0 = _mm_load1_ps(data++); + + xmm2 = _mm_shuffle_ps(xmm2, xmm2, _MM_SHUFFLE(2,1,0,3)); + xmm2 = _mm_move_ss(xmm2, xmm0); + xmm0 = _mm_mul_ps(xmm0, xmm2); + xmm5 = _mm_add_ps(xmm5, xmm0); + + data_len--; + } + + _mm_storeu_ps(autoc, xmm5); +} + +FLAC__SSE_TARGET("sse") +void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]) +{ + __m128 xmm0, xmm1, xmm2, xmm3, xmm5, xmm6; + + (void) lag; + FLAC__ASSERT(lag > 0); + FLAC__ASSERT(lag <= 8); + FLAC__ASSERT(lag <= data_len); + FLAC__ASSERT(data_len > 0); + + xmm5 = _mm_setzero_ps(); + xmm6 = _mm_setzero_ps(); + + xmm0 = _mm_load_ss(data++); + xmm2 = xmm0; + xmm0 = _mm_shuffle_ps(xmm0, xmm0, 0); + xmm3 = _mm_setzero_ps(); + + xmm0 = _mm_mul_ps(xmm0, xmm2); + xmm5 = _mm_add_ps(xmm5, xmm0); + + data_len--; + + while(data_len) + { + xmm0 = _mm_load1_ps(data++); + + xmm2 = _mm_shuffle_ps(xmm2, xmm2, _MM_SHUFFLE(2,1,0,3)); + xmm3 = _mm_shuffle_ps(xmm3, xmm3, _MM_SHUFFLE(2,1,0,3)); + xmm3 = _mm_move_ss(xmm3, xmm2); + xmm2 = _mm_move_ss(xmm2, xmm0); + + xmm1 = xmm0; + xmm1 = _mm_mul_ps(xmm1, xmm3); + xmm0 = _mm_mul_ps(xmm0, xmm2); + xmm6 = _mm_add_ps(xmm6, xmm1); + xmm5 = _mm_add_ps(xmm5, xmm0); + + data_len--; + } + + _mm_storeu_ps(autoc, xmm5); + _mm_storeu_ps(autoc+4, xmm6); +} + +FLAC__SSE_TARGET("sse") +void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]) +{ + __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + + (void) lag; + FLAC__ASSERT(lag > 0); + FLAC__ASSERT(lag <= 12); + FLAC__ASSERT(lag <= data_len); + FLAC__ASSERT(data_len > 0); + + xmm5 = _mm_setzero_ps(); + xmm6 = _mm_setzero_ps(); + xmm7 = _mm_setzero_ps(); + + xmm0 = _mm_load_ss(data++); + xmm2 = xmm0; + xmm0 = _mm_shuffle_ps(xmm0, xmm0, 0); + xmm3 = _mm_setzero_ps(); + xmm4 = _mm_setzero_ps(); + + xmm0 = _mm_mul_ps(xmm0, xmm2); + xmm5 = _mm_add_ps(xmm5, xmm0); + + data_len--; + + while(data_len) + { + xmm0 = _mm_load1_ps(data++); + + xmm2 = _mm_shuffle_ps(xmm2, xmm2, _MM_SHUFFLE(2,1,0,3)); + xmm3 = _mm_shuffle_ps(xmm3, xmm3, _MM_SHUFFLE(2,1,0,3)); + xmm4 = _mm_shuffle_ps(xmm4, xmm4, _MM_SHUFFLE(2,1,0,3)); + xmm4 = _mm_move_ss(xmm4, xmm3); + xmm3 = _mm_move_ss(xmm3, xmm2); + xmm2 = _mm_move_ss(xmm2, xmm0); + + xmm1 = xmm0; + xmm1 = _mm_mul_ps(xmm1, xmm2); + xmm5 = _mm_add_ps(xmm5, xmm1); + xmm1 = xmm0; + xmm1 = _mm_mul_ps(xmm1, xmm3); + xmm6 = _mm_add_ps(xmm6, xmm1); + xmm0 = _mm_mul_ps(xmm0, xmm4); + xmm7 = _mm_add_ps(xmm7, xmm0); + + data_len--; + } + + _mm_storeu_ps(autoc, xmm5); + _mm_storeu_ps(autoc+4, xmm6); + _mm_storeu_ps(autoc+8, xmm7); +} + +FLAC__SSE_TARGET("sse") +void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_old(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]) +{ + __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9; + + (void) lag; + FLAC__ASSERT(lag > 0); + FLAC__ASSERT(lag <= 16); + FLAC__ASSERT(lag <= data_len); + FLAC__ASSERT(data_len > 0); + + xmm6 = _mm_setzero_ps(); + xmm7 = _mm_setzero_ps(); + xmm8 = _mm_setzero_ps(); + xmm9 = _mm_setzero_ps(); + + xmm0 = _mm_load_ss(data++); + xmm2 = xmm0; + xmm0 = _mm_shuffle_ps(xmm0, xmm0, 0); + xmm3 = _mm_setzero_ps(); + xmm4 = _mm_setzero_ps(); + xmm5 = _mm_setzero_ps(); + + xmm0 = _mm_mul_ps(xmm0, xmm2); + xmm6 = _mm_add_ps(xmm6, xmm0); + + data_len--; + + while(data_len) + { + xmm0 = _mm_load1_ps(data++); + + /* shift xmm5:xmm4:xmm3:xmm2 left by one float */ + xmm5 = _mm_shuffle_ps(xmm5, xmm5, _MM_SHUFFLE(2,1,0,3)); + xmm4 = _mm_shuffle_ps(xmm4, xmm4, _MM_SHUFFLE(2,1,0,3)); + xmm3 = _mm_shuffle_ps(xmm3, xmm3, _MM_SHUFFLE(2,1,0,3)); + xmm2 = _mm_shuffle_ps(xmm2, xmm2, _MM_SHUFFLE(2,1,0,3)); + xmm5 = _mm_move_ss(xmm5, xmm4); + xmm4 = _mm_move_ss(xmm4, xmm3); + xmm3 = _mm_move_ss(xmm3, xmm2); + xmm2 = _mm_move_ss(xmm2, xmm0); + + /* xmm9|xmm8|xmm7|xmm6 += xmm0|xmm0|xmm0|xmm0 * xmm5|xmm4|xmm3|xmm2 */ + xmm1 = xmm0; + xmm1 = _mm_mul_ps(xmm1, xmm5); + xmm9 = _mm_add_ps(xmm9, xmm1); + xmm1 = xmm0; + xmm1 = _mm_mul_ps(xmm1, xmm4); + xmm8 = _mm_add_ps(xmm8, xmm1); + xmm1 = xmm0; + xmm1 = _mm_mul_ps(xmm1, xmm3); + xmm7 = _mm_add_ps(xmm7, xmm1); + xmm0 = _mm_mul_ps(xmm0, xmm2); + xmm6 = _mm_add_ps(xmm6, xmm0); + + data_len--; + } + + _mm_storeu_ps(autoc, xmm6); + _mm_storeu_ps(autoc+4, xmm7); + _mm_storeu_ps(autoc+8, xmm8); + _mm_storeu_ps(autoc+12,xmm9); +} + +#endif /* FLAC__SSE_SUPPORTED */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ +#endif /* FLAC__NO_ASM */ +#endif /* FLAC__INTEGER_ONLY_LIBRARY */ diff --git a/libFLAC/lpc_intrin_sse2.c b/libFLAC/lpc_intrin_sse2.c new file mode 100644 index 00000000..13833948 --- /dev/null +++ b/libFLAC/lpc_intrin_sse2.c @@ -0,0 +1,1090 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/cpu.h" + +#ifndef FLAC__INTEGER_ONLY_LIBRARY +#ifndef FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN +#include "private/lpc.h" +#ifdef FLAC__SSE2_SUPPORTED + +#include "FLAC/assert.h" +#include "FLAC/format.h" + +#include <emmintrin.h> /* SSE2 */ + +#define RESIDUAL16_RESULT(xmmN) curr = *data++; *residual++ = curr - (_mm_cvtsi128_si32(xmmN) >> lp_quantization); +#define DATA16_RESULT(xmmN) curr = *residual++ + (_mm_cvtsi128_si32(xmmN) >> lp_quantization); *data++ = curr; + +#define RESIDUAL32_RESULT(xmmN) residual[i] = data[i] - (_mm_cvtsi128_si32(xmmN) >> lp_quantization); +#define DATA32_RESULT(xmmN) data[i] = residual[i] + (_mm_cvtsi128_si32(xmmN) >> lp_quantization); + +FLAC__SSE_TARGET("sse2") +void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) +{ + int i; + FLAC__int32 sum; + __m128i cnt = _mm_cvtsi32_si128(lp_quantization); + + FLAC__ASSERT(order > 0); + FLAC__ASSERT(order <= 32); + + if(order <= 12) { + if(order > 8) { + if(order > 10) { + if(order == 12) { + __m128i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + q8 = _mm_cvtsi32_si128(0xffff & qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0)); + q9 = _mm_cvtsi32_si128(0xffff & qlp_coeff[9]); q9 = _mm_shuffle_epi32(q9, _MM_SHUFFLE(0,0,0,0)); + q10 = _mm_cvtsi32_si128(0xffff & qlp_coeff[10]); q10 = _mm_shuffle_epi32(q10, _MM_SHUFFLE(0,0,0,0)); + q11 = _mm_cvtsi32_si128(0xffff & qlp_coeff[11]); q11 = _mm_shuffle_epi32(q11, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q11, _mm_loadu_si128((const __m128i*)(data+i-12))); + mull = _mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 11 */ + __m128i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + q8 = _mm_cvtsi32_si128(0xffff & qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0)); + q9 = _mm_cvtsi32_si128(0xffff & qlp_coeff[9]); q9 = _mm_shuffle_epi32(q9, _MM_SHUFFLE(0,0,0,0)); + q10 = _mm_cvtsi32_si128(0xffff & qlp_coeff[10]); q10 = _mm_shuffle_epi32(q10, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); + mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + else { + if(order == 10) { + __m128i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + q8 = _mm_cvtsi32_si128(0xffff & qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0)); + q9 = _mm_cvtsi32_si128(0xffff & qlp_coeff[9]); q9 = _mm_shuffle_epi32(q9, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); + mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 9 */ + __m128i q0, q1, q2, q3, q4, q5, q6, q7, q8; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + q8 = _mm_cvtsi32_si128(0xffff & qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); + mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + } + else if(order > 4) { + if(order > 6) { + if(order == 8) { + __m128i q0, q1, q2, q3, q4, q5, q6, q7; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); + mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 7 */ + __m128i q0, q1, q2, q3, q4, q5, q6; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); + mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + else { + if(order == 6) { + __m128i q0, q1, q2, q3, q4, q5; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); + mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 5 */ + __m128i q0, q1, q2, q3, q4; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); + mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + } + else { + if(order > 2) { + if(order == 4) { + __m128i q0, q1, q2, q3; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); + mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 3 */ + __m128i q0, q1, q2; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); + mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + else { + if(order == 2) { + __m128i q0, q1; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); + mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 1 */ + __m128i q0; + q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ; + summ = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + } + for(; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 12: sum += qlp_coeff[11] * data[i-12]; + case 11: sum += qlp_coeff[10] * data[i-11]; + case 10: sum += qlp_coeff[ 9] * data[i-10]; + case 9: sum += qlp_coeff[ 8] * data[i- 9]; + case 8: sum += qlp_coeff[ 7] * data[i- 8]; + case 7: sum += qlp_coeff[ 6] * data[i- 7]; + case 6: sum += qlp_coeff[ 5] * data[i- 6]; + case 5: sum += qlp_coeff[ 4] * data[i- 5]; + case 4: sum += qlp_coeff[ 3] * data[i- 4]; + case 3: sum += qlp_coeff[ 2] * data[i- 3]; + case 2: sum += qlp_coeff[ 1] * data[i- 2]; + case 1: sum += qlp_coeff[ 0] * data[i- 1]; + } + residual[i] = data[i] - (sum >> lp_quantization); + } + } + else { /* order > 12 */ + for(i = 0; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 32: sum += qlp_coeff[31] * data[i-32]; + case 31: sum += qlp_coeff[30] * data[i-31]; + case 30: sum += qlp_coeff[29] * data[i-30]; + case 29: sum += qlp_coeff[28] * data[i-29]; + case 28: sum += qlp_coeff[27] * data[i-28]; + case 27: sum += qlp_coeff[26] * data[i-27]; + case 26: sum += qlp_coeff[25] * data[i-26]; + case 25: sum += qlp_coeff[24] * data[i-25]; + case 24: sum += qlp_coeff[23] * data[i-24]; + case 23: sum += qlp_coeff[22] * data[i-23]; + case 22: sum += qlp_coeff[21] * data[i-22]; + case 21: sum += qlp_coeff[20] * data[i-21]; + case 20: sum += qlp_coeff[19] * data[i-20]; + case 19: sum += qlp_coeff[18] * data[i-19]; + case 18: sum += qlp_coeff[17] * data[i-18]; + case 17: sum += qlp_coeff[16] * data[i-17]; + case 16: sum += qlp_coeff[15] * data[i-16]; + case 15: sum += qlp_coeff[14] * data[i-15]; + case 14: sum += qlp_coeff[13] * data[i-14]; + case 13: sum += qlp_coeff[12] * data[i-13]; + sum += qlp_coeff[11] * data[i-12]; + sum += qlp_coeff[10] * data[i-11]; + sum += qlp_coeff[ 9] * data[i-10]; + sum += qlp_coeff[ 8] * data[i- 9]; + sum += qlp_coeff[ 7] * data[i- 8]; + sum += qlp_coeff[ 6] * data[i- 7]; + sum += qlp_coeff[ 5] * data[i- 6]; + sum += qlp_coeff[ 4] * data[i- 5]; + sum += qlp_coeff[ 3] * data[i- 4]; + sum += qlp_coeff[ 2] * data[i- 3]; + sum += qlp_coeff[ 1] * data[i- 2]; + sum += qlp_coeff[ 0] * data[i- 1]; + } + residual[i] = data[i] - (sum >> lp_quantization); + } + } +} + +FLAC__SSE_TARGET("sse2") +void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) +{ + int i; + + FLAC__ASSERT(order > 0); + FLAC__ASSERT(order <= 32); + + if(order <= 12) { + if(order > 8) { /* order == 9, 10, 11, 12 */ + if(order > 10) { /* order == 11, 12 */ + if(order == 12) { + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); // 0 0 q[1] q[0] + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); // 0 0 q[3] q[2] + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); // 0 0 q[5] q[4] + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); // 0 0 q[7] q[6] + xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8)); // 0 0 q[9] q[8] + xmm5 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+10)); // 0 0 q[11] q[10] + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); // 0 q[1] 0 q[0] + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); // 0 q[3] 0 q[2] + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); // 0 q[5] 0 q[4] + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); // 0 q[7] 0 q[6] + xmm4 = _mm_shuffle_epi32(xmm4, _MM_SHUFFLE(3,1,2,0)); // 0 q[9] 0 q[8] + xmm5 = _mm_shuffle_epi32(xmm5, _MM_SHUFFLE(3,1,2,0)); // 0 q[11] 0 q[10] + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[11] * data[i-12]; + //sum += qlp_coeff[10] * data[i-11]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-12)); // 0 0 d[i-11] d[i-12] + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); // 0 d[i-12] 0 d[i-11] + xmm7 = _mm_mul_epu32(xmm7, xmm5); /* we use _unsigned_ multiplication and discard high dword of the result values */ + + //sum += qlp_coeff[9] * data[i-10]; + //sum += qlp_coeff[8] * data[i-9]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-10)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm4); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[7] * data[i-8]; + //sum += qlp_coeff[6] * data[i-7]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm3); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[5] * data[i-6]; + //sum += qlp_coeff[4] * data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm2); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[3] * data[i-4]; + //sum += qlp_coeff[2] * data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm1); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + else { /* order == 11 */ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8)); + xmm5 = _mm_cvtsi32_si128(qlp_coeff[10]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); + xmm4 = _mm_shuffle_epi32(xmm4, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[10] * data[i-11]; + xmm7 = _mm_cvtsi32_si128(data[i-11]); + xmm7 = _mm_mul_epu32(xmm7, xmm5); + + //sum += qlp_coeff[9] * data[i-10]; + //sum += qlp_coeff[8] * data[i-9]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-10)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm4); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[7] * data[i-8]; + //sum += qlp_coeff[6] * data[i-7]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm3); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[5] * data[i-6]; + //sum += qlp_coeff[4] * data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm2); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[3] * data[i-4]; + //sum += qlp_coeff[2] * data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm1); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + } + else { /* order == 9, 10 */ + if(order == 10) { + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8)); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); + xmm4 = _mm_shuffle_epi32(xmm4, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[9] * data[i-10]; + //sum += qlp_coeff[8] * data[i-9]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-10)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epu32(xmm7, xmm4); + + //sum += qlp_coeff[7] * data[i-8]; + //sum += qlp_coeff[6] * data[i-7]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm3); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[5] * data[i-6]; + //sum += qlp_coeff[4] * data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm2); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[3] * data[i-4]; + //sum += qlp_coeff[2] * data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm1); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + else { /* order == 9 */ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + xmm4 = _mm_cvtsi32_si128(qlp_coeff[8]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[8] * data[i-9]; + xmm7 = _mm_cvtsi32_si128(data[i-9]); + xmm7 = _mm_mul_epu32(xmm7, xmm4); + + //sum += qlp_coeff[7] * data[i-8]; + //sum += qlp_coeff[6] * data[i-7]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm3); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[5] * data[i-6]; + //sum += qlp_coeff[4] * data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm2); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[3] * data[i-4]; + //sum += qlp_coeff[2] * data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm1); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + } + } + else if(order > 4) { /* order == 5, 6, 7, 8 */ + if(order > 6) { /* order == 7, 8 */ + if(order == 8) { + __m128i xmm0, xmm1, xmm2, xmm3, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[7] * data[i-8]; + //sum += qlp_coeff[6] * data[i-7]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epu32(xmm7, xmm3); + + //sum += qlp_coeff[5] * data[i-6]; + //sum += qlp_coeff[4] * data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm2); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[3] * data[i-4]; + //sum += qlp_coeff[2] * data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm1); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + else { /* order == 7 */ + __m128i xmm0, xmm1, xmm2, xmm3, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_cvtsi32_si128(qlp_coeff[6]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[6] * data[i-7]; + xmm7 = _mm_cvtsi32_si128(data[i-7]); + xmm7 = _mm_mul_epu32(xmm7, xmm3); + + //sum += qlp_coeff[5] * data[i-6]; + //sum += qlp_coeff[4] * data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm2); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[3] * data[i-4]; + //sum += qlp_coeff[2] * data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm1); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + } + else { /* order == 5, 6 */ + if(order == 6) { + __m128i xmm0, xmm1, xmm2, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[5] * data[i-6]; + //sum += qlp_coeff[4] * data[i-5]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epu32(xmm7, xmm2); + + //sum += qlp_coeff[3] * data[i-4]; + //sum += qlp_coeff[2] * data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm1); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + else { /* order == 5 */ + __m128i xmm0, xmm1, xmm2, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_cvtsi32_si128(qlp_coeff[4]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[4] * data[i-5]; + xmm7 = _mm_cvtsi32_si128(data[i-5]); + xmm7 = _mm_mul_epu32(xmm7, xmm2); + + //sum += qlp_coeff[3] * data[i-4]; + //sum += qlp_coeff[2] * data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm1); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + } + } + else { /* order == 1, 2, 3, 4 */ + if(order > 2) { /* order == 3, 4 */ + if(order == 4) { + __m128i xmm0, xmm1, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[3] * data[i-4]; + //sum += qlp_coeff[2] * data[i-3]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epu32(xmm7, xmm1); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + else { /* order == 3 */ + __m128i xmm0, xmm1, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_cvtsi32_si128(qlp_coeff[2]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[2] * data[i-3]; + xmm7 = _mm_cvtsi32_si128(data[i-3]); + xmm7 = _mm_mul_epu32(xmm7, xmm1); + + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epu32(xmm6, xmm0); + xmm7 = _mm_add_epi32(xmm7, xmm6); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + } + else { /* order == 1, 2 */ + if(order == 2) { + __m128i xmm0, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[1] * data[i-2]; + //sum += qlp_coeff[0] * data[i-1]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epu32(xmm7, xmm0); + + xmm7 = _mm_add_epi32(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL32_RESULT(xmm7); + } + } + else { /* order == 1 */ + for(i = 0; i < (int)data_len; i++) + residual[i] = data[i] - ((qlp_coeff[0] * data[i-1]) >> lp_quantization); + } + } + } + } + else { /* order > 12 */ + FLAC__int32 sum; + for(i = 0; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 32: sum += qlp_coeff[31] * data[i-32]; + case 31: sum += qlp_coeff[30] * data[i-31]; + case 30: sum += qlp_coeff[29] * data[i-30]; + case 29: sum += qlp_coeff[28] * data[i-29]; + case 28: sum += qlp_coeff[27] * data[i-28]; + case 27: sum += qlp_coeff[26] * data[i-27]; + case 26: sum += qlp_coeff[25] * data[i-26]; + case 25: sum += qlp_coeff[24] * data[i-25]; + case 24: sum += qlp_coeff[23] * data[i-24]; + case 23: sum += qlp_coeff[22] * data[i-23]; + case 22: sum += qlp_coeff[21] * data[i-22]; + case 21: sum += qlp_coeff[20] * data[i-21]; + case 20: sum += qlp_coeff[19] * data[i-20]; + case 19: sum += qlp_coeff[18] * data[i-19]; + case 18: sum += qlp_coeff[17] * data[i-18]; + case 17: sum += qlp_coeff[16] * data[i-17]; + case 16: sum += qlp_coeff[15] * data[i-16]; + case 15: sum += qlp_coeff[14] * data[i-15]; + case 14: sum += qlp_coeff[13] * data[i-14]; + case 13: sum += qlp_coeff[12] * data[i-13]; + sum += qlp_coeff[11] * data[i-12]; + sum += qlp_coeff[10] * data[i-11]; + sum += qlp_coeff[ 9] * data[i-10]; + sum += qlp_coeff[ 8] * data[i- 9]; + sum += qlp_coeff[ 7] * data[i- 8]; + sum += qlp_coeff[ 6] * data[i- 7]; + sum += qlp_coeff[ 5] * data[i- 6]; + sum += qlp_coeff[ 4] * data[i- 5]; + sum += qlp_coeff[ 3] * data[i- 4]; + sum += qlp_coeff[ 2] * data[i- 3]; + sum += qlp_coeff[ 1] * data[i- 2]; + sum += qlp_coeff[ 0] * data[i- 1]; + } + residual[i] = data[i] - (sum >> lp_quantization); + } + } +} + +#if defined FLAC__CPU_IA32 && !defined FLAC__HAS_NASM /* unused for x64; not better than MMX asm */ + +FLAC__SSE_TARGET("sse2") +void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]) +{ + if (order < 8 || order > 12) { + FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data); + return; + } + if (data_len == 0) + return; + + FLAC__ASSERT(order >= 8); + FLAC__ASSERT(order <= 12); + + if(order > 8) { /* order == 9, 10, 11, 12 */ + FLAC__int32 curr; + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + xmm0 = _mm_loadu_si128((const __m128i*)(qlp_coeff+0)); + xmm6 = _mm_loadu_si128((const __m128i*)(qlp_coeff+4)); + xmm1 = _mm_loadu_si128((const __m128i*)(qlp_coeff+8)); /* read 0 to 3 uninitialized coeffs... */ + switch(order) /* ...and zero them out */ + { + case 9: + xmm1 = _mm_slli_si128(xmm1, 12); xmm1 = _mm_srli_si128(xmm1, 12); break; + case 10: + xmm1 = _mm_slli_si128(xmm1, 8); xmm1 = _mm_srli_si128(xmm1, 8); break; + case 11: + xmm1 = _mm_slli_si128(xmm1, 4); xmm1 = _mm_srli_si128(xmm1, 4); break; + } + xmm2 = _mm_setzero_si128(); + xmm0 = _mm_packs_epi32(xmm0, xmm6); + xmm1 = _mm_packs_epi32(xmm1, xmm2); + + xmm4 = _mm_loadu_si128((const __m128i*)(data-12)); + xmm5 = _mm_loadu_si128((const __m128i*)(data-8)); + xmm3 = _mm_loadu_si128((const __m128i*)(data-4)); + xmm4 = _mm_shuffle_epi32(xmm4, _MM_SHUFFLE(0,1,2,3)); + xmm5 = _mm_shuffle_epi32(xmm5, _MM_SHUFFLE(0,1,2,3)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(0,1,2,3)); + xmm4 = _mm_packs_epi32(xmm4, xmm2); + xmm3 = _mm_packs_epi32(xmm3, xmm5); + + xmm7 = _mm_slli_si128(xmm1, 2); + xmm7 = _mm_or_si128(xmm7, _mm_srli_si128(xmm0, 14)); + xmm2 = _mm_slli_si128(xmm0, 2); + + /* xmm0, xmm1: qlp_coeff + xmm2, xmm7: qlp_coeff << 16 bit + xmm3, xmm4: data */ + + xmm5 = _mm_madd_epi16(xmm4, xmm1); + xmm6 = _mm_madd_epi16(xmm3, xmm0); + xmm6 = _mm_add_epi32(xmm6, xmm5); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8)); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4)); + + DATA16_RESULT(xmm6); + + data_len--; + + if(data_len % 2) { + xmm6 = _mm_srli_si128(xmm3, 14); + xmm4 = _mm_slli_si128(xmm4, 2); + xmm3 = _mm_slli_si128(xmm3, 2); + xmm4 = _mm_or_si128(xmm4, xmm6); + xmm3 = _mm_insert_epi16(xmm3, curr, 0); + + xmm5 = _mm_madd_epi16(xmm4, xmm1); + xmm6 = _mm_madd_epi16(xmm3, xmm0); + xmm6 = _mm_add_epi32(xmm6, xmm5); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8)); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4)); + + DATA16_RESULT(xmm6); + + data_len--; + } + + while(data_len) { /* data_len is a multiple of 2 */ + /* 1 _mm_slli_si128 per data element less but we need shifted qlp_coeff in xmm2:xmm7 */ + xmm6 = _mm_srli_si128(xmm3, 12); + xmm4 = _mm_slli_si128(xmm4, 4); + xmm3 = _mm_slli_si128(xmm3, 4); + xmm4 = _mm_or_si128(xmm4, xmm6); + xmm3 = _mm_insert_epi16(xmm3, curr, 1); + + xmm5 = _mm_madd_epi16(xmm4, xmm7); + xmm6 = _mm_madd_epi16(xmm3, xmm2); + xmm6 = _mm_add_epi32(xmm6, xmm5); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8)); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4)); + + DATA16_RESULT(xmm6); + + xmm3 = _mm_insert_epi16(xmm3, curr, 0); + + xmm5 = _mm_madd_epi16(xmm4, xmm1); + xmm6 = _mm_madd_epi16(xmm3, xmm0); + xmm6 = _mm_add_epi32(xmm6, xmm5); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8)); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4)); + + DATA16_RESULT(xmm6); + + data_len-=2; + } + } /* endif(order > 8) */ + else + { + FLAC__int32 curr; + __m128i xmm0, xmm1, xmm3, xmm6; + xmm0 = _mm_loadu_si128((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadu_si128((const __m128i*)(qlp_coeff+4)); + xmm0 = _mm_packs_epi32(xmm0, xmm1); + + xmm1 = _mm_loadu_si128((const __m128i*)(data-8)); + xmm3 = _mm_loadu_si128((const __m128i*)(data-4)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(0,1,2,3)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(0,1,2,3)); + xmm3 = _mm_packs_epi32(xmm3, xmm1); + + /* xmm0: qlp_coeff + xmm3: data */ + + xmm6 = _mm_madd_epi16(xmm3, xmm0); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8)); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4)); + + DATA16_RESULT(xmm6); + + data_len--; + + while(data_len) { + xmm3 = _mm_slli_si128(xmm3, 2); + xmm3 = _mm_insert_epi16(xmm3, curr, 0); + + xmm6 = _mm_madd_epi16(xmm3, xmm0); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8)); + xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4)); + + DATA16_RESULT(xmm6); + + data_len--; + } + } +} + +#endif /* defined FLAC__CPU_IA32 && !defined FLAC__HAS_NASM */ + +#endif /* FLAC__SSE2_SUPPORTED */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ +#endif /* FLAC__NO_ASM */ +#endif /* FLAC__INTEGER_ONLY_LIBRARY */ diff --git a/libFLAC/lpc_intrin_sse41.c b/libFLAC/lpc_intrin_sse41.c new file mode 100644 index 00000000..bef73f41 --- /dev/null +++ b/libFLAC/lpc_intrin_sse41.c @@ -0,0 +1,1314 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/cpu.h" + +#ifndef FLAC__INTEGER_ONLY_LIBRARY +#ifndef FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN +#include "private/lpc.h" +#ifdef FLAC__SSE4_1_SUPPORTED + +#include "FLAC/assert.h" +#include "FLAC/format.h" + +#include <smmintrin.h> /* SSE4.1 */ + +#if defined FLAC__CPU_IA32 /* unused for x64 */ + +#define RESIDUAL64_RESULT(xmmN) residual[i] = data[i] - _mm_cvtsi128_si32(_mm_srl_epi64(xmmN, cnt)) +#define RESIDUAL64_RESULT1(xmmN) residual[i] = data[i] - _mm_cvtsi128_si32(_mm_srli_epi64(xmmN, lp_quantization)) + +FLAC__SSE_TARGET("sse4.1") +void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) +{ + int i; + __m128i cnt = _mm_cvtsi32_si128(lp_quantization); + + FLAC__ASSERT(order > 0); + FLAC__ASSERT(order <= 32); + FLAC__ASSERT(lp_quantization <= 32); /* there's no _mm_sra_epi64() so we have to use _mm_srl_epi64() */ + + if(order <= 12) { + if(order > 8) { /* order == 9, 10, 11, 12 */ + if(order > 10) { /* order == 11, 12 */ + if(order == 12) { + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); // 0 0 q[1] q[0] + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); // 0 0 q[3] q[2] + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); // 0 0 q[5] q[4] + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); // 0 0 q[7] q[6] + xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8)); // 0 0 q[9] q[8] + xmm5 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+10)); // 0 0 q[11] q[10] + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); // 0 q[1] 0 q[0] + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); // 0 q[3] 0 q[2] + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); // 0 q[5] 0 q[4] + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); // 0 q[7] 0 q[6] + xmm4 = _mm_shuffle_epi32(xmm4, _MM_SHUFFLE(3,1,2,0)); // 0 q[9] 0 q[8] + xmm5 = _mm_shuffle_epi32(xmm5, _MM_SHUFFLE(3,1,2,0)); // 0 q[11] 0 q[10] + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[11] * (FLAC__int64)data[i-12]; + //sum += qlp_coeff[10] * (FLAC__int64)data[i-11]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-12)); // 0 0 d[i-11] d[i-12] + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); // 0 d[i-12] 0 d[i-11] + xmm7 = _mm_mul_epi32(xmm7, xmm5); + + //sum += qlp_coeff[9] * (FLAC__int64)data[i-10]; + //sum += qlp_coeff[8] * (FLAC__int64)data[i-9]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-10)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm4); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[7] * (FLAC__int64)data[i-8]; + //sum += qlp_coeff[6] * (FLAC__int64)data[i-7]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm3); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[5] * (FLAC__int64)data[i-6]; + //sum += qlp_coeff[4] * (FLAC__int64)data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm2); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[3] * (FLAC__int64)data[i-4]; + //sum += qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm1); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT1(xmm7); + } + } + else { /* order == 11 */ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8)); + xmm5 = _mm_cvtsi32_si128(qlp_coeff[10]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); + xmm4 = _mm_shuffle_epi32(xmm4, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[10] * (FLAC__int64)data[i-11]; + xmm7 = _mm_cvtsi32_si128(data[i-11]); + xmm7 = _mm_mul_epi32(xmm7, xmm5); + + //sum += qlp_coeff[9] * (FLAC__int64)data[i-10]; + //sum += qlp_coeff[8] * (FLAC__int64)data[i-9]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-10)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm4); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[7] * (FLAC__int64)data[i-8]; + //sum += qlp_coeff[6] * (FLAC__int64)data[i-7]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm3); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[5] * (FLAC__int64)data[i-6]; + //sum += qlp_coeff[4] * (FLAC__int64)data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm2); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[3] * (FLAC__int64)data[i-4]; + //sum += qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm1); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT1(xmm7); + } + } + } + else { /* order == 9, 10 */ + if(order == 10) { + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8)); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); + xmm4 = _mm_shuffle_epi32(xmm4, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[9] * (FLAC__int64)data[i-10]; + //sum += qlp_coeff[8] * (FLAC__int64)data[i-9]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-10)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epi32(xmm7, xmm4); + + //sum += qlp_coeff[7] * (FLAC__int64)data[i-8]; + //sum += qlp_coeff[6] * (FLAC__int64)data[i-7]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm3); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[5] * (FLAC__int64)data[i-6]; + //sum += qlp_coeff[4] * (FLAC__int64)data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm2); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[3] * (FLAC__int64)data[i-4]; + //sum += qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm1); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT(xmm7); + } + } + else { /* order == 9 */ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + xmm4 = _mm_cvtsi32_si128(qlp_coeff[8]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[8] * (FLAC__int64)data[i-9]; + xmm7 = _mm_cvtsi32_si128(data[i-9]); + xmm7 = _mm_mul_epi32(xmm7, xmm4); + + //sum += qlp_coeff[7] * (FLAC__int64)data[i-8]; + //sum += qlp_coeff[6] * (FLAC__int64)data[i-7]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm3); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[5] * (FLAC__int64)data[i-6]; + //sum += qlp_coeff[4] * (FLAC__int64)data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm2); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[3] * (FLAC__int64)data[i-4]; + //sum += qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm1); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT(xmm7); + } + } + } + } + else if(order > 4) { /* order == 5, 6, 7, 8 */ + if(order > 6) { /* order == 7, 8 */ + if(order == 8) { + __m128i xmm0, xmm1, xmm2, xmm3, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[7] * (FLAC__int64)data[i-8]; + //sum += qlp_coeff[6] * (FLAC__int64)data[i-7]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-8)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epi32(xmm7, xmm3); + + //sum += qlp_coeff[5] * (FLAC__int64)data[i-6]; + //sum += qlp_coeff[4] * (FLAC__int64)data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm2); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[3] * (FLAC__int64)data[i-4]; + //sum += qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm1); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT(xmm7); + } + } + else { /* order == 7 */ + __m128i xmm0, xmm1, xmm2, xmm3, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + xmm3 = _mm_cvtsi32_si128(qlp_coeff[6]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[6] * (FLAC__int64)data[i-7]; + xmm7 = _mm_cvtsi32_si128(data[i-7]); + xmm7 = _mm_mul_epi32(xmm7, xmm3); + + //sum += qlp_coeff[5] * (FLAC__int64)data[i-6]; + //sum += qlp_coeff[4] * (FLAC__int64)data[i-5]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm2); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[3] * (FLAC__int64)data[i-4]; + //sum += qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm1); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT(xmm7); + } + } + } + else { /* order == 5, 6 */ + if(order == 6) { + __m128i xmm0, xmm1, xmm2, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + xmm2 = _mm_shuffle_epi32(xmm2, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[5] * (FLAC__int64)data[i-6]; + //sum += qlp_coeff[4] * (FLAC__int64)data[i-5]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-6)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epi32(xmm7, xmm2); + + //sum += qlp_coeff[3] * (FLAC__int64)data[i-4]; + //sum += qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm1); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT(xmm7); + } + } + else { /* order == 5 */ + __m128i xmm0, xmm1, xmm2, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + xmm2 = _mm_cvtsi32_si128(qlp_coeff[4]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[4] * (FLAC__int64)data[i-5]; + xmm7 = _mm_cvtsi32_si128(data[i-5]); + xmm7 = _mm_mul_epi32(xmm7, xmm2); + + //sum += qlp_coeff[3] * (FLAC__int64)data[i-4]; + //sum += qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm1); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT(xmm7); + } + } + } + } + else { /* order == 1, 2, 3, 4 */ + if(order > 2) { /* order == 3, 4 */ + if(order == 4) { + __m128i xmm0, xmm1, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[3] * (FLAC__int64)data[i-4]; + //sum += qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-4)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epi32(xmm7, xmm1); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT(xmm7); + } + } + else { /* order == 3 */ + __m128i xmm0, xmm1, xmm6, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm1 = _mm_cvtsi32_si128(qlp_coeff[2]); + + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum = qlp_coeff[2] * (FLAC__int64)data[i-3]; + xmm7 = _mm_cvtsi32_si128(data[i-3]); + xmm7 = _mm_mul_epi32(xmm7, xmm1); + + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1)); + xmm6 = _mm_mul_epi32(xmm6, xmm0); + xmm7 = _mm_add_epi64(xmm7, xmm6); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT(xmm7); + } + } + } + else { /* order == 1, 2 */ + if(order == 2) { + __m128i xmm0, xmm7; + xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); + + for(i = 0; i < (int)data_len; i++) { + //sum = 0; + //sum += qlp_coeff[1] * (FLAC__int64)data[i-2]; + //sum += qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-2)); + xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); + xmm7 = _mm_mul_epi32(xmm7, xmm0); + + xmm7 = _mm_add_epi64(xmm7, _mm_srli_si128(xmm7, 8)); + RESIDUAL64_RESULT(xmm7); + } + } + else { /* order == 1 */ + __m128i xmm0, xmm7; + xmm0 = _mm_cvtsi32_si128(qlp_coeff[0]); + + for(i = 0; i < (int)data_len; i++) { + //sum = qlp_coeff[0] * (FLAC__int64)data[i-1]; + xmm7 = _mm_cvtsi32_si128(data[i-1]); + xmm7 = _mm_mul_epi32(xmm7, xmm0); + RESIDUAL64_RESULT(xmm7); + } + } + } + } + } + else { /* order > 12 */ + FLAC__int64 sum; + for(i = 0; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32]; + case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31]; + case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30]; + case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29]; + case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28]; + case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27]; + case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26]; + case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25]; + case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24]; + case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23]; + case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22]; + case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21]; + case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20]; + case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19]; + case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18]; + case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17]; + case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16]; + case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15]; + case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14]; + case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13]; + sum += qlp_coeff[11] * (FLAC__int64)data[i-12]; + sum += qlp_coeff[10] * (FLAC__int64)data[i-11]; + sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10]; + sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9]; + sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8]; + sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7]; + sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6]; + sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5]; + sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4]; + sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3]; + sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2]; + sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1]; + } + residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization); + } + } +} + +FLAC__SSE_TARGET("sse4.1") +void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]) +{ + int i; + __m128i cnt = _mm_cvtsi32_si128(lp_quantization); + + if (!data_len) + return; + + FLAC__ASSERT(order > 0); + FLAC__ASSERT(order <= 32); + FLAC__ASSERT(lp_quantization <= 32); /* there's no _mm_sra_epi64() so we have to use _mm_srl_epi64() */ + + if(order <= 12) { + if(order > 8) { /* order == 9, 10, 11, 12 */ + if(order > 10) { /* order == 11, 12 */ + __m128i qlp[6], dat[6]; + __m128i summ, temp; + qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); // 0 0 q[1] q[0] + qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); // 0 0 q[3] q[2] + qlp[2] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); // 0 0 q[5] q[4] + qlp[3] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); // 0 0 q[7] q[6] + qlp[4] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8)); // 0 0 q[9] q[8] + if (order == 12) + qlp[5] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+10)); // 0 0 q[11] q[10] + else + qlp[5] = _mm_cvtsi32_si128(qlp_coeff[10]); // 0 0 0 q[10] + + qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1)); // 0 q[0] 0 q[1] + qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1)); // 0 q[2] 0 q[3] + qlp[2] = _mm_shuffle_epi32(qlp[2], _MM_SHUFFLE(2,0,3,1)); // 0 q[4] 0 q[5] + qlp[3] = _mm_shuffle_epi32(qlp[3], _MM_SHUFFLE(2,0,3,1)); // 0 q[5] 0 q[7] + qlp[4] = _mm_shuffle_epi32(qlp[4], _MM_SHUFFLE(2,0,3,1)); // 0 q[8] 0 q[9] + qlp[5] = _mm_shuffle_epi32(qlp[5], _MM_SHUFFLE(2,0,3,1)); // 0 q[10] 0 q[11] + + dat[5] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-12))); // ? d[i-11] ? d[i-12] + dat[4] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-10))); // ? d[i-9] ? d[i-10] + dat[3] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-8 ))); // ? d[i-7] ? d[i-8] + dat[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-6 ))); // ? d[i-5] ? d[i-6] + dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 ))); // ? d[i-3] ? d[i-4] + dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 ))); // ? d[i-1] ? d[i-2] + + summ = _mm_mul_epi32(dat[5], qlp[5]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[4], qlp[4])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); // ?_64 sum_64 + summ = _mm_srl_epi64(summ, cnt); // ?_64 (sum >> lp_quantization)_64 == ?_32 ?_32 ?_32 (sum >> lp_quantization)_32 + temp = _mm_cvtsi32_si128(residual[0]); // 0 0 0 r[i] + temp = _mm_add_epi32(temp, summ); // ? ? ? d[i] + data[0] = _mm_cvtsi128_si32(temp); + + for(i = 1; i < (int)data_len; i++) { + dat[5] = _mm_alignr_epi8(dat[4], dat[5], 8); // ? d[i-10] ? d[i-11] + dat[4] = _mm_alignr_epi8(dat[3], dat[4], 8); // ? d[i-8] ? d[i-9] + dat[3] = _mm_alignr_epi8(dat[2], dat[3], 8); // ? d[i-6] ? d[i-7] + dat[2] = _mm_alignr_epi8(dat[1], dat[2], 8); // ? d[i-4] ? d[i-5] + dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8); // ? d[i-2] ? d[i-3] + dat[0] = _mm_alignr_epi8(temp, dat[0], 8); // ? d[i ] ? d[i-1] + + summ = _mm_mul_epi32(dat[5], qlp[5]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[4], qlp[4])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); // ?_64 sum_64 + summ = _mm_srl_epi64(summ, cnt); // ?_64 (sum >> lp_quantization)_64 == ?_32 ?_32 ?_32 (sum >> lp_quantization)_32 + temp = _mm_cvtsi32_si128(residual[i]); // 0 0 0 r[i] + temp = _mm_add_epi32(temp, summ); // ? ? ? d[i] + data[i] = _mm_cvtsi128_si32(temp); + } + } + else { /* order == 9, 10 */ + __m128i qlp[5], dat[5]; + __m128i summ, temp; + qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + qlp[2] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + qlp[3] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + if (order == 10) + qlp[4] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8)); + else + qlp[4] = _mm_cvtsi32_si128(qlp_coeff[8]); + + qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1)); + qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1)); + qlp[2] = _mm_shuffle_epi32(qlp[2], _MM_SHUFFLE(2,0,3,1)); + qlp[3] = _mm_shuffle_epi32(qlp[3], _MM_SHUFFLE(2,0,3,1)); + qlp[4] = _mm_shuffle_epi32(qlp[4], _MM_SHUFFLE(2,0,3,1)); + + dat[4] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-10))); + dat[3] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-8 ))); + dat[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-6 ))); + dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 ))); + dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 ))); + + summ = _mm_mul_epi32(dat[4], qlp[4]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[0]); + temp = _mm_add_epi32(temp, summ); + data[0] = _mm_cvtsi128_si32(temp); + + for(i = 1; i < (int)data_len; i++) { + dat[4] = _mm_alignr_epi8(dat[3], dat[4], 8); + dat[3] = _mm_alignr_epi8(dat[2], dat[3], 8); + dat[2] = _mm_alignr_epi8(dat[1], dat[2], 8); + dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8); + dat[0] = _mm_alignr_epi8(temp, dat[0], 8); + + summ = _mm_mul_epi32(dat[4], qlp[4]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[i]); + temp = _mm_add_epi32(temp, summ); + data[i] = _mm_cvtsi128_si32(temp); + } + } + } + else if(order > 4) { /* order == 5, 6, 7, 8 */ + if(order > 6) { /* order == 7, 8 */ + __m128i qlp[4], dat[4]; + __m128i summ, temp; + qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + qlp[2] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + if (order == 8) + qlp[3] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); + else + qlp[3] = _mm_cvtsi32_si128(qlp_coeff[6]); + + qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1)); + qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1)); + qlp[2] = _mm_shuffle_epi32(qlp[2], _MM_SHUFFLE(2,0,3,1)); + qlp[3] = _mm_shuffle_epi32(qlp[3], _MM_SHUFFLE(2,0,3,1)); + + dat[3] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-8 ))); + dat[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-6 ))); + dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 ))); + dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 ))); + + summ = _mm_mul_epi32(dat[3], qlp[3]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[0]); + temp = _mm_add_epi32(temp, summ); + data[0] = _mm_cvtsi128_si32(temp); + + for(i = 1; i < (int)data_len; i++) { + dat[3] = _mm_alignr_epi8(dat[2], dat[3], 8); + dat[2] = _mm_alignr_epi8(dat[1], dat[2], 8); + dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8); + dat[0] = _mm_alignr_epi8(temp, dat[0], 8); + + summ = _mm_mul_epi32(dat[3], qlp[3]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[i]); + temp = _mm_add_epi32(temp, summ); + data[i] = _mm_cvtsi128_si32(temp); + } + } + else { /* order == 5, 6 */ + __m128i qlp[3], dat[3]; + __m128i summ, temp; + qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + if (order == 6) + qlp[2] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); + else + qlp[2] = _mm_cvtsi32_si128(qlp_coeff[4]); + + qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1)); + qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1)); + qlp[2] = _mm_shuffle_epi32(qlp[2], _MM_SHUFFLE(2,0,3,1)); + + dat[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-6 ))); + dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 ))); + dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 ))); + + summ = _mm_mul_epi32(dat[2], qlp[2]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[0]); + temp = _mm_add_epi32(temp, summ); + data[0] = _mm_cvtsi128_si32(temp); + + for(i = 1; i < (int)data_len; i++) { + dat[2] = _mm_alignr_epi8(dat[1], dat[2], 8); + dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8); + dat[0] = _mm_alignr_epi8(temp, dat[0], 8); + + summ = _mm_mul_epi32(dat[2], qlp[2]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1])); + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[i]); + temp = _mm_add_epi32(temp, summ); + data[i] = _mm_cvtsi128_si32(temp); + } + } + } + else { /* order == 1, 2, 3, 4 */ + if(order > 2) { /* order == 3, 4 */ + __m128i qlp[2], dat[2]; + __m128i summ, temp; + qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); + if (order == 4) + qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); + else + qlp[1] = _mm_cvtsi32_si128(qlp_coeff[2]); + + qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1)); + qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1)); + + dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 ))); + dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 ))); + + summ = _mm_mul_epi32(dat[1], qlp[1]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[0]); + temp = _mm_add_epi32(temp, summ); + data[0] = _mm_cvtsi128_si32(temp); + + for(i = 1; i < (int)data_len; i++) { + dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8); + dat[0] = _mm_alignr_epi8(temp, dat[0], 8); + + summ = _mm_mul_epi32(dat[1], qlp[1]) ; + summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0])); + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[i]); + temp = _mm_add_epi32(temp, summ); + data[i] = _mm_cvtsi128_si32(temp); + } + } + else { /* order == 1, 2 */ + if(order == 2) { + __m128i qlp0, dat0; + __m128i summ, temp; + qlp0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff)); + qlp0 = _mm_shuffle_epi32(qlp0, _MM_SHUFFLE(2,0,3,1)); + + dat0 = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 ))); + + summ = _mm_mul_epi32(dat0, qlp0) ; + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[0]); + temp = _mm_add_epi32(temp, summ); + data[0] = _mm_cvtsi128_si32(temp); + + for(i = 1; i < (int)data_len; i++) { + dat0 = _mm_alignr_epi8(temp, dat0, 8); + + summ = _mm_mul_epi32(dat0, qlp0) ; + + summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[i]); + temp = _mm_add_epi32(temp, summ); + data[i] = _mm_cvtsi128_si32(temp); + } + } + else { /* order == 1 */ + __m128i qlp0; + __m128i summ, temp; + qlp0 = _mm_cvtsi32_si128(qlp_coeff[0]); + temp = _mm_cvtsi32_si128(data[-1]); + + summ = _mm_mul_epi32(temp, qlp0); + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[0]); + temp = _mm_add_epi32(temp, summ); + data[0] = _mm_cvtsi128_si32(temp); + + for(i = 1; i < (int)data_len; i++) { + summ = _mm_mul_epi32(temp, qlp0) ; + summ = _mm_srl_epi64(summ, cnt); + temp = _mm_cvtsi32_si128(residual[i]); + temp = _mm_add_epi32(temp, summ); + data[i] = _mm_cvtsi128_si32(temp); + } + } + } + } + } + else { /* order > 12 */ + FLAC__int64 sum; + for(i = 0; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32]; + case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31]; + case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30]; + case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29]; + case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28]; + case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27]; + case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26]; + case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25]; + case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24]; + case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23]; + case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22]; + case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21]; + case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20]; + case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19]; + case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18]; + case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17]; + case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16]; + case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15]; + case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14]; + case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13]; + sum += qlp_coeff[11] * (FLAC__int64)data[i-12]; + sum += qlp_coeff[10] * (FLAC__int64)data[i-11]; + sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10]; + sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9]; + sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8]; + sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7]; + sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6]; + sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5]; + sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4]; + sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3]; + sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2]; + sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1]; + } + data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization); + } + } +} + +#endif /* defined FLAC__CPU_IA32 */ + +FLAC__SSE_TARGET("sse4.1") +void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) +{ + int i; + FLAC__int32 sum; + __m128i cnt = _mm_cvtsi32_si128(lp_quantization); + + FLAC__ASSERT(order > 0); + FLAC__ASSERT(order <= 32); + + if(order <= 12) { + if(order > 8) { + if(order > 10) { + if(order == 12) { + __m128i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + q8 = _mm_cvtsi32_si128(qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0)); + q9 = _mm_cvtsi32_si128(qlp_coeff[9]); q9 = _mm_shuffle_epi32(q9, _MM_SHUFFLE(0,0,0,0)); + q10 = _mm_cvtsi32_si128(qlp_coeff[10]); q10 = _mm_shuffle_epi32(q10, _MM_SHUFFLE(0,0,0,0)); + q11 = _mm_cvtsi32_si128(qlp_coeff[11]); q11 = _mm_shuffle_epi32(q11, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q11, _mm_loadu_si128((const __m128i*)(data+i-12))); + mull = _mm_mullo_epi32(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 11 */ + __m128i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + q8 = _mm_cvtsi32_si128(qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0)); + q9 = _mm_cvtsi32_si128(qlp_coeff[9]); q9 = _mm_shuffle_epi32(q9, _MM_SHUFFLE(0,0,0,0)); + q10 = _mm_cvtsi32_si128(qlp_coeff[10]); q10 = _mm_shuffle_epi32(q10, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); + mull = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + else { + if(order == 10) { + __m128i q0, q1, q2, q3, q4, q5, q6, q7, q8, q9; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + q8 = _mm_cvtsi32_si128(qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0)); + q9 = _mm_cvtsi32_si128(qlp_coeff[9]); q9 = _mm_shuffle_epi32(q9, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); + mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 9 */ + __m128i q0, q1, q2, q3, q4, q5, q6, q7, q8; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + q8 = _mm_cvtsi32_si128(qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); + mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + } + else if(order > 4) { + if(order > 6) { + if(order == 8) { + __m128i q0, q1, q2, q3, q4, q5, q6, q7; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + q7 = _mm_cvtsi32_si128(qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); + mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 7 */ + __m128i q0, q1, q2, q3, q4, q5, q6; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + q6 = _mm_cvtsi32_si128(qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); + mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + else { + if(order == 6) { + __m128i q0, q1, q2, q3, q4, q5; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + q5 = _mm_cvtsi32_si128(qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); + mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 5 */ + __m128i q0, q1, q2, q3, q4; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + q4 = _mm_cvtsi32_si128(qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); + mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + } + else { + if(order > 2) { + if(order == 4) { + __m128i q0, q1, q2, q3; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + q3 = _mm_cvtsi32_si128(qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); + mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 3 */ + __m128i q0, q1, q2; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + q2 = _mm_cvtsi32_si128(qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); + mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + else { + if(order == 2) { + __m128i q0, q1; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + q1 = _mm_cvtsi32_si128(qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ, mull; + summ = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); + mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + else { /* order == 1 */ + __m128i q0; + q0 = _mm_cvtsi32_si128(qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0)); + + for(i = 0; i < (int)data_len-3; i+=4) { + __m128i summ; + summ = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); + summ = _mm_sra_epi32(summ, cnt); + _mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ)); + } + } + } + } + for(; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 12: sum += qlp_coeff[11] * data[i-12]; + case 11: sum += qlp_coeff[10] * data[i-11]; + case 10: sum += qlp_coeff[ 9] * data[i-10]; + case 9: sum += qlp_coeff[ 8] * data[i- 9]; + case 8: sum += qlp_coeff[ 7] * data[i- 8]; + case 7: sum += qlp_coeff[ 6] * data[i- 7]; + case 6: sum += qlp_coeff[ 5] * data[i- 6]; + case 5: sum += qlp_coeff[ 4] * data[i- 5]; + case 4: sum += qlp_coeff[ 3] * data[i- 4]; + case 3: sum += qlp_coeff[ 2] * data[i- 3]; + case 2: sum += qlp_coeff[ 1] * data[i- 2]; + case 1: sum += qlp_coeff[ 0] * data[i- 1]; + } + residual[i] = data[i] - (sum >> lp_quantization); + } + } + else { /* order > 12 */ + for(i = 0; i < (int)data_len; i++) { + sum = 0; + switch(order) { + case 32: sum += qlp_coeff[31] * data[i-32]; + case 31: sum += qlp_coeff[30] * data[i-31]; + case 30: sum += qlp_coeff[29] * data[i-30]; + case 29: sum += qlp_coeff[28] * data[i-29]; + case 28: sum += qlp_coeff[27] * data[i-28]; + case 27: sum += qlp_coeff[26] * data[i-27]; + case 26: sum += qlp_coeff[25] * data[i-26]; + case 25: sum += qlp_coeff[24] * data[i-25]; + case 24: sum += qlp_coeff[23] * data[i-24]; + case 23: sum += qlp_coeff[22] * data[i-23]; + case 22: sum += qlp_coeff[21] * data[i-22]; + case 21: sum += qlp_coeff[20] * data[i-21]; + case 20: sum += qlp_coeff[19] * data[i-20]; + case 19: sum += qlp_coeff[18] * data[i-19]; + case 18: sum += qlp_coeff[17] * data[i-18]; + case 17: sum += qlp_coeff[16] * data[i-17]; + case 16: sum += qlp_coeff[15] * data[i-16]; + case 15: sum += qlp_coeff[14] * data[i-15]; + case 14: sum += qlp_coeff[13] * data[i-14]; + case 13: sum += qlp_coeff[12] * data[i-13]; + sum += qlp_coeff[11] * data[i-12]; + sum += qlp_coeff[10] * data[i-11]; + sum += qlp_coeff[ 9] * data[i-10]; + sum += qlp_coeff[ 8] * data[i- 9]; + sum += qlp_coeff[ 7] * data[i- 8]; + sum += qlp_coeff[ 6] * data[i- 7]; + sum += qlp_coeff[ 5] * data[i- 6]; + sum += qlp_coeff[ 4] * data[i- 5]; + sum += qlp_coeff[ 3] * data[i- 4]; + sum += qlp_coeff[ 2] * data[i- 3]; + sum += qlp_coeff[ 1] * data[i- 2]; + sum += qlp_coeff[ 0] * data[i- 1]; + } + residual[i] = data[i] - (sum >> lp_quantization); + } + } +} + +#endif /* FLAC__SSE4_1_SUPPORTED */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ +#endif /* FLAC__NO_ASM */ +#endif /* FLAC__INTEGER_ONLY_LIBRARY */ diff --git a/libFLAC/memory.c b/libFLAC/memory.c index d22df70e..a8ebd10f 100644 --- a/libFLAC/memory.c +++ b/libFLAC/memory.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2001-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/metadata_iterators.c b/libFLAC/metadata_iterators.c new file mode 100644 index 00000000..0a84d03b --- /dev/null +++ b/libFLAC/metadata_iterators.c @@ -0,0 +1,3481 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2001-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +#include <sys/stat.h> /* for stat(), maybe chmod() */ + +#include "private/metadata.h" + +#include "FLAC/assert.h" +#include "FLAC/stream_decoder.h" +#include "share/alloc.h" +#include "share/compat.h" +#include "share/macros.h" +#include "share/safe_str.h" +#include "private/macros.h" +#include "private/memory.h" + +/* Alias the first (in share/alloc.h) to the second (in src/libFLAC/memory.c). */ +#define safe_malloc_mul_2op_ safe_malloc_mul_2op_p + +/**************************************************************************** + * + * Local function declarations + * + ***************************************************************************/ + +static void pack_uint32_(FLAC__uint32 val, FLAC__byte *b, unsigned bytes); +static void pack_uint32_little_endian_(FLAC__uint32 val, FLAC__byte *b, unsigned bytes); +static void pack_uint64_(FLAC__uint64 val, FLAC__byte *b, unsigned bytes); +static FLAC__uint32 unpack_uint32_(FLAC__byte *b, unsigned bytes); +static FLAC__uint32 unpack_uint32_little_endian_(FLAC__byte *b, unsigned bytes); +static FLAC__uint64 unpack_uint64_(FLAC__byte *b, unsigned bytes); + +static FLAC__bool read_metadata_block_header_(FLAC__Metadata_SimpleIterator *iterator); +static FLAC__bool read_metadata_block_data_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block); +static FLAC__bool read_metadata_block_header_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__bool *is_last, FLAC__MetadataType *type, unsigned *length); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata *block); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_streaminfo_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_StreamInfo *block); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_Padding *block, unsigned block_length); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Application *block, unsigned block_length); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_SeekTable *block, unsigned block_length); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_entry_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_VorbisComment_Entry *entry, unsigned max_length); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_VorbisComment *block, unsigned block_length); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cuesheet_track_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_CueSheet_Track *track); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cuesheet_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_CueSheet *block); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_picture_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Picture *block); +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Unknown *block, unsigned block_length); + +static FLAC__bool write_metadata_block_header_(FILE *file, FLAC__Metadata_SimpleIteratorStatus *status, const FLAC__StreamMetadata *block); +static FLAC__bool write_metadata_block_data_(FILE *file, FLAC__Metadata_SimpleIteratorStatus *status, const FLAC__StreamMetadata *block); +static FLAC__bool write_metadata_block_header_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata *block); +static FLAC__bool write_metadata_block_data_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata *block); +static FLAC__bool write_metadata_block_data_streaminfo_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_StreamInfo *block); +static FLAC__bool write_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Padding *block, unsigned block_length); +static FLAC__bool write_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Application *block, unsigned block_length); +static FLAC__bool write_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_SeekTable *block); +static FLAC__bool write_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_VorbisComment *block); +static FLAC__bool write_metadata_block_data_cuesheet_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_CueSheet *block); +static FLAC__bool write_metadata_block_data_picture_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Picture *block); +static FLAC__bool write_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Unknown *block, unsigned block_length); + +static FLAC__bool write_metadata_block_stationary_(FLAC__Metadata_SimpleIterator *iterator, const FLAC__StreamMetadata *block); +static FLAC__bool write_metadata_block_stationary_with_padding_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, unsigned padding_length, FLAC__bool padding_is_last); +static FLAC__bool rewrite_whole_file_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, FLAC__bool append); + +static void simple_iterator_push_(FLAC__Metadata_SimpleIterator *iterator); +static FLAC__bool simple_iterator_pop_(FLAC__Metadata_SimpleIterator *iterator); + +static unsigned seek_to_first_metadata_block_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb); +static unsigned seek_to_first_metadata_block_(FILE *f); + +static FLAC__bool simple_iterator_copy_file_prefix_(FLAC__Metadata_SimpleIterator *iterator, FILE **tempfile, char **tempfilename, FLAC__bool append); +static FLAC__bool simple_iterator_copy_file_postfix_(FLAC__Metadata_SimpleIterator *iterator, FILE **tempfile, char **tempfilename, int fixup_is_last_code, FLAC__off_t fixup_is_last_flag_offset, FLAC__bool backup); + +static FLAC__bool copy_n_bytes_from_file_(FILE *file, FILE *tempfile, FLAC__off_t bytes, FLAC__Metadata_SimpleIteratorStatus *status); +static FLAC__bool copy_n_bytes_from_file_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOHandle temp_handle, FLAC__IOCallback_Write temp_write_cb, FLAC__off_t bytes, FLAC__Metadata_SimpleIteratorStatus *status); +static FLAC__bool copy_remaining_bytes_from_file_(FILE *file, FILE *tempfile, FLAC__Metadata_SimpleIteratorStatus *status); +static FLAC__bool copy_remaining_bytes_from_file_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Eof eof_cb, FLAC__IOHandle temp_handle, FLAC__IOCallback_Write temp_write_cb, FLAC__Metadata_SimpleIteratorStatus *status); + +static FLAC__bool open_tempfile_(const char *filename, const char *tempfile_path_prefix, FILE **tempfile, char **tempfilename, FLAC__Metadata_SimpleIteratorStatus *status); +static FLAC__bool transport_tempfile_(const char *filename, FILE **tempfile, char **tempfilename, FLAC__Metadata_SimpleIteratorStatus *status); +static void cleanup_tempfile_(FILE **tempfile, char **tempfilename); + +static FLAC__bool get_file_stats_(const char *filename, struct flac_stat_s *stats); +static void set_file_stats_(const char *filename, struct flac_stat_s *stats); + +static int fseek_wrapper_(FLAC__IOHandle handle, FLAC__int64 offset, int whence); +static FLAC__int64 ftell_wrapper_(FLAC__IOHandle handle); + +static FLAC__Metadata_ChainStatus get_equivalent_status_(FLAC__Metadata_SimpleIteratorStatus status); + + +#ifdef FLAC__VALGRIND_TESTING +static size_t local__fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + size_t ret = fwrite(ptr, size, nmemb, stream); + if(!ferror(stream)) + fflush(stream); + return ret; +} +#else +#define local__fwrite fwrite +#endif + +/**************************************************************************** + * + * Level 0 implementation + * + ***************************************************************************/ + +static FLAC__StreamDecoderWriteStatus write_callback_(const FLAC__StreamDecoder *decoder, const FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data); +static void metadata_callback_(const FLAC__StreamDecoder *decoder, const FLAC__StreamMetadata *metadata, void *client_data); +static void error_callback_(const FLAC__StreamDecoder *decoder, FLAC__StreamDecoderErrorStatus status, void *client_data); + +typedef struct { + FLAC__bool got_error; + FLAC__StreamMetadata *object; +} level0_client_data; + +static FLAC__StreamMetadata *get_one_metadata_block_(const char *filename, FLAC__MetadataType type) +{ + level0_client_data cd; + FLAC__StreamDecoder *decoder; + + FLAC__ASSERT(0 != filename); + + cd.got_error = false; + cd.object = 0; + + decoder = FLAC__stream_decoder_new(); + + if(0 == decoder) + return 0; + + FLAC__stream_decoder_set_md5_checking(decoder, false); + FLAC__stream_decoder_set_metadata_ignore_all(decoder); + FLAC__stream_decoder_set_metadata_respond(decoder, type); + + if(FLAC__stream_decoder_init_file(decoder, filename, write_callback_, metadata_callback_, error_callback_, &cd) != FLAC__STREAM_DECODER_INIT_STATUS_OK || cd.got_error) { + (void)FLAC__stream_decoder_finish(decoder); + FLAC__stream_decoder_delete(decoder); + return 0; + } + + if(!FLAC__stream_decoder_process_until_end_of_metadata(decoder) || cd.got_error) { + (void)FLAC__stream_decoder_finish(decoder); + FLAC__stream_decoder_delete(decoder); + if(0 != cd.object) + FLAC__metadata_object_delete(cd.object); + return 0; + } + + (void)FLAC__stream_decoder_finish(decoder); + FLAC__stream_decoder_delete(decoder); + + return cd.object; +} + +FLAC_API FLAC__bool FLAC__metadata_get_streaminfo(const char *filename, FLAC__StreamMetadata *streaminfo) +{ + FLAC__StreamMetadata *object; + + FLAC__ASSERT(0 != filename); + FLAC__ASSERT(0 != streaminfo); + + object = get_one_metadata_block_(filename, FLAC__METADATA_TYPE_STREAMINFO); + + if (object) { + /* can just copy the contents since STREAMINFO has no internal structure */ + *streaminfo = *object; + FLAC__metadata_object_delete(object); + return true; + } + else { + return false; + } +} + +FLAC_API FLAC__bool FLAC__metadata_get_tags(const char *filename, FLAC__StreamMetadata **tags) +{ + FLAC__ASSERT(0 != filename); + FLAC__ASSERT(0 != tags); + + *tags = get_one_metadata_block_(filename, FLAC__METADATA_TYPE_VORBIS_COMMENT); + + return 0 != *tags; +} + +FLAC_API FLAC__bool FLAC__metadata_get_cuesheet(const char *filename, FLAC__StreamMetadata **cuesheet) +{ + FLAC__ASSERT(0 != filename); + FLAC__ASSERT(0 != cuesheet); + + *cuesheet = get_one_metadata_block_(filename, FLAC__METADATA_TYPE_CUESHEET); + + return 0 != *cuesheet; +} + +FLAC__StreamDecoderWriteStatus write_callback_(const FLAC__StreamDecoder *decoder, const FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data) +{ + (void)decoder, (void)frame, (void)buffer, (void)client_data; + + return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE; +} + +void metadata_callback_(const FLAC__StreamDecoder *decoder, const FLAC__StreamMetadata *metadata, void *client_data) +{ + level0_client_data *cd = (level0_client_data *)client_data; + (void)decoder; + + /* + * we assume we only get here when the one metadata block we were + * looking for was passed to us + */ + if(!cd->got_error && 0 == cd->object) { + if(0 == (cd->object = FLAC__metadata_object_clone(metadata))) + cd->got_error = true; + } +} + +void error_callback_(const FLAC__StreamDecoder *decoder, FLAC__StreamDecoderErrorStatus status, void *client_data) +{ + level0_client_data *cd = (level0_client_data *)client_data; + (void)decoder; + + if(status != FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC) + cd->got_error = true; +} + +FLAC_API FLAC__bool FLAC__metadata_get_picture(const char *filename, FLAC__StreamMetadata **picture, FLAC__StreamMetadata_Picture_Type type, const char *mime_type, const FLAC__byte *description, unsigned max_width, unsigned max_height, unsigned max_depth, unsigned max_colors) +{ + FLAC__Metadata_SimpleIterator *it; + FLAC__uint64 max_area_seen = 0; + FLAC__uint64 max_depth_seen = 0; + + FLAC__ASSERT(0 != filename); + FLAC__ASSERT(0 != picture); + + *picture = 0; + + it = FLAC__metadata_simple_iterator_new(); + if(0 == it) + return false; + if(!FLAC__metadata_simple_iterator_init(it, filename, /*read_only=*/true, /*preserve_file_stats=*/true)) { + FLAC__metadata_simple_iterator_delete(it); + return false; + } + do { + if(FLAC__metadata_simple_iterator_get_block_type(it) == FLAC__METADATA_TYPE_PICTURE) { + FLAC__StreamMetadata *obj = FLAC__metadata_simple_iterator_get_block(it); + FLAC__uint64 area = (FLAC__uint64)obj->data.picture.width * (FLAC__uint64)obj->data.picture.height; + /* check constraints */ + if( + (type == (FLAC__StreamMetadata_Picture_Type)(-1) || type == obj->data.picture.type) && + (mime_type == 0 || !strcmp(mime_type, obj->data.picture.mime_type)) && + (description == 0 || !strcmp((const char *)description, (const char *)obj->data.picture.description)) && + obj->data.picture.width <= max_width && + obj->data.picture.height <= max_height && + obj->data.picture.depth <= max_depth && + obj->data.picture.colors <= max_colors && + (area > max_area_seen || (area == max_area_seen && obj->data.picture.depth > max_depth_seen)) + ) { + if(*picture) + FLAC__metadata_object_delete(*picture); + *picture = obj; + max_area_seen = area; + max_depth_seen = obj->data.picture.depth; + } + else { + FLAC__metadata_object_delete(obj); + } + } + } while(FLAC__metadata_simple_iterator_next(it)); + + FLAC__metadata_simple_iterator_delete(it); + + return (0 != *picture); +} + + +/**************************************************************************** + * + * Level 1 implementation + * + ***************************************************************************/ + +#define SIMPLE_ITERATOR_MAX_PUSH_DEPTH (1+4) +/* 1 for initial offset, +4 for our own personal use */ + +struct FLAC__Metadata_SimpleIterator { + FILE *file; + char *filename, *tempfile_path_prefix; + struct flac_stat_s stats; + FLAC__bool has_stats; + FLAC__bool is_writable; + FLAC__Metadata_SimpleIteratorStatus status; + FLAC__off_t offset[SIMPLE_ITERATOR_MAX_PUSH_DEPTH]; + FLAC__off_t first_offset; /* this is the offset to the STREAMINFO block */ + unsigned depth; + /* this is the metadata block header of the current block we are pointing to: */ + FLAC__bool is_last; + FLAC__MetadataType type; + unsigned length; +}; + +FLAC_API const char * const FLAC__Metadata_SimpleIteratorStatusString[] = { + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ILLEGAL_INPUT", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ERROR_OPENING_FILE", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_NOT_A_FLAC_FILE", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_NOT_WRITABLE", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_BAD_METADATA", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_WRITE_ERROR", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_RENAME_ERROR", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_UNLINK_ERROR", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR", + "FLAC__METADATA_SIMPLE_ITERATOR_STATUS_INTERNAL_ERROR" +}; + + +FLAC_API FLAC__Metadata_SimpleIterator *FLAC__metadata_simple_iterator_new(void) +{ + FLAC__Metadata_SimpleIterator *iterator = calloc(1, sizeof(FLAC__Metadata_SimpleIterator)); + + if(0 != iterator) { + iterator->file = 0; + iterator->filename = 0; + iterator->tempfile_path_prefix = 0; + iterator->has_stats = false; + iterator->is_writable = false; + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; + iterator->first_offset = iterator->offset[0] = -1; + iterator->depth = 0; + } + + return iterator; +} + +static void simple_iterator_free_guts_(FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + + if(0 != iterator->file) { + fclose(iterator->file); + iterator->file = 0; + if(iterator->has_stats) + set_file_stats_(iterator->filename, &iterator->stats); + } + if(0 != iterator->filename) { + free(iterator->filename); + iterator->filename = 0; + } + if(0 != iterator->tempfile_path_prefix) { + free(iterator->tempfile_path_prefix); + iterator->tempfile_path_prefix = 0; + } +} + +FLAC_API void FLAC__metadata_simple_iterator_delete(FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + + simple_iterator_free_guts_(iterator); + free(iterator); +} + +FLAC_API FLAC__Metadata_SimpleIteratorStatus FLAC__metadata_simple_iterator_status(FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__Metadata_SimpleIteratorStatus status; + + FLAC__ASSERT(0 != iterator); + + status = iterator->status; + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; + return status; +} + +static FLAC__bool simple_iterator_prime_input_(FLAC__Metadata_SimpleIterator *iterator, FLAC__bool read_only) +{ + unsigned ret; + + FLAC__ASSERT(0 != iterator); + + if(read_only || 0 == (iterator->file = flac_fopen(iterator->filename, "r+b"))) { + iterator->is_writable = false; + if(read_only || errno == EACCES) { + if(0 == (iterator->file = flac_fopen(iterator->filename, "rb"))) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ERROR_OPENING_FILE; + return false; + } + } + else { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ERROR_OPENING_FILE; + return false; + } + } + else { + iterator->is_writable = true; + } + + ret = seek_to_first_metadata_block_(iterator->file); + switch(ret) { + case 0: + iterator->depth = 0; + iterator->first_offset = iterator->offset[iterator->depth] = ftello(iterator->file); + return read_metadata_block_header_(iterator); + case 1: + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + return false; + case 2: + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + case 3: + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_NOT_A_FLAC_FILE; + return false; + default: + FLAC__ASSERT(0); + return false; + } +} + +#if 0 +@@@ If we decide to finish implementing this, put this comment back in metadata.h +/* + * The 'tempfile_path_prefix' allows you to specify a directory where + * tempfiles should go. Remember that if your metadata edits cause the + * FLAC file to grow, the entire file will have to be rewritten. If + * 'tempfile_path_prefix' is NULL, the temp file will be written in the + * same directory as the original FLAC file. This makes replacing the + * original with the tempfile fast but requires extra space in the same + * partition for the tempfile. If space is a problem, you can pass a + * directory name belonging to a different partition in + * 'tempfile_path_prefix'. Note that you should use the forward slash + * '/' as the directory separator. A trailing slash is not needed; it + * will be added automatically. + */ +FLAC__bool FLAC__metadata_simple_iterator_init(FLAC__Metadata_SimpleIterator *iterator, const char *filename, FLAC__bool preserve_file_stats, const char *tempfile_path_prefix); +#endif + +FLAC_API FLAC__bool FLAC__metadata_simple_iterator_init(FLAC__Metadata_SimpleIterator *iterator, const char *filename, FLAC__bool read_only, FLAC__bool preserve_file_stats) +{ + const char *tempfile_path_prefix = 0; /*@@@ search for comments near 'flac_rename(...)' for what it will take to finish implementing this */ + + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != filename); + + simple_iterator_free_guts_(iterator); + + if(!read_only && preserve_file_stats) + iterator->has_stats = get_file_stats_(filename, &iterator->stats); + + if(0 == (iterator->filename = strdup(filename))) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + return false; + } + if(0 != tempfile_path_prefix && 0 == (iterator->tempfile_path_prefix = strdup(tempfile_path_prefix))) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + return false; + } + + return simple_iterator_prime_input_(iterator, read_only); +} + +FLAC_API FLAC__bool FLAC__metadata_simple_iterator_is_writable(const FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + return iterator->is_writable; +} + +FLAC_API FLAC__bool FLAC__metadata_simple_iterator_next(FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + if(iterator->is_last) + return false; + + if(0 != fseeko(iterator->file, iterator->length, SEEK_CUR)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + + iterator->offset[iterator->depth] = ftello(iterator->file); + + return read_metadata_block_header_(iterator); +} + +FLAC_API FLAC__bool FLAC__metadata_simple_iterator_prev(FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__off_t this_offset; + + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + if(iterator->offset[iterator->depth] == iterator->first_offset) + return false; + + if(0 != fseeko(iterator->file, iterator->first_offset, SEEK_SET)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + this_offset = iterator->first_offset; + if(!read_metadata_block_header_(iterator)) + return false; + + /* we ignore any error from ftello() and catch it in fseeko() */ + while(ftello(iterator->file) + (FLAC__off_t)iterator->length < iterator->offset[iterator->depth]) { + if(0 != fseeko(iterator->file, iterator->length, SEEK_CUR)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + this_offset = ftello(iterator->file); + if(!read_metadata_block_header_(iterator)) + return false; + } + + iterator->offset[iterator->depth] = this_offset; + + return true; +} + +/*@@@@add to tests*/ +FLAC_API FLAC__bool FLAC__metadata_simple_iterator_is_last(const FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + return iterator->is_last; +} + +/*@@@@add to tests*/ +FLAC_API off_t FLAC__metadata_simple_iterator_get_block_offset(const FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + return iterator->offset[iterator->depth]; +} + +FLAC_API FLAC__MetadataType FLAC__metadata_simple_iterator_get_block_type(const FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + return iterator->type; +} + +/*@@@@add to tests*/ +FLAC_API unsigned FLAC__metadata_simple_iterator_get_block_length(const FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + return iterator->length; +} + +/*@@@@add to tests*/ +FLAC_API FLAC__bool FLAC__metadata_simple_iterator_get_application_id(FLAC__Metadata_SimpleIterator *iterator, FLAC__byte *id) +{ + const unsigned id_bytes = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8; + + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + FLAC__ASSERT(0 != id); + + if(iterator->type != FLAC__METADATA_TYPE_APPLICATION) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ILLEGAL_INPUT; + return false; + } + + if(fread(id, 1, id_bytes, iterator->file) != id_bytes) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + return false; + } + + /* back up */ + if(0 != fseeko(iterator->file, -((int)id_bytes), SEEK_CUR)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + + return true; +} + +FLAC_API FLAC__StreamMetadata *FLAC__metadata_simple_iterator_get_block(FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__StreamMetadata *block = FLAC__metadata_object_new(iterator->type); + + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + if(0 != block) { + block->is_last = iterator->is_last; + block->length = iterator->length; + + if(!read_metadata_block_data_(iterator, block)) { + FLAC__metadata_object_delete(block); + return 0; + } + + /* back up to the beginning of the block data to stay consistent */ + if(0 != fseeko(iterator->file, iterator->offset[iterator->depth] + FLAC__STREAM_METADATA_HEADER_LENGTH, SEEK_SET)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + FLAC__metadata_object_delete(block); + return 0; + } + } + else + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + + return block; +} + +FLAC_API FLAC__bool FLAC__metadata_simple_iterator_set_block(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, FLAC__bool use_padding) +{ + FLAC__ASSERT_DECLARATION(FLAC__off_t debug_target_offset = iterator->offset[iterator->depth];) + FLAC__bool ret; + + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + FLAC__ASSERT(0 != block); + + if(!iterator->is_writable) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_NOT_WRITABLE; + return false; + } + + if(iterator->type == FLAC__METADATA_TYPE_STREAMINFO || block->type == FLAC__METADATA_TYPE_STREAMINFO) { + if(iterator->type != block->type) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ILLEGAL_INPUT; + return false; + } + } + + block->is_last = iterator->is_last; + + if(iterator->length == block->length) + return write_metadata_block_stationary_(iterator, block); + else if(iterator->length > block->length) { + if(use_padding && iterator->length >= FLAC__STREAM_METADATA_HEADER_LENGTH + block->length) { + ret = write_metadata_block_stationary_with_padding_(iterator, block, iterator->length - FLAC__STREAM_METADATA_HEADER_LENGTH - block->length, block->is_last); + FLAC__ASSERT(!ret || iterator->offset[iterator->depth] == debug_target_offset); + FLAC__ASSERT(!ret || ftello(iterator->file) == debug_target_offset + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH); + return ret; + } + else { + ret = rewrite_whole_file_(iterator, block, /*append=*/false); + FLAC__ASSERT(!ret || iterator->offset[iterator->depth] == debug_target_offset); + FLAC__ASSERT(!ret || ftello(iterator->file) == debug_target_offset + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH); + return ret; + } + } + else /* iterator->length < block->length */ { + unsigned padding_leftover = 0; + FLAC__bool padding_is_last = false; + if(use_padding) { + /* first see if we can even use padding */ + if(iterator->is_last) { + use_padding = false; + } + else { + const unsigned extra_padding_bytes_required = block->length - iterator->length; + simple_iterator_push_(iterator); + if(!FLAC__metadata_simple_iterator_next(iterator)) { + (void)simple_iterator_pop_(iterator); + return false; + } + if(iterator->type != FLAC__METADATA_TYPE_PADDING) { + use_padding = false; + } + else { + if(FLAC__STREAM_METADATA_HEADER_LENGTH + iterator->length == extra_padding_bytes_required) { + padding_leftover = 0; + block->is_last = iterator->is_last; + } + else if(iterator->length < extra_padding_bytes_required) + use_padding = false; + else { + padding_leftover = FLAC__STREAM_METADATA_HEADER_LENGTH + iterator->length - extra_padding_bytes_required; + padding_is_last = iterator->is_last; + block->is_last = false; + } + } + if(!simple_iterator_pop_(iterator)) + return false; + } + } + if(use_padding) { + if(padding_leftover == 0) { + ret = write_metadata_block_stationary_(iterator, block); + FLAC__ASSERT(!ret || iterator->offset[iterator->depth] == debug_target_offset); + FLAC__ASSERT(!ret || ftello(iterator->file) == debug_target_offset + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH); + return ret; + } + else { + FLAC__ASSERT(padding_leftover >= FLAC__STREAM_METADATA_HEADER_LENGTH); + ret = write_metadata_block_stationary_with_padding_(iterator, block, padding_leftover - FLAC__STREAM_METADATA_HEADER_LENGTH, padding_is_last); + FLAC__ASSERT(!ret || iterator->offset[iterator->depth] == debug_target_offset); + FLAC__ASSERT(!ret || ftello(iterator->file) == debug_target_offset + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH); + return ret; + } + } + else { + ret = rewrite_whole_file_(iterator, block, /*append=*/false); + FLAC__ASSERT(!ret || iterator->offset[iterator->depth] == debug_target_offset); + FLAC__ASSERT(!ret || ftello(iterator->file) == debug_target_offset + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH); + return ret; + } + } +} + +FLAC_API FLAC__bool FLAC__metadata_simple_iterator_insert_block_after(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, FLAC__bool use_padding) +{ + unsigned padding_leftover = 0; + FLAC__bool padding_is_last = false; + + FLAC__ASSERT_DECLARATION(FLAC__off_t debug_target_offset = iterator->offset[iterator->depth] + FLAC__STREAM_METADATA_HEADER_LENGTH + iterator->length;) + FLAC__bool ret; + + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + FLAC__ASSERT(0 != block); + + if(!iterator->is_writable) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_NOT_WRITABLE; + return false; + } + + if(block->type == FLAC__METADATA_TYPE_STREAMINFO) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ILLEGAL_INPUT; + return false; + } + + block->is_last = iterator->is_last; + + if(use_padding) { + /* first see if we can even use padding */ + if(iterator->is_last) { + use_padding = false; + } + else { + simple_iterator_push_(iterator); + if(!FLAC__metadata_simple_iterator_next(iterator)) { + (void)simple_iterator_pop_(iterator); + return false; + } + if(iterator->type != FLAC__METADATA_TYPE_PADDING) { + use_padding = false; + } + else { + if(iterator->length == block->length) { + padding_leftover = 0; + block->is_last = iterator->is_last; + } + else if(iterator->length < FLAC__STREAM_METADATA_HEADER_LENGTH + block->length) + use_padding = false; + else { + padding_leftover = iterator->length - block->length; + padding_is_last = iterator->is_last; + block->is_last = false; + } + } + if(!simple_iterator_pop_(iterator)) + return false; + } + } + if(use_padding) { + /* move to the next block, which is suitable padding */ + if(!FLAC__metadata_simple_iterator_next(iterator)) + return false; + if(padding_leftover == 0) { + ret = write_metadata_block_stationary_(iterator, block); + FLAC__ASSERT(iterator->offset[iterator->depth] == debug_target_offset); + FLAC__ASSERT(ftello(iterator->file) == debug_target_offset + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH); + return ret; + } + else { + FLAC__ASSERT(padding_leftover >= FLAC__STREAM_METADATA_HEADER_LENGTH); + ret = write_metadata_block_stationary_with_padding_(iterator, block, padding_leftover - FLAC__STREAM_METADATA_HEADER_LENGTH, padding_is_last); + FLAC__ASSERT(iterator->offset[iterator->depth] == debug_target_offset); + FLAC__ASSERT(ftello(iterator->file) == debug_target_offset + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH); + return ret; + } + } + else { + ret = rewrite_whole_file_(iterator, block, /*append=*/true); + FLAC__ASSERT(iterator->offset[iterator->depth] == debug_target_offset); + FLAC__ASSERT(ftello(iterator->file) == debug_target_offset + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH); + return ret; + } +} + +FLAC_API FLAC__bool FLAC__metadata_simple_iterator_delete_block(FLAC__Metadata_SimpleIterator *iterator, FLAC__bool use_padding) +{ + FLAC__ASSERT_DECLARATION(FLAC__off_t debug_target_offset = iterator->offset[iterator->depth];) + FLAC__bool ret; + + if(!iterator->is_writable) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_NOT_WRITABLE; + return false; + } + + if(iterator->type == FLAC__METADATA_TYPE_STREAMINFO) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ILLEGAL_INPUT; + return false; + } + + if(use_padding) { + FLAC__StreamMetadata *padding = FLAC__metadata_object_new(FLAC__METADATA_TYPE_PADDING); + if(0 == padding) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + return false; + } + padding->length = iterator->length; + if(!FLAC__metadata_simple_iterator_set_block(iterator, padding, false)) { + FLAC__metadata_object_delete(padding); + return false; + } + FLAC__metadata_object_delete(padding); + if(!FLAC__metadata_simple_iterator_prev(iterator)) + return false; + FLAC__ASSERT(iterator->offset[iterator->depth] + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH + (FLAC__off_t)iterator->length == debug_target_offset); + FLAC__ASSERT(ftello(iterator->file) + (FLAC__off_t)iterator->length == debug_target_offset); + return true; + } + else { + ret = rewrite_whole_file_(iterator, 0, /*append=*/false); + FLAC__ASSERT(iterator->offset[iterator->depth] + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH + (FLAC__off_t)iterator->length == debug_target_offset); + FLAC__ASSERT(ftello(iterator->file) + (FLAC__off_t)iterator->length == debug_target_offset); + return ret; + } +} + + + +/**************************************************************************** + * + * Level 2 implementation + * + ***************************************************************************/ + + +typedef struct FLAC__Metadata_Node { + FLAC__StreamMetadata *data; + struct FLAC__Metadata_Node *prev, *next; +} FLAC__Metadata_Node; + +struct FLAC__Metadata_Chain { + char *filename; /* will be NULL if using callbacks */ + FLAC__bool is_ogg; + FLAC__Metadata_Node *head; + FLAC__Metadata_Node *tail; + unsigned nodes; + FLAC__Metadata_ChainStatus status; + FLAC__off_t first_offset, last_offset; + /* + * This is the length of the chain initially read from the FLAC file. + * it is used to compare against the current length to decide whether + * or not the whole file has to be rewritten. + */ + FLAC__off_t initial_length; + /* @@@ hacky, these are currently only needed by ogg reader */ + FLAC__IOHandle handle; + FLAC__IOCallback_Read read_cb; +}; + +struct FLAC__Metadata_Iterator { + FLAC__Metadata_Chain *chain; + FLAC__Metadata_Node *current; +}; + +FLAC_API const char * const FLAC__Metadata_ChainStatusString[] = { + "FLAC__METADATA_CHAIN_STATUS_OK", + "FLAC__METADATA_CHAIN_STATUS_ILLEGAL_INPUT", + "FLAC__METADATA_CHAIN_STATUS_ERROR_OPENING_FILE", + "FLAC__METADATA_CHAIN_STATUS_NOT_A_FLAC_FILE", + "FLAC__METADATA_CHAIN_STATUS_NOT_WRITABLE", + "FLAC__METADATA_CHAIN_STATUS_BAD_METADATA", + "FLAC__METADATA_CHAIN_STATUS_READ_ERROR", + "FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR", + "FLAC__METADATA_CHAIN_STATUS_WRITE_ERROR", + "FLAC__METADATA_CHAIN_STATUS_RENAME_ERROR", + "FLAC__METADATA_CHAIN_STATUS_UNLINK_ERROR", + "FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR", + "FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR", + "FLAC__METADATA_CHAIN_STATUS_INVALID_CALLBACKS", + "FLAC__METADATA_CHAIN_STATUS_READ_WRITE_MISMATCH", + "FLAC__METADATA_CHAIN_STATUS_WRONG_WRITE_CALL" +}; + + +static FLAC__Metadata_Node *node_new_(void) +{ + return calloc(1, sizeof(FLAC__Metadata_Node)); +} + +static void node_delete_(FLAC__Metadata_Node *node) +{ + FLAC__ASSERT(0 != node); + if(0 != node->data) + FLAC__metadata_object_delete(node->data); + free(node); +} + +static void chain_init_(FLAC__Metadata_Chain *chain) +{ + FLAC__ASSERT(0 != chain); + + chain->filename = 0; + chain->is_ogg = false; + chain->head = chain->tail = 0; + chain->nodes = 0; + chain->status = FLAC__METADATA_CHAIN_STATUS_OK; + chain->initial_length = 0; + chain->read_cb = 0; +} + +static void chain_clear_(FLAC__Metadata_Chain *chain) +{ + FLAC__Metadata_Node *node, *next; + + FLAC__ASSERT(0 != chain); + + for(node = chain->head; node; ) { + next = node->next; + node_delete_(node); + node = next; + } + + if(0 != chain->filename) + free(chain->filename); + + chain_init_(chain); +} + +static void chain_append_node_(FLAC__Metadata_Chain *chain, FLAC__Metadata_Node *node) +{ + FLAC__ASSERT(0 != chain); + FLAC__ASSERT(0 != node); + FLAC__ASSERT(0 != node->data); + + node->next = node->prev = 0; + node->data->is_last = true; + if(0 != chain->tail) + chain->tail->data->is_last = false; + + if(0 == chain->head) + chain->head = node; + else { + FLAC__ASSERT(0 != chain->tail); + chain->tail->next = node; + node->prev = chain->tail; + } + chain->tail = node; + chain->nodes++; +} + +static void chain_remove_node_(FLAC__Metadata_Chain *chain, FLAC__Metadata_Node *node) +{ + FLAC__ASSERT(0 != chain); + FLAC__ASSERT(0 != node); + + if(node == chain->head) + chain->head = node->next; + else + node->prev->next = node->next; + + if(node == chain->tail) + chain->tail = node->prev; + else + node->next->prev = node->prev; + + if(0 != chain->tail) + chain->tail->data->is_last = true; + + chain->nodes--; +} + +static void chain_delete_node_(FLAC__Metadata_Chain *chain, FLAC__Metadata_Node *node) +{ + chain_remove_node_(chain, node); + node_delete_(node); +} + +static FLAC__off_t chain_calculate_length_(FLAC__Metadata_Chain *chain) +{ + const FLAC__Metadata_Node *node; + FLAC__off_t length = 0; + for(node = chain->head; node; node = node->next) + length += (FLAC__STREAM_METADATA_HEADER_LENGTH + node->data->length); + return length; +} + +static void iterator_insert_node_(FLAC__Metadata_Iterator *iterator, FLAC__Metadata_Node *node) +{ + FLAC__ASSERT(0 != node); + FLAC__ASSERT(0 != node->data); + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->current); + FLAC__ASSERT(0 != iterator->chain); + FLAC__ASSERT(0 != iterator->chain->head); + FLAC__ASSERT(0 != iterator->chain->tail); + + node->data->is_last = false; + + node->prev = iterator->current->prev; + node->next = iterator->current; + + if(0 == node->prev) + iterator->chain->head = node; + else + node->prev->next = node; + + iterator->current->prev = node; + + iterator->chain->nodes++; +} + +static void iterator_insert_node_after_(FLAC__Metadata_Iterator *iterator, FLAC__Metadata_Node *node) +{ + FLAC__ASSERT(0 != node); + FLAC__ASSERT(0 != node->data); + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->current); + FLAC__ASSERT(0 != iterator->chain); + FLAC__ASSERT(0 != iterator->chain->head); + FLAC__ASSERT(0 != iterator->chain->tail); + + iterator->current->data->is_last = false; + + node->prev = iterator->current; + node->next = iterator->current->next; + + if(0 == node->next) + iterator->chain->tail = node; + else + node->next->prev = node; + + node->prev->next = node; + + iterator->chain->tail->data->is_last = true; + + iterator->chain->nodes++; +} + +/* return true iff node and node->next are both padding */ +static FLAC__bool chain_merge_adjacent_padding_(FLAC__Metadata_Chain *chain, FLAC__Metadata_Node *node) +{ + if(node->data->type == FLAC__METADATA_TYPE_PADDING && 0 != node->next && node->next->data->type == FLAC__METADATA_TYPE_PADDING) { + const unsigned growth = FLAC__STREAM_METADATA_HEADER_LENGTH + node->next->data->length; + node->data->length += growth; /* new block size can be greater than max metadata block size, but it'll be fixed later in chain_prepare_for_write_() */ + + chain_delete_node_(chain, node->next); + return true; + } + else + return false; +} + +/* Returns the new length of the chain, or 0 if there was an error. */ +/* WATCHOUT: This can get called multiple times before a write, so + * it should still work when this happens. + */ +/* WATCHOUT: Make sure to also update the logic in + * FLAC__metadata_chain_check_if_tempfile_needed() if the logic here changes. + */ +static FLAC__off_t chain_prepare_for_write_(FLAC__Metadata_Chain *chain, FLAC__bool use_padding) +{ + FLAC__off_t current_length = chain_calculate_length_(chain); + + if(use_padding) { + /* if the metadata shrank and the last block is padding, we just extend the last padding block */ + if(current_length < chain->initial_length && chain->tail->data->type == FLAC__METADATA_TYPE_PADDING) { + const FLAC__off_t delta = chain->initial_length - current_length; + chain->tail->data->length += delta; + current_length += delta; + FLAC__ASSERT(current_length == chain->initial_length); + } + /* if the metadata shrank more than 4 bytes then there's room to add another padding block */ + else if(current_length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH <= chain->initial_length) { + FLAC__StreamMetadata *padding; + FLAC__Metadata_Node *node; + if(0 == (padding = FLAC__metadata_object_new(FLAC__METADATA_TYPE_PADDING))) { + chain->status = FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR; + return 0; + } + padding->length = chain->initial_length - (FLAC__STREAM_METADATA_HEADER_LENGTH + current_length); + if(0 == (node = node_new_())) { + FLAC__metadata_object_delete(padding); + chain->status = FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR; + return 0; + } + node->data = padding; + chain_append_node_(chain, node); + current_length = chain_calculate_length_(chain); + FLAC__ASSERT(current_length == chain->initial_length); + } + /* if the metadata grew but the last block is padding, try cutting the padding to restore the original length so we don't have to rewrite the whole file */ + else if(current_length > chain->initial_length) { + const FLAC__off_t delta = current_length - chain->initial_length; + if(chain->tail->data->type == FLAC__METADATA_TYPE_PADDING) { + /* if the delta is exactly the size of the last padding block, remove the padding block */ + if((FLAC__off_t)chain->tail->data->length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH == delta) { + chain_delete_node_(chain, chain->tail); + current_length = chain_calculate_length_(chain); + FLAC__ASSERT(current_length == chain->initial_length); + } + /* if there is at least 'delta' bytes of padding, trim the padding down */ + else if((FLAC__off_t)chain->tail->data->length >= delta) { + chain->tail->data->length -= delta; + current_length -= delta; + FLAC__ASSERT(current_length == chain->initial_length); + } + } + } + } + + /* check sizes of all metadata blocks; reduce padding size if necessary */ + { + FLAC__Metadata_Node *node; + for (node = chain->head; node; node = node->next) { + if(node->data->length >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN)) { + if(node->data->type == FLAC__METADATA_TYPE_PADDING) { + node->data->length = (1u << FLAC__STREAM_METADATA_LENGTH_LEN) - 1; + current_length = chain_calculate_length_(chain); + } else { + chain->status = FLAC__METADATA_CHAIN_STATUS_BAD_METADATA; + return 0; + } + } + } + } + + return current_length; +} + +static FLAC__bool chain_read_cb_(FLAC__Metadata_Chain *chain, FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__IOCallback_Tell tell_cb) +{ + FLAC__Metadata_Node *node; + + FLAC__ASSERT(0 != chain); + + /* we assume we're already at the beginning of the file */ + + switch(seek_to_first_metadata_block_cb_(handle, read_cb, seek_cb)) { + case 0: + break; + case 1: + chain->status = FLAC__METADATA_CHAIN_STATUS_READ_ERROR; + return false; + case 2: + chain->status = FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR; + return false; + case 3: + chain->status = FLAC__METADATA_CHAIN_STATUS_NOT_A_FLAC_FILE; + return false; + default: + FLAC__ASSERT(0); + return false; + } + + { + FLAC__int64 pos = tell_cb(handle); + if(pos < 0) { + chain->status = FLAC__METADATA_CHAIN_STATUS_READ_ERROR; + return false; + } + chain->first_offset = (FLAC__off_t)pos; + } + + { + FLAC__bool is_last; + FLAC__MetadataType type; + unsigned length; + + do { + node = node_new_(); + if(0 == node) { + chain->status = FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR; + return false; + } + + if(!read_metadata_block_header_cb_(handle, read_cb, &is_last, &type, &length)) { + node_delete_(node); + chain->status = FLAC__METADATA_CHAIN_STATUS_READ_ERROR; + return false; + } + + node->data = FLAC__metadata_object_new(type); + if(0 == node->data) { + node_delete_(node); + chain->status = FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR; + return false; + } + + node->data->is_last = is_last; + node->data->length = length; + + chain->status = get_equivalent_status_(read_metadata_block_data_cb_(handle, read_cb, seek_cb, node->data)); + if(chain->status != FLAC__METADATA_CHAIN_STATUS_OK) { + node_delete_(node); + return false; + } + chain_append_node_(chain, node); + } while(!is_last); + } + + { + FLAC__int64 pos = tell_cb(handle); + if(pos < 0) { + chain->status = FLAC__METADATA_CHAIN_STATUS_READ_ERROR; + return false; + } + chain->last_offset = (FLAC__off_t)pos; + } + + chain->initial_length = chain_calculate_length_(chain); + + return true; +} + +static FLAC__StreamDecoderReadStatus chain_read_ogg_read_cb_(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, void *client_data) +{ + FLAC__Metadata_Chain *chain = (FLAC__Metadata_Chain*)client_data; + (void)decoder; + if(*bytes > 0 && chain->status == FLAC__METADATA_CHAIN_STATUS_OK) { + *bytes = chain->read_cb(buffer, sizeof(FLAC__byte), *bytes, chain->handle); + if(*bytes == 0) + return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM; + else + return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE; + } + else + return FLAC__STREAM_DECODER_READ_STATUS_ABORT; +} + +static FLAC__StreamDecoderWriteStatus chain_read_ogg_write_cb_(const FLAC__StreamDecoder *decoder, const FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data) +{ + (void)decoder, (void)frame, (void)buffer, (void)client_data; + return FLAC__STREAM_DECODER_WRITE_STATUS_ABORT; +} + +static void chain_read_ogg_metadata_cb_(const FLAC__StreamDecoder *decoder, const FLAC__StreamMetadata *metadata, void *client_data) +{ + FLAC__Metadata_Chain *chain = (FLAC__Metadata_Chain*)client_data; + FLAC__Metadata_Node *node; + + (void)decoder; + + node = node_new_(); + if(0 == node) { + chain->status = FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR; + return; + } + + node->data = FLAC__metadata_object_clone(metadata); + if(0 == node->data) { + node_delete_(node); + chain->status = FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR; + return; + } + + chain_append_node_(chain, node); +} + +static void chain_read_ogg_error_cb_(const FLAC__StreamDecoder *decoder, FLAC__StreamDecoderErrorStatus status, void *client_data) +{ + FLAC__Metadata_Chain *chain = (FLAC__Metadata_Chain*)client_data; + (void)decoder, (void)status; + chain->status = FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR; /*@@@ maybe needs better error code */ +} + +static FLAC__bool chain_read_ogg_cb_(FLAC__Metadata_Chain *chain, FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb) +{ + FLAC__StreamDecoder *decoder; + + FLAC__ASSERT(0 != chain); + + /* we assume we're already at the beginning of the file */ + + chain->handle = handle; + chain->read_cb = read_cb; + if(0 == (decoder = FLAC__stream_decoder_new())) { + chain->status = FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR; + return false; + } + FLAC__stream_decoder_set_metadata_respond_all(decoder); + if(FLAC__stream_decoder_init_ogg_stream(decoder, chain_read_ogg_read_cb_, /*seek_callback=*/0, /*tell_callback=*/0, /*length_callback=*/0, /*eof_callback=*/0, chain_read_ogg_write_cb_, chain_read_ogg_metadata_cb_, chain_read_ogg_error_cb_, chain) != FLAC__STREAM_DECODER_INIT_STATUS_OK) { + FLAC__stream_decoder_delete(decoder); + chain->status = FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR; /*@@@ maybe needs better error code */ + return false; + } + + chain->first_offset = 0; /*@@@ wrong; will need to be set correctly to implement metadata writing for Ogg FLAC */ + + if(!FLAC__stream_decoder_process_until_end_of_metadata(decoder)) + chain->status = FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR; /*@@@ maybe needs better error code */ + if(chain->status != FLAC__METADATA_CHAIN_STATUS_OK) { + FLAC__stream_decoder_delete(decoder); + return false; + } + + FLAC__stream_decoder_delete(decoder); + + chain->last_offset = 0; /*@@@ wrong; will need to be set correctly to implement metadata writing for Ogg FLAC */ + + chain->initial_length = chain_calculate_length_(chain); + + return true; +} + +static FLAC__bool chain_rewrite_metadata_in_place_cb_(FLAC__Metadata_Chain *chain, FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, FLAC__IOCallback_Seek seek_cb) +{ + FLAC__Metadata_Node *node; + + FLAC__ASSERT(0 != chain); + FLAC__ASSERT(0 != chain->head); + + if(0 != seek_cb(handle, chain->first_offset, SEEK_SET)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR; + return false; + } + + for(node = chain->head; node; node = node->next) { + if(!write_metadata_block_header_cb_(handle, write_cb, node->data)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_WRITE_ERROR; + return false; + } + if(!write_metadata_block_data_cb_(handle, write_cb, node->data)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_WRITE_ERROR; + return false; + } + } + + /*FLAC__ASSERT(fflush(), ftello() == chain->last_offset);*/ + + chain->status = FLAC__METADATA_CHAIN_STATUS_OK; + return true; +} + +static FLAC__bool chain_rewrite_metadata_in_place_(FLAC__Metadata_Chain *chain) +{ + FILE *file; + FLAC__bool ret; + + FLAC__ASSERT(0 != chain->filename); + + if(0 == (file = flac_fopen(chain->filename, "r+b"))) { + chain->status = FLAC__METADATA_CHAIN_STATUS_ERROR_OPENING_FILE; + return false; + } + + /* chain_rewrite_metadata_in_place_cb_() sets chain->status for us */ + ret = chain_rewrite_metadata_in_place_cb_(chain, (FLAC__IOHandle)file, (FLAC__IOCallback_Write)fwrite, fseek_wrapper_); + + fclose(file); + + return ret; +} + +static FLAC__bool chain_rewrite_file_(FLAC__Metadata_Chain *chain, const char *tempfile_path_prefix) +{ + FILE *f, *tempfile = NULL; + char *tempfilename; + FLAC__Metadata_SimpleIteratorStatus status; + const FLAC__Metadata_Node *node; + + FLAC__ASSERT(0 != chain); + FLAC__ASSERT(0 != chain->filename); + FLAC__ASSERT(0 != chain->head); + + /* copy the file prefix (data up to first metadata block */ + if(0 == (f = flac_fopen(chain->filename, "rb"))) { + chain->status = FLAC__METADATA_CHAIN_STATUS_ERROR_OPENING_FILE; + return false; + } + if(!open_tempfile_(chain->filename, tempfile_path_prefix, &tempfile, &tempfilename, &status)) { + chain->status = get_equivalent_status_(status); + goto err; + } + if(!copy_n_bytes_from_file_(f, tempfile, chain->first_offset, &status)) { + chain->status = get_equivalent_status_(status); + goto err; + } + + /* write the metadata */ + for(node = chain->head; node; node = node->next) { + if(!write_metadata_block_header_(tempfile, &status, node->data)) { + chain->status = get_equivalent_status_(status); + goto err; + } + if(!write_metadata_block_data_(tempfile, &status, node->data)) { + chain->status = get_equivalent_status_(status); + goto err; + } + } + /*FLAC__ASSERT(fflush(), ftello() == chain->last_offset);*/ + + /* copy the file postfix (everything after the metadata) */ + if(0 != fseeko(f, chain->last_offset, SEEK_SET)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR; + goto err; + } + if(!copy_remaining_bytes_from_file_(f, tempfile, &status)) { + chain->status = get_equivalent_status_(status); + goto err; + } + + /* move the tempfile on top of the original */ + (void)fclose(f); + if(!transport_tempfile_(chain->filename, &tempfile, &tempfilename, &status)) + return false; + + return true; + +err: + (void)fclose(f); + cleanup_tempfile_(&tempfile, &tempfilename); + return false; +} + +/* assumes 'handle' is already at beginning of file */ +static FLAC__bool chain_rewrite_file_cb_(FLAC__Metadata_Chain *chain, FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__IOCallback_Eof eof_cb, FLAC__IOHandle temp_handle, FLAC__IOCallback_Write temp_write_cb) +{ + FLAC__Metadata_SimpleIteratorStatus status; + const FLAC__Metadata_Node *node; + + FLAC__ASSERT(0 != chain); + FLAC__ASSERT(0 == chain->filename); + FLAC__ASSERT(0 != chain->head); + + /* copy the file prefix (data up to first metadata block */ + if(!copy_n_bytes_from_file_cb_(handle, read_cb, temp_handle, temp_write_cb, chain->first_offset, &status)) { + chain->status = get_equivalent_status_(status); + return false; + } + + /* write the metadata */ + for(node = chain->head; node; node = node->next) { + if(!write_metadata_block_header_cb_(temp_handle, temp_write_cb, node->data)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_WRITE_ERROR; + return false; + } + if(!write_metadata_block_data_cb_(temp_handle, temp_write_cb, node->data)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_WRITE_ERROR; + return false; + } + } + /*FLAC__ASSERT(fflush(), ftello() == chain->last_offset);*/ + + /* copy the file postfix (everything after the metadata) */ + if(0 != seek_cb(handle, chain->last_offset, SEEK_SET)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR; + return false; + } + if(!copy_remaining_bytes_from_file_cb_(handle, read_cb, eof_cb, temp_handle, temp_write_cb, &status)) { + chain->status = get_equivalent_status_(status); + return false; + } + + return true; +} + +FLAC_API FLAC__Metadata_Chain *FLAC__metadata_chain_new(void) +{ + FLAC__Metadata_Chain *chain = calloc(1, sizeof(FLAC__Metadata_Chain)); + + if(0 != chain) + chain_init_(chain); + + return chain; +} + +FLAC_API void FLAC__metadata_chain_delete(FLAC__Metadata_Chain *chain) +{ + FLAC__ASSERT(0 != chain); + + chain_clear_(chain); + + free(chain); +} + +FLAC_API FLAC__Metadata_ChainStatus FLAC__metadata_chain_status(FLAC__Metadata_Chain *chain) +{ + FLAC__Metadata_ChainStatus status; + + FLAC__ASSERT(0 != chain); + + status = chain->status; + chain->status = FLAC__METADATA_CHAIN_STATUS_OK; + return status; +} + +static FLAC__bool chain_read_(FLAC__Metadata_Chain *chain, const char *filename, FLAC__bool is_ogg) +{ + FILE *file; + FLAC__bool ret; + + FLAC__ASSERT(0 != chain); + FLAC__ASSERT(0 != filename); + + chain_clear_(chain); + + if(0 == (chain->filename = strdup(filename))) { + chain->status = FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR; + return false; + } + + chain->is_ogg = is_ogg; + + if(0 == (file = flac_fopen(filename, "rb"))) { + chain->status = FLAC__METADATA_CHAIN_STATUS_ERROR_OPENING_FILE; + return false; + } + + /* the function also sets chain->status for us */ + ret = is_ogg? + chain_read_ogg_cb_(chain, file, (FLAC__IOCallback_Read)fread) : + chain_read_cb_(chain, file, (FLAC__IOCallback_Read)fread, fseek_wrapper_, ftell_wrapper_) + ; + + fclose(file); + + return ret; +} + +FLAC_API FLAC__bool FLAC__metadata_chain_read(FLAC__Metadata_Chain *chain, const char *filename) +{ + return chain_read_(chain, filename, /*is_ogg=*/false); +} + +/*@@@@add to tests*/ +FLAC_API FLAC__bool FLAC__metadata_chain_read_ogg(FLAC__Metadata_Chain *chain, const char *filename) +{ + return chain_read_(chain, filename, /*is_ogg=*/true); +} + +static FLAC__bool chain_read_with_callbacks_(FLAC__Metadata_Chain *chain, FLAC__IOHandle handle, FLAC__IOCallbacks callbacks, FLAC__bool is_ogg) +{ + FLAC__bool ret; + + FLAC__ASSERT(0 != chain); + + chain_clear_(chain); + + if (0 == callbacks.read || 0 == callbacks.seek || 0 == callbacks.tell) { + chain->status = FLAC__METADATA_CHAIN_STATUS_INVALID_CALLBACKS; + return false; + } + + chain->is_ogg = is_ogg; + + /* rewind */ + if(0 != callbacks.seek(handle, 0, SEEK_SET)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR; + return false; + } + + /* the function also sets chain->status for us */ + ret = is_ogg? + chain_read_ogg_cb_(chain, handle, callbacks.read) : + chain_read_cb_(chain, handle, callbacks.read, callbacks.seek, callbacks.tell) + ; + + return ret; +} + +FLAC_API FLAC__bool FLAC__metadata_chain_read_with_callbacks(FLAC__Metadata_Chain *chain, FLAC__IOHandle handle, FLAC__IOCallbacks callbacks) +{ + return chain_read_with_callbacks_(chain, handle, callbacks, /*is_ogg=*/false); +} + +/*@@@@add to tests*/ +FLAC_API FLAC__bool FLAC__metadata_chain_read_ogg_with_callbacks(FLAC__Metadata_Chain *chain, FLAC__IOHandle handle, FLAC__IOCallbacks callbacks) +{ + return chain_read_with_callbacks_(chain, handle, callbacks, /*is_ogg=*/true); +} + +typedef enum { + LBS_NONE = 0, + LBS_SIZE_CHANGED, + LBS_BLOCK_ADDED, + LBS_BLOCK_REMOVED +} LastBlockState; + +FLAC_API FLAC__bool FLAC__metadata_chain_check_if_tempfile_needed(FLAC__Metadata_Chain *chain, FLAC__bool use_padding) +{ + /* This does all the same checks that are in chain_prepare_for_write_() + * but doesn't actually alter the chain. Make sure to update the logic + * here if chain_prepare_for_write_() changes. + */ + FLAC__off_t current_length; + LastBlockState lbs_state = LBS_NONE; + unsigned lbs_size = 0; + + FLAC__ASSERT(0 != chain); + + current_length = chain_calculate_length_(chain); + + if(use_padding) { + const FLAC__Metadata_Node * const node = chain->tail; + /* if the metadata shrank and the last block is padding, we just extend the last padding block */ + if(current_length < chain->initial_length && node->data->type == FLAC__METADATA_TYPE_PADDING) { + lbs_state = LBS_SIZE_CHANGED; + lbs_size = node->data->length + (chain->initial_length - current_length); + } + /* if the metadata shrank more than 4 bytes then there's room to add another padding block */ + else if(current_length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH <= chain->initial_length) { + lbs_state = LBS_BLOCK_ADDED; + lbs_size = chain->initial_length - (current_length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH); + } + /* if the metadata grew but the last block is padding, try cutting the padding to restore the original length so we don't have to rewrite the whole file */ + else if(current_length > chain->initial_length) { + const FLAC__off_t delta = current_length - chain->initial_length; + if(node->data->type == FLAC__METADATA_TYPE_PADDING) { + /* if the delta is exactly the size of the last padding block, remove the padding block */ + if((FLAC__off_t)node->data->length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH == delta) { + lbs_state = LBS_BLOCK_REMOVED; + lbs_size = 0; + } + /* if there is at least 'delta' bytes of padding, trim the padding down */ + else if((FLAC__off_t)node->data->length >= delta) { + lbs_state = LBS_SIZE_CHANGED; + lbs_size = node->data->length - delta; + } + } + } + } + + current_length = 0; + /* check sizes of all metadata blocks; reduce padding size if necessary */ + { + const FLAC__Metadata_Node *node; + for(node = chain->head; node; node = node->next) { + unsigned block_len = node->data->length; + if(node == chain->tail) { + if(lbs_state == LBS_BLOCK_REMOVED) + continue; + else if(lbs_state == LBS_SIZE_CHANGED) + block_len = lbs_size; + } + if(block_len >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN)) { + if(node->data->type == FLAC__METADATA_TYPE_PADDING) + block_len = (1u << FLAC__STREAM_METADATA_LENGTH_LEN) - 1; + else + return false /* the return value doesn't matter */; + } + current_length += (FLAC__STREAM_METADATA_HEADER_LENGTH + block_len); + } + + if(lbs_state == LBS_BLOCK_ADDED) { + /* test added padding block */ + unsigned block_len = lbs_size; + if(block_len >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN)) + block_len = (1u << FLAC__STREAM_METADATA_LENGTH_LEN) - 1; + current_length += (FLAC__STREAM_METADATA_HEADER_LENGTH + block_len); + } + } + + return (current_length != chain->initial_length); +} + +FLAC_API FLAC__bool FLAC__metadata_chain_write(FLAC__Metadata_Chain *chain, FLAC__bool use_padding, FLAC__bool preserve_file_stats) +{ + struct flac_stat_s stats; + const char *tempfile_path_prefix = 0; + FLAC__off_t current_length; + + FLAC__ASSERT(0 != chain); + + if (chain->is_ogg) { /* cannot write back to Ogg FLAC yet */ + chain->status = FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR; + return false; + } + + if (0 == chain->filename) { + chain->status = FLAC__METADATA_CHAIN_STATUS_READ_WRITE_MISMATCH; + return false; + } + + current_length = chain_prepare_for_write_(chain, use_padding); + + /* a return value of 0 means there was an error; chain->status is already set */ + if (0 == current_length) + return false; + + if(preserve_file_stats) + get_file_stats_(chain->filename, &stats); + + if(current_length == chain->initial_length) { + if(!chain_rewrite_metadata_in_place_(chain)) + return false; + } + else { + if(!chain_rewrite_file_(chain, tempfile_path_prefix)) + return false; + + /* recompute lengths and offsets */ + { + const FLAC__Metadata_Node *node; + chain->initial_length = current_length; + chain->last_offset = chain->first_offset; + for(node = chain->head; node; node = node->next) + chain->last_offset += (FLAC__STREAM_METADATA_HEADER_LENGTH + node->data->length); + } + } + + if(preserve_file_stats) + set_file_stats_(chain->filename, &stats); + + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_chain_write_with_callbacks(FLAC__Metadata_Chain *chain, FLAC__bool use_padding, FLAC__IOHandle handle, FLAC__IOCallbacks callbacks) +{ + FLAC__off_t current_length; + + FLAC__ASSERT(0 != chain); + + if (chain->is_ogg) { /* cannot write back to Ogg FLAC yet */ + chain->status = FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR; + return false; + } + + if (0 != chain->filename) { + chain->status = FLAC__METADATA_CHAIN_STATUS_READ_WRITE_MISMATCH; + return false; + } + + if (0 == callbacks.write || 0 == callbacks.seek) { + chain->status = FLAC__METADATA_CHAIN_STATUS_INVALID_CALLBACKS; + return false; + } + + if (FLAC__metadata_chain_check_if_tempfile_needed(chain, use_padding)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_WRONG_WRITE_CALL; + return false; + } + + current_length = chain_prepare_for_write_(chain, use_padding); + + /* a return value of 0 means there was an error; chain->status is already set */ + if (0 == current_length) + return false; + + FLAC__ASSERT(current_length == chain->initial_length); + + return chain_rewrite_metadata_in_place_cb_(chain, handle, callbacks.write, callbacks.seek); +} + +FLAC_API FLAC__bool FLAC__metadata_chain_write_with_callbacks_and_tempfile(FLAC__Metadata_Chain *chain, FLAC__bool use_padding, FLAC__IOHandle handle, FLAC__IOCallbacks callbacks, FLAC__IOHandle temp_handle, FLAC__IOCallbacks temp_callbacks) +{ + FLAC__off_t current_length; + + FLAC__ASSERT(0 != chain); + + if (chain->is_ogg) { /* cannot write back to Ogg FLAC yet */ + chain->status = FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR; + return false; + } + + if (0 != chain->filename) { + chain->status = FLAC__METADATA_CHAIN_STATUS_READ_WRITE_MISMATCH; + return false; + } + + if (0 == callbacks.read || 0 == callbacks.seek || 0 == callbacks.eof) { + chain->status = FLAC__METADATA_CHAIN_STATUS_INVALID_CALLBACKS; + return false; + } + if (0 == temp_callbacks.write) { + chain->status = FLAC__METADATA_CHAIN_STATUS_INVALID_CALLBACKS; + return false; + } + + if (!FLAC__metadata_chain_check_if_tempfile_needed(chain, use_padding)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_WRONG_WRITE_CALL; + return false; + } + + current_length = chain_prepare_for_write_(chain, use_padding); + + /* a return value of 0 means there was an error; chain->status is already set */ + if (0 == current_length) + return false; + + FLAC__ASSERT(current_length != chain->initial_length); + + /* rewind */ + if(0 != callbacks.seek(handle, 0, SEEK_SET)) { + chain->status = FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR; + return false; + } + + if(!chain_rewrite_file_cb_(chain, handle, callbacks.read, callbacks.seek, callbacks.eof, temp_handle, temp_callbacks.write)) + return false; + + /* recompute lengths and offsets */ + { + const FLAC__Metadata_Node *node; + chain->initial_length = current_length; + chain->last_offset = chain->first_offset; + for(node = chain->head; node; node = node->next) + chain->last_offset += (FLAC__STREAM_METADATA_HEADER_LENGTH + node->data->length); + } + + return true; +} + +FLAC_API void FLAC__metadata_chain_merge_padding(FLAC__Metadata_Chain *chain) +{ + FLAC__Metadata_Node *node; + + FLAC__ASSERT(0 != chain); + + for(node = chain->head; node; ) { + if(!chain_merge_adjacent_padding_(chain, node)) + node = node->next; + } +} + +FLAC_API void FLAC__metadata_chain_sort_padding(FLAC__Metadata_Chain *chain) +{ + FLAC__Metadata_Node *node, *save; + unsigned i; + + FLAC__ASSERT(0 != chain); + + /* + * Don't try and be too smart... this simple algo is good enough for + * the small number of nodes that we deal with. + */ + for(i = 0, node = chain->head; i < chain->nodes; i++) { + if(node->data->type == FLAC__METADATA_TYPE_PADDING) { + save = node->next; + chain_remove_node_(chain, node); + chain_append_node_(chain, node); + node = save; + } + else { + node = node->next; + } + } + + FLAC__metadata_chain_merge_padding(chain); +} + + +FLAC_API FLAC__Metadata_Iterator *FLAC__metadata_iterator_new(void) +{ + FLAC__Metadata_Iterator *iterator = calloc(1, sizeof(FLAC__Metadata_Iterator)); + + /* calloc() implies: + iterator->current = 0; + iterator->chain = 0; + */ + + return iterator; +} + +FLAC_API void FLAC__metadata_iterator_delete(FLAC__Metadata_Iterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + + free(iterator); +} + +FLAC_API void FLAC__metadata_iterator_init(FLAC__Metadata_Iterator *iterator, FLAC__Metadata_Chain *chain) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != chain); + FLAC__ASSERT(0 != chain->head); + + iterator->chain = chain; + iterator->current = chain->head; +} + +FLAC_API FLAC__bool FLAC__metadata_iterator_next(FLAC__Metadata_Iterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + + if(0 == iterator->current || 0 == iterator->current->next) + return false; + + iterator->current = iterator->current->next; + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_iterator_prev(FLAC__Metadata_Iterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + + if(0 == iterator->current || 0 == iterator->current->prev) + return false; + + iterator->current = iterator->current->prev; + return true; +} + +FLAC_API FLAC__MetadataType FLAC__metadata_iterator_get_block_type(const FLAC__Metadata_Iterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->current); + FLAC__ASSERT(0 != iterator->current->data); + + return iterator->current->data->type; +} + +FLAC_API FLAC__StreamMetadata *FLAC__metadata_iterator_get_block(FLAC__Metadata_Iterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->current); + + return iterator->current->data; +} + +FLAC_API FLAC__bool FLAC__metadata_iterator_set_block(FLAC__Metadata_Iterator *iterator, FLAC__StreamMetadata *block) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != block); + return FLAC__metadata_iterator_delete_block(iterator, false) && FLAC__metadata_iterator_insert_block_after(iterator, block); +} + +FLAC_API FLAC__bool FLAC__metadata_iterator_delete_block(FLAC__Metadata_Iterator *iterator, FLAC__bool replace_with_padding) +{ + FLAC__Metadata_Node *save; + + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->current); + + if(0 == iterator->current->prev) { + FLAC__ASSERT(iterator->current->data->type == FLAC__METADATA_TYPE_STREAMINFO); + return false; + } + + save = iterator->current->prev; + + if(replace_with_padding) { + FLAC__metadata_object_delete_data(iterator->current->data); + iterator->current->data->type = FLAC__METADATA_TYPE_PADDING; + } + else { + chain_delete_node_(iterator->chain, iterator->current); + } + + iterator->current = save; + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_iterator_insert_block_before(FLAC__Metadata_Iterator *iterator, FLAC__StreamMetadata *block) +{ + FLAC__Metadata_Node *node; + + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->current); + FLAC__ASSERT(0 != block); + + if(block->type == FLAC__METADATA_TYPE_STREAMINFO) + return false; + + if(0 == iterator->current->prev) { + FLAC__ASSERT(iterator->current->data->type == FLAC__METADATA_TYPE_STREAMINFO); + return false; + } + + if(0 == (node = node_new_())) + return false; + + node->data = block; + iterator_insert_node_(iterator, node); + iterator->current = node; + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_iterator_insert_block_after(FLAC__Metadata_Iterator *iterator, FLAC__StreamMetadata *block) +{ + FLAC__Metadata_Node *node; + + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->current); + FLAC__ASSERT(0 != block); + + if(block->type == FLAC__METADATA_TYPE_STREAMINFO) + return false; + + if(0 == (node = node_new_())) + return false; + + node->data = block; + iterator_insert_node_after_(iterator, node); + iterator->current = node; + return true; +} + + +/**************************************************************************** + * + * Local function definitions + * + ***************************************************************************/ + +void pack_uint32_(FLAC__uint32 val, FLAC__byte *b, unsigned bytes) +{ + unsigned i; + + b += bytes; + + for(i = 0; i < bytes; i++) { + *(--b) = (FLAC__byte)(val & 0xff); + val >>= 8; + } +} + +void pack_uint32_little_endian_(FLAC__uint32 val, FLAC__byte *b, unsigned bytes) +{ + unsigned i; + + for(i = 0; i < bytes; i++) { + *(b++) = (FLAC__byte)(val & 0xff); + val >>= 8; + } +} + +void pack_uint64_(FLAC__uint64 val, FLAC__byte *b, unsigned bytes) +{ + unsigned i; + + b += bytes; + + for(i = 0; i < bytes; i++) { + *(--b) = (FLAC__byte)(val & 0xff); + val >>= 8; + } +} + +FLAC__uint32 unpack_uint32_(FLAC__byte *b, unsigned bytes) +{ + FLAC__uint32 ret = 0; + unsigned i; + + for(i = 0; i < bytes; i++) + ret = (ret << 8) | (FLAC__uint32)(*b++); + + return ret; +} + +FLAC__uint32 unpack_uint32_little_endian_(FLAC__byte *b, unsigned bytes) +{ + FLAC__uint32 ret = 0; + unsigned i; + + b += bytes; + + for(i = 0; i < bytes; i++) + ret = (ret << 8) | (FLAC__uint32)(*--b); + + return ret; +} + +FLAC__uint64 unpack_uint64_(FLAC__byte *b, unsigned bytes) +{ + FLAC__uint64 ret = 0; + unsigned i; + + for(i = 0; i < bytes; i++) + ret = (ret << 8) | (FLAC__uint64)(*b++); + + return ret; +} + +FLAC__bool read_metadata_block_header_(FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + if(!read_metadata_block_header_cb_((FLAC__IOHandle)iterator->file, (FLAC__IOCallback_Read)fread, &iterator->is_last, &iterator->type, &iterator->length)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + return false; + } + + return true; +} + +FLAC__bool read_metadata_block_data_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block) +{ + FLAC__ASSERT(0 != iterator); + FLAC__ASSERT(0 != iterator->file); + + iterator->status = read_metadata_block_data_cb_((FLAC__IOHandle)iterator->file, (FLAC__IOCallback_Read)fread, fseek_wrapper_, block); + + return (iterator->status == FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK); +} + +FLAC__bool read_metadata_block_header_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__bool *is_last, FLAC__MetadataType *type, unsigned *length) +{ + FLAC__byte raw_header[FLAC__STREAM_METADATA_HEADER_LENGTH]; + + if(read_cb(raw_header, 1, FLAC__STREAM_METADATA_HEADER_LENGTH, handle) != FLAC__STREAM_METADATA_HEADER_LENGTH) + return false; + + *is_last = raw_header[0] & 0x80? true : false; + *type = (FLAC__MetadataType)(raw_header[0] & 0x7f); + *length = unpack_uint32_(raw_header + 1, 3); + + /* Note that we don't check: + * if(iterator->type >= FLAC__METADATA_TYPE_UNDEFINED) + * we just will read in an opaque block + */ + + return true; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata *block) +{ + switch(block->type) { + case FLAC__METADATA_TYPE_STREAMINFO: + return read_metadata_block_data_streaminfo_cb_(handle, read_cb, &block->data.stream_info); + case FLAC__METADATA_TYPE_PADDING: + return read_metadata_block_data_padding_cb_(handle, seek_cb, &block->data.padding, block->length); + case FLAC__METADATA_TYPE_APPLICATION: + return read_metadata_block_data_application_cb_(handle, read_cb, &block->data.application, block->length); + case FLAC__METADATA_TYPE_SEEKTABLE: + return read_metadata_block_data_seektable_cb_(handle, read_cb, &block->data.seek_table, block->length); + case FLAC__METADATA_TYPE_VORBIS_COMMENT: + return read_metadata_block_data_vorbis_comment_cb_(handle, read_cb, seek_cb, &block->data.vorbis_comment, block->length); + case FLAC__METADATA_TYPE_CUESHEET: + return read_metadata_block_data_cuesheet_cb_(handle, read_cb, &block->data.cue_sheet); + case FLAC__METADATA_TYPE_PICTURE: + return read_metadata_block_data_picture_cb_(handle, read_cb, &block->data.picture); + default: + return read_metadata_block_data_unknown_cb_(handle, read_cb, &block->data.unknown, block->length); + } +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_streaminfo_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_StreamInfo *block) +{ + FLAC__byte buffer[FLAC__STREAM_METADATA_STREAMINFO_LENGTH], *b; + + if(read_cb(buffer, 1, FLAC__STREAM_METADATA_STREAMINFO_LENGTH, handle) != FLAC__STREAM_METADATA_STREAMINFO_LENGTH) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + + b = buffer; + + /* we are using hardcoded numbers for simplicity but we should + * probably eventually write a bit-level unpacker and use the + * _STREAMINFO_ constants. + */ + block->min_blocksize = unpack_uint32_(b, 2); b += 2; + block->max_blocksize = unpack_uint32_(b, 2); b += 2; + block->min_framesize = unpack_uint32_(b, 3); b += 3; + block->max_framesize = unpack_uint32_(b, 3); b += 3; + block->sample_rate = (unpack_uint32_(b, 2) << 4) | ((unsigned)(b[2] & 0xf0) >> 4); + block->channels = (unsigned)((b[2] & 0x0e) >> 1) + 1; + block->bits_per_sample = ((((unsigned)(b[2] & 0x01)) << 4) | (((unsigned)(b[3] & 0xf0)) >> 4)) + 1; + block->total_samples = (((FLAC__uint64)(b[3] & 0x0f)) << 32) | unpack_uint64_(b+4, 4); + memcpy(block->md5sum, b+8, 16); + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_Padding *block, unsigned block_length) +{ + (void)block; /* nothing to do; we don't care about reading the padding bytes */ + + if(0 != seek_cb(handle, block_length, SEEK_CUR)) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Application *block, unsigned block_length) +{ + const unsigned id_bytes = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8; + + if(read_cb(block->id, 1, id_bytes, handle) != id_bytes) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + + if(block_length < id_bytes) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + + block_length -= id_bytes; + + if(block_length == 0) { + block->data = 0; + } + else { + if(0 == (block->data = malloc(block_length))) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + + if(read_cb(block->data, 1, block_length, handle) != block_length) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + } + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_SeekTable *block, unsigned block_length) +{ + unsigned i; + FLAC__byte buffer[FLAC__STREAM_METADATA_SEEKPOINT_LENGTH]; + + FLAC__ASSERT(block_length % FLAC__STREAM_METADATA_SEEKPOINT_LENGTH == 0); + + block->num_points = block_length / FLAC__STREAM_METADATA_SEEKPOINT_LENGTH; + + if(block->num_points == 0) + block->points = 0; + else if(0 == (block->points = safe_malloc_mul_2op_p(block->num_points, /*times*/sizeof(FLAC__StreamMetadata_SeekPoint)))) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + + for(i = 0; i < block->num_points; i++) { + if(read_cb(buffer, 1, FLAC__STREAM_METADATA_SEEKPOINT_LENGTH, handle) != FLAC__STREAM_METADATA_SEEKPOINT_LENGTH) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + /* some MAGIC NUMBERs here */ + block->points[i].sample_number = unpack_uint64_(buffer, 8); + block->points[i].stream_offset = unpack_uint64_(buffer+8, 8); + block->points[i].frame_samples = unpack_uint32_(buffer+16, 2); + } + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_entry_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_VorbisComment_Entry *entry, unsigned max_length) +{ + const unsigned entry_length_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8; + FLAC__byte buffer[4]; /* magic number is asserted below */ + + FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8 == sizeof(buffer)); + + if(max_length < entry_length_len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_BAD_METADATA; + + max_length -= entry_length_len; + if(read_cb(buffer, 1, entry_length_len, handle) != entry_length_len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + entry->length = unpack_uint32_little_endian_(buffer, entry_length_len); + if(max_length < entry->length) { + entry->length = 0; + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_BAD_METADATA; + } else max_length -= entry->length; + + if(0 != entry->entry) + free(entry->entry); + + if(entry->length == 0) { + entry->entry = 0; + } + else { + if(0 == (entry->entry = safe_malloc_add_2op_(entry->length, /*+*/1))) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + + if(read_cb(entry->entry, 1, entry->length, handle) != entry->length) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + + entry->entry[entry->length] = '\0'; + } + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_VorbisComment *block, unsigned block_length) +{ + unsigned i; + FLAC__Metadata_SimpleIteratorStatus status; + const unsigned num_comments_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN / 8; + FLAC__byte buffer[4]; /* magic number is asserted below */ + + FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN / 8 == sizeof(buffer)); + + status = read_metadata_block_data_vorbis_comment_entry_cb_(handle, read_cb, &(block->vendor_string), block_length); + if(block_length >= 4) + block_length -= 4; + if(status == FLAC__METADATA_SIMPLE_ITERATOR_STATUS_BAD_METADATA) + goto skip; + else if(status != FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK) + return status; + block_length -= block->vendor_string.length; + + if(block_length < num_comments_len) goto skip; else block_length -= num_comments_len; + if(read_cb(buffer, 1, num_comments_len, handle) != num_comments_len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + block->num_comments = unpack_uint32_little_endian_(buffer, num_comments_len); + + if(block->num_comments == 0) { + block->comments = 0; + } + else if(0 == (block->comments = calloc(block->num_comments, sizeof(FLAC__StreamMetadata_VorbisComment_Entry)))) { + block->num_comments = 0; + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + } + + for(i = 0; i < block->num_comments; i++) { + status = read_metadata_block_data_vorbis_comment_entry_cb_(handle, read_cb, block->comments + i, block_length); + if(block_length >= 4) block_length -= 4; + if(status == FLAC__METADATA_SIMPLE_ITERATOR_STATUS_BAD_METADATA) { + block->num_comments = i; + goto skip; + } + else if(status != FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK) return status; + block_length -= block->comments[i].length; + } + + skip: + if(block_length > 0) { + /* bad metadata */ + if(0 != seek_cb(handle, block_length, SEEK_CUR)) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + } + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cuesheet_track_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_CueSheet_Track *track) +{ + unsigned i, len; + FLAC__byte buffer[32]; /* asserted below that this is big enough */ + + FLAC__ASSERT(sizeof(buffer) >= sizeof(FLAC__uint64)); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= (FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN) / 8); + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + track->offset = unpack_uint64_(buffer, len); + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + track->number = (FLAC__byte)unpack_uint32_(buffer, len); + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN / 8; + if(read_cb(track->isrc, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + + FLAC__ASSERT((FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN) % 8 == 0); + len = (FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN) / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN == 1); + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN == 1); + track->type = buffer[0] >> 7; + track->pre_emphasis = (buffer[0] >> 6) & 1; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + track->num_indices = (FLAC__byte)unpack_uint32_(buffer, len); + + if(track->num_indices == 0) { + track->indices = 0; + } + else if(0 == (track->indices = calloc(track->num_indices, sizeof(FLAC__StreamMetadata_CueSheet_Index)))) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + + for(i = 0; i < track->num_indices; i++) { + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + track->indices[i].offset = unpack_uint64_(buffer, len); + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + track->indices[i].number = (FLAC__byte)unpack_uint32_(buffer, len); + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + } + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cuesheet_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_CueSheet *block) +{ + unsigned i, len; + FLAC__Metadata_SimpleIteratorStatus status; + FLAC__byte buffer[1024]; /* MSVC needs a constant expression so we put a magic number and assert */ + + FLAC__ASSERT((FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN + FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN)/8 <= sizeof(buffer)); + FLAC__ASSERT(sizeof(FLAC__uint64) <= sizeof(buffer)); + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN / 8; + if(read_cb(block->media_catalog_number, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + block->lead_in = unpack_uint64_(buffer, len); + + FLAC__ASSERT((FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN + FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN) % 8 == 0); + len = (FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN + FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN) / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + block->is_cd = buffer[0]&0x80? true : false; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + block->num_tracks = unpack_uint32_(buffer, len); + + if(block->num_tracks == 0) { + block->tracks = 0; + } + else if(0 == (block->tracks = calloc(block->num_tracks, sizeof(FLAC__StreamMetadata_CueSheet_Track)))) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + + for(i = 0; i < block->num_tracks; i++) { + if(FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK != (status = read_metadata_block_data_cuesheet_track_cb_(handle, read_cb, block->tracks + i))) + return status; + } + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_picture_cstring_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__byte **data, FLAC__uint32 *length, FLAC__uint32 length_len) +{ + FLAC__byte buffer[sizeof(FLAC__uint32)]; + + FLAC__ASSERT(0 != data); + FLAC__ASSERT(length_len%8 == 0); + + length_len /= 8; /* convert to bytes */ + + FLAC__ASSERT(sizeof(buffer) >= length_len); + + if(read_cb(buffer, 1, length_len, handle) != length_len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + *length = unpack_uint32_(buffer, length_len); + + if(0 != *data) + free(*data); + + if(0 == (*data = safe_malloc_add_2op_(*length, /*+*/1))) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + + if(*length > 0) { + if(read_cb(*data, 1, *length, handle) != *length) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + } + + (*data)[*length] = '\0'; + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_picture_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Picture *block) +{ + FLAC__Metadata_SimpleIteratorStatus status; + FLAC__byte buffer[4]; /* asserted below that this is big enough */ + FLAC__uint32 len; + + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_TYPE_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_COLORS_LEN/8); + + FLAC__ASSERT(FLAC__STREAM_METADATA_PICTURE_TYPE_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_PICTURE_TYPE_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + block->type = (FLAC__StreamMetadata_Picture_Type)unpack_uint32_(buffer, len); + + if((status = read_metadata_block_data_picture_cstring_cb_(handle, read_cb, (FLAC__byte**)(&(block->mime_type)), &len, FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN)) != FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK) + return status; + + if((status = read_metadata_block_data_picture_cstring_cb_(handle, read_cb, &(block->description), &len, FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN)) != FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK) + return status; + + FLAC__ASSERT(FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + block->width = unpack_uint32_(buffer, len); + + FLAC__ASSERT(FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + block->height = unpack_uint32_(buffer, len); + + FLAC__ASSERT(FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + block->depth = unpack_uint32_(buffer, len); + + FLAC__ASSERT(FLAC__STREAM_METADATA_PICTURE_COLORS_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_PICTURE_COLORS_LEN / 8; + if(read_cb(buffer, 1, len, handle) != len) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + block->colors = unpack_uint32_(buffer, len); + + /* for convenience we use read_metadata_block_data_picture_cstring_cb_() even though it adds an extra terminating NUL we don't use */ + if((status = read_metadata_block_data_picture_cstring_cb_(handle, read_cb, &(block->data), &(block->data_length), FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN)) != FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK) + return status; + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Unknown *block, unsigned block_length) +{ + if(block_length == 0) { + block->data = 0; + } + else { + if(0 == (block->data = malloc(block_length))) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + + if(read_cb(block->data, 1, block_length, handle) != block_length) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + } + + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; +} + +FLAC__bool write_metadata_block_header_(FILE *file, FLAC__Metadata_SimpleIteratorStatus *status, const FLAC__StreamMetadata *block) +{ + FLAC__ASSERT(0 != file); + FLAC__ASSERT(0 != status); + + if(!write_metadata_block_header_cb_((FLAC__IOHandle)file, (FLAC__IOCallback_Write)fwrite, block)) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_WRITE_ERROR; + return false; + } + + return true; +} + +FLAC__bool write_metadata_block_data_(FILE *file, FLAC__Metadata_SimpleIteratorStatus *status, const FLAC__StreamMetadata *block) +{ + FLAC__ASSERT(0 != file); + FLAC__ASSERT(0 != status); + + if (write_metadata_block_data_cb_((FLAC__IOHandle)file, (FLAC__IOCallback_Write)fwrite, block)) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK; + return true; + } + else { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_WRITE_ERROR; + return false; + } +} + +FLAC__bool write_metadata_block_header_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata *block) +{ + FLAC__byte buffer[FLAC__STREAM_METADATA_HEADER_LENGTH]; + + FLAC__ASSERT(block->length < (1u << FLAC__STREAM_METADATA_LENGTH_LEN)); + /* double protection */ + if(block->length >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN)) + return false; + + buffer[0] = (block->is_last? 0x80 : 0) | (FLAC__byte)block->type; + pack_uint32_(block->length, buffer + 1, 3); + + if(write_cb(buffer, 1, FLAC__STREAM_METADATA_HEADER_LENGTH, handle) != FLAC__STREAM_METADATA_HEADER_LENGTH) + return false; + + return true; +} + +FLAC__bool write_metadata_block_data_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata *block) +{ + FLAC__ASSERT(0 != block); + + switch(block->type) { + case FLAC__METADATA_TYPE_STREAMINFO: + return write_metadata_block_data_streaminfo_cb_(handle, write_cb, &block->data.stream_info); + case FLAC__METADATA_TYPE_PADDING: + return write_metadata_block_data_padding_cb_(handle, write_cb, &block->data.padding, block->length); + case FLAC__METADATA_TYPE_APPLICATION: + return write_metadata_block_data_application_cb_(handle, write_cb, &block->data.application, block->length); + case FLAC__METADATA_TYPE_SEEKTABLE: + return write_metadata_block_data_seektable_cb_(handle, write_cb, &block->data.seek_table); + case FLAC__METADATA_TYPE_VORBIS_COMMENT: + return write_metadata_block_data_vorbis_comment_cb_(handle, write_cb, &block->data.vorbis_comment); + case FLAC__METADATA_TYPE_CUESHEET: + return write_metadata_block_data_cuesheet_cb_(handle, write_cb, &block->data.cue_sheet); + case FLAC__METADATA_TYPE_PICTURE: + return write_metadata_block_data_picture_cb_(handle, write_cb, &block->data.picture); + default: + return write_metadata_block_data_unknown_cb_(handle, write_cb, &block->data.unknown, block->length); + } +} + +FLAC__bool write_metadata_block_data_streaminfo_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_StreamInfo *block) +{ + FLAC__byte buffer[FLAC__STREAM_METADATA_STREAMINFO_LENGTH]; + const unsigned channels1 = block->channels - 1; + const unsigned bps1 = block->bits_per_sample - 1; + + /* we are using hardcoded numbers for simplicity but we should + * probably eventually write a bit-level packer and use the + * _STREAMINFO_ constants. + */ + pack_uint32_(block->min_blocksize, buffer, 2); + pack_uint32_(block->max_blocksize, buffer+2, 2); + pack_uint32_(block->min_framesize, buffer+4, 3); + pack_uint32_(block->max_framesize, buffer+7, 3); + buffer[10] = (block->sample_rate >> 12) & 0xff; + buffer[11] = (block->sample_rate >> 4) & 0xff; + buffer[12] = ((block->sample_rate & 0x0f) << 4) | (channels1 << 1) | (bps1 >> 4); + buffer[13] = (FLAC__byte)(((bps1 & 0x0f) << 4) | ((block->total_samples >> 32) & 0x0f)); + pack_uint32_((FLAC__uint32)block->total_samples, buffer+14, 4); + memcpy(buffer+18, block->md5sum, 16); + + if(write_cb(buffer, 1, FLAC__STREAM_METADATA_STREAMINFO_LENGTH, handle) != FLAC__STREAM_METADATA_STREAMINFO_LENGTH) + return false; + + return true; +} + +FLAC__bool write_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Padding *block, unsigned block_length) +{ + unsigned i, n = block_length; + FLAC__byte buffer[1024]; + + (void)block; + + memset(buffer, 0, 1024); + + for(i = 0; i < n/1024; i++) + if(write_cb(buffer, 1, 1024, handle) != 1024) + return false; + + n %= 1024; + + if(write_cb(buffer, 1, n, handle) != n) + return false; + + return true; +} + +FLAC__bool write_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Application *block, unsigned block_length) +{ + const unsigned id_bytes = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8; + + if(write_cb(block->id, 1, id_bytes, handle) != id_bytes) + return false; + + block_length -= id_bytes; + + if(write_cb(block->data, 1, block_length, handle) != block_length) + return false; + + return true; +} + +FLAC__bool write_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_SeekTable *block) +{ + unsigned i; + FLAC__byte buffer[FLAC__STREAM_METADATA_SEEKPOINT_LENGTH]; + + for(i = 0; i < block->num_points; i++) { + /* some MAGIC NUMBERs here */ + pack_uint64_(block->points[i].sample_number, buffer, 8); + pack_uint64_(block->points[i].stream_offset, buffer+8, 8); + pack_uint32_(block->points[i].frame_samples, buffer+16, 2); + if(write_cb(buffer, 1, FLAC__STREAM_METADATA_SEEKPOINT_LENGTH, handle) != FLAC__STREAM_METADATA_SEEKPOINT_LENGTH) + return false; + } + + return true; +} + +FLAC__bool write_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_VorbisComment *block) +{ + unsigned i; + const unsigned entry_length_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8; + const unsigned num_comments_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN / 8; + FLAC__byte buffer[4]; /* magic number is asserted below */ + + FLAC__ASSERT(flac_max(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN, FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN) / 8 == sizeof(buffer)); + + pack_uint32_little_endian_(block->vendor_string.length, buffer, entry_length_len); + if(write_cb(buffer, 1, entry_length_len, handle) != entry_length_len) + return false; + if(write_cb(block->vendor_string.entry, 1, block->vendor_string.length, handle) != block->vendor_string.length) + return false; + + pack_uint32_little_endian_(block->num_comments, buffer, num_comments_len); + if(write_cb(buffer, 1, num_comments_len, handle) != num_comments_len) + return false; + + for(i = 0; i < block->num_comments; i++) { + pack_uint32_little_endian_(block->comments[i].length, buffer, entry_length_len); + if(write_cb(buffer, 1, entry_length_len, handle) != entry_length_len) + return false; + if(write_cb(block->comments[i].entry, 1, block->comments[i].length, handle) != block->comments[i].length) + return false; + } + + return true; +} + +FLAC__bool write_metadata_block_data_cuesheet_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_CueSheet *block) +{ + unsigned i, j, len; + FLAC__byte buffer[1024]; /* asserted below that this is big enough */ + + FLAC__ASSERT(sizeof(buffer) >= sizeof(FLAC__uint64)); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= (FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN + FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN)/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN/8); + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN / 8; + if(write_cb(block->media_catalog_number, 1, len, handle) != len) + return false; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN / 8; + pack_uint64_(block->lead_in, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + FLAC__ASSERT((FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN + FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN) % 8 == 0); + len = (FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN + FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN) / 8; + memset(buffer, 0, len); + if(block->is_cd) + buffer[0] |= 0x80; + if(write_cb(buffer, 1, len, handle) != len) + return false; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN / 8; + pack_uint32_(block->num_tracks, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + for(i = 0; i < block->num_tracks; i++) { + FLAC__StreamMetadata_CueSheet_Track *track = block->tracks + i; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN / 8; + pack_uint64_(track->offset, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN / 8; + pack_uint32_(track->number, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN / 8; + if(write_cb(track->isrc, 1, len, handle) != len) + return false; + + FLAC__ASSERT((FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN) % 8 == 0); + len = (FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN + FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN) / 8; + memset(buffer, 0, len); + buffer[0] = (track->type << 7) | (track->pre_emphasis << 6); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN / 8; + pack_uint32_(track->num_indices, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + for(j = 0; j < track->num_indices; j++) { + FLAC__StreamMetadata_CueSheet_Index *indx = track->indices + j; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN / 8; + pack_uint64_(indx->offset, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN / 8; + pack_uint32_(indx->number, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN % 8 == 0); + len = FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN / 8; + memset(buffer, 0, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + } + } + + return true; +} + +FLAC__bool write_metadata_block_data_picture_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Picture *block) +{ + unsigned len; + size_t slen; + FLAC__byte buffer[4]; /* magic number is asserted below */ + + FLAC__ASSERT(0 == FLAC__STREAM_METADATA_PICTURE_TYPE_LEN%8); + FLAC__ASSERT(0 == FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN%8); + FLAC__ASSERT(0 == FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN%8); + FLAC__ASSERT(0 == FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN%8); + FLAC__ASSERT(0 == FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN%8); + FLAC__ASSERT(0 == FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN%8); + FLAC__ASSERT(0 == FLAC__STREAM_METADATA_PICTURE_COLORS_LEN%8); + FLAC__ASSERT(0 == FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN%8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_TYPE_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_COLORS_LEN/8); + FLAC__ASSERT(sizeof(buffer) >= FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN/8); + + len = FLAC__STREAM_METADATA_PICTURE_TYPE_LEN/8; + pack_uint32_(block->type, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + len = FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN/8; + slen = strlen(block->mime_type); + pack_uint32_(slen, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + if(write_cb(block->mime_type, 1, slen, handle) != slen) + return false; + + len = FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN/8; + slen = strlen((const char *)block->description); + pack_uint32_(slen, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + if(write_cb(block->description, 1, slen, handle) != slen) + return false; + + len = FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN/8; + pack_uint32_(block->width, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + len = FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN/8; + pack_uint32_(block->height, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + len = FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN/8; + pack_uint32_(block->depth, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + len = FLAC__STREAM_METADATA_PICTURE_COLORS_LEN/8; + pack_uint32_(block->colors, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + + len = FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN/8; + pack_uint32_(block->data_length, buffer, len); + if(write_cb(buffer, 1, len, handle) != len) + return false; + if(write_cb(block->data, 1, block->data_length, handle) != block->data_length) + return false; + + return true; +} + +FLAC__bool write_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Unknown *block, unsigned block_length) +{ + if(write_cb(block->data, 1, block_length, handle) != block_length) + return false; + + return true; +} + +FLAC__bool write_metadata_block_stationary_(FLAC__Metadata_SimpleIterator *iterator, const FLAC__StreamMetadata *block) +{ + if(0 != fseeko(iterator->file, iterator->offset[iterator->depth], SEEK_SET)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + + if(!write_metadata_block_header_(iterator->file, &iterator->status, block)) + return false; + + if(!write_metadata_block_data_(iterator->file, &iterator->status, block)) + return false; + + if(0 != fseeko(iterator->file, iterator->offset[iterator->depth], SEEK_SET)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + + return read_metadata_block_header_(iterator); +} + +FLAC__bool write_metadata_block_stationary_with_padding_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, unsigned padding_length, FLAC__bool padding_is_last) +{ + FLAC__StreamMetadata *padding; + + if(0 != fseeko(iterator->file, iterator->offset[iterator->depth], SEEK_SET)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + + block->is_last = false; + + if(!write_metadata_block_header_(iterator->file, &iterator->status, block)) + return false; + + if(!write_metadata_block_data_(iterator->file, &iterator->status, block)) + return false; + + if(0 == (padding = FLAC__metadata_object_new(FLAC__METADATA_TYPE_PADDING))) + return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + + padding->is_last = padding_is_last; + padding->length = padding_length; + + if(!write_metadata_block_header_(iterator->file, &iterator->status, padding)) { + FLAC__metadata_object_delete(padding); + return false; + } + + if(!write_metadata_block_data_(iterator->file, &iterator->status, padding)) { + FLAC__metadata_object_delete(padding); + return false; + } + + FLAC__metadata_object_delete(padding); + + if(0 != fseeko(iterator->file, iterator->offset[iterator->depth], SEEK_SET)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + + return read_metadata_block_header_(iterator); +} + +FLAC__bool rewrite_whole_file_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, FLAC__bool append) +{ + FILE *tempfile = NULL; + char *tempfilename = NULL; + int fixup_is_last_code = 0; /* 0 => no need to change any is_last flags */ + FLAC__off_t fixup_is_last_flag_offset = -1; + + FLAC__ASSERT(0 != block || append == false); + + if(iterator->is_last) { + if(append) { + fixup_is_last_code = 1; /* 1 => clear the is_last flag at the following offset */ + fixup_is_last_flag_offset = iterator->offset[iterator->depth]; + } + else if(0 == block) { + simple_iterator_push_(iterator); + if(!FLAC__metadata_simple_iterator_prev(iterator)) { + (void)simple_iterator_pop_(iterator); + return false; + } + fixup_is_last_code = -1; /* -1 => set the is_last the flag at the following offset */ + fixup_is_last_flag_offset = iterator->offset[iterator->depth]; + if(!simple_iterator_pop_(iterator)) + return false; + } + } + + if(!simple_iterator_copy_file_prefix_(iterator, &tempfile, &tempfilename, append)) + return false; + + if(0 != block) { + if(!write_metadata_block_header_(tempfile, &iterator->status, block)) { + cleanup_tempfile_(&tempfile, &tempfilename); + return false; + } + + if(!write_metadata_block_data_(tempfile, &iterator->status, block)) { + cleanup_tempfile_(&tempfile, &tempfilename); + return false; + } + } + + if(!simple_iterator_copy_file_postfix_(iterator, &tempfile, &tempfilename, fixup_is_last_code, fixup_is_last_flag_offset, block==0)) + return false; + + if(append) + return FLAC__metadata_simple_iterator_next(iterator); + + return true; +} + +void simple_iterator_push_(FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(iterator->depth+1 < SIMPLE_ITERATOR_MAX_PUSH_DEPTH); + iterator->offset[iterator->depth+1] = iterator->offset[iterator->depth]; + iterator->depth++; +} + +FLAC__bool simple_iterator_pop_(FLAC__Metadata_SimpleIterator *iterator) +{ + FLAC__ASSERT(iterator->depth > 0); + iterator->depth--; + if(0 != fseeko(iterator->file, iterator->offset[iterator->depth], SEEK_SET)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + + return read_metadata_block_header_(iterator); +} + +/* return meanings: + * 0: ok + * 1: read error + * 2: seek error + * 3: not a FLAC file + */ +unsigned seek_to_first_metadata_block_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb) +{ + FLAC__byte buffer[4]; + size_t n; + unsigned i; + + FLAC__ASSERT(FLAC__STREAM_SYNC_LENGTH == sizeof(buffer)); + + /* skip any id3v2 tag */ + errno = 0; + n = read_cb(buffer, 1, 4, handle); + if(errno) + return 1; + else if(n != 4) + return 3; + else if(0 == memcmp(buffer, "ID3", 3)) { + unsigned tag_length = 0; + + /* skip to the tag length */ + if(seek_cb(handle, 2, SEEK_CUR) < 0) + return 2; + + /* read the length */ + for(i = 0; i < 4; i++) { + if(read_cb(buffer, 1, 1, handle) < 1 || buffer[0] & 0x80) + return 1; + tag_length <<= 7; + tag_length |= (buffer[0] & 0x7f); + } + + /* skip the rest of the tag */ + if(seek_cb(handle, tag_length, SEEK_CUR) < 0) + return 2; + + /* read the stream sync code */ + errno = 0; + n = read_cb(buffer, 1, 4, handle); + if(errno) + return 1; + else if(n != 4) + return 3; + } + + /* check for the fLaC signature */ + if(0 == memcmp(FLAC__STREAM_SYNC_STRING, buffer, FLAC__STREAM_SYNC_LENGTH)) + return 0; + else + return 3; +} + +unsigned seek_to_first_metadata_block_(FILE *f) +{ + return seek_to_first_metadata_block_cb_((FLAC__IOHandle)f, (FLAC__IOCallback_Read)fread, fseek_wrapper_); +} + +FLAC__bool simple_iterator_copy_file_prefix_(FLAC__Metadata_SimpleIterator *iterator, FILE **tempfile, char **tempfilename, FLAC__bool append) +{ + const FLAC__off_t offset_end = append? iterator->offset[iterator->depth] + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH + (FLAC__off_t)iterator->length : iterator->offset[iterator->depth]; + + if(0 != fseeko(iterator->file, 0, SEEK_SET)) { + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + if(!open_tempfile_(iterator->filename, iterator->tempfile_path_prefix, tempfile, tempfilename, &iterator->status)) { + cleanup_tempfile_(tempfile, tempfilename); + return false; + } + if(!copy_n_bytes_from_file_(iterator->file, *tempfile, offset_end, &iterator->status)) { + cleanup_tempfile_(tempfile, tempfilename); + return false; + } + + return true; +} + +FLAC__bool simple_iterator_copy_file_postfix_(FLAC__Metadata_SimpleIterator *iterator, FILE **tempfile, char **tempfilename, int fixup_is_last_code, FLAC__off_t fixup_is_last_flag_offset, FLAC__bool backup) +{ + FLAC__off_t save_offset = iterator->offset[iterator->depth]; + FLAC__ASSERT(0 != *tempfile); + + if(0 != fseeko(iterator->file, save_offset + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH + (FLAC__off_t)iterator->length, SEEK_SET)) { + cleanup_tempfile_(tempfile, tempfilename); + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + if(!copy_remaining_bytes_from_file_(iterator->file, *tempfile, &iterator->status)) { + cleanup_tempfile_(tempfile, tempfilename); + return false; + } + + if(fixup_is_last_code != 0) { + /* + * if code == 1, it means a block was appended to the end so + * we have to clear the is_last flag of the previous block + * if code == -1, it means the last block was deleted so + * we have to set the is_last flag of the previous block + */ + /* MAGIC NUMBERs here; we know the is_last flag is the high bit of the byte at this location */ + FLAC__byte x; + if(0 != fseeko(*tempfile, fixup_is_last_flag_offset, SEEK_SET)) { + cleanup_tempfile_(tempfile, tempfilename); + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + if(fread(&x, 1, 1, *tempfile) != 1) { + cleanup_tempfile_(tempfile, tempfilename); + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + return false; + } + if(fixup_is_last_code > 0) { + FLAC__ASSERT(x & 0x80); + x &= 0x7f; + } + else { + FLAC__ASSERT(!(x & 0x80)); + x |= 0x80; + } + if(0 != fseeko(*tempfile, fixup_is_last_flag_offset, SEEK_SET)) { + cleanup_tempfile_(tempfile, tempfilename); + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR; + return false; + } + if(local__fwrite(&x, 1, 1, *tempfile) != 1) { + cleanup_tempfile_(tempfile, tempfilename); + iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_WRITE_ERROR; + return false; + } + } + + (void)fclose(iterator->file); + + if(!transport_tempfile_(iterator->filename, tempfile, tempfilename, &iterator->status)) + return false; + + if(iterator->has_stats) + set_file_stats_(iterator->filename, &iterator->stats); + + if(!simple_iterator_prime_input_(iterator, !iterator->is_writable)) + return false; + if(backup) { + while(iterator->offset[iterator->depth] + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH + (FLAC__off_t)iterator->length < save_offset) + if(!FLAC__metadata_simple_iterator_next(iterator)) + return false; + return true; + } + else { + /* move the iterator to it's original block faster by faking a push, then doing a pop_ */ + FLAC__ASSERT(iterator->depth == 0); + iterator->offset[0] = save_offset; + iterator->depth++; + return simple_iterator_pop_(iterator); + } +} + +FLAC__bool copy_n_bytes_from_file_(FILE *file, FILE *tempfile, FLAC__off_t bytes, FLAC__Metadata_SimpleIteratorStatus *status) +{ + FLAC__byte buffer[8192]; + size_t n; + + FLAC__ASSERT(bytes >= 0); + while(bytes > 0) { + n = flac_min(sizeof(buffer), (size_t)bytes); + if(fread(buffer, 1, n, file) != n) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + return false; + } + if(local__fwrite(buffer, 1, n, tempfile) != n) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_WRITE_ERROR; + return false; + } + bytes -= n; + } + + return true; +} + +FLAC__bool copy_n_bytes_from_file_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOHandle temp_handle, FLAC__IOCallback_Write temp_write_cb, FLAC__off_t bytes, FLAC__Metadata_SimpleIteratorStatus *status) +{ + FLAC__byte buffer[8192]; + size_t n; + + FLAC__ASSERT(bytes >= 0); + while(bytes > 0) { + n = flac_min(sizeof(buffer), (size_t)bytes); + if(read_cb(buffer, 1, n, handle) != n) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + return false; + } + if(temp_write_cb(buffer, 1, n, temp_handle) != n) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_WRITE_ERROR; + return false; + } + bytes -= n; + } + + return true; +} + +FLAC__bool copy_remaining_bytes_from_file_(FILE *file, FILE *tempfile, FLAC__Metadata_SimpleIteratorStatus *status) +{ + FLAC__byte buffer[8192]; + size_t n; + + while(!feof(file)) { + n = fread(buffer, 1, sizeof(buffer), file); + if(n == 0 && !feof(file)) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + return false; + } + if(n > 0 && local__fwrite(buffer, 1, n, tempfile) != n) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_WRITE_ERROR; + return false; + } + } + + return true; +} + +FLAC__bool copy_remaining_bytes_from_file_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Eof eof_cb, FLAC__IOHandle temp_handle, FLAC__IOCallback_Write temp_write_cb, FLAC__Metadata_SimpleIteratorStatus *status) +{ + FLAC__byte buffer[8192]; + size_t n; + + while(!eof_cb(handle)) { + n = read_cb(buffer, 1, sizeof(buffer), handle); + if(n == 0 && !eof_cb(handle)) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR; + return false; + } + if(n > 0 && temp_write_cb(buffer, 1, n, temp_handle) != n) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_WRITE_ERROR; + return false; + } + } + + return true; +} + +static int +local_snprintf(char *str, size_t size, const char *fmt, ...) +{ + va_list va; + int rc; + +#if defined _MSC_VER + if (size == 0) + return 1024; +#endif + + va_start (va, fmt); + +#if defined _MSC_VER + rc = vsnprintf_s (str, size, _TRUNCATE, fmt, va); + if (rc < 0) + rc = size - 1; +#elif defined __MINGW32__ + rc = __mingw_vsnprintf (str, size, fmt, va); +#else + rc = vsnprintf (str, size, fmt, va); +#endif + va_end (va); + + return rc; +} + +FLAC__bool open_tempfile_(const char *filename, const char *tempfile_path_prefix, FILE **tempfile, char **tempfilename, FLAC__Metadata_SimpleIteratorStatus *status) +{ + static const char *tempfile_suffix = ".metadata_edit"; + if(0 == tempfile_path_prefix) { + size_t dest_len = strlen(filename) + strlen(tempfile_suffix) + 1; + if(0 == (*tempfilename = safe_malloc_(dest_len))) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + return false; + } + local_snprintf(*tempfilename, dest_len, "%s%s", filename, tempfile_suffix); + } + else { + const char *p = strrchr(filename, '/'); + size_t dest_len; + if(0 == p) + p = filename; + else + p++; + + dest_len = strlen(tempfile_path_prefix) + strlen(p) + strlen(tempfile_suffix) + 2; + + if(0 == (*tempfilename = safe_malloc_(dest_len))) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR; + return false; + } + local_snprintf(*tempfilename, dest_len, "%s/%s%s", tempfile_path_prefix, p, tempfile_suffix); + } + + if(0 == (*tempfile = flac_fopen(*tempfilename, "w+b"))) { + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ERROR_OPENING_FILE; + return false; + } + + return true; +} + +FLAC__bool transport_tempfile_(const char *filename, FILE **tempfile, char **tempfilename, FLAC__Metadata_SimpleIteratorStatus *status) +{ + FLAC__ASSERT(0 != filename); + FLAC__ASSERT(0 != tempfile); + FLAC__ASSERT(0 != *tempfile); + FLAC__ASSERT(0 != tempfilename); + FLAC__ASSERT(0 != *tempfilename); + FLAC__ASSERT(0 != status); + + (void)fclose(*tempfile); + *tempfile = 0; + +#if defined _MSC_VER || defined __BORLANDC__ || defined __MINGW32__ || defined __EMX__ + /* on some flavors of windows, flac_rename() will fail if the destination already exists */ + if(flac_unlink(filename) < 0) { + cleanup_tempfile_(tempfile, tempfilename); + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_UNLINK_ERROR; + return false; + } +#endif + + /*@@@ to fully support the tempfile_path_prefix we need to update this piece to actually copy across filesystems instead of just flac_rename(): */ + if(0 != flac_rename(*tempfilename, filename)) { + cleanup_tempfile_(tempfile, tempfilename); + *status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_RENAME_ERROR; + return false; + } + + cleanup_tempfile_(tempfile, tempfilename); + + return true; +} + +void cleanup_tempfile_(FILE **tempfile, char **tempfilename) +{ + if(0 != *tempfile) { + (void)fclose(*tempfile); + *tempfile = 0; + } + + if(0 != *tempfilename) { + (void)flac_unlink(*tempfilename); + free(*tempfilename); + *tempfilename = 0; + } +} + +FLAC__bool get_file_stats_(const char *filename, struct flac_stat_s *stats) +{ + FLAC__ASSERT(0 != filename); + FLAC__ASSERT(0 != stats); + return (0 == flac_stat(filename, stats)); +} + +void set_file_stats_(const char *filename, struct flac_stat_s *stats) +{ + struct utimbuf srctime; + + FLAC__ASSERT(0 != filename); + FLAC__ASSERT(0 != stats); + + srctime.actime = stats->st_atime; + srctime.modtime = stats->st_mtime; + (void)flac_chmod(filename, stats->st_mode); + (void)flac_utime(filename, &srctime); +#if !defined _MSC_VER && !defined __BORLANDC__ && !defined __MINGW32__ + FLAC_CHECK_RETURN(chown(filename, stats->st_uid, -1)); + FLAC_CHECK_RETURN(chown(filename, -1, stats->st_gid)); +#endif +} + +int fseek_wrapper_(FLAC__IOHandle handle, FLAC__int64 offset, int whence) +{ + return fseeko((FILE*)handle, (FLAC__off_t)offset, whence); +} + +FLAC__int64 ftell_wrapper_(FLAC__IOHandle handle) +{ + return ftello((FILE*)handle); +} + +FLAC__Metadata_ChainStatus get_equivalent_status_(FLAC__Metadata_SimpleIteratorStatus status) +{ + switch(status) { + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK: + return FLAC__METADATA_CHAIN_STATUS_OK; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ILLEGAL_INPUT: + return FLAC__METADATA_CHAIN_STATUS_ILLEGAL_INPUT; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ERROR_OPENING_FILE: + return FLAC__METADATA_CHAIN_STATUS_ERROR_OPENING_FILE; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_NOT_A_FLAC_FILE: + return FLAC__METADATA_CHAIN_STATUS_NOT_A_FLAC_FILE; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_NOT_WRITABLE: + return FLAC__METADATA_CHAIN_STATUS_NOT_WRITABLE; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_BAD_METADATA: + return FLAC__METADATA_CHAIN_STATUS_BAD_METADATA; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR: + return FLAC__METADATA_CHAIN_STATUS_READ_ERROR; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_SEEK_ERROR: + return FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_WRITE_ERROR: + return FLAC__METADATA_CHAIN_STATUS_WRITE_ERROR; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_RENAME_ERROR: + return FLAC__METADATA_CHAIN_STATUS_RENAME_ERROR; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_UNLINK_ERROR: + return FLAC__METADATA_CHAIN_STATUS_UNLINK_ERROR; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR: + return FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR; + case FLAC__METADATA_SIMPLE_ITERATOR_STATUS_INTERNAL_ERROR: + default: + return FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR; + } +} diff --git a/libFLAC/metadata_object.c b/libFLAC/metadata_object.c new file mode 100644 index 00000000..9cb95019 --- /dev/null +++ b/libFLAC/metadata_object.c @@ -0,0 +1,1821 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2001-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <string.h> + +#include "private/metadata.h" +#include "private/memory.h" + +#include "FLAC/assert.h" +#include "share/alloc.h" +#include "share/compat.h" + +/* Alias the first (in share/alloc.h) to the second (in src/libFLAC/memory.c). */ +#define safe_malloc_mul_2op_ safe_malloc_mul_2op_p + + +/**************************************************************************** + * + * Local routines + * + ***************************************************************************/ + +/* copy bytes: + * from = NULL && bytes = 0 + * to <- NULL + * from != NULL && bytes > 0 + * to <- copy of from + * else ASSERT + * malloc error leaves 'to' unchanged + */ +static FLAC__bool copy_bytes_(FLAC__byte **to, const FLAC__byte *from, unsigned bytes) +{ + FLAC__ASSERT(to != NULL); + if (bytes > 0 && from != NULL) { + FLAC__byte *x; + if ((x = safe_malloc_(bytes)) == NULL) + return false; + memcpy(x, from, bytes); + *to = x; + } + else { + *to = 0; + } + return true; +} + +#if 0 /* UNUSED */ +/* like copy_bytes_(), but free()s the original '*to' if the copy succeeds and the original '*to' is non-NULL */ +static FLAC__bool free_copy_bytes_(FLAC__byte **to, const FLAC__byte *from, unsigned bytes) +{ + FLAC__byte *copy; + FLAC__ASSERT(to != NULL); + if (copy_bytes_(©, from, bytes)) { + free(*to); + *to = copy; + return true; + } + else + return false; +} +#endif + +/* reallocate entry to 1 byte larger and add a terminating NUL */ +/* realloc() failure leaves entry unchanged */ +static FLAC__bool ensure_null_terminated_(FLAC__byte **entry, unsigned length) +{ + FLAC__byte *x = safe_realloc_add_2op_(*entry, length, /*+*/1); + if (x != NULL) { + x[length] = '\0'; + *entry = x; + return true; + } + else + return false; +} + +/* copies the NUL-terminated C-string 'from' to '*to', leaving '*to' + * unchanged if malloc fails, free()ing the original '*to' if it + * succeeds and the original '*to' was not NULL + */ +static FLAC__bool copy_cstring_(char **to, const char *from) +{ + char *copy = strdup(from); + FLAC__ASSERT(to != NULL); + if (copy) { + free(*to); + *to = copy; + return true; + } + else + return false; +} + +static FLAC__bool copy_vcentry_(FLAC__StreamMetadata_VorbisComment_Entry *to, const FLAC__StreamMetadata_VorbisComment_Entry *from) +{ + to->length = from->length; + if (from->entry == 0) { + FLAC__ASSERT(from->length == 0); + to->entry = 0; + } + else { + FLAC__byte *x; + FLAC__ASSERT(from->length > 0); + if ((x = safe_malloc_add_2op_(from->length, /*+*/1)) == NULL) + return false; + memcpy(x, from->entry, from->length); + x[from->length] = '\0'; + to->entry = x; + } + return true; +} + +static FLAC__bool copy_track_(FLAC__StreamMetadata_CueSheet_Track *to, const FLAC__StreamMetadata_CueSheet_Track *from) +{ + memcpy(to, from, sizeof(FLAC__StreamMetadata_CueSheet_Track)); + if (from->indices == 0) { + FLAC__ASSERT(from->num_indices == 0); + } + else { + FLAC__StreamMetadata_CueSheet_Index *x; + FLAC__ASSERT(from->num_indices > 0); + if ((x = safe_malloc_mul_2op_p(from->num_indices, /*times*/sizeof(FLAC__StreamMetadata_CueSheet_Index))) == NULL) + return false; + memcpy(x, from->indices, from->num_indices * sizeof(FLAC__StreamMetadata_CueSheet_Index)); + to->indices = x; + } + return true; +} + +static void seektable_calculate_length_(FLAC__StreamMetadata *object) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + + object->length = object->data.seek_table.num_points * FLAC__STREAM_METADATA_SEEKPOINT_LENGTH; +} + +static FLAC__StreamMetadata_SeekPoint *seekpoint_array_new_(unsigned num_points) +{ + FLAC__StreamMetadata_SeekPoint *object_array; + + FLAC__ASSERT(num_points > 0); + + object_array = safe_malloc_mul_2op_p(num_points, /*times*/sizeof(FLAC__StreamMetadata_SeekPoint)); + + if (object_array != NULL) { + unsigned i; + for (i = 0; i < num_points; i++) { + object_array[i].sample_number = FLAC__STREAM_METADATA_SEEKPOINT_PLACEHOLDER; + object_array[i].stream_offset = 0; + object_array[i].frame_samples = 0; + } + } + + return object_array; +} + +static void vorbiscomment_calculate_length_(FLAC__StreamMetadata *object) +{ + unsigned i; + + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT); + + object->length = (FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN) / 8; + object->length += object->data.vorbis_comment.vendor_string.length; + object->length += (FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN) / 8; + for (i = 0; i < object->data.vorbis_comment.num_comments; i++) { + object->length += (FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8); + object->length += object->data.vorbis_comment.comments[i].length; + } +} + +static FLAC__StreamMetadata_VorbisComment_Entry *vorbiscomment_entry_array_new_(unsigned num_comments) +{ + FLAC__ASSERT(num_comments > 0); + + return safe_calloc_(num_comments, sizeof(FLAC__StreamMetadata_VorbisComment_Entry)); +} + +static void vorbiscomment_entry_array_delete_(FLAC__StreamMetadata_VorbisComment_Entry *object_array, unsigned num_comments) +{ + unsigned i; + + FLAC__ASSERT(object_array != NULL && num_comments > 0); + + for (i = 0; i < num_comments; i++) + free(object_array[i].entry); + + free(object_array); +} + +static FLAC__StreamMetadata_VorbisComment_Entry *vorbiscomment_entry_array_copy_(const FLAC__StreamMetadata_VorbisComment_Entry *object_array, unsigned num_comments) +{ + FLAC__StreamMetadata_VorbisComment_Entry *return_array; + + FLAC__ASSERT(object_array != NULL); + FLAC__ASSERT(num_comments > 0); + + return_array = vorbiscomment_entry_array_new_(num_comments); + + if (return_array != NULL) { + unsigned i; + + for (i = 0; i < num_comments; i++) { + if (!copy_vcentry_(return_array+i, object_array+i)) { + vorbiscomment_entry_array_delete_(return_array, num_comments); + return 0; + } + } + } + + return return_array; +} + +static FLAC__bool vorbiscomment_set_entry_(FLAC__StreamMetadata *object, FLAC__StreamMetadata_VorbisComment_Entry *dest, const FLAC__StreamMetadata_VorbisComment_Entry *src, FLAC__bool copy) +{ + FLAC__byte *save; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(dest != NULL); + FLAC__ASSERT(src != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT); + FLAC__ASSERT((src->entry != NULL && src->length > 0) || (src->entry == NULL && src->length == 0)); + + save = dest->entry; + + if (src->entry != NULL) { + if (copy) { + /* do the copy first so that if we fail we leave the dest object untouched */ + if (!copy_vcentry_(dest, src)) + return false; + } + else { + /* we have to make sure that the string we're taking over is null-terminated */ + + /* + * Stripping the const from src->entry is OK since we're taking + * ownership of the pointer. This is a hack around a deficiency + * in the API where the same function is used for 'copy' and + * 'own', but the source entry is a const pointer. If we were + * precise, the 'own' flavor would be a separate function with a + * non-const source pointer. But it's not, so we hack away. + */ + if (!ensure_null_terminated_((FLAC__byte**)(&src->entry), src->length)) + return false; + *dest = *src; + } + } + else { + /* the src is null */ + *dest = *src; + } + + free(save); + + vorbiscomment_calculate_length_(object); + return true; +} + +static int vorbiscomment_find_entry_from_(const FLAC__StreamMetadata *object, unsigned offset, const char *field_name, unsigned field_name_length) +{ + unsigned i; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT); + FLAC__ASSERT(field_name != NULL); + + for (i = offset; i < object->data.vorbis_comment.num_comments; i++) { + if (FLAC__metadata_object_vorbiscomment_entry_matches(object->data.vorbis_comment.comments[i], field_name, field_name_length)) + return (int)i; + } + + return -1; +} + +static void cuesheet_calculate_length_(FLAC__StreamMetadata *object) +{ + unsigned i; + + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + + object->length = ( + FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN + + FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN + + FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN + + FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN + + FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN + ) / 8; + + object->length += object->data.cue_sheet.num_tracks * ( + FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN + + FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN + + FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN + + FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN + + FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN + + FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN + + FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN + ) / 8; + + for (i = 0; i < object->data.cue_sheet.num_tracks; i++) { + object->length += object->data.cue_sheet.tracks[i].num_indices * ( + FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN + + FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN + + FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN + ) / 8; + } +} + +static FLAC__StreamMetadata_CueSheet_Index *cuesheet_track_index_array_new_(unsigned num_indices) +{ + FLAC__ASSERT(num_indices > 0); + + return safe_calloc_(num_indices, sizeof(FLAC__StreamMetadata_CueSheet_Index)); +} + +static FLAC__StreamMetadata_CueSheet_Track *cuesheet_track_array_new_(unsigned num_tracks) +{ + FLAC__ASSERT(num_tracks > 0); + + return safe_calloc_(num_tracks, sizeof(FLAC__StreamMetadata_CueSheet_Track)); +} + +static void cuesheet_track_array_delete_(FLAC__StreamMetadata_CueSheet_Track *object_array, unsigned num_tracks) +{ + unsigned i; + + FLAC__ASSERT(object_array != NULL && num_tracks > 0); + + for (i = 0; i < num_tracks; i++) { + if (object_array[i].indices != 0) { + FLAC__ASSERT(object_array[i].num_indices > 0); + free(object_array[i].indices); + } + } + + free(object_array); +} + +static FLAC__StreamMetadata_CueSheet_Track *cuesheet_track_array_copy_(const FLAC__StreamMetadata_CueSheet_Track *object_array, unsigned num_tracks) +{ + FLAC__StreamMetadata_CueSheet_Track *return_array; + + FLAC__ASSERT(object_array != NULL); + FLAC__ASSERT(num_tracks > 0); + + return_array = cuesheet_track_array_new_(num_tracks); + + if (return_array != NULL) { + unsigned i; + + for (i = 0; i < num_tracks; i++) { + if (!copy_track_(return_array+i, object_array+i)) { + cuesheet_track_array_delete_(return_array, num_tracks); + return 0; + } + } + } + + return return_array; +} + +static FLAC__bool cuesheet_set_track_(FLAC__StreamMetadata *object, FLAC__StreamMetadata_CueSheet_Track *dest, const FLAC__StreamMetadata_CueSheet_Track *src, FLAC__bool copy) +{ + FLAC__StreamMetadata_CueSheet_Index *save; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(dest != NULL); + FLAC__ASSERT(src != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + FLAC__ASSERT((src->indices != NULL && src->num_indices > 0) || (src->indices == NULL && src->num_indices == 0)); + + save = dest->indices; + + /* do the copy first so that if we fail we leave the object untouched */ + if (copy) { + if (!copy_track_(dest, src)) + return false; + } + else { + *dest = *src; + } + + free(save); + + cuesheet_calculate_length_(object); + return true; +} + + +/**************************************************************************** + * + * Metadata object routines + * + ***************************************************************************/ + +FLAC_API FLAC__StreamMetadata *FLAC__metadata_object_new(FLAC__MetadataType type) +{ + FLAC__StreamMetadata *object; + + if (type > FLAC__MAX_METADATA_TYPE) + return 0; + + object = calloc(1, sizeof(FLAC__StreamMetadata)); + if (object != NULL) { + object->is_last = false; + object->type = type; + switch(type) { + case FLAC__METADATA_TYPE_STREAMINFO: + object->length = FLAC__STREAM_METADATA_STREAMINFO_LENGTH; + break; + case FLAC__METADATA_TYPE_PADDING: + /* calloc() took care of this for us: + object->length = 0; + */ + break; + case FLAC__METADATA_TYPE_APPLICATION: + object->length = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8; + /* calloc() took care of this for us: + object->data.application.data = 0; + */ + break; + case FLAC__METADATA_TYPE_SEEKTABLE: + /* calloc() took care of this for us: + object->length = 0; + object->data.seek_table.num_points = 0; + object->data.seek_table.points = 0; + */ + break; + case FLAC__METADATA_TYPE_VORBIS_COMMENT: + object->data.vorbis_comment.vendor_string.length = (unsigned)strlen(FLAC__VENDOR_STRING); + if (!copy_bytes_(&object->data.vorbis_comment.vendor_string.entry, (const FLAC__byte*)FLAC__VENDOR_STRING, object->data.vorbis_comment.vendor_string.length+1)) { + free(object); + return 0; + } + vorbiscomment_calculate_length_(object); + break; + case FLAC__METADATA_TYPE_CUESHEET: + cuesheet_calculate_length_(object); + break; + case FLAC__METADATA_TYPE_PICTURE: + object->length = ( + FLAC__STREAM_METADATA_PICTURE_TYPE_LEN + + FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN + /* empty mime_type string */ + FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN + /* empty description string */ + FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN + + FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN + + FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN + + FLAC__STREAM_METADATA_PICTURE_COLORS_LEN + + FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN + + 0 /* no data */ + ) / 8; + object->data.picture.type = FLAC__STREAM_METADATA_PICTURE_TYPE_OTHER; + object->data.picture.mime_type = 0; + object->data.picture.description = 0; + /* calloc() took care of this for us: + object->data.picture.width = 0; + object->data.picture.height = 0; + object->data.picture.depth = 0; + object->data.picture.colors = 0; + object->data.picture.data_length = 0; + object->data.picture.data = 0; + */ + /* now initialize mime_type and description with empty strings to make things easier on the client */ + if (!copy_cstring_(&object->data.picture.mime_type, "")) { + free(object); + return 0; + } + if (!copy_cstring_((char**)(&object->data.picture.description), "")) { + free(object->data.picture.mime_type); + free(object); + return 0; + } + break; + default: + /* calloc() took care of this for us: + object->length = 0; + object->data.unknown.data = 0; + */ + break; + } + } + + return object; +} + +FLAC_API FLAC__StreamMetadata *FLAC__metadata_object_clone(const FLAC__StreamMetadata *object) +{ + FLAC__StreamMetadata *to; + + FLAC__ASSERT(object != NULL); + + if ((to = FLAC__metadata_object_new(object->type)) != NULL) { + to->is_last = object->is_last; + to->type = object->type; + to->length = object->length; + switch(to->type) { + case FLAC__METADATA_TYPE_STREAMINFO: + memcpy(&to->data.stream_info, &object->data.stream_info, sizeof(FLAC__StreamMetadata_StreamInfo)); + break; + case FLAC__METADATA_TYPE_PADDING: + break; + case FLAC__METADATA_TYPE_APPLICATION: + if (to->length < FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8) { /* underflow check */ + FLAC__metadata_object_delete(to); + return 0; + } + memcpy(&to->data.application.id, &object->data.application.id, FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8); + if (!copy_bytes_(&to->data.application.data, object->data.application.data, object->length - FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8)) { + FLAC__metadata_object_delete(to); + return 0; + } + break; + case FLAC__METADATA_TYPE_SEEKTABLE: + to->data.seek_table.num_points = object->data.seek_table.num_points; + if (to->data.seek_table.num_points > UINT32_MAX / sizeof(FLAC__StreamMetadata_SeekPoint)) { /* overflow check */ + FLAC__metadata_object_delete(to); + return 0; + } + if (!copy_bytes_((FLAC__byte**)&to->data.seek_table.points, (FLAC__byte*)object->data.seek_table.points, object->data.seek_table.num_points * sizeof(FLAC__StreamMetadata_SeekPoint))) { + FLAC__metadata_object_delete(to); + return 0; + } + break; + case FLAC__METADATA_TYPE_VORBIS_COMMENT: + if (to->data.vorbis_comment.vendor_string.entry != NULL) { + free(to->data.vorbis_comment.vendor_string.entry); + to->data.vorbis_comment.vendor_string.entry = 0; + } + if (!copy_vcentry_(&to->data.vorbis_comment.vendor_string, &object->data.vorbis_comment.vendor_string)) { + FLAC__metadata_object_delete(to); + return 0; + } + if (object->data.vorbis_comment.num_comments == 0) { + to->data.vorbis_comment.comments = 0; + } + else { + to->data.vorbis_comment.comments = vorbiscomment_entry_array_copy_(object->data.vorbis_comment.comments, object->data.vorbis_comment.num_comments); + if (to->data.vorbis_comment.comments == NULL) { + to->data.vorbis_comment.num_comments = 0; + FLAC__metadata_object_delete(to); + return 0; + } + } + to->data.vorbis_comment.num_comments = object->data.vorbis_comment.num_comments; + break; + case FLAC__METADATA_TYPE_CUESHEET: + memcpy(&to->data.cue_sheet, &object->data.cue_sheet, sizeof(FLAC__StreamMetadata_CueSheet)); + if (object->data.cue_sheet.num_tracks == 0) { + FLAC__ASSERT(object->data.cue_sheet.tracks == NULL); + } + else { + FLAC__ASSERT(object->data.cue_sheet.tracks != 0); + to->data.cue_sheet.tracks = cuesheet_track_array_copy_(object->data.cue_sheet.tracks, object->data.cue_sheet.num_tracks); + if (to->data.cue_sheet.tracks == NULL) { + FLAC__metadata_object_delete(to); + return 0; + } + } + break; + case FLAC__METADATA_TYPE_PICTURE: + to->data.picture.type = object->data.picture.type; + if (!copy_cstring_(&to->data.picture.mime_type, object->data.picture.mime_type)) { + FLAC__metadata_object_delete(to); + return 0; + } + if (!copy_cstring_((char**)(&to->data.picture.description), (const char*)object->data.picture.description)) { + FLAC__metadata_object_delete(to); + return 0; + } + to->data.picture.width = object->data.picture.width; + to->data.picture.height = object->data.picture.height; + to->data.picture.depth = object->data.picture.depth; + to->data.picture.colors = object->data.picture.colors; + to->data.picture.data_length = object->data.picture.data_length; + if (!copy_bytes_((&to->data.picture.data), object->data.picture.data, object->data.picture.data_length)) { + FLAC__metadata_object_delete(to); + return 0; + } + break; + default: + if (!copy_bytes_(&to->data.unknown.data, object->data.unknown.data, object->length)) { + FLAC__metadata_object_delete(to); + return 0; + } + break; + } + } + + return to; +} + +void FLAC__metadata_object_delete_data(FLAC__StreamMetadata *object) +{ + FLAC__ASSERT(object != NULL); + + switch(object->type) { + case FLAC__METADATA_TYPE_STREAMINFO: + case FLAC__METADATA_TYPE_PADDING: + break; + case FLAC__METADATA_TYPE_APPLICATION: + if (object->data.application.data != NULL) { + free(object->data.application.data); + object->data.application.data = NULL; + } + break; + case FLAC__METADATA_TYPE_SEEKTABLE: + if (object->data.seek_table.points != NULL) { + free(object->data.seek_table.points); + object->data.seek_table.points = NULL; + } + break; + case FLAC__METADATA_TYPE_VORBIS_COMMENT: + if (object->data.vorbis_comment.vendor_string.entry != NULL) { + free(object->data.vorbis_comment.vendor_string.entry); + object->data.vorbis_comment.vendor_string.entry = 0; + } + if (object->data.vorbis_comment.comments != NULL) { + FLAC__ASSERT(object->data.vorbis_comment.num_comments > 0); + vorbiscomment_entry_array_delete_(object->data.vorbis_comment.comments, object->data.vorbis_comment.num_comments); + object->data.vorbis_comment.comments = NULL; + object->data.vorbis_comment.num_comments = 0; + } + break; + case FLAC__METADATA_TYPE_CUESHEET: + if (object->data.cue_sheet.tracks != NULL) { + FLAC__ASSERT(object->data.cue_sheet.num_tracks > 0); + cuesheet_track_array_delete_(object->data.cue_sheet.tracks, object->data.cue_sheet.num_tracks); + object->data.cue_sheet.tracks = NULL; + object->data.cue_sheet.num_tracks = 0; + } + break; + case FLAC__METADATA_TYPE_PICTURE: + if (object->data.picture.mime_type != NULL) { + free(object->data.picture.mime_type); + object->data.picture.mime_type = NULL; + } + if (object->data.picture.description != NULL) { + free(object->data.picture.description); + object->data.picture.description = NULL; + } + if (object->data.picture.data != NULL) { + free(object->data.picture.data); + object->data.picture.data = NULL; + } + break; + default: + if (object->data.unknown.data != NULL) { + free(object->data.unknown.data); + object->data.unknown.data = NULL; + } + break; + } +} + +FLAC_API void FLAC__metadata_object_delete(FLAC__StreamMetadata *object) +{ + FLAC__metadata_object_delete_data(object); + free(object); +} + +static FLAC__bool compare_block_data_streaminfo_(const FLAC__StreamMetadata_StreamInfo *block1, const FLAC__StreamMetadata_StreamInfo *block2) +{ + if (block1->min_blocksize != block2->min_blocksize) + return false; + if (block1->max_blocksize != block2->max_blocksize) + return false; + if (block1->min_framesize != block2->min_framesize) + return false; + if (block1->max_framesize != block2->max_framesize) + return false; + if (block1->sample_rate != block2->sample_rate) + return false; + if (block1->channels != block2->channels) + return false; + if (block1->bits_per_sample != block2->bits_per_sample) + return false; + if (block1->total_samples != block2->total_samples) + return false; + if (memcmp(block1->md5sum, block2->md5sum, 16) != 0) + return false; + return true; +} + +static FLAC__bool compare_block_data_application_(const FLAC__StreamMetadata_Application *block1, const FLAC__StreamMetadata_Application *block2, unsigned block_length) +{ + FLAC__ASSERT(block1 != NULL); + FLAC__ASSERT(block2 != NULL); + FLAC__ASSERT(block_length >= sizeof(block1->id)); + + if (memcmp(block1->id, block2->id, sizeof(block1->id)) != 0) + return false; + if (block1->data != NULL && block2->data != NULL) + return memcmp(block1->data, block2->data, block_length - sizeof(block1->id)) == 0; + else + return block1->data == block2->data; +} + +static FLAC__bool compare_block_data_seektable_(const FLAC__StreamMetadata_SeekTable *block1, const FLAC__StreamMetadata_SeekTable *block2) +{ + unsigned i; + + FLAC__ASSERT(block1 != NULL); + FLAC__ASSERT(block2 != NULL); + + if (block1->num_points != block2->num_points) + return false; + + if (block1->points != NULL && block2->points != NULL) { + for (i = 0; i < block1->num_points; i++) { + if (block1->points[i].sample_number != block2->points[i].sample_number) + return false; + if (block1->points[i].stream_offset != block2->points[i].stream_offset) + return false; + if (block1->points[i].frame_samples != block2->points[i].frame_samples) + return false; + } + return true; + } + else + return block1->points == block2->points; +} + +static FLAC__bool compare_block_data_vorbiscomment_(const FLAC__StreamMetadata_VorbisComment *block1, const FLAC__StreamMetadata_VorbisComment *block2) +{ + unsigned i; + + if (block1->vendor_string.length != block2->vendor_string.length) + return false; + + if (block1->vendor_string.entry != NULL && block2->vendor_string.entry != NULL) { + if (memcmp(block1->vendor_string.entry, block2->vendor_string.entry, block1->vendor_string.length) != 0) + return false; + } + else if (block1->vendor_string.entry != block2->vendor_string.entry) + return false; + + if (block1->num_comments != block2->num_comments) + return false; + + for (i = 0; i < block1->num_comments; i++) { + if (block1->comments[i].entry != NULL && block2->comments[i].entry != NULL) { + if (memcmp(block1->comments[i].entry, block2->comments[i].entry, block1->comments[i].length) != 0) + return false; + } + else if (block1->comments[i].entry != block2->comments[i].entry) + return false; + } + return true; +} + +static FLAC__bool compare_block_data_cuesheet_(const FLAC__StreamMetadata_CueSheet *block1, const FLAC__StreamMetadata_CueSheet *block2) +{ + unsigned i, j; + + if (strcmp(block1->media_catalog_number, block2->media_catalog_number) != 0) + return false; + + if (block1->lead_in != block2->lead_in) + return false; + + if (block1->is_cd != block2->is_cd) + return false; + + if (block1->num_tracks != block2->num_tracks) + return false; + + if (block1->tracks != NULL && block2->tracks != NULL) { + FLAC__ASSERT(block1->num_tracks > 0); + for (i = 0; i < block1->num_tracks; i++) { + if (block1->tracks[i].offset != block2->tracks[i].offset) + return false; + if (block1->tracks[i].number != block2->tracks[i].number) + return false; + if (memcmp(block1->tracks[i].isrc, block2->tracks[i].isrc, sizeof(block1->tracks[i].isrc)) != 0) + return false; + if (block1->tracks[i].type != block2->tracks[i].type) + return false; + if (block1->tracks[i].pre_emphasis != block2->tracks[i].pre_emphasis) + return false; + if (block1->tracks[i].num_indices != block2->tracks[i].num_indices) + return false; + if (block1->tracks[i].indices != NULL && block2->tracks[i].indices != NULL) { + FLAC__ASSERT(block1->tracks[i].num_indices > 0); + for (j = 0; j < block1->tracks[i].num_indices; j++) { + if (block1->tracks[i].indices[j].offset != block2->tracks[i].indices[j].offset) + return false; + if (block1->tracks[i].indices[j].number != block2->tracks[i].indices[j].number) + return false; + } + } + else if (block1->tracks[i].indices != block2->tracks[i].indices) + return false; + } + } + else if (block1->tracks != block2->tracks) + return false; + return true; +} + +static FLAC__bool compare_block_data_picture_(const FLAC__StreamMetadata_Picture *block1, const FLAC__StreamMetadata_Picture *block2) +{ + if (block1->type != block2->type) + return false; + if (block1->mime_type != block2->mime_type && (block1->mime_type == 0 || block2->mime_type == 0 || strcmp(block1->mime_type, block2->mime_type))) + return false; + if (block1->description != block2->description && (block1->description == 0 || block2->description == 0 || strcmp((const char *)block1->description, (const char *)block2->description))) + return false; + if (block1->width != block2->width) + return false; + if (block1->height != block2->height) + return false; + if (block1->depth != block2->depth) + return false; + if (block1->colors != block2->colors) + return false; + if (block1->data_length != block2->data_length) + return false; + if (block1->data != block2->data && (block1->data == NULL || block2->data == NULL || memcmp(block1->data, block2->data, block1->data_length))) + return false; + return true; +} + +static FLAC__bool compare_block_data_unknown_(const FLAC__StreamMetadata_Unknown *block1, const FLAC__StreamMetadata_Unknown *block2, unsigned block_length) +{ + FLAC__ASSERT(block1 != NULL); + FLAC__ASSERT(block2 != NULL); + + if (block1->data != NULL && block2->data != NULL) + return memcmp(block1->data, block2->data, block_length) == 0; + else + return block1->data == block2->data; +} + +FLAC_API FLAC__bool FLAC__metadata_object_is_equal(const FLAC__StreamMetadata *block1, const FLAC__StreamMetadata *block2) +{ + FLAC__ASSERT(block1 != NULL); + FLAC__ASSERT(block2 != NULL); + + if (block1->type != block2->type) { + return false; + } + if (block1->is_last != block2->is_last) { + return false; + } + if (block1->length != block2->length) { + return false; + } + switch(block1->type) { + case FLAC__METADATA_TYPE_STREAMINFO: + return compare_block_data_streaminfo_(&block1->data.stream_info, &block2->data.stream_info); + case FLAC__METADATA_TYPE_PADDING: + return true; /* we don't compare the padding guts */ + case FLAC__METADATA_TYPE_APPLICATION: + return compare_block_data_application_(&block1->data.application, &block2->data.application, block1->length); + case FLAC__METADATA_TYPE_SEEKTABLE: + return compare_block_data_seektable_(&block1->data.seek_table, &block2->data.seek_table); + case FLAC__METADATA_TYPE_VORBIS_COMMENT: + return compare_block_data_vorbiscomment_(&block1->data.vorbis_comment, &block2->data.vorbis_comment); + case FLAC__METADATA_TYPE_CUESHEET: + return compare_block_data_cuesheet_(&block1->data.cue_sheet, &block2->data.cue_sheet); + case FLAC__METADATA_TYPE_PICTURE: + return compare_block_data_picture_(&block1->data.picture, &block2->data.picture); + default: + return compare_block_data_unknown_(&block1->data.unknown, &block2->data.unknown, block1->length); + } +} + +FLAC_API FLAC__bool FLAC__metadata_object_application_set_data(FLAC__StreamMetadata *object, FLAC__byte *data, unsigned length, FLAC__bool copy) +{ + FLAC__byte *save; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_APPLICATION); + FLAC__ASSERT((data != NULL && length > 0) || (data == NULL && length == 0 && copy == false)); + + save = object->data.application.data; + + /* do the copy first so that if we fail we leave the object untouched */ + if (copy) { + if (!copy_bytes_(&object->data.application.data, data, length)) + return false; + } + else { + object->data.application.data = data; + } + + free(save); + + object->length = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8 + length; + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_resize_points(FLAC__StreamMetadata *object, unsigned new_num_points) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + + if (object->data.seek_table.points == 0) { + FLAC__ASSERT(object->data.seek_table.num_points == 0); + if (new_num_points == 0) + return true; + else if ((object->data.seek_table.points = seekpoint_array_new_(new_num_points)) == 0) + return false; + } + else { + const size_t old_size = object->data.seek_table.num_points * sizeof(FLAC__StreamMetadata_SeekPoint); + const size_t new_size = new_num_points * sizeof(FLAC__StreamMetadata_SeekPoint); + + /* overflow check */ + if (new_num_points > UINT32_MAX / sizeof(FLAC__StreamMetadata_SeekPoint)) + return false; + + FLAC__ASSERT(object->data.seek_table.num_points > 0); + + if (new_size == 0) { + free(object->data.seek_table.points); + object->data.seek_table.points = 0; + } + else if ((object->data.seek_table.points = safe_realloc_(object->data.seek_table.points, new_size)) == NULL) + return false; + + /* if growing, set new elements to placeholders */ + if (new_size > old_size) { + unsigned i; + for (i = object->data.seek_table.num_points; i < new_num_points; i++) { + object->data.seek_table.points[i].sample_number = FLAC__STREAM_METADATA_SEEKPOINT_PLACEHOLDER; + object->data.seek_table.points[i].stream_offset = 0; + object->data.seek_table.points[i].frame_samples = 0; + } + } + } + + object->data.seek_table.num_points = new_num_points; + + seektable_calculate_length_(object); + return true; +} + +FLAC_API void FLAC__metadata_object_seektable_set_point(FLAC__StreamMetadata *object, unsigned point_num, FLAC__StreamMetadata_SeekPoint point) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + FLAC__ASSERT(point_num < object->data.seek_table.num_points); + + object->data.seek_table.points[point_num] = point; +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_insert_point(FLAC__StreamMetadata *object, unsigned point_num, FLAC__StreamMetadata_SeekPoint point) +{ + int i; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + FLAC__ASSERT(point_num <= object->data.seek_table.num_points); + + if (!FLAC__metadata_object_seektable_resize_points(object, object->data.seek_table.num_points+1)) + return false; + + /* move all points >= point_num forward one space */ + for (i = (int)object->data.seek_table.num_points-1; i > (int)point_num; i--) + object->data.seek_table.points[i] = object->data.seek_table.points[i-1]; + + FLAC__metadata_object_seektable_set_point(object, point_num, point); + seektable_calculate_length_(object); + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_delete_point(FLAC__StreamMetadata *object, unsigned point_num) +{ + unsigned i; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + FLAC__ASSERT(point_num < object->data.seek_table.num_points); + + /* move all points > point_num backward one space */ + for (i = point_num; i < object->data.seek_table.num_points-1; i++) + object->data.seek_table.points[i] = object->data.seek_table.points[i+1]; + + return FLAC__metadata_object_seektable_resize_points(object, object->data.seek_table.num_points-1); +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_is_legal(const FLAC__StreamMetadata *object) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + + return FLAC__format_seektable_is_legal(&object->data.seek_table); +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_placeholders(FLAC__StreamMetadata *object, unsigned num) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + + if (num > 0) + /* WATCHOUT: we rely on the fact that growing the array adds PLACEHOLDERS at the end */ + return FLAC__metadata_object_seektable_resize_points(object, object->data.seek_table.num_points + num); + else + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_point(FLAC__StreamMetadata *object, FLAC__uint64 sample_number) +{ + FLAC__StreamMetadata_SeekTable *seek_table; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + + seek_table = &object->data.seek_table; + + if (!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + 1)) + return false; + + seek_table->points[seek_table->num_points - 1].sample_number = sample_number; + seek_table->points[seek_table->num_points - 1].stream_offset = 0; + seek_table->points[seek_table->num_points - 1].frame_samples = 0; + + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_points(FLAC__StreamMetadata *object, FLAC__uint64 sample_numbers[], unsigned num) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + FLAC__ASSERT(sample_numbers != 0 || num == 0); + + if (num > 0) { + FLAC__StreamMetadata_SeekTable *seek_table = &object->data.seek_table; + unsigned i, j; + + i = seek_table->num_points; + + if (!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + num)) + return false; + + for (j = 0; j < num; i++, j++) { + seek_table->points[i].sample_number = sample_numbers[j]; + seek_table->points[i].stream_offset = 0; + seek_table->points[i].frame_samples = 0; + } + } + + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_spaced_points(FLAC__StreamMetadata *object, unsigned num, FLAC__uint64 total_samples) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + FLAC__ASSERT(total_samples > 0); + + if (num > 0 && total_samples > 0) { + FLAC__StreamMetadata_SeekTable *seek_table = &object->data.seek_table; + unsigned i, j; + + i = seek_table->num_points; + + if (!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + num)) + return false; + + for (j = 0; j < num; i++, j++) { + seek_table->points[i].sample_number = total_samples * (FLAC__uint64)j / (FLAC__uint64)num; + seek_table->points[i].stream_offset = 0; + seek_table->points[i].frame_samples = 0; + } + } + + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_spaced_points_by_samples(FLAC__StreamMetadata *object, unsigned samples, FLAC__uint64 total_samples) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + FLAC__ASSERT(samples > 0); + FLAC__ASSERT(total_samples > 0); + + if (samples > 0 && total_samples > 0) { + FLAC__StreamMetadata_SeekTable *seek_table = &object->data.seek_table; + unsigned i, j; + FLAC__uint64 num, sample; + + num = 1 + total_samples / samples; /* 1+ for the first sample at 0 */ + /* now account for the fact that we don't place a seekpoint at "total_samples" since samples are number from 0: */ + if (total_samples % samples == 0) + num--; + + /* Put a strict upper bound on the number of allowed seek points. */ + if (num > 32768) { + /* Set the bound and recalculate samples accordingly. */ + num = 32768; + samples = total_samples / num; + } + + i = seek_table->num_points; + + if (!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + (unsigned)num)) + return false; + + sample = 0; + for (j = 0; j < num; i++, j++, sample += samples) { + seek_table->points[i].sample_number = sample; + seek_table->points[i].stream_offset = 0; + seek_table->points[i].frame_samples = 0; + } + } + + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_sort(FLAC__StreamMetadata *object, FLAC__bool compact) +{ + unsigned unique; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE); + + unique = FLAC__format_seektable_sort(&object->data.seek_table); + + return !compact || FLAC__metadata_object_seektable_resize_points(object, unique); +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_set_vendor_string(FLAC__StreamMetadata *object, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy) +{ + if (!FLAC__format_vorbiscomment_entry_value_is_legal(entry.entry, entry.length)) + return false; + return vorbiscomment_set_entry_(object, &object->data.vorbis_comment.vendor_string, &entry, copy); +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_resize_comments(FLAC__StreamMetadata *object, unsigned new_num_comments) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT); + + if (object->data.vorbis_comment.comments == NULL) { + FLAC__ASSERT(object->data.vorbis_comment.num_comments == 0); + if (new_num_comments == 0) + return true; + else if ((object->data.vorbis_comment.comments = vorbiscomment_entry_array_new_(new_num_comments)) == NULL) + return false; + } + else { + const size_t old_size = object->data.vorbis_comment.num_comments * sizeof(FLAC__StreamMetadata_VorbisComment_Entry); + const size_t new_size = new_num_comments * sizeof(FLAC__StreamMetadata_VorbisComment_Entry); + + /* overflow check */ + if (new_num_comments > UINT32_MAX / sizeof(FLAC__StreamMetadata_VorbisComment_Entry)) + return false; + + FLAC__ASSERT(object->data.vorbis_comment.num_comments > 0); + + /* if shrinking, free the truncated entries */ + if (new_num_comments < object->data.vorbis_comment.num_comments) { + unsigned i; + for (i = new_num_comments; i < object->data.vorbis_comment.num_comments; i++) + if (object->data.vorbis_comment.comments[i].entry != NULL) + free(object->data.vorbis_comment.comments[i].entry); + } + + if (new_size == 0) { + free(object->data.vorbis_comment.comments); + object->data.vorbis_comment.comments = 0; + } + else { + FLAC__StreamMetadata_VorbisComment_Entry *oldptr = object->data.vorbis_comment.comments; + if ((object->data.vorbis_comment.comments = realloc(object->data.vorbis_comment.comments, new_size)) == NULL) { + vorbiscomment_entry_array_delete_(oldptr, object->data.vorbis_comment.num_comments); + object->data.vorbis_comment.num_comments = 0; + return false; + } + } + + /* if growing, zero all the length/pointers of new elements */ + if (new_size > old_size) + memset(object->data.vorbis_comment.comments + object->data.vorbis_comment.num_comments, 0, new_size - old_size); + } + + object->data.vorbis_comment.num_comments = new_num_comments; + + vorbiscomment_calculate_length_(object); + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_set_comment(FLAC__StreamMetadata *object, unsigned comment_num, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(comment_num < object->data.vorbis_comment.num_comments); + + if (!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length)) + return false; + return vorbiscomment_set_entry_(object, &object->data.vorbis_comment.comments[comment_num], &entry, copy); +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_insert_comment(FLAC__StreamMetadata *object, unsigned comment_num, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy) +{ + FLAC__StreamMetadata_VorbisComment *vc; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT); + FLAC__ASSERT(comment_num <= object->data.vorbis_comment.num_comments); + + if (!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length)) + return false; + + vc = &object->data.vorbis_comment; + + if (!FLAC__metadata_object_vorbiscomment_resize_comments(object, vc->num_comments+1)) + return false; + + /* move all comments >= comment_num forward one space */ + memmove(&vc->comments[comment_num+1], &vc->comments[comment_num], sizeof(FLAC__StreamMetadata_VorbisComment_Entry)*(vc->num_comments-1-comment_num)); + vc->comments[comment_num].length = 0; + vc->comments[comment_num].entry = 0; + + return FLAC__metadata_object_vorbiscomment_set_comment(object, comment_num, entry, copy); +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_append_comment(FLAC__StreamMetadata *object, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT); + return FLAC__metadata_object_vorbiscomment_insert_comment(object, object->data.vorbis_comment.num_comments, entry, copy); +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_replace_comment(FLAC__StreamMetadata *object, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool all, FLAC__bool copy) +{ + FLAC__ASSERT(entry.entry != NULL && entry.length > 0); + + if (!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length)) + return false; + + { + int i; + size_t field_name_length; + const FLAC__byte *eq = (FLAC__byte*)memchr(entry.entry, '=', entry.length); + + if (eq == NULL) + return false; /* double protection */ + + field_name_length = eq-entry.entry; + + i = vorbiscomment_find_entry_from_(object, 0, (const char *)entry.entry, field_name_length); + if (i >= 0) { + unsigned indx = (unsigned)i; + if (!FLAC__metadata_object_vorbiscomment_set_comment(object, indx, entry, copy)) + return false; + entry = object->data.vorbis_comment.comments[indx]; + indx++; /* skip over replaced comment */ + if (all && indx < object->data.vorbis_comment.num_comments) { + i = vorbiscomment_find_entry_from_(object, indx, (const char *)entry.entry, field_name_length); + while (i >= 0) { + indx = (unsigned)i; + if (!FLAC__metadata_object_vorbiscomment_delete_comment(object, indx)) + return false; + if (indx < object->data.vorbis_comment.num_comments) + i = vorbiscomment_find_entry_from_(object, indx, (const char *)entry.entry, field_name_length); + else + i = -1; + } + } + return true; + } + else + return FLAC__metadata_object_vorbiscomment_append_comment(object, entry, copy); + } +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_delete_comment(FLAC__StreamMetadata *object, unsigned comment_num) +{ + FLAC__StreamMetadata_VorbisComment *vc; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT); + FLAC__ASSERT(comment_num < object->data.vorbis_comment.num_comments); + + vc = &object->data.vorbis_comment; + + /* free the comment at comment_num */ + free(vc->comments[comment_num].entry); + + /* move all comments > comment_num backward one space */ + memmove(&vc->comments[comment_num], &vc->comments[comment_num+1], sizeof(FLAC__StreamMetadata_VorbisComment_Entry)*(vc->num_comments-comment_num-1)); + vc->comments[vc->num_comments-1].length = 0; + vc->comments[vc->num_comments-1].entry = 0; + + return FLAC__metadata_object_vorbiscomment_resize_comments(object, vc->num_comments-1); +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_entry_from_name_value_pair(FLAC__StreamMetadata_VorbisComment_Entry *entry, const char *field_name, const char *field_value) +{ + FLAC__ASSERT(entry != NULL); + FLAC__ASSERT(field_name != NULL); + FLAC__ASSERT(field_value != NULL); + + if (!FLAC__format_vorbiscomment_entry_name_is_legal(field_name)) + return false; + if (!FLAC__format_vorbiscomment_entry_value_is_legal((const FLAC__byte *)field_value, (unsigned)(-1))) + return false; + + { + const size_t nn = strlen(field_name); + const size_t nv = strlen(field_value); + entry->length = nn + 1 /*=*/ + nv; + if ((entry->entry = safe_malloc_add_4op_(nn, /*+*/1, /*+*/nv, /*+*/1)) == NULL) + return false; + memcpy(entry->entry, field_name, nn); + entry->entry[nn] = '='; + memcpy(entry->entry+nn+1, field_value, nv); + entry->entry[entry->length] = '\0'; + } + + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_entry_to_name_value_pair(const FLAC__StreamMetadata_VorbisComment_Entry entry, char **field_name, char **field_value) +{ + FLAC__ASSERT(entry.entry != NULL && entry.length > 0); + FLAC__ASSERT(field_name != NULL); + FLAC__ASSERT(field_value != NULL); + + if (!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length)) + return false; + + { + const FLAC__byte *eq = (FLAC__byte*)memchr(entry.entry, '=', entry.length); + const size_t nn = eq-entry.entry; + const size_t nv = entry.length-nn-1; /* -1 for the '=' */ + + if (eq == NULL) + return false; /* double protection */ + if ((*field_name = safe_malloc_add_2op_(nn, /*+*/1)) == NULL) + return false; + if ((*field_value = safe_malloc_add_2op_(nv, /*+*/1)) == NULL) { + free(*field_name); + return false; + } + memcpy(*field_name, entry.entry, nn); + memcpy(*field_value, entry.entry+nn+1, nv); + (*field_name)[nn] = '\0'; + (*field_value)[nv] = '\0'; + } + + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_entry_matches(const FLAC__StreamMetadata_VorbisComment_Entry entry, const char *field_name, unsigned field_name_length) +{ + FLAC__ASSERT(entry.entry != NULL && entry.length > 0); + { + const FLAC__byte *eq = (FLAC__byte*)memchr(entry.entry, '=', entry.length); + return (eq != NULL && (unsigned)(eq-entry.entry) == field_name_length && FLAC__STRNCASECMP(field_name, (const char *)entry.entry, field_name_length) == 0); + } +} + +FLAC_API int FLAC__metadata_object_vorbiscomment_find_entry_from(const FLAC__StreamMetadata *object, unsigned offset, const char *field_name) +{ + FLAC__ASSERT(field_name != NULL); + + return vorbiscomment_find_entry_from_(object, offset, field_name, strlen(field_name)); +} + +FLAC_API int FLAC__metadata_object_vorbiscomment_remove_entry_matching(FLAC__StreamMetadata *object, const char *field_name) +{ + const unsigned field_name_length = strlen(field_name); + unsigned i; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT); + + for (i = 0; i < object->data.vorbis_comment.num_comments; i++) { + if (FLAC__metadata_object_vorbiscomment_entry_matches(object->data.vorbis_comment.comments[i], field_name, field_name_length)) { + if (!FLAC__metadata_object_vorbiscomment_delete_comment(object, i)) + return -1; + else + return 1; + } + } + + return 0; +} + +FLAC_API int FLAC__metadata_object_vorbiscomment_remove_entries_matching(FLAC__StreamMetadata *object, const char *field_name) +{ + FLAC__bool ok = true; + unsigned matching = 0; + const unsigned field_name_length = strlen(field_name); + int i; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT); + + /* must delete from end to start otherwise it will interfere with our iteration */ + for (i = (int)object->data.vorbis_comment.num_comments - 1; ok && i >= 0; i--) { + if (FLAC__metadata_object_vorbiscomment_entry_matches(object->data.vorbis_comment.comments[i], field_name, field_name_length)) { + matching++; + ok &= FLAC__metadata_object_vorbiscomment_delete_comment(object, (unsigned)i); + } + } + + return ok? (int)matching : -1; +} + +FLAC_API FLAC__StreamMetadata_CueSheet_Track *FLAC__metadata_object_cuesheet_track_new(void) +{ + return calloc(1, sizeof(FLAC__StreamMetadata_CueSheet_Track)); +} + +FLAC_API FLAC__StreamMetadata_CueSheet_Track *FLAC__metadata_object_cuesheet_track_clone(const FLAC__StreamMetadata_CueSheet_Track *object) +{ + FLAC__StreamMetadata_CueSheet_Track *to; + + FLAC__ASSERT(object != NULL); + + if ((to = FLAC__metadata_object_cuesheet_track_new()) != NULL) { + if (!copy_track_(to, object)) { + FLAC__metadata_object_cuesheet_track_delete(to); + return 0; + } + } + + return to; +} + +void FLAC__metadata_object_cuesheet_track_delete_data(FLAC__StreamMetadata_CueSheet_Track *object) +{ + FLAC__ASSERT(object != NULL); + + if (object->indices != NULL) { + FLAC__ASSERT(object->num_indices > 0); + free(object->indices); + } +} + +FLAC_API void FLAC__metadata_object_cuesheet_track_delete(FLAC__StreamMetadata_CueSheet_Track *object) +{ + FLAC__metadata_object_cuesheet_track_delete_data(object); + free(object); +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_resize_indices(FLAC__StreamMetadata *object, unsigned track_num, unsigned new_num_indices) +{ + FLAC__StreamMetadata_CueSheet_Track *track; + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks); + + track = &object->data.cue_sheet.tracks[track_num]; + + if (track->indices == NULL) { + FLAC__ASSERT(track->num_indices == 0); + if (new_num_indices == 0) + return true; + else if ((track->indices = cuesheet_track_index_array_new_(new_num_indices)) == NULL) + return false; + } + else { + const size_t old_size = track->num_indices * sizeof(FLAC__StreamMetadata_CueSheet_Index); + const size_t new_size = new_num_indices * sizeof(FLAC__StreamMetadata_CueSheet_Index); + + /* overflow check */ + if (new_num_indices > UINT32_MAX / sizeof(FLAC__StreamMetadata_CueSheet_Index)) + return false; + + FLAC__ASSERT(track->num_indices > 0); + + if (new_size == 0) { + free(track->indices); + track->indices = 0; + } + else if ((track->indices = safe_realloc_(track->indices, new_size)) == NULL) + return false; + + /* if growing, zero all the lengths/pointers of new elements */ + if (new_size > old_size) + memset(track->indices + track->num_indices, 0, new_size - old_size); + } + + track->num_indices = new_num_indices; + + cuesheet_calculate_length_(object); + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_index(FLAC__StreamMetadata *object, unsigned track_num, unsigned index_num, FLAC__StreamMetadata_CueSheet_Index indx) +{ + FLAC__StreamMetadata_CueSheet_Track *track; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks); + FLAC__ASSERT(index_num <= object->data.cue_sheet.tracks[track_num].num_indices); + + track = &object->data.cue_sheet.tracks[track_num]; + + if (!FLAC__metadata_object_cuesheet_track_resize_indices(object, track_num, track->num_indices+1)) + return false; + + /* move all indices >= index_num forward one space */ + memmove(&track->indices[index_num+1], &track->indices[index_num], sizeof(FLAC__StreamMetadata_CueSheet_Index)*(track->num_indices-1-index_num)); + + track->indices[index_num] = indx; + cuesheet_calculate_length_(object); + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_blank_index(FLAC__StreamMetadata *object, unsigned track_num, unsigned index_num) +{ + FLAC__StreamMetadata_CueSheet_Index indx; + memset(&indx, 0, sizeof(indx)); + return FLAC__metadata_object_cuesheet_track_insert_index(object, track_num, index_num, indx); +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_delete_index(FLAC__StreamMetadata *object, unsigned track_num, unsigned index_num) +{ + FLAC__StreamMetadata_CueSheet_Track *track; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks); + FLAC__ASSERT(index_num < object->data.cue_sheet.tracks[track_num].num_indices); + + track = &object->data.cue_sheet.tracks[track_num]; + + /* move all indices > index_num backward one space */ + memmove(&track->indices[index_num], &track->indices[index_num+1], sizeof(FLAC__StreamMetadata_CueSheet_Index)*(track->num_indices-index_num-1)); + + FLAC__metadata_object_cuesheet_track_resize_indices(object, track_num, track->num_indices-1); + cuesheet_calculate_length_(object); + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_resize_tracks(FLAC__StreamMetadata *object, unsigned new_num_tracks) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + + if (object->data.cue_sheet.tracks == NULL) { + FLAC__ASSERT(object->data.cue_sheet.num_tracks == 0); + if (new_num_tracks == 0) + return true; + else if ((object->data.cue_sheet.tracks = cuesheet_track_array_new_(new_num_tracks)) == NULL) + return false; + } + else { + const size_t old_size = object->data.cue_sheet.num_tracks * sizeof(FLAC__StreamMetadata_CueSheet_Track); + const size_t new_size = new_num_tracks * sizeof(FLAC__StreamMetadata_CueSheet_Track); + + /* overflow check */ + if (new_num_tracks > UINT32_MAX / sizeof(FLAC__StreamMetadata_CueSheet_Track)) + return false; + + FLAC__ASSERT(object->data.cue_sheet.num_tracks > 0); + + /* if shrinking, free the truncated entries */ + if (new_num_tracks < object->data.cue_sheet.num_tracks) { + unsigned i; + for (i = new_num_tracks; i < object->data.cue_sheet.num_tracks; i++) + free(object->data.cue_sheet.tracks[i].indices); + } + + if (new_size == 0) { + free(object->data.cue_sheet.tracks); + object->data.cue_sheet.tracks = 0; + } + else if ((object->data.cue_sheet.tracks = safe_realloc_(object->data.cue_sheet.tracks, new_size)) == NULL) + return false; + + /* if growing, zero all the lengths/pointers of new elements */ + if (new_size > old_size) + memset(object->data.cue_sheet.tracks + object->data.cue_sheet.num_tracks, 0, new_size - old_size); + } + + object->data.cue_sheet.num_tracks = new_num_tracks; + + cuesheet_calculate_length_(object); + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_set_track(FLAC__StreamMetadata *object, unsigned track_num, FLAC__StreamMetadata_CueSheet_Track *track, FLAC__bool copy) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks); + + return cuesheet_set_track_(object, object->data.cue_sheet.tracks + track_num, track, copy); +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_insert_track(FLAC__StreamMetadata *object, unsigned track_num, FLAC__StreamMetadata_CueSheet_Track *track, FLAC__bool copy) +{ + FLAC__StreamMetadata_CueSheet *cs; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + FLAC__ASSERT(track_num <= object->data.cue_sheet.num_tracks); + + cs = &object->data.cue_sheet; + + if (!FLAC__metadata_object_cuesheet_resize_tracks(object, cs->num_tracks+1)) + return false; + + /* move all tracks >= track_num forward one space */ + memmove(&cs->tracks[track_num+1], &cs->tracks[track_num], sizeof(FLAC__StreamMetadata_CueSheet_Track)*(cs->num_tracks-1-track_num)); + cs->tracks[track_num].num_indices = 0; + cs->tracks[track_num].indices = 0; + + return FLAC__metadata_object_cuesheet_set_track(object, track_num, track, copy); +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_insert_blank_track(FLAC__StreamMetadata *object, unsigned track_num) +{ + FLAC__StreamMetadata_CueSheet_Track track; + memset(&track, 0, sizeof(track)); + return FLAC__metadata_object_cuesheet_insert_track(object, track_num, &track, /*copy=*/false); +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_delete_track(FLAC__StreamMetadata *object, unsigned track_num) +{ + FLAC__StreamMetadata_CueSheet *cs; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks); + + cs = &object->data.cue_sheet; + + /* free the track at track_num */ + free(cs->tracks[track_num].indices); + + /* move all tracks > track_num backward one space */ + memmove(&cs->tracks[track_num], &cs->tracks[track_num+1], sizeof(FLAC__StreamMetadata_CueSheet_Track)*(cs->num_tracks-track_num-1)); + cs->tracks[cs->num_tracks-1].num_indices = 0; + cs->tracks[cs->num_tracks-1].indices = 0; + + return FLAC__metadata_object_cuesheet_resize_tracks(object, cs->num_tracks-1); +} + +FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_is_legal(const FLAC__StreamMetadata *object, FLAC__bool check_cd_da_subset, const char **violation) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + + return FLAC__format_cuesheet_is_legal(&object->data.cue_sheet, check_cd_da_subset, violation); +} + +static FLAC__uint64 get_index_01_offset_(const FLAC__StreamMetadata_CueSheet *cs, unsigned track) +{ + if (track >= (cs->num_tracks-1) || cs->tracks[track].num_indices < 1) + return 0; + else if (cs->tracks[track].indices[0].number == 1) + return cs->tracks[track].indices[0].offset + cs->tracks[track].offset + cs->lead_in; + else if (cs->tracks[track].num_indices < 2) + return 0; + else if (cs->tracks[track].indices[1].number == 1) + return cs->tracks[track].indices[1].offset + cs->tracks[track].offset + cs->lead_in; + else + return 0; +} + +static FLAC__uint32 cddb_add_digits_(FLAC__uint32 x) +{ + FLAC__uint32 n = 0; + while (x) { + n += (x%10); + x /= 10; + } + return n; +} + +/*@@@@add to tests*/ +FLAC_API FLAC__uint32 FLAC__metadata_object_cuesheet_calculate_cddb_id(const FLAC__StreamMetadata *object) +{ + const FLAC__StreamMetadata_CueSheet *cs; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET); + + cs = &object->data.cue_sheet; + + if (cs->num_tracks < 2) /* need at least one real track and the lead-out track */ + return 0; + + { + FLAC__uint32 i, length, sum = 0; + for (i = 0; i < (cs->num_tracks-1); i++) /* -1 to avoid counting the lead-out */ + sum += cddb_add_digits_((FLAC__uint32)(get_index_01_offset_(cs, i) / 44100)); + length = (FLAC__uint32)((cs->tracks[cs->num_tracks-1].offset+cs->lead_in) / 44100) - (FLAC__uint32)(get_index_01_offset_(cs, 0) / 44100); + + return (sum % 0xFF) << 24 | length << 8 | (FLAC__uint32)(cs->num_tracks-1); + } +} + +FLAC_API FLAC__bool FLAC__metadata_object_picture_set_mime_type(FLAC__StreamMetadata *object, char *mime_type, FLAC__bool copy) +{ + char *old; + size_t old_length, new_length; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_PICTURE); + FLAC__ASSERT(mime_type != NULL); + + old = object->data.picture.mime_type; + old_length = old? strlen(old) : 0; + new_length = strlen(mime_type); + + /* do the copy first so that if we fail we leave the object untouched */ + if (copy) { + if (new_length >= SIZE_MAX) /* overflow check */ + return false; + if (!copy_bytes_((FLAC__byte**)(&object->data.picture.mime_type), (FLAC__byte*)mime_type, new_length+1)) + return false; + } + else { + object->data.picture.mime_type = mime_type; + } + + free(old); + + object->length -= old_length; + object->length += new_length; + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_picture_set_description(FLAC__StreamMetadata *object, FLAC__byte *description, FLAC__bool copy) +{ + FLAC__byte *old; + size_t old_length, new_length; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_PICTURE); + FLAC__ASSERT(description != NULL); + + old = object->data.picture.description; + old_length = old? strlen((const char *)old) : 0; + new_length = strlen((const char *)description); + + /* do the copy first so that if we fail we leave the object untouched */ + if (copy) { + if (new_length >= SIZE_MAX) /* overflow check */ + return false; + if (!copy_bytes_(&object->data.picture.description, description, new_length+1)) + return false; + } + else { + object->data.picture.description = description; + } + + free(old); + + object->length -= old_length; + object->length += new_length; + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_picture_set_data(FLAC__StreamMetadata *object, FLAC__byte *data, FLAC__uint32 length, FLAC__bool copy) +{ + FLAC__byte *old; + + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_PICTURE); + FLAC__ASSERT((data != NULL && length > 0) || (data == NULL && length == 0 && copy == false)); + + old = object->data.picture.data; + + /* do the copy first so that if we fail we leave the object untouched */ + if (copy) { + if (!copy_bytes_(&object->data.picture.data, data, length)) + return false; + } + else { + object->data.picture.data = data; + } + + free(old); + + object->length -= object->data.picture.data_length; + object->data.picture.data_length = length; + object->length += length; + return true; +} + +FLAC_API FLAC__bool FLAC__metadata_object_picture_is_legal(const FLAC__StreamMetadata *object, const char **violation) +{ + FLAC__ASSERT(object != NULL); + FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_PICTURE); + + return FLAC__format_picture_is_legal(&object->data.picture, violation); +} diff --git a/libFLAC/ogg_decoder_aspect.c b/libFLAC/ogg_decoder_aspect.c new file mode 100644 index 00000000..40cee19c --- /dev/null +++ b/libFLAC/ogg_decoder_aspect.c @@ -0,0 +1,251 @@ +/* libFLAC - Free Lossless Audio Codec + * Copyright (C) 2002-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <string.h> /* for memcpy() */ +#include "FLAC/assert.h" +#include "private/ogg_decoder_aspect.h" +#include "private/ogg_mapping.h" +#include "private/macros.h" + + +/*********************************************************************** + * + * Public class methods + * + ***********************************************************************/ + +FLAC__bool FLAC__ogg_decoder_aspect_init(FLAC__OggDecoderAspect *aspect) +{ + /* we will determine the serial number later if necessary */ + if(ogg_stream_init(&aspect->stream_state, aspect->serial_number) != 0) + return false; + + if(ogg_sync_init(&aspect->sync_state) != 0) + return false; + + aspect->version_major = ~(0u); + aspect->version_minor = ~(0u); + + aspect->need_serial_number = aspect->use_first_serial_number; + + aspect->end_of_stream = false; + aspect->have_working_page = false; + + return true; +} + +void FLAC__ogg_decoder_aspect_finish(FLAC__OggDecoderAspect *aspect) +{ + (void)ogg_sync_clear(&aspect->sync_state); + (void)ogg_stream_clear(&aspect->stream_state); +} + +void FLAC__ogg_decoder_aspect_set_serial_number(FLAC__OggDecoderAspect *aspect, long value) +{ + aspect->use_first_serial_number = false; + aspect->serial_number = value; +} + +void FLAC__ogg_decoder_aspect_set_defaults(FLAC__OggDecoderAspect *aspect) +{ + aspect->use_first_serial_number = true; +} + +void FLAC__ogg_decoder_aspect_flush(FLAC__OggDecoderAspect *aspect) +{ + (void)ogg_stream_reset(&aspect->stream_state); + (void)ogg_sync_reset(&aspect->sync_state); + aspect->end_of_stream = false; + aspect->have_working_page = false; +} + +void FLAC__ogg_decoder_aspect_reset(FLAC__OggDecoderAspect *aspect) +{ + FLAC__ogg_decoder_aspect_flush(aspect); + + if(aspect->use_first_serial_number) + aspect->need_serial_number = true; +} + +FLAC__OggDecoderAspectReadStatus FLAC__ogg_decoder_aspect_read_callback_wrapper(FLAC__OggDecoderAspect *aspect, FLAC__byte buffer[], size_t *bytes, FLAC__OggDecoderAspectReadCallbackProxy read_callback, const FLAC__StreamDecoder *decoder, void *client_data) +{ + static const size_t OGG_BYTES_CHUNK = 8192; + const size_t bytes_requested = *bytes; + + /* + * The FLAC decoding API uses pull-based reads, whereas Ogg decoding + * is push-based. In libFLAC, when you ask to decode a frame, the + * decoder will eventually call the read callback to supply some data, + * but how much it asks for depends on how much free space it has in + * its internal buffer. It does not try to grow its internal buffer + * to accomodate a whole frame because then the internal buffer size + * could not be limited, which is necessary in embedded applications. + * + * Ogg however grows its internal buffer until a whole page is present; + * only then can you get decoded data out. So we can't just ask for + * the same number of bytes from Ogg, then pass what's decoded down to + * libFLAC. If what libFLAC is asking for will not contain a whole + * page, then we will get no data from ogg_sync_pageout(), and at the + * same time cannot just read more data from the client for the purpose + * of getting a whole decoded page because the decoded size might be + * larger than libFLAC's internal buffer. + * + * Instead, whenever this read callback wrapper is called, we will + * continually request data from the client until we have at least one + * page, and manage pages internally so that we can send pieces of + * pages down to libFLAC in such a way that we obey its size + * requirement. To limit the amount of callbacks, we will always try + * to read in enough pages to return the full number of bytes + * requested. + */ + *bytes = 0; + while (*bytes < bytes_requested && !aspect->end_of_stream) { + if (aspect->have_working_page) { + if (aspect->have_working_packet) { + size_t n = bytes_requested - *bytes; + if ((size_t)aspect->working_packet.bytes <= n) { + /* the rest of the packet will fit in the buffer */ + n = aspect->working_packet.bytes; + memcpy(buffer, aspect->working_packet.packet, n); + *bytes += n; + buffer += n; + aspect->have_working_packet = false; + } + else { + /* only n bytes of the packet will fit in the buffer */ + memcpy(buffer, aspect->working_packet.packet, n); + *bytes += n; + buffer += n; + aspect->working_packet.packet += n; + aspect->working_packet.bytes -= n; + } + } + else { + /* try and get another packet */ + const int ret = ogg_stream_packetout(&aspect->stream_state, &aspect->working_packet); + if (ret > 0) { + aspect->have_working_packet = true; + /* if it is the first header packet, check for magic and a supported Ogg FLAC mapping version */ + if (aspect->working_packet.bytes > 0 && aspect->working_packet.packet[0] == FLAC__OGG_MAPPING_FIRST_HEADER_PACKET_TYPE) { + const FLAC__byte *b = aspect->working_packet.packet; + const unsigned header_length = + FLAC__OGG_MAPPING_PACKET_TYPE_LENGTH + + FLAC__OGG_MAPPING_MAGIC_LENGTH + + FLAC__OGG_MAPPING_VERSION_MAJOR_LENGTH + + FLAC__OGG_MAPPING_VERSION_MINOR_LENGTH + + FLAC__OGG_MAPPING_NUM_HEADERS_LENGTH; + if (aspect->working_packet.bytes < (long)header_length) + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_NOT_FLAC; + b += FLAC__OGG_MAPPING_PACKET_TYPE_LENGTH; + if (memcmp(b, FLAC__OGG_MAPPING_MAGIC, FLAC__OGG_MAPPING_MAGIC_LENGTH)) + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_NOT_FLAC; + b += FLAC__OGG_MAPPING_MAGIC_LENGTH; + aspect->version_major = (unsigned)(*b); + b += FLAC__OGG_MAPPING_VERSION_MAJOR_LENGTH; + aspect->version_minor = (unsigned)(*b); + if (aspect->version_major != 1) + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_UNSUPPORTED_MAPPING_VERSION; + aspect->working_packet.packet += header_length; + aspect->working_packet.bytes -= header_length; + } + } + else if (ret == 0) { + aspect->have_working_page = false; + } + else { /* ret < 0 */ + /* lost sync, we'll leave the working page for the next call */ + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_LOST_SYNC; + } + } + } + else { + /* try and get another page */ + const int ret = ogg_sync_pageout(&aspect->sync_state, &aspect->working_page); + if (ret > 0) { + /* got a page, grab the serial number if necessary */ + if(aspect->need_serial_number) { + aspect->stream_state.serialno = aspect->serial_number = ogg_page_serialno(&aspect->working_page); + aspect->need_serial_number = false; + } + if(ogg_stream_pagein(&aspect->stream_state, &aspect->working_page) == 0) { + aspect->have_working_page = true; + aspect->have_working_packet = false; + } + /* else do nothing, could be a page from another stream */ + } + else if (ret == 0) { + /* need more data */ + const size_t ogg_bytes_to_read = flac_max(bytes_requested - *bytes, OGG_BYTES_CHUNK); + char *oggbuf = ogg_sync_buffer(&aspect->sync_state, ogg_bytes_to_read); + + if(0 == oggbuf) { + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_MEMORY_ALLOCATION_ERROR; + } + else { + size_t ogg_bytes_read = ogg_bytes_to_read; + + switch(read_callback(decoder, (FLAC__byte*)oggbuf, &ogg_bytes_read, client_data)) { + case FLAC__OGG_DECODER_ASPECT_READ_STATUS_OK: + break; + case FLAC__OGG_DECODER_ASPECT_READ_STATUS_END_OF_STREAM: + aspect->end_of_stream = true; + break; + case FLAC__OGG_DECODER_ASPECT_READ_STATUS_ABORT: + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_ABORT; + default: + FLAC__ASSERT(0); + } + + if(ogg_sync_wrote(&aspect->sync_state, ogg_bytes_read) < 0) { + /* double protection; this will happen if the read callback returns more bytes than the max requested, which would overflow Ogg's internal buffer */ + FLAC__ASSERT(0); + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_ERROR; + } + } + } + else { /* ret < 0 */ + /* lost sync, bail out */ + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_LOST_SYNC; + } + } + } + + if (aspect->end_of_stream && *bytes == 0) { + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_END_OF_STREAM; + } + + return FLAC__OGG_DECODER_ASPECT_READ_STATUS_OK; +} diff --git a/libFLAC/ogg_encoder_aspect.c b/libFLAC/ogg_encoder_aspect.c new file mode 100644 index 00000000..ebd4614b --- /dev/null +++ b/libFLAC/ogg_encoder_aspect.c @@ -0,0 +1,228 @@ +/* libFLAC - Free Lossless Audio Codec + * Copyright (C) 2002-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <string.h> /* for memset() */ +#include "FLAC/assert.h" +#include "private/ogg_encoder_aspect.h" +#include "private/ogg_mapping.h" + +static const FLAC__byte FLAC__OGG_MAPPING_VERSION_MAJOR = 1; +static const FLAC__byte FLAC__OGG_MAPPING_VERSION_MINOR = 0; + +/*********************************************************************** + * + * Public class methods + * + ***********************************************************************/ + +FLAC__bool FLAC__ogg_encoder_aspect_init(FLAC__OggEncoderAspect *aspect) +{ + /* we will determine the serial number later if necessary */ + if(ogg_stream_init(&aspect->stream_state, aspect->serial_number) != 0) + return false; + + aspect->seen_magic = false; + aspect->is_first_packet = true; + aspect->samples_written = 0; + + return true; +} + +void FLAC__ogg_encoder_aspect_finish(FLAC__OggEncoderAspect *aspect) +{ + (void)ogg_stream_clear(&aspect->stream_state); + /*@@@ what about the page? */ +} + +void FLAC__ogg_encoder_aspect_set_serial_number(FLAC__OggEncoderAspect *aspect, long value) +{ + aspect->serial_number = value; +} + +FLAC__bool FLAC__ogg_encoder_aspect_set_num_metadata(FLAC__OggEncoderAspect *aspect, unsigned value) +{ + if(value < (1u << FLAC__OGG_MAPPING_NUM_HEADERS_LEN)) { + aspect->num_metadata = value; + return true; + } + else + return false; +} + +void FLAC__ogg_encoder_aspect_set_defaults(FLAC__OggEncoderAspect *aspect) +{ + aspect->serial_number = 0; + aspect->num_metadata = 0; +} + +/* + * The basic FLAC -> Ogg mapping goes like this: + * + * - 'fLaC' magic and STREAMINFO block get combined into the first + * packet. The packet is prefixed with + * + the one-byte packet type 0x7F + * + 'FLAC' magic + * + the 2 byte Ogg FLAC mapping version number + * + tne 2 byte big-endian # of header packets + * - The first packet is flushed to the first page. + * - Each subsequent metadata block goes into its own packet. + * - Each metadata packet is flushed to page (this is not required, + * the mapping only requires that a flush must occur after all + * metadata is written). + * - Each subsequent FLAC audio frame goes into its own packet. + * + * WATCHOUT: + * This depends on the behavior of FLAC__StreamEncoder that we get a + * separate write callback for the fLaC magic, and then separate write + * callbacks for each metadata block and audio frame. + */ +FLAC__StreamEncoderWriteStatus FLAC__ogg_encoder_aspect_write_callback_wrapper(FLAC__OggEncoderAspect *aspect, const FLAC__byte buffer[], size_t bytes, unsigned samples, unsigned current_frame, FLAC__bool is_last_block, FLAC__OggEncoderAspectWriteCallbackProxy write_callback, void *encoder, void *client_data) +{ + /* WATCHOUT: + * This depends on the behavior of FLAC__StreamEncoder that 'samples' + * will be 0 for metadata writes. + */ + const FLAC__bool is_metadata = (samples == 0); + + /* + * Treat fLaC magic packet specially. We will note when we see it, then + * wait until we get the STREAMINFO and prepend it in that packet + */ + if(aspect->seen_magic) { + ogg_packet packet; + FLAC__byte synthetic_first_packet_body[ + FLAC__OGG_MAPPING_PACKET_TYPE_LENGTH + + FLAC__OGG_MAPPING_MAGIC_LENGTH + + FLAC__OGG_MAPPING_VERSION_MAJOR_LENGTH + + FLAC__OGG_MAPPING_VERSION_MINOR_LENGTH + + FLAC__OGG_MAPPING_NUM_HEADERS_LENGTH + + FLAC__STREAM_SYNC_LENGTH + + FLAC__STREAM_METADATA_HEADER_LENGTH + + FLAC__STREAM_METADATA_STREAMINFO_LENGTH + ]; + + memset(&packet, 0, sizeof(packet)); + packet.granulepos = aspect->samples_written + samples; + + if(aspect->is_first_packet) { + FLAC__byte *b = synthetic_first_packet_body; + if(bytes != FLAC__STREAM_METADATA_HEADER_LENGTH + FLAC__STREAM_METADATA_STREAMINFO_LENGTH) { + /* + * If we get here, our assumption about the way write callbacks happen + * (explained above) is wrong + */ + FLAC__ASSERT(0); + return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR; + } + /* add first header packet type */ + *b = FLAC__OGG_MAPPING_FIRST_HEADER_PACKET_TYPE; + b += FLAC__OGG_MAPPING_PACKET_TYPE_LENGTH; + /* add 'FLAC' mapping magic */ + memcpy(b, FLAC__OGG_MAPPING_MAGIC, FLAC__OGG_MAPPING_MAGIC_LENGTH); + b += FLAC__OGG_MAPPING_MAGIC_LENGTH; + /* add Ogg FLAC mapping major version number */ + memcpy(b, &FLAC__OGG_MAPPING_VERSION_MAJOR, FLAC__OGG_MAPPING_VERSION_MAJOR_LENGTH); + b += FLAC__OGG_MAPPING_VERSION_MAJOR_LENGTH; + /* add Ogg FLAC mapping minor version number */ + memcpy(b, &FLAC__OGG_MAPPING_VERSION_MINOR, FLAC__OGG_MAPPING_VERSION_MINOR_LENGTH); + b += FLAC__OGG_MAPPING_VERSION_MINOR_LENGTH; + /* add number of header packets */ + *b = (FLAC__byte)(aspect->num_metadata >> 8); + b++; + *b = (FLAC__byte)(aspect->num_metadata); + b++; + /* add native FLAC 'fLaC' magic */ + memcpy(b, FLAC__STREAM_SYNC_STRING, FLAC__STREAM_SYNC_LENGTH); + b += FLAC__STREAM_SYNC_LENGTH; + /* add STREAMINFO */ + memcpy(b, buffer, bytes); + FLAC__ASSERT(b + bytes - synthetic_first_packet_body == sizeof(synthetic_first_packet_body)); + packet.packet = (unsigned char *)synthetic_first_packet_body; + packet.bytes = sizeof(synthetic_first_packet_body); + + packet.b_o_s = 1; + aspect->is_first_packet = false; + } + else { + packet.packet = (unsigned char *)buffer; + packet.bytes = bytes; + } + + if(is_last_block) { + /* we used to check: + * FLAC__ASSERT(total_samples_estimate == 0 || total_samples_estimate == aspect->samples_written + samples); + * but it's really not useful since total_samples_estimate is an estimate and can be inexact + */ + packet.e_o_s = 1; + } + + if(ogg_stream_packetin(&aspect->stream_state, &packet) != 0) + return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR; + + /*@@@ can't figure out a way to pass a useful number for 'samples' to the write_callback, so we'll just pass 0 */ + if(is_metadata) { + while(ogg_stream_flush(&aspect->stream_state, &aspect->page) != 0) { + if(write_callback(encoder, aspect->page.header, aspect->page.header_len, 0, current_frame, client_data) != FLAC__STREAM_ENCODER_WRITE_STATUS_OK) + return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR; + if(write_callback(encoder, aspect->page.body, aspect->page.body_len, 0, current_frame, client_data) != FLAC__STREAM_ENCODER_WRITE_STATUS_OK) + return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR; + } + } + else { + while(ogg_stream_pageout(&aspect->stream_state, &aspect->page) != 0) { + if(write_callback(encoder, aspect->page.header, aspect->page.header_len, 0, current_frame, client_data) != FLAC__STREAM_ENCODER_WRITE_STATUS_OK) + return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR; + if(write_callback(encoder, aspect->page.body, aspect->page.body_len, 0, current_frame, client_data) != FLAC__STREAM_ENCODER_WRITE_STATUS_OK) + return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR; + } + } + } + else if(is_metadata && current_frame == 0 && samples == 0 && bytes == 4 && 0 == memcmp(buffer, FLAC__STREAM_SYNC_STRING, sizeof(FLAC__STREAM_SYNC_STRING))) { + aspect->seen_magic = true; + } + else { + /* + * If we get here, our assumption about the way write callbacks happen + * explained above is wrong + */ + FLAC__ASSERT(0); + return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR; + } + + aspect->samples_written += samples; + + return FLAC__STREAM_ENCODER_WRITE_STATUS_OK; +} diff --git a/libFLAC/ogg_helper.c b/libFLAC/ogg_helper.c new file mode 100644 index 00000000..7ca9160b --- /dev/null +++ b/libFLAC/ogg_helper.c @@ -0,0 +1,210 @@ +/* libFLAC - Free Lossless Audio Codec + * Copyright (C) 2004-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> /* for malloc() */ +#include <string.h> /* for memcmp(), memcpy() */ +#include "FLAC/assert.h" +#include "share/alloc.h" +#include "private/ogg_helper.h" +#include "protected/stream_encoder.h" + + +static FLAC__bool full_read_(FLAC__StreamEncoder *encoder, FLAC__byte *buffer, size_t bytes, FLAC__StreamEncoderReadCallback read_callback, void *client_data) +{ + while(bytes > 0) { + size_t bytes_read = bytes; + switch(read_callback(encoder, buffer, &bytes_read, client_data)) { + case FLAC__STREAM_ENCODER_READ_STATUS_CONTINUE: + bytes -= bytes_read; + buffer += bytes_read; + break; + case FLAC__STREAM_ENCODER_READ_STATUS_END_OF_STREAM: + if(bytes_read == 0) { + encoder->protected_->state = FLAC__STREAM_ENCODER_OGG_ERROR; + return false; + } + bytes -= bytes_read; + buffer += bytes_read; + break; + case FLAC__STREAM_ENCODER_READ_STATUS_ABORT: + encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR; + return false; + case FLAC__STREAM_ENCODER_READ_STATUS_UNSUPPORTED: + return false; + default: + /* double protection: */ + FLAC__ASSERT(0); + encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR; + return false; + } + } + + return true; +} + +void simple_ogg_page__init(ogg_page *page) +{ + page->header = 0; + page->header_len = 0; + page->body = 0; + page->body_len = 0; +} + +void simple_ogg_page__clear(ogg_page *page) +{ + if(page->header) + free(page->header); + if(page->body) + free(page->body); + simple_ogg_page__init(page); +} + +FLAC__bool simple_ogg_page__get_at(FLAC__StreamEncoder *encoder, FLAC__uint64 position, ogg_page *page, FLAC__StreamEncoderSeekCallback seek_callback, FLAC__StreamEncoderReadCallback read_callback, void *client_data) +{ + static const unsigned OGG_HEADER_FIXED_PORTION_LEN = 27; + static const unsigned OGG_MAX_HEADER_LEN = 27/*OGG_HEADER_FIXED_PORTION_LEN*/ + 255; + FLAC__byte crc[4]; + FLAC__StreamEncoderSeekStatus seek_status; + + FLAC__ASSERT(page->header == 0); + FLAC__ASSERT(page->header_len == 0); + FLAC__ASSERT(page->body == 0); + FLAC__ASSERT(page->body_len == 0); + + /* move the stream pointer to the supposed beginning of the page */ + if(0 == seek_callback) + return false; + if((seek_status = seek_callback((FLAC__StreamEncoder*)encoder, position, client_data)) != FLAC__STREAM_ENCODER_SEEK_STATUS_OK) { + if(seek_status == FLAC__STREAM_ENCODER_SEEK_STATUS_ERROR) + encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR; + return false; + } + + /* allocate space for the page header */ + if(0 == (page->header = safe_malloc_(OGG_MAX_HEADER_LEN))) { + encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR; + return false; + } + + /* read in the fixed part of the page header (up to but not including + * the segment table */ + if(!full_read_(encoder, page->header, OGG_HEADER_FIXED_PORTION_LEN, read_callback, client_data)) + return false; + + page->header_len = OGG_HEADER_FIXED_PORTION_LEN + page->header[26]; + + /* check to see if it's a correct, "simple" page (one packet only) */ + if( + memcmp(page->header, "OggS", 4) || /* doesn't start with OggS */ + (page->header[5] & 0x01) || /* continued packet */ + memcmp(page->header+6, "\0\0\0\0\0\0\0\0", 8) || /* granulepos is non-zero */ + page->header[26] == 0 /* packet is 0-size */ + ) { + encoder->protected_->state = FLAC__STREAM_ENCODER_OGG_ERROR; + return false; + } + + /* read in the segment table */ + if(!full_read_(encoder, page->header + OGG_HEADER_FIXED_PORTION_LEN, page->header[26], read_callback, client_data)) + return false; + + { + unsigned i; + + /* check to see that it specifies a single packet */ + for(i = 0; i < (unsigned)page->header[26] - 1; i++) { + if(page->header[i + OGG_HEADER_FIXED_PORTION_LEN] != 255) { + encoder->protected_->state = FLAC__STREAM_ENCODER_OGG_ERROR; + return false; + } + } + + page->body_len = 255 * i + page->header[i + OGG_HEADER_FIXED_PORTION_LEN]; + } + + /* allocate space for the page body */ + if(0 == (page->body = safe_malloc_(page->body_len))) { + encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR; + return false; + } + + /* read in the page body */ + if(!full_read_(encoder, page->body, page->body_len, read_callback, client_data)) + return false; + + /* check the CRC */ + memcpy(crc, page->header+22, 4); + ogg_page_checksum_set(page); + if(memcmp(crc, page->header+22, 4)) { + encoder->protected_->state = FLAC__STREAM_ENCODER_OGG_ERROR; + return false; + } + + return true; +} + +FLAC__bool simple_ogg_page__set_at(FLAC__StreamEncoder *encoder, FLAC__uint64 position, ogg_page *page, FLAC__StreamEncoderSeekCallback seek_callback, FLAC__StreamEncoderWriteCallback write_callback, void *client_data) +{ + FLAC__StreamEncoderSeekStatus seek_status; + + FLAC__ASSERT(page->header != 0); + FLAC__ASSERT(page->header_len != 0); + FLAC__ASSERT(page->body != 0); + FLAC__ASSERT(page->body_len != 0); + + /* move the stream pointer to the supposed beginning of the page */ + if(0 == seek_callback) + return false; + if((seek_status = seek_callback((FLAC__StreamEncoder*)encoder, position, client_data)) != FLAC__STREAM_ENCODER_SEEK_STATUS_OK) { + if(seek_status == FLAC__STREAM_ENCODER_SEEK_STATUS_ERROR) + encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR; + return false; + } + + ogg_page_checksum_set(page); + + /* re-write the page */ + if(write_callback((FLAC__StreamEncoder*)encoder, page->header, page->header_len, 0, 0, client_data) != FLAC__STREAM_ENCODER_WRITE_STATUS_OK) { + encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR; + return false; + } + if(write_callback((FLAC__StreamEncoder*)encoder, page->body, page->body_len, 0, 0, client_data) != FLAC__STREAM_ENCODER_WRITE_STATUS_OK) { + encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR; + return false; + } + + return true; +} diff --git a/libFLAC/ogg_mapping.c b/libFLAC/ogg_mapping.c new file mode 100644 index 00000000..08fa5146 --- /dev/null +++ b/libFLAC/ogg_mapping.c @@ -0,0 +1,48 @@ +/* libFLAC - Free Lossless Audio Codec + * Copyright (C) 2004-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/ogg_mapping.h" + +const unsigned FLAC__OGG_MAPPING_PACKET_TYPE_LEN = 8; /* bits */ + +const FLAC__byte FLAC__OGG_MAPPING_FIRST_HEADER_PACKET_TYPE = 0x7f; + +const FLAC__byte * const FLAC__OGG_MAPPING_MAGIC = (const FLAC__byte * const)"FLAC"; + +const unsigned FLAC__OGG_MAPPING_VERSION_MAJOR_LEN = 8; /* bits */ +const unsigned FLAC__OGG_MAPPING_VERSION_MINOR_LEN = 8; /* bits */ + +const unsigned FLAC__OGG_MAPPING_NUM_HEADERS_LEN = 16; /* bits */ diff --git a/libFLAC/stream_decoder.c b/libFLAC/stream_decoder.c index 27bac3e3..d364b0ce 100644 --- a/libFLAC/stream_decoder.c +++ b/libFLAC/stream_decoder.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -56,13 +56,7 @@ /* technically this should be in an "export.c" but this is convenient enough */ -FLAC_API int FLAC_API_SUPPORTS_OGG_FLAC = -#if FLAC__HAS_OGG - 1 -#else - 0 -#endif -; +FLAC_API int FLAC_API_SUPPORTS_OGG_FLAC = FLAC__HAS_OGG; /*********************************************************************** @@ -125,9 +119,7 @@ static FLAC__bool file_eof_callback_(const FLAC__StreamDecoder *decoder, void *c ***********************************************************************/ typedef struct FLAC__StreamDecoderPrivate { -#if FLAC__HAS_OGG FLAC__bool is_ogg; -#endif FLAC__StreamDecoderReadCallback read_callback; FLAC__StreamDecoderSeekCallback seek_callback; FLAC__StreamDecoderTellCallback tell_callback; @@ -174,9 +166,7 @@ typedef struct FLAC__StreamDecoderPrivate { FLAC__uint64 first_frame_offset; /* hint to the seek routine of where in the stream the first audio frame starts */ FLAC__uint64 target_sample; unsigned unparseable_frame_count; /* used to tell whether we're decoding a future version of FLAC or just got a bad sync */ -#if FLAC__HAS_OGG FLAC__bool got_a_frame; /* hack needed in Ogg FLAC seek routine to check when process_single() actually writes a frame */ -#endif } FLAC__StreamDecoderPrivate; /*********************************************************************** @@ -362,10 +352,8 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_( if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED) return FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED; -#if !FLAC__HAS_OGG - if(is_ogg) + if(FLAC__HAS_OGG == 0 && is_ogg) return FLAC__STREAM_DECODER_INIT_STATUS_UNSUPPORTED_CONTAINER; -#endif if( 0 == read_callback || @@ -405,7 +393,7 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_( decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32; } #endif -#ifdef FLAC__HAS_X86INTRIN +#if FLAC__HAS_X86INTRIN && ! defined FLAC__INTEGER_ONLY_LIBRARY # if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT_SSE: not better than MMX asm */ if(decoder->private_->cpuinfo.ia32.sse2) { decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2; @@ -655,11 +643,10 @@ FLAC_API FLAC__bool FLAC__stream_decoder_finish(FLAC__StreamDecoder *decoder) */ FLAC__MD5Final(decoder->private_->computed_md5sum, &decoder->private_->md5context); - if(decoder->private_->has_seek_table && 0 != decoder->private_->seek_table.data.seek_table.points) { - free(decoder->private_->seek_table.data.seek_table.points); - decoder->private_->seek_table.data.seek_table.points = 0; - decoder->private_->has_seek_table = false; - } + free(decoder->private_->seek_table.data.seek_table.points); + decoder->private_->seek_table.data.seek_table.points = 0; + decoder->private_->has_seek_table = false; + FLAC__bitreader_free(decoder->private_->input); for(i = 0; i < FLAC__MAX_CHANNELS; i++) { /* WATCHOUT: @@ -914,10 +901,9 @@ FLAC_API FLAC__bool FLAC__stream_decoder_get_decode_position(const FLAC__StreamD FLAC__ASSERT(0 != decoder->private_); FLAC__ASSERT(0 != position); -#if FLAC__HAS_OGG - if(decoder->private_->is_ogg) + if(FLAC__HAS_OGG && decoder->private_->is_ogg) return false; -#endif + if(0 == decoder->private_->tell_callback) return false; if(decoder->private_->tell_callback(decoder, position, decoder->private_->client_data) != FLAC__STREAM_DECODER_TELL_STATUS_OK) @@ -990,11 +976,11 @@ FLAC_API FLAC__bool FLAC__stream_decoder_reset(FLAC__StreamDecoder *decoder) decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_METADATA; decoder->private_->has_stream_info = false; - if(decoder->private_->has_seek_table && 0 != decoder->private_->seek_table.data.seek_table.points) { - free(decoder->private_->seek_table.data.seek_table.points); - decoder->private_->seek_table.data.seek_table.points = 0; - decoder->private_->has_seek_table = false; - } + + free(decoder->private_->seek_table.data.seek_table.points); + decoder->private_->seek_table.data.seek_table.points = 0; + decoder->private_->has_seek_table = false; + decoder->private_->do_md5_checking = decoder->protected_->md5_checking; /* * This goes in reset() and not flush() because according to the spec, a @@ -1234,9 +1220,7 @@ unsigned FLAC__stream_decoder_get_input_bytes_unconsumed(const FLAC__StreamDecod void set_defaults_(FLAC__StreamDecoder *decoder) { -#if FLAC__HAS_OGG decoder->private_->is_ogg = false; -#endif decoder->private_->read_callback = 0; decoder->private_->seek_callback = 0; decoder->private_->tell_callback = 0; @@ -1434,6 +1418,9 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder) decoder->private_->metadata_callback(decoder, &decoder->private_->stream_info, decoder->private_->client_data); } else if(type == FLAC__METADATA_TYPE_SEEKTABLE) { + /* just in case we already have a seek table, and reading the next one fails: */ + decoder->private_->has_seek_table = false; + if(!read_metadata_seektable_(decoder, is_last, length)) return false; @@ -1740,7 +1727,6 @@ FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__Stre if (0 == (obj->comments = safe_malloc_mul_2op_p(obj->num_comments, /*times*/sizeof(FLAC__StreamMetadata_VorbisComment_Entry)))) { obj->num_comments = 0; decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR; - obj->num_comments = 0; return false; } for (i = 0; i < obj->num_comments; i++) { @@ -1768,6 +1754,7 @@ FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__Stre length -= obj->comments[i].length; if (0 == (obj->comments[i].entry = safe_malloc_add_2op_(obj->comments[i].length, /*+*/1))) { decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR; + obj->num_comments = i; return false; } memset (obj->comments[i].entry, 0, obj->comments[i].length) ; @@ -2162,8 +2149,10 @@ FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FL /* write it */ if(do_full_decode) { - if(write_audio_frame_to_client_(decoder, &decoder->private_->frame, (const FLAC__int32 * const *)decoder->private_->output) != FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE) + if(write_audio_frame_to_client_(decoder, &decoder->private_->frame, (const FLAC__int32 * const *)decoder->private_->output) != FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE) { + decoder->protected_->state = FLAC__STREAM_DECODER_ABORTED; return false; + } } decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC; @@ -2496,6 +2485,8 @@ FLAC__bool read_subframe_(FLAC__StreamDecoder *decoder, unsigned channel, unsign if(!FLAC__bitreader_read_unary_unsigned(decoder->private_->input, &u)) return false; /* read_callback_ sets the state for us */ decoder->private_->frame.subframes[channel].wasted_bits = u+1; + if (decoder->private_->frame.subframes[channel].wasted_bits >= bps) + return false; bps -= decoder->private_->frame.subframes[channel].wasted_bits; } else @@ -3122,7 +3113,7 @@ FLAC__bool seek_to_absolute_sample_(FLAC__StreamDecoder *decoder, FLAC__uint64 s return false; } #ifndef FLAC__INTEGER_ONLY_LIBRARY - pos = (FLAC__int64)lower_bound + (FLAC__int64)((FLAC__double)(target_sample - lower_bound_sample) / (FLAC__double)(upper_bound_sample - lower_bound_sample) * (FLAC__double)(upper_bound - lower_bound)) - approx_bytes_per_frame; + pos = (FLAC__int64)lower_bound + (FLAC__int64)((double)(target_sample - lower_bound_sample) / (double)(upper_bound_sample - lower_bound_sample) * (double)(upper_bound - lower_bound)) - approx_bytes_per_frame; #else /* a little less accurate: */ if(upper_bound - lower_bound < 0xffffffff) @@ -3149,7 +3140,8 @@ FLAC__bool seek_to_absolute_sample_(FLAC__StreamDecoder *decoder, FLAC__uint64 s * FLAC__stream_decoder_process_single() to return false. */ decoder->private_->unparseable_frame_count = 0; - if(!FLAC__stream_decoder_process_single(decoder)) { + if(!FLAC__stream_decoder_process_single(decoder) || + decoder->protected_->state == FLAC__STREAM_DECODER_ABORTED) { decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR; return false; } @@ -3246,7 +3238,7 @@ FLAC__bool seek_to_absolute_sample_ogg_(FLAC__StreamDecoder *decoder, FLAC__uint } else { #ifndef FLAC__INTEGER_ONLY_LIBRARY - pos = (FLAC__uint64)((FLAC__double)(target_sample - left_sample) / (FLAC__double)(right_sample - left_sample) * (FLAC__double)(right_pos - left_pos)); + pos = (FLAC__uint64)((double)(target_sample - left_sample) / (double)(right_sample - left_sample) * (double)(right_pos - left_pos)); #else /* a little less accurate: */ if ((target_sample-left_sample <= 0xffffffff) && (right_pos-left_pos <= 0xffffffff)) @@ -3276,7 +3268,8 @@ FLAC__bool seek_to_absolute_sample_ogg_(FLAC__StreamDecoder *decoder, FLAC__uint did_a_seek = false; decoder->private_->got_a_frame = false; - if(!FLAC__stream_decoder_process_single(decoder)) { + if(!FLAC__stream_decoder_process_single(decoder) || + decoder->protected_->state == FLAC__STREAM_DECODER_ABORTED) { decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR; return false; } diff --git a/libFLAC/stream_encoder.c b/libFLAC/stream_encoder.c index 23cc22db..037b8cb5 100644 --- a/libFLAC/stream_encoder.c +++ b/libFLAC/stream_encoder.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -39,6 +39,10 @@ #include <stdlib.h> /* for malloc() */ #include <string.h> /* for memcpy() */ #include <sys/types.h> /* for off_t */ +#ifdef _WIN32 +#include <windows.h> /* for GetFileType() */ +#include <io.h> /* for _get_osfhandle() */ +#endif #include "share/compat.h" #include "FLAC/assert.h" #include "FLAC/stream_decoder.h" @@ -349,8 +353,8 @@ typedef struct FLAC__StreamEncoderPrivate { FLAC__CPUInfo cpuinfo; void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps); #ifndef FLAC__INTEGER_ONLY_LIBRARY - unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); - unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); + unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); + unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); #else unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); @@ -361,15 +365,10 @@ typedef struct FLAC__StreamEncoderPrivate { void (*local_lpc_compute_residual_from_qlp_coefficients_64bit)(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]); void (*local_lpc_compute_residual_from_qlp_coefficients_16bit)(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]); #endif - FLAC__bool use_wide_by_block; /* use slow 64-bit versions of some functions because of the block size */ - FLAC__bool use_wide_by_partition; /* use slow 64-bit versions of some functions because of the min partition order and blocksize */ - FLAC__bool use_wide_by_order; /* use slow 64-bit versions of some functions because of the lpc order */ FLAC__bool disable_constant_subframes; FLAC__bool disable_fixed_subframes; FLAC__bool disable_verbatim_subframes; -#if FLAC__HAS_OGG FLAC__bool is_ogg; -#endif FLAC__StreamEncoderReadCallback read_callback; /* currently only needed for Ogg FLAC */ FLAC__StreamEncoderSeekCallback seek_callback; FLAC__StreamEncoderTellCallback tell_callback; @@ -634,10 +633,8 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( if(encoder->protected_->state != FLAC__STREAM_ENCODER_UNINITIALIZED) return FLAC__STREAM_ENCODER_INIT_STATUS_ALREADY_INITIALIZED; -#if !FLAC__HAS_OGG - if(is_ogg) + if(FLAC__HAS_OGG == 0 && is_ogg) return FLAC__STREAM_ENCODER_INIT_STATUS_UNSUPPORTED_CONTAINER; -#endif if(0 == write_callback || (seek_callback && 0 == tell_callback)) return FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_CALLBACKS; @@ -853,7 +850,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( encoder->private_->abs_residual_partition_sums_unaligned = encoder->private_->abs_residual_partition_sums = 0; encoder->private_->raw_bits_per_partition_unaligned = encoder->private_->raw_bits_per_partition = 0; #ifndef FLAC__INTEGER_ONLY_LIBRARY - encoder->private_->loose_mid_side_stereo_frames = (unsigned)((FLAC__double)encoder->protected_->sample_rate * 0.4 / (FLAC__double)encoder->protected_->blocksize + 0.5); + encoder->private_->loose_mid_side_stereo_frames = (unsigned)((double)encoder->protected_->sample_rate * 0.4 / (double)encoder->protected_->blocksize + 0.5); #else /* 26214 is the approximate fixed-point equivalent to 0.4 (0.4 * 2^16) */ /* sample rate can be up to 655350 Hz, and thus use 20 bits, so we do the multiply÷ by hand */ @@ -869,10 +866,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( encoder->private_->current_sample_number = 0; encoder->private_->current_frame_number = 0; - encoder->private_->use_wide_by_block = (encoder->protected_->bits_per_sample + FLAC__bitmath_ilog2(encoder->protected_->blocksize)+1 > 30); - encoder->private_->use_wide_by_order = (encoder->protected_->bits_per_sample + FLAC__bitmath_ilog2(flac_max(encoder->protected_->max_lpc_order, FLAC__MAX_FIXED_ORDER))+1 > 30); /*@@@ need to use this? */ - encoder->private_->use_wide_by_partition = (false); /*@@@ need to set this */ - /* * get the CPU info and set the function pointers */ @@ -924,7 +917,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( if(encoder->private_->cpuinfo.ia32.mmx && encoder->private_->cpuinfo.ia32.cmov) encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov; # endif /* FLAC__HAS_NASM */ -# ifdef FLAC__HAS_X86INTRIN +# if FLAC__HAS_X86INTRIN # if defined FLAC__SSE_SUPPORTED if(encoder->private_->cpuinfo.ia32.sse) { if(encoder->private_->cpuinfo.ia32.sse42 || !encoder->private_->cpuinfo.ia32.intel) { /* use new autocorrelation functions */ @@ -989,7 +982,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( # endif /* FLAC__HAS_X86INTRIN */ # elif defined FLAC__CPU_X86_64 FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64); -# ifdef FLAC__HAS_X86INTRIN +# if FLAC__HAS_X86INTRIN # ifdef FLAC__SSE_SUPPORTED if(encoder->private_->cpuinfo.x86.sse42 || !encoder->private_->cpuinfo.x86.intel) { /* use new autocorrelation functions */ if(encoder->protected_->max_lpc_order < 4) @@ -1044,7 +1037,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( } # endif /* !FLAC__NO_ASM */ #endif /* !FLAC__INTEGER_ONLY_LIBRARY */ -#if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN +#if !defined FLAC__NO_ASM && FLAC__HAS_X86INTRIN if(encoder->private_->cpuinfo.use_asm) { # if defined FLAC__CPU_IA32 # ifdef FLAC__SSE2_SUPPORTED @@ -1074,10 +1067,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( # endif /* FLAC__CPU_... */ } #endif /* !FLAC__NO_ASM && FLAC__HAS_X86INTRIN */ - /* finally override based on wide-ness if necessary */ - if(encoder->private_->use_wide_by_block) { - encoder->private_->local_fixed_compute_best_predictor = encoder->private_->local_fixed_compute_best_predictor_wide; - } /* set state to OK; from here on, errors are fatal and we'll override the state then */ encoder->protected_->state = FLAC__STREAM_ENCODER_OK; @@ -1341,7 +1330,8 @@ static FLAC__StreamEncoderInitStatus init_FILE_internal_( * Windows can suffer quite badly from disk fragmentation. This can be * reduced significantly by setting the output buffer size to be 10MB. */ - setvbuf(file, NULL, _IOFBF, 10*1024*1024); + if(GetFileType((HANDLE)_get_osfhandle(_fileno(file))) == FILE_TYPE_DISK) + setvbuf(file, NULL, _IOFBF, 10*1024*1024); #endif encoder->private_->file = file; @@ -1883,6 +1873,7 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_total_samples_estimate(FLAC__Stream FLAC__ASSERT(0 != encoder->protected_); if(encoder->protected_->state != FLAC__STREAM_ENCODER_UNINITIALIZED) return false; + value = flac_min(value, (FLAC__U64L(1) << FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN) - 1); encoder->protected_->total_samples_estimate = value; return true; } @@ -2159,8 +2150,6 @@ FLAC_API FLAC__bool FLAC__stream_encoder_process(FLAC__StreamEncoder *encoder, c FLAC__ASSERT(0 != encoder->protected_); FLAC__ASSERT(encoder->protected_->state == FLAC__STREAM_ENCODER_OK); -// FLAC__ASSERT(samples <= blocksize); - do { const unsigned n = flac_min(blocksize+OVERREAD_-encoder->private_->current_sample_number, samples-j); @@ -2333,9 +2322,7 @@ void set_defaults_(FLAC__StreamEncoder *encoder) encoder->private_->disable_constant_subframes = false; encoder->private_->disable_fixed_subframes = false; encoder->private_->disable_verbatim_subframes = false; -#if FLAC__HAS_OGG encoder->private_->is_ogg = false; -#endif encoder->private_->read_callback = 0; encoder->private_->write_callback = 0; encoder->private_->seek_callback = 0; @@ -3177,7 +3164,10 @@ FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder, FLAC__bool is_fracti */ if(do_independent) { for(channel = 0; channel < encoder->protected_->channels; channel++) { - const unsigned w = get_wasted_bits_(encoder->private_->integer_signal[channel], encoder->protected_->blocksize); + unsigned w = get_wasted_bits_(encoder->private_->integer_signal[channel], encoder->protected_->blocksize); + if (w > encoder->protected_->bits_per_sample) { + w = encoder->protected_->bits_per_sample; + } encoder->private_->subframe_workspace[channel][0].wasted_bits = encoder->private_->subframe_workspace[channel][1].wasted_bits = w; encoder->private_->subframe_bps[channel] = encoder->protected_->bits_per_sample - w; } @@ -3185,7 +3175,10 @@ FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder, FLAC__bool is_fracti if(do_mid_side) { FLAC__ASSERT(encoder->protected_->channels == 2); for(channel = 0; channel < 2; channel++) { - const unsigned w = get_wasted_bits_(encoder->private_->integer_signal_mid_side[channel], encoder->protected_->blocksize); + unsigned w = get_wasted_bits_(encoder->private_->integer_signal_mid_side[channel], encoder->protected_->blocksize); + if (w > encoder->protected_->bits_per_sample) { + w = encoder->protected_->bits_per_sample; + } encoder->private_->subframe_workspace_mid_side[channel][0].wasted_bits = encoder->private_->subframe_workspace_mid_side[channel][1].wasted_bits = w; encoder->private_->subframe_bps_mid_side[channel] = encoder->protected_->bits_per_sample - w + (channel==0? 0:1); } @@ -3376,14 +3369,14 @@ FLAC__bool process_subframe_( ) { #ifndef FLAC__INTEGER_ONLY_LIBRARY - FLAC__float fixed_residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]; + float fixed_residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]; #else FLAC__fixedpoint fixed_residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]; #endif #ifndef FLAC__INTEGER_ONLY_LIBRARY - FLAC__double lpc_residual_bits_per_sample; + double lpc_residual_bits_per_sample; FLAC__real autoc[FLAC__MAX_LPC_ORDER+1]; /* WATCHOUT: the size is important even though encoder->protected_->max_lpc_order might be less; some asm and x86 intrinsic routines need all the space */ - FLAC__double lpc_error[FLAC__MAX_LPC_ORDER]; + double lpc_error[FLAC__MAX_LPC_ORDER]; unsigned min_lpc_order, max_lpc_order, lpc_order; unsigned min_qlp_coeff_precision, max_qlp_coeff_precision, qlp_coeff_precision; #endif @@ -3405,7 +3398,10 @@ FLAC__bool process_subframe_( if(frame_header->blocksize >= FLAC__MAX_FIXED_ORDER) { unsigned signal_is_constant = false; - guess_fixed_order = encoder->private_->local_fixed_compute_best_predictor(integer_signal+FLAC__MAX_FIXED_ORDER, frame_header->blocksize-FLAC__MAX_FIXED_ORDER, fixed_residual_bits_per_sample); + if(subframe_bps + 4 + FLAC__bitmath_ilog2((frame_header->blocksize-FLAC__MAX_FIXED_ORDER)|1) <= 32) + guess_fixed_order = encoder->private_->local_fixed_compute_best_predictor(integer_signal+FLAC__MAX_FIXED_ORDER, frame_header->blocksize-FLAC__MAX_FIXED_ORDER, fixed_residual_bits_per_sample); + else + guess_fixed_order = encoder->private_->local_fixed_compute_best_predictor_wide(integer_signal+FLAC__MAX_FIXED_ORDER, frame_header->blocksize-FLAC__MAX_FIXED_ORDER, fixed_residual_bits_per_sample); /* check for constant subframe */ if( !encoder->private_->disable_constant_subframes && @@ -3446,7 +3442,7 @@ FLAC__bool process_subframe_( max_fixed_order = frame_header->blocksize - 1; for(fixed_order = min_fixed_order; fixed_order <= max_fixed_order; fixed_order++) { #ifndef FLAC__INTEGER_ONLY_LIBRARY - if(fixed_residual_bits_per_sample[fixed_order] >= (FLAC__float)subframe_bps) + if(fixed_residual_bits_per_sample[fixed_order] >= (float)subframe_bps) continue; /* don't even try */ rice_parameter = (fixed_residual_bits_per_sample[fixed_order] > 0.0)? (unsigned)(fixed_residual_bits_per_sample[fixed_order]+0.5) : 0; /* 0.5 is for rounding */ #else @@ -3523,7 +3519,7 @@ FLAC__bool process_subframe_( max_lpc_order = frame_header->blocksize - 1; for(lpc_order = min_lpc_order; lpc_order <= max_lpc_order; lpc_order++) { lpc_residual_bits_per_sample = FLAC__lpc_compute_expected_bits_per_residual_sample(lpc_error[lpc_order-1], frame_header->blocksize-lpc_order); - if(lpc_residual_bits_per_sample >= (FLAC__double)subframe_bps) + if(lpc_residual_bits_per_sample >= (double)subframe_bps) continue; /* don't even try */ rice_parameter = (lpc_residual_bits_per_sample > 0.0)? (unsigned)(lpc_residual_bits_per_sample+0.5) : 0; /* 0.5 is for rounding */ rice_parameter++; /* to account for the signed->unsigned conversion during rice coding */ @@ -3535,8 +3531,8 @@ FLAC__bool process_subframe_( } if(encoder->protected_->do_qlp_coeff_prec_search) { min_qlp_coeff_precision = FLAC__MIN_QLP_COEFF_PRECISION; - /* try to keep qlp coeff precision such that only 32-bit math is required for decode of <=16bps streams */ - if(subframe_bps <= 16) { + /* try to keep qlp coeff precision such that only 32-bit math is required for decode of <=16bps(+1bps for side channel) streams */ + if(subframe_bps <= 17) { max_qlp_coeff_precision = flac_min(32 - subframe_bps - FLAC__bitmath_ilog2(lpc_order), FLAC__MAX_QLP_COEFF_PRECISION); max_qlp_coeff_precision = flac_max(max_qlp_coeff_precision, min_qlp_coeff_precision); } @@ -3778,8 +3774,8 @@ unsigned evaluate_lpc_subframe_( int quantization, ret; const unsigned residual_samples = blocksize - order; - /* try to keep qlp coeff precision such that only 32-bit math is required for decode of <=16bps streams */ - if(subframe_bps <= 16) { + /* try to keep qlp coeff precision such that only 32-bit math is required for decode of <=16bps(+1bps for side channel) streams */ + if(subframe_bps <= 17) { FLAC__ASSERT(order > 0); FLAC__ASSERT(order <= FLAC__MAX_LPC_ORDER); qlp_coeff_precision = flac_min(qlp_coeff_precision, 32 - subframe_bps - FLAC__bitmath_ilog2(order)); @@ -3977,29 +3973,25 @@ void precompute_partition_info_sums_( /* first do max_partition_order */ { + const unsigned threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples); unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order); - /* WATCHOUT: "+ bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum - * assumed size of the average residual magnitude */ - if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) { - FLAC__uint32 abs_residual_partition_sum; - + /* WATCHOUT: "bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum assumed size of the average residual magnitude */ + if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) { for(partition = residual_sample = 0; partition < partitions; partition++) { + FLAC__uint32 abs_residual_partition_sum = 0; end += default_partition_samples; - abs_residual_partition_sum = 0; for( ; residual_sample < end; residual_sample++) abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */ abs_residual_partition_sums[partition] = abs_residual_partition_sum; } } else { /* have to pessimistically use 64 bits for accumulator */ - FLAC__uint64 abs_residual_partition_sum; - for(partition = residual_sample = 0; partition < partitions; partition++) { + FLAC__uint64 abs_residual_partition_sum64 = 0; end += default_partition_samples; - abs_residual_partition_sum = 0; for( ; residual_sample < end; residual_sample++) - abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */ - abs_residual_partition_sums[partition] = abs_residual_partition_sum; + abs_residual_partition_sum64 += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */ + abs_residual_partition_sums[partition] = abs_residual_partition_sum64; } } } diff --git a/libFLAC/stream_encoder_framing.c b/libFLAC/stream_encoder_framing.c index 959eca0a..0929cd7b 100644 --- a/libFLAC/stream_encoder_framing.c +++ b/libFLAC/stream_encoder_framing.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -39,6 +39,7 @@ #include "private/stream_encoder_framing.h" #include "private/crc.h" #include "FLAC/assert.h" +#include "share/compat.h" static FLAC__bool add_entropy_coding_method_(FLAC__BitWriter *bw, const FLAC__EntropyCodingMethod *method); static FLAC__bool add_residual_partitioned_rice_(FLAC__BitWriter *bw, const FLAC__int32 residual[], const unsigned residual_samples, const unsigned predictor_order, const unsigned rice_parameters[], const unsigned raw_bits[], const unsigned partition_order, const FLAC__bool is_extended); @@ -64,6 +65,9 @@ FLAC__bool FLAC__add_metadata_block(const FLAC__StreamMetadata *metadata, FLAC__ i += vendor_string_length; } FLAC__ASSERT(i < (1u << FLAC__STREAM_METADATA_LENGTH_LEN)); + /* double protection */ + if(i >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN)) + return false; if(!FLAC__bitwriter_write_raw_uint32(bw, i, FLAC__STREAM_METADATA_LENGTH_LEN)) return false; @@ -92,6 +96,7 @@ FLAC__bool FLAC__add_metadata_block(const FLAC__StreamMetadata *metadata, FLAC__ FLAC__ASSERT(metadata->data.stream_info.bits_per_sample <= (1u << FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN)); if(!FLAC__bitwriter_write_raw_uint32(bw, metadata->data.stream_info.bits_per_sample-1, FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN)) return false; + FLAC__ASSERT(metadata->data.stream_info.total_samples < (FLAC__U64L(1) << FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN)); if(!FLAC__bitwriter_write_raw_uint64(bw, metadata->data.stream_info.total_samples, FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN)) return false; if(!FLAC__bitwriter_write_byte_block(bw, metadata->data.stream_info.md5sum, 16)) diff --git a/libFLAC/stream_encoder_intrin_avx2.c b/libFLAC/stream_encoder_intrin_avx2.c new file mode 100644 index 00000000..a94a02bf --- /dev/null +++ b/libFLAC/stream_encoder_intrin_avx2.c @@ -0,0 +1,146 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/cpu.h" + +#ifndef FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN +#include "private/stream_encoder.h" +#include "private/bitmath.h" +#ifdef FLAC__AVX2_SUPPORTED + +#include <stdlib.h> /* for abs() */ +#include <immintrin.h> /* AVX2 */ +#include "FLAC/assert.h" + +FLAC__SSE_TARGET("avx2") +void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], + unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps) +{ + const unsigned default_partition_samples = (residual_samples + predictor_order) >> max_partition_order; + unsigned partitions = 1u << max_partition_order; + + FLAC__ASSERT(default_partition_samples > predictor_order); + + /* first do max_partition_order */ + { + const unsigned threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples); + unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order); + + if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) { + for(partition = residual_sample = 0; partition < partitions; partition++) { + __m256i sum256 = _mm256_setzero_si256(); + __m128i sum128; + end += default_partition_samples; + + for( ; (int)residual_sample < (int)end-7; residual_sample+=8) { + __m256i res256 = _mm256_abs_epi32(_mm256_loadu_si256((const __m256i*)(residual+residual_sample))); + sum256 = _mm256_add_epi32(sum256, res256); + } + + sum128 = _mm_add_epi32(_mm256_extracti128_si256(sum256, 1), _mm256_castsi256_si128(sum256)); + + for( ; (int)residual_sample < (int)end-3; residual_sample+=4) { + __m128i res128 = _mm_abs_epi32(_mm_loadu_si128((const __m128i*)(residual+residual_sample))); + sum128 = _mm_add_epi32(sum128, res128); + } + + for( ; residual_sample < end; residual_sample++) { + __m128i res128 = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); + sum128 = _mm_add_epi32(sum128, res128); + } + + sum128 = _mm_hadd_epi32(sum128, sum128); + sum128 = _mm_hadd_epi32(sum128, sum128); + abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(sum128); +/* workaround for a bug in MSVC2015U2 - see https://connect.microsoft.com/VisualStudio/feedback/details/2659191/incorrect-code-generation-for-x86-64 */ +#if (defined _MSC_VER) && (_MSC_FULL_VER == 190023918) && (defined FLAC__CPU_X86_64) + abs_residual_partition_sums[partition] &= 0xFFFFFFFF; /**/ +#endif + } + } + else { /* have to pessimistically use 64 bits for accumulator */ + for(partition = residual_sample = 0; partition < partitions; partition++) { + __m256i sum256 = _mm256_setzero_si256(); + __m128i sum128; + end += default_partition_samples; + + for( ; (int)residual_sample < (int)end-3; residual_sample+=4) { + __m128i res128 = _mm_abs_epi32(_mm_loadu_si128((const __m128i*)(residual+residual_sample))); + __m256i res256 = _mm256_cvtepu32_epi64(res128); + sum256 = _mm256_add_epi64(sum256, res256); + } + + sum128 = _mm_add_epi64(_mm256_extracti128_si256(sum256, 1), _mm256_castsi256_si128(sum256)); + + for( ; (int)residual_sample < (int)end-1; residual_sample+=2) { + __m128i res128 = _mm_abs_epi32(_mm_loadl_epi64((const __m128i*)(residual+residual_sample))); + res128 = _mm_cvtepu32_epi64(res128); + sum128 = _mm_add_epi64(sum128, res128); + } + + for( ; residual_sample < end; residual_sample++) { + __m128i res128 = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); + sum128 = _mm_add_epi64(sum128, res128); + } + + sum128 = _mm_add_epi64(sum128, _mm_srli_si128(sum128, 8)); + _mm_storel_epi64((__m128i*)(abs_residual_partition_sums+partition), sum128); + } + } + } + + /* now merge partitions for lower orders */ + { + unsigned from_partition = 0, to_partition = partitions; + int partition_order; + for(partition_order = (int)max_partition_order - 1; partition_order >= (int)min_partition_order; partition_order--) { + unsigned i; + partitions >>= 1; + for(i = 0; i < partitions; i++) { + abs_residual_partition_sums[to_partition++] = + abs_residual_partition_sums[from_partition ] + + abs_residual_partition_sums[from_partition+1]; + from_partition += 2; + } + } + } + _mm256_zeroupper(); +} + +#endif /* FLAC__AVX2_SUPPORTED */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ +#endif /* FLAC__NO_ASM */ diff --git a/libFLAC/stream_encoder_intrin_sse2.c b/libFLAC/stream_encoder_intrin_sse2.c new file mode 100644 index 00000000..0f1d7aaf --- /dev/null +++ b/libFLAC/stream_encoder_intrin_sse2.c @@ -0,0 +1,159 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/cpu.h" + +#ifndef FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN +#include "private/stream_encoder.h" +#include "private/bitmath.h" +#ifdef FLAC__SSE2_SUPPORTED + +#include <stdlib.h> /* for abs() */ +#include <emmintrin.h> /* SSE2 */ +#include "FLAC/assert.h" +#include "share/compat.h" + +FLAC__SSE_TARGET("sse2") +static inline __m128i local_abs_epi32(__m128i val) +{ + __m128i mask = _mm_srai_epi32(val, 31); + val = _mm_xor_si128(val, mask); + val = _mm_sub_epi32(val, mask); + return val; +} + + +FLAC__SSE_TARGET("sse2") +void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], + unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps) +{ + const unsigned default_partition_samples = (residual_samples + predictor_order) >> max_partition_order; + unsigned partitions = 1u << max_partition_order; + + FLAC__ASSERT(default_partition_samples > predictor_order); + + /* first do max_partition_order */ + { + const unsigned threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples); + unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order); + + if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) { + for(partition = residual_sample = 0; partition < partitions; partition++) { + __m128i mm_sum = _mm_setzero_si128(); + unsigned e1, e3; + end += default_partition_samples; + + e1 = (residual_sample + 3) & ~3; e3 = end & ~3; + if(e1 > end) + e1 = end; /* try flac -l 1 -b 16 and you'll be here */ + + /* assumption: residual[] is properly aligned so (residual + e1) is properly aligned too and _mm_loadu_si128() is fast */ + for( ; residual_sample < e1; residual_sample++) { + __m128i mm_res = local_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); + mm_sum = _mm_add_epi32(mm_sum, mm_res); + } + + for( ; residual_sample < e3; residual_sample+=4) { + __m128i mm_res = local_abs_epi32(_mm_loadu_si128((const __m128i*)(residual+residual_sample))); + mm_sum = _mm_add_epi32(mm_sum, mm_res); + } + + for( ; residual_sample < end; residual_sample++) { + __m128i mm_res = local_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); + mm_sum = _mm_add_epi32(mm_sum, mm_res); + } + + mm_sum = _mm_add_epi32(mm_sum, _mm_srli_si128(mm_sum, 8)); + mm_sum = _mm_add_epi32(mm_sum, _mm_srli_si128(mm_sum, 4)); + abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(mm_sum); +/* workaround for a bug in MSVC2015U2 - see https://connect.microsoft.com/VisualStudio/feedback/details/2659191/incorrect-code-generation-for-x86-64 */ +#if (defined _MSC_VER) && (_MSC_FULL_VER == 190023918) && (defined FLAC__CPU_X86_64) + abs_residual_partition_sums[partition] &= 0xFFFFFFFF; +#endif + } + } + else { /* have to pessimistically use 64 bits for accumulator */ + for(partition = residual_sample = 0; partition < partitions; partition++) { + __m128i mm_sum = _mm_setzero_si128(); + unsigned e1, e3; + end += default_partition_samples; + + e1 = (residual_sample + 1) & ~1; e3 = end & ~1; + FLAC__ASSERT(e1 <= end); + + for( ; residual_sample < e1; residual_sample++) { + __m128i mm_res = local_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); /* 0 0 0 |r0| == 00 |r0_64| */ + mm_sum = _mm_add_epi64(mm_sum, mm_res); + } + + for( ; residual_sample < e3; residual_sample+=2) { + __m128i mm_res = local_abs_epi32(_mm_loadl_epi64((const __m128i*)(residual+residual_sample))); /* 0 0 |r1| |r0| */ + mm_res = _mm_shuffle_epi32(mm_res, _MM_SHUFFLE(3,1,2,0)); /* 0 |r1| 0 |r0| == |r1_64| |r0_64| */ + mm_sum = _mm_add_epi64(mm_sum, mm_res); + } + + for( ; residual_sample < end; residual_sample++) { + __m128i mm_res = local_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); + mm_sum = _mm_add_epi64(mm_sum, mm_res); + } + + mm_sum = _mm_add_epi64(mm_sum, _mm_srli_si128(mm_sum, 8)); + _mm_storel_epi64((__m128i*)(abs_residual_partition_sums+partition), mm_sum); + } + } + } + + /* now merge partitions for lower orders */ + { + unsigned from_partition = 0, to_partition = partitions; + int partition_order; + for(partition_order = (int)max_partition_order - 1; partition_order >= (int)min_partition_order; partition_order--) { + unsigned i; + partitions >>= 1; + for(i = 0; i < partitions; i++) { + abs_residual_partition_sums[to_partition++] = + abs_residual_partition_sums[from_partition ] + + abs_residual_partition_sums[from_partition+1]; + from_partition += 2; + } + } + } +} + +#endif /* FLAC__SSE2_SUPPORTED */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ +#endif /* FLAC__NO_ASM */ diff --git a/libFLAC/stream_encoder_intrin_ssse3.c b/libFLAC/stream_encoder_intrin_ssse3.c new file mode 100644 index 00000000..21a08fc2 --- /dev/null +++ b/libFLAC/stream_encoder_intrin_ssse3.c @@ -0,0 +1,148 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2000-2009 Josh Coalson + * Copyright (C) 2011-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "private/cpu.h" + +#ifndef FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN +#include "private/stream_encoder.h" +#include "private/bitmath.h" +#ifdef FLAC__SSSE3_SUPPORTED + +#include <stdlib.h> /* for abs() */ +#include <tmmintrin.h> /* SSSE3 */ +#include "FLAC/assert.h" + +FLAC__SSE_TARGET("ssse3") +void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], + unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps) +{ + const unsigned default_partition_samples = (residual_samples + predictor_order) >> max_partition_order; + unsigned partitions = 1u << max_partition_order; + + FLAC__ASSERT(default_partition_samples > predictor_order); + + /* first do max_partition_order */ + { + const unsigned threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples); + unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order); + + if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) { + for(partition = residual_sample = 0; partition < partitions; partition++) { + __m128i mm_sum = _mm_setzero_si128(); + unsigned e1, e3; + end += default_partition_samples; + + e1 = (residual_sample + 3) & ~3; e3 = end & ~3; + if(e1 > end) + e1 = end; /* try flac -l 1 -b 16 and you'll be here */ + + /* assumption: residual[] is properly aligned so (residual + e1) is properly aligned too and _mm_loadu_si128() is fast */ + for( ; residual_sample < e1; residual_sample++) { + __m128i mm_res = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); + mm_sum = _mm_add_epi32(mm_sum, mm_res); + } + + for( ; residual_sample < e3; residual_sample+=4) { + __m128i mm_res = _mm_abs_epi32(_mm_loadu_si128((const __m128i*)(residual+residual_sample))); + mm_sum = _mm_add_epi32(mm_sum, mm_res); + } + + for( ; residual_sample < end; residual_sample++) { + __m128i mm_res = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); + mm_sum = _mm_add_epi32(mm_sum, mm_res); + } + + mm_sum = _mm_hadd_epi32(mm_sum, mm_sum); + mm_sum = _mm_hadd_epi32(mm_sum, mm_sum); + abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(mm_sum); +/* workaround for a bug in MSVC2015U2 - see https://connect.microsoft.com/VisualStudio/feedback/details/2659191/incorrect-code-generation-for-x86-64 */ +#if (defined _MSC_VER) && (_MSC_FULL_VER == 190023918) && (defined FLAC__CPU_X86_64) + abs_residual_partition_sums[partition] &= 0xFFFFFFFF; +#endif + } + } + else { /* have to pessimistically use 64 bits for accumulator */ + for(partition = residual_sample = 0; partition < partitions; partition++) { + __m128i mm_sum = _mm_setzero_si128(); + unsigned e1, e3; + end += default_partition_samples; + + e1 = (residual_sample + 1) & ~1; e3 = end & ~1; + FLAC__ASSERT(e1 <= end); + + for( ; residual_sample < e1; residual_sample++) { + __m128i mm_res = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); /* 0 0 0 |r0| == 00 |r0_64| */ + mm_sum = _mm_add_epi64(mm_sum, mm_res); + } + + for( ; residual_sample < e3; residual_sample+=2) { + __m128i mm_res = _mm_abs_epi32(_mm_loadl_epi64((const __m128i*)(residual+residual_sample))); /* 0 0 |r1| |r0| */ + mm_res = _mm_shuffle_epi32(mm_res, _MM_SHUFFLE(3,1,2,0)); /* 0 |r1| 0 |r0| == |r1_64| |r0_64| */ + mm_sum = _mm_add_epi64(mm_sum, mm_res); + } + + for( ; residual_sample < end; residual_sample++) { + __m128i mm_res = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); + mm_sum = _mm_add_epi64(mm_sum, mm_res); + } + + mm_sum = _mm_add_epi64(mm_sum, _mm_srli_si128(mm_sum, 8)); + _mm_storel_epi64((__m128i*)(abs_residual_partition_sums+partition), mm_sum); + } + } + } + + /* now merge partitions for lower orders */ + { + unsigned from_partition = 0, to_partition = partitions; + int partition_order; + for(partition_order = (int)max_partition_order - 1; partition_order >= (int)min_partition_order; partition_order--) { + unsigned i; + partitions >>= 1; + for(i = 0; i < partitions; i++) { + abs_residual_partition_sums[to_partition++] = + abs_residual_partition_sums[from_partition ] + + abs_residual_partition_sums[from_partition+1]; + from_partition += 2; + } + } + } +} + +#endif /* FLAC__SSSE3_SUPPORTED */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ +#endif /* FLAC__NO_ASM */ diff --git a/libFLAC/window.c b/libFLAC/window.c index 4387ef72..e977fd86 100644 --- a/libFLAC/window.c +++ b/libFLAC/window.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2006-2009 Josh Coalson - * Copyright (C) 2011-2014 Xiph.Org Foundation + * Copyright (C) 2011-2016 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/libFLAC/windows_unicode_filenames.c b/libFLAC/windows_unicode_filenames.c new file mode 100644 index 00000000..2404e319 --- /dev/null +++ b/libFLAC/windows_unicode_filenames.c @@ -0,0 +1,201 @@ +/* libFLAC - Free Lossless Audio Codec library + * Copyright (C) 2013-2016 Xiph.Org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <io.h> +#include "share/windows_unicode_filenames.h" + +/* convert UTF-8 back to WCHAR. Caller is responsible for freeing memory */ +static wchar_t *wchar_from_utf8(const char *str) +{ + wchar_t *widestr; + int len; + + if (!str) + return NULL; + if ((len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0)) == 0) + return NULL; + if ((widestr = (wchar_t *)malloc(len*sizeof(wchar_t))) == NULL) + return NULL; + if (MultiByteToWideChar(CP_UTF8, 0, str, -1, widestr, len) == 0) { + free(widestr); + widestr = NULL; + } + + return widestr; +} + + +static FLAC__bool utf8_filenames = false; + + +void flac_internal_set_utf8_filenames(FLAC__bool flag) +{ + utf8_filenames = flag ? true : false; +} + +FLAC__bool flac_internal_get_utf8_filenames(void) +{ + return utf8_filenames; +} + +/* file functions */ + +FILE* flac_internal_fopen_utf8(const char *filename, const char *mode) +{ + if (!utf8_filenames) { + return fopen(filename, mode); + } else { + wchar_t *wname = NULL; + wchar_t *wmode = NULL; + FILE *f = NULL; + + do { + if (!(wname = wchar_from_utf8(filename))) break; + if (!(wmode = wchar_from_utf8(mode))) break; + f = _wfopen(wname, wmode); + } while(0); + + free(wname); + free(wmode); + + return f; + } +} + +int flac_internal_stat64_utf8(const char *path, struct __stat64 *buffer) +{ + if (!utf8_filenames) { + return _stat64(path, buffer); + } else { + wchar_t *wpath; + int ret; + + if (!(wpath = wchar_from_utf8(path))) return -1; + ret = _wstat64(wpath, buffer); + free(wpath); + + return ret; + } +} + +int flac_internal_chmod_utf8(const char *filename, int pmode) +{ + if (!utf8_filenames) { + return _chmod(filename, pmode); + } else { + wchar_t *wname; + int ret; + + if (!(wname = wchar_from_utf8(filename))) return -1; + ret = _wchmod(wname, pmode); + free(wname); + + return ret; + } +} + +int flac_internal_utime_utf8(const char *filename, struct utimbuf *times) +{ + if (!utf8_filenames) { + return utime(filename, times); + } else { + wchar_t *wname; + struct __utimbuf64 ut; + int ret; + + if (!(wname = wchar_from_utf8(filename))) return -1; + ut.actime = times->actime; + ut.modtime = times->modtime; + ret = _wutime64(wname, &ut); + free(wname); + + return ret; + } +} + +int flac_internal_unlink_utf8(const char *filename) +{ + if (!utf8_filenames) { + return _unlink(filename); + } else { + wchar_t *wname; + int ret; + + if (!(wname = wchar_from_utf8(filename))) return -1; + ret = _wunlink(wname); + free(wname); + + return ret; + } +} + +int flac_internal_rename_utf8(const char *oldname, const char *newname) +{ + if (!utf8_filenames) { + return rename(oldname, newname); + } else { + wchar_t *wold = NULL; + wchar_t *wnew = NULL; + int ret = -1; + + do { + if (!(wold = wchar_from_utf8(oldname))) break; + if (!(wnew = wchar_from_utf8(newname))) break; + ret = _wrename(wold, wnew); + } while(0); + + free(wold); + free(wnew); + + return ret; + } +} + +HANDLE WINAPI flac_internal_CreateFile_utf8(const char *lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes, DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLE hTemplateFile) +{ + if (!utf8_filenames) { + return CreateFileA(lpFileName, dwDesiredAccess, dwShareMode, lpSecurityAttributes, dwCreationDisposition, dwFlagsAndAttributes, hTemplateFile); + } else { + wchar_t *wname; + HANDLE handle = INVALID_HANDLE_VALUE; + + if ((wname = wchar_from_utf8(lpFileName)) != NULL) { + handle = CreateFileW(wname, dwDesiredAccess, dwShareMode, lpSecurityAttributes, dwCreationDisposition, dwFlagsAndAttributes, hTemplateFile); + free(wname); + } + + return handle; + } +} |