diff options
author | Vikas Arora <vikasa@google.com> | 2015-01-29 17:29:29 +0000 |
---|---|---|
committer | Android (Google) Code Review <android-gerrit@google.com> | 2015-01-29 17:29:30 +0000 |
commit | 9dbaf404293aff5b35993a9d6c7a3b45aeba1c52 (patch) | |
tree | d7ca3ab2344e6eaae0c5af176fc998b317952c23 | |
parent | b01fe55d0ee2435cf881e68989b599563ae543e8 (diff) | |
parent | 8c098653157979e397d3954fc2ea0ee43bae6ab2 (diff) | |
download | webp-9dbaf404293aff5b35993a9d6c7a3b45aeba1c52.tar.gz |
Merge "Sync-patch with libwebp ver 0.4.2"
-rw-r--r-- | ChangeLog | 1 | ||||
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | README | 2 | ||||
-rw-r--r-- | README.android | 2 | ||||
-rw-r--r-- | include/webp/decode.h | 10 | ||||
-rw-r--r-- | include/webp/encode.h | 18 | ||||
-rw-r--r-- | src/Android.mk | 2 | ||||
-rw-r--r-- | src/dec/frame.c | 2 | ||||
-rw-r--r-- | src/dec/idec.c | 6 | ||||
-rw-r--r-- | src/dec/vp8i.h | 2 | ||||
-rw-r--r-- | src/dec/vp8l.c | 9 | ||||
-rw-r--r-- | src/demux/demux.c | 2 | ||||
-rw-r--r-- | src/dsp/alpha_processing.c | 31 | ||||
-rw-r--r-- | src/dsp/alpha_processing_sse2.c | 77 | ||||
-rw-r--r-- | src/dsp/cpu.c | 2 | ||||
-rw-r--r-- | src/dsp/dsp.h | 20 | ||||
-rw-r--r-- | src/dsp/enc_neon.c | 50 | ||||
-rw-r--r-- | src/dsp/lossless.c | 13 | ||||
-rw-r--r-- | src/dsp/lossless.h | 30 | ||||
-rw-r--r-- | src/enc/alpha.c | 71 | ||||
-rw-r--r-- | src/enc/analysis.c | 6 | ||||
-rw-r--r-- | src/enc/config.c | 4 | ||||
-rw-r--r-- | src/enc/cost.h | 4 | ||||
-rw-r--r-- | src/enc/frame.c | 2 | ||||
-rw-r--r-- | src/enc/picture_csp.c | 903 | ||||
-rw-r--r-- | src/enc/vp8enci.h | 10 | ||||
-rw-r--r-- | src/enc/webpenc.c | 28 | ||||
-rw-r--r-- | src/utils/bit_reader.c | 57 | ||||
-rw-r--r-- | src/utils/bit_reader.h | 17 | ||||
-rw-r--r-- | src/utils/bit_writer.c | 10 | ||||
-rw-r--r-- | src/utils/endian_inl.h | 28 | ||||
-rw-r--r-- | src/utils/quant_levels_dec.c | 8 | ||||
-rw-r--r-- | src/utils/utils.c | 6 |
33 files changed, 1185 insertions, 249 deletions
@@ -9,3 +9,4 @@ - 9/13: Fix memleak in WebPIDelete() (change#Id4faef1b) - 1/14: Release version 0.4.0-rc1 (change#I22be12d8) - 7/14: Release version 0.4.1-rc1 (change#I5346984d2) +- 1/15: Release version 0.4.2 (change#I32a22786f) @@ -6,3 +6,4 @@ (#I737451d7f, #Ia300385a & #I9566a8e2). - 1/14: release version 0.4.0-rc1 (change#I22be12d8). - 7/14: release version 0.4.1-rc1 (change#I5346984d2). +- 1/15: release version 0.4.2 (change#I32a22786f). @@ -4,7 +4,7 @@ \__\__/\____/\_____/__/ ____ ___ / _/ / \ \ / _ \/ _/ / \_/ / / \ \ __/ \__ - \____/____/\_____/_____/____/v0.4.1 + \____/____/\_____/_____/____/v0.4.2 Description: ============ diff --git a/README.android b/README.android index a68e0a0d..81e7ac7d 100644 --- a/README.android +++ b/README.android @@ -46,6 +46,8 @@ Local modifications: - ~10% faster lossless decode - ~5-10% faster lossless encode (-m 3/4) - Arch64 (arm64) & MIPS support/optimizations. +- Sync-patch with libwebp ver 0.4.2 (change#I32a22786f). + - Cherry-picked Alpha-decoding bug. The Android.mk file creates WebP Decoder and Encoder static libraries which can be added to any application by Adding to LOCAL_STATIC_LIBRARIES diff --git a/include/webp/decode.h b/include/webp/decode.h index 36c27c37..8d3f7be9 100644 --- a/include/webp/decode.h +++ b/include/webp/decode.h @@ -444,16 +444,20 @@ struct WebPDecoderOptions { int dithering_strength; // dithering strength (0=Off, 100=full) #if WEBP_DECODER_ABI_VERSION > 0x0203 int flip; // flip output vertically +#endif +#if WEBP_DECODER_ABI_VERSION > 0x0204 int alpha_dithering_strength; // alpha dithering strength in [0..100] #endif // Unused for now: int force_rotation; // forced rotation (to be applied _last_) int no_enhancement; // if true, discard enhancement layer -#if WEBP_DECODER_ABI_VERSION > 0x0203 - uint32_t pad[3]; // padding for later use -#else +#if WEBP_DECODER_ABI_VERSION < 0x0203 uint32_t pad[5]; // padding for later use +#elif WEBP_DECODER_ABI_VERSION < 0x0204 + uint32_t pad[4]; // padding for later use +#else + uint32_t pad[3]; // padding for later use #endif }; diff --git a/include/webp/encode.h b/include/webp/encode.h index dd600568..3c263748 100644 --- a/include/webp/encode.h +++ b/include/webp/encode.h @@ -231,14 +231,14 @@ struct WebPMemoryWriter { // The following must be called first before any use. WEBP_EXTERN(void) WebPMemoryWriterInit(WebPMemoryWriter* writer); -#if WEBP_ENCODER_ABI_VERSION > 0x0202 +#if WEBP_ENCODER_ABI_VERSION > 0x0203 // The following must be called to deallocate writer->mem memory. The 'writer' // object itself is not deallocated. WEBP_EXTERN(void) WebPMemoryWriterClear(WebPMemoryWriter* writer); #endif // The custom writer to be used with WebPMemoryWriter as custom_ptr. Upon // completion, writer.mem and writer.size will hold the coded data. -#if WEBP_ENCODER_ABI_VERSION > 0x0202 +#if WEBP_ENCODER_ABI_VERSION > 0x0203 // writer.mem must be freed by calling WebPMemoryWriterClear. #else // writer.mem must be freed by calling 'free(writer.mem)'. @@ -446,13 +446,14 @@ WEBP_EXTERN(int) WebPPictureImportBGRA( WEBP_EXTERN(int) WebPPictureImportBGRX( WebPPicture* picture, const uint8_t* bgrx, int bgrx_stride); -// Converts picture->argb data to the YUVA format specified by 'colorspace'. +// Converts picture->argb data to the YUV420A format. The 'colorspace' +// parameter is deprecated and should be equal to WEBP_YUV420. // Upon return, picture->use_argb is set to false. The presence of real // non-opaque transparent values is detected, and 'colorspace' will be // adjusted accordingly. 
Note that this method is lossy. // Returns false in case of error. WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture, - WebPEncCSP colorspace); + WebPEncCSP /*colorspace = WEBP_YUV420*/); // Same as WebPPictureARGBToYUVA(), but the conversion is done using // pseudo-random dithering with a strength 'dithering' between @@ -461,6 +462,15 @@ WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture, WEBP_EXTERN(int) WebPPictureARGBToYUVADithered( WebPPicture* picture, WebPEncCSP colorspace, float dithering); +#if WEBP_ENCODER_ABI_VERSION > 0x0204 +// Performs 'smart' RGBA->YUVA420 downsampling and colorspace conversion. +// Downsampling is handled with extra care in case of color clipping. This +// method is roughly 2x slower than WebPPictureARGBToYUVA() but produces better +// YUV representation. +// Returns false in case of error. +WEBP_EXTERN(int) WebPPictureSmartARGBToYUVA(WebPPicture* picture); +#endif + // Converts picture->yuv to picture->argb and sets picture->use_argb to true. // The input format must be YUV_420 or YUV_420A. // Note that the use of this method is discouraged if one has access to the diff --git a/src/Android.mk b/src/Android.mk index 90303cae..027a17b4 100644 --- a/src/Android.mk +++ b/src/Android.mk @@ -38,6 +38,7 @@ LOCAL_SRC_FILES := \ enc/vp8l.c \ enc/webpenc.c \ dsp/alpha_processing.c \ + dsp/alpha_processing_sse2.c \ dsp/cpu.c \ dsp/cpu-features.c \ dsp/enc.c \ @@ -89,6 +90,7 @@ LOCAL_SRC_FILES := \ dec/vp8l.c \ dec/webp.c \ dsp/alpha_processing.c \ + dsp/alpha_processing_sse2.c \ dsp/cpu.c \ dsp/cpu-features.c \ dsp/dec.c \ diff --git a/src/dec/frame.c b/src/dec/frame.c index f7a0d1d8..2359acc5 100644 --- a/src/dec/frame.c +++ b/src/dec/frame.c @@ -177,7 +177,7 @@ void VP8InitDithering(const WebPDecoderOptions* const options, dec->dither_ = 1; } } -#if WEBP_DECODER_ABI_VERSION > 0x0203 +#if WEBP_DECODER_ABI_VERSION > 0x0204 // potentially allow alpha dithering dec->alpha_dithering_ = options->alpha_dithering_strength; if (dec->alpha_dithering_ > 100) { diff --git a/src/dec/idec.c b/src/dec/idec.c index 7bab1eab..5d8bb0c2 100644 --- a/src/dec/idec.c +++ b/src/dec/idec.c @@ -529,6 +529,12 @@ static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) { } if (!VP8LDecodeImage(dec)) { + // The decoding is called after all the data-bytes are aggregated. Change + // the error to VP8_BITSTREAM_ERROR in case lossless decoder fails to decode + // all the pixels (VP8_STATUS_SUSPENDED). 
+ if (dec->status_ == VP8_STATUS_SUSPENDED) { + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + } return ErrorStatusLossless(idec, dec->status_); } diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h index 7cc1840f..29701be7 100644 --- a/src/dec/vp8i.h +++ b/src/dec/vp8i.h @@ -31,7 +31,7 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 0 #define DEC_MIN_VERSION 4 -#define DEC_REV_VERSION 1 +#define DEC_REV_VERSION 2 // intra prediction modes enum { B_DC_PRED = 0, // 4x4 modes diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c index 81cf99fc..e2780e5b 100644 --- a/src/dec/vp8l.c +++ b/src/dec/vp8l.c @@ -234,6 +234,7 @@ static int ReadHuffmanCodeLengths( End: VP8LHuffmanTreeFree(&tree); + if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR; return ok; } @@ -801,6 +802,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data, ok = 0; goto End; } + assert(br->eos_ == VP8LIsEndOfStream(br)); ok = !br->error_; if (!ok) goto End; } @@ -898,7 +900,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data, process_func(dec, row); } } - if (src < src_last) { + if (src < src_end) { if (col & mask) htree_group = GetHtreeGroupForPos(hdr, col, row); if (color_cache != NULL) { while (last_cached < src) { @@ -918,6 +920,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data, ok = 0; goto End; } + assert(br->eos_ == VP8LIsEndOfStream(br)); ok = !br->error_; if (!ok) goto End; } @@ -1354,6 +1357,10 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { // Sanity checks. if (dec == NULL) return 0; + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + assert(dec->hdr_.htree_groups_ != NULL); + assert(dec->hdr_.num_htree_groups_ > 0); + io = dec->io_; assert(io != NULL); params = (WebPDecParams*)io->opaque; diff --git a/src/demux/demux.c b/src/demux/demux.c index 870c47b3..5aabdd25 100644 --- a/src/demux/demux.c +++ b/src/demux/demux.c @@ -25,7 +25,7 @@ #define DMUX_MAJ_VERSION 0 #define DMUX_MIN_VERSION 2 -#define DMUX_REV_VERSION 1 +#define DMUX_REV_VERSION 2 typedef struct { size_t start_; // start location of the data diff --git a/src/dsp/alpha_processing.c b/src/dsp/alpha_processing.c index 09deacfb..d0f7a6cc 100644 --- a/src/dsp/alpha_processing.c +++ b/src/dsp/alpha_processing.c @@ -284,15 +284,46 @@ static void ApplyAlphaMultiply_16b(uint8_t* rgba4444, #endif } +static int ExtractAlpha(const uint8_t* argb, int argb_stride, + int width, int height, + uint8_t* alpha, int alpha_stride) { + uint8_t alpha_mask = 0xff; + int i, j; + + for (j = 0; j < height; ++j) { + for (i = 0; i < width; ++i) { + const uint8_t alpha_value = argb[4 * i]; + alpha[i] = alpha_value; + alpha_mask &= alpha_value; + } + argb += argb_stride; + alpha += alpha_stride; + } + return (alpha_mask == 0xff); +} + void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int); void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int); +int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); //------------------------------------------------------------------------------ // Init function +extern void WebPInitAlphaProcessingSSE2(void); + void WebPInitAlphaProcessing(void) { WebPMultARGBRow = MultARGBRow; WebPMultRow = MultRow; WebPApplyAlphaMultiply = ApplyAlphaMultiply; WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b; + WebPExtractAlpha = ExtractAlpha; + + // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
+ if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + WebPInitAlphaProcessingSSE2(); + } +#endif + } } diff --git a/src/dsp/alpha_processing_sse2.c b/src/dsp/alpha_processing_sse2.c new file mode 100644 index 00000000..3d0a9b57 --- /dev/null +++ b/src/dsp/alpha_processing_sse2.c @@ -0,0 +1,77 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Utilities for processing transparent channel. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./dsp.h" + +#if defined(WEBP_USE_SSE2) +#include <emmintrin.h> + +//------------------------------------------------------------------------------ + +static int ExtractAlpha(const uint8_t* argb, int argb_stride, + int width, int height, + uint8_t* alpha, int alpha_stride) { + // alpha_and stores an 'and' operation of all the alpha[] values. The final + // value is not 0xff if any of the alpha[] is not equal to 0xff. + uint32_t alpha_and = 0xff; + int i, j; + const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha + const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); + __m128i all_alphas = all_0xff; + + // We must be able to access 3 extra bytes after the last written byte + // 'src[4 * width - 4]', because we don't know if alpha is the first or the + // last byte of the quadruplet. + const int limit = (width - 1) & ~7; + + for (j = 0; j < height; ++j) { + const __m128i* src = (const __m128i*)argb; + for (i = 0; i < limit; i += 8) { + // load 32 argb bytes + const __m128i a0 = _mm_loadu_si128(src + 0); + const __m128i a1 = _mm_loadu_si128(src + 1); + const __m128i b0 = _mm_and_si128(a0, a_mask); + const __m128i b1 = _mm_and_si128(a1, a_mask); + const __m128i c0 = _mm_packs_epi32(b0, b1); + const __m128i d0 = _mm_packus_epi16(c0, c0); + // store + _mm_storel_epi64((__m128i*)&alpha[i], d0); + // accumulate eight alpha 'and' in parallel + all_alphas = _mm_and_si128(all_alphas, d0); + src += 2; + } + for (; i < width; ++i) { + const uint32_t alpha_value = argb[4 * i]; + alpha[i] = alpha_value; + alpha_and &= alpha_value; + } + argb += argb_stride; + alpha += alpha_stride; + } + // Combine the eight alpha 'and' into a 8-bit mask. 
+ alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff)); + return (alpha_and == 0xff); +} + +#endif // WEBP_USE_SSE2 + +//------------------------------------------------------------------------------ +// Init function + +extern void WebPInitAlphaProcessingSSE2(void); + +void WebPInitAlphaProcessingSSE2(void) { +#if defined(WEBP_USE_SSE2) + WebPExtractAlpha = ExtractAlpha; +#endif +} diff --git a/src/dsp/cpu.c b/src/dsp/cpu.c index 581b5e30..70ba2ab0 100644 --- a/src/dsp/cpu.c +++ b/src/dsp/cpu.c @@ -57,7 +57,7 @@ static WEBP_INLINE uint64_t xgetbv(void) { } #elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 #define xgetbv() _xgetbv(0) -#elif defined(_M_IX86) +#elif defined(_MSC_VER) && defined(_M_IX86) static WEBP_INLINE uint64_t xgetbv(void) { uint32_t eax_, edx_; __asm { diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index 8208da53..3b31ae08 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -32,9 +32,19 @@ extern "C" { # define LOCAL_GCC_PREREQ(maj, min) \ (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) #else +# define LOCAL_GCC_VERSION 0 # define LOCAL_GCC_PREREQ(maj, min) 0 #endif +#ifdef __clang__ +# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) +# define LOCAL_CLANG_PREREQ(maj, min) \ + (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) +#else +# define LOCAL_CLANG_VERSION 0 +# define LOCAL_CLANG_PREREQ(maj, min) 0 +#endif // __clang__ + #if defined(_MSC_VER) && _MSC_VER > 1310 && \ (defined(_M_X64) || defined(_M_IX86)) #define WEBP_MSC_SSE2 // Visual C++ SSE2 targets @@ -62,6 +72,9 @@ extern "C" { #if defined(__mips__) && !defined(__mips64) && (__mips_isa_rev < 6) #define WEBP_USE_MIPS32 +#if (__mips_isa_rev >= 2) +#define WEBP_USE_MIPS32_R2 +#endif #endif typedef enum { @@ -244,6 +257,13 @@ extern void (*WebPApplyAlphaMultiply)( extern void (*WebPApplyAlphaMultiply4444)( uint8_t* rgba4444, int w, int h, int stride); +// Extract the alpha values from 32b values in argb[] and pack them into alpha[] +// (this is the opposite of WebPDispatchAlpha). +// Returns true if there's only trivial 0xff alpha values. +extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride, + int width, int height, + uint8_t* alpha, int alpha_stride); + // Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B). // Un-Multiply operation transforms x into x * 255 / A. diff --git a/src/dsp/enc_neon.c b/src/dsp/enc_neon.c index 1e712c52..42041f73 100644 --- a/src/dsp/enc_neon.c +++ b/src/dsp/enc_neon.c @@ -253,7 +253,7 @@ static void ITransform(const uint8_t* ref, // Load all 4x4 pixels into a single uint8x16_t variable. 
static uint8x16_t Load4x4(const uint8_t* src) { - uint32x4_t out = { 0, 0, 0, 0 }; + uint32x4_t out = vdupq_n_u32(0); out = vld1q_lane_u32((const uint32_t*)(src + 0 * BPS), out, 0); out = vld1q_lane_u32((const uint32_t*)(src + 1 * BPS), out, 1); out = vld1q_lane_u32((const uint32_t*)(src + 2 * BPS), out, 2); @@ -929,7 +929,7 @@ static int SumToInt(uint32x4_t sum) { } static int SSE16x16(const uint8_t* a, const uint8_t* b) { - uint32x4_t sum = { 0, 0, 0, 0 }; + uint32x4_t sum = vdupq_n_u32(0); int y; for (y = 0; y < 16; ++y) { AccumulateSSE16(a + y * BPS, b + y * BPS, &sum); @@ -938,7 +938,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) { } static int SSE16x8(const uint8_t* a, const uint8_t* b) { - uint32x4_t sum = { 0, 0, 0, 0 }; + uint32x4_t sum = vdupq_n_u32(0); int y; for (y = 0; y < 8; ++y) { AccumulateSSE16(a + y * BPS, b + y * BPS, &sum); @@ -947,7 +947,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) { } static int SSE8x8(const uint8_t* a, const uint8_t* b) { - uint32x4_t sum = { 0, 0, 0, 0 }; + uint32x4_t sum = vdupq_n_u32(0); int y; for (y = 0; y < 8; ++y) { const uint8x8_t a0 = vld1_u8(a + y * BPS); @@ -970,9 +970,8 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) { //------------------------------------------------------------------------------ -// Compilation with gcc-4.6.x is problematic for now and vtbl? are unavailable -// in iOS/arm64 builds. Disable this function in those cases. -#if !(defined(WORK_AROUND_GCC) || defined(__aarch64__)) +// Compilation with gcc-4.6.x is problematic for now. +#if !defined(WORK_AROUND_GCC) static int16x8_t Quantize(int16_t* const in, const VP8Matrix* const mtx, int offset) { @@ -1002,27 +1001,44 @@ static int16x8_t Quantize(int16_t* const in, } static const uint8_t kShuffles[4][8] = { - { 0, 1, 2, 3, 8, 9, 16, 17 }, - { 10, 11, 4, 5, 6, 7, 12, 13 }, - { 18, 19, 24, 25, 26, 27, 20, 21 }, - { 14, 15, 22, 23, 28, 29, 30, 31 } + { 0, 1, 2, 3, 8, 9, 16, 17 }, + { 10, 11, 4, 5, 6, 7, 12, 13 }, + { 18, 19, 24, 25, 26, 27, 20, 21 }, + { 14, 15, 22, 23, 28, 29, 30, 31 } }; static int QuantizeBlock(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) { const int16x8_t out0 = Quantize(in, mtx, 0); const int16x8_t out1 = Quantize(in, mtx, 8); + uint8x8x4_t shuffles; + // vtbl4_u8 is marked unavailable for iOS arm64, use wider versions there. 
+#if defined(__APPLE__) && defined(__aarch64__) + uint8x16x2_t all_out; + INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1)); + INIT_VECTOR4(shuffles, + vtbl2q_u8(all_out, vld1_u8(kShuffles[0])), + vtbl2q_u8(all_out, vld1_u8(kShuffles[1])), + vtbl2q_u8(all_out, vld1_u8(kShuffles[2])), + vtbl2q_u8(all_out, vld1_u8(kShuffles[3]))); +#else uint8x8x4_t all_out; INIT_VECTOR4(all_out, vreinterpret_u8_s16(vget_low_s16(out0)), vreinterpret_u8_s16(vget_high_s16(out0)), vreinterpret_u8_s16(vget_low_s16(out1)), vreinterpret_u8_s16(vget_high_s16(out1))); + INIT_VECTOR4(shuffles, + vtbl4_u8(all_out, vld1_u8(kShuffles[0])), + vtbl4_u8(all_out, vld1_u8(kShuffles[1])), + vtbl4_u8(all_out, vld1_u8(kShuffles[2])), + vtbl4_u8(all_out, vld1_u8(kShuffles[3]))); +#endif // Zigzag reordering - vst1_u8((uint8_t*)(out + 0), vtbl4_u8(all_out, vld1_u8(kShuffles[0]))); - vst1_u8((uint8_t*)(out + 4), vtbl4_u8(all_out, vld1_u8(kShuffles[1]))); - vst1_u8((uint8_t*)(out + 8), vtbl4_u8(all_out, vld1_u8(kShuffles[2]))); - vst1_u8((uint8_t*)(out + 12), vtbl4_u8(all_out, vld1_u8(kShuffles[3]))); + vst1_u8((uint8_t*)(out + 0), shuffles.val[0]); + vst1_u8((uint8_t*)(out + 4), shuffles.val[1]); + vst1_u8((uint8_t*)(out + 8), shuffles.val[2]); + vst1_u8((uint8_t*)(out + 12), shuffles.val[3]); // test zeros if (*(uint64_t*)(out + 0) != 0) return 1; if (*(uint64_t*)(out + 4) != 0) return 1; @@ -1031,7 +1047,7 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], return 0; } -#endif // !WORK_AROUND_GCC && !__aarch64__ +#endif // !WORK_AROUND_GCC #endif // WEBP_USE_NEON @@ -1054,7 +1070,7 @@ void VP8EncDspInitNEON(void) { VP8SSE16x8 = SSE16x8; VP8SSE8x8 = SSE8x8; VP8SSE4x4 = SSE4x4; -#if !(defined(WORK_AROUND_GCC) || defined(__aarch64__)) +#if !defined(WORK_AROUND_GCC) VP8EncQuantizeBlock = QuantizeBlock; #endif #endif // WEBP_USE_NEON diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index 84e20784..a1bf3584 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -450,12 +450,21 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; } -static WEBP_INLINE int Sub3(int a, int b, int c) { +// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined. 
+#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409 +# define LOCAL_INLINE __attribute__ ((noinline)) +#else +# define LOCAL_INLINE WEBP_INLINE +#endif + +static LOCAL_INLINE int Sub3(int a, int b, int c) { const int pb = b - c; const int pa = a - c; return abs(pb) - abs(pa); } +#undef LOCAL_INLINE + static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { const int pa_minus_pb = Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + @@ -1169,7 +1178,7 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform, data += remaining_width; } ++y; - if ((y & mask) == 0) pred_row += tiles_per_row;; + if ((y & mask) == 0) pred_row += tiles_per_row; } } diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h index 03dfe223..08be9375 100644 --- a/src/dsp/lossless.h +++ b/src/dsp/lossless.h @@ -56,24 +56,20 @@ extern VP8LConvertFunc VP8LConvertBGRAToRGB565; extern VP8LConvertFunc VP8LConvertBGRAToBGR; // Expose some C-only fallback functions -extern void VP8LTransformColor_C(const VP8LMultipliers* const m, +void VP8LTransformColor_C(const VP8LMultipliers* const m, + uint32_t* data, int num_pixels); +void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data, int num_pixels); -extern void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, - uint32_t* data, int num_pixels); - -extern void VP8LConvertBGRAToRGB_C(const uint32_t* src, - int num_pixels, uint8_t* dst); -extern void VP8LConvertBGRAToRGBA_C(const uint32_t* src, - int num_pixels, uint8_t* dst); -extern void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, - int num_pixels, uint8_t* dst); -extern void VP8LConvertBGRAToRGB565_C(const uint32_t* src, - int num_pixels, uint8_t* dst); -extern void VP8LConvertBGRAToBGR_C(const uint32_t* src, - int num_pixels, uint8_t* dst); -extern void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, - int num_pixels); -extern void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels); + +void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst); +void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst); +void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, + int num_pixels, uint8_t* dst); +void VP8LConvertBGRAToRGB565_C(const uint32_t* src, + int num_pixels, uint8_t* dst); +void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst); +void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels); +void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels); // Must be called before calling any of the above methods. 
void VP8LDspInit(void); diff --git a/src/enc/alpha.c b/src/enc/alpha.c index ae4bf8ab..d624baa3 100644 --- a/src/enc/alpha.c +++ b/src/enc/alpha.c @@ -47,12 +47,11 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, int effort_level, // in [0..6] range - VP8BitWriter* const bw, + VP8LBitWriter* const bw, WebPAuxStats* const stats) { int ok = 0; WebPConfig config; WebPPicture picture; - VP8LBitWriter tmp_bw; WebPPictureInit(&picture); picture.width = width; @@ -84,16 +83,15 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, config.quality = 8.f * effort_level; assert(config.quality >= 0 && config.quality <= 100.f); - ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3); - ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK); + ok = (VP8LEncodeStream(&config, &picture, bw) == VP8_ENC_OK); WebPPictureFree(&picture); - if (ok) { - const uint8_t* const buffer = VP8LBitWriterFinish(&tmp_bw); - const size_t buffer_size = VP8LBitWriterNumBytes(&tmp_bw); - VP8BitWriterAppend(bw, buffer, buffer_size); + ok = ok && !bw->error_; + if (!ok) { + VP8LBitWriterDestroy(bw); + return 0; } - VP8LBitWriterDestroy(&tmp_bw); - return ok && !bw->error_; + return 1; + } // ----------------------------------------------------------------------------- @@ -115,8 +113,10 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, const uint8_t* alpha_src; WebPFilterFunc filter_func; uint8_t header; - size_t expected_size; const size_t data_size = width * height; + const uint8_t* output = NULL; + size_t output_size = 0; + VP8LBitWriter tmp_bw; assert((uint64_t)data_size == (uint64_t)width * height); // as per spec assert(filter >= 0 && filter < WEBP_FILTER_LAST); @@ -125,15 +125,6 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, assert(sizeof(header) == ALPHA_HEADER_LEN); // TODO(skal): have a common function and #define's to validate alpha params. - expected_size = - (method == ALPHA_NO_COMPRESSION) ? (ALPHA_HEADER_LEN + data_size) - : (data_size >> 5); - header = method | (filter << 2); - if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; - - VP8BitWriterInit(&result->bw, expected_size); - VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN); - filter_func = WebPFilters[filter]; if (filter_func != NULL) { filter_func(data, width, height, width, tmp_alpha); @@ -142,14 +133,42 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, alpha_src = data; } + if (method != ALPHA_NO_COMPRESSION) { + ok = VP8LBitWriterInit(&tmp_bw, data_size >> 3); + ok = ok && EncodeLossless(alpha_src, width, height, effort_level, + &tmp_bw, &result->stats); + if (ok) { + output = VP8LBitWriterFinish(&tmp_bw); + output_size = VP8LBitWriterNumBytes(&tmp_bw); + if (output_size > data_size) { + // compressed size is larger than source! Revert to uncompressed mode. + method = ALPHA_NO_COMPRESSION; + VP8LBitWriterDestroy(&tmp_bw); + } + } else { + VP8LBitWriterDestroy(&tmp_bw); + return 0; + } + } + if (method == ALPHA_NO_COMPRESSION) { - ok = VP8BitWriterAppend(&result->bw, alpha_src, width * height); - ok = ok && !result->bw.error_; - } else { - ok = EncodeLossless(alpha_src, width, height, effort_level, - &result->bw, &result->stats); - VP8BitWriterFinish(&result->bw); + output = alpha_src; + output_size = data_size; + ok = 1; + } + + // Emit final result. 
+ header = method | (filter << 2); + if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; + + VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size); + ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN); + ok = ok && VP8BitWriterAppend(&result->bw, output, output_size); + + if (method != ALPHA_NO_COMPRESSION) { + VP8LBitWriterDestroy(&tmp_bw); } + ok = ok && !result->bw.error_; result->score = VP8BitWriterSize(&result->bw); return ok; } diff --git a/src/enc/analysis.c b/src/enc/analysis.c index 934d0912..e019465b 100644 --- a/src/enc/analysis.c +++ b/src/enc/analysis.c @@ -141,7 +141,11 @@ static void MergeHistograms(const VP8Histogram* const in, static void AssignSegments(VP8Encoder* const enc, const int alphas[MAX_ALPHA + 1]) { - const int nb = enc->segment_hdr_.num_segments_; + // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an + // explicit check is needed to avoid spurious warning about 'n + 1' exceeding + // array bounds of 'centers' with some compilers (noticed with gcc-4.9). + const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ? + enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS; int centers[NUM_MB_SEGMENTS]; int weighted_average = 0; int map[MAX_ALPHA + 1]; diff --git a/src/enc/config.c b/src/enc/config.c index 8a2eef08..58c03e48 100644 --- a/src/enc/config.c +++ b/src/enc/config.c @@ -111,7 +111,11 @@ int WebPValidateConfig(const WebPConfig* config) { return 0; if (config->show_compressed < 0 || config->show_compressed > 1) return 0; +#if WEBP_ENCODER_ABI_VERSION > 0x0204 + if (config->preprocessing < 0 || config->preprocessing > 7) +#else if (config->preprocessing < 0 || config->preprocessing > 3) +#endif return 0; if (config->partitions < 0 || config->partitions > 3) return 0; diff --git a/src/enc/cost.h b/src/enc/cost.h index 5d107569..4e558952 100644 --- a/src/enc/cost.h +++ b/src/enc/cost.h @@ -42,7 +42,7 @@ typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs, VP8Residual* const res); extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; -extern void VP8SetResidualCoeffsInit(void); // must be called first +void VP8SetResidualCoeffsInit(void); // must be called first int VP8RecordCoeffs(int ctx, const VP8Residual* const res); @@ -59,7 +59,7 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) { typedef int (*VP8GetResidualCostFunc)(int ctx0, const VP8Residual* const res); extern VP8GetResidualCostFunc VP8GetResidualCost; -extern void VP8GetResidualCostInit(void); // must be called first +void VP8GetResidualCostInit(void); // must be called first // Level cost calculations extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2]; diff --git a/src/enc/frame.c b/src/enc/frame.c index ff3cd653..6fd20bb7 100644 --- a/src/enc/frame.c +++ b/src/enc/frame.c @@ -508,7 +508,7 @@ static void StoreSideInfo(const VP8EncIterator* const it) { } case 7: *info = mb->alpha_; break; default: *info = 0; break; - }; + } } #if SEGMENT_VISU // visualize segments and prediction modes SetBlock(it->yuv_out_ + Y_OFF, mb->segment_ * 64, 16); diff --git a/src/enc/picture_csp.c b/src/enc/picture_csp.c index 7964f257..7875f625 100644 --- a/src/enc/picture_csp.c +++ b/src/enc/picture_csp.c @@ -17,11 +17,15 @@ #include "./vp8enci.h" #include "../utils/random.h" +#include "../utils/utils.h" #include "../dsp/yuv.h" // Uncomment to disable gamma-compression during RGB->U/V averaging #define USE_GAMMA_COMPRESSION +// If defined, use table to compute x / alpha. 
+#define USE_INVERSE_ALPHA_TABLE + static const union { uint32_t argb; uint8_t bytes[4]; @@ -70,26 +74,12 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { } //------------------------------------------------------------------------------ -// RGB -> YUV conversion - -static int RGBToY(int r, int g, int b, VP8Random* const rg) { - return VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX)); -} - -static int RGBToU(int r, int g, int b, VP8Random* const rg) { - return VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); -} - -static int RGBToV(int r, int g, int b, VP8Random* const rg) { - return VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); -} - -//------------------------------------------------------------------------------ +// Code for gamma correction #if defined(USE_GAMMA_COMPRESSION) // gamma-compensates loss of resolution during chroma subsampling -#define kGamma 0.80 +#define kGamma 0.80 // for now we use a different gamma value than kGammaF #define kGammaFix 12 // fixed-point precision for linear values #define kGammaScale ((1 << kGammaFix) - 1) #define kGammaTabFix 7 // fixed-point fractional bits precision @@ -104,14 +94,14 @@ static int kGammaTablesOk = 0; static void InitGammaTables(void) { if (!kGammaTablesOk) { int v; - const double scale = 1. / kGammaScale; + const double scale = (double)(1 << kGammaTabFix) / kGammaScale; + const double norm = 1. / 255.; for (v = 0; v <= 255; ++v) { kGammaToLinearTab[v] = - (uint16_t)(pow(v / 255., kGamma) * kGammaScale + .5); + (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5); } for (v = 0; v <= kGammaTabSize; ++v) { - const double x = scale * (v << kGammaTabFix); - kLinearToGammaTab[v] = (int)(pow(x, 1. / kGamma) * 255. + .5); + kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5); } kGammaTablesOk = 1; } @@ -121,16 +111,21 @@ static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return kGammaToLinearTab[v]; } -// Convert a linear value 'v' to YUV_FIX+2 fixed-point precision -// U/V value, suitable for RGBToU/V calls. -static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { - const int v = base_value << shift; // final uplifted value +static WEBP_INLINE int Interpolate(int v) { const int tab_pos = v >> (kGammaTabFix + 2); // integer part const int x = v & ((kGammaTabScale << 2) - 1); // fractional part const int v0 = kLinearToGammaTab[tab_pos]; const int v1 = kLinearToGammaTab[tab_pos + 1]; const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate - return (y + kGammaTabRounder) >> kGammaTabFix; // descale + assert(tab_pos + 1 < kGammaTabSize + 1); + return y; +} + +// Convert a linear value 'v' to YUV_FIX+2 fixed-point precision +// U/V value, suitable for RGBToU/V calls. +static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { + const int y = Interpolate(base_value << shift); // final uplifted value + return (y + kGammaTabRounder) >> kGammaTabFix; // descale } #else @@ -144,28 +139,700 @@ static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { #endif // USE_GAMMA_COMPRESSION //------------------------------------------------------------------------------ +// RGB -> YUV conversion + +static int RGBToY(int r, int g, int b, VP8Random* const rg) { + return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF) + : VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX)); +} + +static int RGBToU(int r, int g, int b, VP8Random* const rg) { + return (rg == NULL) ? 
VP8RGBToU(r, g, b, YUV_HALF << 2) + : VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); +} + +static int RGBToV(int r, int g, int b, VP8Random* const rg) { + return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2) + : VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); +} + +//------------------------------------------------------------------------------ +// Smart RGB->YUV conversion + +static const int kNumIterations = 6; +static const int kMinDimensionIterativeConversion = 4; + +// We use a-priori a different precision for storing RGB and Y/W components +// We could use YFIX=0 and only uint8_t for fixed_y_t, but it produces some +// banding sometimes. Better use extra precision. +// TODO(skal): cleanup once TFIX/YFIX values are fixed. + +typedef int16_t fixed_t; // signed type with extra TFIX precision for UV +typedef uint16_t fixed_y_t; // unsigned type with extra YFIX precision for W +#define TFIX 6 // fixed-point precision of RGB +#define YFIX 2 // fixed point precision for Y/W + +#define THALF ((1 << TFIX) >> 1) +#define MAX_Y_T ((256 << YFIX) - 1) +#define TROUNDER (1 << (YUV_FIX + TFIX - 1)) + +#if defined(USE_GAMMA_COMPRESSION) + +// float variant of gamma-correction +// We use tables of different size and precision, along with a 'real-world' +// Gamma value close to ~2. +#define kGammaF 2.2 +static float kGammaToLinearTabF[MAX_Y_T + 1]; // size scales with Y_FIX +static float kLinearToGammaTabF[kGammaTabSize + 2]; +static int kGammaTablesFOk = 0; + +static void InitGammaTablesF(void) { + if (!kGammaTablesFOk) { + int v; + const double norm = 1. / MAX_Y_T; + const double scale = 1. / kGammaTabSize; + for (v = 0; v <= MAX_Y_T; ++v) { + kGammaToLinearTabF[v] = (float)pow(norm * v, kGammaF); + } + for (v = 0; v <= kGammaTabSize; ++v) { + kLinearToGammaTabF[v] = (float)(MAX_Y_T * pow(scale * v, 1. / kGammaF)); + } + // to prevent small rounding errors to cause read-overflow: + kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize]; + kGammaTablesFOk = 1; + } +} + +static WEBP_INLINE float GammaToLinearF(int v) { + return kGammaToLinearTabF[v]; +} + +static WEBP_INLINE float LinearToGammaF(float value) { + const float v = value * kGammaTabSize; + const int tab_pos = (int)v; + const float x = v - (float)tab_pos; // fractional part + const float v0 = kLinearToGammaTabF[tab_pos + 0]; + const float v1 = kLinearToGammaTabF[tab_pos + 1]; + const float y = v1 * x + v0 * (1.f - x); // interpolate + return y; +} + +#else + +static void InitGammaTablesF(void) {} +static WEBP_INLINE float GammaToLinearF(int v) { + const float norm = 1.f / MAX_Y_T; + return norm * v; +} +static WEBP_INLINE float LinearToGammaF(float value) { + return MAX_Y_T * value; +} + +#endif // USE_GAMMA_COMPRESSION + +//------------------------------------------------------------------------------ + +// precision: YFIX -> TFIX +static WEBP_INLINE int FixedYToW(int v) { +#if TFIX == YFIX + return v; +#elif TFIX >= YFIX + return v << (TFIX - YFIX); +#else + return v >> (YFIX - TFIX); +#endif +} + +static WEBP_INLINE int FixedWToY(int v) { +#if TFIX == YFIX + return v; +#elif YFIX >= TFIX + return v << (YFIX - TFIX); +#else + return v >> (TFIX - YFIX); +#endif +} + +static uint8_t clip_8b(fixed_t v) { + return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; +} + +static fixed_y_t clip_y(int y) { + return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 
0 : MAX_Y_T; +} + +// precision: TFIX -> YFIX +static fixed_y_t clip_fixed_t(fixed_t v) { + const int y = FixedWToY(v); + const fixed_y_t w = clip_y(y); + return w; +} + +//------------------------------------------------------------------------------ -#define SUM4(ptr) LinearToGamma( \ - GammaToLinear((ptr)[0]) + \ - GammaToLinear((ptr)[step]) + \ - GammaToLinear((ptr)[rgb_stride]) + \ - GammaToLinear((ptr)[rgb_stride + step]), 0) \ +static int RGBToGray(int r, int g, int b) { + const int luma = 19595 * r + 38470 * g + 7471 * b + YUV_HALF; + return (luma >> YUV_FIX); +} + +static float RGBToGrayF(float r, float g, float b) { + return 0.299f * r + 0.587f * g + 0.114f * b; +} -#define SUM2H(ptr) \ - LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[step]), 1) -#define SUM2V(ptr) \ +static float ScaleDown(int a, int b, int c, int d) { + const float A = GammaToLinearF(a); + const float B = GammaToLinearF(b); + const float C = GammaToLinearF(c); + const float D = GammaToLinearF(d); + return LinearToGammaF(0.25f * (A + B + C + D)); +} + +static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) { + while (len-- > 0) { + const float R = GammaToLinearF(src[0]); + const float G = GammaToLinearF(src[1]); + const float B = GammaToLinearF(src[2]); + const float Y = RGBToGrayF(R, G, B); + *dst++ = (fixed_y_t)(LinearToGammaF(Y) + .5); + src += 3; + } +} + +static WEBP_INLINE void UpdateChroma(const fixed_y_t* src1, + const fixed_y_t* src2, + fixed_t* dst, fixed_y_t* tmp, int len) { + while (len--> 0) { + const float r = ScaleDown(src1[0], src1[3], src2[0], src2[3]); + const float g = ScaleDown(src1[1], src1[4], src2[1], src2[4]); + const float b = ScaleDown(src1[2], src1[5], src2[2], src2[5]); + const float W = RGBToGrayF(r, g, b); + dst[0] = (fixed_t)FixedYToW((int)(r - W)); + dst[1] = (fixed_t)FixedYToW((int)(g - W)); + dst[2] = (fixed_t)FixedYToW((int)(b - W)); + dst += 3; + src1 += 6; + src2 += 6; + if (tmp != NULL) { + tmp[0] = tmp[1] = clip_y((int)(W + .5)); + tmp += 2; + } + } +} + +//------------------------------------------------------------------------------ + +static WEBP_INLINE int Filter(const fixed_t* const A, const fixed_t* const B, + int rightwise) { + int v; + if (!rightwise) { + v = (A[0] * 9 + A[-3] * 3 + B[0] * 3 + B[-3]); + } else { + v = (A[0] * 9 + A[+3] * 3 + B[0] * 3 + B[+3]); + } + return (v + 8) >> 4; +} + +static WEBP_INLINE int Filter2(int A, int B) { return (A * 3 + B + 2) >> 2; } + +//------------------------------------------------------------------------------ + +// 8bit -> YFIX +static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { + return ((fixed_y_t)a << YFIX) | (1 << (YFIX - 1)); +} + +static void ImportOneRow(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, + int pic_width, + fixed_y_t* const dst) { + int i; + for (i = 0; i < pic_width; ++i) { + const int off = i * step; + dst[3 * i + 0] = UpLift(r_ptr[off]); + dst[3 * i + 1] = UpLift(g_ptr[off]); + dst[3 * i + 2] = UpLift(b_ptr[off]); + } + if (pic_width & 1) { // replicate rightmost pixel + memcpy(dst + 3 * pic_width, dst + 3 * (pic_width - 1), 3 * sizeof(*dst)); + } +} + +static void InterpolateTwoRows(const fixed_y_t* const best_y, + const fixed_t* const prev_uv, + const fixed_t* const cur_uv, + const fixed_t* const next_uv, + int w, + fixed_y_t* const out1, + fixed_y_t* const out2) { + int i, k; + { // special boundary case for i==0 + const int W0 = FixedYToW(best_y[0]); + const int W1 = FixedYToW(best_y[w]); + for (k = 0; k <= 2; 
++k) { + out1[k] = clip_fixed_t(Filter2(cur_uv[k], prev_uv[k]) + W0); + out2[k] = clip_fixed_t(Filter2(cur_uv[k], next_uv[k]) + W1); + } + } + for (i = 1; i < w - 1; ++i) { + const int W0 = FixedYToW(best_y[i + 0]); + const int W1 = FixedYToW(best_y[i + w]); + const int off = 3 * (i >> 1); + for (k = 0; k <= 2; ++k) { + const int tmp0 = Filter(cur_uv + off + k, prev_uv + off + k, i & 1); + const int tmp1 = Filter(cur_uv + off + k, next_uv + off + k, i & 1); + out1[3 * i + k] = clip_fixed_t(tmp0 + W0); + out2[3 * i + k] = clip_fixed_t(tmp1 + W1); + } + } + { // special boundary case for i == w - 1 + const int W0 = FixedYToW(best_y[i + 0]); + const int W1 = FixedYToW(best_y[i + w]); + const int off = 3 * (i >> 1); + for (k = 0; k <= 2; ++k) { + out1[3 * i + k] = + clip_fixed_t(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0); + out2[3 * i + k] = + clip_fixed_t(Filter2(cur_uv[off + k], next_uv[off + k]) + W1); + } + } +} + +static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) { + const int luma = 16839 * r + 33059 * g + 6420 * b + TROUNDER; + return clip_8b(16 + (luma >> (YUV_FIX + TFIX))); +} + +static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) { + const int u = -9719 * r - 19081 * g + 28800 * b + TROUNDER; + return clip_8b(128 + (u >> (YUV_FIX + TFIX))); +} + +static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) { + const int v = +28800 * r - 24116 * g - 4684 * b + TROUNDER; + return clip_8b(128 + (v >> (YUV_FIX + TFIX))); +} + +static int ConvertWRGBToYUV(const fixed_y_t* const best_y, + const fixed_t* const best_uv, + WebPPicture* const picture) { + int i, j; + const int w = (picture->width + 1) & ~1; + const int h = (picture->height + 1) & ~1; + const int uv_w = w >> 1; + const int uv_h = h >> 1; + for (j = 0; j < picture->height; ++j) { + for (i = 0; i < picture->width; ++i) { + const int off = 3 * ((i >> 1) + (j >> 1) * uv_w); + const int off2 = i + j * picture->y_stride; + const int W = FixedYToW(best_y[i + j * w]); + const int r = best_uv[off + 0] + W; + const int g = best_uv[off + 1] + W; + const int b = best_uv[off + 2] + W; + picture->y[off2] = ConvertRGBToY(r, g, b); + } + } + for (j = 0; j < uv_h; ++j) { + uint8_t* const dst_u = picture->u + j * picture->uv_stride; + uint8_t* const dst_v = picture->v + j * picture->uv_stride; + for (i = 0; i < uv_w; ++i) { + const int off = 3 * (i + j * uv_w); + const int r = best_uv[off + 0]; + const int g = best_uv[off + 1]; + const int b = best_uv[off + 2]; + dst_u[i] = ConvertRGBToU(r, g, b); + dst_v[i] = ConvertRGBToV(r, g, b); + } + } + return 1; +} + +//------------------------------------------------------------------------------ +// Main function + +#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T))) + +static int PreprocessARGB(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, int rgb_stride, + WebPPicture* const picture) { + // we expand the right/bottom border if needed + const int w = (picture->width + 1) & ~1; + const int h = (picture->height + 1) & ~1; + const int uv_w = w >> 1; + const int uv_h = h >> 1; + int i, j, iter; + + // TODO(skal): allocate one big memory chunk. But for now, it's easier + // for valgrind debugging to have several chunks. 
+ fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch + fixed_y_t* const best_y = SAFE_ALLOC(w, h, fixed_y_t); + fixed_y_t* const target_y = SAFE_ALLOC(w, h, fixed_y_t); + fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); + fixed_t* const best_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); + fixed_t* const target_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); + fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); + int ok; + + if (best_y == NULL || best_uv == NULL || + target_y == NULL || target_uv == NULL || + best_rgb_y == NULL || best_rgb_uv == NULL || + tmp_buffer == NULL) { + ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto End; + } + assert(picture->width >= kMinDimensionIterativeConversion); + assert(picture->height >= kMinDimensionIterativeConversion); + + // Import RGB samples to W/RGB representation. + for (j = 0; j < picture->height; j += 2) { + const int is_last_row = (j == picture->height - 1); + fixed_y_t* const src1 = tmp_buffer; + fixed_y_t* const src2 = tmp_buffer + 3 * w; + const int off1 = j * rgb_stride; + const int off2 = off1 + rgb_stride; + const int uv_off = (j >> 1) * 3 * uv_w; + fixed_y_t* const dst_y = best_y + j * w; + + // prepare two rows of input + ImportOneRow(r_ptr + off1, g_ptr + off1, b_ptr + off1, + step, picture->width, src1); + if (!is_last_row) { + ImportOneRow(r_ptr + off2, g_ptr + off2, b_ptr + off2, + step, picture->width, src2); + } else { + memcpy(src2, src1, 3 * w * sizeof(*src2)); + } + UpdateW(src1, target_y + (j + 0) * w, w); + UpdateW(src2, target_y + (j + 1) * w, w); + UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w); + memcpy(best_uv + uv_off, target_uv + uv_off, 3 * uv_w * sizeof(*best_uv)); + memcpy(dst_y + w, dst_y, w * sizeof(*dst_y)); + } + + // Iterate and resolve clipping conflicts. + for (iter = 0; iter < kNumIterations; ++iter) { + int k; + const fixed_t* cur_uv = best_uv; + const fixed_t* prev_uv = best_uv; + for (j = 0; j < h; j += 2) { + fixed_y_t* const src1 = tmp_buffer; + fixed_y_t* const src2 = tmp_buffer + 3 * w; + + { + const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 
3 * uv_w : 0); + InterpolateTwoRows(best_y + j * w, prev_uv, cur_uv, next_uv, + w, src1, src2); + prev_uv = cur_uv; + cur_uv = next_uv; + } + + UpdateW(src1, best_rgb_y + 0 * w, w); + UpdateW(src2, best_rgb_y + 1 * w, w); + UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w); + + // update two rows of Y and one row of RGB + for (i = 0; i < 2 * w; ++i) { + const int off = i + j * w; + const int diff_y = target_y[off] - best_rgb_y[i]; + const int new_y = (int)best_y[off] + diff_y; + best_y[off] = clip_y(new_y); + } + for (i = 0; i < uv_w; ++i) { + const int off = 3 * (i + (j >> 1) * uv_w); + int W; + for (k = 0; k <= 2; ++k) { + const int diff_uv = (int)target_uv[off + k] - best_rgb_uv[3 * i + k]; + best_uv[off + k] += diff_uv; + } + W = RGBToGray(best_uv[off + 0], best_uv[off + 1], best_uv[off + 2]); + for (k = 0; k <= 2; ++k) { + best_uv[off + k] -= W; + } + } + } + // TODO(skal): add early-termination criterion + } + + // final reconstruction + ok = ConvertWRGBToYUV(best_y, best_uv, picture); + + End: + WebPSafeFree(best_y); + WebPSafeFree(best_uv); + WebPSafeFree(target_y); + WebPSafeFree(target_uv); + WebPSafeFree(best_rgb_y); + WebPSafeFree(best_rgb_uv); + WebPSafeFree(tmp_buffer); + return ok; +} +#undef SAFE_ALLOC + +//------------------------------------------------------------------------------ +// "Fast" regular RGB->YUV + +#define SUM4(ptr, step) LinearToGamma( \ + GammaToLinear((ptr)[0]) + \ + GammaToLinear((ptr)[(step)]) + \ + GammaToLinear((ptr)[rgb_stride]) + \ + GammaToLinear((ptr)[rgb_stride + (step)]), 0) \ + +#define SUM2(ptr) \ LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1) -#define SUM1(ptr) \ - LinearToGamma(GammaToLinear((ptr)[0]), 2) -#define RGB_TO_UV(x, y, SUM) { \ - const int src = (2 * (step * (x) + (y) * rgb_stride)); \ - const int dst = (x) + (y) * picture->uv_stride; \ - const int r = SUM(r_ptr + src); \ - const int g = SUM(g_ptr + src); \ - const int b = SUM(b_ptr + src); \ - picture->u[dst] = RGBToU(r, g, b, &rg); \ - picture->v[dst] = RGBToV(r, g, b, &rg); \ +#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride]) +#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4)) + +#if defined(USE_INVERSE_ALPHA_TABLE) + +static const int kAlphaFix = 19; +// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix +// formula is then equal to v / a in most (99.6%) cases. Note that this table +// and constant are adjusted very tightly to fit 32b arithmetic. +// In particular, they use the fact that the operands for 'v / a' are actually +// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3 +// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid +// overflow is: kGammaFix + kAlphaFix <= 31. 
+static const uint32_t kInvAlpha[4 * 0xff + 1] = { + 0, /* alpha = 0 */ + 524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536, + 58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768, + 30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845, + 20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384, + 15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107, + 12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922, + 10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362, + 9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192, + 8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281, + 7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553, + 6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957, + 5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461, + 5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041, + 4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681, + 4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369, + 4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096, + 4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855, + 3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640, + 3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449, + 3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276, + 3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120, + 3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978, + 2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849, + 2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730, + 2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621, + 2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520, + 2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427, + 2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340, + 2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259, + 2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184, + 2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114, + 2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048, + 2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985, + 1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927, + 1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872, + 1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820, + 1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771, + 1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724, + 1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680, + 1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638, + 1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598, + 1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560, + 1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524, + 1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489, + 1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456, + 1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424, + 1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394, + 1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365, + 1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337, + 1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310, + 1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285, + 1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260, + 1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236, + 1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213, + 1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191, + 1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170, + 1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149, + 1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129, + 1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110, + 1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092, + 1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074, + 1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057, + 1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040, + 1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024, + 1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008, + 1006, 1004, 1002, 1000, 998, 996, 994, 992, + 991, 989, 987, 985, 983, 981, 979, 978, + 976, 974, 972, 970, 969, 967, 965, 963, + 961, 960, 958, 956, 954, 953, 951, 949, + 
948, 946, 944, 942, 941, 939, 937, 936, + 934, 932, 931, 929, 927, 926, 924, 923, + 921, 919, 918, 916, 914, 913, 911, 910, + 908, 907, 905, 903, 902, 900, 899, 897, + 896, 894, 893, 891, 890, 888, 887, 885, + 884, 882, 881, 879, 878, 876, 875, 873, + 872, 870, 869, 868, 866, 865, 863, 862, + 860, 859, 858, 856, 855, 853, 852, 851, + 849, 848, 846, 845, 844, 842, 841, 840, + 838, 837, 836, 834, 833, 832, 830, 829, + 828, 826, 825, 824, 823, 821, 820, 819, + 817, 816, 815, 814, 812, 811, 810, 809, + 807, 806, 805, 804, 802, 801, 800, 799, + 798, 796, 795, 794, 793, 791, 790, 789, + 788, 787, 786, 784, 783, 782, 781, 780, + 779, 777, 776, 775, 774, 773, 772, 771, + 769, 768, 767, 766, 765, 764, 763, 762, + 760, 759, 758, 757, 756, 755, 754, 753, + 752, 751, 750, 748, 747, 746, 745, 744, + 743, 742, 741, 740, 739, 738, 737, 736, + 735, 734, 733, 732, 731, 730, 729, 728, + 727, 726, 725, 724, 723, 722, 721, 720, + 719, 718, 717, 716, 715, 714, 713, 712, + 711, 710, 709, 708, 707, 706, 705, 704, + 703, 702, 701, 700, 699, 699, 698, 697, + 696, 695, 694, 693, 692, 691, 690, 689, + 688, 688, 687, 686, 685, 684, 683, 682, + 681, 680, 680, 679, 678, 677, 676, 675, + 674, 673, 673, 672, 671, 670, 669, 668, + 667, 667, 666, 665, 664, 663, 662, 661, + 661, 660, 659, 658, 657, 657, 656, 655, + 654, 653, 652, 652, 651, 650, 649, 648, + 648, 647, 646, 645, 644, 644, 643, 642, + 641, 640, 640, 639, 638, 637, 637, 636, + 635, 634, 633, 633, 632, 631, 630, 630, + 629, 628, 627, 627, 626, 625, 624, 624, + 623, 622, 621, 621, 620, 619, 618, 618, + 617, 616, 616, 615, 614, 613, 613, 612, + 611, 611, 610, 609, 608, 608, 607, 606, + 606, 605, 604, 604, 603, 602, 601, 601, + 600, 599, 599, 598, 597, 597, 596, 595, + 595, 594, 593, 593, 592, 591, 591, 590, + 589, 589, 588, 587, 587, 586, 585, 585, + 584, 583, 583, 582, 581, 581, 580, 579, + 579, 578, 578, 577, 576, 576, 575, 574, + 574, 573, 572, 572, 571, 571, 570, 569, + 569, 568, 568, 567, 566, 566, 565, 564, + 564, 563, 563, 562, 561, 561, 560, 560, + 559, 558, 558, 557, 557, 556, 555, 555, + 554, 554, 553, 553, 552, 551, 551, 550, + 550, 549, 548, 548, 547, 547, 546, 546, + 545, 544, 544, 543, 543, 542, 542, 541, + 541, 540, 539, 539, 538, 538, 537, 537, + 536, 536, 535, 534, 534, 533, 533, 532, + 532, 531, 531, 530, 530, 529, 529, 528, + 527, 527, 526, 526, 525, 525, 524, 524, + 523, 523, 522, 522, 521, 521, 520, 520, + 519, 519, 518, 518, 517, 517, 516, 516, + 515, 515, 514, 514 +}; + +// Note that LinearToGamma() expects the values to be premultiplied by 4, +// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly. 
+#define DIVIDE_BY_ALPHA(sum, a) (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2)) + +#else + +#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a)) + +#endif // USE_INVERSE_ALPHA_TABLE + +static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src, + const uint8_t* a_ptr, + uint32_t total_a, int step, + int rgb_stride) { + const uint32_t sum = + a_ptr[0] * GammaToLinear(src[0]) + + a_ptr[step] * GammaToLinear(src[step]) + + a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) + + a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]); + assert(total_a > 0 && total_a <= 4 * 0xff); +#if defined(USE_INVERSE_ALPHA_TABLE) + assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32)); +#endif + return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0); +} + +static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, + uint8_t* const dst_y, + int width, + VP8Random* const rg) { + int i, j; + for (i = 0, j = 0; i < width; ++i, j += step) { + dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg); + } +} + +static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + const uint8_t* const a_ptr, + int rgb_stride, + uint8_t* const dst_u, + uint8_t* const dst_v, + int width, + VP8Random* const rg) { + int i, j; + // we loop over 2x2 blocks and produce one U/V value for each. + for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * sizeof(uint32_t)) { + const uint32_t a = SUM4ALPHA(a_ptr + j); + int r, g, b; + if (a == 4 * 0xff || a == 0) { + r = SUM4(r_ptr + j, 4); + g = SUM4(g_ptr + j, 4); + b = SUM4(b_ptr + j, 4); + } else { + r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride); + g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride); + b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride); + } + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } + if (width & 1) { + const uint32_t a = 2u * SUM2ALPHA(a_ptr + j); + int r, g, b; + if (a == 4 * 0xff || a == 0) { + r = SUM2(r_ptr + j); + g = SUM2(g_ptr + j); + b = SUM2(b_ptr + j); + } else { + r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride); + g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride); + b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride); + } + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } +} + +static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, int rgb_stride, + uint8_t* const dst_u, + uint8_t* const dst_v, + int width, + VP8Random* const rg) { + int i, j; + for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * step) { + const int r = SUM4(r_ptr + j, step); + const int g = SUM4(g_ptr + j, step); + const int b = SUM4(b_ptr + j, step); + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } + if (width & 1) { + const int r = SUM2(r_ptr + j); + const int g = SUM2(g_ptr + j); + const int b = SUM2(b_ptr + j); + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } } static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, @@ -175,59 +842,99 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, int step, // bytes per pixel int rgb_stride, // bytes per scanline float dithering, + int use_iterative_conversion, WebPPicture* const picture) { - int x, y; + int y; const int width = picture->width; const int height = picture->height; const int 
 
 static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
@@ -175,59 +842,99 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
                               int step,         // bytes per pixel
                               int rgb_stride,   // bytes per scanline
                               float dithering,
+                              int use_iterative_conversion,
                               WebPPicture* const picture) {
-  int x, y;
+  int y;
   const int width = picture->width;
   const int height = picture->height;
   const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
-  VP8Random rg;
-  if (has_alpha) {
-    picture->colorspace |= WEBP_CSP_ALPHA_BIT;
-  } else {
-    picture->colorspace &= WEBP_CSP_UV_MASK;
-  }
+  picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
   picture->use_argb = 0;
 
-  if (!WebPPictureAllocYUVA(picture, width, height)) return 0;
-
-  VP8InitRandom(&rg, dithering);
-  InitGammaTables();
+  // disable smart conversion if source is too small (overkill).
+  if (width < kMinDimensionIterativeConversion ||
+      height < kMinDimensionIterativeConversion) {
+    use_iterative_conversion = 0;
+  }
 
-  // Import luma plane
-  for (y = 0; y < height; ++y) {
-    uint8_t* const dst = &picture->y[y * picture->y_stride];
-    for (x = 0; x < width; ++x) {
-      const int offset = step * x + y * rgb_stride;
-      dst[x] = RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset], &rg);
-    }
+  if (!WebPPictureAllocYUVA(picture, width, height)) {
+    return 0;
+  }
+  if (has_alpha) {
+    WebPInitAlphaProcessing();
+    assert(step == 4);
+#if defined(USE_INVERSE_ALPHA_TABLE)
+    assert(kAlphaFix + kGammaFix <= 31);
+#endif
   }
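Note: ImportYUVAFromRGBA() now carries a use_iterative_conversion flag, and the guard above silently downgrades it for tiny pictures where the extra passes cannot pay off. A standalone illustration of that selection (the value 16 for kMinDimensionIterativeConversion is an assumption; the real constant is defined earlier in picture_csp.c, outside this hunk):

    #include <stdio.h>

    #define kMinDimensionIterativeConversion 16  /* assumed value */

    /* Returns the conversion mode actually used: 1 = iterative, 0 = fast. */
    static int EffectiveMode(int width, int height, int requested) {
      if (width < kMinDimensionIterativeConversion ||
          height < kMinDimensionIterativeConversion) {
        return 0;  /* iterative conversion is overkill on tiny pictures */
      }
      return requested;
    }

    int main(void) {
      printf("%d %d\n", EffectiveMode(8, 8, 1),       /* 0: too small */
                        EffectiveMode(640, 480, 1));  /* 1: iterative */
      return 0;
    }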
 
-  // Downsample U/V plane
-  for (y = 0; y < (height >> 1); ++y) {
-    for (x = 0; x < (width >> 1); ++x) {
-      RGB_TO_UV(x, y, SUM4);
-    }
-    if (width & 1) {
-      RGB_TO_UV(x, y, SUM2V);
+  if (use_iterative_conversion) {
+    InitGammaTablesF();
+    if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
+      return 0;
     }
-  }
-  if (height & 1) {
-    for (x = 0; x < (width >> 1); ++x) {
-      RGB_TO_UV(x, y, SUM2H);
+    if (has_alpha) {
+      WebPExtractAlpha(a_ptr, rgb_stride, width, height,
+                       picture->a, picture->a_stride);
     }
-    if (width & 1) {
-      RGB_TO_UV(x, y, SUM1);
+  } else {
+    uint8_t* dst_y = picture->y;
+    uint8_t* dst_u = picture->u;
+    uint8_t* dst_v = picture->v;
+    uint8_t* dst_a = picture->a;
+
+    VP8Random base_rg;
+    VP8Random* rg = NULL;
+    if (dithering > 0.) {
+      VP8InitRandom(&base_rg, dithering);
+      rg = &base_rg;
    }
-  }
-  if (has_alpha) {
-    assert(step >= 4);
-    assert(picture->a != NULL);
-    for (y = 0; y < height; ++y) {
-      for (x = 0; x < width; ++x) {
-        picture->a[x + y * picture->a_stride] =
-            a_ptr[step * x + y * rgb_stride];
+    InitGammaTables();
+
+    // Downsample Y/U/V planes, two rows at a time
+    for (y = 0; y < (height >> 1); ++y) {
+      int rows_have_alpha = has_alpha;
+      const int off1 = (2 * y + 0) * rgb_stride;
+      const int off2 = (2 * y + 1) * rgb_stride;
+      ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step,
+                    dst_y, width, rg);
+      ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step,
+                    dst_y + picture->y_stride, width, rg);
+      dst_y += 2 * picture->y_stride;
+      if (has_alpha) {
+        rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride,
+                                             width, 2,
+                                             dst_a, picture->a_stride);
+        dst_a += 2 * picture->a_stride;
+      }
+      if (!rows_have_alpha) {
+        ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+                        step, rgb_stride, dst_u, dst_v, width, rg);
+      } else {
+        ConvertRowsToUVWithAlpha(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+                                 a_ptr + off1, rgb_stride,
+                                 dst_u, dst_v, width, rg);
+      }
+      dst_u += picture->uv_stride;
+      dst_v += picture->uv_stride;
+    }
+    if (height & 1) {    // extra last row
+      const int off = 2 * y * rgb_stride;
+      int row_has_alpha = has_alpha;
+      ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step,
+                    dst_y, width, rg);
+      if (row_has_alpha) {
+        row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0);
+      }
+      if (!row_has_alpha) {
+        ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off,
+                        step, 0, dst_u, dst_v, width, rg);
+      } else {
+        ConvertRowsToUVWithAlpha(r_ptr + off, g_ptr + off, b_ptr + off,
+                                 a_ptr + off, 0,
+                                 dst_u, dst_v, width, rg);
      }
    }
  }
@@ -235,19 +942,20 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
 }
 
 #undef SUM4
-#undef SUM2V
-#undef SUM2H
-#undef SUM1
-#undef RGB_TO_UV
+#undef SUM2
+#undef SUM4ALPHA
+#undef SUM2ALPHA
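Note: the converter's main loop above consumes two source rows per iteration, with an odd-height tail where the last row pairs with itself. The rows_have_alpha &= !WebPExtractAlpha(...) pattern relies on WebPExtractAlpha() reporting whether the alpha it extracted was entirely opaque, so fully opaque row pairs take the cheaper ConvertRowsToUV() path. A skeleton of just the traversal (the Process* helpers are hypothetical):

    #include <stdio.h>

    /* Hypothetical stand-ins for the real row converters. */
    static void ProcessRowPair(int y) {
      printf("rows %d-%d -> 2 Y rows, 1 UV row\n", y, y + 1);
    }
    static void ProcessLastRow(int y) {
      printf("row %d -> 1 Y row, 1 UV row (self-paired)\n", y);
    }

    static void Traverse(int height) {
      int y;
      for (y = 0; y < (height >> 1); ++y) {
        ProcessRowPair(2 * y);
      }
      if (height & 1) {    /* extra last row, as in the patch */
        ProcessLastRow(height - 1);
      }
    }

    int main(void) { Traverse(5); return 0; }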
 
 //------------------------------------------------------------------------------
 // call for ARGB->YUVA conversion
 
-int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
-                                  float dithering) {
+static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace,
+                             float dithering, int use_iterative_conversion) {
   if (picture == NULL) return 0;
   if (picture->argb == NULL) {
     return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  } else if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
   } else {
     const uint8_t* const argb = (const uint8_t*)picture->argb;
     const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1;
@@ -255,15 +963,26 @@ int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
     const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3;
     const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0;
 
-    picture->colorspace = colorspace;
+    picture->colorspace = WEBP_YUV420;
     return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride,
-                              dithering, picture);
+                              dithering, use_iterative_conversion, picture);
   }
 }
 
+int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
+                                  float dithering) {
+  return PictureARGBToYUVA(picture, colorspace, dithering, 0);
+}
+
 int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
-  return WebPPictureARGBToYUVADithered(picture, colorspace, 0.f);
+  return PictureARGBToYUVA(picture, colorspace, 0.f, 0);
+}
+
+#if WEBP_ENCODER_ABI_VERSION > 0x0204
+int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
+  return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1);
 }
+#endif
 
 //------------------------------------------------------------------------------
 // call for YUVA -> ARGB conversion
 
@@ -343,7 +1062,7 @@ static int Import(WebPPicture* const picture,
 
   if (!picture->use_argb) {
    return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
-                              0.f /* no dithering */, picture);
+                              0.f /* no dithering */, 0, picture);
   }
   if (!WebPPictureAlloc(picture)) return 0;
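Note: WebPPictureSmartARGBToYUVA() is the one new public entry point in this sync; the pre-existing converters now all funnel into the static PictureARGBToYUVA() helper. A possible call pattern, guarded by the same ABI check the headers use (the fallback choice is illustrative, not part of the patch):

    #include "webp/encode.h"

    /* Convert an ARGB picture, preferring the new iterative ("smart")
     * converter when the linked libwebp is recent enough. */
    static int ToYUVAPreferSmart(WebPPicture* const pic) {
    #if WEBP_ENCODER_ABI_VERSION > 0x0204
      return WebPPictureSmartARGBToYUVA(pic);
    #else
      return WebPPictureARGBToYUVA(pic, WEBP_YUV420);  /* classic path */
    #endif
    }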
diff --git a/src/enc/vp8enci.h b/src/enc/vp8enci.h
index b543172e..dbc4b66f 100644
--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@@ -30,7 +30,7 @@ extern "C" {
 // version numbers
 #define ENC_MAJ_VERSION 0
 #define ENC_MIN_VERSION 4
-#define ENC_REV_VERSION 1
+#define ENC_REV_VERSION 2
 
 // intra prediction modes
 enum { B_DC_PRED = 0,   // 4x4 modes
@@ -457,10 +457,10 @@ struct VP8Encoder {
   VP8MBInfo* mb_info_;   // contextual macroblock infos (mb_w_ + 1)
   uint8_t* preds_;       // predictions modes: (4*mb_w+1) * (4*mb_h+1)
   uint32_t* nz_;         // non-zero bit context: mb_w+1
-  uint8_t *y_top_;       // top luma samples.
-  uint8_t *uv_top_;      // top u/v samples.
+  uint8_t* y_top_;       // top luma samples.
+  uint8_t* uv_top_;      // top u/v samples.
   // U and V are packed into 16 bytes (8 U + 8 V)
-  LFStats *lf_stats_;    // autofilter stats (if NULL, autofilter is off)
+  LFStats* lf_stats_;    // autofilter stats (if NULL, autofilter is off)
 };
 
 //------------------------------------------------------------------------------
@@ -571,7 +571,7 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
 
 //------------------------------------------------------------------------------
 
-#if WEBP_ENCODER_ABI_VERSION <= 0x0202
+#if WEBP_ENCODER_ABI_VERSION <= 0x0203
 void WebPMemoryWriterClear(WebPMemoryWriter* writer);
 #endif
 
diff --git a/src/enc/webpenc.c b/src/enc/webpenc.c
index fe8a358f..0cb83f12 100644
--- a/src/enc/webpenc.c
+++ b/src/enc/webpenc.c
@@ -328,16 +328,24 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
   VP8Encoder* enc = NULL;
 
   if (pic->y == NULL || pic->u == NULL || pic->v == NULL) {
     // Make sure we have YUVA samples.
-    float dithering = 0.f;
-    if (config->preprocessing & 2) {
-      const float x = config->quality / 100.f;
-      const float x2 = x * x;
-      // slowly decreasing from max dithering at low quality (q->0)
-      // to 0.5 dithering amplitude at high quality (q->100)
-      dithering = 1.0f + (0.5f - 1.0f) * x2 * x2;
-    }
-    if (!WebPPictureARGBToYUVADithered(pic, WEBP_YUV420, dithering)) {
-      return 0;
+    if (config->preprocessing & 4) {
+#if WEBP_ENCODER_ABI_VERSION > 0x0204
+      if (!WebPPictureSmartARGBToYUVA(pic)) {
+        return 0;
+      }
+#endif
+    } else {
+      float dithering = 0.f;
+      if (config->preprocessing & 2) {
+        const float x = config->quality / 100.f;
+        const float x2 = x * x;
+        // slowly decreasing from max dithering at low quality (q->0)
+        // to 0.5 dithering amplitude at high quality (q->100)
+        dithering = 1.0f + (0.5f - 1.0f) * x2 * x2;
+      }
+      if (!WebPPictureARGBToYUVADithered(pic, WEBP_YUV420, dithering)) {
+        return 0;
+      }
     }
   }
 
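Note: the dithering amplitude in the else-branch follows 1.0 - 0.5 * (quality/100)^4, so dithering fades as quality rises: 1.0 at q=0, 0.96875 at q=50, 0.5 at q=100. A standalone sketch of the same curve:

    #include <stdio.h>

    /* Same formula as above: full-strength dithering at q=0, half at q=100. */
    static float DitherAmplitude(float quality) {
      const float x = quality / 100.f;
      const float x2 = x * x;
      return 1.0f + (0.5f - 1.0f) * x2 * x2;   /* == 1.0 - 0.5 * x^4 */
    }

    int main(void) {
      printf("%g %g %g\n", DitherAmplitude(0),    /* 1.0     */
                           DitherAmplitude(50),   /* 0.96875 */
                           DitherAmplitude(100)); /* 0.5     */
      return 0;
    }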
diff --git a/src/utils/bit_reader.c b/src/utils/bit_reader.c
index 55b08cc1..bbddd42c 100644
--- a/src/utils/bit_reader.c
+++ b/src/utils/bit_reader.c
@@ -105,9 +105,7 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
 //------------------------------------------------------------------------------
 // VP8LBitReader
 
-#define LBITS 64      // Number of bits prefetched.
-#define WBITS 32      // Minimum number of bytes needed after VP8LFillBitWindow.
-#define LOG8_WBITS 4  // Number of bytes needed to store WBITS bits.
+#define VP8L_LOG8_WBITS 4  // Number of bytes needed to store VP8L_WBITS bits.
 
 #if !defined(WEBP_FORCE_ALIGNED) && \
     (defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
@@ -151,16 +149,6 @@ void VP8LInitBitReader(VP8LBitReader* const br, const uint8_t* const start,
   br->buf_ = start;
 }
 
-// Special version that assumes br->pos_ <= br_len_.
-static int IsEndOfStreamSpecial(const VP8LBitReader* const br) {
-  assert(br->pos_ <= br->len_);
-  return br->pos_ == br->len_ && br->bit_pos_ >= LBITS;
-}
-
-static int IsEndOfStream(const VP8LBitReader* const br) {
-  return (br->pos_ > br->len_) || IsEndOfStreamSpecial(br);
-}
-
 void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
                             const uint8_t* const buf, size_t len) {
   assert(br != NULL);
@@ -168,38 +156,39 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
   assert(len < 0xfffffff8u);   // can't happen with a RIFF chunk.
   br->buf_ = buf;
   br->len_ = len;
-  br->eos_ = IsEndOfStream(br);
+  // pos_ > len_ should be considered a param error.
+  br->error_ = (br->pos_ > br->len_);
+  br->eos_ = br->error_ || VP8LIsEndOfStream(br);
 }
 
-// If not at EOS, reload up to LBITS byte-by-byte
+// If not at EOS, reload up to VP8L_LBITS byte-by-byte
 static void ShiftBytes(VP8LBitReader* const br) {
   while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
     br->val_ >>= 8;
-    br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (LBITS - 8);
+    br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (VP8L_LBITS - 8);
     ++br->pos_;
     br->bit_pos_ -= 8;
   }
+  br->eos_ = VP8LIsEndOfStream(br);
 }
 
-void VP8LFillBitWindow(VP8LBitReader* const br) {
-  if (br->bit_pos_ >= WBITS) {
-    // TODO(jzern): given the fixed read size it may be possible to force
-    //              alignment in this block.
+void VP8LDoFillBitWindow(VP8LBitReader* const br) {
+  assert(br->bit_pos_ >= VP8L_WBITS);
+  // TODO(jzern): given the fixed read size it may be possible to force
+  //              alignment in this block.
 #if defined(VP8L_USE_UNALIGNED_LOAD)
-    if (br->pos_ + sizeof(br->val_) < br->len_) {
-      br->val_ >>= WBITS;
-      br->bit_pos_ -= WBITS;
-      // The expression below needs a little-endian arch to work correctly.
-      // This gives a large speedup for decoding speed.
-      br->val_ |= (vp8l_val_t)*(const uint32_t*)(br->buf_ + br->pos_) <<
-                  (LBITS - WBITS);
-      br->pos_ += LOG8_WBITS;
-      return;
-    }
-#endif
-    ShiftBytes(br);  // Slow path.
-    br->eos_ = IsEndOfStreamSpecial(br);
+  if (br->pos_ + sizeof(br->val_) < br->len_) {
+    br->val_ >>= VP8L_WBITS;
+    br->bit_pos_ -= VP8L_WBITS;
+    // The expression below needs a little-endian arch to work correctly.
+    // This gives a large speedup for decoding speed.
+    br->val_ |= (vp8l_val_t)*(const uint32_t*)(br->buf_ + br->pos_) <<
+                (VP8L_LBITS - VP8L_WBITS);
+    br->pos_ += VP8L_LOG8_WBITS;
+    return;
  }
+#endif
+  ShiftBytes(br);  // Slow path.
 }
 
 uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
@@ -210,8 +199,6 @@ uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
         (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
     const int new_bits = br->bit_pos_ + n_bits;
     br->bit_pos_ = new_bits;
-    // If this read is going to cross the read buffer, set the eos flag.
-    br->eos_ = IsEndOfStreamSpecial(br);
     ShiftBytes(br);
     return val;
   } else {
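Note: the VP8L reader keeps a 64-bit window of prefetched bits and refills it 32 bits at a time; VP8LDoFillBitWindow() above is the out-of-line refill. A toy model of the mechanism, independent of the real struct layout (memcpy stands in for the unaligned little-endian load):

    #include <stdint.h>
    #include <string.h>

    typedef struct {
      uint64_t val;        /* prefetched bits */
      int bit_pos;         /* bits already consumed from 'val' */
      const uint8_t* buf;
      size_t pos, len;
    } ToyReader;

    /* Once at least 32 bits are used, drop them and splice 4 fresh
     * bytes into the top half of the window. */
    static void ToyFill(ToyReader* const br) {
      if (br->bit_pos >= 32 && br->pos + 8 < br->len) {
        uint32_t in;
        memcpy(&in, br->buf + br->pos, sizeof(in));
        br->val = (br->val >> 32) | ((uint64_t)in << 32);
        br->pos += 4;
        br->bit_pos -= 32;
      }
    }

    /* Read n bits (n <= 24), refilling first, like VP8LReadBits(). */
    static uint32_t ToyRead(ToyReader* const br, int n) {
      uint32_t v;
      ToyFill(br);
      v = (uint32_t)(br->val >> br->bit_pos) & ((1u << n) - 1);
      br->bit_pos += n;
      return v;
    }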
diff --git a/src/utils/bit_reader.h b/src/utils/bit_reader.h
index 2c1e0872..a6ae85e5 100644
--- a/src/utils/bit_reader.h
+++ b/src/utils/bit_reader.h
@@ -107,6 +107,9 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
 // maximum number of bits (inclusive) the bit-reader can handle:
 #define VP8L_MAX_NUM_BIT_READ 24
 
+#define VP8L_LBITS 64  // Number of bits prefetched.
+#define VP8L_WBITS 32  // Minimum number of bits ready after VP8LFillBitWindow.
+
 typedef uint64_t vp8l_val_t;  // right now, this bit-reader can only use 64bit.
 
 typedef struct {
@@ -138,14 +141,26 @@ static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
   return (uint32_t)(br->val_ >> br->bit_pos_);
 }
 
+// Returns true if there was an attempt at reading a bit past the end of
+// the buffer. Doesn't set br->eos_ flag.
+static WEBP_INLINE int VP8LIsEndOfStream(const VP8LBitReader* const br) {
+  assert(br->pos_ <= br->len_);
+  return (br->pos_ == br->len_) && (br->bit_pos_ > VP8L_LBITS);
+}
+
 // For jumping over a number of bits in the bit stream when accessed with
 // VP8LPrefetchBits and VP8LFillBitWindow.
 static WEBP_INLINE void VP8LSetBitPos(VP8LBitReader* const br, int val) {
   br->bit_pos_ = val;
+  br->eos_ = VP8LIsEndOfStream(br);
 }
 
 // Advances the read buffer by 4 bytes to make room for reading next 32 bits.
-void VP8LFillBitWindow(VP8LBitReader* const br);
+// Speed critical, but the infrequent part of the code can be non-inlined.
+extern void VP8LDoFillBitWindow(VP8LBitReader* const br);
+static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) {
+  if (br->bit_pos_ >= VP8L_WBITS) VP8LDoFillBitWindow(br);
+}
 
 #ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/src/utils/bit_writer.c b/src/utils/bit_writer.c
index 23031f60..9875ca66 100644
--- a/src/utils/bit_writer.c
+++ b/src/utils/bit_writer.c
@@ -52,7 +52,7 @@ static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) {
   return 1;
 }
 
-static void kFlush(VP8BitWriter* const bw) {
+static void Flush(VP8BitWriter* const bw) {
   const int s = 8 + bw->nb_bits_;
   const int32_t bits = bw->value_ >> s;
   assert(bw->nb_bits_ >= 0);
@@ -118,7 +118,7 @@ int VP8PutBit(VP8BitWriter* const bw, int bit, int prob) {
     bw->range_ = kNewRange[bw->range_];
     bw->value_ <<= shift;
     bw->nb_bits_ += shift;
-    if (bw->nb_bits_ > 0) kFlush(bw);
+    if (bw->nb_bits_ > 0) Flush(bw);
   }
   return bit;
 }
@@ -135,7 +135,7 @@ int VP8PutBitUniform(VP8BitWriter* const bw, int bit) {
     bw->range_ = kNewRange[bw->range_];
     bw->value_ <<= 1;
     bw->nb_bits_ += 1;
-    if (bw->nb_bits_ > 0) kFlush(bw);
+    if (bw->nb_bits_ > 0) Flush(bw);
   }
   return bit;
 }
@@ -173,14 +173,14 @@ int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size) {
 uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw) {
   VP8PutValue(bw, 0, 9 - bw->nb_bits_);
   bw->nb_bits_ = 0;   // pad with zeroes
-  kFlush(bw);
+  Flush(bw);
   return bw->buf_;
 }
 
 int VP8BitWriterAppend(VP8BitWriter* const bw,
                        const uint8_t* data, size_t size) {
   assert(data != NULL);
-  if (bw->nb_bits_ != -8) return 0;   // kFlush() must have been called
+  if (bw->nb_bits_ != -8) return 0;   // Flush() must have been called
   if (!BitWriterResize(bw, size)) return 0;
   memcpy(bw->buf_ + bw->pos_, data, size);
   bw->pos_ += size;
diff --git a/src/utils/endian_inl.h b/src/utils/endian_inl.h
index 4c6b4fe4..f362a6e8 100644
--- a/src/utils/endian_inl.h
+++ b/src/utils/endian_inl.h
@@ -16,6 +16,7 @@
 #include "webp/config.h"
 #endif
 
+#include "../dsp/dsp.h"
 #include "webp/types.h"
 
 // some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
@@ -34,25 +35,13 @@
 #endif
 
 #if !defined(HAVE_CONFIG_H)
-#ifdef __GNUC__
-# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
-#else
-# define LOCAL_GCC_VERSION 0
-#endif  // __GNUC__
-
-#ifdef __clang__
-# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
-#else
-# define LOCAL_CLANG_VERSION 0
-#endif  // __clang__
-
 // clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64
-#if LOCAL_GCC_VERSION >= 0x403 || LOCAL_CLANG_VERSION >= 0x303
+#if LOCAL_GCC_PREREQ(4,3) || LOCAL_CLANG_PREREQ(3,3)
 #define HAVE_BUILTIN_BSWAP32
 #define HAVE_BUILTIN_BSWAP64
 #endif
 // clang-3.3 and gcc-4.8 have a builtin function for swap16
-#if LOCAL_GCC_VERSION >= 0x408 || LOCAL_CLANG_VERSION >= 0x303
+#if LOCAL_GCC_PREREQ(4,8) || LOCAL_CLANG_PREREQ(3,3)
 #define HAVE_BUILTIN_BSWAP16
 #endif
 #endif  // !HAVE_CONFIG_H
@@ -69,7 +58,16 @@ static WEBP_INLINE uint16_t BSwap16(uint16_t x) {
 }
 
 static WEBP_INLINE uint32_t BSwap32(uint32_t x) {
-#if defined(HAVE_BUILTIN_BSWAP32)
+#if defined(WEBP_USE_MIPS32_R2)
+  uint32_t ret;
+  __asm__ volatile (
+    "wsbh %[ret], %[x]        \n\t"
+    "rotr %[ret], %[ret], 16  \n\t"
+    : [ret]"=r"(ret)
+    : [x]"r"(x)
+  );
+  return ret;
+#elif defined(HAVE_BUILTIN_BSWAP32)
   return __builtin_bswap32(x);
 #elif defined(__i386__) || defined(__x86_64__)
   uint32_t swapped_bytes;
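Note: the MIPS32r2 pair above byte-swaps in two instructions: wsbh swaps the bytes inside each halfword, then rotr by 16 exchanges the halfwords. A portable C rendering of the same two steps (illustration only; the generic paths above already cover non-MIPS builds):

    #include <stdint.h>

    static uint32_t BSwap32Portable(uint32_t x) {
      /* wsbh: swap bytes within each 16-bit half */
      const uint32_t half_swapped =
          ((x & 0x00ff00ffu) << 8) | ((x & 0xff00ff00u) >> 8);
      /* rotr 16: exchange the two halfwords */
      return (half_swapped >> 16) | (half_swapped << 16);
    }
    /* e.g. BSwap32Portable(0xAABBCCDD) == 0xDDCCBBAA */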
diff --git a/src/utils/quant_levels_dec.c b/src/utils/quant_levels_dec.c
index c599e40a..5b8b8b49 100644
--- a/src/utils/quant_levels_dec.c
+++ b/src/utils/quant_levels_dec.c
@@ -32,10 +32,10 @@
 #define DSIZE 4   // dithering size (must be a power of two)
 // cf. http://en.wikipedia.org/wiki/Ordered_dithering
 static const uint8_t kOrderedDither[DSIZE][DSIZE] = {
-  { 0, 8, 2, 10 },   // coefficients are in DFIX fixed-point precision
-  { 12, 4, 14, 6 },
-  { 3, 11, 1, 9 },
-  { 15, 7, 13, 5 }
+  {  0,  8,  2, 10 },     // coefficients are in DFIX fixed-point precision
+  { 12,  4, 14,  6 },
+  {  3, 11,  1,  9 },
+  { 15,  7, 13,  5 }
 };
 
 #else
 
diff --git a/src/utils/utils.c b/src/utils/utils.c
index 4a86886e..8ff7f12f 100644
--- a/src/utils/utils.c
+++ b/src/utils/utils.c
@@ -155,9 +155,9 @@ static void SubMem(void* ptr) {
 }
 
 #else
 
-#define Increment(v) do {} while(0)
-#define AddMem(p, s) do {} while(0)
-#define SubMem(p) do {} while(0)
+#define Increment(v) do {} while (0)
+#define AddMem(p, s) do {} while (0)
+#define SubMem(p) do {} while (0)
 
 #endif
 
 // Returns 0 in case of overflow of nmemb * size.
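Note: the realigned table is the classic 4x4 Bayer matrix. A generic sketch of how such a matrix is typically applied when smoothing quantized levels (the actual use inside quant_levels_dec.c is more involved, and the DFIX value here is an assumption):

    #include <stdint.h>

    #define DFIX 4   /* assumed fixed-point precision of the coefficients */

    static const uint8_t kOrderedDither[4][4] = {
      {  0,  8,  2, 10 },
      { 12,  4, 14,  6 },
      {  3, 11,  1,  9 },
      { 15,  7, 13,  5 }
    };

    /* Adds a position-dependent bias in [0, 1) (DFIX fixed point) before the
     * final shift, so truncation banding becomes a fine regular pattern. */
    static uint8_t DitherAndShift(uint32_t v_fix, int x, int y) {
      return (uint8_t)((v_fix + kOrderedDither[y & 3][x & 3]) >> DFIX);
    }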