diff options
author | Jonathan Wright <jonathan.wright@arm.com> | 2021-01-12 11:33:28 +0000 |
---|---|---|
committer | Jonathan Wright <jonathan.wright@arm.com> | 2021-01-12 13:37:36 +0000 |
commit | 518d81558c797486e125e37cb529d65b560a6ea0 (patch) | |
tree | 9672a5c5cac55fcac716ee135f4583486c67e268 | |
parent | 09efc26aff7983f4377a1743a197ca3d74796d7d (diff) | |
download | libjpeg-turbo-518d81558c797486e125e37cb529d65b560a6ea0.tar.gz |
Cherry-pick Arm CLZ fixes from upstream
Cherry-pick two patches from upstream that fix the Neon intrinsics
Huffman encoding path and reduce the memory footprint on Windows on
Arm:
https://github.com/libjpeg-turbo/libjpeg-turbo/commit/d2c407995992be1f128704ae2479adfd7906c158
https://github.com/libjpeg-turbo/libjpeg-turbo/commit/74e6ea45e3547ae85cd43efcdfc24a6907a2154e
Re-enable the Neon intrinsics Huffman encoding path for WoA compiled
with clang-cl.
Bug: 1160249
Change-Id: I0849ca54b8f4f8f38c9b293ea48c9de1c60be86f
-rw-r--r-- | README.chromium | 7 | ||||
-rw-r--r-- | jchuff.c | 8 | ||||
-rw-r--r-- | jcphuff.c | 9 | ||||
-rw-r--r-- | simd/arm/aarch64/jchuff-neon.c | 6 | ||||
-rw-r--r-- | simd/arm/aarch64/jsimd.c | 3 | ||||
-rw-r--r-- | simd/arm/jcphuff-neon.c | 4 | ||||
-rw-r--r-- | simd/arm/neon-compat.h | 6 | ||||
-rw-r--r-- | simd/arm/neon-compat.h.in | 6 |
8 files changed, 27 insertions, 22 deletions
diff --git a/README.chromium b/README.chromium
index b0a16238..e8b0bc67 100644
--- a/README.chromium
+++ b/README.chromium
@@ -14,11 +14,12 @@ This consists of the components:
 * An OWNERS file
 * A codereview.settings file
 * Patched header files used by Chromium
-* Cherry-picked three additional patches from upstream master to fix bugs found
-  by fuzzers:
+* Additional patches cherry-picked from upstream master to fix various bugs:
   https://github.com/libjpeg-turbo/libjpeg-turbo/commit/ccaba5d7894ecfb5a8f11e48d3f86e1f14d5a469
   https://github.com/libjpeg-turbo/libjpeg-turbo/commit/c7ca521bc85b57d41d3ad4963c13fc0100481084
   https://github.com/libjpeg-turbo/libjpeg-turbo/commit/110d8d6dcafaed517e8f77a6253169535ee3a20e
+  https://github.com/libjpeg-turbo/libjpeg-turbo/commit/d2c407995992be1f128704ae2479adfd7906c158
+  https://github.com/libjpeg-turbo/libjpeg-turbo/commit/74e6ea45e3547ae85cd43efcdfc24a6907a2154e
 * Deleted unused directories: ci, cmakescripts, doc, java, release, sharedlib,
   simd/loongson, simd/mips, simd/powerpc, and win
 * Deleted unused files: appveyor.yml, CMakeLists.txt, doxygen.config,
@@ -73,8 +74,6 @@ following changes which are not merged to upstream:
   - Refactor djpeg.c to provide test interface
   A new gtest directory contains GTest wrappers (and associated utilities) for
   each of tjunittest, tjbench, cjpeg, djpeg and jpegtran.
-* Disable Neon SIMD path for Huffman encoding when compiling for Windows on Arm
-  using Clang-cl: http://crbug.com/1160249
 
 Refer to working-with-nested-repos [1] for details of how to setup your git svn
 client to update the code (for making local changes, cherry picking from
@@ -44,15 +44,19 @@
  * flags (this defines __thumb__).
  */
 
-/* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
+    defined(_M_ARM64)
 #if !defined(__thumb__) || defined(__thumb2__)
 #define USE_CLZ_INTRINSIC
 #endif
 #endif
 
 #ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
+#else
 #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
+#endif
 #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
 #else
 #include "jpeg_nbits_table.h"
@@ -6,6 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright (C) 2011, 2015, 2018, D. R. Commander.
  * Copyright (C) 2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -51,15 +52,19 @@
  * flags (this defines __thumb__).
  */
 
-/* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
+    defined(_M_ARM64)
 #if !defined(__thumb__) || defined(__thumb2__)
 #define USE_CLZ_INTRINSIC
 #endif
 #endif
 
 #ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
+#else
 #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
+#endif
 #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
 #else
 #include "jpeg_nbits_table.h"
diff --git a/simd/arm/aarch64/jchuff-neon.c b/simd/arm/aarch64/jchuff-neon.c
index a0a57a66..f13fd1b5 100644
--- a/simd/arm/aarch64/jchuff-neon.c
+++ b/simd/arm/aarch64/jchuff-neon.c
@@ -1,7 +1,7 @@
 /*
  * jchuff-neon.c - Huffman entropy encoding (64-bit Arm Neon)
  *
- * Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
  * Copyright (C) 2020, D. R. Commander. All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
@@ -331,7 +331,7 @@ JOCTET *jsimd_huff_encode_one_block_neon(void *state, JOCTET *buffer,
     vst1q_u16(block_diff + 7 * DCTSIZE, row7_diff);
 
     while (bitmap != 0) {
-      r = BUILTIN_CLZL(bitmap);
+      r = BUILTIN_CLZLL(bitmap);
       i += r;
       bitmap <<= r;
       nbits = block_nbits[i];
@@ -370,7 +370,7 @@ JOCTET *jsimd_huff_encode_one_block_neon(void *state, JOCTET *buffer,
 
     /* Same as above but must mask diff bits and compute nbits on demand. */
     while (bitmap != 0) {
-      r = BUILTIN_CLZL(bitmap);
+      r = BUILTIN_CLZLL(bitmap);
       i += r;
       bitmap <<= r;
       lz = BUILTIN_CLZ(block_abs[i]);
diff --git a/simd/arm/aarch64/jsimd.c b/simd/arm/aarch64/jsimd.c
index 4991bc0a..8570b82c 100644
--- a/simd/arm/aarch64/jsimd.c
+++ b/simd/arm/aarch64/jsimd.c
@@ -977,8 +977,6 @@ jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 GLOBAL(int)
 jsimd_can_huff_encode_one_block(void)
 {
-/* Disable for Windows on Arm compiled with Clang-cl: crbug.com/1160249 */
-#if !(defined(_MSC_VER) && defined(__clang__))
   init_simd();
 
   if (DCTSIZE != 8)
@@ -988,7 +986,6 @@ jsimd_can_huff_encode_one_block(void)
 
   if (simd_support & JSIMD_NEON && simd_huffman)
     return 1;
 
-#endif
   return 0;
 }
diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c
index 8b6d53be..86a263fa 100644
--- a/simd/arm/jcphuff-neon.c
+++ b/simd/arm/jcphuff-neon.c
@@ -1,7 +1,7 @@
 /*
  * jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon)
  *
- * Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty. In no event will the authors be held liable for any damages
@@ -572,7 +572,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
     /* EOB position is defined to be 0 if all coefficients != 1. */
     return 0;
   } else {
-    return 63 - BUILTIN_CLZL(bitmap);
+    return 63 - BUILTIN_CLZLL(bitmap);
   }
 #else
   /* Move bitmap to two 32-bit scalar registers. */
diff --git a/simd/arm/neon-compat.h b/simd/arm/neon-compat.h
index 3ce3bcbe..543d8600 100644
--- a/simd/arm/neon-compat.h
+++ b/simd/arm/neon-compat.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2020, D. R. Commander. All Rights Reserved.
- * Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty. In no event will the authors be held liable for any damages
@@ -28,10 +28,10 @@
 /* Define compiler-independent count-leading-zeros macros */
 #if defined(_MSC_VER) && !defined(__clang__)
 #define BUILTIN_CLZ(x) _CountLeadingZeros(x)
-#define BUILTIN_CLZL(x) _CountLeadingZeros64(x)
+#define BUILTIN_CLZLL(x) _CountLeadingZeros64(x)
 #elif defined(__clang__) || defined(__GNUC__)
 #define BUILTIN_CLZ(x) __builtin_clz(x)
-#define BUILTIN_CLZL(x) __builtin_clzl(x)
+#define BUILTIN_CLZLL(x) __builtin_clzll(x)
 #else
 #error "Unknown compiler"
 #endif
diff --git a/simd/arm/neon-compat.h.in b/simd/arm/neon-compat.h.in
index e2347b9b..23d6d28c 100644
--- a/simd/arm/neon-compat.h.in
+++ b/simd/arm/neon-compat.h.in
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2020, D. R. Commander. All Rights Reserved.
- * Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty. In no event will the authors be held liable for any damages
@@ -26,10 +26,10 @@
 /* Define compiler-independent count-leading-zeros macros */
 #if defined(_MSC_VER) && !defined(__clang__)
 #define BUILTIN_CLZ(x) _CountLeadingZeros(x)
-#define BUILTIN_CLZL(x) _CountLeadingZeros64(x)
+#define BUILTIN_CLZLL(x) _CountLeadingZeros64(x)
 #elif defined(__clang__) || defined(__GNUC__)
 #define BUILTIN_CLZ(x) __builtin_clz(x)
-#define BUILTIN_CLZL(x) __builtin_clzl(x)
+#define BUILTIN_CLZLL(x) __builtin_clzll(x)
 #else
 #error "Unknown compiler"
 #endif