diff options
author | Elliott Hughes <enh@google.com> | 2021-07-15 22:24:53 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2021-07-15 22:24:53 +0000 |
commit | 38c581ad0a759827dc223eab9271942882e632ba (patch) | |
tree | 5b6d5909153f1616f407a0c4bd037747367011f0 | |
parent | a9020c06a34b341c5badf22ff6799293e728bd7b (diff) | |
parent | f1ea27aab6803cbf6bb202d2fdd188a122e32b44 (diff) | |
download | libjpeg-turbo-38c581ad0a759827dc223eab9271942882e632ba.tar.gz |
Upgrade libjpeg-turbo to ad8b3b0f84baf155f3bde5626c3bf9d20535bcae am: 3457af89e3 am: bbd5cf7276 am: 189297ae25 am: f1ea27aab6
Original change: https://android-review.googlesource.com/c/platform/external/libjpeg-turbo/+/1768605
Change-Id: I019171003d990a22f38a8c7ad194cb9b712c26a9
-rw-r--r-- | ChangeLog.md | 36 | ||||
-rw-r--r-- | METADATA | 4 | ||||
-rw-r--r-- | README.chromium | 4 | ||||
-rw-r--r-- | jchuff.c | 5 | ||||
-rw-r--r-- | jcmaster.c | 2 | ||||
-rw-r--r-- | jconfigint.h | 10 | ||||
-rw-r--r-- | jconfigint.h.in | 10 | ||||
-rw-r--r-- | jcphuff.c | 5 | ||||
-rw-r--r-- | jdapimin.c | 3 | ||||
-rw-r--r-- | jdmainct.c | 5 | ||||
-rw-r--r-- | simd/arm/jchuff.h | 20 | ||||
-rw-r--r-- | simd/arm/jcphuff-neon.c | 31 | ||||
-rw-r--r-- | simd/arm/jdcolext-neon.c | 21 | ||||
-rw-r--r-- | simd/arm/jdcolor-neon.c | 1 | ||||
-rw-r--r-- | simd/arm/jdmerge-neon.c | 1 | ||||
-rw-r--r-- | simd/arm/jdmrgext-neon.c | 56 | ||||
-rw-r--r-- | simd/arm/neon-compat.h | 2 | ||||
-rw-r--r-- | simd/arm/neon-compat.h.in | 2 |
18 files changed, 190 insertions, 28 deletions
diff --git a/ChangeLog.md b/ChangeLog.md index 498b8f27..ca5208be 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,21 @@ +2.1.1 +===== + +### Significant changes relative to 2.1.0 + +1. Fixed a regression introduced in 2.1.0 that caused build failures with +non-GCC-compatible compilers for Un*x/Arm platforms. + +2. Fixed a regression introduced by 2.1 beta1[13] that prevented the Arm 32-bit +(AArch32) Neon SIMD extensions from building unless the C compiler flags +included `-mfloat-abi=softfp` or `-mfloat-abi=hard`. + +3. Fixed an issue in the AArch32 Neon SIMD Huffman encoder whereby reliance on +undefined C compiler behavior led to crashes ("SIGBUS: illegal alignment") on +Android systems when running AArch32/Thumb builds of libjpeg-turbo built with +recent versions of Clang. + + 2.1.0 ===== @@ -284,15 +302,15 @@ JPEG images. This was known to cause a buffer overflow when attempting to decompress some such images using `tjDecompressToYUV2()` or `tjDecompressToYUVPlanes()`. -5. Fixed an issue, detected by ASan, whereby attempting to losslessly transform -a specially-crafted malformed JPEG image containing an extremely-high-frequency -coefficient block (junk image data that could never be generated by a -legitimate JPEG compressor) could cause the Huffman encoder's local buffer to -be overrun. (Refer to 1.4.0[9] and 1.4beta1[15].) Given that the buffer -overrun was fully contained within the stack and did not cause a segfault or -other user-visible errant behavior, and given that the lossless transformer -(unlike the decompressor) is not generally exposed to arbitrary data exploits, -this issue did not likely pose a security risk. +5. Fixed an issue (CVE-2020-17541), detected by ASan, whereby attempting to +losslessly transform a specially-crafted malformed JPEG image containing an +extremely-high-frequency coefficient block (junk image data that could never be +generated by a legitimate JPEG compressor) could cause the Huffman encoder's +local buffer to be overrun. (Refer to 1.4.0[9] and 1.4beta1[15].) Given that +the buffer overrun was fully contained within the stack and did not cause a +segfault or other user-visible errant behavior, and given that the lossless +transformer (unlike the decompressor) is not generally exposed to arbitrary +data exploits, this issue did not likely pose a security risk. 6. The Arm 64-bit (Armv8) Neon SIMD assembly code now stores constants in a separate read-only data section rather than in the text section, to support @@ -5,11 +5,11 @@ third_party { type: GIT value: "https://chromium.googlesource.com/chromium/deps/libjpeg_turbo" } - version: "b7bef8c05b7cdb1a038ae271a2c2b6647af4c879" + version: "ad8b3b0f84baf155f3bde5626c3bf9d20535bcae" license_type: NOTICE last_upgrade_date { year: 2021 month: 7 - day: 8 + day: 15 } } diff --git a/README.chromium b/README.chromium index d260cb2c..de1fe85e 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libjpeg-turbo URL: https://github.com/libjpeg-turbo/libjpeg-turbo/ -Version: 2.1.0 +Version: b201838d8b5f2f80c9f86ec8405a62a002232b2c (post 2.1.0) License: Custom license License File: LICENSE.md Security Critical: yes @@ -8,7 +8,7 @@ License Android Compatible: yes Description: This consists of the components: -* libjpeg-turbo 2.1.0 +* libjpeg-turbo b201838d8b5f2f80c9f86ec8405a62a002232b2c (post 2.1.0) * This file (README.chromium) * A build file (BUILD.gn) * An OWNERS file @@ -44,8 +44,9 @@ * flags (this defines __thumb__). */ -#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \ - defined(_M_ARM64) +/* NOTE: Both GCC and Clang define __GNUC__ */ +#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \ + defined(_M_ARM) || defined(_M_ARM64) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif @@ -493,7 +493,7 @@ prepare_for_pass(j_compress_ptr cinfo) master->pass_type = output_pass; master->pass_number++; #endif - /*FALLTHROUGH*/ + FALLTHROUGH /*FALLTHROUGH*/ case output_pass: /* Do a data-output pass. */ /* We need not repeat per-scan setup if prior optimization pass did it. */ diff --git a/jconfigint.h b/jconfigint.h index 4f23dffb..cb9915c9 100644 --- a/jconfigint.h +++ b/jconfigint.h @@ -52,3 +52,13 @@ #define HAVEBITSCANFORWARD #endif #endif + +#if defined(__has_attribute) +#if __has_attribute(fallthrough) +#define FALLTHROUGH __attribute__((fallthrough)); +#else +#define FALLTHROUGH +#endif +#else +#define FALLTHROUGH +#endif diff --git a/jconfigint.h.in b/jconfigint.h.in index 68cbc2a5..d087d7b5 100644 --- a/jconfigint.h.in +++ b/jconfigint.h.in @@ -32,3 +32,13 @@ #define HAVE_BITSCANFORWARD #endif #endif + +#if defined(__has_attribute) +#if __has_attribute(fallthrough) +#define FALLTHROUGH __attribute__((fallthrough)); +#else +#define FALLTHROUGH +#endif +#else +#define FALLTHROUGH +#endif @@ -52,8 +52,9 @@ * flags (this defines __thumb__). */ -#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \ - defined(_M_ARM64) +/* NOTE: Both GCC and Clang define __GNUC__ */ +#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \ + defined(_M_ARM) || defined(_M_ARM64) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif @@ -23,6 +23,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jdmaster.h" +#include "jconfigint.h" /* @@ -308,7 +309,7 @@ jpeg_consume_input(j_decompress_ptr cinfo) /* Initialize application's data source module */ (*cinfo->src->init_source) (cinfo); cinfo->global_state = DSTATE_INHEADER; - /*FALLTHROUGH*/ + FALLTHROUGH /*FALLTHROUGH*/ case DSTATE_INHEADER: retcode = (*cinfo->inputctl->consume_input) (cinfo); if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */ @@ -18,6 +18,7 @@ #include "jinclude.h" #include "jdmainct.h" +#include "jconfigint.h" /* @@ -360,7 +361,7 @@ process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf, main_ptr->context_state = CTX_PREPARE_FOR_IMCU; if (*out_row_ctr >= out_rows_avail) return; /* Postprocessor exactly filled output buf */ - /*FALLTHROUGH*/ + FALLTHROUGH /*FALLTHROUGH*/ case CTX_PREPARE_FOR_IMCU: /* Prepare to process first M-1 row groups of this iMCU row */ main_ptr->rowgroup_ctr = 0; @@ -371,7 +372,7 @@ process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf, if (main_ptr->iMCU_row_ctr == cinfo->total_iMCU_rows) set_bottom_pointers(cinfo); main_ptr->context_state = CTX_PROCESS_IMCU; - /*FALLTHROUGH*/ + FALLTHROUGH /*FALLTHROUGH*/ case CTX_PROCESS_IMCU: /* Call postprocessor using previously set pointers */ (*cinfo->post->post_process_data) (cinfo, diff --git a/simd/arm/jchuff.h b/simd/arm/jchuff.h index d4edd5eb..2fbd252b 100644 --- a/simd/arm/jchuff.h +++ b/simd/arm/jchuff.h @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2009, 2018, D. R. Commander. + * Copyright (C) 2009, 2018, 2021, D. R. Commander. * Copyright (C) 2018, Matthias Räncker. * Copyright (C) 2020-2021, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg @@ -74,6 +74,21 @@ typedef struct { #else +#if defined(_MSC_VER) && !defined(__clang__) +#define SPLAT() { \ + buffer[0] = (JOCTET)(put_buffer >> 24); \ + buffer[1] = (JOCTET)(put_buffer >> 16); \ + buffer[2] = (JOCTET)(put_buffer >> 8); \ + buffer[3] = (JOCTET)(put_buffer ); \ + buffer += 4; \ +} +#else +#define SPLAT() { \ + put_buffer = __builtin_bswap32(put_buffer); \ + __asm__("str %1, [%0], #4" : "+r" (buffer) : "r" (put_buffer)); \ +} +#endif + #define FLUSH() { \ if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \ EMIT_BYTE(put_buffer >> 24) \ @@ -81,8 +96,7 @@ typedef struct { EMIT_BYTE(put_buffer >> 8) \ EMIT_BYTE(put_buffer ) \ } else { \ - *((uint32_t *)buffer) = BUILTIN_BSWAP32(put_buffer); \ - buffer += 4; \ + SPLAT(); \ } \ } diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c index 86a263fa..b91c5db4 100644 --- a/simd/arm/jcphuff-neon.c +++ b/simd/arm/jcphuff-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "jconfigint.h" #include "../../jinclude.h" #include "../../jpeglib.h" #include "../../jsimd.h" @@ -105,18 +106,25 @@ void jsimd_encode_mcu_AC_first_prepare_neon switch (remaining_coefs) { case 15: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[14], coefs2, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[13], coefs2, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[12], coefs2, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[11], coefs2, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[10], coefs2, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[9], coefs2, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[8], coefs2, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -149,20 +157,28 @@ void jsimd_encode_mcu_AC_first_prepare_neon switch (remaining_coefs) { case 8: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[7], coefs, 7); + FALLTHROUGH /*FALLTHROUGH*/ case 7: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[6], coefs, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[5], coefs, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[4], coefs, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[3], coefs, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[2], coefs, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[1], coefs, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[0], coefs, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -337,18 +353,25 @@ int jsimd_encode_mcu_AC_refine_prepare_neon switch (remaining_coefs) { case 15: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[14], coefs2, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[13], coefs2, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[12], coefs2, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[11], coefs2, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[10], coefs2, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[9], coefs2, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[8], coefs2, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -389,20 +412,28 @@ int jsimd_encode_mcu_AC_refine_prepare_neon switch (remaining_coefs) { case 8: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[7], coefs, 7); + FALLTHROUGH /*FALLTHROUGH*/ case 7: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[6], coefs, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[5], coefs, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[4], coefs, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[3], coefs, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[2], coefs, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[1], coefs, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[0], coefs, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } diff --git a/simd/arm/jdcolext-neon.c b/simd/arm/jdcolext-neon.c index ae440f45..c3c07a19 100644 --- a/simd/arm/jdcolext-neon.c +++ b/simd/arm/jdcolext-neon.c @@ -283,18 +283,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf, switch (cols_remaining) { case 7: vst4_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgba, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst4_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgba, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst4_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgba, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst4_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgba, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst4_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgba, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst4_lane_u8(outptr + RGB_PIXELSIZE, rgba, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst4_lane_u8(outptr, rgba, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -308,18 +315,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf, switch (cols_remaining) { case 7: vst3_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgb, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst3_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgb, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst3_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgb, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst3_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgb, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst3_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgb, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst3_lane_u8(outptr + RGB_PIXELSIZE, rgb, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst3_lane_u8(outptr, rgb, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -332,18 +346,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf, switch (cols_remaining) { case 7: vst1q_lane_u16((uint16_t *)(outptr + 6 * RGB_PIXELSIZE), rgb565, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst1q_lane_u16((uint16_t *)(outptr + 5 * RGB_PIXELSIZE), rgb565, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst1q_lane_u16((uint16_t *)(outptr + 4 * RGB_PIXELSIZE), rgb565, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst1q_lane_u16((uint16_t *)(outptr + 3 * RGB_PIXELSIZE), rgb565, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst1q_lane_u16((uint16_t *)(outptr + 2 * RGB_PIXELSIZE), rgb565, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst1q_lane_u16((uint16_t *)(outptr + RGB_PIXELSIZE), rgb565, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst1q_lane_u16((uint16_t *)outptr, rgb565, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } diff --git a/simd/arm/jdcolor-neon.c b/simd/arm/jdcolor-neon.c index 28dbc572..ea4668f1 100644 --- a/simd/arm/jdcolor-neon.c +++ b/simd/arm/jdcolor-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "jconfigint.h" #include "../../jinclude.h" #include "../../jpeglib.h" #include "../../jsimd.h" diff --git a/simd/arm/jdmerge-neon.c b/simd/arm/jdmerge-neon.c index 18fb9d8a..e4f91fdc 100644 --- a/simd/arm/jdmerge-neon.c +++ b/simd/arm/jdmerge-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "jconfigint.h" #include "../../jinclude.h" #include "../../jpeglib.h" #include "../../jsimd.h" diff --git a/simd/arm/jdmrgext-neon.c b/simd/arm/jdmrgext-neon.c index fa2ec056..5b89bdb3 100644 --- a/simd/arm/jdmrgext-neon.c +++ b/simd/arm/jdmrgext-neon.c @@ -226,35 +226,49 @@ void jsimd_h2v1_merged_upsample_neon(JDIMENSION output_width, switch (cols_remaining) { case 15: vst4_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgba_h, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: vst4_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgba_h, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: vst4_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgba_h, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: vst4_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgba_h, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: vst4_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgba_h, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: vst4_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgba_h, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: vst4_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgba_h, 0); + FALLTHROUGH /*FALLTHROUGH*/ case 8: vst4_u8(outptr, rgba_l); break; case 7: vst4_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgba_l, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst4_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgba_l, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst4_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgba_l, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst4_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgba_l, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst4_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgba_l, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst4_lane_u8(outptr + RGB_PIXELSIZE, rgba_l, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst4_lane_u8(outptr, rgba_l, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -271,35 +285,49 @@ void jsimd_h2v1_merged_upsample_neon(JDIMENSION output_width, switch (cols_remaining) { case 15: vst3_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgb_h, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: vst3_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgb_h, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: vst3_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgb_h, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: vst3_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgb_h, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: vst3_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgb_h, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: vst3_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgb_h, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: vst3_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgb_h, 0); + FALLTHROUGH /*FALLTHROUGH*/ case 8: vst3_u8(outptr, rgb_l); break; case 7: vst3_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgb_l, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst3_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgb_l, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst3_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgb_l, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst3_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgb_l, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst3_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgb_l, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst3_lane_u8(outptr + RGB_PIXELSIZE, rgb_l, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst3_lane_u8(outptr, rgb_l, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -549,24 +577,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width, case 15: vst4_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgba0_h, 6); vst4_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgba1_h, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: vst4_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgba0_h, 5); vst4_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgba1_h, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: vst4_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgba0_h, 4); vst4_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgba1_h, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: vst4_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgba0_h, 3); vst4_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgba1_h, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: vst4_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgba0_h, 2); vst4_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgba1_h, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: vst4_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgba0_h, 1); vst4_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgba1_h, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: vst4_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgba0_h, 0); vst4_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgba1_h, 0); + FALLTHROUGH /*FALLTHROUGH*/ case 8: vst4_u8(outptr0, rgba0_l); vst4_u8(outptr1, rgba1_l); @@ -574,24 +609,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width, case 7: vst4_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgba0_l, 6); vst4_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgba1_l, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst4_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgba0_l, 5); vst4_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgba1_l, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst4_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgba0_l, 4); vst4_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgba1_l, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst4_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgba0_l, 3); vst4_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgba1_l, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst4_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgba0_l, 2); vst4_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgba1_l, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst4_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgba0_l, 1); vst4_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgba1_l, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst4_lane_u8(outptr0, rgba0_l, 0); vst4_lane_u8(outptr1, rgba1_l, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -616,24 +658,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width, case 15: vst3_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgb0_h, 6); vst3_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgb1_h, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: vst3_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgb0_h, 5); vst3_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgb1_h, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: vst3_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgb0_h, 4); vst3_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgb1_h, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: vst3_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgb0_h, 3); vst3_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgb1_h, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: vst3_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgb0_h, 2); vst3_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgb1_h, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: vst3_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgb0_h, 1); vst3_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgb1_h, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: vst3_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgb0_h, 0); vst3_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgb1_h, 0); + FALLTHROUGH /*FALLTHROUGH*/ case 8: vst3_u8(outptr0, rgb0_l); vst3_u8(outptr1, rgb1_l); @@ -641,24 +690,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width, case 7: vst3_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgb0_l, 6); vst3_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgb1_l, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst3_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgb0_l, 5); vst3_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgb1_l, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst3_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgb0_l, 4); vst3_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgb1_l, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst3_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgb0_l, 3); vst3_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgb1_l, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst3_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgb0_l, 2); vst3_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgb1_l, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst3_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgb0_l, 1); vst3_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgb1_l, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst3_lane_u8(outptr0, rgb0_l, 0); vst3_lane_u8(outptr1, rgb1_l, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } diff --git a/simd/arm/neon-compat.h b/simd/arm/neon-compat.h index 3d77527c..73c57aec 100644 --- a/simd/arm/neon-compat.h +++ b/simd/arm/neon-compat.h @@ -29,12 +29,10 @@ #if defined(_MSC_VER) && !defined(__clang__) #define BUILTIN_CLZ(x) _CountLeadingZeros(x) #define BUILTIN_CLZLL(x) _CountLeadingZeros64(x) -#define BUILTIN_BSWAP32(x) _byteswap_ulong(x) #define BUILTIN_BSWAP64(x) _byteswap_uint64(x) #elif defined(__clang__) || defined(__GNUC__) #define BUILTIN_CLZ(x) __builtin_clz(x) #define BUILTIN_CLZLL(x) __builtin_clzll(x) -#define BUILTIN_BSWAP32(x) __builtin_bswap32(x) #define BUILTIN_BSWAP64(x) __builtin_bswap64(x) #else #error "Unknown compiler" diff --git a/simd/arm/neon-compat.h.in b/simd/arm/neon-compat.h.in index 436c402a..d403f228 100644 --- a/simd/arm/neon-compat.h.in +++ b/simd/arm/neon-compat.h.in @@ -27,12 +27,10 @@ #if defined(_MSC_VER) && !defined(__clang__) #define BUILTIN_CLZ(x) _CountLeadingZeros(x) #define BUILTIN_CLZLL(x) _CountLeadingZeros64(x) -#define BUILTIN_BSWAP32(x) _byteswap_ulong(x) #define BUILTIN_BSWAP64(x) _byteswap_uint64(x) #elif defined(__clang__) || defined(__GNUC__) #define BUILTIN_CLZ(x) __builtin_clz(x) #define BUILTIN_CLZLL(x) __builtin_clzll(x) -#define BUILTIN_BSWAP32(x) __builtin_bswap32(x) #define BUILTIN_BSWAP64(x) __builtin_bswap64(x) #else #error "Unknown compiler" |