aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorElliott Hughes <enh@google.com>2021-07-15 21:26:39 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2021-07-15 21:26:39 +0000
commitbbd5cf7276693c4bf2572d80845c8d283639bfa2 (patch)
tree5b6d5909153f1616f407a0c4bd037747367011f0
parenta14c12e1b430f4247c1d241db28ba88502ae73fb (diff)
parent3457af89e30cd924f687c412c6797aa63ed0c1e3 (diff)
downloadlibjpeg-turbo-bbd5cf7276693c4bf2572d80845c8d283639bfa2.tar.gz
Upgrade libjpeg-turbo to ad8b3b0f84baf155f3bde5626c3bf9d20535bcae am: 3457af89e3
Original change: https://android-review.googlesource.com/c/platform/external/libjpeg-turbo/+/1768605 Change-Id: I2f65f72ea3ed30bf91a5fee1704d5e0858a9d009
-rw-r--r--ChangeLog.md36
-rw-r--r--METADATA4
-rw-r--r--README.chromium4
-rw-r--r--jchuff.c5
-rw-r--r--jcmaster.c2
-rw-r--r--jconfigint.h10
-rw-r--r--jconfigint.h.in10
-rw-r--r--jcphuff.c5
-rw-r--r--jdapimin.c3
-rw-r--r--jdmainct.c5
-rw-r--r--simd/arm/jchuff.h20
-rw-r--r--simd/arm/jcphuff-neon.c31
-rw-r--r--simd/arm/jdcolext-neon.c21
-rw-r--r--simd/arm/jdcolor-neon.c1
-rw-r--r--simd/arm/jdmerge-neon.c1
-rw-r--r--simd/arm/jdmrgext-neon.c56
-rw-r--r--simd/arm/neon-compat.h2
-rw-r--r--simd/arm/neon-compat.h.in2
18 files changed, 190 insertions, 28 deletions
diff --git a/ChangeLog.md b/ChangeLog.md
index 498b8f27..ca5208be 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -1,3 +1,21 @@
+2.1.1
+=====
+
+### Significant changes relative to 2.1.0
+
+1. Fixed a regression introduced in 2.1.0 that caused build failures with
+non-GCC-compatible compilers for Un*x/Arm platforms.
+
+2. Fixed a regression introduced by 2.1 beta1[13] that prevented the Arm 32-bit
+(AArch32) Neon SIMD extensions from building unless the C compiler flags
+included `-mfloat-abi=softfp` or `-mfloat-abi=hard`.
+
+3. Fixed an issue in the AArch32 Neon SIMD Huffman encoder whereby reliance on
+undefined C compiler behavior led to crashes ("SIGBUS: illegal alignment") on
+Android systems when running AArch32/Thumb builds of libjpeg-turbo built with
+recent versions of Clang.
+
+
2.1.0
=====
@@ -284,15 +302,15 @@ JPEG images. This was known to cause a buffer overflow when attempting to
decompress some such images using `tjDecompressToYUV2()` or
`tjDecompressToYUVPlanes()`.
-5. Fixed an issue, detected by ASan, whereby attempting to losslessly transform
-a specially-crafted malformed JPEG image containing an extremely-high-frequency
-coefficient block (junk image data that could never be generated by a
-legitimate JPEG compressor) could cause the Huffman encoder's local buffer to
-be overrun. (Refer to 1.4.0[9] and 1.4beta1[15].) Given that the buffer
-overrun was fully contained within the stack and did not cause a segfault or
-other user-visible errant behavior, and given that the lossless transformer
-(unlike the decompressor) is not generally exposed to arbitrary data exploits,
-this issue did not likely pose a security risk.
+5. Fixed an issue (CVE-2020-17541), detected by ASan, whereby attempting to
+losslessly transform a specially-crafted malformed JPEG image containing an
+extremely-high-frequency coefficient block (junk image data that could never be
+generated by a legitimate JPEG compressor) could cause the Huffman encoder's
+local buffer to be overrun. (Refer to 1.4.0[9] and 1.4beta1[15].) Given that
+the buffer overrun was fully contained within the stack and did not cause a
+segfault or other user-visible errant behavior, and given that the lossless
+transformer (unlike the decompressor) is not generally exposed to arbitrary
+data exploits, this issue did not likely pose a security risk.
6. The Arm 64-bit (Armv8) Neon SIMD assembly code now stores constants in a
separate read-only data section rather than in the text section, to support
diff --git a/METADATA b/METADATA
index 9623a961..8d4fd50a 100644
--- a/METADATA
+++ b/METADATA
@@ -5,11 +5,11 @@ third_party {
type: GIT
value: "https://chromium.googlesource.com/chromium/deps/libjpeg_turbo"
}
- version: "b7bef8c05b7cdb1a038ae271a2c2b6647af4c879"
+ version: "ad8b3b0f84baf155f3bde5626c3bf9d20535bcae"
license_type: NOTICE
last_upgrade_date {
year: 2021
month: 7
- day: 8
+ day: 15
}
}
diff --git a/README.chromium b/README.chromium
index d260cb2c..de1fe85e 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
Name: libjpeg-turbo
URL: https://github.com/libjpeg-turbo/libjpeg-turbo/
-Version: 2.1.0
+Version: b201838d8b5f2f80c9f86ec8405a62a002232b2c (post 2.1.0)
License: Custom license
License File: LICENSE.md
Security Critical: yes
@@ -8,7 +8,7 @@ License Android Compatible: yes
Description:
This consists of the components:
-* libjpeg-turbo 2.1.0
+* libjpeg-turbo b201838d8b5f2f80c9f86ec8405a62a002232b2c (post 2.1.0)
* This file (README.chromium)
* A build file (BUILD.gn)
* An OWNERS file
diff --git a/jchuff.c b/jchuff.c
index 2bce767e..8ff817b1 100644
--- a/jchuff.c
+++ b/jchuff.c
@@ -44,8 +44,9 @@
* flags (this defines __thumb__).
*/
-#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
- defined(_M_ARM64)
+/* NOTE: Both GCC and Clang define __GNUC__ */
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
+ defined(_M_ARM) || defined(_M_ARM64)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
diff --git a/jcmaster.c b/jcmaster.c
index 998dc40a..c2b26000 100644
--- a/jcmaster.c
+++ b/jcmaster.c
@@ -493,7 +493,7 @@ prepare_for_pass(j_compress_ptr cinfo)
master->pass_type = output_pass;
master->pass_number++;
#endif
- /*FALLTHROUGH*/
+ FALLTHROUGH /*FALLTHROUGH*/
case output_pass:
/* Do a data-output pass. */
/* We need not repeat per-scan setup if prior optimization pass did it. */
diff --git a/jconfigint.h b/jconfigint.h
index 4f23dffb..cb9915c9 100644
--- a/jconfigint.h
+++ b/jconfigint.h
@@ -52,3 +52,13 @@
#define HAVEBITSCANFORWARD
#endif
#endif
+
+#if defined(__has_attribute)
+#if __has_attribute(fallthrough)
+#define FALLTHROUGH __attribute__((fallthrough));
+#else
+#define FALLTHROUGH
+#endif
+#else
+#define FALLTHROUGH
+#endif
diff --git a/jconfigint.h.in b/jconfigint.h.in
index 68cbc2a5..d087d7b5 100644
--- a/jconfigint.h.in
+++ b/jconfigint.h.in
@@ -32,3 +32,13 @@
#define HAVE_BITSCANFORWARD
#endif
#endif
+
+#if defined(__has_attribute)
+#if __has_attribute(fallthrough)
+#define FALLTHROUGH __attribute__((fallthrough));
+#else
+#define FALLTHROUGH
+#endif
+#else
+#define FALLTHROUGH
+#endif
diff --git a/jcphuff.c b/jcphuff.c
index bd14fc27..9bf96124 100644
--- a/jcphuff.c
+++ b/jcphuff.c
@@ -52,8 +52,9 @@
* flags (this defines __thumb__).
*/
-#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
- defined(_M_ARM64)
+/* NOTE: Both GCC and Clang define __GNUC__ */
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
+ defined(_M_ARM) || defined(_M_ARM64)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
diff --git a/jdapimin.c b/jdapimin.c
index 21a41d2e..4609b132 100644
--- a/jdapimin.c
+++ b/jdapimin.c
@@ -23,6 +23,7 @@
#include "jinclude.h"
#include "jpeglib.h"
#include "jdmaster.h"
+#include "jconfigint.h"
/*
@@ -308,7 +309,7 @@ jpeg_consume_input(j_decompress_ptr cinfo)
/* Initialize application's data source module */
(*cinfo->src->init_source) (cinfo);
cinfo->global_state = DSTATE_INHEADER;
- /*FALLTHROUGH*/
+ FALLTHROUGH /*FALLTHROUGH*/
case DSTATE_INHEADER:
retcode = (*cinfo->inputctl->consume_input) (cinfo);
if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
diff --git a/jdmainct.c b/jdmainct.c
index 50301d6b..f466b259 100644
--- a/jdmainct.c
+++ b/jdmainct.c
@@ -18,6 +18,7 @@
#include "jinclude.h"
#include "jdmainct.h"
+#include "jconfigint.h"
/*
@@ -360,7 +361,7 @@ process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
if (*out_row_ctr >= out_rows_avail)
return; /* Postprocessor exactly filled output buf */
- /*FALLTHROUGH*/
+ FALLTHROUGH /*FALLTHROUGH*/
case CTX_PREPARE_FOR_IMCU:
/* Prepare to process first M-1 row groups of this iMCU row */
main_ptr->rowgroup_ctr = 0;
@@ -371,7 +372,7 @@ process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
if (main_ptr->iMCU_row_ctr == cinfo->total_iMCU_rows)
set_bottom_pointers(cinfo);
main_ptr->context_state = CTX_PROCESS_IMCU;
- /*FALLTHROUGH*/
+ FALLTHROUGH /*FALLTHROUGH*/
case CTX_PROCESS_IMCU:
/* Call postprocessor using previously set pointers */
(*cinfo->post->post_process_data) (cinfo,
diff --git a/simd/arm/jchuff.h b/simd/arm/jchuff.h
index d4edd5eb..2fbd252b 100644
--- a/simd/arm/jchuff.h
+++ b/simd/arm/jchuff.h
@@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2018, D. R. Commander.
+ * Copyright (C) 2009, 2018, 2021, D. R. Commander.
* Copyright (C) 2018, Matthias Räncker.
* Copyright (C) 2020-2021, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
@@ -74,6 +74,21 @@ typedef struct {
#else
+#if defined(_MSC_VER) && !defined(__clang__)
+#define SPLAT() { \
+ buffer[0] = (JOCTET)(put_buffer >> 24); \
+ buffer[1] = (JOCTET)(put_buffer >> 16); \
+ buffer[2] = (JOCTET)(put_buffer >> 8); \
+ buffer[3] = (JOCTET)(put_buffer ); \
+ buffer += 4; \
+}
+#else
+#define SPLAT() { \
+ put_buffer = __builtin_bswap32(put_buffer); \
+ __asm__("str %1, [%0], #4" : "+r" (buffer) : "r" (put_buffer)); \
+}
+#endif
+
#define FLUSH() { \
if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \
EMIT_BYTE(put_buffer >> 24) \
@@ -81,8 +96,7 @@ typedef struct {
EMIT_BYTE(put_buffer >> 8) \
EMIT_BYTE(put_buffer ) \
} else { \
- *((uint32_t *)buffer) = BUILTIN_BSWAP32(put_buffer); \
- buffer += 4; \
+ SPLAT(); \
} \
}
diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c
index 86a263fa..b91c5db4 100644
--- a/simd/arm/jcphuff-neon.c
+++ b/simd/arm/jcphuff-neon.c
@@ -21,6 +21,7 @@
*/
#define JPEG_INTERNALS
+#include "jconfigint.h"
#include "../../jinclude.h"
#include "../../jpeglib.h"
#include "../../jsimd.h"
@@ -105,18 +106,25 @@ void jsimd_encode_mcu_AC_first_prepare_neon
switch (remaining_coefs) {
case 15:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[14], coefs2, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[13], coefs2, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[12], coefs2, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[11], coefs2, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[10], coefs2, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[9], coefs2, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[8], coefs2, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -149,20 +157,28 @@ void jsimd_encode_mcu_AC_first_prepare_neon
switch (remaining_coefs) {
case 8:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[7], coefs, 7);
+ FALLTHROUGH /*FALLTHROUGH*/
case 7:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[6], coefs, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[5], coefs, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[4], coefs, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[3], coefs, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[2], coefs, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[1], coefs, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[0], coefs, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -337,18 +353,25 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
switch (remaining_coefs) {
case 15:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[14], coefs2, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[13], coefs2, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[12], coefs2, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[11], coefs2, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[10], coefs2, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[9], coefs2, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[8], coefs2, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -389,20 +412,28 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
switch (remaining_coefs) {
case 8:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[7], coefs, 7);
+ FALLTHROUGH /*FALLTHROUGH*/
case 7:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[6], coefs, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[5], coefs, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[4], coefs, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[3], coefs, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[2], coefs, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[1], coefs, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[0], coefs, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
diff --git a/simd/arm/jdcolext-neon.c b/simd/arm/jdcolext-neon.c
index ae440f45..c3c07a19 100644
--- a/simd/arm/jdcolext-neon.c
+++ b/simd/arm/jdcolext-neon.c
@@ -283,18 +283,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf,
switch (cols_remaining) {
case 7:
vst4_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgba, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst4_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgba, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst4_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgba, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst4_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgba, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst4_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgba, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst4_lane_u8(outptr + RGB_PIXELSIZE, rgba, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst4_lane_u8(outptr, rgba, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -308,18 +315,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf,
switch (cols_remaining) {
case 7:
vst3_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgb, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst3_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgb, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst3_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgb, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst3_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgb, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst3_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgb, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst3_lane_u8(outptr + RGB_PIXELSIZE, rgb, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst3_lane_u8(outptr, rgb, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -332,18 +346,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf,
switch (cols_remaining) {
case 7:
vst1q_lane_u16((uint16_t *)(outptr + 6 * RGB_PIXELSIZE), rgb565, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst1q_lane_u16((uint16_t *)(outptr + 5 * RGB_PIXELSIZE), rgb565, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst1q_lane_u16((uint16_t *)(outptr + 4 * RGB_PIXELSIZE), rgb565, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst1q_lane_u16((uint16_t *)(outptr + 3 * RGB_PIXELSIZE), rgb565, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst1q_lane_u16((uint16_t *)(outptr + 2 * RGB_PIXELSIZE), rgb565, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst1q_lane_u16((uint16_t *)(outptr + RGB_PIXELSIZE), rgb565, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst1q_lane_u16((uint16_t *)outptr, rgb565, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
diff --git a/simd/arm/jdcolor-neon.c b/simd/arm/jdcolor-neon.c
index 28dbc572..ea4668f1 100644
--- a/simd/arm/jdcolor-neon.c
+++ b/simd/arm/jdcolor-neon.c
@@ -21,6 +21,7 @@
*/
#define JPEG_INTERNALS
+#include "jconfigint.h"
#include "../../jinclude.h"
#include "../../jpeglib.h"
#include "../../jsimd.h"
diff --git a/simd/arm/jdmerge-neon.c b/simd/arm/jdmerge-neon.c
index 18fb9d8a..e4f91fdc 100644
--- a/simd/arm/jdmerge-neon.c
+++ b/simd/arm/jdmerge-neon.c
@@ -21,6 +21,7 @@
*/
#define JPEG_INTERNALS
+#include "jconfigint.h"
#include "../../jinclude.h"
#include "../../jpeglib.h"
#include "../../jsimd.h"
diff --git a/simd/arm/jdmrgext-neon.c b/simd/arm/jdmrgext-neon.c
index fa2ec056..5b89bdb3 100644
--- a/simd/arm/jdmrgext-neon.c
+++ b/simd/arm/jdmrgext-neon.c
@@ -226,35 +226,49 @@ void jsimd_h2v1_merged_upsample_neon(JDIMENSION output_width,
switch (cols_remaining) {
case 15:
vst4_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgba_h, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
vst4_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgba_h, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
vst4_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgba_h, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
vst4_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgba_h, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
vst4_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgba_h, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
vst4_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgba_h, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
vst4_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgba_h, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
case 8:
vst4_u8(outptr, rgba_l);
break;
case 7:
vst4_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgba_l, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst4_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgba_l, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst4_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgba_l, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst4_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgba_l, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst4_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgba_l, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst4_lane_u8(outptr + RGB_PIXELSIZE, rgba_l, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst4_lane_u8(outptr, rgba_l, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -271,35 +285,49 @@ void jsimd_h2v1_merged_upsample_neon(JDIMENSION output_width,
switch (cols_remaining) {
case 15:
vst3_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgb_h, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
vst3_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgb_h, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
vst3_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgb_h, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
vst3_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgb_h, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
vst3_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgb_h, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
vst3_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgb_h, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
vst3_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgb_h, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
case 8:
vst3_u8(outptr, rgb_l);
break;
case 7:
vst3_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgb_l, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst3_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgb_l, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst3_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgb_l, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst3_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgb_l, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst3_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgb_l, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst3_lane_u8(outptr + RGB_PIXELSIZE, rgb_l, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst3_lane_u8(outptr, rgb_l, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -549,24 +577,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,
case 15:
vst4_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgba0_h, 6);
vst4_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgba1_h, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
vst4_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgba0_h, 5);
vst4_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgba1_h, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
vst4_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgba0_h, 4);
vst4_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgba1_h, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
vst4_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgba0_h, 3);
vst4_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgba1_h, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
vst4_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgba0_h, 2);
vst4_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgba1_h, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
vst4_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgba0_h, 1);
vst4_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgba1_h, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
vst4_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgba0_h, 0);
vst4_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgba1_h, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
case 8:
vst4_u8(outptr0, rgba0_l);
vst4_u8(outptr1, rgba1_l);
@@ -574,24 +609,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,
case 7:
vst4_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgba0_l, 6);
vst4_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgba1_l, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst4_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgba0_l, 5);
vst4_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgba1_l, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst4_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgba0_l, 4);
vst4_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgba1_l, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst4_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgba0_l, 3);
vst4_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgba1_l, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst4_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgba0_l, 2);
vst4_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgba1_l, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst4_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgba0_l, 1);
vst4_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgba1_l, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst4_lane_u8(outptr0, rgba0_l, 0);
vst4_lane_u8(outptr1, rgba1_l, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -616,24 +658,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,
case 15:
vst3_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgb0_h, 6);
vst3_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgb1_h, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
vst3_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgb0_h, 5);
vst3_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgb1_h, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
vst3_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgb0_h, 4);
vst3_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgb1_h, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
vst3_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgb0_h, 3);
vst3_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgb1_h, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
vst3_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgb0_h, 2);
vst3_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgb1_h, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
vst3_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgb0_h, 1);
vst3_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgb1_h, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
vst3_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgb0_h, 0);
vst3_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgb1_h, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
case 8:
vst3_u8(outptr0, rgb0_l);
vst3_u8(outptr1, rgb1_l);
@@ -641,24 +690,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,
case 7:
vst3_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgb0_l, 6);
vst3_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgb1_l, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst3_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgb0_l, 5);
vst3_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgb1_l, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst3_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgb0_l, 4);
vst3_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgb1_l, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst3_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgb0_l, 3);
vst3_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgb1_l, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst3_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgb0_l, 2);
vst3_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgb1_l, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst3_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgb0_l, 1);
vst3_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgb1_l, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst3_lane_u8(outptr0, rgb0_l, 0);
vst3_lane_u8(outptr1, rgb1_l, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
diff --git a/simd/arm/neon-compat.h b/simd/arm/neon-compat.h
index 3d77527c..73c57aec 100644
--- a/simd/arm/neon-compat.h
+++ b/simd/arm/neon-compat.h
@@ -29,12 +29,10 @@
#if defined(_MSC_VER) && !defined(__clang__)
#define BUILTIN_CLZ(x) _CountLeadingZeros(x)
#define BUILTIN_CLZLL(x) _CountLeadingZeros64(x)
-#define BUILTIN_BSWAP32(x) _byteswap_ulong(x)
#define BUILTIN_BSWAP64(x) _byteswap_uint64(x)
#elif defined(__clang__) || defined(__GNUC__)
#define BUILTIN_CLZ(x) __builtin_clz(x)
#define BUILTIN_CLZLL(x) __builtin_clzll(x)
-#define BUILTIN_BSWAP32(x) __builtin_bswap32(x)
#define BUILTIN_BSWAP64(x) __builtin_bswap64(x)
#else
#error "Unknown compiler"
diff --git a/simd/arm/neon-compat.h.in b/simd/arm/neon-compat.h.in
index 436c402a..d403f228 100644
--- a/simd/arm/neon-compat.h.in
+++ b/simd/arm/neon-compat.h.in
@@ -27,12 +27,10 @@
#if defined(_MSC_VER) && !defined(__clang__)
#define BUILTIN_CLZ(x) _CountLeadingZeros(x)
#define BUILTIN_CLZLL(x) _CountLeadingZeros64(x)
-#define BUILTIN_BSWAP32(x) _byteswap_ulong(x)
#define BUILTIN_BSWAP64(x) _byteswap_uint64(x)
#elif defined(__clang__) || defined(__GNUC__)
#define BUILTIN_CLZ(x) __builtin_clz(x)
#define BUILTIN_CLZLL(x) __builtin_clzll(x)
-#define BUILTIN_BSWAP32(x) __builtin_bswap32(x)
#define BUILTIN_BSWAP64(x) __builtin_bswap64(x)
#else
#error "Unknown compiler"