about summary refs log tree commit diff
path: root/simd/arm
diff options
context:
space:
mode:
Diffstat (limited to 'simd/arm')
-rw-r--r-- simd/arm/jchuff.h 20
-rw-r--r-- simd/arm/jcphuff-neon.c 31
-rw-r--r-- simd/arm/jdcolext-neon.c 21
-rw-r--r-- simd/arm/jdcolor-neon.c 1
-rw-r--r-- simd/arm/jdmerge-neon.c 1
-rw-r--r-- simd/arm/jdmrgext-neon.c 56
-rw-r--r-- simd/arm/neon-compat.h 2
-rw-r--r-- simd/arm/neon-compat.h.in 2
8 files changed, 127 insertions, 7 deletions
diff --git a/simd/arm/jchuff.h b/simd/arm/jchuff.h
index d4edd5eb..2fbd252b 100644
--- a/simd/arm/jchuff.h
+++ b/simd/arm/jchuff.h
@@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2018, D. R. Commander.
+ * Copyright (C) 2009, 2018, 2021, D. R. Commander.
* Copyright (C) 2018, Matthias Räncker.
* Copyright (C) 2020-2021, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
@@ -74,6 +74,21 @@ typedef struct {
#else
+#if defined(_MSC_VER) && !defined(__clang__)
+#define SPLAT() { \
+ buffer[0] = (JOCTET)(put_buffer >> 24); \
+ buffer[1] = (JOCTET)(put_buffer >> 16); \
+ buffer[2] = (JOCTET)(put_buffer >> 8); \
+ buffer[3] = (JOCTET)(put_buffer ); \
+ buffer += 4; \
+}
+#else
+#define SPLAT() { \
+ put_buffer = __builtin_bswap32(put_buffer); \
+ __asm__("str %1, [%0], #4" : "+r" (buffer) : "r" (put_buffer)); \
+}
+#endif
+
#define FLUSH() { \
if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \
EMIT_BYTE(put_buffer >> 24) \
@@ -81,8 +96,7 @@ typedef struct {
EMIT_BYTE(put_buffer >> 8) \
EMIT_BYTE(put_buffer ) \
} else { \
- *((uint32_t *)buffer) = BUILTIN_BSWAP32(put_buffer); \
- buffer += 4; \
+ SPLAT(); \
} \
}
diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c
index 86a263fa..b91c5db4 100644
--- a/simd/arm/jcphuff-neon.c
+++ b/simd/arm/jcphuff-neon.c
@@ -21,6 +21,7 @@
*/
#define JPEG_INTERNALS
+#include "jconfigint.h"
#include "../../jinclude.h"
#include "../../jpeglib.h"
#include "../../jsimd.h"
@@ -105,18 +106,25 @@ void jsimd_encode_mcu_AC_first_prepare_neon
switch (remaining_coefs) {
case 15:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[14], coefs2, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[13], coefs2, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[12], coefs2, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[11], coefs2, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[10], coefs2, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[9], coefs2, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[8], coefs2, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -149,20 +157,28 @@ void jsimd_encode_mcu_AC_first_prepare_neon
switch (remaining_coefs) {
case 8:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[7], coefs, 7);
+ FALLTHROUGH /*FALLTHROUGH*/
case 7:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[6], coefs, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[5], coefs, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[4], coefs, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[3], coefs, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[2], coefs, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[1], coefs, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[0], coefs, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -337,18 +353,25 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
switch (remaining_coefs) {
case 15:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[14], coefs2, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[13], coefs2, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[12], coefs2, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[11], coefs2, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[10], coefs2, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[9], coefs2, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[8], coefs2, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -389,20 +412,28 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
switch (remaining_coefs) {
case 8:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[7], coefs, 7);
+ FALLTHROUGH /*FALLTHROUGH*/
case 7:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[6], coefs, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[5], coefs, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[4], coefs, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[3], coefs, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[2], coefs, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[1], coefs, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
coefs = vld1q_lane_s16(block + jpeg_natural_order_start[0], coefs, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
diff --git a/simd/arm/jdcolext-neon.c b/simd/arm/jdcolext-neon.c
index ae440f45..c3c07a19 100644
--- a/simd/arm/jdcolext-neon.c
+++ b/simd/arm/jdcolext-neon.c
@@ -283,18 +283,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf,
switch (cols_remaining) {
case 7:
vst4_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgba, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst4_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgba, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst4_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgba, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst4_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgba, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst4_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgba, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst4_lane_u8(outptr + RGB_PIXELSIZE, rgba, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst4_lane_u8(outptr, rgba, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -308,18 +315,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf,
switch (cols_remaining) {
case 7:
vst3_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgb, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst3_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgb, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst3_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgb, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst3_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgb, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst3_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgb, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst3_lane_u8(outptr + RGB_PIXELSIZE, rgb, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst3_lane_u8(outptr, rgb, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -332,18 +346,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf,
switch (cols_remaining) {
case 7:
vst1q_lane_u16((uint16_t *)(outptr + 6 * RGB_PIXELSIZE), rgb565, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst1q_lane_u16((uint16_t *)(outptr + 5 * RGB_PIXELSIZE), rgb565, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst1q_lane_u16((uint16_t *)(outptr + 4 * RGB_PIXELSIZE), rgb565, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst1q_lane_u16((uint16_t *)(outptr + 3 * RGB_PIXELSIZE), rgb565, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst1q_lane_u16((uint16_t *)(outptr + 2 * RGB_PIXELSIZE), rgb565, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst1q_lane_u16((uint16_t *)(outptr + RGB_PIXELSIZE), rgb565, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst1q_lane_u16((uint16_t *)outptr, rgb565, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
diff --git a/simd/arm/jdcolor-neon.c b/simd/arm/jdcolor-neon.c
index 28dbc572..ea4668f1 100644
--- a/simd/arm/jdcolor-neon.c
+++ b/simd/arm/jdcolor-neon.c
@@ -21,6 +21,7 @@
*/
#define JPEG_INTERNALS
+#include "jconfigint.h"
#include "../../jinclude.h"
#include "../../jpeglib.h"
#include "../../jsimd.h"
diff --git a/simd/arm/jdmerge-neon.c b/simd/arm/jdmerge-neon.c
index 18fb9d8a..e4f91fdc 100644
--- a/simd/arm/jdmerge-neon.c
+++ b/simd/arm/jdmerge-neon.c
@@ -21,6 +21,7 @@
*/
#define JPEG_INTERNALS
+#include "jconfigint.h"
#include "../../jinclude.h"
#include "../../jpeglib.h"
#include "../../jsimd.h"
diff --git a/simd/arm/jdmrgext-neon.c b/simd/arm/jdmrgext-neon.c
index fa2ec056..5b89bdb3 100644
--- a/simd/arm/jdmrgext-neon.c
+++ b/simd/arm/jdmrgext-neon.c
@@ -226,35 +226,49 @@ void jsimd_h2v1_merged_upsample_neon(JDIMENSION output_width,
switch (cols_remaining) {
case 15:
vst4_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgba_h, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
vst4_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgba_h, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
vst4_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgba_h, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
vst4_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgba_h, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
vst4_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgba_h, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
vst4_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgba_h, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
vst4_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgba_h, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
case 8:
vst4_u8(outptr, rgba_l);
break;
case 7:
vst4_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgba_l, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst4_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgba_l, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst4_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgba_l, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst4_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgba_l, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst4_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgba_l, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst4_lane_u8(outptr + RGB_PIXELSIZE, rgba_l, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst4_lane_u8(outptr, rgba_l, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -271,35 +285,49 @@ void jsimd_h2v1_merged_upsample_neon(JDIMENSION output_width,
switch (cols_remaining) {
case 15:
vst3_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgb_h, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
vst3_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgb_h, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
vst3_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgb_h, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
vst3_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgb_h, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
vst3_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgb_h, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
vst3_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgb_h, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
vst3_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgb_h, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
case 8:
vst3_u8(outptr, rgb_l);
break;
case 7:
vst3_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgb_l, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst3_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgb_l, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst3_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgb_l, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst3_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgb_l, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst3_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgb_l, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst3_lane_u8(outptr + RGB_PIXELSIZE, rgb_l, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst3_lane_u8(outptr, rgb_l, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -549,24 +577,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,
case 15:
vst4_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgba0_h, 6);
vst4_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgba1_h, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
vst4_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgba0_h, 5);
vst4_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgba1_h, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
vst4_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgba0_h, 4);
vst4_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgba1_h, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
vst4_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgba0_h, 3);
vst4_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgba1_h, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
vst4_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgba0_h, 2);
vst4_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgba1_h, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
vst4_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgba0_h, 1);
vst4_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgba1_h, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
vst4_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgba0_h, 0);
vst4_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgba1_h, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
case 8:
vst4_u8(outptr0, rgba0_l);
vst4_u8(outptr1, rgba1_l);
@@ -574,24 +609,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,
case 7:
vst4_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgba0_l, 6);
vst4_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgba1_l, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst4_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgba0_l, 5);
vst4_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgba1_l, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst4_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgba0_l, 4);
vst4_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgba1_l, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst4_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgba0_l, 3);
vst4_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgba1_l, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst4_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgba0_l, 2);
vst4_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgba1_l, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst4_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgba0_l, 1);
vst4_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgba1_l, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst4_lane_u8(outptr0, rgba0_l, 0);
vst4_lane_u8(outptr1, rgba1_l, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
@@ -616,24 +658,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,
case 15:
vst3_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgb0_h, 6);
vst3_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgb1_h, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 14:
vst3_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgb0_h, 5);
vst3_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgb1_h, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 13:
vst3_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgb0_h, 4);
vst3_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgb1_h, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 12:
vst3_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgb0_h, 3);
vst3_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgb1_h, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 11:
vst3_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgb0_h, 2);
vst3_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgb1_h, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 10:
vst3_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgb0_h, 1);
vst3_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgb1_h, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 9:
vst3_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgb0_h, 0);
vst3_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgb1_h, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
case 8:
vst3_u8(outptr0, rgb0_l);
vst3_u8(outptr1, rgb1_l);
@@ -641,24 +690,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,
case 7:
vst3_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgb0_l, 6);
vst3_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgb1_l, 6);
+ FALLTHROUGH /*FALLTHROUGH*/
case 6:
vst3_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgb0_l, 5);
vst3_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgb1_l, 5);
+ FALLTHROUGH /*FALLTHROUGH*/
case 5:
vst3_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgb0_l, 4);
vst3_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgb1_l, 4);
+ FALLTHROUGH /*FALLTHROUGH*/
case 4:
vst3_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgb0_l, 3);
vst3_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgb1_l, 3);
+ FALLTHROUGH /*FALLTHROUGH*/
case 3:
vst3_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgb0_l, 2);
vst3_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgb1_l, 2);
+ FALLTHROUGH /*FALLTHROUGH*/
case 2:
vst3_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgb0_l, 1);
vst3_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgb1_l, 1);
+ FALLTHROUGH /*FALLTHROUGH*/
case 1:
vst3_lane_u8(outptr0, rgb0_l, 0);
vst3_lane_u8(outptr1, rgb1_l, 0);
+ FALLTHROUGH /*FALLTHROUGH*/
default:
break;
}
diff --git a/simd/arm/neon-compat.h b/simd/arm/neon-compat.h
index 3d77527c..73c57aec 100644
--- a/simd/arm/neon-compat.h
+++ b/simd/arm/neon-compat.h
@@ -29,12 +29,10 @@
#if defined(_MSC_VER) && !defined(__clang__)
#define BUILTIN_CLZ(x) _CountLeadingZeros(x)
#define BUILTIN_CLZLL(x) _CountLeadingZeros64(x)
-#define BUILTIN_BSWAP32(x) _byteswap_ulong(x)
#define BUILTIN_BSWAP64(x) _byteswap_uint64(x)
#elif defined(__clang__) || defined(__GNUC__)
#define BUILTIN_CLZ(x) __builtin_clz(x)
#define BUILTIN_CLZLL(x) __builtin_clzll(x)
-#define BUILTIN_BSWAP32(x) __builtin_bswap32(x)
#define BUILTIN_BSWAP64(x) __builtin_bswap64(x)
#else
#error "Unknown compiler"
diff --git a/simd/arm/neon-compat.h.in b/simd/arm/neon-compat.h.in
index 436c402a..d403f228 100644
--- a/simd/arm/neon-compat.h.in
+++ b/simd/arm/neon-compat.h.in
@@ -27,12 +27,10 @@
#if defined(_MSC_VER) && !defined(__clang__)
#define BUILTIN_CLZ(x) _CountLeadingZeros(x)
#define BUILTIN_CLZLL(x) _CountLeadingZeros64(x)
-#define BUILTIN_BSWAP32(x) _byteswap_ulong(x)
#define BUILTIN_BSWAP64(x) _byteswap_uint64(x)
#elif defined(__clang__) || defined(__GNUC__)
#define BUILTIN_CLZ(x) __builtin_clz(x)
#define BUILTIN_CLZLL(x) __builtin_clzll(x)
-#define BUILTIN_BSWAP32(x) __builtin_bswap32(x)
#define BUILTIN_BSWAP64(x) __builtin_bswap64(x)
#else
#error "Unknown compiler"