diff options
Diffstat (limited to 'simd/arm')
-rw-r--r-- | simd/arm/jchuff.h | 20 | ||||
-rw-r--r-- | simd/arm/jcphuff-neon.c | 31 | ||||
-rw-r--r-- | simd/arm/jdcolext-neon.c | 21 | ||||
-rw-r--r-- | simd/arm/jdcolor-neon.c | 1 | ||||
-rw-r--r-- | simd/arm/jdmerge-neon.c | 1 | ||||
-rw-r--r-- | simd/arm/jdmrgext-neon.c | 56 | ||||
-rw-r--r-- | simd/arm/neon-compat.h | 2 | ||||
-rw-r--r-- | simd/arm/neon-compat.h.in | 2 |
8 files changed, 127 insertions, 7 deletions
diff --git a/simd/arm/jchuff.h b/simd/arm/jchuff.h index d4edd5eb..2fbd252b 100644 --- a/simd/arm/jchuff.h +++ b/simd/arm/jchuff.h @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2009, 2018, D. R. Commander. + * Copyright (C) 2009, 2018, 2021, D. R. Commander. * Copyright (C) 2018, Matthias Räncker. * Copyright (C) 2020-2021, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg @@ -74,6 +74,21 @@ typedef struct { #else +#if defined(_MSC_VER) && !defined(__clang__) +#define SPLAT() { \ + buffer[0] = (JOCTET)(put_buffer >> 24); \ + buffer[1] = (JOCTET)(put_buffer >> 16); \ + buffer[2] = (JOCTET)(put_buffer >> 8); \ + buffer[3] = (JOCTET)(put_buffer ); \ + buffer += 4; \ +} +#else +#define SPLAT() { \ + put_buffer = __builtin_bswap32(put_buffer); \ + __asm__("str %1, [%0], #4" : "+r" (buffer) : "r" (put_buffer)); \ +} +#endif + #define FLUSH() { \ if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \ EMIT_BYTE(put_buffer >> 24) \ @@ -81,8 +96,7 @@ typedef struct { EMIT_BYTE(put_buffer >> 8) \ EMIT_BYTE(put_buffer ) \ } else { \ - *((uint32_t *)buffer) = BUILTIN_BSWAP32(put_buffer); \ - buffer += 4; \ + SPLAT(); \ } \ } diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c index 86a263fa..b91c5db4 100644 --- a/simd/arm/jcphuff-neon.c +++ b/simd/arm/jcphuff-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "jconfigint.h" #include "../../jinclude.h" #include "../../jpeglib.h" #include "../../jsimd.h" @@ -105,18 +106,25 @@ void jsimd_encode_mcu_AC_first_prepare_neon switch (remaining_coefs) { case 15: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[14], coefs2, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[13], coefs2, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[12], coefs2, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[11], coefs2, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[10], coefs2, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[9], coefs2, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[8], coefs2, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -149,20 +157,28 @@ void jsimd_encode_mcu_AC_first_prepare_neon switch (remaining_coefs) { case 8: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[7], coefs, 7); + FALLTHROUGH /*FALLTHROUGH*/ case 7: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[6], coefs, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[5], coefs, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[4], coefs, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[3], coefs, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[2], coefs, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[1], coefs, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[0], coefs, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -337,18 +353,25 @@ int jsimd_encode_mcu_AC_refine_prepare_neon switch (remaining_coefs) { case 15: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[14], coefs2, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[13], coefs2, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[12], coefs2, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[11], coefs2, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[10], coefs2, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[9], coefs2, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[8], coefs2, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -389,20 +412,28 @@ int jsimd_encode_mcu_AC_refine_prepare_neon switch (remaining_coefs) { case 8: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[7], coefs, 7); + FALLTHROUGH /*FALLTHROUGH*/ case 7: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[6], coefs, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[5], coefs, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[4], coefs, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[3], coefs, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[2], coefs, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[1], coefs, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: coefs = vld1q_lane_s16(block + jpeg_natural_order_start[0], coefs, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } diff --git a/simd/arm/jdcolext-neon.c b/simd/arm/jdcolext-neon.c index ae440f45..c3c07a19 100644 --- a/simd/arm/jdcolext-neon.c +++ b/simd/arm/jdcolext-neon.c @@ -283,18 +283,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf, switch (cols_remaining) { case 7: vst4_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgba, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst4_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgba, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst4_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgba, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst4_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgba, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst4_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgba, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst4_lane_u8(outptr + RGB_PIXELSIZE, rgba, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst4_lane_u8(outptr, rgba, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -308,18 +315,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf, switch (cols_remaining) { case 7: vst3_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgb, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst3_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgb, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst3_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgb, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst3_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgb, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst3_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgb, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst3_lane_u8(outptr + RGB_PIXELSIZE, rgb, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst3_lane_u8(outptr, rgb, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -332,18 +346,25 @@ void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf, switch (cols_remaining) { case 7: vst1q_lane_u16((uint16_t *)(outptr + 6 * RGB_PIXELSIZE), rgb565, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst1q_lane_u16((uint16_t *)(outptr + 5 * RGB_PIXELSIZE), rgb565, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst1q_lane_u16((uint16_t *)(outptr + 4 * RGB_PIXELSIZE), rgb565, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst1q_lane_u16((uint16_t *)(outptr + 3 * RGB_PIXELSIZE), rgb565, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst1q_lane_u16((uint16_t *)(outptr + 2 * RGB_PIXELSIZE), rgb565, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst1q_lane_u16((uint16_t *)(outptr + RGB_PIXELSIZE), rgb565, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst1q_lane_u16((uint16_t *)outptr, rgb565, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } diff --git a/simd/arm/jdcolor-neon.c b/simd/arm/jdcolor-neon.c index 28dbc572..ea4668f1 100644 --- a/simd/arm/jdcolor-neon.c +++ b/simd/arm/jdcolor-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "jconfigint.h" #include "../../jinclude.h" #include "../../jpeglib.h" #include "../../jsimd.h" diff --git a/simd/arm/jdmerge-neon.c b/simd/arm/jdmerge-neon.c index 18fb9d8a..e4f91fdc 100644 --- a/simd/arm/jdmerge-neon.c +++ b/simd/arm/jdmerge-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "jconfigint.h" #include "../../jinclude.h" #include "../../jpeglib.h" #include "../../jsimd.h" diff --git a/simd/arm/jdmrgext-neon.c b/simd/arm/jdmrgext-neon.c index fa2ec056..5b89bdb3 100644 --- a/simd/arm/jdmrgext-neon.c +++ b/simd/arm/jdmrgext-neon.c @@ -226,35 +226,49 @@ void jsimd_h2v1_merged_upsample_neon(JDIMENSION output_width, switch (cols_remaining) { case 15: vst4_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgba_h, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: vst4_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgba_h, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: vst4_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgba_h, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: vst4_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgba_h, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: vst4_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgba_h, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: vst4_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgba_h, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: vst4_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgba_h, 0); + FALLTHROUGH /*FALLTHROUGH*/ case 8: vst4_u8(outptr, rgba_l); break; case 7: vst4_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgba_l, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst4_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgba_l, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst4_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgba_l, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst4_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgba_l, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst4_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgba_l, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst4_lane_u8(outptr + RGB_PIXELSIZE, rgba_l, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst4_lane_u8(outptr, rgba_l, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -271,35 +285,49 @@ void jsimd_h2v1_merged_upsample_neon(JDIMENSION output_width, switch (cols_remaining) { case 15: vst3_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgb_h, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: vst3_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgb_h, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: vst3_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgb_h, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: vst3_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgb_h, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: vst3_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgb_h, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: vst3_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgb_h, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: vst3_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgb_h, 0); + FALLTHROUGH /*FALLTHROUGH*/ case 8: vst3_u8(outptr, rgb_l); break; case 7: vst3_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgb_l, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst3_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgb_l, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst3_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgb_l, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst3_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgb_l, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst3_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgb_l, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst3_lane_u8(outptr + RGB_PIXELSIZE, rgb_l, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst3_lane_u8(outptr, rgb_l, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -549,24 +577,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width, case 15: vst4_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgba0_h, 6); vst4_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgba1_h, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: vst4_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgba0_h, 5); vst4_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgba1_h, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: vst4_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgba0_h, 4); vst4_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgba1_h, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: vst4_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgba0_h, 3); vst4_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgba1_h, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: vst4_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgba0_h, 2); vst4_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgba1_h, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: vst4_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgba0_h, 1); vst4_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgba1_h, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: vst4_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgba0_h, 0); vst4_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgba1_h, 0); + FALLTHROUGH /*FALLTHROUGH*/ case 8: vst4_u8(outptr0, rgba0_l); vst4_u8(outptr1, rgba1_l); @@ -574,24 +609,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width, case 7: vst4_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgba0_l, 6); vst4_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgba1_l, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst4_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgba0_l, 5); vst4_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgba1_l, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst4_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgba0_l, 4); vst4_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgba1_l, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst4_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgba0_l, 3); vst4_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgba1_l, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst4_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgba0_l, 2); vst4_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgba1_l, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst4_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgba0_l, 1); vst4_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgba1_l, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst4_lane_u8(outptr0, rgba0_l, 0); vst4_lane_u8(outptr1, rgba1_l, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } @@ -616,24 +658,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width, case 15: vst3_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgb0_h, 6); vst3_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgb1_h, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 14: vst3_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgb0_h, 5); vst3_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgb1_h, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 13: vst3_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgb0_h, 4); vst3_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgb1_h, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 12: vst3_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgb0_h, 3); vst3_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgb1_h, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 11: vst3_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgb0_h, 2); vst3_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgb1_h, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 10: vst3_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgb0_h, 1); vst3_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgb1_h, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 9: vst3_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgb0_h, 0); vst3_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgb1_h, 0); + FALLTHROUGH /*FALLTHROUGH*/ case 8: vst3_u8(outptr0, rgb0_l); vst3_u8(outptr1, rgb1_l); @@ -641,24 +690,31 @@ void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width, case 7: vst3_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgb0_l, 6); vst3_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgb1_l, 6); + FALLTHROUGH /*FALLTHROUGH*/ case 6: vst3_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgb0_l, 5); vst3_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgb1_l, 5); + FALLTHROUGH /*FALLTHROUGH*/ case 5: vst3_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgb0_l, 4); vst3_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgb1_l, 4); + FALLTHROUGH /*FALLTHROUGH*/ case 4: vst3_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgb0_l, 3); vst3_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgb1_l, 3); + FALLTHROUGH /*FALLTHROUGH*/ case 3: vst3_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgb0_l, 2); vst3_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgb1_l, 2); + FALLTHROUGH /*FALLTHROUGH*/ case 2: vst3_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgb0_l, 1); vst3_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgb1_l, 1); + FALLTHROUGH /*FALLTHROUGH*/ case 1: vst3_lane_u8(outptr0, rgb0_l, 0); vst3_lane_u8(outptr1, rgb1_l, 0); + FALLTHROUGH /*FALLTHROUGH*/ default: break; } diff --git a/simd/arm/neon-compat.h b/simd/arm/neon-compat.h index 3d77527c..73c57aec 100644 --- a/simd/arm/neon-compat.h +++ b/simd/arm/neon-compat.h @@ -29,12 +29,10 @@ #if defined(_MSC_VER) && !defined(__clang__) #define BUILTIN_CLZ(x) _CountLeadingZeros(x) #define BUILTIN_CLZLL(x) _CountLeadingZeros64(x) -#define BUILTIN_BSWAP32(x) _byteswap_ulong(x) #define BUILTIN_BSWAP64(x) _byteswap_uint64(x) #elif defined(__clang__) || defined(__GNUC__) #define BUILTIN_CLZ(x) __builtin_clz(x) #define BUILTIN_CLZLL(x) __builtin_clzll(x) -#define BUILTIN_BSWAP32(x) __builtin_bswap32(x) #define BUILTIN_BSWAP64(x) __builtin_bswap64(x) #else #error "Unknown compiler" diff --git a/simd/arm/neon-compat.h.in b/simd/arm/neon-compat.h.in index 436c402a..d403f228 100644 --- a/simd/arm/neon-compat.h.in +++ b/simd/arm/neon-compat.h.in @@ -27,12 +27,10 @@ #if defined(_MSC_VER) && !defined(__clang__) #define BUILTIN_CLZ(x) _CountLeadingZeros(x) #define BUILTIN_CLZLL(x) _CountLeadingZeros64(x) -#define BUILTIN_BSWAP32(x) _byteswap_ulong(x) #define BUILTIN_BSWAP64(x) _byteswap_uint64(x) #elif defined(__clang__) || defined(__GNUC__) #define BUILTIN_CLZ(x) __builtin_clz(x) #define BUILTIN_CLZLL(x) __builtin_clzll(x) -#define BUILTIN_BSWAP32(x) __builtin_bswap32(x) #define BUILTIN_BSWAP64(x) __builtin_bswap64(x) #else #error "Unknown compiler" |