From 363d461cc22d7ee2590470a7203594f86ac3600a Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Thu, 29 Aug 2019 22:26:05 -0700 Subject: Optimized blends and FP32 min/max for WAsm SIMD128 --- include/psimd.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/psimd.h b/include/psimd.h index cc2d391..b08a2b6 100644 --- a/include/psimd.h +++ b/include/psimd.h @@ -632,6 +632,8 @@ PSIMD_INTRINSIC psimd_s8 psimd_blend_s8(psimd_s8 mask, psimd_s8 a, psimd_s8 b) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) return (psimd_s8) vbslq_s8((uint8x16_t) mask, (int8x16_t) a, (int8x16_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_s8) __builtin_wasm_bitselect(a, b, mask); #else return (mask & a) | (~mask & b); #endif @@ -640,6 +642,8 @@ PSIMD_INTRINSIC psimd_u8 psimd_blend_u8(psimd_u8 mask, psimd_u8 a, psimd_u8 b) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) return (psimd_u8) vbslq_u8((uint8x16_t) mask, (uint8x16_t) a, (uint8x16_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_u8) __builtin_wasm_bitselect(a, b, mask); #else return (mask & a) | (~mask & b); #endif @@ -648,6 +652,8 @@ PSIMD_INTRINSIC psimd_s16 psimd_blend_s16(psimd_s16 mask, psimd_s16 a, psimd_s16 b) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) return (psimd_s16) vbslq_s16((uint16x8_t) mask, (int16x8_t) a, (int16x8_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_s16) __builtin_wasm_bitselect(a, b, mask); #else return (mask & a) | (~mask & b); #endif @@ -656,6 +662,8 @@ PSIMD_INTRINSIC psimd_u16 psimd_blend_u16(psimd_u16 mask, psimd_u16 a, psimd_u16 b) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) return (psimd_u16) vbslq_u16((uint16x8_t) mask, (uint16x8_t) a, (uint16x8_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_u16) __builtin_wasm_bitselect(a, b, mask); #else return (mask & a) | (~mask & b); #endif @@ -664,6 +672,8 @@ PSIMD_INTRINSIC psimd_s32 psimd_blend_s32(psimd_s32 mask, psimd_s32 a, psimd_s32 b) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) return (psimd_s32) vbslq_s32((uint32x4_t) mask, (int32x4_t) a, (int32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_s32) __builtin_wasm_bitselect(a, b, mask); #else return (mask & a) | (~mask & b); #endif @@ -672,6 +682,8 @@ PSIMD_INTRINSIC psimd_u32 psimd_blend_u32(psimd_u32 mask, psimd_u32 a, psimd_u32 b) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) return (psimd_u32) vbslq_u32((uint32x4_t) mask, (uint32x4_t) a, (uint32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_u32) __builtin_wasm_bitselect(a, b, mask); #else return (mask & a) | (~mask & b); #endif @@ -680,6 +692,8 @@ PSIMD_INTRINSIC psimd_f32 psimd_blend_f32(psimd_s32 mask, psimd_f32 a, psimd_f32 b) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) return (psimd_f32) vbslq_f32((uint32x4_t) mask, (float32x4_t) a, (float32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_f32) __builtin_wasm_bitselect(a, b, mask); #else return (psimd_f32) psimd_blend_s32(mask, (psimd_s32) a, (psimd_s32) b); #endif @@ -779,6 +793,8 @@ PSIMD_INTRINSIC psimd_f32 psimd_max_f32(psimd_f32 a, psimd_f32 b) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) return (psimd_f32) vmaxq_f32((float32x4_t) a, (float32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return __builtin_wasm_max_f32x4(a, b); #else return psimd_blend_f32(a > b, a, b); #endif @@ -836,6 +852,8 @@ PSIMD_INTRINSIC psimd_f32 psimd_min_f32(psimd_f32 a, psimd_f32 b) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) return (psimd_f32) vminq_f32((float32x4_t) a, (float32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return __builtin_wasm_min_f32x4(a, b); #else return psimd_blend_f32(a < b, a, b); #endif -- cgit v1.2.3