aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marat Dukhan <maratek@google.com> 2019-08-29 22:26:05 -0700
committer Marat Dukhan <maratek@google.com> 2019-08-29 22:26:05 -0700
commit 363d461cc22d7ee2590470a7203594f86ac3600a (patch)
tree 9647225eda31c22225092f365d31deac940caac7
parent 4822bd4a8a57612c1001833e94b7e2315840f1e1 (diff)
download psimd-363d461cc22d7ee2590470a7203594f86ac3600a.tar.gz
Optimized blends and FP32 min/max for WAsm SIMD128
-rw-r--r-- include/psimd.h 18
1 files changed, 18 insertions, 0 deletions
diff --git a/include/psimd.h b/include/psimd.h
index cc2d391..b08a2b6 100644
--- a/include/psimd.h
+++ b/include/psimd.h
@@ -632,6 +632,8 @@
PSIMD_INTRINSIC psimd_s8 psimd_blend_s8(psimd_s8 mask, psimd_s8 a, psimd_s8 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_s8) vbslq_s8((uint8x16_t) mask, (int8x16_t) a, (int8x16_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_s8) __builtin_wasm_bitselect(a, b, mask);
#else
return (mask & a) | (~mask & b);
#endif
@@ -640,6 +642,8 @@
PSIMD_INTRINSIC psimd_u8 psimd_blend_u8(psimd_u8 mask, psimd_u8 a, psimd_u8 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_u8) vbslq_u8((uint8x16_t) mask, (uint8x16_t) a, (uint8x16_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_u8) __builtin_wasm_bitselect(a, b, mask);
#else
return (mask & a) | (~mask & b);
#endif
@@ -648,6 +652,8 @@
PSIMD_INTRINSIC psimd_s16 psimd_blend_s16(psimd_s16 mask, psimd_s16 a, psimd_s16 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_s16) vbslq_s16((uint16x8_t) mask, (int16x8_t) a, (int16x8_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_s16) __builtin_wasm_bitselect(a, b, mask);
#else
return (mask & a) | (~mask & b);
#endif
@@ -656,6 +662,8 @@
PSIMD_INTRINSIC psimd_u16 psimd_blend_u16(psimd_u16 mask, psimd_u16 a, psimd_u16 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_u16) vbslq_u16((uint16x8_t) mask, (uint16x8_t) a, (uint16x8_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_u16) __builtin_wasm_bitselect(a, b, mask);
#else
return (mask & a) | (~mask & b);
#endif
@@ -664,6 +672,8 @@
PSIMD_INTRINSIC psimd_s32 psimd_blend_s32(psimd_s32 mask, psimd_s32 a, psimd_s32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_s32) vbslq_s32((uint32x4_t) mask, (int32x4_t) a, (int32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_s32) __builtin_wasm_bitselect(a, b, mask);
#else
return (mask & a) | (~mask & b);
#endif
@@ -672,6 +682,8 @@
PSIMD_INTRINSIC psimd_u32 psimd_blend_u32(psimd_u32 mask, psimd_u32 a, psimd_u32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_u32) vbslq_u32((uint32x4_t) mask, (uint32x4_t) a, (uint32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_u32) __builtin_wasm_bitselect(a, b, mask);
#else
return (mask & a) | (~mask & b);
#endif
@@ -680,6 +692,8 @@
PSIMD_INTRINSIC psimd_f32 psimd_blend_f32(psimd_s32 mask, psimd_f32 a, psimd_f32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_f32) vbslq_f32((uint32x4_t) mask, (float32x4_t) a, (float32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_f32) __builtin_wasm_bitselect(a, b, mask);
#else
return (psimd_f32) psimd_blend_s32(mask, (psimd_s32) a, (psimd_s32) b);
#endif
@@ -779,6 +793,8 @@
PSIMD_INTRINSIC psimd_f32 psimd_max_f32(psimd_f32 a, psimd_f32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_f32) vmaxq_f32((float32x4_t) a, (float32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return __builtin_wasm_max_f32x4(a, b);
#else
return psimd_blend_f32(a > b, a, b);
#endif
@@ -836,6 +852,8 @@
PSIMD_INTRINSIC psimd_f32 psimd_min_f32(psimd_f32 a, psimd_f32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_f32) vminq_f32((float32x4_t) a, (float32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return __builtin_wasm_min_f32x4(a, b);
#else
return psimd_blend_f32(a < b, a, b);
#endif