aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marat Dukhan <maratek@gmail.com> 2017-02-22 04:23:57 -0500
committer: Marat Dukhan <maratek@gmail.com> 2017-02-22 04:23:57 -0500
commit: 457042cd2b43670899bed644da29ab0c3a6141db (patch)
tree: 56abb212918cfe213f6d073f442522032a67665a
parent: 0e58925daeaf88a305b3bb04b18e17831a5b2f78 (diff)
download: psimd-457042cd2b43670899bed644da29ab0c3a6141db.tar.gz
F32 reduce and all-reduce operations
-rw-r--r-- include/psimd.h 66
1 files changed, 66 insertions, 0 deletions
diff --git a/include/psimd.h b/include/psimd.h
index b0fb7df..e3df2d1 100644
--- a/include/psimd.h
+++ b/include/psimd.h
@@ -506,6 +506,72 @@
return __builtin_shuffle(a, b, (psimd_s32) { 2, 4+2, 3, 4+3 });
}
#endif
+
+ /* Vector reduce */
+ #if defined(__clang__)
+ /*
+  * All-reduce sum (Clang variant): returns a vector in which every lane
+  * holds (v[0] + v[2]) + (v[1] + v[3]).
+  */
+ PSIMD_INTRINSIC psimd_f32 psimd_allreduce_sum_f32(psimd_f32 v) {
+     /* Step 1: exchange the low and high halves and add them lane-wise. */
+     const psimd_f32 swapped_halves = __builtin_shufflevector(v, v, 2, 3, 0, 1);
+     const psimd_f32 pair_sums = v + swapped_halves;
+     /* Step 2: exchange neighbouring lanes and add again. */
+     const psimd_f32 swapped_pairs = __builtin_shufflevector(pair_sums, pair_sums, 1, 0, 3, 2);
+     return pair_sums + swapped_pairs;
+ }
+
+ /*
+  * All-reduce max (Clang variant): combines the four lanes of v with
+  * psimd_max_f32 and returns the combined value in every lane.
+  * NOTE(review): psimd_max_f32 is defined elsewhere in this header;
+  * presumably a lane-wise maximum - confirm there.
+  */
+ PSIMD_INTRINSIC psimd_f32 psimd_allreduce_max_f32(psimd_f32 v) {
+     /* Step 1: combine each lane with its counterpart in the other half. */
+     const psimd_f32 swapped_halves = __builtin_shufflevector(v, v, 2, 3, 0, 1);
+     const psimd_f32 pair_max = psimd_max_f32(v, swapped_halves);
+     /* Step 2: combine each lane with its immediate neighbour. */
+     const psimd_f32 swapped_pairs = __builtin_shufflevector(pair_max, pair_max, 1, 0, 3, 2);
+     return psimd_max_f32(pair_max, swapped_pairs);
+ }
+
+ /*
+  * All-reduce min (Clang variant): combines the four lanes of v with
+  * psimd_min_f32 and returns the combined value in every lane.
+  * NOTE(review): psimd_min_f32 is defined elsewhere in this header;
+  * presumably a lane-wise minimum - confirm there.
+  */
+ PSIMD_INTRINSIC psimd_f32 psimd_allreduce_min_f32(psimd_f32 v) {
+     /* Step 1: combine each lane with its counterpart in the other half. */
+     const psimd_f32 swapped_halves = __builtin_shufflevector(v, v, 2, 3, 0, 1);
+     const psimd_f32 pair_min = psimd_min_f32(v, swapped_halves);
+     /* Step 2: combine each lane with its immediate neighbour. */
+     const psimd_f32 swapped_pairs = __builtin_shufflevector(pair_min, pair_min, 1, 0, 3, 2);
+     return psimd_min_f32(pair_min, swapped_pairs);
+ }
+
+ /*
+  * Horizontal sum (Clang variant): returns (v[0] + v[2]) + (v[1] + v[3])
+  * as a scalar. Only lane 0 of the final vector is read, so shuffle
+  * positions that never feed lane 0 are given the index -1.
+  */
+ PSIMD_INTRINSIC float psimd_reduce_sum_f32(psimd_f32 v) {
+     /* Lanes 0-1 receive v[2] and v[3]; lanes 2-3 are not used afterwards. */
+     const psimd_f32 upper_half = __builtin_shufflevector(v, v, 2, 3, -1, -1);
+     const psimd_f32 pair_sums = v + upper_half;
+     /* Lane 0 receives pair_sums[1]; the remaining lanes are not used. */
+     const psimd_f32 neighbour = __builtin_shufflevector(pair_sums, pair_sums, 1, -1, -1, -1);
+     const psimd_f32 total = pair_sums + neighbour;
+     return total[0];
+ }
+
+ /*
+  * Horizontal max (Clang variant): folds the four lanes of v with
+  * psimd_max_f32 and returns lane 0 of the result as a scalar.
+  * Shuffle positions that never feed lane 0 are given the index -1.
+  * NOTE(review): psimd_max_f32 is defined elsewhere in this header;
+  * presumably a lane-wise maximum - confirm there.
+  */
+ PSIMD_INTRINSIC float psimd_reduce_max_f32(psimd_f32 v) {
+     /* Lanes 0-1 receive v[2] and v[3]; lanes 2-3 are not used afterwards. */
+     const psimd_f32 upper_half = __builtin_shufflevector(v, v, 2, 3, -1, -1);
+     const psimd_f32 pair_max = psimd_max_f32(v, upper_half);
+     /* Lane 0 receives pair_max[1]; the remaining lanes are not used. */
+     const psimd_f32 neighbour = __builtin_shufflevector(pair_max, pair_max, 1, -1, -1, -1);
+     const psimd_f32 folded = psimd_max_f32(pair_max, neighbour);
+     return folded[0];
+ }
+
+ /*
+  * Horizontal min (Clang variant): folds the four lanes of v with
+  * psimd_min_f32 and returns lane 0 of the result as a scalar.
+  * Shuffle positions that never feed lane 0 are given the index -1.
+  * NOTE(review): psimd_min_f32 is defined elsewhere in this header;
+  * presumably a lane-wise minimum - confirm there.
+  */
+ PSIMD_INTRINSIC float psimd_reduce_min_f32(psimd_f32 v) {
+     /* Lanes 0-1 receive v[2] and v[3]; lanes 2-3 are not used afterwards. */
+     const psimd_f32 upper_half = __builtin_shufflevector(v, v, 2, 3, -1, -1);
+     const psimd_f32 pair_min = psimd_min_f32(v, upper_half);
+     /* Lane 0 receives pair_min[1]; the remaining lanes are not used. */
+     const psimd_f32 neighbour = __builtin_shufflevector(pair_min, pair_min, 1, -1, -1, -1);
+     const psimd_f32 folded = psimd_min_f32(pair_min, neighbour);
+     return folded[0];
+ }
+ #else
+ /*
+  * All-reduce sum (non-Clang variant, using __builtin_shuffle): returns a
+  * vector in which every lane holds (v[0] + v[2]) + (v[1] + v[3]).
+  */
+ PSIMD_INTRINSIC psimd_f32 psimd_allreduce_sum_f32(psimd_f32 v) {
+     /* Step 1: exchange the low and high halves and add them lane-wise. */
+     const psimd_f32 swapped_halves = __builtin_shuffle(v, (psimd_s32) { 2, 3, 0, 1 });
+     const psimd_f32 pair_sums = v + swapped_halves;
+     /* Step 2: exchange neighbouring lanes and add again. */
+     const psimd_f32 swapped_pairs = __builtin_shuffle(pair_sums, (psimd_s32) { 1, 0, 3, 2 });
+     return pair_sums + swapped_pairs;
+ }
+
+ /*
+  * All-reduce max (non-Clang variant, using __builtin_shuffle): combines
+  * the four lanes of v with psimd_max_f32 and returns the combined value
+  * in every lane.
+  * NOTE(review): psimd_max_f32 is defined elsewhere in this header;
+  * presumably a lane-wise maximum - confirm there.
+  */
+ PSIMD_INTRINSIC psimd_f32 psimd_allreduce_max_f32(psimd_f32 v) {
+     /* Step 1: combine each lane with its counterpart in the other half. */
+     const psimd_f32 swapped_halves = __builtin_shuffle(v, (psimd_s32) { 2, 3, 0, 1 });
+     const psimd_f32 pair_max = psimd_max_f32(v, swapped_halves);
+     /* Step 2: combine each lane with its immediate neighbour. */
+     const psimd_f32 swapped_pairs = __builtin_shuffle(pair_max, (psimd_s32) { 1, 0, 3, 2 });
+     return psimd_max_f32(pair_max, swapped_pairs);
+ }
+
+ /*
+  * All-reduce min (non-Clang variant, using __builtin_shuffle): combines
+  * the four lanes of v with psimd_min_f32 and returns the combined value
+  * in every lane.
+  * NOTE(review): psimd_min_f32 is defined elsewhere in this header;
+  * presumably a lane-wise minimum - confirm there.
+  */
+ PSIMD_INTRINSIC psimd_f32 psimd_allreduce_min_f32(psimd_f32 v) {
+     /* Step 1: combine each lane with its counterpart in the other half. */
+     const psimd_f32 swapped_halves = __builtin_shuffle(v, (psimd_s32) { 2, 3, 0, 1 });
+     const psimd_f32 pair_min = psimd_min_f32(v, swapped_halves);
+     /* Step 2: combine each lane with its immediate neighbour. */
+     const psimd_f32 swapped_pairs = __builtin_shuffle(pair_min, (psimd_s32) { 1, 0, 3, 2 });
+     return psimd_min_f32(pair_min, swapped_pairs);
+ }
+
+ /*
+  * Horizontal sum (non-Clang variant): runs psimd_allreduce_sum_f32 on v
+  * and returns lane 0 of its result as a scalar.
+  */
+ PSIMD_INTRINSIC float psimd_reduce_sum_f32(psimd_f32 v) {
+     const psimd_f32 all_lanes = psimd_allreduce_sum_f32(v);
+     const float lane0 = all_lanes[0];
+     return lane0;
+ }
+
+ /*
+  * Horizontal max (non-Clang variant): runs psimd_allreduce_max_f32 on v
+  * and returns lane 0 of its result as a scalar.
+  */
+ PSIMD_INTRINSIC float psimd_reduce_max_f32(psimd_f32 v) {
+     const psimd_f32 all_lanes = psimd_allreduce_max_f32(v);
+     const float lane0 = all_lanes[0];
+     return lane0;
+ }
+
+ /*
+  * Horizontal min (non-Clang variant): runs psimd_allreduce_min_f32 on v
+  * and returns lane 0 of its result as a scalar.
+  */
+ PSIMD_INTRINSIC float psimd_reduce_min_f32(psimd_f32 v) {
+     const psimd_f32 all_lanes = psimd_allreduce_min_f32(v);
+     const float lane0 = all_lanes[0];
+     return lane0;
+ }
+ #endif
#endif
#endif /* PSIMD_H */