aboutsummaryrefslogtreecommitdiff
path: root/src/coresimd.rs
blob: c6a87d5f468c2457631ef901c1c0e60ae158f214 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
use core::simd::*;

/// Computes the three-component dot product of `lhs` and `rhs`.
///
/// Only lane 0 (x) of the returned vector holds the result, accumulated as
/// `(x*x' + y*y') + z*z'`. The w products never reach lane 0. The remaining
/// lanes contain intermediate sums and should not be relied upon.
#[inline(always)]
pub(crate) fn dot3_in_x(lhs: f32x4, rhs: f32x4) -> f32x4 {
    let products = lhs * rhs;
    // Move the z and y products into lane 0; the other lanes are just filled
    // with the x product and carry no meaning.
    let z_in_x = simd_swizzle!(products, [2, 0, 0, 0]);
    let y_in_x = simd_swizzle!(products, [1, 0, 0, 0]);
    // Keep the left-to-right summation order: (x + y) + z.
    (products + y_in_x) + z_in_x
}

/// Computes the four-component dot product of `lhs` and `rhs`.
///
/// Only lane 0 (x) of the returned vector holds the result, accumulated as
/// `(x*x' + z*z') + (y*y' + w*w')`. The remaining lanes contain intermediate
/// sums and should not be relied upon.
#[inline(always)]
pub(crate) fn dot4_in_x(lhs: f32x4, rhs: f32x4) -> f32x4 {
    let products = lhs * rhs;
    // Fold the upper pair onto the lower pair: lane 0 = x + z, lane 1 = y + w.
    let pair_sums = products + simd_swizzle!(products, [2, 3, 0, 0]);
    // Fold lane 1 onto lane 0 to finish the horizontal sum.
    pair_sums + simd_swizzle!(pair_sums, [1, 0, 0, 0])
}

/// Returns the three-component dot product of `lhs` and `rhs` as a scalar.
#[inline]
pub(crate) fn dot3(lhs: f32x4, rhs: f32x4) -> f32 {
    // Only the first lane of the helper's output is meaningful.
    let [dot, ..] = dot3_in_x(lhs, rhs).to_array();
    dot
}

/// Returns the three-component dot product of `lhs` and `rhs` broadcast to
/// every lane of an `f32x4`.
#[inline]
pub(crate) fn dot3_into_f32x4(lhs: f32x4, rhs: f32x4) -> f32x4 {
    // Lane 0 carries the dot product; replicate it across all four lanes.
    let dot = dot3_in_x(lhs, rhs)[0];
    f32x4::splat(dot)
}

/// Returns the four-component dot product of `lhs` and `rhs` as a scalar.
#[inline]
pub(crate) fn dot4(lhs: f32x4, rhs: f32x4) -> f32 {
    // Only the first lane of the helper's output is meaningful.
    let [dot, ..] = dot4_in_x(lhs, rhs).to_array();
    dot
}

/// Returns the four-component dot product of `lhs` and `rhs` broadcast to
/// every lane of an `f32x4`.
#[inline]
pub(crate) fn dot4_into_f32x4(lhs: f32x4, rhs: f32x4) -> f32x4 {
    // Lane 0 carries the dot product; replicate it across all four lanes.
    let dot = dot4_in_x(lhs, rhs)[0];
    f32x4::splat(dot)
}

/// Bitwise AND of two `f32x4` values, applied lane by lane to their raw
/// 32-bit representations and reinterpreted back as floats.
#[inline(always)]
pub(crate) fn f32x4_bitand(a: f32x4, b: f32x4) -> f32x4 {
    // Floats have no `&` operator, so go through the integer bit patterns.
    let masked_bits = a.to_bits() & b.to_bits();
    f32x4::from_bits(masked_bits)
}

/// Bitwise XOR of two `f32x4` values, applied lane by lane to their raw
/// 32-bit representations and reinterpreted back as floats.
#[inline(always)]
pub(crate) fn f32x4_bitxor(a: f32x4, b: f32x4) -> f32x4 {
    // Floats have no `^` operator, so go through the integer bit patterns.
    let flipped_bits = a.to_bits() ^ b.to_bits();
    f32x4::from_bits(flipped_bits)
}