diff options
Diffstat (limited to 'src/f32/sse2/vec4.rs')
-rw-r--r-- | src/f32/sse2/vec4.rs | 216 |
1 files changed, 173 insertions, 43 deletions
diff --git a/src/f32/sse2/vec4.rs b/src/f32/sse2/vec4.rs index f1a1311..3792aae 100644 --- a/src/f32/sse2/vec4.rs +++ b/src/f32/sse2/vec4.rs @@ -1,6 +1,6 @@ // Generated from vec.rs.tera template. Edit the template, not the generated file. -use crate::{sse2::*, BVec4A, Vec2, Vec3, Vec3A}; +use crate::{f32::math, sse2::*, BVec4A, Vec2, Vec3, Vec3A}; #[cfg(not(target_arch = "spirv"))] use core::fmt; @@ -12,10 +12,7 @@ use core::arch::x86::*; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; -#[cfg(feature = "libm")] -#[allow(unused_imports)] -use num_traits::Float; - +#[repr(C)] union UnionCast { a: [f32; 4], v: Vec4, @@ -23,13 +20,16 @@ union UnionCast { /// Creates a 4-dimensional vector. #[inline(always)] +#[must_use] pub const fn vec4(x: f32, y: f32, z: f32, w: f32) -> Vec4 { Vec4::new(x, y, z, w) } -/// A 4-dimensional vector with SIMD support. +/// A 4-dimensional vector. +/// +/// SIMD vector types are used for storage on supported platforms. /// -/// This type uses 16 byte aligned SIMD vector type for storage. +/// This type is 16 byte aligned. #[derive(Clone, Copy)] #[repr(transparent)] pub struct Vec4(pub(crate) __m128); @@ -44,31 +44,43 @@ impl Vec4 { /// All negative ones. pub const NEG_ONE: Self = Self::splat(-1.0); - /// All NAN. + /// All `f32::MIN`. + pub const MIN: Self = Self::splat(f32::MIN); + + /// All `f32::MAX`. + pub const MAX: Self = Self::splat(f32::MAX); + + /// All `f32::NAN`. pub const NAN: Self = Self::splat(f32::NAN); - /// A unit-length vector pointing along the positive X axis. + /// All `f32::INFINITY`. + pub const INFINITY: Self = Self::splat(f32::INFINITY); + + /// All `f32::NEG_INFINITY`. + pub const NEG_INFINITY: Self = Self::splat(f32::NEG_INFINITY); + + /// A unit vector pointing along the positive X axis. pub const X: Self = Self::new(1.0, 0.0, 0.0, 0.0); - /// A unit-length vector pointing along the positive Y axis. + /// A unit vector pointing along the positive Y axis. pub const Y: Self = Self::new(0.0, 1.0, 0.0, 0.0); - /// A unit-length vector pointing along the positive Z axis. + /// A unit vector pointing along the positive Z axis. pub const Z: Self = Self::new(0.0, 0.0, 1.0, 0.0); - /// A unit-length vector pointing along the positive W axis. + /// A unit vector pointing along the positive W axis. pub const W: Self = Self::new(0.0, 0.0, 0.0, 1.0); - /// A unit-length vector pointing along the negative X axis. + /// A unit vector pointing along the negative X axis. pub const NEG_X: Self = Self::new(-1.0, 0.0, 0.0, 0.0); - /// A unit-length vector pointing along the negative Y axis. + /// A unit vector pointing along the negative Y axis. pub const NEG_Y: Self = Self::new(0.0, -1.0, 0.0, 0.0); - /// A unit-length vector pointing along the negative Z axis. + /// A unit vector pointing along the negative Z axis. pub const NEG_Z: Self = Self::new(0.0, 0.0, -1.0, 0.0); - /// A unit-length vector pointing along the negative W axis. + /// A unit vector pointing along the negative W axis. pub const NEG_W: Self = Self::new(0.0, 0.0, 0.0, -1.0); /// The unit axes. @@ -76,12 +88,14 @@ impl Vec4 { /// Creates a new vector. #[inline(always)] + #[must_use] pub const fn new(x: f32, y: f32, z: f32, w: f32) -> Self { unsafe { UnionCast { a: [x, y, z, w] }.v } } /// Creates a vector with all elements set to `v`. #[inline] + #[must_use] pub const fn splat(v: f32) -> Self { unsafe { UnionCast { a: [v; 4] }.v } } @@ -92,6 +106,7 @@ impl Vec4 { /// A true element in the mask uses the corresponding element from `if_true`, and false /// uses the element from `if_false`. #[inline] + #[must_use] pub fn select(mask: BVec4A, if_true: Self, if_false: Self) -> Self { Self(unsafe { _mm_or_ps( @@ -103,12 +118,14 @@ impl Vec4 { /// Creates a new vector from an array. #[inline] + #[must_use] pub const fn from_array(a: [f32; 4]) -> Self { Self::new(a[0], a[1], a[2], a[3]) } /// `[x, y, z, w]` #[inline] + #[must_use] pub const fn to_array(&self) -> [f32; 4] { unsafe { *(self as *const Vec4 as *const [f32; 4]) } } @@ -119,6 +136,7 @@ impl Vec4 { /// /// Panics if `slice` is less than 4 elements long. #[inline] + #[must_use] pub const fn from_slice(slice: &[f32]) -> Self { Self::new(slice[0], slice[1], slice[2], slice[3]) } @@ -136,12 +154,13 @@ impl Vec4 { } } - /// Creates a 2D vector from the `x`, `y` and `z` elements of `self`, discarding `w`. + /// Creates a 3D vector from the `x`, `y` and `z` elements of `self`, discarding `w`. /// - /// Truncation to `Vec3` may also be performed by using `self.xyz()` or `Vec3::from()`. + /// Truncation to [`Vec3`] may also be performed by using [`self.xyz()`][crate::swizzles::Vec4Swizzles::xyz()]. /// - /// To truncate to `Vec3A` use `Vec3A::from()`. + /// To truncate to [`Vec3A`] use [`Vec3A::from()`]. #[inline] + #[must_use] pub fn truncate(self) -> Vec3 { use crate::swizzles::Vec4Swizzles; self.xyz() @@ -149,12 +168,14 @@ impl Vec4 { /// Computes the dot product of `self` and `rhs`. #[inline] + #[must_use] pub fn dot(self, rhs: Self) -> f32 { unsafe { dot4(self.0, rhs.0) } } /// Returns a vector where every component is the dot product of `self` and `rhs`. #[inline] + #[must_use] pub fn dot_into_vec(self, rhs: Self) -> Self { Self(unsafe { dot4_into_m128(self.0, rhs.0) }) } @@ -163,6 +184,7 @@ impl Vec4 { /// /// In other words this computes `[self.x.min(rhs.x), self.y.min(rhs.y), ..]`. #[inline] + #[must_use] pub fn min(self, rhs: Self) -> Self { Self(unsafe { _mm_min_ps(self.0, rhs.0) }) } @@ -171,6 +193,7 @@ impl Vec4 { /// /// In other words this computes `[self.x.max(rhs.x), self.y.max(rhs.y), ..]`. #[inline] + #[must_use] pub fn max(self, rhs: Self) -> Self { Self(unsafe { _mm_max_ps(self.0, rhs.0) }) } @@ -183,6 +206,7 @@ impl Vec4 { /// /// Will panic if `min` is greater than `max` when `glam_assert` is enabled. #[inline] + #[must_use] pub fn clamp(self, min: Self, max: Self) -> Self { glam_assert!(min.cmple(max).all(), "clamp: expected min <= max"); self.max(min).min(max) @@ -192,6 +216,7 @@ impl Vec4 { /// /// In other words this computes `min(x, y, ..)`. #[inline] + #[must_use] pub fn min_element(self) -> f32 { unsafe { let v = self.0; @@ -205,6 +230,7 @@ impl Vec4 { /// /// In other words this computes `max(x, y, ..)`. #[inline] + #[must_use] pub fn max_element(self) -> f32 { unsafe { let v = self.0; @@ -220,6 +246,7 @@ impl Vec4 { /// In other words, this computes `[self.x == rhs.x, self.y == rhs.y, ..]` for all /// elements. #[inline] + #[must_use] pub fn cmpeq(self, rhs: Self) -> BVec4A { BVec4A(unsafe { _mm_cmpeq_ps(self.0, rhs.0) }) } @@ -230,6 +257,7 @@ impl Vec4 { /// In other words this computes `[self.x != rhs.x, self.y != rhs.y, ..]` for all /// elements. #[inline] + #[must_use] pub fn cmpne(self, rhs: Self) -> BVec4A { BVec4A(unsafe { _mm_cmpneq_ps(self.0, rhs.0) }) } @@ -240,6 +268,7 @@ impl Vec4 { /// In other words this computes `[self.x >= rhs.x, self.y >= rhs.y, ..]` for all /// elements. #[inline] + #[must_use] pub fn cmpge(self, rhs: Self) -> BVec4A { BVec4A(unsafe { _mm_cmpge_ps(self.0, rhs.0) }) } @@ -250,6 +279,7 @@ impl Vec4 { /// In other words this computes `[self.x > rhs.x, self.y > rhs.y, ..]` for all /// elements. #[inline] + #[must_use] pub fn cmpgt(self, rhs: Self) -> BVec4A { BVec4A(unsafe { _mm_cmpgt_ps(self.0, rhs.0) }) } @@ -260,6 +290,7 @@ impl Vec4 { /// In other words this computes `[self.x <= rhs.x, self.y <= rhs.y, ..]` for all /// elements. #[inline] + #[must_use] pub fn cmple(self, rhs: Self) -> BVec4A { BVec4A(unsafe { _mm_cmple_ps(self.0, rhs.0) }) } @@ -270,12 +301,14 @@ impl Vec4 { /// In other words this computes `[self.x < rhs.x, self.y < rhs.y, ..]` for all /// elements. #[inline] + #[must_use] pub fn cmplt(self, rhs: Self) -> BVec4A { BVec4A(unsafe { _mm_cmplt_ps(self.0, rhs.0) }) } /// Returns a vector containing the absolute value of each element of `self`. #[inline] + #[must_use] pub fn abs(self) -> Self { Self(unsafe { crate::sse2::m128_abs(self.0) }) } @@ -286,6 +319,7 @@ impl Vec4 { /// - `-1.0` if the number is negative, `-0.0` or `NEG_INFINITY` /// - `NAN` if the number is `NAN` #[inline] + #[must_use] pub fn signum(self) -> Self { unsafe { let result = Self(_mm_or_ps(_mm_and_ps(self.0, Self::NEG_ONE.0), Self::ONE.0)); @@ -296,6 +330,7 @@ impl Vec4 { /// Returns a vector with signs of `rhs` and the magnitudes of `self`. #[inline] + #[must_use] pub fn copysign(self, rhs: Self) -> Self { unsafe { let mask = Self::splat(-0.0); @@ -311,6 +346,7 @@ impl Vec4 { /// A negative element results in a `1` bit and a positive element in a `0` bit. Element `x` goes /// into the first lowest bit, element `y` into the second, etc. #[inline] + #[must_use] pub fn is_negative_bitmask(self) -> u32 { unsafe { _mm_movemask_ps(self.0) as u32 } } @@ -318,12 +354,14 @@ impl Vec4 { /// Returns `true` if, and only if, all elements are finite. If any element is either /// `NaN`, positive or negative infinity, this will return `false`. #[inline] + #[must_use] pub fn is_finite(self) -> bool { self.x.is_finite() && self.y.is_finite() && self.z.is_finite() && self.w.is_finite() } /// Returns `true` if any elements are `NaN`. #[inline] + #[must_use] pub fn is_nan(self) -> bool { self.is_nan_mask().any() } @@ -332,6 +370,7 @@ impl Vec4 { /// /// In other words, this computes `[x.is_nan(), y.is_nan(), z.is_nan(), w.is_nan()]`. #[inline] + #[must_use] pub fn is_nan_mask(self) -> BVec4A { BVec4A(unsafe { _mm_cmpunord_ps(self.0, self.0) }) } @@ -339,6 +378,7 @@ impl Vec4 { /// Computes the length of `self`. #[doc(alias = "magnitude")] #[inline] + #[must_use] pub fn length(self) -> f32 { unsafe { let dot = dot4_in_x(self.0, self.0); @@ -351,6 +391,7 @@ impl Vec4 { /// This is faster than `length()` as it avoids a square root operation. #[doc(alias = "magnitude2")] #[inline] + #[must_use] pub fn length_squared(self) -> f32 { self.dot(self) } @@ -359,6 +400,7 @@ impl Vec4 { /// /// For valid results, `self` must _not_ be of length zero. #[inline] + #[must_use] pub fn length_recip(self) -> f32 { unsafe { let dot = dot4_in_x(self.0, self.0); @@ -368,27 +410,55 @@ impl Vec4 { /// Computes the Euclidean distance between two points in space. #[inline] + #[must_use] pub fn distance(self, rhs: Self) -> f32 { (self - rhs).length() } /// Compute the squared euclidean distance between two points in space. #[inline] + #[must_use] pub fn distance_squared(self, rhs: Self) -> f32 { (self - rhs).length_squared() } + /// Returns the element-wise quotient of [Euclidean division] of `self` by `rhs`. + #[inline] + #[must_use] + pub fn div_euclid(self, rhs: Self) -> Self { + Self::new( + math::div_euclid(self.x, rhs.x), + math::div_euclid(self.y, rhs.y), + math::div_euclid(self.z, rhs.z), + math::div_euclid(self.w, rhs.w), + ) + } + + /// Returns the element-wise remainder of [Euclidean division] of `self` by `rhs`. + /// + /// [Euclidean division]: f32::rem_euclid + #[inline] + #[must_use] + pub fn rem_euclid(self, rhs: Self) -> Self { + Self::new( + math::rem_euclid(self.x, rhs.x), + math::rem_euclid(self.y, rhs.y), + math::rem_euclid(self.z, rhs.z), + math::rem_euclid(self.w, rhs.w), + ) + } + /// Returns `self` normalized to length 1.0. /// /// For valid results, `self` must _not_ be of length zero, nor very close to zero. /// - /// See also [`Self::try_normalize`] and [`Self::normalize_or_zero`]. + /// See also [`Self::try_normalize()`] and [`Self::normalize_or_zero()`]. /// /// Panics /// /// Will panic if `self` is zero length when `glam_assert` is enabled. - #[must_use] #[inline] + #[must_use] pub fn normalize(self) -> Self { unsafe { let length = _mm_sqrt_ps(dot4_into_m128(self.0, self.0)); @@ -404,9 +474,9 @@ impl Vec4 { /// In particular, if the input is zero (or very close to zero), or non-finite, /// the result of this operation will be `None`. /// - /// See also [`Self::normalize_or_zero`]. - #[must_use] + /// See also [`Self::normalize_or_zero()`]. #[inline] + #[must_use] pub fn try_normalize(self) -> Option<Self> { let rcp = self.length_recip(); if rcp.is_finite() && rcp > 0.0 { @@ -421,9 +491,9 @@ impl Vec4 { /// In particular, if the input is zero (or very close to zero), or non-finite, /// the result of this operation will be zero. /// - /// See also [`Self::try_normalize`]. - #[must_use] + /// See also [`Self::try_normalize()`]. #[inline] + #[must_use] pub fn normalize_or_zero(self) -> Self { let rcp = self.length_recip(); if rcp.is_finite() && rcp > 0.0 { @@ -437,9 +507,10 @@ impl Vec4 { /// /// Uses a precision threshold of `1e-6`. #[inline] + #[must_use] pub fn is_normalized(self) -> bool { // TODO: do something with epsilon - (self.length_squared() - 1.0).abs() <= 1e-4 + math::abs(self.length_squared() - 1.0) <= 1e-4 } /// Returns the vector projection of `self` onto `rhs`. @@ -449,8 +520,8 @@ impl Vec4 { /// # Panics /// /// Will panic if `rhs` is zero length when `glam_assert` is enabled. - #[must_use] #[inline] + #[must_use] pub fn project_onto(self, rhs: Self) -> Self { let other_len_sq_rcp = rhs.dot(rhs).recip(); glam_assert!(other_len_sq_rcp.is_finite()); @@ -467,8 +538,8 @@ impl Vec4 { /// # Panics /// /// Will panic if `rhs` has a length of zero when `glam_assert` is enabled. - #[must_use] #[inline] + #[must_use] pub fn reject_from(self, rhs: Self) -> Self { self - self.project_onto(rhs) } @@ -480,8 +551,8 @@ impl Vec4 { /// # Panics /// /// Will panic if `rhs` is not normalized when `glam_assert` is enabled. - #[must_use] #[inline] + #[must_use] pub fn project_onto_normalized(self, rhs: Self) -> Self { glam_assert!(rhs.is_normalized()); rhs * self.dot(rhs) @@ -497,8 +568,8 @@ impl Vec4 { /// # Panics /// /// Will panic if `rhs` is not normalized when `glam_assert` is enabled. - #[must_use] #[inline] + #[must_use] pub fn reject_from_normalized(self, rhs: Self) -> Self { self - self.project_onto_normalized(rhs) } @@ -506,6 +577,7 @@ impl Vec4 { /// Returns a vector containing the nearest integer to a number for each element of `self`. /// Round half-way cases away from 0.0. #[inline] + #[must_use] pub fn round(self) -> Self { Self(unsafe { m128_round(self.0) }) } @@ -513,6 +585,7 @@ impl Vec4 { /// Returns a vector containing the largest integer less than or equal to a number for each /// element of `self`. #[inline] + #[must_use] pub fn floor(self) -> Self { Self(unsafe { m128_floor(self.0) }) } @@ -520,15 +593,25 @@ impl Vec4 { /// Returns a vector containing the smallest integer greater than or equal to a number for /// each element of `self`. #[inline] + #[must_use] pub fn ceil(self) -> Self { Self(unsafe { m128_ceil(self.0) }) } + /// Returns a vector containing the integer part each element of `self`. This means numbers are + /// always truncated towards zero. + #[inline] + #[must_use] + pub fn trunc(self) -> Self { + Self(unsafe { m128_trunc(self.0) }) + } + /// Returns a vector containing the fractional part of the vector, e.g. `self - /// self.floor()`. /// /// Note that this is fast but not precise for large numbers. #[inline] + #[must_use] pub fn fract(self) -> Self { self - self.floor() } @@ -536,23 +619,31 @@ impl Vec4 { /// Returns a vector containing `e^self` (the exponential function) for each element of /// `self`. #[inline] + #[must_use] pub fn exp(self) -> Self { - Self::new(self.x.exp(), self.y.exp(), self.z.exp(), self.w.exp()) + Self::new( + math::exp(self.x), + math::exp(self.y), + math::exp(self.z), + math::exp(self.w), + ) } /// Returns a vector containing each element of `self` raised to the power of `n`. #[inline] + #[must_use] pub fn powf(self, n: f32) -> Self { Self::new( - self.x.powf(n), - self.y.powf(n), - self.z.powf(n), - self.w.powf(n), + math::powf(self.x, n), + math::powf(self.y, n), + math::powf(self.z, n), + math::powf(self.w, n), ) } /// Returns a vector containing the reciprocal `1.0/n` of each element of `self`. #[inline] + #[must_use] pub fn recip(self) -> Self { Self(unsafe { _mm_div_ps(Self::ONE.0, self.0) }) } @@ -564,6 +655,7 @@ impl Vec4 { /// extrapolated. #[doc(alias = "mix")] #[inline] + #[must_use] pub fn lerp(self, rhs: Self, s: f32) -> Self { self + ((rhs - self) * s) } @@ -578,6 +670,7 @@ impl Vec4 { /// For more see /// [comparing floating point numbers](https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/). #[inline] + #[must_use] pub fn abs_diff_eq(self, rhs: Self, max_abs_diff: f32) -> bool { self.sub(rhs).abs().cmple(Self::splat(max_abs_diff)).all() } @@ -588,33 +681,38 @@ impl Vec4 { /// /// Will panic if `min` is greater than `max` when `glam_assert` is enabled. #[inline] + #[must_use] pub fn clamp_length(self, min: f32, max: f32) -> Self { glam_assert!(min <= max); let length_sq = self.length_squared(); if length_sq < min * min { - self * (length_sq.sqrt().recip() * min) + min * (self / math::sqrt(length_sq)) } else if length_sq > max * max { - self * (length_sq.sqrt().recip() * max) + max * (self / math::sqrt(length_sq)) } else { self } } /// Returns a vector with a length no more than `max` + #[inline] + #[must_use] pub fn clamp_length_max(self, max: f32) -> Self { let length_sq = self.length_squared(); if length_sq > max * max { - self * (length_sq.sqrt().recip() * max) + max * (self / math::sqrt(length_sq)) } else { self } } /// Returns a vector with a length no less than `min` + #[inline] + #[must_use] pub fn clamp_length_min(self, min: f32) -> Self { let length_sq = self.length_squared(); if length_sq < min * min { - self * (length_sq.sqrt().recip() * min) + min * (self / math::sqrt(length_sq)) } else { self } @@ -628,6 +726,7 @@ impl Vec4 { /// and will be heavily dependant on designing algorithms with specific target hardware in /// mind. #[inline] + #[must_use] pub fn mul_add(self, a: Self, b: Self) -> Self { #[cfg(target_feature = "fma")] unsafe { @@ -635,30 +734,61 @@ impl Vec4 { } #[cfg(not(target_feature = "fma"))] Self::new( - self.x.mul_add(a.x, b.x), - self.y.mul_add(a.y, b.y), - self.z.mul_add(a.z, b.z), - self.w.mul_add(a.w, b.w), + math::mul_add(self.x, a.x, b.x), + math::mul_add(self.y, a.y, b.y), + math::mul_add(self.z, a.z, b.z), + math::mul_add(self.w, a.w, b.w), ) } /// Casts all elements of `self` to `f64`. #[inline] + #[must_use] pub fn as_dvec4(&self) -> crate::DVec4 { crate::DVec4::new(self.x as f64, self.y as f64, self.z as f64, self.w as f64) } + /// Casts all elements of `self` to `i16`. + #[inline] + #[must_use] + pub fn as_i16vec4(&self) -> crate::I16Vec4 { + crate::I16Vec4::new(self.x as i16, self.y as i16, self.z as i16, self.w as i16) + } + + /// Casts all elements of `self` to `u16`. + #[inline] + #[must_use] + pub fn as_u16vec4(&self) -> crate::U16Vec4 { + crate::U16Vec4::new(self.x as u16, self.y as u16, self.z as u16, self.w as u16) + } + /// Casts all elements of `self` to `i32`. #[inline] + #[must_use] pub fn as_ivec4(&self) -> crate::IVec4 { crate::IVec4::new(self.x as i32, self.y as i32, self.z as i32, self.w as i32) } /// Casts all elements of `self` to `u32`. #[inline] + #[must_use] pub fn as_uvec4(&self) -> crate::UVec4 { crate::UVec4::new(self.x as u32, self.y as u32, self.z as u32, self.w as u32) } + + /// Casts all elements of `self` to `i64`. + #[inline] + #[must_use] + pub fn as_i64vec4(&self) -> crate::I64Vec4 { + crate::I64Vec4::new(self.x as i64, self.y as i64, self.z as i64, self.w as i64) + } + + /// Casts all elements of `self` to `u64`. + #[inline] + #[must_use] + pub fn as_u64vec4(&self) -> crate::U64Vec4 { + crate::U64Vec4::new(self.x as u64, self.y as u64, self.z as u64, self.w as u64) + } } impl Default for Vec4 { |