summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTreehugger Robot <treehugger-gerrit@google.com>2021-01-12 15:40:07 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2021-01-12 15:40:07 +0000
commit5e61866b794fcfeeb6504e42beffc316f1792fea (patch)
treecd0ba6275c0e7f7037cfb7b8b5797dd5ca570ffc
parent0fd661add5c561173b45e5375fed0cfe46a2df38 (diff)
parentafeb3121b3d84d238473cb09da89c268a3a2eceb (diff)
downloadppv-lite86-5e61866b794fcfeeb6504e42beffc316f1792fea.tar.gz
Merge "Upgrade rust/crates/ppv-lite86 to 0.2.10"
-rw-r--r--.cargo_vcs_info.json2
-rw-r--r--Android.bp2
-rw-r--r--Cargo.toml2
-rw-r--r--Cargo.toml.orig2
-rw-r--r--METADATA8
-rw-r--r--src/generic.rs8
-rw-r--r--src/lib.rs8
-rw-r--r--src/soft.rs11
-rw-r--r--src/types.rs311
-rw-r--r--src/x86_64/mod.rs2
-rw-r--r--src/x86_64/sse2.rs103
11 files changed, 225 insertions, 234 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 0cd0fb1..adb1fc4 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "99df5dba5fa2471d51f58e3691621e5cd22d464d"
+ "sha1": "3012849c2d9c50228a780031e7c200b193a6b4fa"
}
}
diff --git a/Android.bp b/Android.bp
index 5a1adfe..0cbc067 100644
--- a/Android.bp
+++ b/Android.bp
@@ -2,7 +2,6 @@
rust_library {
name: "libppv_lite86",
- // has rustc warnings
host_supported: true,
crate_name: "ppv_lite86",
srcs: ["src/lib.rs"],
@@ -16,7 +15,6 @@ rust_library {
rust_defaults {
name: "ppv-lite86_defaults",
crate_name: "ppv_lite86",
- // has rustc warnings
srcs: ["src/lib.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
diff --git a/Cargo.toml b/Cargo.toml
index 6b87ae6..6ffa7ba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@
[package]
edition = "2018"
name = "ppv-lite86"
-version = "0.2.9"
+version = "0.2.10"
authors = ["The CryptoCorrosion Contributors"]
description = "Implementation of the crypto-simd API for x86"
keywords = ["crypto", "simd", "x86"]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 84a59ad..8f3fb52 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
[package]
name = "ppv-lite86"
-version = "0.2.9"
+version = "0.2.10"
authors = ["The CryptoCorrosion Contributors"]
edition = "2018"
license = "MIT/Apache-2.0"
diff --git a/METADATA b/METADATA
index 6fc2138..1ac3aef 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/ppv-lite86/ppv-lite86-0.2.9.crate"
+ value: "https://static.crates.io/crates/ppv-lite86/ppv-lite86-0.2.10.crate"
}
- version: "0.2.9"
+ version: "0.2.10"
license_type: NOTICE
last_upgrade_date {
year: 2020
- month: 8
- day: 21
+ month: 11
+ day: 2
}
}
diff --git a/src/generic.rs b/src/generic.rs
index d26266c..f0e83d9 100644
--- a/src/generic.rs
+++ b/src/generic.rs
@@ -1,8 +1,8 @@
#![allow(non_camel_case_types)]
-use core::ops::*;
use crate::soft::{x2, x4};
use crate::types::*;
+use core::ops::*;
#[repr(C)]
#[derive(Clone, Copy)]
@@ -61,12 +61,6 @@ impl vec256_storage {
self.v128
}
}
-impl From<[u64; 4]> for vec256_storage {
- #[inline]
- fn from(q: [u64; 4]) -> Self {
- Self { v128: [[0, 1].into(), [2, 3].into()] }
- }
-}
impl From<vec256_storage> for [u64; 4] {
#[inline]
fn from(q: vec256_storage) -> Self {
diff --git a/src/lib.rs b/src/lib.rs
index 2f32286..a4fbabe 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -12,14 +12,14 @@ mod soft;
mod types;
pub use self::types::*;
-#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
+#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
pub mod x86_64;
-#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
+#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
use self::x86_64 as arch;
-#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
+#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
pub mod generic;
-#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
+#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
use self::generic as arch;
pub use self::arch::{vec128_storage, vec256_storage, vec512_storage};
diff --git a/src/soft.rs b/src/soft.rs
index d12dac5..8976c48 100644
--- a/src/soft.rs
+++ b/src/soft.rs
@@ -1,9 +1,9 @@
//! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD.
-use core::marker::PhantomData;
-use core::ops::*;
use crate::types::*;
use crate::{vec128_storage, vec256_storage, vec512_storage};
+use core::marker::PhantomData;
+use core::ops::*;
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
@@ -238,7 +238,12 @@ macro_rules! fwd_unop_x4 {
($fn:ident) => {
#[inline(always)]
fn $fn(self) -> Self {
- x4([self.0[0].$fn(), self.0[1].$fn(), self.0[2].$fn(), self.0[3].$fn()])
+ x4([
+ self.0[0].$fn(),
+ self.0[1].$fn(),
+ self.0[2].$fn(),
+ self.0[3].$fn(),
+ ])
}
};
}
diff --git a/src/types.rs b/src/types.rs
index 119b6bb..a282670 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -1,3 +1,4 @@
+#![allow(non_camel_case_types)]
use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
pub trait AndNot {
@@ -44,182 +45,178 @@ pub trait RotateEachWord64 {
pub trait RotateEachWord128 {}
-#[allow(non_camel_case_types)]
-mod types {
- //! Vector type naming scheme:
- //! uN[xP]xL
- //! Unsigned; N-bit words * P bits per lane * L lanes
- //!
- //! A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
- //! wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
- //! slow inter-lane operations.
+// Vector type naming scheme:
+// uN[xP]xL
+// Unsigned; N-bit words * P bits per lane * L lanes
+//
+// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
+// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
+// slow inter-lane operations.
- use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
- use crate::{ArithOps, BitOps128, BitOps32, BitOps64, Machine, Store, StoreBytes};
+use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
- pub trait UnsafeFrom<T> {
- unsafe fn unsafe_from(t: T) -> Self;
- }
+#[allow(clippy::missing_safety_doc)]
+pub trait UnsafeFrom<T> {
+ unsafe fn unsafe_from(t: T) -> Self;
+}
- /// A vector composed of two elements, which may be words or themselves vectors.
- pub trait Vec2<W> {
- fn extract(self, i: u32) -> W;
- fn insert(self, w: W, i: u32) -> Self;
- }
+/// A vector composed of two elements, which may be words or themselves vectors.
+pub trait Vec2<W> {
+ fn extract(self, i: u32) -> W;
+ fn insert(self, w: W, i: u32) -> Self;
+}
- /// A vector composed of four elements, which may be words or themselves vectors.
- pub trait Vec4<W> {
- fn extract(self, i: u32) -> W;
- fn insert(self, w: W, i: u32) -> Self;
- }
+/// A vector composed of four elements, which may be words or themselves vectors.
+pub trait Vec4<W> {
+ fn extract(self, i: u32) -> W;
+ fn insert(self, w: W, i: u32) -> Self;
+}
- // TODO: multiples of 4 should inherit this
- /// A vector composed of four words; depending on their size, operations may cross lanes.
- pub trait Words4 {
- fn shuffle1230(self) -> Self;
- fn shuffle2301(self) -> Self;
- fn shuffle3012(self) -> Self;
- }
+// TODO: multiples of 4 should inherit this
+/// A vector composed of four words; depending on their size, operations may cross lanes.
+pub trait Words4 {
+ fn shuffle1230(self) -> Self;
+ fn shuffle2301(self) -> Self;
+ fn shuffle3012(self) -> Self;
+}
- /// A vector composed one or more lanes each composed of four words.
- pub trait LaneWords4 {
- fn shuffle_lane_words1230(self) -> Self;
- fn shuffle_lane_words2301(self) -> Self;
- fn shuffle_lane_words3012(self) -> Self;
- }
+/// A vector composed one or more lanes each composed of four words.
+pub trait LaneWords4 {
+ fn shuffle_lane_words1230(self) -> Self;
+ fn shuffle_lane_words2301(self) -> Self;
+ fn shuffle_lane_words3012(self) -> Self;
+}
- // TODO: make this a part of BitOps
- /// Exchange neigboring ranges of bits of the specified size
- pub trait Swap64 {
- fn swap1(self) -> Self;
- fn swap2(self) -> Self;
- fn swap4(self) -> Self;
- fn swap8(self) -> Self;
- fn swap16(self) -> Self;
- fn swap32(self) -> Self;
- fn swap64(self) -> Self;
- }
+// TODO: make this a part of BitOps
+/// Exchange neigboring ranges of bits of the specified size
+pub trait Swap64 {
+ fn swap1(self) -> Self;
+ fn swap2(self) -> Self;
+ fn swap4(self) -> Self;
+ fn swap8(self) -> Self;
+ fn swap16(self) -> Self;
+ fn swap32(self) -> Self;
+ fn swap64(self) -> Self;
+}
- pub trait u32x4<M: Machine>:
- BitOps32
- + Store<vec128_storage>
- + ArithOps
- + Vec4<u32>
- + Words4
- + LaneWords4
- + StoreBytes
- + MultiLane<[u32; 4]>
- + Into<vec128_storage>
- {
+pub trait u32x4<M: Machine>:
+ BitOps32
+ + Store<vec128_storage>
+ + ArithOps
+ + Vec4<u32>
+ + Words4
+ + LaneWords4
+ + StoreBytes
+ + MultiLane<[u32; 4]>
+ + Into<vec128_storage>
+{
}
- pub trait u64x2<M: Machine>:
- BitOps64
- + Store<vec128_storage>
- + ArithOps
- + Vec2<u64>
- + MultiLane<[u64; 2]>
- + Into<vec128_storage>
- {
+pub trait u64x2<M: Machine>:
+ BitOps64
+ + Store<vec128_storage>
+ + ArithOps
+ + Vec2<u64>
+ + MultiLane<[u64; 2]>
+ + Into<vec128_storage>
+{
}
- pub trait u128x1<M: Machine>:
- BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
- {
+pub trait u128x1<M: Machine>:
+ BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
+{
}
- pub trait u32x4x2<M: Machine>:
- BitOps32
- + Store<vec256_storage>
- + Vec2<M::u32x4>
- + MultiLane<[M::u32x4; 2]>
- + ArithOps
- + Into<vec256_storage>
- {
+pub trait u32x4x2<M: Machine>:
+ BitOps32
+ + Store<vec256_storage>
+ + Vec2<M::u32x4>
+ + MultiLane<[M::u32x4; 2]>
+ + ArithOps
+ + Into<vec256_storage>
+{
}
- pub trait u64x2x2<M: Machine>:
- BitOps64
- + Store<vec256_storage>
- + Vec2<M::u64x2>
- + MultiLane<[M::u64x2; 2]>
- + ArithOps
- + StoreBytes
- + Into<vec256_storage>
- {
+pub trait u64x2x2<M: Machine>:
+ BitOps64
+ + Store<vec256_storage>
+ + Vec2<M::u64x2>
+ + MultiLane<[M::u64x2; 2]>
+ + ArithOps
+ + StoreBytes
+ + Into<vec256_storage>
+{
}
- pub trait u64x4<M: Machine>:
- BitOps64
- + Store<vec256_storage>
- + Vec4<u64>
- + MultiLane<[u64; 4]>
- + ArithOps
- + Words4
- + StoreBytes
- + Into<vec256_storage>
- {
+pub trait u64x4<M: Machine>:
+ BitOps64
+ + Store<vec256_storage>
+ + Vec4<u64>
+ + MultiLane<[u64; 4]>
+ + ArithOps
+ + Words4
+ + StoreBytes
+ + Into<vec256_storage>
+{
}
- pub trait u128x2<M: Machine>:
- BitOps128
- + Store<vec256_storage>
- + Vec2<M::u128x1>
- + MultiLane<[M::u128x1; 2]>
- + Swap64
- + Into<vec256_storage>
- {
+pub trait u128x2<M: Machine>:
+ BitOps128
+ + Store<vec256_storage>
+ + Vec2<M::u128x1>
+ + MultiLane<[M::u128x1; 2]>
+ + Swap64
+ + Into<vec256_storage>
+{
}
- pub trait u32x4x4<M: Machine>:
- BitOps32
- + Store<vec512_storage>
- + Vec4<M::u32x4>
- + MultiLane<[M::u32x4; 4]>
- + ArithOps
- + LaneWords4
- + Into<vec512_storage>
- {
+pub trait u32x4x4<M: Machine>:
+ BitOps32
+ + Store<vec512_storage>
+ + Vec4<M::u32x4>
+ + MultiLane<[M::u32x4; 4]>
+ + ArithOps
+ + LaneWords4
+ + Into<vec512_storage>
+{
}
- pub trait u64x2x4<M: Machine>:
- BitOps64
- + Store<vec512_storage>
- + Vec4<M::u64x2>
- + MultiLane<[M::u64x2; 4]>
- + ArithOps
- + Into<vec512_storage>
- {
+pub trait u64x2x4<M: Machine>:
+ BitOps64
+ + Store<vec512_storage>
+ + Vec4<M::u64x2>
+ + MultiLane<[M::u64x2; 4]>
+ + ArithOps
+ + Into<vec512_storage>
+{
}
- // TODO: Words4
- pub trait u128x4<M: Machine>:
- BitOps128
- + Store<vec512_storage>
- + Vec4<M::u128x1>
- + MultiLane<[M::u128x1; 4]>
- + Swap64
- + Into<vec512_storage>
- {
+// TODO: Words4
+pub trait u128x4<M: Machine>:
+ BitOps128
+ + Store<vec512_storage>
+ + Vec4<M::u128x1>
+ + MultiLane<[M::u128x1; 4]>
+ + Swap64
+ + Into<vec512_storage>
+{
}
- /// A vector composed of multiple 128-bit lanes.
- pub trait MultiLane<Lanes> {
- /// Split a multi-lane vector into single-lane vectors.
- fn to_lanes(self) -> Lanes;
- /// Build a multi-lane vector from individual lanes.
- fn from_lanes(lanes: Lanes) -> Self;
- }
+/// A vector composed of multiple 128-bit lanes.
+pub trait MultiLane<Lanes> {
+ /// Split a multi-lane vector into single-lane vectors.
+ fn to_lanes(self) -> Lanes;
+ /// Build a multi-lane vector from individual lanes.
+ fn from_lanes(lanes: Lanes) -> Self;
+}
- /// Combine single vectors into a multi-lane vector.
- pub trait VZip<V> {
- fn vzip(self) -> V;
- }
+/// Combine single vectors into a multi-lane vector.
+pub trait VZip<V> {
+ fn vzip(self) -> V;
+}
- impl<V, T> VZip<V> for T
- where
- V: MultiLane<T>,
- {
- #[inline(always)]
- fn vzip(self) -> V {
- V::from_lanes(self)
- }
+impl<V, T> VZip<V> for T
+where
+ V: MultiLane<T>,
+{
+ #[inline(always)]
+ fn vzip(self) -> V {
+ V::from_lanes(self)
}
}
-pub use self::types::*;
pub trait Machine: Sized + Copy {
type u32x4: u32x4<Self>;
@@ -264,15 +261,27 @@ pub trait Machine: Sized + Copy {
unsafe { V::unsafe_read_be(input) }
}
+ /// # Safety
+ /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+ /// environment.
unsafe fn instance() -> Self;
}
pub trait Store<S> {
+ /// # Safety
+ /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+ /// environment.
unsafe fn unpack(p: S) -> Self;
}
pub trait StoreBytes {
+ /// # Safety
+ /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+ /// environment.
unsafe fn unsafe_read_le(input: &[u8]) -> Self;
+ /// # Safety
+ /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+ /// environment.
unsafe fn unsafe_read_be(input: &[u8]) -> Self;
fn write_le(self, out: &mut [u8]);
fn write_be(self, out: &mut [u8]);
diff --git a/src/x86_64/mod.rs b/src/x86_64/mod.rs
index ecf184f..d7455d0 100644
--- a/src/x86_64/mod.rs
+++ b/src/x86_64/mod.rs
@@ -1,7 +1,7 @@
// crate minimums: sse2, x86_64
-use core::arch::x86_64::{__m128i, __m256i};
use crate::types::*;
+use core::arch::x86_64::{__m128i, __m256i};
mod sse2;
diff --git a/src/x86_64/sse2.rs b/src/x86_64/sse2.rs
index 60e7681..bf0063f 100644
--- a/src/x86_64/sse2.rs
+++ b/src/x86_64/sse2.rs
@@ -166,28 +166,23 @@ macro_rules! impl_bitops128 {
macro_rules! rotr_32_s3 {
($name:ident, $k0:expr, $k1:expr) => {
- #[inline(always)]
- fn $name(self) -> Self {
- Self::new(unsafe {
- _mm_shuffle_epi8(
- self.x,
- _mm_set_epi64x($k0, $k1),
- )
- })
+ #[inline(always)]
+ fn $name(self) -> Self {
+ Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
}
};
}
macro_rules! rotr_32 {
($name:ident, $i:expr) => {
- #[inline(always)]
- fn $name(self) -> Self {
- Self::new(unsafe {
- _mm_or_si128(
- _mm_srli_epi32(self.x, $i as i32),
- _mm_slli_epi32(self.x, 32 - $i as i32),
- )
- })
- }
+ #[inline(always)]
+ fn $name(self) -> Self {
+ Self::new(unsafe {
+ _mm_or_si128(
+ _mm_srli_epi32(self.x, $i as i32),
+ _mm_slli_epi32(self.x, 32 - $i as i32),
+ )
+ })
+ }
};
}
impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<YesS3, S4, NI> {
@@ -228,28 +223,23 @@ impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<NoS3, S4, NI> {
macro_rules! rotr_64_s3 {
($name:ident, $k0:expr, $k1:expr) => {
- #[inline(always)]
- fn $name(self) -> Self {
- Self::new(unsafe {
- _mm_shuffle_epi8(
- self.x,
- _mm_set_epi64x($k0, $k1),
- )
- })
+ #[inline(always)]
+ fn $name(self) -> Self {
+ Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
}
};
}
macro_rules! rotr_64 {
($name:ident, $i:expr) => {
- #[inline(always)]
- fn $name(self) -> Self {
- Self::new(unsafe {
- _mm_or_si128(
- _mm_srli_epi64(self.x, $i as i32),
- _mm_slli_epi64(self.x, 64 - $i as i32),
- )
- })
- }
+ #[inline(always)]
+ fn $name(self) -> Self {
+ Self::new(unsafe {
+ _mm_or_si128(
+ _mm_srli_epi64(self.x, $i as i32),
+ _mm_slli_epi64(self.x, 64 - $i as i32),
+ )
+ })
+ }
};
}
impl<S4: Copy, NI: Copy> RotateEachWord32 for u64x2_sse2<YesS3, S4, NI> {
@@ -296,15 +286,15 @@ impl<S3: Copy, S4: Copy, NI: Copy> RotateEachWord64 for u64x2_sse2<S3, S4, NI> {
macro_rules! rotr_128 {
($name:ident, $i:expr) => {
- #[inline(always)]
- fn $name(self) -> Self {
- Self::new(unsafe {
- _mm_or_si128(
- _mm_srli_si128(self.x, $i as i32),
- _mm_slli_si128(self.x, 128 - $i as i32),
- )
- })
- }
+ #[inline(always)]
+ fn $name(self) -> Self {
+ Self::new(unsafe {
+ _mm_or_si128(
+ _mm_srli_si128(self.x, $i as i32),
+ _mm_slli_si128(self.x, 128 - $i as i32),
+ )
+ })
+ }
};
}
// TODO: completely unoptimized
@@ -411,7 +401,7 @@ impl<S3, S4, NI> MultiLane<[u128; 1]> for u128x1_sse2<S3, S4, NI> {
}
#[inline(always)]
fn from_lanes(xs: [u128; 1]) -> Self {
- unimplemented!()
+ unimplemented!("{:?}", xs)
}
}
@@ -780,7 +770,7 @@ impl<S4, NI> BSwap for u128x1_sse2<YesS3, S4, NI> {
impl<S4, NI> BSwap for u128x1_sse2<NoS3, S4, NI> {
#[inline(always)]
fn bswap(self) -> Self {
- Self::new(unsafe { unimplemented!() })
+ unimplemented!()
}
}
@@ -1078,6 +1068,7 @@ impl<W: PartialEq, G> PartialEq for x2<W, G> {
}
}
+#[allow(unused)]
#[inline(always)]
unsafe fn eq128_s4(x: __m128i, y: __m128i) -> bool {
let q = _mm_shuffle_epi32(_mm_cmpeq_epi64(x, y), 0b1100_0110);
@@ -1492,19 +1483,13 @@ pub mod avx2 {
impl<NI> ArithOps for u32x4x4_avx2<NI> where NI: Copy {}
macro_rules! shuf_lane_bytes {
($name:ident, $k0:expr, $k1:expr) => {
- #[inline(always)]
- fn $name(self) -> Self {
- Self::new(unsafe {
- [
- _mm256_shuffle_epi8(
- self.x[0],
- _mm256_set_epi64x($k0, $k1, $k0, $k1),
- ),
- _mm256_shuffle_epi8(
- self.x[1],
- _mm256_set_epi64x($k0, $k1, $k0, $k1),
- )
- ]
+ #[inline(always)]
+ fn $name(self) -> Self {
+ Self::new(unsafe {
+ [
+ _mm256_shuffle_epi8(self.x[0], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
+ _mm256_shuffle_epi8(self.x[1], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
+ ]
})
}
};
@@ -1522,7 +1507,7 @@ pub mod avx2 {
_mm256_or_si256(
_mm256_srli_epi32(self.x[1], $i as i32),
_mm256_slli_epi32(self.x[1], 32 - $i as i32),
- )
+ ),
]
})
}