diff options
author | Yi Kong <yikong@google.com> | 2022-02-25 17:02:53 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2022-02-25 17:02:53 +0000 |
commit | edb0ad5bb04b48aab7dd0978f0475edd3550de7c (patch) | |
tree | fb979fb4cf4f8052c8cc66b1ec9516d91fcd859b /Eigen/src/Core/arch/NEON/TypeCasting.h | |
parent | 8fd413e275f78a4c240f1442ce5cf77c73a20a55 (diff) | |
parent | bc0f5df265caa21a2120c22453655a7fcc941991 (diff) | |
download | eigen-edb0ad5bb04b48aab7dd0978f0475edd3550de7c.tar.gz |
Merge changes Iee153445,Iee274471 am: 79df15ea88 am: 10f298fc41 am: 7cb5001398 am: bc0f5df265aml_uwb_331910010aml_uwb_331820070aml_uwb_331613010aml_uwb_331611010aml_uwb_331410010aml_uwb_331310030aml_uwb_331115000aml_uwb_331015040aml_uwb_330810010aml_tz4_332714070aml_tz4_332714050aml_tz4_332714010aml_tz4_331910000aml_tz4_331314030aml_tz4_331314020aml_tz4_331314010aml_tz4_331012050aml_tz4_331012040aml_tz4_331012000aml_ase_331311020aml_ase_331112000aml_ase_331011020android13-mainline-uwb-releaseandroid13-mainline-tzdata4-releaseandroid13-mainline-appsearch-releaseaml_tz4_332714010
Original change: https://android-review.googlesource.com/c/platform/external/eigen/+/1999079
Change-Id: Ife39d10c8b23d3eeb174cd52f462f9d20527ad03
Diffstat (limited to 'Eigen/src/Core/arch/NEON/TypeCasting.h')
-rw-r--r-- | Eigen/src/Core/arch/NEON/TypeCasting.h | 1419 |
1 files changed, 1419 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/NEON/TypeCasting.h b/Eigen/src/Core/arch/NEON/TypeCasting.h new file mode 100644 index 000000000..54f97336e --- /dev/null +++ b/Eigen/src/Core/arch/NEON/TypeCasting.h @@ -0,0 +1,1419 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2018 Rasmus Munk Larsen <rmlarsen@google.com> +// Copyright (C) 2020 Antonio Sanchez <cantonios@google.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TYPE_CASTING_NEON_H +#define EIGEN_TYPE_CASTING_NEON_H + +namespace Eigen { + +namespace internal { + +//============================================================================== +// pcast, SrcType = float +//============================================================================== +template <> +struct type_casting_traits<float, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet4f, Packet4f>(const Packet4f& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet2f pcast<Packet2f, Packet2f>(const Packet2f& a) { + return a; +} + +template <> +struct type_casting_traits<float, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +struct type_casting_traits<float, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +// If float64 exists, first convert to that to keep as much precision as possible. +#if EIGEN_ARCH_ARM64 +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) { + // Discard second half of input. + return vcvtq_s64_f64(vcvt_f64_f32(vget_low_f32(a))); +} +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) { + // Discard second half of input. + return vcvtq_u64_f64(vcvt_f64_f32(vget_low_f32(a))); +} +#else +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) { + // Discard second half of input. + return vmovl_s32(vget_low_s32(vcvtq_s32_f32(a))); +} +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) { + // Discard second half of input. + return vmovl_u32(vget_low_u32(vcvtq_u32_f32(a))); +} +#endif // EIGEN_ARCH_ARM64 + +template <> +struct type_casting_traits<float, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) { + return vcvtq_s32_f32(a); +} +template <> +EIGEN_STRONG_INLINE Packet2i pcast<Packet2f, Packet2i>(const Packet2f& a) { + return vcvt_s32_f32(a); +} + +template <> +struct type_casting_traits<float, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet4f, Packet4ui>(const Packet4f& a) { + return vcvtq_u32_f32(a); +} +template <> +EIGEN_STRONG_INLINE Packet2ui pcast<Packet2f, Packet2ui>(const Packet2f& a) { + return vcvt_u32_f32(a); +} + +template <> +struct type_casting_traits<float, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet4f, Packet8s>(const Packet4f& a, const Packet4f& b) { + return vcombine_s16(vmovn_s32(vcvtq_s32_f32(a)), vmovn_s32(vcvtq_s32_f32(b))); +} +template <> +EIGEN_STRONG_INLINE Packet4s pcast<Packet2f, Packet4s>(const Packet2f& a, const Packet2f& b) { + return vmovn_s32(vcombine_s32(vcvt_s32_f32(a), vcvt_s32_f32(b))); +} + +template <> +struct type_casting_traits<float, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet4f, Packet8us>(const Packet4f& a, const Packet4f& b) { + return vcombine_u16(vmovn_u32(vcvtq_u32_f32(a)), vmovn_u32(vcvtq_u32_f32(b))); +} +template <> +EIGEN_STRONG_INLINE Packet4us pcast<Packet2f, Packet4us>(const Packet2f& a, const Packet2f& b) { + return vmovn_u32(vcombine_u32(vcvt_u32_f32(a), vcvt_u32_f32(b))); +} + +template <> +struct type_casting_traits<float, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet4f, Packet16c>(const Packet4f& a, const Packet4f& b, const Packet4f& c, + const Packet4f& d) { + const int16x8_t ab_s16 = pcast<Packet4f, Packet8s>(a, b); + const int16x8_t cd_s16 = pcast<Packet4f, Packet8s>(c, d); + return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16)); +} +template <> +EIGEN_STRONG_INLINE Packet8c pcast<Packet2f, Packet8c>(const Packet2f& a, const Packet2f& b, const Packet2f& c, + const Packet2f& d) { + const int16x4_t ab_s16 = pcast<Packet2f, Packet4s>(a, b); + const int16x4_t cd_s16 = pcast<Packet2f, Packet4s>(c, d); + return vmovn_s16(vcombine_s16(ab_s16, cd_s16)); +} + +template <> +struct type_casting_traits<float, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet4f, Packet16uc>(const Packet4f& a, const Packet4f& b, const Packet4f& c, + const Packet4f& d) { + const uint16x8_t ab_u16 = pcast<Packet4f, Packet8us>(a, b); + const uint16x8_t cd_u16 = pcast<Packet4f, Packet8us>(c, d); + return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16)); +} +template <> +EIGEN_STRONG_INLINE Packet8uc pcast<Packet2f, Packet8uc>(const Packet2f& a, const Packet2f& b, const Packet2f& c, + const Packet2f& d) { + const uint16x4_t ab_u16 = pcast<Packet2f, Packet4us>(a, b); + const uint16x4_t cd_u16 = pcast<Packet2f, Packet4us>(c, d); + return vmovn_u16(vcombine_u16(ab_u16, cd_u16)); +} + +//============================================================================== +// pcast, SrcType = int8_t +//============================================================================== +template <> +struct type_casting_traits<numext::int8_t, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet16c, Packet4f>(const Packet16c& a) { + // Discard all but first 4 bytes. + return vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a))))); +} +template <> +EIGEN_STRONG_INLINE Packet2f pcast<Packet8c, Packet2f>(const Packet8c& a) { + // Discard all but first 2 bytes. + return vcvt_f32_s32(vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a))))); +} + +template <> +struct type_casting_traits<numext::int8_t, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet16c, Packet2l>(const Packet16c& a) { + // Discard all but first two bytes. + return vmovl_s32(vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a)))))); +} + +template <> +struct type_casting_traits<numext::int8_t, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet16c, Packet2ul>(const Packet16c& a) { + return vreinterpretq_u64_s64(pcast<Packet16c, Packet2l>(a)); +} + +template <> +struct type_casting_traits<numext::int8_t, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet16c, Packet4i>(const Packet16c& a) { + // Discard all but first 4 bytes. + return vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a)))); +} +template <> +EIGEN_STRONG_INLINE Packet2i pcast<Packet8c, Packet2i>(const Packet8c& a) { + // Discard all but first 2 bytes. + return vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a)))); +} + +template <> +struct type_casting_traits<numext::int8_t, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet16c, Packet4ui>(const Packet16c& a) { + return vreinterpretq_u32_s32(pcast<Packet16c, Packet4i>(a)); +} +template <> +EIGEN_STRONG_INLINE Packet2ui pcast<Packet8c, Packet2ui>(const Packet8c& a) { + return vreinterpret_u32_s32(pcast<Packet8c, Packet2i>(a)); +} + +template <> +struct type_casting_traits<numext::int8_t, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet16c, Packet8s>(const Packet16c& a) { + // Discard second half of input. + return vmovl_s8(vget_low_s8(a)); +} +template <> +EIGEN_STRONG_INLINE Packet4s pcast<Packet8c, Packet4s>(const Packet8c& a) { + // Discard second half of input. + return vget_low_s16(vmovl_s8(a)); +} + +template <> +struct type_casting_traits<numext::int8_t, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet16c, Packet8us>(const Packet16c& a) { + return vreinterpretq_u16_s16(pcast<Packet16c, Packet8s>(a)); +} +template <> +EIGEN_STRONG_INLINE Packet4us pcast<Packet8c, Packet4us>(const Packet8c& a) { + return vreinterpret_u16_s16(pcast<Packet8c, Packet4s>(a)); +} + +template <> +struct type_casting_traits<numext::int8_t, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet16c, Packet16c>(const Packet16c& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet8c pcast<Packet8c, Packet8c>(const Packet8c& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet4c pcast<Packet4c, Packet4c>(const Packet4c& a) { + return a; +} + +template <> +struct type_casting_traits<numext::int8_t, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet16c, Packet16uc>(const Packet16c& a) { + return vreinterpretq_u8_s8(a); +} +template <> +EIGEN_STRONG_INLINE Packet8uc pcast<Packet8c, Packet8uc>(const Packet8c& a) { + return vreinterpret_u8_s8(a); +} +template <> +EIGEN_STRONG_INLINE Packet4uc pcast<Packet4c, Packet4uc>(const Packet4c& a) { + return static_cast<Packet4uc>(a); +} + +//============================================================================== +// pcast, SrcType = uint8_t +//============================================================================== +template <> +struct type_casting_traits<numext::uint8_t, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet16uc, Packet4f>(const Packet16uc& a) { + // Discard all but first 4 bytes. + return vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a))))); +} +template <> +EIGEN_STRONG_INLINE Packet2f pcast<Packet8uc, Packet2f>(const Packet8uc& a) { + // Discard all but first 2 bytes. + return vcvt_f32_u32(vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a))))); +} + +template <> +struct type_casting_traits<numext::uint8_t, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet16uc, Packet2ul>(const Packet16uc& a) { + // Discard all but first two bytes. + return vmovl_u32(vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a)))))); +} + +template <> +struct type_casting_traits<numext::uint8_t, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet16uc, Packet2l>(const Packet16uc& a) { + return vreinterpretq_s64_u64(pcast<Packet16uc, Packet2ul>(a)); +} + +template <> +struct type_casting_traits<numext::uint8_t, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet16uc, Packet4ui>(const Packet16uc& a) { + // Discard all but first 4 bytes. + return vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a)))); +} +template <> +EIGEN_STRONG_INLINE Packet2ui pcast<Packet8uc, Packet2ui>(const Packet8uc& a) { + // Discard all but first 2 bytes. + return vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a)))); +} + +template <> +struct type_casting_traits<numext::uint8_t, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet16uc, Packet4i>(const Packet16uc& a) { + return vreinterpretq_s32_u32(pcast<Packet16uc, Packet4ui>(a)); +} +template <> +EIGEN_STRONG_INLINE Packet2i pcast<Packet8uc, Packet2i>(const Packet8uc& a) { + return vreinterpret_s32_u32(pcast<Packet8uc, Packet2ui>(a)); +} + +template <> +struct type_casting_traits<numext::uint8_t, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet16uc, Packet8us>(const Packet16uc& a) { + // Discard second half of input. + return vmovl_u8(vget_low_u8(a)); +} +template <> +EIGEN_STRONG_INLINE Packet4us pcast<Packet8uc, Packet4us>(const Packet8uc& a) { + // Discard second half of input. + return vget_low_u16(vmovl_u8(a)); +} + +template <> +struct type_casting_traits<numext::uint8_t, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet16uc, Packet8s>(const Packet16uc& a) { + return vreinterpretq_s16_u16(pcast<Packet16uc, Packet8us>(a)); +} +template <> +EIGEN_STRONG_INLINE Packet4s pcast<Packet8uc, Packet4s>(const Packet8uc& a) { + return vreinterpret_s16_u16(pcast<Packet8uc, Packet4us>(a)); +} + +template <> +struct type_casting_traits<numext::uint8_t, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet16uc, Packet16uc>(const Packet16uc& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet8uc pcast<Packet8uc, Packet8uc>(const Packet8uc& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet4uc pcast<Packet4uc, Packet4uc>(const Packet4uc& a) { + return a; +} + +template <> +struct type_casting_traits<numext::uint8_t, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet16uc, Packet16c>(const Packet16uc& a) { + return vreinterpretq_s8_u8(a); +} +template <> +EIGEN_STRONG_INLINE Packet8c pcast<Packet8uc, Packet8c>(const Packet8uc& a) { + return vreinterpret_s8_u8(a); +} +template <> +EIGEN_STRONG_INLINE Packet4c pcast<Packet4uc, Packet4c>(const Packet4uc& a) { + return static_cast<Packet4c>(a); +} + +//============================================================================== +// pcast, SrcType = int16_t +//============================================================================== +template <> +struct type_casting_traits<numext::int16_t, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet8s, Packet4f>(const Packet8s& a) { + // Discard second half of input. + return vcvtq_f32_s32(vmovl_s16(vget_low_s16(a))); +} +template <> +EIGEN_STRONG_INLINE Packet2f pcast<Packet4s, Packet2f>(const Packet4s& a) { + // Discard second half of input. + return vcvt_f32_s32(vget_low_s32(vmovl_s16(a))); +} + +template <> +struct type_casting_traits<numext::int16_t, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet8s, Packet2l>(const Packet8s& a) { + // Discard all but first two values. + return vmovl_s32(vget_low_s32(vmovl_s16(vget_low_s16(a)))); +} + +template <> +struct type_casting_traits<numext::int16_t, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet8s, Packet2ul>(const Packet8s& a) { + return vreinterpretq_u64_s64(pcast<Packet8s, Packet2l>(a)); +} + +template <> +struct type_casting_traits<numext::int16_t, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet8s, Packet4i>(const Packet8s& a) { + // Discard second half of input. + return vmovl_s16(vget_low_s16(a)); +} +template <> +EIGEN_STRONG_INLINE Packet2i pcast<Packet4s, Packet2i>(const Packet4s& a) { + // Discard second half of input. + return vget_low_s32(vmovl_s16(a)); +} + +template <> +struct type_casting_traits<numext::int16_t, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet8s, Packet4ui>(const Packet8s& a) { + return vreinterpretq_u32_s32(pcast<Packet8s, Packet4i>(a)); +} +template <> +EIGEN_STRONG_INLINE Packet2ui pcast<Packet4s, Packet2ui>(const Packet4s& a) { + return vreinterpret_u32_s32(pcast<Packet4s, Packet2i>(a)); +} + +template <> +struct type_casting_traits<numext::int16_t, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet8s, Packet8s>(const Packet8s& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet4s pcast<Packet4s, Packet4s>(const Packet4s& a) { + return a; +} + +template <> +struct type_casting_traits<numext::int16_t, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet8s, Packet8us>(const Packet8s& a) { + return vreinterpretq_u16_s16(a); +} +template <> +EIGEN_STRONG_INLINE Packet4us pcast<Packet4s, Packet4us>(const Packet4s& a) { + return vreinterpret_u16_s16(a); +} + +template <> +struct type_casting_traits<numext::int16_t, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet8s, Packet16c>(const Packet8s& a, const Packet8s& b) { + return vcombine_s8(vmovn_s16(a), vmovn_s16(b)); +} +template <> +EIGEN_STRONG_INLINE Packet8c pcast<Packet4s, Packet8c>(const Packet4s& a, const Packet4s& b) { + return vmovn_s16(vcombine_s16(a, b)); +} + +template <> +struct type_casting_traits<numext::int16_t, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet8s, Packet16uc>(const Packet8s& a, const Packet8s& b) { + return vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(a)), vmovn_u16(vreinterpretq_u16_s16(b))); +} +template <> +EIGEN_STRONG_INLINE Packet8uc pcast<Packet4s, Packet8uc>(const Packet4s& a, const Packet4s& b) { + return vmovn_u16(vcombine_u16(vreinterpret_u16_s16(a), vreinterpret_u16_s16(b))); +} + +//============================================================================== +// pcast, SrcType = uint16_t +//============================================================================== +template <> +struct type_casting_traits<numext::uint16_t, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet8us, Packet4f>(const Packet8us& a) { + // Discard second half of input. + return vcvtq_f32_u32(vmovl_u16(vget_low_u16(a))); +} +template <> +EIGEN_STRONG_INLINE Packet2f pcast<Packet4us, Packet2f>(const Packet4us& a) { + // Discard second half of input. + return vcvt_f32_u32(vget_low_u32(vmovl_u16(a))); +} + +template <> +struct type_casting_traits<numext::uint16_t, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet8us, Packet2ul>(const Packet8us& a) { + // Discard all but first two values. + return vmovl_u32(vget_low_u32(vmovl_u16(vget_low_u16(a)))); +} + +template <> +struct type_casting_traits<numext::uint16_t, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet8us, Packet2l>(const Packet8us& a) { + return vreinterpretq_s64_u64(pcast<Packet8us, Packet2ul>(a)); +} + +template <> +struct type_casting_traits<numext::uint16_t, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet8us, Packet4ui>(const Packet8us& a) { + // Discard second half of input. + return vmovl_u16(vget_low_u16(a)); +} +template <> +EIGEN_STRONG_INLINE Packet2ui pcast<Packet4us, Packet2ui>(const Packet4us& a) { + // Discard second half of input. + return vget_low_u32(vmovl_u16(a)); +} + +template <> +struct type_casting_traits<numext::uint16_t, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet8us, Packet4i>(const Packet8us& a) { + return vreinterpretq_s32_u32(pcast<Packet8us, Packet4ui>(a)); +} +template <> +EIGEN_STRONG_INLINE Packet2i pcast<Packet4us, Packet2i>(const Packet4us& a) { + return vreinterpret_s32_u32(pcast<Packet4us, Packet2ui>(a)); +} + +template <> +struct type_casting_traits<numext::uint16_t, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet8us, Packet8us>(const Packet8us& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet4us pcast<Packet4us, Packet4us>(const Packet4us& a) { + return a; +} + +template <> +struct type_casting_traits<numext::uint16_t, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet8us, Packet8s>(const Packet8us& a) { + return vreinterpretq_s16_u16(a); +} +template <> +EIGEN_STRONG_INLINE Packet4s pcast<Packet4us, Packet4s>(const Packet4us& a) { + return vreinterpret_s16_u16(a); +} + +template <> +struct type_casting_traits<numext::uint16_t, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet8us, Packet16uc>(const Packet8us& a, const Packet8us& b) { + return vcombine_u8(vmovn_u16(a), vmovn_u16(b)); +} +template <> +EIGEN_STRONG_INLINE Packet8uc pcast<Packet4us, Packet8uc>(const Packet4us& a, const Packet4us& b) { + return vmovn_u16(vcombine_u16(a, b)); +} + +template <> +struct type_casting_traits<numext::uint16_t, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet8us, Packet16c>(const Packet8us& a, const Packet8us& b) { + return vreinterpretq_s8_u8(pcast<Packet8us, Packet16uc>(a, b)); +} +template <> +EIGEN_STRONG_INLINE Packet8c pcast<Packet4us, Packet8c>(const Packet4us& a, const Packet4us& b) { + return vreinterpret_s8_u8(pcast<Packet4us, Packet8uc>(a, b)); +} + +//============================================================================== +// pcast, SrcType = int32_t +//============================================================================== +template <> +struct type_casting_traits<numext::int32_t, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) { + return vcvtq_f32_s32(a); +} +template <> +EIGEN_STRONG_INLINE Packet2f pcast<Packet2i, Packet2f>(const Packet2i& a) { + return vcvt_f32_s32(a); +} + +template <> +struct type_casting_traits<numext::int32_t, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet4i, Packet2l>(const Packet4i& a) { + // Discard second half of input. + return vmovl_s32(vget_low_s32(a)); +} + +template <> +struct type_casting_traits<numext::int32_t, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet4i, Packet2ul>(const Packet4i& a) { + return vreinterpretq_u64_s64(pcast<Packet4i, Packet2l>(a)); +} + +template <> +struct type_casting_traits<numext::int32_t, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet4i, Packet4i>(const Packet4i& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet2i pcast<Packet2i, Packet2i>(const Packet2i& a) { + return a; +} + +template <> +struct type_casting_traits<numext::int32_t, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet4i, Packet4ui>(const Packet4i& a) { + return vreinterpretq_u32_s32(a); +} +template <> +EIGEN_STRONG_INLINE Packet2ui pcast<Packet2i, Packet2ui>(const Packet2i& a) { + return vreinterpret_u32_s32(a); +} + +template <> +struct type_casting_traits<numext::int32_t, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet4i, Packet8s>(const Packet4i& a, const Packet4i& b) { + return vcombine_s16(vmovn_s32(a), vmovn_s32(b)); +} +template <> +EIGEN_STRONG_INLINE Packet4s pcast<Packet2i, Packet4s>(const Packet2i& a, const Packet2i& b) { + return vmovn_s32(vcombine_s32(a, b)); +} + +template <> +struct type_casting_traits<numext::int32_t, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet4i, Packet8us>(const Packet4i& a, const Packet4i& b) { + return vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(a)), vmovn_u32(vreinterpretq_u32_s32(b))); +} +template <> +EIGEN_STRONG_INLINE Packet4us pcast<Packet2i, Packet4us>(const Packet2i& a, const Packet2i& b) { + return vmovn_u32(vreinterpretq_u32_s32(vcombine_s32(a, b))); +} + +template <> +struct type_casting_traits<numext::int32_t, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet4i, Packet16c>(const Packet4i& a, const Packet4i& b, const Packet4i& c, + const Packet4i& d) { + const int16x8_t ab_s16 = pcast<Packet4i, Packet8s>(a, b); + const int16x8_t cd_s16 = pcast<Packet4i, Packet8s>(c, d); + return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16)); +} +template <> +EIGEN_STRONG_INLINE Packet8c pcast<Packet2i, Packet8c>(const Packet2i& a, const Packet2i& b, const Packet2i& c, + const Packet2i& d) { + const int16x4_t ab_s16 = vmovn_s32(vcombine_s32(a, b)); + const int16x4_t cd_s16 = vmovn_s32(vcombine_s32(c, d)); + return vmovn_s16(vcombine_s16(ab_s16, cd_s16)); +} + +template <> +struct type_casting_traits<numext::int32_t, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet4i, Packet16uc>(const Packet4i& a, const Packet4i& b, const Packet4i& c, + const Packet4i& d) { + const uint16x8_t ab_u16 = pcast<Packet4i, Packet8us>(a, b); + const uint16x8_t cd_u16 = pcast<Packet4i, Packet8us>(c, d); + return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16)); +} +template <> +EIGEN_STRONG_INLINE Packet8uc pcast<Packet2i, Packet8uc>(const Packet2i& a, const Packet2i& b, const Packet2i& c, + const Packet2i& d) { + const uint16x4_t ab_u16 = pcast<Packet2i, Packet4us>(a, b); + const uint16x4_t cd_u16 = pcast<Packet2i, Packet4us>(c, d); + return vmovn_u16(vcombine_u16(ab_u16, cd_u16)); +} + +//============================================================================== +// pcast, SrcType = uint32_t +//============================================================================== +template <> +struct type_casting_traits<numext::uint32_t, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet4ui, Packet4f>(const Packet4ui& a) { + return vcvtq_f32_u32(a); +} +template <> +EIGEN_STRONG_INLINE Packet2f pcast<Packet2ui, Packet2f>(const Packet2ui& a) { + return vcvt_f32_u32(a); +} + +template <> +struct type_casting_traits<numext::uint32_t, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet4ui, Packet2ul>(const Packet4ui& a) { + // Discard second half of input. + return vmovl_u32(vget_low_u32(a)); +} + +template <> +struct type_casting_traits<numext::uint32_t, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet4ui, Packet2l>(const Packet4ui& a) { + return vreinterpretq_s64_u64(pcast<Packet4ui, Packet2ul>(a)); +} + +template <> +struct type_casting_traits<numext::uint32_t, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet4ui, Packet4ui>(const Packet4ui& a) { + return a; +} +template <> +EIGEN_STRONG_INLINE Packet2ui pcast<Packet2ui, Packet2ui>(const Packet2ui& a) { + return a; +} + +template <> +struct type_casting_traits<numext::uint32_t, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet4ui, Packet4i>(const Packet4ui& a) { + return vreinterpretq_s32_u32(a); +} +template <> +EIGEN_STRONG_INLINE Packet2i pcast<Packet2ui, Packet2i>(const Packet2ui& a) { + return vreinterpret_s32_u32(a); +} + +template <> +struct type_casting_traits<numext::uint32_t, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet4ui, Packet8us>(const Packet4ui& a, const Packet4ui& b) { + return vcombine_u16(vmovn_u32(a), vmovn_u32(b)); +} +template <> +EIGEN_STRONG_INLINE Packet4us pcast<Packet2ui, Packet4us>(const Packet2ui& a, const Packet2ui& b) { + return vmovn_u32(vcombine_u32(a, b)); +} + +template <> +struct type_casting_traits<numext::uint32_t, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet4ui, Packet8s>(const Packet4ui& a, const Packet4ui& b) { + return vreinterpretq_s16_u16(pcast<Packet4ui, Packet8us>(a, b)); +} +template <> +EIGEN_STRONG_INLINE Packet4s pcast<Packet2ui, Packet4s>(const Packet2ui& a, const Packet2ui& b) { + return vreinterpret_s16_u16(pcast<Packet2ui, Packet4us>(a, b)); +} + +template <> +struct type_casting_traits<numext::uint32_t, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet4ui, Packet16uc>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c, + const Packet4ui& d) { + const uint16x8_t ab_u16 = vcombine_u16(vmovn_u32(a), vmovn_u32(b)); + const uint16x8_t cd_u16 = vcombine_u16(vmovn_u32(c), vmovn_u32(d)); + return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16)); +} +template <> +EIGEN_STRONG_INLINE Packet8uc pcast<Packet2ui, Packet8uc>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c, + const Packet2ui& d) { + const uint16x4_t ab_u16 = vmovn_u32(vcombine_u32(a, b)); + const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(c, d)); + return vmovn_u16(vcombine_u16(ab_u16, cd_u16)); +} + +template <> +struct type_casting_traits<numext::uint32_t, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet4ui, Packet16c>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c, + const Packet4ui& d) { + return vreinterpretq_s8_u8(pcast<Packet4ui, Packet16uc>(a, b, c, d)); +} +template <> +EIGEN_STRONG_INLINE Packet8c pcast<Packet2ui, Packet8c>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c, + const Packet2ui& d) { + return vreinterpret_s8_u8(pcast<Packet2ui, Packet8uc>(a, b, c, d)); +} + +//============================================================================== +// pcast, SrcType = int64_t +//============================================================================== +template <> +struct type_casting_traits<numext::int64_t, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet2l, Packet4f>(const Packet2l& a, const Packet2l& b) { + return vcvtq_f32_s32(vcombine_s32(vmovn_s64(a), vmovn_s64(b))); +} + +template <> +struct type_casting_traits<numext::int64_t, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet2l, Packet2l>(const Packet2l& a) { + return a; +} + +template <> +struct type_casting_traits<numext::int64_t, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet2l, Packet2ul>(const Packet2l& a) { + return vreinterpretq_u64_s64(a); +} + +template <> +struct type_casting_traits<numext::int64_t, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet2l, Packet4i>(const Packet2l& a, const Packet2l& b) { + return vcombine_s32(vmovn_s64(a), vmovn_s64(b)); +} + +template <> +struct type_casting_traits<numext::int64_t, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet2l, Packet4ui>(const Packet2l& a, const Packet2l& b) { + return vcombine_u32(vmovn_u64(vreinterpretq_u64_s64(a)), vmovn_u64(vreinterpretq_u64_s64(b))); +} + +template <> +struct type_casting_traits<numext::int64_t, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet2l, Packet8s>(const Packet2l& a, const Packet2l& b, const Packet2l& c, + const Packet2l& d) { + const int32x4_t ab_s32 = pcast<Packet2l, Packet4i>(a, b); + const int32x4_t cd_s32 = pcast<Packet2l, Packet4i>(c, d); + return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32)); +} + +template <> +struct type_casting_traits<numext::int64_t, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet2l, Packet8us>(const Packet2l& a, const Packet2l& b, const Packet2l& c, + const Packet2l& d) { + const uint32x4_t ab_u32 = pcast<Packet2l, Packet4ui>(a, b); + const uint32x4_t cd_u32 = pcast<Packet2l, Packet4ui>(c, d); + return vcombine_u16(vmovn_u32(ab_u32), vmovn_u32(cd_u32)); +} + +template <> +struct type_casting_traits<numext::int64_t, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet2l, Packet16c>(const Packet2l& a, const Packet2l& b, const Packet2l& c, + const Packet2l& d, const Packet2l& e, const Packet2l& f, + const Packet2l& g, const Packet2l& h) { + const int16x8_t abcd_s16 = pcast<Packet2l, Packet8s>(a, b, c, d); + const int16x8_t efgh_s16 = pcast<Packet2l, Packet8s>(e, f, g, h); + return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16)); +} + +template <> +struct type_casting_traits<numext::int64_t, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet2l, Packet16uc>(const Packet2l& a, const Packet2l& b, const Packet2l& c, + const Packet2l& d, const Packet2l& e, const Packet2l& f, + const Packet2l& g, const Packet2l& h) { + const uint16x8_t abcd_u16 = pcast<Packet2l, Packet8us>(a, b, c, d); + const uint16x8_t efgh_u16 = pcast<Packet2l, Packet8us>(e, f, g, h); + return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16)); +} + +//============================================================================== +// pcast, SrcType = uint64_t +//============================================================================== +template <> +struct type_casting_traits<numext::uint64_t, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet2ul, Packet4f>(const Packet2ul& a, const Packet2ul& b) { + return vcvtq_f32_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b))); +} + +template <> +struct type_casting_traits<numext::uint64_t, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet2ul, Packet2ul>(const Packet2ul& a) { + return a; +} + +template <> +struct type_casting_traits<numext::uint64_t, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet2ul, Packet2l>(const Packet2ul& a) { + return vreinterpretq_s64_u64(a); +} + +template <> +struct type_casting_traits<numext::uint64_t, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet2ul, Packet4ui>(const Packet2ul& a, const Packet2ul& b) { + return vcombine_u32(vmovn_u64(a), vmovn_u64(b)); +} + +template <> +struct type_casting_traits<numext::uint64_t, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet2ul, Packet4i>(const Packet2ul& a, const Packet2ul& b) { + return vreinterpretq_s32_u32(pcast<Packet2ul, Packet4ui>(a, b)); +} + +template <> +struct type_casting_traits<numext::uint64_t, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet2ul, Packet8us>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c, + const Packet2ul& d) { + const uint16x4_t ab_u16 = vmovn_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b))); + const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(vmovn_u64(c), vmovn_u64(d))); + return vcombine_u16(ab_u16, cd_u16); +} + +template <> +struct type_casting_traits<numext::uint64_t, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet2ul, Packet8s>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c, + const Packet2ul& d) { + return vreinterpretq_s16_u16(pcast<Packet2ul, Packet8us>(a, b, c, d)); +} + +template <> +struct type_casting_traits<numext::uint64_t, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet2ul, Packet16uc>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c, + const Packet2ul& d, const Packet2ul& e, const Packet2ul& f, + const Packet2ul& g, const Packet2ul& h) { + const uint16x8_t abcd_u16 = pcast<Packet2ul, Packet8us>(a, b, c, d); + const uint16x8_t efgh_u16 = pcast<Packet2ul, Packet8us>(e, f, g, h); + return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16)); +} + +template <> +struct type_casting_traits<numext::uint64_t, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet2ul, Packet16c>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c, + const Packet2ul& d, const Packet2ul& e, const Packet2ul& f, + const Packet2ul& g, const Packet2ul& h) { + return vreinterpretq_s8_u8(pcast<Packet2ul, Packet16uc>(a, b, c, d, e, f, g, h)); +} + +//============================================================================== +// preinterpret +//============================================================================== +template <> +EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2i>(const Packet2i& a) { + return vreinterpret_f32_s32(a); +} +template <> +EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2ui>(const Packet2ui& a) { + return vreinterpret_f32_u32(a); +} +template <> +EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a) { + return vreinterpretq_f32_s32(a); +} +template <> +EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4ui>(const Packet4ui& a) { + return vreinterpretq_f32_u32(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc& a) { + return static_cast<Packet4c>(a); +} +template <> +EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) { + return vreinterpret_s8_u8(a); +} +template <> +EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) { + return vreinterpretq_s8_u8(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet4c>(const Packet4c& a) { + return static_cast<Packet4uc>(a); +} +template <> +EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet8c>(const Packet8c& a) { + return vreinterpret_u8_s8(a); +} +template <> +EIGEN_STRONG_INLINE Packet16uc preinterpret<Packet16uc, Packet16c>(const Packet16c& a) { + return vreinterpretq_u8_s8(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet4us>(const Packet4us& a) { + return vreinterpret_s16_u16(a); +} +template <> +EIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s, Packet8us>(const Packet8us& a) { + return vreinterpretq_s16_u16(a); +} + +template <> +EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet4s>(const Packet4s& a) { + return vreinterpret_u16_s16(a); +} +template <> +EIGEN_STRONG_INLINE Packet8us preinterpret<Packet8us, Packet8s>(const Packet8s& a) { + return vreinterpretq_u16_s16(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2f>(const Packet2f& a) { + return vreinterpret_s32_f32(a); +} +template <> +EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2ui>(const Packet2ui& a) { + return vreinterpret_s32_u32(a); +} +template <> +EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(const Packet4f& a) { + return vreinterpretq_s32_f32(a); +} +template <> +EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) { + return vreinterpretq_s32_u32(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2f>(const Packet2f& a) { + return vreinterpret_u32_f32(a); +} +template <> +EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2i>(const Packet2i& a) { + return vreinterpret_u32_s32(a); +} +template <> +EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4f>(const Packet4f& a) { + return vreinterpretq_u32_f32(a); +} +template <> +EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) { + return vreinterpretq_u32_s32(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2ul>(const Packet2ul& a) { + return vreinterpretq_s64_u64(a); +} +template <> +EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2l>(const Packet2l& a) { + return vreinterpretq_u64_s64(a); +} + +#if EIGEN_ARCH_ARM64 + +//============================================================================== +// pcast/preinterpret, Double +//============================================================================== + +template <> +struct type_casting_traits<double, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet2d, Packet2d>(const Packet2d& a) { + return a; +} + +template <> +struct type_casting_traits<double, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) { + return vcombine_f32(vcvt_f32_f64(a), vcvt_f32_f64(b)); +} + +template <> +struct type_casting_traits<double, numext::int64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) { + return vcvtq_s64_f64(a); +} + +template <> +struct type_casting_traits<double, numext::uint64_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2ul pcast<Packet2d, Packet2ul>(const Packet2d& a) { + return vcvtq_u64_f64(a); +} + +template <> +struct type_casting_traits<double, numext::int32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) { + return vcombine_s32(vmovn_s64(vcvtq_s64_f64(a)), vmovn_s64(vcvtq_s64_f64(b))); +} + +template <> +struct type_casting_traits<double, numext::uint32_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet4ui pcast<Packet2d, Packet4ui>(const Packet2d& a, const Packet2d& b) { + return vcombine_u32(vmovn_u64(vcvtq_u64_f64(a)), vmovn_u64(vcvtq_u64_f64(b))); +} + +template <> +struct type_casting_traits<double, numext::int16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8s pcast<Packet2d, Packet8s>(const Packet2d& a, const Packet2d& b, const Packet2d& c, + const Packet2d& d) { + const int32x4_t ab_s32 = pcast<Packet2d, Packet4i>(a, b); + const int32x4_t cd_s32 = pcast<Packet2d, Packet4i>(c, d); + return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32)); +} + +template <> +struct type_casting_traits<double, numext::uint16_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet8us pcast<Packet2d, Packet8us>(const Packet2d& a, const Packet2d& b, const Packet2d& c, + const Packet2d& d) { + const uint32x4_t ab_u32 = pcast<Packet2d, Packet4ui>(a, b); + const uint32x4_t cd_u32 = pcast<Packet2d, Packet4ui>(c, d); + return vcombine_u16(vmovn_u32(ab_u32), vmovn_u32(cd_u32)); +} + +template <> +struct type_casting_traits<double, numext::int8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16c pcast<Packet2d, Packet16c>(const Packet2d& a, const Packet2d& b, const Packet2d& c, + const Packet2d& d, const Packet2d& e, const Packet2d& f, + const Packet2d& g, const Packet2d& h) { + const int16x8_t abcd_s16 = pcast<Packet2d, Packet8s>(a, b, c, d); + const int16x8_t efgh_s16 = pcast<Packet2d, Packet8s>(e, f, g, h); + return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16)); +} + +template <> +struct type_casting_traits<double, numext::uint8_t> { + enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet16uc pcast<Packet2d, Packet16uc>(const Packet2d& a, const Packet2d& b, const Packet2d& c, + const Packet2d& d, const Packet2d& e, const Packet2d& f, + const Packet2d& g, const Packet2d& h) { + const uint16x8_t abcd_u16 = pcast<Packet2d, Packet8us>(a, b, c, d); + const uint16x8_t efgh_u16 = pcast<Packet2d, Packet8us>(e, f, g, h); + return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16)); +} + +template <> +struct type_casting_traits<float, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) { + // Discard second-half of input. + return vcvt_f64_f32(vget_low_f32(a)); +} + +template <> +struct type_casting_traits<numext::int8_t, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet16c, Packet2d>(const Packet16c& a) { + // Discard all but first two values. + return vcvt_f64_f32(pcast<Packet8c, Packet2f>(vget_low_s8(a))); +} + +template <> +struct type_casting_traits<numext::uint8_t, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet16uc, Packet2d>(const Packet16uc& a) { + // Discard all but first two values. + return vcvt_f64_f32(pcast<Packet8uc, Packet2f>(vget_low_u8(a))); +} + +template <> +struct type_casting_traits<numext::int16_t, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet8s, Packet2d>(const Packet8s& a) { + // Discard all but first two values. + return vcvt_f64_f32(pcast<Packet4s, Packet2f>(vget_low_s16(a))); +} + +template <> +struct type_casting_traits<numext::uint16_t, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet8us, Packet2d>(const Packet8us& a) { + // Discard all but first two values. + return vcvt_f64_f32(pcast<Packet4us, Packet2f>(vget_low_u16(a))); +} + +template <> +struct type_casting_traits<numext::int32_t, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet4i, Packet2d>(const Packet4i& a) { + // Discard second half of input. + return vcvtq_f64_s64(vmovl_s32(vget_low_s32(a))); +} + +template <> +struct type_casting_traits<numext::uint32_t, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet4ui, Packet2d>(const Packet4ui& a) { + // Discard second half of input. + return vcvtq_f64_u64(vmovl_u32(vget_low_u32(a))); +} + +template <> +struct type_casting_traits<numext::int64_t, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(const Packet2l& a) { + return vcvtq_f64_s64(a); +} + +template <> +struct type_casting_traits<numext::uint64_t, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; +template <> +EIGEN_STRONG_INLINE Packet2d pcast<Packet2ul, Packet2d>(const Packet2ul& a) { + return vcvtq_f64_u64(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) { + return vreinterpretq_f64_s64(a); +} +template <> +EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2ul>(const Packet2ul& a) { + return vreinterpretq_f64_u64(a); +} +template <> +EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) { + return vreinterpretq_s64_f64(a); +} +template <> +EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2d>(const Packet2d& a) { + return vreinterpretq_u64_f64(a); +} +template <> +EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a) { + return vreinterpretq_f64_s32(a); +} +template <> +EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) { + return vreinterpretq_s32_f64(a); +} + +#endif // EIGEN_ARCH_ARM64 + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_NEON_H |