diff options
Diffstat (limited to 'Eigen/src/Core/arch/GPU/TypeCasting.h')
-rw-r--r-- | Eigen/src/Core/arch/GPU/TypeCasting.h | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/GPU/TypeCasting.h b/Eigen/src/Core/arch/GPU/TypeCasting.h new file mode 100644 index 000000000..754546225 --- /dev/null +++ b/Eigen/src/Core/arch/GPU/TypeCasting.h @@ -0,0 +1,80 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TYPE_CASTING_GPU_H +#define EIGEN_TYPE_CASTING_GPU_H + +namespace Eigen { + +namespace internal { + +#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \ + (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE)) + + +template <> +struct type_casting_traits<Eigen::half, float> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 2 + }; +}; + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) { + float2 r1 = __half22float2(a); + float2 r2 = __half22float2(b); + return make_float4(r1.x, r1.y, r2.x, r2.y); +} + + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) { + Packet4h2 r; + half2* r_alias=reinterpret_cast<half2*>(&r); + r_alias[0]=__floats2half2_rn(a.x,a.y); + r_alias[1]=__floats2half2_rn(a.z,a.w); + r_alias[2]=__floats2half2_rn(b.x,b.y); + r_alias[3]=__floats2half2_rn(b.z,b.w); + return r; +} + +template <> +struct type_casting_traits<float, Eigen::half> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 2, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) { + // Simply discard the second half of the input + float4 r; + const half2* a_alias=reinterpret_cast<const half2*>(&a); + float2 r1 = __half22float2(a_alias[0]); + float2 r2 = __half22float2(a_alias[1]); + r.x=static_cast<float>(r1.x); + r.y=static_cast<float>(r1.y); + r.z=static_cast<float>(r2.x); + r.w=static_cast<float>(r2.y); + return r; +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) { + // Simply discard the second half of the input + return __floats2half2_rn(a.x, a.y); +} + +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_GPU_H |