aboutsummaryrefslogtreecommitdiff
path: root/internal/simd_wrappers_sse.h
diff options
context:
space:
mode:
author: Miao Wang <miaowang@google.com> 2018-02-23 23:31:32 +0000
committer: android-build-merger <android-build-merger@google.com> 2018-02-23 23:31:32 +0000
commit: 0ed4f31d5ced2432473aa7063bc1e28d990ff3f2 (patch)
tree: a6ece8759b2fc774b39edea08417e08fa633a73c /internal/simd_wrappers_sse.h
parent: 97962621d25000e4eda770f4dd399a4378fd6b8b (diff)
parent: 1f4ec3258fe3b77841065990a20fe2047464688b (diff)
download: gemmlowp-0ed4f31d5ced2432473aa7063bc1e28d990ff3f2.tar.gz
Rebase gemmlowp to ecae4d1 am: 7d0d5a611e am: 9fa88931b4android-wear-8.0.0_r2android-o-mr1-iot-release-smart-display-r9android-o-mr1-iot-release-smart-display-r8android-o-mr1-iot-release-smart-display-r5android-o-mr1-iot-release-smart-display-r40.1Jandroid-o-mr1-iot-release-smart-display-r4android-o-mr1-iot-release-smart-display-r39android-o-mr1-iot-release-smart-display-r30android-o-mr1-iot-release-smart-display-r3android-o-mr1-iot-release-smart-display-r22android-o-mr1-iot-release-smart-display-r14android-o-mr1-iot-release-smart-clock-r6android-o-mr1-iot-release-smart-clock-r2android-o-mr1-iot-release-smart-clock-fsiandroid-o-mr1-iot-release-smart-clock-fcsandroid-o-mr1-iot-release-cube_r2android-o-mr1-iot-release-cube-fsiandroid-o-mr1-iot-release-cube-fcsandroid-o-mr1-iot-release-1.0.5android-o-mr1-iot-release-1.0.4android-o-mr1-iot-release-1.0.3android-n-iot-release-ihome-igv1android-9.0.0_r47android-9.0.0_r46android-9.0.0_r45android-9.0.0_r44android-9.0.0_r43android-9.0.0_r42android-9.0.0_r41android-9.0.0_r40android-9.0.0_r39android-9.0.0_r38android-9.0.0_r37android-9.0.0_r36android-9.0.0_r35android-9.0.0_r34android-9.0.0_r33android-9.0.0_r32android-9.0.0_r31android-9.0.0_r30android-9.0.0_r22android-9.0.0_r21android-9.0.0_r20android-9.0.0_r19android-9.0.0_r16android-9.0.0_r12android-9.0.0_r11pie-qpr3-s1-releasepie-qpr3-releasepie-qpr3-b-releasepie-qpr2-releasepie-qpr1-s3-releasepie-qpr1-s2-releasepie-qpr1-s1-releasepie-qpr1-releasepie-dr1-releasepie-dr1-devpie-devpie-b4s4-releasepie-b4s4-devoreo-mr1-1.2-iot-releasenougat-iot-releasemaster-cuttlefish-testing-release
am: 1f4ec3258f Change-Id: Icb9df1558e7d87c03080597ffbb5a6212817cba6
Diffstat (limited to 'internal/simd_wrappers_sse.h')
-rw-r--r-- internal/simd_wrappers_sse.h 26
1 files changed, 26 insertions, 0 deletions
diff --git a/internal/simd_wrappers_sse.h b/internal/simd_wrappers_sse.h
index 6480b66..3b78cb4 100644
--- a/internal/simd_wrappers_sse.h
+++ b/internal/simd_wrappers_sse.h
@@ -22,6 +22,7 @@
namespace gemmlowp {
using Int32x4 = __m128i;
+using Int16x8 = __m128i;
using Uint8x16 = __m128i;
template <int ScalarCount>
@@ -31,6 +32,12 @@ struct RegisterType<std::int32_t, ScalarCount> {
};
template <int ScalarCount>
+struct RegisterType<std::int16_t, ScalarCount> {  // Register type used for int16 data.
+ using Type =
+ typename std::conditional<ScalarCount >= 8, Int16x8, std::int16_t>::type;  // Int16x8 when ScalarCount >= 8, else a plain std::int16_t.
+};
+
+template <int ScalarCount>
struct RegisterType<std::uint8_t, ScalarCount> {
using Type = typename std::conditional<
ScalarCount >= 16, Uint8x16,
@@ -42,10 +49,18 @@ inline Int32x4 LoadInt32x4(const std::int32_t* src) {
return _mm_loadu_si128(reinterpret_cast<const Int32x4*>(src));
}
+inline Int16x8 LoadInt16x8(const std::int16_t* src) {  // Unaligned 128-bit load from src.
+ return _mm_loadu_si128(reinterpret_cast<const Int16x8*>(src));
+}
+
inline void StoreInt32x4(std::int32_t* dst, Int32x4 value) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(dst), value);
}
+inline void StoreInt16x8(std::int16_t* dst, Int16x8 value) {  // Unaligned 128-bit store of value to dst.
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), value);
+}
+
inline Uint8x16 LoadUint8x16(const std::uint8_t* src) {
return _mm_loadu_si128(reinterpret_cast<const Uint8x16*>(src));
}
@@ -116,6 +131,17 @@ struct LoadContiguousImpl<RegBlockInt32<8, 8>> {
}
};
+template <>
+struct LoadContiguousImpl<RegBlockInt16<8, 8>> {  // Fills the block's 8 registers from 64 consecutive int16 values.
+ static RegBlockInt16<8, 8> Run(const std::int16_t* src) {
+ RegBlockInt16<8, 8> result;
+ for (int i = 0; i < 8; i++) {
+ result.buf.reg[i] = LoadInt16x8(src + 8 * i);  // Register i takes src[8*i] .. src[8*i + 7].
+ }
+ return result;
+ }
+};
+
} // end namespace gemmlowp
#include "simd_wrappers_common_neon_sse.h"