diff options
Diffstat (limited to 'test/test_fixedpoint.cc')
-rw-r--r-- | test/test_fixedpoint.cc | 952 |
1 files changed, 530 insertions, 422 deletions
diff --git a/test/test_fixedpoint.cc b/test/test_fixedpoint.cc index da222f0..44e6fae 100644 --- a/test/test_fixedpoint.cc +++ b/test/test_fixedpoint.cc @@ -17,479 +17,587 @@ #define GEMMLOWP_ENABLE_FIXEDPOINT_CONSTANTS_CHECKS #include <algorithm> +#include <cinttypes> #include <cmath> +#include <cstdio> #include <random> #include <vector> -#include "test.h" #include "../fixedpoint/fixedpoint.h" +#include "test.h" namespace gemmlowp { namespace { -// Explanation of SimdVector type and associated functions -// (LoadSimdVector, StoreSimdVector): -// The fixedpoint stuff being tested here is generic in an underlying -// integer type which may be either scalar (int32_t) or SIMD (e.g. -// NEON int32x4_t). We want to write uniform tests that can test -// both the scalar and SIMD paths. We achieve this by having this -// generic SimdVector abstraction, local to this test. - +template <typename T> +T Load(const typename FixedPointRawTypeTraits<T>::ScalarRawType* src) { + return *src; +} +template <typename T> +void Store(typename FixedPointRawTypeTraits<T>::ScalarRawType* dst, T v) { + *dst = v; +} #ifdef GEMMLOWP_NEON -using SimdVector = int32x4_t; -constexpr std::size_t SimdVectorSize = 4; -SimdVector LoadSimdVector(const std::int32_t* src) { return vld1q_s32(src); } -void StoreSimdVector(std::int32_t* dst, SimdVector v) { vst1q_s32(dst, v); } -#elif defined(GEMMLOWP_SSE4) -using SimdVector = __m128i; -constexpr std::size_t SimdVectorSize = 4; -SimdVector LoadSimdVector(const std::int32_t* src) { +template <> +int32x4_t Load<int32x4_t>(const std::int32_t* src) { + return vld1q_s32(src); +} +template <> +int16x8_t Load<int16x8_t>(const std::int16_t* src) { + return vld1q_s16(src); +} +template <> +void Store<int32x4_t>(std::int32_t* dst, int32x4_t v) { + vst1q_s32(dst, v); +} +template <> +void Store<int16x8_t>(std::int16_t* dst, int16x8_t v) { + vst1q_s16(dst, v); +} +#endif +#ifdef GEMMLOWP_SSE4 +template <> +__m128i Load<__m128i>(const std::int32_t* src) { return _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)); } -void StoreSimdVector(std::int32_t* dst, SimdVector v) { +template <> +void Store<__m128i>(std::int32_t* dst, __m128i v) { _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), v); } -#else -using SimdVector = std::int32_t; -constexpr std::size_t SimdVectorSize = 1; -SimdVector LoadSimdVector(const std::int32_t* src) { return *src; } -void StoreSimdVector(std::int32_t* dst, SimdVector v) { *dst = v; } +template <> +int16x8_m128i Load<int16x8_m128i>(const std::int16_t* src) { + return to_int16x8_m128i( + _mm_loadu_si128(reinterpret_cast<const __m128i*>(src))); +} +template <> +void Store<int16x8_m128i>(std::int16_t* dst, int16x8_m128i v) { + _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), v.v); +} +#endif +#ifdef GEMMLOWP_MSA +template <> +v4i32 Load<v4i32>(const std::int32_t* src) { + return __builtin_msa_ld_w(const_cast<std::int32_t*>(src), 0); +} +template <> +v8i16 Load<v8i16>(const std::int16_t* src) { + return __builtin_msa_ld_h(const_cast<std::int16_t*>(src), 0); +} +template <> +void Store<v4i32>(std::int32_t* dst, v4i32 v) { + __builtin_msa_st_w(v, dst, 0); +} +template <> +void Store<v8i16>(std::int16_t* dst, v8i16 v) { + __builtin_msa_st_h(v, dst, 0); +} #endif -// Explanation of UnaryOpBase, its *Op subclasses below, and TestUnaryOp: -// Most (though not all) of the fixedpoint functionality being tested -// consists of functions taking one fixedpoint value and returning one -// fixedpoint value, e.g. "exp" or "tanh". We call them "unary operators". -// We factor a lot of testing boilerplate into a common TestUnaryOp function -// taking a "unary op" object that fully describes the function to be tested. -// These objects inherit UnaryOpBase mostly as a means to share some default -// values for some properties. -// -// An important design element here is that the fixed-point values are passed -// around as raw integers (e.g. int32_t or SIMD types such as int32x4_t), not -// as higher-level FixedPoint objects. The motivation for this design is 1) to -// avoid having to templatize everything in the tIntegerBits parameter of -// class FixedPoint, and 2) to allow directly testing low-level functions -// operating on raw types (e.g. RoundingDivideByPOT) without needlessly -// requiring -// wrapping raw values in FixedPoint objects. -class UnaryOpBase { - public: - // Min bound of the input range of this op. For example, an op only handling - // nonnegative values would return 0. - std::int32_t MinInput() const { - return std::numeric_limits<std::int32_t>::min(); - } - // Max bound of the input range of this op. For example, an op only handling - // nonpositive values would return 0. - std::int32_t MaxInput() const { - return std::numeric_limits<std::int32_t>::max(); - } - // Tolerated difference between actual and reference int32 values. - // Note that the corresponding real-numbers tolerance depends on the number - // of integer bits of the fixed-point representation of the results of this - // op. - // For example, for an op returning fixed-point values with 0 integer bits, - // the correspondence between real-number values and raw values is - // real_number = (2^31) * raw_value. - std::int32_t Tolerance() const { return 0; } -}; +#ifdef GEMMLOWP_AVX2 +template <> +__m256i Load<__m256i>(const std::int32_t* src) { + return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src)); +} -// Op wrapping RoundingDivideByPOT -class RoundingDivideByPOTOp final : public UnaryOpBase { - public: - RoundingDivideByPOTOp(int exponent) : exponent_(exponent) {} - std::int32_t ReferenceOp(std::int32_t x) const { - const double d = static_cast<double>(x) / (1ll << exponent_); - return static_cast<std::int32_t>(std::round(d)); - } - template <typename tRawType> - tRawType Op(tRawType x) const { - return RoundingDivideByPOT(x, exponent_); - } +template <> +int16x16_m256i Load<int16x16_m256i>(const std::int16_t* src) { + return to_int16x16_m256i( + _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src))); +} - private: - const int exponent_; -}; +template <> +void Store<__m256i>(std::int32_t* dst, __m256i v) { + _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), v); +} -// Op wrapping SaturatingRoundingMultiplyByPOT -template <int tExponent> -class SaturatingRoundingMultiplyByPOTOp final : public UnaryOpBase { - public: - std::int32_t ReferenceOp(std::int32_t x) const { - const double d = static_cast<double>(x) * std::pow(2., tExponent); - const double clamp_min = std::numeric_limits<std::int32_t>::min(); - const double clamp_max = std::numeric_limits<std::int32_t>::max(); - const double clamped = std::min(clamp_max, std::max(clamp_min, d)); - return static_cast<std::int32_t>(std::round(clamped)); - } - template <typename tRawType> - tRawType Op(tRawType x) const { - return SaturatingRoundingMultiplyByPOT<tExponent>(x); - } -}; +template <> +void Store<int16x16_m256i>(std::int16_t* dst, int16x16_m256i v) { + _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), v.v); +} +#endif -// Op wrapping exp_on_interval_between_negative_one_quarter_and_0_excl -class ExpOnIntervalBetweenNegativeOneQuarterAnd0ExclOp final - : public UnaryOpBase { +template <typename tSimdType> +class TestFixedPoint { public: - std::int32_t MinInput() const { return -(1 << 29); } - std::int32_t MaxInput() const { return 0; } - std::int32_t Tolerance() const { return 500; } - std::int32_t ReferenceOp(std::int32_t x) const { - using F = FixedPoint<std::int32_t, 0>; - const double d = ToDouble(F::FromRaw(x)); - const double e = std::exp(d); - return F::FromDouble(e).raw(); - } - template <typename tRawType> - tRawType Op(tRawType x) const { - using F = FixedPoint<tRawType, 0>; - const F f = F::FromRaw(x); - const F e = exp_on_interval_between_negative_one_quarter_and_0_excl(f); - return e.raw(); - } -}; + using SimdType = tSimdType; + using SimdTypeTraits = FixedPointRawTypeTraits<SimdType>; + using ScalarType = typename SimdTypeTraits::ScalarRawType; + static constexpr int kSimdLanes = SimdTypeTraits::kLanes; + static constexpr int kScalarTypeBits = 8 * sizeof(ScalarType); + + // Explanation of UnaryOpBase, its *Op subclasses below, and TestUnaryOp: + // Most (though not all) of the fixedpoint functionality being tested + // consists of functions taking one fixedpoint value and returning one + // fixedpoint value, e.g. "exp" or "tanh". We call them "unary operators". + // We factor a lot of testing boilerplate into a common TestUnaryOp function + // taking a "unary op" object that fully describes the function to be tested. + // These objects inherit UnaryOpBase mostly as a means to share some default + // values for some properties. + // + // An important design element here is that the fixed-point values are passed + // around as raw integers (e.g. int32_t or SIMD types such as int32x4_t), not + // as higher-level FixedPoint objects. The motivation for this design is 1) to + // avoid having to templatize everything in the tIntegerBits parameter of + // class FixedPoint, and 2) to allow directly testing low-level functions + // operating on raw types (e.g. RoundingDivideByPOT) without needlessly + // requiring + // wrapping raw values in FixedPoint objects. + class UnaryOpBase { + public: + // Min bound of the input range of this op. For example, an op only handling + // nonnegative values would return 0. + ScalarType MinInput() const { + return std::numeric_limits<ScalarType>::min(); + } + // Max bound of the input range of this op. For example, an op only handling + // nonpositive values would return 0. + ScalarType MaxInput() const { + return std::numeric_limits<ScalarType>::max(); + } + // Tolerated difference between actual and reference ScalarType values. + // Note that the corresponding real-numbers tolerance depends on the number + // of integer bits of the fixed-point representation of the results of this + // op. + // For example, for an op returning fixed-point values with 0 integer bits, + // the correspondence between real-number values and raw values is + // real_number = (2^31) * raw_value. + ScalarType Tolerance() const { return 0; } + }; + + // Op wrapping RoundingDivideByPOT + class RoundingDivideByPOTOp final : public UnaryOpBase { + public: + RoundingDivideByPOTOp(int exponent) : exponent_(exponent) {} + ScalarType ReferenceOp(ScalarType x) const { + const double d = static_cast<double>(x) / (1ll << exponent_); + return static_cast<ScalarType>(std::round(d)); + } + template <typename RawType> + RawType Op(RawType x) const { + return RoundingDivideByPOT(x, exponent_); + } -// Op wrapping exp_on_negative_values -template <int tIntegerBits> -class ExpOnNegativeValuesOp final : public UnaryOpBase { - public: - std::int32_t MaxInput() const { return 0; } - std::int32_t Tolerance() const { return 500; } - std::int32_t ReferenceOp(std::int32_t x) const { - using F = FixedPoint<std::int32_t, tIntegerBits>; - using F0 = FixedPoint<std::int32_t, 0>; - const double d = ToDouble(F::FromRaw(x)); - const double e = std::exp(d); - return F0::FromDouble(e).raw(); - } - template <typename tRawType> - tRawType Op(tRawType x) const { - using F = FixedPoint<tRawType, tIntegerBits>; - const F f = F::FromRaw(x); - return exp_on_negative_values(f).raw(); + private: + const int exponent_; + }; + + // Op wrapping SaturatingRoundingMultiplyByPOT + template <int tExponent> + class SaturatingRoundingMultiplyByPOTOp final : public UnaryOpBase { + public: + ScalarType ReferenceOp(ScalarType x) const { + const double d = static_cast<double>(x) * std::pow(2., tExponent); + const double clamp_min = std::numeric_limits<ScalarType>::min(); + const double clamp_max = std::numeric_limits<ScalarType>::max(); + const double clamped = std::min(clamp_max, std::max(clamp_min, d)); + return static_cast<ScalarType>(std::round(clamped)); + } + template <typename RawType> + RawType Op(RawType x) const { + return SaturatingRoundingMultiplyByPOT<tExponent>(x); + } + }; + + // Op wrapping exp_on_interval_between_negative_one_quarter_and_0_excl + class ExpOnIntervalBetweenNegativeOneQuarterAnd0ExclOp final + : public UnaryOpBase { + public: + ScalarType MinInput() const { return -(1 << (kScalarTypeBits - 3)); } + ScalarType MaxInput() const { return 0; } + ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 500 : 1; } + ScalarType ReferenceOp(ScalarType x) const { + using F = FixedPoint<ScalarType, 0>; + const double d = ToDouble(F::FromRaw(x)); + const double e = std::exp(d); + return F::FromDouble(e).raw(); + } + template <typename RawType> + RawType Op(RawType x) const { + using F = FixedPoint<RawType, 0>; + const F f = F::FromRaw(x); + const F e = exp_on_interval_between_negative_one_quarter_and_0_excl(f); + return e.raw(); + } + }; + + // Op wrapping exp_on_negative_values + template <int tIntegerBits> + class ExpOnNegativeValuesOp final : public UnaryOpBase { + public: + ScalarType MaxInput() const { return 0; } + ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 500 : 2; } + ScalarType ReferenceOp(ScalarType x) const { + using F = FixedPoint<ScalarType, tIntegerBits>; + using F0 = FixedPoint<ScalarType, 0>; + const double d = ToDouble(F::FromRaw(x)); + const double e = std::exp(d); + return F0::FromDouble(e).raw(); + } + template <typename RawType> + RawType Op(RawType x) const { + using F = FixedPoint<RawType, tIntegerBits>; + const F f = F::FromRaw(x); + return exp_on_negative_values(f).raw(); + } + }; + + // Op wrapping one_minus_x_over_one_plus_x_for_x_in_0_1 + class OneMinusXOverOnePlusXForXIn01Op final : public UnaryOpBase { + public: + ScalarType MinInput() const { return 0; } + ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 12 : 11; } + ScalarType ReferenceOp(ScalarType x) const { + using F = FixedPoint<ScalarType, 0>; + const double d = ToDouble(F::FromRaw(x)); + const double e = (1 - d) / (1 + d); + return F::FromDouble(e).raw(); + } + template <typename RawType> + RawType Op(RawType x) const { + using F = FixedPoint<RawType, 0>; + const F f = F::FromRaw(x); + return one_minus_x_over_one_plus_x_for_x_in_0_1(f).raw(); + } + }; + + // Op wrapping tanh + template <int tIntegerBits> + class TanhOp final : public UnaryOpBase { + public: + ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 310 : 12; } + ScalarType ReferenceOp(ScalarType x) const { + using F = FixedPoint<ScalarType, tIntegerBits>; + using F0 = FixedPoint<ScalarType, 0>; + const double d = ToDouble(F::FromRaw(x)); + const double e = std::tanh(d); + return F0::FromDouble(e).raw(); + } + template <typename RawType> + RawType Op(RawType x) const { + using F = FixedPoint<RawType, tIntegerBits>; + const F f = F::FromRaw(x); + return tanh(f).raw(); + } + }; + + // Op wrapping one_over_one_plus_x_for_x_in_0_1 + class OneOverOnePlusXForXIn01Op final : public UnaryOpBase { + public: + ScalarType MinInput() const { return 0; } + ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 6 : 5; } + ScalarType ReferenceOp(ScalarType x) const { + using F = FixedPoint<ScalarType, 0>; + const double d = ToDouble(F::FromRaw(x)); + const double e = 1 / (1 + d); + return F::FromDouble(e).raw(); + } + template <typename RawType> + RawType Op(RawType x) const { + using F = FixedPoint<RawType, 0>; + const F f = F::FromRaw(x); + return one_over_one_plus_x_for_x_in_0_1(f).raw(); + } + }; + + // Op wrapping logistic + template <int tIntegerBits> + class LogisticOp final : public UnaryOpBase { + public: + ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 155 : 6; } + ScalarType ReferenceOp(ScalarType x) const { + using F = FixedPoint<ScalarType, tIntegerBits>; + using F0 = FixedPoint<ScalarType, 0>; + const double d = ToDouble(F::FromRaw(x)); + const double e = 1 / (1 + std::exp(-d)); + return F0::FromDouble(e).raw(); + } + template <typename RawType> + RawType Op(RawType x) const { + using F = FixedPoint<RawType, tIntegerBits>; + const F f = F::FromRaw(x); + return logistic(f).raw(); + } + }; + + // Tests a given op, on a given list of int32 input values. + template <typename tUnaryOpType> + void TestUnaryOp(const tUnaryOpType& unary_op, + const std::vector<ScalarType>& testvals) { + Check(0 == (testvals.size() % kSimdLanes)); + for (std::size_t i = 0; i < testvals.size(); i += kSimdLanes) { + // First, clamp input values accoding to the MinInput() and MaxInput() + // bounds returned by the op. + ScalarType input[kSimdLanes] = {0}; + for (std::size_t j = 0; j < kSimdLanes; j++) { + const ScalarType raw_input = testvals[i + j]; + input[j] = std::min(unary_op.MaxInput(), + std::max(unary_op.MinInput(), raw_input)); + } + // Compute reference results and check that the actual results on + // scalar inputs agree with them, to the Tolerance() returned by the op. + ScalarType reference[kSimdLanes] = {0}; + ScalarType actual_scalar[kSimdLanes] = {0}; + for (std::size_t j = 0; j < kSimdLanes; j++) { + reference[j] = unary_op.ReferenceOp(input[j]); + actual_scalar[j] = unary_op.Op(input[j]); + const std::int64_t diff = static_cast<std::int64_t>(actual_scalar[j]) - + static_cast<std::int64_t>(reference[j]); + if (std::abs(diff) > unary_op.Tolerance()) { + fprintf(stderr, "abs(diff) (%" PRId64 ") > tolerance (%d)\n", diff, + unary_op.Tolerance()); + } + Check(std::abs(diff) <= unary_op.Tolerance()); + } + // Check that the actual results on SIMD inputs agree *exactly* with the + // actual results on scalar inputs. I.e. SIMD must make absolutely no + // difference + // to the results, regardless of the fact that both scalar and SIMD + // results may differ from the reference results. + ScalarType actual_simd[kSimdLanes] = {0}; + Store<SimdType>(actual_simd, unary_op.Op(Load<SimdType>(input))); + for (std::size_t j = 0; j < kSimdLanes; j++) { + if (actual_simd[j] != actual_scalar[j]) { + fprintf(stderr, "SIMD (%d) != scalar (%d)\n", actual_simd[j], + actual_scalar[j]); + } + Check(actual_simd[j] == actual_scalar[j]); + } + } } -}; -// Op wrapping one_minus_x_over_one_plus_x_for_x_in_0_1 -class OneMinusXOverOnePlusXForXIn01Op final : public UnaryOpBase { - public: - std::int32_t MinInput() const { return 0; } - std::int32_t Tolerance() const { return 12; } - std::int32_t ReferenceOp(std::int32_t x) const { - using F = FixedPoint<std::int32_t, 0>; - const double d = ToDouble(F::FromRaw(x)); - const double e = (1 - d) / (1 + d); - return F::FromDouble(e).raw(); - } - template <typename tRawType> - tRawType Op(tRawType x) const { - using F = FixedPoint<tRawType, 0>; - const F f = F::FromRaw(x); - return one_minus_x_over_one_plus_x_for_x_in_0_1(f).raw(); + template <int tIntegerBits> + void test_convert(FixedPoint<ScalarType, tIntegerBits> x) { + typedef FixedPoint<ScalarType, tIntegerBits> F; + F y = F::FromDouble(ToDouble(x)); + Check(y == x); } -}; -// Op wrapping tanh -template <int tIntegerBits> -class TanhOp final : public UnaryOpBase { - public: - std::int32_t Tolerance() const { return 310; } - std::int32_t ReferenceOp(std::int32_t x) const { - using F = FixedPoint<std::int32_t, tIntegerBits>; - using F0 = FixedPoint<std::int32_t, 0>; - const double d = ToDouble(F::FromRaw(x)); - const double e = std::tanh(d); - return F0::FromDouble(e).raw(); - } - template <typename tRawType> - tRawType Op(tRawType x) const { - using F = FixedPoint<tRawType, tIntegerBits>; - const F f = F::FromRaw(x); - return tanh(f).raw(); + template <int tIntegerBits_a, int tIntegerBits_b> + void test_Rescale(FixedPoint<ScalarType, tIntegerBits_a> a) { + FixedPoint<ScalarType, tIntegerBits_b> actual = Rescale<tIntegerBits_b>(a); + FixedPoint<ScalarType, tIntegerBits_b> expected = + FixedPoint<ScalarType, tIntegerBits_b>::FromDouble(ToDouble(a)); + Check(actual == expected); } -}; -// Op wrapping one_over_one_plus_x_for_x_in_0_1 -class OneOverOnePlusXForXIn01Op final : public UnaryOpBase { - public: - std::int32_t MinInput() const { return 0; } - std::int32_t Tolerance() const { return 6; } - std::int32_t ReferenceOp(std::int32_t x) const { - using F = FixedPoint<std::int32_t, 0>; - const double d = ToDouble(F::FromRaw(x)); - const double e = 1 / (1 + d); - return F::FromDouble(e).raw(); - } - template <typename tRawType> - tRawType Op(tRawType x) const { - using F = FixedPoint<tRawType, 0>; - const F f = F::FromRaw(x); - return one_over_one_plus_x_for_x_in_0_1(f).raw(); + template <int tIntegerBits_a, int tIntegerBits_b> + void test_Rescale(const std::vector<ScalarType>& testvals) { + for (auto a : testvals) { + FixedPoint<ScalarType, tIntegerBits_a> aq; + aq.raw() = a; + test_Rescale<tIntegerBits_a, tIntegerBits_b>(aq); + } } -}; -// Op wrapping logistic -template <int tIntegerBits> -class LogisticOp final : public UnaryOpBase { - public: - std::int32_t Tolerance() const { return 155; } - std::int32_t ReferenceOp(std::int32_t x) const { - using F = FixedPoint<std::int32_t, tIntegerBits>; - using F0 = FixedPoint<std::int32_t, 0>; - const double d = ToDouble(F::FromRaw(x)); - const double e = 1 / (1 + std::exp(-d)); - return F0::FromDouble(e).raw(); + template <int tIntegerBits_a, int tIntegerBits_b> + void test_mul(FixedPoint<ScalarType, tIntegerBits_a> a, + FixedPoint<ScalarType, tIntegerBits_b> b) { + static const int ProductIntegerBits = tIntegerBits_a + tIntegerBits_b; + using ProductFixedPoint = FixedPoint<ScalarType, ProductIntegerBits>; + ProductFixedPoint ab; + ab = a * b; + double a_double = ToDouble(a); + double b_double = ToDouble(b); + double ab_double = a_double * b_double; + ProductFixedPoint expected = ProductFixedPoint::FromDouble(ab_double); + std::int64_t diff = std::int64_t(ab.raw()) - std::int64_t(expected.raw()); + Check(std::abs(diff) <= 1); } - template <typename tRawType> - tRawType Op(tRawType x) const { - using F = FixedPoint<tRawType, tIntegerBits>; - const F f = F::FromRaw(x); - return logistic(f).raw(); - } -}; -// Tests a given op, on a given list of int32 input values. -template <typename tUnaryOpType> -void TestUnaryOp(const tUnaryOpType& unary_op, - const std::vector<std::int32_t>& testvals_int32) { - Check(0 == (testvals_int32.size() % SimdVectorSize)); - for (std::size_t i = 0; i < testvals_int32.size(); i += SimdVectorSize) { - // First, clamp input int32 values accoding to the MinInput() and MaxInput() - // bounds returned by the op. - std::int32_t input[SimdVectorSize] = {0}; - for (std::size_t j = 0; j < SimdVectorSize; j++) { - const std::int32_t raw_input = testvals_int32[i + j]; - input[j] = std::min(unary_op.MaxInput(), - std::max(unary_op.MinInput(), raw_input)); - } - // Compute reference results and check that the actual results on - // scalar inputs agree with them, to the Tolerance() returned by the op. - std::int32_t reference[SimdVectorSize] = {0}; - std::int32_t actual_scalar[SimdVectorSize] = {0}; - for (std::size_t j = 0; j < SimdVectorSize; j++) { - reference[j] = unary_op.ReferenceOp(input[j]); - actual_scalar[j] = unary_op.Op(input[j]); - const std::int64_t diff = static_cast<std::int64_t>(actual_scalar[j]) - - static_cast<std::int64_t>(reference[j]); - Check(std::abs(diff) <= unary_op.Tolerance()); - } - // Check that the actual results on SIMD inputs agree *exactly* with the - // actual results on scalar inputs. I.e. SIMD must make absolutely no - // difference - // to the results, regardless of the fact that both scalar and SIMD results - // may differ from the reference results. - std::int32_t actual_simd[SimdVectorSize] = {0}; - StoreSimdVector(actual_simd, unary_op.Op(LoadSimdVector(input))); - for (std::size_t j = 0; j < SimdVectorSize; j++) { - Check(actual_simd[j] == actual_scalar[j]); + template <int tIntegerBits_a, int tIntegerBits_b> + void test_mul(const std::vector<ScalarType>& testvals) { + for (auto a : testvals) { + for (auto b : testvals) { + FixedPoint<ScalarType, tIntegerBits_a> aq; + FixedPoint<ScalarType, tIntegerBits_b> bq; + aq.raw() = a; + bq.raw() = b; + test_mul(aq, bq); + } } } -} -template <int tIntegerBits> -void test_convert(FixedPoint<std::int32_t, tIntegerBits> x) { - typedef FixedPoint<std::int32_t, tIntegerBits> F; - F y = F::FromDouble(ToDouble(x)); - Check(y == x); -} - -template <int tIntegerBits_a, int tIntegerBits_b> -void test_Rescale(FixedPoint<std::int32_t, tIntegerBits_a> a) { - FixedPoint<std::int32_t, tIntegerBits_b> actual = Rescale<tIntegerBits_b>(a); - FixedPoint<std::int32_t, tIntegerBits_b> expected = - FixedPoint<std::int32_t, tIntegerBits_b>::FromDouble(ToDouble(a)); - Check(actual == expected); -} - -template <int tIntegerBits_a, int tIntegerBits_b> -void test_Rescale(const std::vector<std::int32_t>& testvals_int32) { - for (auto a : testvals_int32) { - FixedPoint<std::int32_t, tIntegerBits_a> aq; - aq.raw() = a; - test_Rescale<tIntegerBits_a, tIntegerBits_b>(aq); + template <int tExponent, int tIntegerBits_a> + void test_ExactMulByPot(FixedPoint<ScalarType, tIntegerBits_a> a) { + double x = ToDouble(a) * std::pow(2.0, tExponent); + double y = ToDouble(ExactMulByPot<tExponent>(a)); + Check(x == y); } -} - -template <int tIntegerBits_a, int tIntegerBits_b> -void test_mul(FixedPoint<std::int32_t, tIntegerBits_a> a, - FixedPoint<std::int32_t, tIntegerBits_b> b) { - static const int ProductIntegerBits = tIntegerBits_a + tIntegerBits_b; - using ProductFixedPoint = FixedPoint<std::int32_t, ProductIntegerBits>; - ProductFixedPoint ab; - ab = a * b; - double a_double = ToDouble(a); - double b_double = ToDouble(b); - double ab_double = a_double * b_double; - ProductFixedPoint expected = ProductFixedPoint::FromDouble(ab_double); - std::int64_t diff = std::int64_t(ab.raw()) - std::int64_t(expected.raw()); - Check(std::abs(diff) <= 1); -} -template <int tIntegerBits_a, int tIntegerBits_b> -void test_mul(const std::vector<std::int32_t>& testvals_int32) { - for (auto a : testvals_int32) { - for (auto b : testvals_int32) { - FixedPoint<std::int32_t, tIntegerBits_a> aq; - FixedPoint<std::int32_t, tIntegerBits_b> bq; + template <int tExponent, int tIntegerBits_a> + void test_ExactMulByPot(const std::vector<ScalarType>& testvals) { + for (auto a : testvals) { + FixedPoint<ScalarType, tIntegerBits_a> aq; aq.raw() = a; - bq.raw() = b; - test_mul(aq, bq); + test_ExactMulByPot<tExponent, tIntegerBits_a>(aq); } } -} -template <int tExponent, int tIntegerBits_a> -void test_ExactMulByPot(FixedPoint<std::int32_t, tIntegerBits_a> a) { - double x = ToDouble(a) * std::pow(2.0, tExponent); - double y = ToDouble(ExactMulByPot<tExponent>(a)); - Check(x == y); -} + // Make the list of test values to test each op against. + std::vector<ScalarType> MakeTestVals() { + std::vector<ScalarType> testvals; + + for (int i = 0; i < kScalarTypeBits - 1; i++) { + testvals.push_back((1 << i) - 2); + testvals.push_back((1 << i) - 1); + testvals.push_back((1 << i)); + testvals.push_back((1 << i) + 1); + testvals.push_back((1 << i) + 2); + testvals.push_back(-(1 << i) - 2); + testvals.push_back(-(1 << i) - 1); + testvals.push_back(-(1 << i)); + testvals.push_back(-(1 << i) + 1); + testvals.push_back(-(1 << i) + 2); + } + testvals.push_back(std::numeric_limits<ScalarType>::min()); + testvals.push_back(std::numeric_limits<ScalarType>::min() + 1); + testvals.push_back(std::numeric_limits<ScalarType>::min() + 2); + testvals.push_back(std::numeric_limits<ScalarType>::max() - 2); + testvals.push_back(std::numeric_limits<ScalarType>::max() - 1); + testvals.push_back(std::numeric_limits<ScalarType>::max()); + + std::mt19937 random_engine; + std::uniform_int_distribution<ScalarType> uniform_distribution( + std::numeric_limits<ScalarType>::min(), + std::numeric_limits<ScalarType>::max()); + for (int i = 0; i < 1000; i++) { + testvals.push_back(uniform_distribution(random_engine)); + } -template <int tExponent, int tIntegerBits_a> -void test_ExactMulByPot(const std::vector<std::int32_t>& testvals_int32) { - for (auto a : testvals_int32) { - FixedPoint<std::int32_t, tIntegerBits_a> aq; - aq.raw() = a; - test_ExactMulByPot<tExponent, tIntegerBits_a>(aq); - } -} + // SIMD tests will require the length of testvals to be a multiple + // of SIMD vector size. + while (testvals.size() % kSimdLanes) { + testvals.push_back(0); + } -// Make the list of test values to test each op against. -std::vector<std::int32_t> MakeTestValsInt32() { - std::vector<std::int32_t> testvals_int32; - - for (int i = 0; i < 31; i++) { - testvals_int32.push_back((1 << i) - 2); - testvals_int32.push_back((1 << i) - 1); - testvals_int32.push_back((1 << i)); - testvals_int32.push_back((1 << i) + 1); - testvals_int32.push_back((1 << i) + 2); - testvals_int32.push_back(-(1 << i) - 2); - testvals_int32.push_back(-(1 << i) - 1); - testvals_int32.push_back(-(1 << i)); - testvals_int32.push_back(-(1 << i) + 1); - testvals_int32.push_back(-(1 << i) + 2); - } - testvals_int32.push_back(std::numeric_limits<std::int32_t>::min()); - testvals_int32.push_back(std::numeric_limits<std::int32_t>::min() + 1); - testvals_int32.push_back(std::numeric_limits<std::int32_t>::min() + 2); - testvals_int32.push_back(std::numeric_limits<std::int32_t>::max() - 2); - testvals_int32.push_back(std::numeric_limits<std::int32_t>::max() - 1); - testvals_int32.push_back(std::numeric_limits<std::int32_t>::max()); - - std::mt19937 random_engine; - std::uniform_int_distribution<std::int32_t> uniform_distribution( - std::numeric_limits<std::int32_t>::min(), - std::numeric_limits<std::int32_t>::max()); - for (int i = 0; i < 1000; i++) { - testvals_int32.push_back(uniform_distribution(random_engine)); + std::sort(testvals.begin(), testvals.end()); + return testvals; } - // SIMD tests will require the length of testvals_int32 to be a multiple - // of SIMD vector size. - while (testvals_int32.size() % SimdVectorSize) { - testvals_int32.push_back(0); - } + void RunTests(const char* msg) { + const std::vector<ScalarType> testvals = MakeTestVals(); - std::sort(testvals_int32.begin(), testvals_int32.end()); - return testvals_int32; -} + for (int s = 0; s < kScalarTypeBits; s++) { + TestUnaryOp(RoundingDivideByPOTOp(s), testvals); + } + + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<1 - kScalarTypeBits>(), + testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<2 - kScalarTypeBits>(), + testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<3 - kScalarTypeBits>(), + testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<14 - kScalarTypeBits>(), + testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<15 - kScalarTypeBits>(), + testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-15>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-4>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-3>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-2>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-1>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<0>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<1>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<2>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<3>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<4>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<15>(), testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 15>(), + testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 14>(), + testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 3>(), + testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 2>(), + testvals); + TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<kScalarTypeBits - 1>(), + testvals); + + TestUnaryOp(ExpOnIntervalBetweenNegativeOneQuarterAnd0ExclOp(), testvals); + TestUnaryOp(ExpOnNegativeValuesOp<0>(), testvals); + TestUnaryOp(ExpOnNegativeValuesOp<1>(), testvals); + TestUnaryOp(ExpOnNegativeValuesOp<2>(), testvals); + TestUnaryOp(ExpOnNegativeValuesOp<3>(), testvals); + TestUnaryOp(ExpOnNegativeValuesOp<4>(), testvals); + TestUnaryOp(ExpOnNegativeValuesOp<5>(), testvals); + TestUnaryOp(ExpOnNegativeValuesOp<6>(), testvals); + + TestUnaryOp(OneMinusXOverOnePlusXForXIn01Op(), testvals); + TestUnaryOp(TanhOp<0>(), testvals); + TestUnaryOp(TanhOp<1>(), testvals); + TestUnaryOp(TanhOp<2>(), testvals); + TestUnaryOp(TanhOp<3>(), testvals); + TestUnaryOp(TanhOp<4>(), testvals); + TestUnaryOp(TanhOp<5>(), testvals); + TestUnaryOp(TanhOp<6>(), testvals); + + TestUnaryOp(OneOverOnePlusXForXIn01Op(), testvals); + TestUnaryOp(LogisticOp<0>(), testvals); + TestUnaryOp(LogisticOp<1>(), testvals); + TestUnaryOp(LogisticOp<2>(), testvals); + TestUnaryOp(LogisticOp<3>(), testvals); + TestUnaryOp(LogisticOp<4>(), testvals); + TestUnaryOp(LogisticOp<5>(), testvals); + TestUnaryOp(LogisticOp<6>(), testvals); + + for (auto a : testvals) { + FixedPoint<ScalarType, 4> x; + x.raw() = a; + test_convert(x); + } + + test_mul<0, 0>(testvals); + test_mul<0, 1>(testvals); + test_mul<2, 0>(testvals); + test_mul<1, 1>(testvals); + test_mul<4, 4>(testvals); + test_mul<3, 5>(testvals); + test_mul<7, 2>(testvals); + test_mul<kScalarTypeBits / 2 - 1, kScalarTypeBits / 2 - 2>(testvals); + + test_Rescale<0, 0>(testvals); + test_Rescale<0, 1>(testvals); + test_Rescale<2, 0>(testvals); + test_Rescale<4, 4>(testvals); + test_Rescale<4, 5>(testvals); + test_Rescale<6, 3>(testvals); + test_Rescale<13, 9>(testvals); + + test_ExactMulByPot<0, 0>(testvals); + test_ExactMulByPot<0, 4>(testvals); + test_ExactMulByPot<1, 4>(testvals); + test_ExactMulByPot<3, 2>(testvals); + test_ExactMulByPot<-4, 5>(testvals); + test_ExactMulByPot<-2, 6>(testvals); + + fprintf(stderr, "PASS (%s)\n", msg); + } +}; } // end anonymous namespace } // end namespace gemmlowp int main() { - using namespace gemmlowp; - - const std::vector<std::int32_t> testvals_int32 = MakeTestValsInt32(); - - for (int s = 0; s < 32; s++) { - TestUnaryOp(RoundingDivideByPOTOp(s), testvals_int32); - } - - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-31>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-30>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-29>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-17>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-16>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-15>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-4>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-3>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-2>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-1>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<0>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<1>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<2>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<3>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<4>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<15>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<16>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<17>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<29>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<30>(), testvals_int32); - TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<31>(), testvals_int32); - - TestUnaryOp(ExpOnIntervalBetweenNegativeOneQuarterAnd0ExclOp(), - testvals_int32); - TestUnaryOp(ExpOnNegativeValuesOp<0>(), testvals_int32); - TestUnaryOp(ExpOnNegativeValuesOp<1>(), testvals_int32); - TestUnaryOp(ExpOnNegativeValuesOp<2>(), testvals_int32); - TestUnaryOp(ExpOnNegativeValuesOp<3>(), testvals_int32); - TestUnaryOp(ExpOnNegativeValuesOp<4>(), testvals_int32); - TestUnaryOp(ExpOnNegativeValuesOp<5>(), testvals_int32); - TestUnaryOp(ExpOnNegativeValuesOp<6>(), testvals_int32); - - TestUnaryOp(OneMinusXOverOnePlusXForXIn01Op(), testvals_int32); - TestUnaryOp(TanhOp<0>(), testvals_int32); - TestUnaryOp(TanhOp<1>(), testvals_int32); - TestUnaryOp(TanhOp<2>(), testvals_int32); - TestUnaryOp(TanhOp<3>(), testvals_int32); - TestUnaryOp(TanhOp<4>(), testvals_int32); - TestUnaryOp(TanhOp<5>(), testvals_int32); - TestUnaryOp(TanhOp<6>(), testvals_int32); - - TestUnaryOp(OneOverOnePlusXForXIn01Op(), testvals_int32); - TestUnaryOp(LogisticOp<0>(), testvals_int32); - TestUnaryOp(LogisticOp<1>(), testvals_int32); - TestUnaryOp(LogisticOp<2>(), testvals_int32); - TestUnaryOp(LogisticOp<3>(), testvals_int32); - TestUnaryOp(LogisticOp<4>(), testvals_int32); - TestUnaryOp(LogisticOp<5>(), testvals_int32); - TestUnaryOp(LogisticOp<6>(), testvals_int32); - - for (auto a : testvals_int32) { - FixedPoint<std::int32_t, 4> x; - x.raw() = a; - test_convert(x); - } - - test_mul<0, 0>(testvals_int32); - test_mul<0, 1>(testvals_int32); - test_mul<2, 0>(testvals_int32); - test_mul<1, 1>(testvals_int32); - test_mul<4, 4>(testvals_int32); - test_mul<3, 5>(testvals_int32); - test_mul<7, 2>(testvals_int32); - test_mul<14, 15>(testvals_int32); - - test_Rescale<0, 0>(testvals_int32); - test_Rescale<0, 1>(testvals_int32); - test_Rescale<2, 0>(testvals_int32); - test_Rescale<4, 4>(testvals_int32); - test_Rescale<4, 5>(testvals_int32); - test_Rescale<6, 3>(testvals_int32); - test_Rescale<13, 9>(testvals_int32); - - test_ExactMulByPot<0, 0>(testvals_int32); - test_ExactMulByPot<0, 4>(testvals_int32); - test_ExactMulByPot<1, 4>(testvals_int32); - test_ExactMulByPot<3, 2>(testvals_int32); - test_ExactMulByPot<-4, 5>(testvals_int32); - test_ExactMulByPot<-2, 6>(testvals_int32); - - std::cerr << "All tests passed." << std::endl; + gemmlowp::TestFixedPoint<std::int32_t>().RunTests("Scalar int32"); + gemmlowp::TestFixedPoint<std::int16_t>().RunTests("Scalar int16"); +#ifdef GEMMLOWP_SSE4 + gemmlowp::TestFixedPoint<__m128i>().RunTests("SSE4 __m128i = int32x4"); + gemmlowp::TestFixedPoint<gemmlowp::int16x8_m128i>().RunTests( + "SSE4 __m128i = int16x8"); +#endif +#ifdef GEMMLOWP_NEON + gemmlowp::TestFixedPoint<int32x4_t>().RunTests("NEON int32x4_t"); + gemmlowp::TestFixedPoint<int16x8_t>().RunTests("NEON int16x8_t"); +#endif +#ifdef GEMMLOWP_MSA + gemmlowp::TestFixedPoint<v4i32>().RunTests("MSA v4i32"); + gemmlowp::TestFixedPoint<v8i16>().RunTests("MSA v8i16"); +#endif +#ifdef GEMMLOWP_AVX2 + gemmlowp::TestFixedPoint<__m256i>().RunTests("AVX __m256i"); + gemmlowp::TestFixedPoint<gemmlowp::int16x16_m256i>().RunTests( + "AVX2 __m256i = int16x16"); +#endif } |