aboutsummaryrefslogtreecommitdiff
path: root/internal/kernel_default.h
diff options
context:
space:
mode:
Diffstat (limited to 'internal/kernel_default.h')
-rw-r--r--internal/kernel_default.h70
1 files changed, 44 insertions, 26 deletions
diff --git a/internal/kernel_default.h b/internal/kernel_default.h
index a919ffe..29b0991 100644
--- a/internal/kernel_default.h
+++ b/internal/kernel_default.h
@@ -20,66 +20,84 @@
#include "../public/bit_depth.h"
#include "common.h"
+#include "kernel.h"
#include "kernel_reference.h"
namespace gemmlowp {
-template <bool MaxProductIsLessThan4096, bool LhsAlwaysNonzero>
+template <bool MaxProductIsLessThan4096, bool IsUnsigned, bool LhsNonZero>
struct DefaultKernelImpl {};
// Partial specialization implementing the logic that if we want to use
-// a kernel for LhsAlwaysNonzero but do not have such a kernel, then we fall
-// back to a generic kernel not taking advantage of LhsAlwaysNonzero.
-template <bool LhsAlwaysNonzero>
-struct DefaultKernelImpl<true, LhsAlwaysNonzero>
- : DefaultKernelImpl<false, LhsAlwaysNonzero> {};
-
-// Partial specialization implementing the logic that if we want to use
// a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we
// fall back to a generic kernel not taking advantage of
// MaxProductIsLessThan4096.
+template <bool LhsNonZero>
+struct DefaultKernelImpl<true, true, LhsNonZero>
+ : DefaultKernelImpl<false, true, LhsNonZero> {};
+
+// Partial specialization implementing the logic that if we want to use
+// a kernel for LhsNonZero but do not have such a kernel, then we fall
+// back to a generic kernel not taking advantage of LhsNonZero.
template <bool MaxProductIsLessThan4096>
-struct DefaultKernelImpl<MaxProductIsLessThan4096, true>
- : DefaultKernelImpl<MaxProductIsLessThan4096, false> {};
+struct DefaultKernelImpl<MaxProductIsLessThan4096, true, true>
+ : DefaultKernelImpl<MaxProductIsLessThan4096, true, false> {};
template <typename BitDepthParams>
struct DefaultKernel
: DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue *
BitDepthParams::RhsRange::kMaxValue <
4096),
- (BitDepthParams::LhsRange::kMinValue > 0)> {};
+ (BitDepthParams::LhsRange::kMinValue >= 0),
+ (BitDepthParams::LhsRange::kMinValue > 0 ||
+ (BitDepthParams::LhsRange::kMaxValue <= 127 &&
+ BitDepthParams::LhsRange::kMinValue > -128))> {};
} // end namespace gemmlowp
-#define GEMMLOWP_SET_DEFAULT_KERNEL(MaxProductIsLessThan4096, \
- LhsAlwaysNonzero, Kernel) \
- namespace gemmlowp { \
- template <> \
- struct DefaultKernelImpl<MaxProductIsLessThan4096, LhsAlwaysNonzero> \
- : Kernel {}; \
+#define GEMMLOWP_SET_DEFAULT_KERNEL(MaxProductIsLessThan4096, IsUnsigned, \
+ LhsAlwaysNonZero, Kernel) \
+ namespace gemmlowp { \
+ template <> \
+ struct DefaultKernelImpl<MaxProductIsLessThan4096, IsUnsigned, \
+ LhsAlwaysNonZero> : Kernel {}; \
}
+// User-provided int8 inputs is only supported in the NEON path currently.
#if defined GEMMLOWP_NEON_32
#include "kernel_neon.h"
-GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_32_Kernel12x4Depth2)
-GEMMLOWP_SET_DEFAULT_KERNEL(true, false,
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_32_Kernel12x4Depth2)
+GEMMLOWP_SET_DEFAULT_KERNEL(true, true, false,
NEON_32_Kernel12x4Depth2Assuming12BitProducts)
-GEMMLOWP_SET_DEFAULT_KERNEL(false, true,
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true,
NEON_32bit_GEMM_Int8Operands_LhsNonzero)
+GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true,
+ NEON_32bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs)
#elif defined GEMMLOWP_NEON_64
#include "kernel_neon.h"
-GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_64_Kernel12x8Depth2)
-GEMMLOWP_SET_DEFAULT_KERNEL(false, true,
+#if defined GEMMLOWP_DOTPROD_KERNEL
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false,
+ NEON_64_Kernel12x8Depth4_dotprod)
+#else
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_64_Kernel12x8Depth2)
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true,
NEON_64bit_GEMM_Int8Operands_LhsNonzero)
+#endif
+GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true,
+ NEON_64bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs)
#elif defined(GEMMLOWP_MSA)
#include "kernel_msa.h"
-GEMMLOWP_SET_DEFAULT_KERNEL(false, false, MSA_Kernel12x8Depth2)
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, MSA_Kernel12x8Depth2)
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true, MSA_GEMM_Int8Operands_LhsNonzero)
#elif defined GEMMLOWP_SSE4_32
#include "kernel_sse.h"
-GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_32_Kernel4x4Depth2)
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_32_Kernel4x4Depth2)
#elif defined GEMMLOWP_SSE4_64
#include "kernel_sse.h"
-GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_64_Kernel12x4Depth2)
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_64_Kernel12x4Depth2)
+#elif defined GEMMLOWP_AVX2_64
+#include "kernel_avx.h"
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, AVX2_64_Kernel24x8Depth2)
#else
#include "kernel_reference.h"
namespace gemmlowp {
@@ -88,7 +106,7 @@ typedef ReferenceKernel<KernelFormat<
KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > >
DefaultReferenceKernel;
}
-GEMMLOWP_SET_DEFAULT_KERNEL(false, false, DefaultReferenceKernel)
+GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, DefaultReferenceKernel)
#endif
#endif // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_