aboutsummaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
Diffstat (limited to 'internal')
-rw-r--r-- internal/allocator.h 4
-rw-r--r-- internal/common.h 2
-rw-r--r-- internal/detect_platform.h 5
-rw-r--r-- internal/dispatch_gemm_shape.h 6
-rw-r--r-- internal/kernel.h 20
-rw-r--r-- internal/output_sse.h 21
-rw-r--r-- internal/pack.h 24
-rw-r--r-- internal/pack_sse.h 10
-rw-r--r-- internal/platform.h 3
9 files changed, 61 insertions, 34 deletions
diff --git a/internal/allocator.h b/internal/allocator.h
index 3a6f077..e71df15 100644
--- a/internal/allocator.h
+++ b/internal/allocator.h
@@ -86,11 +86,11 @@ class Allocator {
}
// Alignment of allocated blocks.
- static const std::size_t kAlignment = kDefaultCacheLineSize;
+ static constexpr std::size_t kAlignment = kDefaultCacheLineSize;
// This is all we need so far, and since the usage pattern is fixed,
// there is no point in allowing more until we need to.
- static const std::size_t kMaxBlocks = 5;
+ static constexpr std::size_t kMaxBlocks = 5;
void Commit() {
assert(!committed_);
diff --git a/internal/common.h b/internal/common.h
index 332ad07..708cc40 100644
--- a/internal/common.h
+++ b/internal/common.h
@@ -165,7 +165,7 @@ Integer RoundUpToPowerOfTwo(Integer n) {
template <int N>
struct IsPowerOfTwo {
- static const bool value = !(N & (N - 1));
+ static constexpr bool value = !(N & (N - 1));
};
template <typename T>
diff --git a/internal/detect_platform.h b/internal/detect_platform.h
index 6f06d19..7f0d78c 100644
--- a/internal/detect_platform.h
+++ b/internal/detect_platform.h
@@ -71,6 +71,11 @@
#define GEMMLOWP_X86
#endif
+// Detect WebAssembly SIMD.
+#if defined(__wasm_simd128__)
+#define GEMMLOWP_WASMSIMD
+#endif
+
// Some of our optimized paths use inline assembly and for
// now we don't bother enabling some other optimized paths using intrinsics
// where we can't use inline assembly paths.
diff --git a/internal/dispatch_gemm_shape.h b/internal/dispatch_gemm_shape.h
index ba4f341..b844f78 100644
--- a/internal/dispatch_gemm_shape.h
+++ b/internal/dispatch_gemm_shape.h
@@ -74,7 +74,8 @@ struct TransposeImpl<MatrixMap<Scalar, Order>> {
template <VectorShape Shape>
struct TransposeImpl<OutputStageQuantizeDownInt32ToUint8ScalePC<Shape>> {
typedef OutputStageQuantizeDownInt32ToUint8ScalePC<Shape> SrcType;
- static const VectorShape TransposedShape = TransposeVectorShape<Shape>::Value;
+ static constexpr VectorShape TransposedShape =
+ TransposeVectorShape<Shape>::Value;
typedef OutputStageQuantizeDownInt32ToUint8ScalePC<TransposedShape> DstType;
static DstType Run(const SrcType& src) {
DstType dst;
@@ -88,7 +89,8 @@ struct TransposeImpl<OutputStageQuantizeDownInt32ToUint8ScalePC<Shape>> {
template <VectorShape Shape>
struct TransposeImpl<OutputStageScaleInt32ByFixedPointAndExponentPC<Shape>> {
typedef OutputStageScaleInt32ByFixedPointAndExponentPC<Shape> SrcType;
- static const VectorShape TransposedShape = TransposeVectorShape<Shape>::Value;
+ static constexpr VectorShape TransposedShape =
+ TransposeVectorShape<Shape>::Value;
typedef OutputStageScaleInt32ByFixedPointAndExponentPC<TransposedShape>
DstType;
static DstType Run(const SrcType& src) {
diff --git a/internal/kernel.h b/internal/kernel.h
index 3120216..f1a3fd8 100644
--- a/internal/kernel.h
+++ b/internal/kernel.h
@@ -126,11 +126,11 @@ enum class CellOrder { DepthMajor, WidthMajor, Diagonal };
// out in a cell. That is, a CellOrder together with actual dimensions.
template <int tWidth, int tDepth, CellOrder tOrder = CellOrder::DepthMajor>
struct CellFormat {
- static const int kWidth = tWidth;
- static const int kDepth = tDepth;
- static const CellOrder kOrder = tOrder;
+ static constexpr int kWidth = tWidth;
+ static constexpr int kDepth = tDepth;
+ static constexpr CellOrder kOrder = tOrder;
- static const int kSize = kWidth * kDepth;
+ static constexpr int kSize = kWidth * kDepth;
};
// KernelSideFormat describes how data is laid out in a kernel side
@@ -142,9 +142,9 @@ struct CellFormat {
template <typename tCellFormat, int tCells>
struct KernelSideFormat {
typedef tCellFormat Cell;
- static const int kCells = tCells;
- static const int kWidth = kCells * Cell::kWidth;
- static const int kDepth = Cell::kDepth;
+ static constexpr int kCells = tCells;
+ static constexpr int kWidth = kCells * Cell::kWidth;
+ static constexpr int kDepth = Cell::kDepth;
typedef std::uint8_t Scalar; // The scalar type of the Format.
typedef std::uint8_t InputScalar; // The scalar type of the original input.
};
@@ -173,9 +173,9 @@ struct KernelFormat {
typedef tRhs Rhs;
static_assert(Lhs::Cell::kDepth == Rhs::Cell::kDepth, "");
- static const int kDepth = Lhs::Cell::kDepth;
- static const int kRows = Lhs::Cell::kWidth * Lhs::kCells;
- static const int kCols = Rhs::Cell::kWidth * Rhs::kCells;
+ static constexpr int kDepth = Lhs::Cell::kDepth;
+ static constexpr int kRows = Lhs::Cell::kWidth * Lhs::kCells;
+ static constexpr int kCols = Rhs::Cell::kWidth * Rhs::kCells;
};
inline const char* CellOrderName(CellOrder o) {
diff --git a/internal/output_sse.h b/internal/output_sse.h
index 75aebfd..6ea3290 100644
--- a/internal/output_sse.h
+++ b/internal/output_sse.h
@@ -535,6 +535,27 @@ struct StoreFinalOutputImpl<RegBlockUint8<8, 8>, DstType> {
}
};
+// Specialization for MatrixMap, for performance.
+template <typename tScalar, MapOrder tOrder>
+struct StoreFinalOutputImpl<RegBlockUint8<8, 8>, MatrixMap<tScalar, tOrder>> {
+ static void Run(const RegBlockUint8<8, 8>& src,
+ MatrixMap<tScalar, tOrder>* dst, int row, int col) {
+ std::uint8_t buf[64];
+ StoreUint8x16(buf, src.buf.reg[0]);
+ StoreUint8x16(buf + 16, src.buf.reg[1]);
+ StoreUint8x16(buf + 32, src.buf.reg[2]);
+ StoreUint8x16(buf + 48, src.buf.reg[3]);
+ // Make a local copy so that the compiler can prove that data_ does not
+ // alias &data_ or &stride_.
+ MatrixMap<tScalar, tOrder> local = *dst;
+ for (int c = 0; c < 8; c++) {
+ for (int r = 0; r < 8; r++) {
+ *local.data(row + r, col + c) = buf[r + 8 * c];
+ }
+ }
+ }
+};
+
} // namespace gemmlowp
#endif // GEMMLOWP_INTERNAL_OUTPUT_SSE_H_
diff --git a/internal/pack.h b/internal/pack.h
index 7c43d6e..82f0dd1 100644
--- a/internal/pack.h
+++ b/internal/pack.h
@@ -143,7 +143,7 @@ template <typename tScalar, SideMapOrder tOrder>
class SideMap {
public:
typedef tScalar Scalar;
- static const SideMapOrder kOrder = tOrder;
+ static constexpr SideMapOrder kOrder = tOrder;
SideMap(Scalar* data, int width, int depth, int stride)
: data_(data), width_(width), depth_(depth), stride_(stride) {}
@@ -214,13 +214,13 @@ class PackingRegisterBlockBase {
typedef typename KernelSideFormat::Cell CellFormat;
typedef typename KernelSideFormat::InputScalar KernelInputScalar;
typedef typename KernelSideFormat::Scalar KernelScalar;
- static const int kCells = KernelSideFormat::kCells;
- static const int kCellWidth = CellFormat::kWidth;
- static const int kKernelWidth = CellFormat::kWidth * kCells;
- static const int kCellDepth = CellFormat::kDepth;
- static const int kCellSize = CellFormat::kSize;
- static const SideMapOrder kSrcOrder = SrcMapType::kOrder;
- static const int kZeroPointInputValue =
+ static constexpr int kCells = KernelSideFormat::kCells;
+ static constexpr int kCellWidth = CellFormat::kWidth;
+ static constexpr int kKernelWidth = CellFormat::kWidth * kCells;
+ static constexpr int kCellDepth = CellFormat::kDepth;
+ static constexpr int kCellSize = CellFormat::kSize;
+ static constexpr SideMapOrder kSrcOrder = SrcMapType::kOrder;
+ static constexpr int kZeroPointInputValue =
ZeroPointInputValue<KernelInputScalar, KernelScalar>::kValue;
PackingRegisterBlockBase() : complete_src_(nullptr, 0, 0, 0) {}
@@ -302,10 +302,10 @@ class PackSideBlockImpl {
public:
typedef typename PackedSideBlock::KernelSideFormat KernelSideFormat;
typedef typename KernelSideFormat::Cell CellFormat;
- static const int kCells = KernelSideFormat::kCells;
- static const int kCellWidth = CellFormat::kWidth;
- static const int kKernelWidth = CellFormat::kWidth * kCells;
- static const int kCellDepth = CellFormat::kDepth;
+ static constexpr int kCells = KernelSideFormat::kCells;
+ static constexpr int kCellWidth = CellFormat::kWidth;
+ static constexpr int kKernelWidth = CellFormat::kWidth * kCells;
+ static constexpr int kCellDepth = CellFormat::kDepth;
typedef PackingRegisterBlock<SrcMapType, PackedSideBlock>
PackingRegisterBlockType;
diff --git a/internal/pack_sse.h b/internal/pack_sse.h
index 52163c4..b729014 100644
--- a/internal/pack_sse.h
+++ b/internal/pack_sse.h
@@ -41,11 +41,11 @@ class PackingRegisterBlock<
public:
typedef WidthMajorSideFormatNCells4x2<Cells> KernelSideFormat;
typedef typename KernelSideFormat::Cell CellFormat;
- static const int kCells = KernelSideFormat::kCells;
- static const int kCellWidth = CellFormat::kWidth;
- static const int kKernelWidth = CellFormat::kWidth * kCells;
- static const int kCellDepth = CellFormat::kDepth;
- static const int kCellSize = CellFormat::kSize;
+ static constexpr int kCells = KernelSideFormat::kCells;
+ static constexpr int kCellWidth = CellFormat::kWidth;
+ static constexpr int kKernelWidth = CellFormat::kWidth * kCells;
+ static constexpr int kCellDepth = CellFormat::kDepth;
+ static constexpr int kCellSize = CellFormat::kSize;
void Pack(PackedSideBlock<KernelSideFormat>* dst, int start_width) {
std::uint8_t* dst_ptr = dst->current_data();
diff --git a/internal/platform.h b/internal/platform.h
index 54517c3..0f3a2b8 100644
--- a/internal/platform.h
+++ b/internal/platform.h
@@ -30,8 +30,7 @@
#include <sys/time.h>
#endif
-#if defined __ANDROID__
-#include <android/api-level.h>
+#if defined ANDROID || defined __ANDROID__
#include <malloc.h>
// The 18 here should be 16, but has to be 18 for now due
// to a Google-internal issue.