diff options
Diffstat (limited to 'internal')
-rw-r--r-- | internal/allocator.h | 4 | ||||
-rw-r--r-- | internal/common.h | 2 | ||||
-rw-r--r-- | internal/detect_platform.h | 5 | ||||
-rw-r--r-- | internal/dispatch_gemm_shape.h | 6 | ||||
-rw-r--r-- | internal/kernel.h | 20 | ||||
-rw-r--r-- | internal/output_sse.h | 21 | ||||
-rw-r--r-- | internal/pack.h | 24 | ||||
-rw-r--r-- | internal/pack_sse.h | 10 | ||||
-rw-r--r-- | internal/platform.h | 3 |
9 files changed, 61 insertions, 34 deletions
diff --git a/internal/allocator.h b/internal/allocator.h index 3a6f077..e71df15 100644 --- a/internal/allocator.h +++ b/internal/allocator.h @@ -86,11 +86,11 @@ class Allocator { } // Alignment of allocated blocks. - static const std::size_t kAlignment = kDefaultCacheLineSize; + static constexpr std::size_t kAlignment = kDefaultCacheLineSize; // This is all we need so far, and since the usage pattern is fixed, // there is no point in allowing more until we need to. - static const std::size_t kMaxBlocks = 5; + static constexpr std::size_t kMaxBlocks = 5; void Commit() { assert(!committed_); diff --git a/internal/common.h b/internal/common.h index 332ad07..708cc40 100644 --- a/internal/common.h +++ b/internal/common.h @@ -165,7 +165,7 @@ Integer RoundUpToPowerOfTwo(Integer n) { template <int N> struct IsPowerOfTwo { - static const bool value = !(N & (N - 1)); + static constexpr bool value = !(N & (N - 1)); }; template <typename T> diff --git a/internal/detect_platform.h b/internal/detect_platform.h index 6f06d19..7f0d78c 100644 --- a/internal/detect_platform.h +++ b/internal/detect_platform.h @@ -71,6 +71,11 @@ #define GEMMLOWP_X86 #endif +// Detect WebAssembly SIMD. +#if defined(__wasm_simd128__) +#define GEMMLOWP_WASMSIMD +#endif + // Some of our optimized paths use inline assembly and for // now we don't bother enabling some other optimized paths using intrinddics // where we can't use inline assembly paths. diff --git a/internal/dispatch_gemm_shape.h b/internal/dispatch_gemm_shape.h index ba4f341..b844f78 100644 --- a/internal/dispatch_gemm_shape.h +++ b/internal/dispatch_gemm_shape.h @@ -74,7 +74,8 @@ struct TransposeImpl<MatrixMap<Scalar, Order>> { template <VectorShape Shape> struct TransposeImpl<OutputStageQuantizeDownInt32ToUint8ScalePC<Shape>> { typedef OutputStageQuantizeDownInt32ToUint8ScalePC<Shape> SrcType; - static const VectorShape TransposedShape = TransposeVectorShape<Shape>::Value; + static constexpr VectorShape TransposedShape = + TransposeVectorShape<Shape>::Value; typedef OutputStageQuantizeDownInt32ToUint8ScalePC<TransposedShape> DstType; static DstType Run(const SrcType& src) { DstType dst; @@ -88,7 +89,8 @@ struct TransposeImpl<OutputStageQuantizeDownInt32ToUint8ScalePC<Shape>> { template <VectorShape Shape> struct TransposeImpl<OutputStageScaleInt32ByFixedPointAndExponentPC<Shape>> { typedef OutputStageScaleInt32ByFixedPointAndExponentPC<Shape> SrcType; - static const VectorShape TransposedShape = TransposeVectorShape<Shape>::Value; + static constexpr VectorShape TransposedShape = + TransposeVectorShape<Shape>::Value; typedef OutputStageScaleInt32ByFixedPointAndExponentPC<TransposedShape> DstType; static DstType Run(const SrcType& src) { diff --git a/internal/kernel.h b/internal/kernel.h index 3120216..f1a3fd8 100644 --- a/internal/kernel.h +++ b/internal/kernel.h @@ -126,11 +126,11 @@ enum class CellOrder { DepthMajor, WidthMajor, Diagonal }; // out in a cell. That is, a CellOrder together with actual dimensions. template <int tWidth, int tDepth, CellOrder tOrder = CellOrder::DepthMajor> struct CellFormat { - static const int kWidth = tWidth; - static const int kDepth = tDepth; - static const CellOrder kOrder = tOrder; + static constexpr int kWidth = tWidth; + static constexpr int kDepth = tDepth; + static constexpr CellOrder kOrder = tOrder; - static const int kSize = kWidth * kDepth; + static constexpr int kSize = kWidth * kDepth; }; // KernelSideFormat describes how data is laid out in a kernel side @@ -142,9 +142,9 @@ struct CellFormat { template <typename tCellFormat, int tCells> struct KernelSideFormat { typedef tCellFormat Cell; - static const int kCells = tCells; - static const int kWidth = kCells * Cell::kWidth; - static const int kDepth = Cell::kDepth; + static constexpr int kCells = tCells; + static constexpr int kWidth = kCells * Cell::kWidth; + static constexpr int kDepth = Cell::kDepth; typedef std::uint8_t Scalar; // The scalar type of the Format. typedef std::uint8_t InputScalar; // The scalar type of the original input. }; @@ -173,9 +173,9 @@ struct KernelFormat { typedef tRhs Rhs; static_assert(Lhs::Cell::kDepth == Rhs::Cell::kDepth, ""); - static const int kDepth = Lhs::Cell::kDepth; - static const int kRows = Lhs::Cell::kWidth * Lhs::kCells; - static const int kCols = Rhs::Cell::kWidth * Rhs::kCells; + static constexpr int kDepth = Lhs::Cell::kDepth; + static constexpr int kRows = Lhs::Cell::kWidth * Lhs::kCells; + static constexpr int kCols = Rhs::Cell::kWidth * Rhs::kCells; }; inline const char* CellOrderName(CellOrder o) { diff --git a/internal/output_sse.h b/internal/output_sse.h index 75aebfd..6ea3290 100644 --- a/internal/output_sse.h +++ b/internal/output_sse.h @@ -535,6 +535,27 @@ struct StoreFinalOutputImpl<RegBlockUint8<8, 8>, DstType> { } }; +// Specialization for MatrixMap, for performance. +template <typename tScalar, MapOrder tOrder> +struct StoreFinalOutputImpl<RegBlockUint8<8, 8>, MatrixMap<tScalar, tOrder>> { + static void Run(const RegBlockUint8<8, 8>& src, + MatrixMap<tScalar, tOrder>* dst, int row, int col) { + std::uint8_t buf[64]; + StoreUint8x16(buf, src.buf.reg[0]); + StoreUint8x16(buf + 16, src.buf.reg[1]); + StoreUint8x16(buf + 32, src.buf.reg[2]); + StoreUint8x16(buf + 48, src.buf.reg[3]); + // Make a local copy so that the compiler can prove that data_ does not + // alias &data_ or &stride_. + MatrixMap<tScalar, tOrder> local = *dst; + for (int c = 0; c < 8; c++) { + for (int r = 0; r < 8; r++) { + *local.data(row + r, col + c) = buf[r + 8 * c]; + } + } + } +}; + } // namespace gemmlowp #endif // GEMMLOWP_INTERNAL_OUTPUT_SSE_H_ diff --git a/internal/pack.h b/internal/pack.h index 7c43d6e..82f0dd1 100644 --- a/internal/pack.h +++ b/internal/pack.h @@ -143,7 +143,7 @@ template <typename tScalar, SideMapOrder tOrder> class SideMap { public: typedef tScalar Scalar; - static const SideMapOrder kOrder = tOrder; + static constexpr SideMapOrder kOrder = tOrder; SideMap(Scalar* data, int width, int depth, int stride) : data_(data), width_(width), depth_(depth), stride_(stride) {} @@ -214,13 +214,13 @@ class PackingRegisterBlockBase { typedef typename KernelSideFormat::Cell CellFormat; typedef typename KernelSideFormat::InputScalar KernelInputScalar; typedef typename KernelSideFormat::Scalar KernelScalar; - static const int kCells = KernelSideFormat::kCells; - static const int kCellWidth = CellFormat::kWidth; - static const int kKernelWidth = CellFormat::kWidth * kCells; - static const int kCellDepth = CellFormat::kDepth; - static const int kCellSize = CellFormat::kSize; - static const SideMapOrder kSrcOrder = SrcMapType::kOrder; - static const int kZeroPointInputValue = + static constexpr int kCells = KernelSideFormat::kCells; + static constexpr int kCellWidth = CellFormat::kWidth; + static constexpr int kKernelWidth = CellFormat::kWidth * kCells; + static constexpr int kCellDepth = CellFormat::kDepth; + static constexpr int kCellSize = CellFormat::kSize; + static constexpr SideMapOrder kSrcOrder = SrcMapType::kOrder; + static constexpr int kZeroPointInputValue = ZeroPointInputValue<KernelInputScalar, KernelScalar>::kValue; PackingRegisterBlockBase() : complete_src_(nullptr, 0, 0, 0) {} @@ -302,10 +302,10 @@ class PackSideBlockImpl { public: typedef typename PackedSideBlock::KernelSideFormat KernelSideFormat; typedef typename KernelSideFormat::Cell CellFormat; - static const int kCells = KernelSideFormat::kCells; - static const int kCellWidth = CellFormat::kWidth; - static const int kKernelWidth = CellFormat::kWidth * kCells; - static const int kCellDepth = CellFormat::kDepth; + static constexpr int kCells = KernelSideFormat::kCells; + static constexpr int kCellWidth = CellFormat::kWidth; + static constexpr int kKernelWidth = CellFormat::kWidth * kCells; + static constexpr int kCellDepth = CellFormat::kDepth; typedef PackingRegisterBlock<SrcMapType, PackedSideBlock> PackingRegisterBlockType; diff --git a/internal/pack_sse.h b/internal/pack_sse.h index 52163c4..b729014 100644 --- a/internal/pack_sse.h +++ b/internal/pack_sse.h @@ -41,11 +41,11 @@ class PackingRegisterBlock< public: typedef WidthMajorSideFormatNCells4x2<Cells> KernelSideFormat; typedef typename KernelSideFormat::Cell CellFormat; - static const int kCells = KernelSideFormat::kCells; - static const int kCellWidth = CellFormat::kWidth; - static const int kKernelWidth = CellFormat::kWidth * kCells; - static const int kCellDepth = CellFormat::kDepth; - static const int kCellSize = CellFormat::kSize; + static constexpr int kCells = KernelSideFormat::kCells; + static constexpr int kCellWidth = CellFormat::kWidth; + static constexpr int kKernelWidth = CellFormat::kWidth * kCells; + static constexpr int kCellDepth = CellFormat::kDepth; + static constexpr int kCellSize = CellFormat::kSize; void Pack(PackedSideBlock<KernelSideFormat>* dst, int start_width) { std::uint8_t* dst_ptr = dst->current_data(); diff --git a/internal/platform.h b/internal/platform.h index 54517c3..0f3a2b8 100644 --- a/internal/platform.h +++ b/internal/platform.h @@ -30,8 +30,7 @@ #include <sys/time.h> #endif -#if defined __ANDROID__ -#include <android/api-level.h> +#if defined ANDROID || defined __ANDROID__ #include <malloc.h> // The 18 here should be 16, but has to be 18 for now due // to a Google-internal issue. |