aboutsummaryrefslogtreecommitdiff
path: root/internal/output_sse.h
diff options
context:
space:
mode:
authorLev Proleev <levp@google.com>2021-02-26 21:44:39 +0000
committerLev Proleev <levp@google.com>2021-02-26 22:17:12 +0000
commit123f384187504585be3fe01002381dd459c17d96 (patch)
treea29716289a0b730ca66a3e632c6ce054eb3b90d6 /internal/output_sse.h
parent8dd5f1b93261d6ea0fe0c8e51d13f89657ceb0b8 (diff)
downloadgemmlowp-123f384187504585be3fe01002381dd459c17d96.tar.gz
Update gemmlowp to 13d57703abca3005d97b19df1f2db731607a7dc2
An update is needed after TF Lite rebase. Bug: 178609672 Test: mma, NeuralNetworksStatic_test Change-Id: Ia7f04fc5b6bd760549395854618d8b20f5c8d228
Diffstat (limited to 'internal/output_sse.h')
-rw-r--r--internal/output_sse.h21
1 files changed, 21 insertions, 0 deletions
diff --git a/internal/output_sse.h b/internal/output_sse.h
index 75aebfd..6ea3290 100644
--- a/internal/output_sse.h
+++ b/internal/output_sse.h
@@ -535,6 +535,27 @@ struct StoreFinalOutputImpl<RegBlockUint8<8, 8>, DstType> {
}
};
+// MatrixMap-specific variant of the 8x8 uint8 store, kept separate for
+// performance.
+template <typename tScalar, MapOrder tOrder>
+struct StoreFinalOutputImpl<RegBlockUint8<8, 8>, MatrixMap<tScalar, tOrder>> {
+  // Writes the 64 bytes held by `src` into the 8x8 region of `*dst` whose
+  // first entry is (row, col).
+  static void Run(const RegBlockUint8<8, 8>& src,
+                  MatrixMap<tScalar, tOrder>* dst, int row, int col) {
+    // Spill the four registers back to back into a 64-byte staging array.
+    std::uint8_t staged[64];
+    for (int i = 0; i < 4; ++i) {
+      StoreUint8x16(staged + 16 * i, src.buf.reg[i]);
+    }
+    // Operate on a by-value copy of the map: this lets the compiler prove
+    // that data_ does not alias &data_ or &stride_.
+    MatrixMap<tScalar, tOrder> dst_copy = *dst;
+    // Block entry (dr, dc) is read from staged[dr + 8 * dc].
+    for (int dc = 0; dc < 8; ++dc) {
+      const std::uint8_t* column = staged + 8 * dc;
+      for (int dr = 0; dr < 8; ++dr) {
+        *dst_copy.data(row + dr, col + dc) = column[dr];
+      }
+    }
+  }
+};
+
} // namespace gemmlowp
#endif // GEMMLOWP_INTERNAL_OUTPUT_SSE_H_