diff options
author | Lev Proleev <levp@google.com> | 2021-02-26 21:44:39 +0000 |
---|---|---|
committer | Lev Proleev <levp@google.com> | 2021-02-26 22:17:12 +0000 |
commit | 123f384187504585be3fe01002381dd459c17d96 (patch) | |
tree | a29716289a0b730ca66a3e632c6ce054eb3b90d6 /internal/output_sse.h | |
parent | 8dd5f1b93261d6ea0fe0c8e51d13f89657ceb0b8 (diff) | |
download | gemmlowp-123f384187504585be3fe01002381dd459c17d96.tar.gz |
Update gemmlowp to 13d57703abca3005d97b19df1f2db731607a7dc2
An update is needed after the TF Lite rebase.
Bug: 178609672
Test: mma, NeuralNetworksStatic_test
Change-Id: Ia7f04fc5b6bd760549395854618d8b20f5c8d228
Diffstat (limited to 'internal/output_sse.h')
-rw-r--r-- | internal/output_sse.h | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/internal/output_sse.h b/internal/output_sse.h
index 75aebfd..6ea3290 100644
--- a/internal/output_sse.h
+++ b/internal/output_sse.h
@@ -535,6 +535,27 @@ struct StoreFinalOutputImpl<RegBlockUint8<8, 8>, DstType> {
   }
 };
 
+// Specialization for MatrixMap, for performance.
+template <typename tScalar, MapOrder tOrder>
+struct StoreFinalOutputImpl<RegBlockUint8<8, 8>, MatrixMap<tScalar, tOrder>> {
+  static void Run(const RegBlockUint8<8, 8>& src,
+                  MatrixMap<tScalar, tOrder>* dst, int row, int col) {
+    std::uint8_t buf[64];
+    StoreUint8x16(buf, src.buf.reg[0]);
+    StoreUint8x16(buf + 16, src.buf.reg[1]);
+    StoreUint8x16(buf + 32, src.buf.reg[2]);
+    StoreUint8x16(buf + 48, src.buf.reg[3]);
+    // Make a local copy so that the compiler can prove that data_ does not
+    // alias &data_ or &stride_.
+    MatrixMap<tScalar, tOrder> local = *dst;
+    for (int c = 0; c < 8; c++) {
+      for (int r = 0; r < 8; r++) {
+        *local.data(row + r, col + c) = buf[r + 8 * c];
+      }
+    }
+  }
+};
+
 }  // namespace gemmlowp
 
 #endif  // GEMMLOWP_INTERNAL_OUTPUT_SSE_H_