diff options
author | Nigel Tao <nigeltao@google.com> | 2018-11-08 10:47:24 +1100 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2018-11-08 18:15:10 +0000 |
commit | 0185b9578dbb2bd4b99b1a79e39771a5ccd44ba8 (patch) | |
tree | c9d3d7fa71c07683091fc98cfdf1c907707f6f8c /src/codec | |
parent | 9d4f4506184791fd085edb05947ee6c1189a666c (diff) | |
download | skqp-0185b9578dbb2bd4b99b1a79e39771a5ccd44ba8.tar.gz |
Have SkWuffsCodec use a SkSwizzler
To measure the performance impact, I patched in
https://skia-review.googlesource.com/c/skia/+/162980 "Add
bench/DecodeBench.cpp" and took the best time (the "min" column):
gcc 7.3, x86_64 Broadwell
Other Before After
11.6ms 7.58ms 5.01ms Decode_GIF_test640x479.gif
21.0ms 15.8ms 11.9ms Decode_GIF_flightAnim.gif
79.8µs 60.1µs 46.6µs Decode_GIF_color_wheel.gif
As ratios:
Other Before After
2.31x 1.51x 1.00x Decode_GIF_test640x479.gif
1.76x 1.33x 1.00x Decode_GIF_flightAnim.gif
1.71x 1.29x 1.00x Decode_GIF_color_wheel.gif
Other is with skia_use_wuffs=false
Before is with skia_use_wuffs=true, before this CL
After is with skia_use_wuffs=true, after this CL
Bug: skia:8235
Change-Id: I7a1df172d172ccd668dfcbeb9728ee053fe831b1
Reviewed-on: https://skia-review.googlesource.com/c/165640
Reviewed-by: Leon Scroggins <scroggo@google.com>
Commit-Queue: Leon Scroggins <scroggo@google.com>
Diffstat (limited to 'src/codec')
-rw-r--r-- | src/codec/SkWuffsCodec.cpp | 221 |
1 files changed, 140 insertions, 81 deletions
diff --git a/src/codec/SkWuffsCodec.cpp b/src/codec/SkWuffsCodec.cpp index cb647487bf..4d2bbe7e7a 100644 --- a/src/codec/SkWuffsCodec.cpp +++ b/src/codec/SkWuffsCodec.cpp @@ -10,41 +10,12 @@ #include "../private/SkMalloc.h" #include "SkFrameHolder.h" #include "SkSampler.h" +#include "SkSwizzler.h" +#include "SkUtils.h" #include "wuffs-v0.2.h" #define SK_WUFFS_CODEC_BUFFER_SIZE 4096 -// TODO(nigeltao): use a swizzler instead of load_u32le and store_etc. - -static inline uint32_t load_u32le(uint8_t* p) { - return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) | ((uint32_t)(p[2]) << 16) | - ((uint32_t)(p[3]) << 24); -} - -static inline void store_u32le(uint8_t* p, uint32_t x) { - p[0] = x >> 0; - p[1] = x >> 8; - p[2] = x >> 16; - p[3] = x >> 24; -} - -static inline void store_u32le_switched(uint8_t* p, uint32_t x) { - // This could probably be optimized, but in any case, we should use a - // swizzler. - p[0] = x >> 16; - p[1] = x >> 8; - p[2] = x >> 0; - p[3] = x >> 24; -} - -static inline void store_565(uint8_t* p, uint32_t argb) { - uint32_t r5 = 0x1F & (argb >> ((8 - 5) + 16)); - uint32_t g6 = 0x3F & (argb >> ((8 - 6) + 8)); - uint32_t b5 = 0x1F & (argb >> ((8 - 5) + 0)); - p[0] = (b5 << 0) | (g6 << 5); - p[1] = (g6 >> 3) | (r5 << 3); -} - static bool fill_buffer(wuffs_base__io_buffer* b, SkStream* s) { b->compact(); size_t num_read = s->read(b->data.ptr + b->meta.wi, b->data.len - b->meta.wi); @@ -133,6 +104,47 @@ private: typedef SkFrameHolder INHERITED; }; +// SkWuffsSpySampler is a placeholder SkSampler implementation. The Skia API +// expects to manipulate the codec's sampler (i.e. call setSampleX and +// setSampleY) in between the startIncrementalDecode (SID) and +// incrementalDecode (ID) calls. But creating the SkSwizzler (the real sampler) +// requires knowing the destination buffer's dimensions, i.e. the animation +// frame's width and height. That width and height are decoded in ID, not SID. +// +// To break that circle, the SkWuffsSpySampler always exists, so its methods +// can be called between SID and ID. It doesn't actually do any sampling, it +// merely records the arguments given to setSampleX (explicitly) and setSampleY +// (implicitly, via the superclass' implementation). Inside ID, those recorded +// arguments are forwarded on to the SkSwizzler (the real sampler) when that +// SkSwizzler is created, after the frame width and height are known. +// +// Roughly speaking, the SkWuffsSpySampler is an eager proxy for the lazily +// constructed real sampler. But that laziness is out of necessity. +// +// The "Spy" name is because it records its arguments. See +// https://martinfowler.com/articles/mocksArentStubs.html#TheDifferenceBetweenMocksAndStubs +class SkWuffsSpySampler final : public SkSampler { +public: + SkWuffsSpySampler(int imageWidth) + : INHERITED(), fFillWidth(0), fImageWidth(imageWidth), fSampleX(1) {} + + void reset(); + int sampleX() const; + + int fFillWidth; + +private: + // SkSampler overrides. + int fillWidth() const override; + int onSetSampleX(int sampleX) override; + + const int fImageWidth; + + int fSampleX; + + typedef SkSampler INHERITED; +}; + class SkWuffsCodec final : public SkCodec { public: SkWuffsCodec(SkEncodedInfo&& encodedInfo, @@ -160,6 +172,7 @@ private: int onGetFrameCount() override; bool onGetFrameInfo(int, FrameInfo*) const override; int onGetRepetitionCount() override; + SkSampler* getSampler(bool createIfNecessary) override; void readFrames(); Result seekFrame(int frameIndex); @@ -169,6 +182,7 @@ private: const char* decodeFrame(); void updateNumFullyReceivedFrames(); + SkWuffsSpySampler fSpySampler; SkWuffsFrameHolder fFrameHolder; std::unique_ptr<SkStream> fStream; std::unique_ptr<wuffs_gif__decoder, decltype(&sk_free)> fDecoder; @@ -182,10 +196,12 @@ private: wuffs_base__io_buffer fIOBuffer; // Incremental decoding state. - SkColorType fIncrDecColorType; - uint8_t* fIncrDecDst; - bool fIncrDecHaveFrameConfig; - size_t fIncrDecRowBytes; + uint8_t* fIncrDecDst; + bool fIncrDecHaveFrameConfig; + size_t fIncrDecRowBytes; + + std::unique_ptr<SkSwizzler> fSwizzler; + SkPMColor fColorTable[256]; uint64_t fNumFullyReceivedFrames; std::vector<SkWuffsFrame> fFrames; @@ -252,6 +268,27 @@ const SkFrame* SkWuffsFrameHolder::onGetFrame(int i) const { return fCodec->frame(i); }; +// -------------------------------- SkWuffsSpySampler implementation + +void SkWuffsSpySampler::reset() { + fFillWidth = 0; + fSampleX = 1; + this->setSampleY(1); +} + +int SkWuffsSpySampler::sampleX() const { + return fSampleX; +} + +int SkWuffsSpySampler::fillWidth() const { + return fFillWidth; +} + +int SkWuffsSpySampler::onSetSampleX(int sampleX) { + fSampleX = sampleX; + return get_scaled_dimension(fImageWidth, sampleX); +} + // -------------------------------- SkWuffsCodec implementation SkWuffsCodec::SkWuffsCodec(SkEncodedInfo&& encodedInfo, @@ -269,6 +306,8 @@ SkWuffsCodec::SkWuffsCodec(SkEncodedInfo&& // manage the stream ourselves, as the default SkCodec behavior // is too trigger-happy on rewinding the stream. nullptr), + fSpySampler(imgcfg.pixcfg.width()), + fFrameHolder(), fStream(std::move(stream)), fDecoder(std::move(dec)), fPixbufPtr(std::move(pixbuf_ptr)), @@ -278,14 +317,15 @@ SkWuffsCodec::SkWuffsCodec(SkEncodedInfo&& fFrameConfig((wuffs_base__frame_config){}), fPixelBuffer(pixbuf), fIOBuffer((wuffs_base__io_buffer){}), - fIncrDecColorType(kUnknown_SkColorType), fIncrDecDst(nullptr), fIncrDecHaveFrameConfig(false), fIncrDecRowBytes(0), + fSwizzler(nullptr), fNumFullyReceivedFrames(0), fFramesComplete(false), fDecoderIsSuspended(false) { fFrameHolder.init(this, imgcfg.pixcfg.width(), imgcfg.pixcfg.height()); + sk_memset32(fColorTable, 0, SK_ARRAY_COUNT(fColorTable)); // Initialize fIOBuffer's fields, copying any outstanding data from iobuf to // fIOBuffer, as iobuf's backing array may not be valid for the lifetime of @@ -340,12 +380,11 @@ SkCodec::Result SkWuffsCodec::onStartIncrementalDecode(const SkImageInfo& d return result; } - SkSampler::Fill(dstInfo, dst, rowBytes, options.fZeroInitialized); - - fIncrDecColorType = dstInfo.colorType(); + fSpySampler.reset(); fIncrDecDst = static_cast<uint8_t*>(dst); fIncrDecHaveFrameConfig = false; fIncrDecRowBytes = rowBytes; + fSwizzler = nullptr; return SkCodec::kSuccess; } @@ -379,51 +418,57 @@ SkCodec::Result SkWuffsCodec::onIncrementalDecode(int* rowsDecoded) { return SkCodec::kErrorInInput; } - // TODO(nigeltao): use a swizzler, once I figure out how it works. For - // now, a C style load/store loop gets the job done. wuffs_base__rect_ie_u32 r = fFrameConfig.bounds(); - wuffs_base__table_u8 pixels = fPixelBuffer.plane(0); - wuffs_base__slice_u8 palette = fPixelBuffer.palette(); + if (!fSwizzler) { + SkIRect swizzleRect = SkIRect::MakeLTRB(r.min_incl_x, 0, r.max_excl_x, 1); + fSwizzler = SkSwizzler::Make(this->getEncodedInfo(), fColorTable, dstInfo(), Options(), + &swizzleRect); + fSwizzler->setSampleX(fSpySampler.sampleX()); + fSwizzler->setSampleY(fSpySampler.sampleY()); + + auto fillInfo = dstInfo().makeWH( + fSwizzler->fillWidth(), get_scaled_dimension(dstInfo().height(), fSwizzler->sampleY())); + SkSampler::Fill(fillInfo, fIncrDecDst, fIncrDecRowBytes, options().fZeroInitialized); + } + + wuffs_base__slice_u8 palette = fPixelBuffer.palette(); SkASSERT(palette.len == 4 * 256); - switch (fIncrDecColorType) { - case kRGB_565_SkColorType: - for (uint32_t y = r.min_incl_y; y < r.max_excl_y; y++) { - uint8_t* d = fIncrDecDst + (y * fIncrDecRowBytes) + (r.min_incl_x * 2); - uint8_t* s = pixels.ptr + (y * pixels.stride) + (r.min_incl_x * 1); - for (uint32_t x = r.min_incl_x; x < r.max_excl_x; x++) { - uint8_t index = *s++; - uint32_t argb = load_u32le(palette.ptr + 4 * static_cast<size_t>(index)); - store_565(d, argb); - d += 2; - } - } - break; - case kBGRA_8888_SkColorType: - for (uint32_t y = r.min_incl_y; y < r.max_excl_y; y++) { - uint8_t* d = fIncrDecDst + (y * fIncrDecRowBytes) + (r.min_incl_x * 4); - uint8_t* s = pixels.ptr + (y * pixels.stride) + (r.min_incl_x * 1); - for (uint32_t x = r.min_incl_x; x < r.max_excl_x; x++) { - uint8_t index = *s++; - uint32_t argb = load_u32le(palette.ptr + 4 * static_cast<size_t>(index)); - store_u32le(d, argb); - d += 4; - } + auto proc = choose_pack_color_proc(false, dstInfo().colorType()); + for (int i = 0; i < 256; i++) { + uint8_t* p = palette.ptr + 4 * i; + fColorTable[i] = proc(p[3], p[2], p[1], p[0]); + } + + wuffs_base__table_u8 pixels = fPixelBuffer.plane(0); + const int sampleY = fSwizzler->sampleY(); + const int scaledHeight = get_scaled_dimension(dstInfo().height(), sampleY); + for (uint32_t y = r.min_incl_y; y < r.max_excl_y; y++) { + // In Wuffs, a paletted image is always 1 byte per pixel. + static constexpr size_t src_bpp = 1; + + int dstY = y; + if (sampleY != 1) { + if (!fSwizzler->rowNeeded(y)) { + continue; } - break; - case kRGBA_8888_SkColorType: - for (uint32_t y = r.min_incl_y; y < r.max_excl_y; y++) { - uint8_t* d = fIncrDecDst + (y * fIncrDecRowBytes) + (r.min_incl_x * 4); - uint8_t* s = pixels.ptr + (y * pixels.stride) + (r.min_incl_x * 1); - for (uint32_t x = r.min_incl_x; x < r.max_excl_x; x++) { - uint8_t index = *s++; - uint32_t argb = load_u32le(palette.ptr + 4 * static_cast<size_t>(index)); - store_u32le_switched(d, argb); - d += 4; - } + dstY /= sampleY; + if (dstY >= scaledHeight) { + break; } - break; - default: - return SkCodec::kUnimplemented; + } + + // We don't adjust d by (r.min_incl_x * dst_bpp) as we have already + // accounted for that in swizzleRect, above. + uint8_t* d = fIncrDecDst + (dstY * fIncrDecRowBytes); + + // The Wuffs model is that the dst buffer is the image, not the frame. + // The expectation is that you allocate the buffer once, but re-use it + // for the N frames, regardless of each frame's top-left co-ordinate. + // + // To get from the start (in the X-direction) of the image to the start + // of the frame, we adjust s by (r.min_incl_x * src_bpp). + uint8_t* s = pixels.ptr + (y * pixels.stride) + (r.min_incl_x * src_bpp); + fSwizzler->swizzle(d, s); } // The semantics of *rowsDecoded is: say you have a 10 pixel high image @@ -444,10 +489,14 @@ SkCodec::Result SkWuffsCodec::onIncrementalDecode(int* rowsDecoded) { } if (result == SkCodec::kSuccess) { - fIncrDecColorType = kUnknown_SkColorType; + fSpySampler.reset(); fIncrDecDst = nullptr; fIncrDecHaveFrameConfig = false; fIncrDecRowBytes = 0; + fSwizzler = nullptr; + } else { + // Make fSpySampler return whatever fSwizzler would have for fillWidth. + fSpySampler.fFillWidth = fSwizzler->fillWidth(); } return result; } @@ -484,6 +533,16 @@ int SkWuffsCodec::onGetRepetitionCount() { return n < INT_MAX ? n : INT_MAX; } +SkSampler* SkWuffsCodec::getSampler(bool createIfNecessary) { + // fIncrDst being non-nullptr means that we are between an + // onStartIncrementalDecode call and the matching final (successful) + // onIncrementalDecode call. + if (createIfNecessary || fIncrDecDst) { + return &fSpySampler; + } + return nullptr; +} + void SkWuffsCodec::readFrames() { size_t n = fFrames.size(); int i = n ? n - 1 : 0; |