From 070e01056acdd9980619e71e2da390efb94e912e Mon Sep 17 00:00:00 2001 From: kkinnunen Date: Thu, 21 May 2015 00:37:30 -0700 Subject: Improve caching of special case paths in GrStencilAndCoverPathRenderer Cache lines and oval paths in their own cache domains. Skia has many hard-to-replace codepaths that create volatile paths out of lines or ovals. Results for amd64: desk_tigersvg.skp_1 3.06ms -> 3.07ms 1x tabl_cnet.skp_1 2.3ms -> 2.3ms 1x desk_baidu.skp_1 8.99ms -> 8.98ms 1x desk_weather.skp_1 4.58ms -> 4.57ms 1x desk_samoasvg.skp_1 12.3ms -> 12.3ms 1x tabl_gamedeksiam.skp_1 15.8ms -> 15.7ms 1x desk_chalkboard.skp_1 14.9ms -> 14.9ms 1x desk_mapsvg.skp_1 6.57ms -> 6.54ms 1x desk_wordpress.skp_1 2.2ms -> 2.19ms 1x tabl_slashdot.skp_1 6.91ms -> 6.84ms 0.99x desk_linkedin.skp_1 7.7ms -> 7.58ms 0.98x desk_googlespreadsheet.skp_1 58.7ms -> 57.7ms 0.98x tabl_ukwsj.skp_1 7.67ms -> 7.53ms 0.98x tabl_engadget.skp_1 4.71ms -> 4.61ms 0.98x desk_carsvg.skp_1 116ms -> 114ms 0.98x tabl_nytimes.skp_1 3.71ms -> 3.61ms 0.97x desk_googlespreadsheetdashed.skp_1 25.8ms -> 24.9ms 0.97x tabl_hsfi.skp_1 5.3ms -> 5.1ms 0.96x tabl_techmeme.skp_1 2.9ms -> 2.8ms 0.96x tabl_cnn.skp_1 6.88ms -> 6.62ms 0.96x desk_espn.skp_1 9.12ms -> 8.64ms 0.95x desk_gws.skp_1 5.15ms -> 4.88ms 0.95x tabl_gspro.skp_1 4.48ms -> 4.2ms 0.94x desk_yahooanswers.skp_1 6.84ms -> 6.3ms 0.92x tabl_pravda.skp_1 11.1ms -> 10.1ms 0.91x tabl_sahadan.skp_1 11.8ms -> 10.7ms 0.91x desk_wowwiki.skp_1 8.48ms -> 7.58ms 0.89x tabl_googleblog.skp_1 6ms -> 5.34ms 0.89x tabl_worldjournal.skp_1 6.08ms -> 5.35ms 0.88x desk_booking.skp_1 15.3ms -> 13.4ms 0.88x tabl_androidpolice.skp_1 15.5ms -> 13.5ms 0.87x desk_twitter.skp_1 12.2ms -> 10.6ms 0.87x tabl_nofolo.skp_1 5.49ms -> 4.76ms 0.87x desk_mobilenews.skp_1 22ms -> 18.9ms 0.86x desk_forecastio.skp_1 9.47ms -> 8.05ms 0.85x tabl_culturalsolutions.skp_1 6.21ms -> 5.28ms 0.85x desk_youtube.skp_1 16.1ms -> 13.5ms 0.84x tabl_mlb.skp_1 9.75ms -> 8.01ms 0.82x tabl_digg.skp_1 5.2ms -> 4.22ms 
0.81x desk_blogger.skp_1 10.2ms -> 8.24ms 0.81x desk_gmailthread.skp_1 26.8ms -> 21.6ms 0.81x desk_googleplus.skp_1 10.5ms -> 8.39ms 0.8x tabl_frantzen.skp_1 4.55ms -> 3.58ms 0.79x desk_pinterest.skp_1 8.85ms -> 6.88ms 0.78x desk_ebay.skp_1 10.5ms -> 8.15ms 0.77x tabl_transformice.skp_1 4.93ms -> 3.5ms 0.71x Results for arm_v7_neon: desk_samoasvg.skp_1 13.9ms -> 14.6ms 1.05x desk_mapsvg.skp_1 8.31ms -> 8.75ms 1.05x tabl_deviantart.skp_1 1.41ms -> 1.45ms 1.02x desk_weather.skp_1 3.8ms -> 3.88ms 1.02x desk_sfgate.skp_1 3.06ms -> 3.1ms 1.01x desk_css3gradients.skp_1 2.78ms -> 2.79ms 1x desk_espn.skp_1 6.52ms -> 6.43ms 0.99x desk_gws.skp_1 4.16ms -> 4.09ms 0.98x tabl_cnn.skp_1 4.66ms -> 4.58ms 0.98x tabl_hsfi.skp_1 3.49ms -> 3.42ms 0.98x tabl_cuteoverload.skp_1 2.41ms -> 2.35ms 0.98x desk_yahooanswers.skp_1 5.28ms -> 5.14ms 0.97x desk_carsvg.skp_1 90.8ms -> 87.9ms 0.97x tabl_gspro.skp_1 2.81ms -> 2.71ms 0.96x desk_wowwiki.skp_1 5.85ms -> 5.63ms 0.96x tabl_pravda.skp_1 7.8ms -> 7.5ms 0.96x desk_twitter.skp_1 8.14ms -> 7.8ms 0.96x tabl_androidpolice.skp_1 10.4ms -> 9.96ms 0.96x tabl_googleblog.skp_1 4.06ms -> 3.83ms 0.95x desk_mobilenews.skp_1 15.2ms -> 14ms 0.93x desk_booking.skp_1 9.89ms -> 9.08ms 0.92x desk_forecastio.skp_1 6.16ms -> 5.65ms 0.92x desk_blogger.skp_1 6.17ms -> 5.66ms 0.92x tabl_digg.skp_1 3.73ms -> 3.41ms 0.91x tabl_nofolo.skp_1 3.82ms -> 3.47ms 0.91x tabl_worldjournal.skp_1 4.24ms -> 3.84ms 0.9x desk_youtube.skp_1 10.5ms -> 9.39ms 0.9x desk_googleplus.skp_1 7.01ms -> 6.19ms 0.88x tabl_mlb.skp_1 5.91ms -> 5.22ms 0.88x tabl_googlecalendar.skp_1 10.7ms -> 9.44ms 0.88x desk_gmailthread.skp_1 19.2ms -> 16.8ms 0.88x desk_ebay.skp_1 5.68ms -> 4.93ms 0.87x desk_pinterest.skp_1 5.99ms -> 5.08ms 0.85x desk_googlehome.skp_1 3.31ms -> 2.71ms 0.82x tabl_transformice.skp_1 3.03ms -> 2.44ms 0.81x desk_amazon.skp_1 6.05ms -> 4.84ms 0.8x desk_facebook.skp_1 12.6ms -> 9.62ms 0.76x Review URL: https://codereview.chromium.org/1120023005 --- src/gpu/GrPath.cpp | 162 
+++++++++++++++++++++++++++++-
 src/gpu/GrPath.h                          |   3 +-
 src/gpu/GrStencilAndCoverPathRenderer.cpp |   7 +-
 3 files changed, 165 insertions(+), 7 deletions(-)

diff --git a/src/gpu/GrPath.cpp b/src/gpu/GrPath.cpp
index e76bdf2466..5b75683628 100644
--- a/src/gpu/GrPath.cpp
+++ b/src/gpu/GrPath.cpp
@@ -7,14 +7,168 @@
 
 #include "GrPath.h"
 
-void GrPath::ComputeKey(const SkPath& path, const GrStrokeInfo& stroke, GrUniqueKey* key) {
-    static const GrUniqueKey::Domain kPathDomain = GrUniqueKey::GenerateDomain();
+namespace {
+// Verb count limit for generating path key from content of a volatile path.
+// The value should accommodate at least simple rects and rrects.
+static const int kSimpleVolatilePathVerbLimit = 10;
+
+inline static bool compute_key_for_line_path(const SkPath& path, const GrStrokeInfo& stroke,
+                                             GrUniqueKey* key) {
+    SkPoint pts[2];
+    if (!path.isLine(pts)) {
+        return false;
+    }
+    SK_COMPILE_ASSERT((sizeof(pts) % sizeof(uint32_t)) == 0 && sizeof(pts) > sizeof(uint32_t),
+                      pts_needs_padding);
+
+    const int kBaseData32Cnt = 1 + sizeof(pts) / sizeof(uint32_t);
+    int strokeDataCnt = stroke.computeUniqueKeyFragmentData32Cnt();
+    static const GrUniqueKey::Domain kLinePathDomain = GrUniqueKey::GenerateDomain();
+    GrUniqueKey::Builder builder(key, kLinePathDomain, kBaseData32Cnt + strokeDataCnt);
+    builder[0] = path.getFillType();
+    memcpy(&builder[1], &pts, sizeof(pts));
+    if (strokeDataCnt > 0) {
+        stroke.asUniqueKeyFragment(&builder[kBaseData32Cnt]);
+    }
+    return true;
+}
+
+inline static bool compute_key_for_oval_path(const SkPath& path, const GrStrokeInfo& stroke,
+                                             GrUniqueKey* key) {
+    SkRect rect;
+    if (!path.isOval(&rect)) {
+        return false;
+    }
+    SK_COMPILE_ASSERT((sizeof(rect) % sizeof(uint32_t)) == 0 && sizeof(rect) > sizeof(uint32_t),
+                      rect_needs_padding);
+
+    const int kBaseData32Cnt = 1 + sizeof(rect) / sizeof(uint32_t);
     int strokeDataCnt = stroke.computeUniqueKeyFragmentData32Cnt();
-    GrUniqueKey::Builder builder(key, kPathDomain, 2 + strokeDataCnt);
+    static const GrUniqueKey::Domain kOvalPathDomain = GrUniqueKey::GenerateDomain();
+    GrUniqueKey::Builder builder(key, kOvalPathDomain, kBaseData32Cnt + strokeDataCnt);
+    builder[0] = path.getFillType();
+    memcpy(&builder[1], &rect, sizeof(rect));
+    if (strokeDataCnt > 0) {
+        stroke.asUniqueKeyFragment(&builder[kBaseData32Cnt]);
+    }
+    return true;
+}
+
+// Encodes the full path data to the unique key for very small, volatile paths. This is typically
+// hit when clipping stencils the clip stack. Intention is that this handles rects too, since
+// SkPath::isRect seems to do non-trivial amount of work.
+inline static bool compute_key_for_simple_path(const SkPath& path, const GrStrokeInfo& stroke,
+                                               GrUniqueKey* key) {
+    if (!path.isVolatile()) {
+        return false;
+    }
+
+    // The check below should take care of negative values casted positive.
+    const int verbCnt = path.countVerbs();
+    if (verbCnt > kSimpleVolatilePathVerbLimit) {
+        return false;
+    }
+
+    // If somebody goes wild with the constant, it might cause an overflow.
+    SK_COMPILE_ASSERT(kSimpleVolatilePathVerbLimit <= 100,
+                      big_simple_volatile_path_verb_limit_may_cause_overflow);
+
+    const int pointCnt = path.countPoints();
+    if (pointCnt < 0) {
+        SkASSERT(false);
+        return false;
+    }
+
+    // Construct counts that align as uint32_t counts.
+#define ARRAY_DATA32_COUNT(array_type, count) \
+    static_cast<int>((((count) * sizeof(array_type) + sizeof(uint32_t) - 1) / sizeof(uint32_t)))
+
+    const int verbData32Cnt = ARRAY_DATA32_COUNT(uint8_t, verbCnt);
+    const int pointData32Cnt = ARRAY_DATA32_COUNT(SkPoint, pointCnt);
+
+#undef ARRAY_DATA32_COUNT
+
+    // The unique key data is a "message" with following fragments:
+    // 0) domain, key length, uint32_t for fill type and uint32_t for verbCnt
+    //    (fragment 0, fixed size)
+    // 1) verb and point data (varying size)
+    // 2) stroke data (varying size)
+
+    const int baseData32Cnt = 2 + verbData32Cnt + pointData32Cnt;
+    const int strokeDataCnt = stroke.computeUniqueKeyFragmentData32Cnt();
+    static const GrUniqueKey::Domain kSimpleVolatilePathDomain = GrUniqueKey::GenerateDomain();
+    GrUniqueKey::Builder builder(key, kSimpleVolatilePathDomain, baseData32Cnt + strokeDataCnt);
+    int i = 0;
+    builder[i++] = path.getFillType();
+
+    // Serialize the verbCnt to make the whole message unambiguous.
+    // We serialize two variable length fragments to the message:
+    // * verb and point data (fragment 1)
+    // * stroke data (fragment 2)
+    // "Proof:"
+    // Verb count establishes unambiguous verb data.
+    // Unambiguous verb data establishes unambiguous point data, making fragment 1 unambiguous.
+    // Unambiguous fragment 1 establishes unambiguous fragment 2, since the length of the message
+    // has been established.
+
+    builder[i++] = SkToU32(verbCnt); // The path limit is compile-asserted above, so the cast is ok.
+
+    // Fill the last uint32_t with 0 first, since the last uint8_ts of the uint32_t may be
+    // uninitialized. This does not produce ambiguous verb data, since we have serialized the exact
+    // verb count.
+    if (verbData32Cnt != static_cast<int>((verbCnt * sizeof(uint8_t) / sizeof(uint32_t)))) {
+        builder[i + verbData32Cnt - 1] = 0;
+    }
+    path.getVerbs(reinterpret_cast<uint8_t*>(&builder[i]), verbCnt);
+    i += verbData32Cnt;
+
+    SK_COMPILE_ASSERT(((sizeof(SkPoint) % sizeof(uint32_t)) == 0) &&
+                      sizeof(SkPoint) > sizeof(uint32_t), skpoint_array_needs_padding);
+
+    // Here we assume getPoints does a memcpy, so that we do not need to worry about the alignment.
+    path.getPoints(reinterpret_cast<SkPoint*>(&builder[i]), pointCnt);
+    SkDEBUGCODE(i += pointData32Cnt);
+
+    SkASSERT(i == baseData32Cnt);
+    if (strokeDataCnt > 0) {
+        stroke.asUniqueKeyFragment(&builder[baseData32Cnt]);
+    }
+    return true;
+}
+
+inline static void compute_key_for_general_path(const SkPath& path, const GrStrokeInfo& stroke,
+                                                GrUniqueKey* key) {
+    const int kBaseData32Cnt = 2;
+    int strokeDataCnt = stroke.computeUniqueKeyFragmentData32Cnt();
+    static const GrUniqueKey::Domain kGeneralPathDomain = GrUniqueKey::GenerateDomain();
+    GrUniqueKey::Builder builder(key, kGeneralPathDomain, kBaseData32Cnt + strokeDataCnt);
     builder[0] = path.getGenerationID();
     builder[1] = path.getFillType();
     if (strokeDataCnt > 0) {
-        stroke.asUniqueKeyFragment(&builder[2]);
+        stroke.asUniqueKeyFragment(&builder[kBaseData32Cnt]);
+    }
+}
+
+}
+
+void GrPath::ComputeKey(const SkPath& path, const GrStrokeInfo& stroke, GrUniqueKey* key,
+                        bool* outIsVolatile) {
+    if (compute_key_for_line_path(path, stroke, key)) {
+        *outIsVolatile = false;
+        return;
     }
+
+    if (compute_key_for_oval_path(path, stroke, key)) {
+        *outIsVolatile = false;
+        return;
+    }
+
+    if (compute_key_for_simple_path(path, stroke, key)) {
+        *outIsVolatile = false;
+        return;
+    }
+
+    compute_key_for_general_path(path, stroke, key);
+    *outIsVolatile = path.isVolatile();
 }
 
diff --git a/src/gpu/GrPath.h b/src/gpu/GrPath.h
index a535e697ca..91975218c2 100644
--- a/src/gpu/GrPath.h
+++ b/src/gpu/GrPath.h
@@ -30,7 +30,8 @@ public:
     {
     }
 
-    static void ComputeKey(const SkPath& path, const GrStrokeInfo& stroke, GrUniqueKey* key);
+    static void ComputeKey(const SkPath& path, const GrStrokeInfo& stroke, GrUniqueKey* key,
+                           bool* outIsVolatile);
 
     const SkRect& getBounds() const { return fBounds; }
 
diff --git a/src/gpu/GrStencilAndCoverPathRenderer.cpp b/src/gpu/GrStencilAndCoverPathRenderer.cpp
index e60bea215a..97b20773a7 100644
--- a/src/gpu/GrStencilAndCoverPathRenderer.cpp
+++ b/src/gpu/GrStencilAndCoverPathRenderer.cpp
@@ -75,12 +75,15 @@ GrStencilAndCoverPathRenderer::onGetStencilSupport(const GrDrawTarget*,
 static GrPath* get_gr_path(GrGpu* gpu, const SkPath& skPath, const GrStrokeInfo& stroke) {
     GrContext* ctx = gpu->getContext();
     GrUniqueKey key;
-    GrPath::ComputeKey(skPath, stroke, &key);
+    bool isVolatile;
+    GrPath::ComputeKey(skPath, stroke, &key, &isVolatile);
     SkAutoTUnref<GrPath> path(
         static_cast<GrPath*>(ctx->resourceProvider()->findAndRefResourceByUniqueKey(key)));
     if (NULL == path) {
         path.reset(gpu->pathRendering()->createPath(skPath, stroke));
-        ctx->resourceProvider()->assignUniqueKeyToResource(key, path);
+        if (!isVolatile) {
+            ctx->resourceProvider()->assignUniqueKeyToResource(key, path);
+        }
     } else {
         SkASSERT(path->isEqualTo(skPath, stroke));
     }
-- 
cgit v1.2.3