Merge from Chromium at DEPS revision 269336 [refs: HEAD, master, main]

This commit was generated by merge_to_master.py.

Change-Id: I9d3335dcb08fc53d18bec993b89047b409bd7eb1
author: Torne (Richard Coles) <torne@google.com> 2014-05-14 12:13:15 +0100
committer: Torne (Richard Coles) <torne@google.com> 2014-05-14 12:13:15 +0100
commit: 6b0bad3aba066dc8d7d47a5f6738ebf773ffc2e9 (patch)
tree: ad988be7ad87928b7b98c44f2def4cce7d3330b6
parent: 27ab20dffff01006f5d20fdb2b3f4ea503d69114 (diff)
parent: d60f7edf0fa7eb2eb7c99de486abfe61ad3dcd69 (diff)
download: src-master.tar.gz
147 files changed, 2666 insertions, 1948 deletions
diff --git a/animator/SkAnimateSet.cpp b/animator/SkAnimateSet.cpp
index f153b16b..d146118e 100644
--- a/animator/SkAnimateSet.cpp
+++ b/animator/SkAnimateSet.cpp
@@ -62,7 +62,7 @@ void SkSet::onEndElement(SkAnimateMaker& maker) {
     fReset = dur != 1;
     SkDisplayTypes outType = fFieldInfo->getType();
     int comps = outType == SkType_String || outType == SkType_DynamicString ? 1 :
-        fFieldInfo->getSize((const SkDisplayable*) fTarget) / sizeof(int);
+        (int)fFieldInfo->getSize((const SkDisplayable*) fTarget) / sizeof(int);
     if (fValues.getType() == SkType_Unknown) {
         fValues.setType(outType);
         fValues.setCount(comps);
diff --git a/animator/SkScript2.h b/animator/SkScript2.h
index 33e2af7f..d182e8c7 100644
--- a/animator/SkScript2.h
+++ b/animator/SkScript2.h
@@ -188,8 +188,9 @@ protected:
         Branch() {
         }
 
-        Branch(Op op, int depth, unsigned offset) : fOffset(offset), fOpStackDepth(depth), fOperator(op),
-            fPrimed(kIsNotPrimed), fDone(kIsNotDone) {
+        Branch(Op op, int depth, size_t offset)
+            : fOffset(SkToU16(offset)), fOpStackDepth(depth), fOperator(op)
+            , fPrimed(kIsNotPrimed), fDone(kIsNotDone) {
         }
 
         enum Primed {
diff --git a/animator/SkScriptTokenizer.cpp b/animator/SkScriptTokenizer.cpp
index 8fc5d802..03ffaa4a 100644
--- a/animator/SkScriptTokenizer.cpp
+++ b/animator/SkScriptTokenizer.cpp
@@ -170,7 +170,7 @@ void SkScriptEngine2::addTokenScalar(SkScalar scalar) {
 }
 
 void SkScriptEngine2::addTokenString(const SkString& string) {
-    int size = string.size();
+    int size = SkToInt(string.size());
     addTokenInt(size);
     fActiveStream->write(string.c_str(), size);
 }
@@ -1023,7 +1023,7 @@ void SkScriptEngine2::processLogicalOp(Op op) {
             branch.fOperator = op;
             branch.fDone = Branch::kIsNotDone;
             SkASSERT(branch.fOpStackDepth == fOpStack.count());
-            branch.fOffset = newOffset;
+            branch.fOffset = SkToU16(newOffset);
             fAccumulatorType = SkOperand2::kNoType;
         } break;
         case kLogicalAnd:
diff --git a/core/ARGB32_Clamp_Bilinear_BitmapShader.h b/core/ARGB32_Clamp_Bilinear_BitmapShader.h
deleted file mode 100644
index 87121cfd..00000000
--- a/core/ARGB32_Clamp_Bilinear_BitmapShader.h
+++ /dev/null
@@ -1,177 +0,0 @@
-
-/*
- * Copyright 2011 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-class ARGB32_Clamp_Bilinear_BitmapShader : public SkBitmapShader {
-public:
-    ARGB32_Clamp_Bilinear_BitmapShader(const SkBitmap& src)
-        : SkBitmapShader(src, true,
-                         SkShader::kClamp_TileMode, SkShader::kClamp_TileMode)
-    {}
-
-    virtual void shadeSpan(int x, int y, SkPMColor dstC[], int count);
-};
-
-SkPMColor sample_bilerp(SkFixed fx, SkFixed fy, unsigned srcMaxX, unsigned srcMaxY,
-                        const SkPMColor* srcPixels, int srcRB, const SkFilterPtrProc* proc_table);
-SkPMColor sample_bilerp(SkFixed fx, SkFixed fy, unsigned srcMaxX, unsigned srcMaxY,
-                        const SkPMColor* srcPixels, int srcRB, const SkFilterPtrProc* proc_table)
-{
-    int ix = fx >> 16;
-    int iy = fy >> 16;
-
-    const SkPMColor *p00, *p01, *p10, *p11;
-
-    p00 = p01 = ((const SkPMColor*)((const char*)srcPixels
-                                    + SkClampMax(iy, srcMaxY) * srcRB))
-                                    + SkClampMax(ix, srcMaxX);
-
-    if ((unsigned)ix < srcMaxX)
-        p01 += 1;
-    p10 = p00;
-    p11 = p01;
-    if ((unsigned)iy < srcMaxY)
-    {
-        p10 = (const SkPMColor*)((const char*)p10 + srcRB);
-        p11 = (const SkPMColor*)((const char*)p11 + srcRB);
-    }
-
-    SkFilterPtrProc proc = SkGetBilinearFilterPtrProc(proc_table, fx, fy);
-    return proc(p00, p01, p10, p11);
-}
-
-static inline SkPMColor sample_bilerpx(SkFixed fx, unsigned srcMaxX, const SkPMColor* srcPixels,
-                                       int srcRB, const SkFilterPtrProc* proc_table)
-{
-    int ix = fx >> 16;
-
-    const SkPMColor *p00, *p01, *p10, *p11;
-
-    p00 = p01 = srcPixels + SkClampMax(ix, srcMaxX);
-    if ((unsigned)ix < srcMaxX)
-        p01 += 1;
-
-    p10 = (const SkPMColor*)((const char*)p00 + srcRB);
-    p11 = (const SkPMColor*)((const char*)p01 + srcRB);
-
-    SkFilterPtrProc proc = SkGetBilinearFilterPtrXProc(proc_table, fx);
-    return proc(p00, p01, p10, p11);
-}
-
-void ARGB32_Clamp_Bilinear_BitmapShader::shadeSpan(int x, int y, SkPMColor dstC[], int count)
-{
-    SkASSERT(count > 0);
-
-    unsigned srcScale = SkAlpha255To256(this->getPaintAlpha());
-
-    const SkMatrix& inv = this->getTotalInverse();
-    const SkBitmap& srcBitmap = this->getSrcBitmap();
-    unsigned        srcMaxX = srcBitmap.width() - 1;
-    unsigned        srcMaxY = srcBitmap.height() - 1;
-    unsigned        srcRB = srcBitmap.rowBytes();
-
-    const SkFilterPtrProc* proc_table = SkGetBilinearFilterPtrProcTable();
-    const SkPMColor* srcPixels = (const SkPMColor*)srcBitmap.getPixels();
-
-    if (this->getInverseClass() == kPerspective_MatrixClass)
-    {
-        SkPerspIter   iter(inv, SkIntToScalar(x) + SK_ScalarHalf,
-                                SkIntToScalar(y) + SK_ScalarHalf, count);
-
-        if (256 == srcScale)
-        {
-            while ((count = iter.next()) != 0)
-            {
-                const SkFixed* srcXY = iter.getXY();
-                while (--count >= 0)
-                {
-                    SkFixed fx = *srcXY++ - SK_FixedHalf;
-                    SkFixed fy = *srcXY++ - SK_FixedHalf;
-                    *dstC++ = sample_bilerp(fx, fy, srcMaxX, srcMaxY, srcPixels, srcRB, proc_table);
-                }
-            }
-        }
-        else    // scale by srcScale
-        {
-            while ((count = iter.next()) != 0)
-            {
-                const SkFixed* srcXY = iter.getXY();
-                while (--count >= 0)
-                {
-                    SkFixed fx = *srcXY++ - SK_FixedHalf;
-                    SkFixed fy = *srcXY++ - SK_FixedHalf;
-                    SkPMColor c = sample_bilerp(fx, fy, srcMaxX, srcMaxY, srcPixels, srcRB, proc_table);
-                    *dstC++ = SkAlphaMulQ(c, srcScale);
-                }
-            }
-        }
-    }
-    else    // linear case
-    {
-        SkFixed fx, fy, dx, dy;
-
-        // now init fx, fy, dx, dy
-        {
-            SkPoint srcPt;
-            this->getInverseMapPtProc()(inv, SkIntToScalar(x) + SK_ScalarHalf,
-                                             SkIntToScalar(y) + SK_ScalarHalf,
-                                             &srcPt);
-
-            fx = SkScalarToFixed(srcPt.fX) - SK_FixedHalf;
-            fy = SkScalarToFixed(srcPt.fY) - SK_FixedHalf;
-
-            if (this->getInverseClass() == kFixedStepInX_MatrixClass)
-                (void)inv.fixedStepInX(SkIntToScalar(y), &dx, &dy);
-            else
-            {
-                dx = SkScalarToFixed(inv.getScaleX());
-                dy = SkScalarToFixed(inv.getSkewY());
-            }
-        }
-
-        if (dy == 0 && (unsigned)(fy >> 16) < srcMaxY)
-        {
-            srcPixels = (const SkPMColor*)((const char*)srcPixels + (fy >> 16) * srcRB);
-            proc_table = SkGetBilinearFilterPtrProcYTable(proc_table, fy);
-            if (256 == srcScale)
-            {
-                do {
-                    *dstC++ = sample_bilerpx(fx, srcMaxX, srcPixels, srcRB, proc_table);
-                    fx += dx;
-                } while (--count != 0);
-            }
-            else
-            {
-                do {
-                    SkPMColor c = sample_bilerpx(fx, srcMaxX, srcPixels, srcRB, proc_table);
-                    *dstC++ = SkAlphaMulQ(c, srcScale);
-                    fx += dx;
-                } while (--count != 0);
-            }
-        }
-        else    // dy is != 0
-        {
-            if (256 == srcScale)
-            {
-                do {
-                    *dstC++ = sample_bilerp(fx, fy, srcMaxX, srcMaxY, srcPixels, srcRB, proc_table);
-                    fx += dx;
-                    fy += dy;
-                } while (--count != 0);
-            }
-            else
-            {
-                do {
-                    SkPMColor c = sample_bilerp(fx, fy, srcMaxX, srcMaxY, srcPixels, srcRB, proc_table);
-                    *dstC++ = SkAlphaMulQ(c, srcScale);
-                    fx += dx;
-                    fy += dy;
-                } while (--count != 0);
-            }
-        }
-    }
-}
diff --git a/core/SkBitmapProcShader.cpp b/core/SkBitmapProcShader.cpp
index 44bdc6d3..00d938be 100644
--- a/core/SkBitmapProcShader.cpp
+++ b/core/SkBitmapProcShader.cpp
@@ -97,52 +97,30 @@ static bool valid_for_drawing(const SkBitmap& bm) {
     return true;
 }
 
-bool SkBitmapProcShader::validInternal(const SkBitmap& device,
-                                       const SkPaint& paint,
-                                       const SkMatrix& matrix,
-                                       SkMatrix* totalInverse,
-                                       SkBitmapProcState* state) const {
+SkShader::Context* SkBitmapProcShader::onCreateContext(const ContextRec& rec, void* storage) const {
     if (!fRawBitmap.getTexture() && !valid_for_drawing(fRawBitmap)) {
-        return false;
-    }
-
-    // Make sure we can use totalInverse as a cache.
-    SkMatrix totalInverseLocal;
-    if (NULL == totalInverse) {
-        totalInverse = &totalInverseLocal;
+        return NULL;
     }
 
+    SkMatrix totalInverse;
     // Do this first, so we know the matrix can be inverted.
-    if (!this->INHERITED::validContext(device, paint, matrix, totalInverse)) {
-        return false;
+    if (!this->computeTotalInverse(rec, &totalInverse)) {
+        return NULL;
     }
 
+    void* stateStorage = (char*)storage + sizeof(BitmapProcShaderContext);
+    SkBitmapProcState* state = SkNEW_PLACEMENT(stateStorage, SkBitmapProcState);
+
     SkASSERT(state);
     state->fTileModeX = fTileModeX;
     state->fTileModeY = fTileModeY;
     state->fOrigBitmap = fRawBitmap;
-    return state->chooseProcs(*totalInverse, paint);
-}
-
-bool SkBitmapProcShader::validContext(const SkBitmap& device,
-                                      const SkPaint& paint,
-                                      const SkMatrix& matrix,
-                                      SkMatrix* totalInverse) const {
-    SkBitmapProcState state;
-    return this->validInternal(device, paint, matrix, totalInverse, &state);
-}
-
-SkShader::Context* SkBitmapProcShader::createContext(const SkBitmap& device, const SkPaint& paint,
-                                                     const SkMatrix& matrix, void* storage) const {
-    void* stateStorage = (char*)storage + sizeof(BitmapProcShaderContext);
-    SkBitmapProcState* state = SkNEW_PLACEMENT(stateStorage, SkBitmapProcState);
-    if (!this->validInternal(device, paint, matrix, NULL, state)) {
+    if (!state->chooseProcs(totalInverse, *rec.fPaint)) {
         state->~SkBitmapProcState();
         return NULL;
     }
 
-    return SkNEW_PLACEMENT_ARGS(storage, BitmapProcShaderContext,
-                                (*this, device, paint, matrix, state));
+    return SkNEW_PLACEMENT_ARGS(storage, BitmapProcShaderContext, (*this, rec, state));
 }
 
 size_t SkBitmapProcShader::contextSize() const {
@@ -152,9 +130,8 @@ size_t SkBitmapProcShader::contextSize() const {
 }
 
 SkBitmapProcShader::BitmapProcShaderContext::BitmapProcShaderContext(
-        const SkBitmapProcShader& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix, SkBitmapProcState* state)
-    : INHERITED(shader, device, paint, matrix)
+        const SkBitmapProcShader& shader, const ContextRec& rec, SkBitmapProcState* state)
+    : INHERITED(shader, rec)
     , fState(state)
 {
     const SkBitmap& bitmap = *fState->fBitmap;
@@ -182,7 +159,7 @@ SkBitmapProcShader::BitmapProcShaderContext::BitmapProcShaderContext(
             break;
     }
 
-    if (paint.isDither() && bitmap.colorType() != kRGB_565_SkColorType) {
+    if (rec.fPaint->isDither() && bitmap.colorType() != kRGB_565_SkColorType) {
         // gradients can auto-dither in their 16bit sampler, but we don't so
         // we clear the flag here.
         flags &= ~kHasSpan16_Flag;
diff --git a/core/SkBitmapProcShader.h b/core/SkBitmapProcShader.h
index 78b46ce6..8d312564 100644
--- a/core/SkBitmapProcShader.h
+++ b/core/SkBitmapProcShader.h
@@ -23,12 +23,6 @@ public:
     virtual bool isOpaque() const SK_OVERRIDE;
     virtual BitmapType asABitmap(SkBitmap*, SkMatrix*, TileMode*) const SK_OVERRIDE;
 
-    virtual bool validContext(const SkBitmap& device,
-                              const SkPaint& paint,
-                              const SkMatrix& matrix,
-                              SkMatrix* totalInverse = NULL) const SK_OVERRIDE;
-    virtual SkShader::Context* createContext(const SkBitmap&, const SkPaint&,
-                                             const SkMatrix&, void* storage) const SK_OVERRIDE;
     virtual size_t contextSize() const SK_OVERRIDE;
 
     static bool CanDo(const SkBitmap&, TileMode tx, TileMode ty);
@@ -44,11 +38,7 @@ public:
     public:
         // The context takes ownership of the state. It will call its destructor
         // but will NOT free the memory.
-        BitmapProcShaderContext(const SkBitmapProcShader& shader,
-                                const SkBitmap& device,
-                                const SkPaint& paint,
-                                const SkMatrix& matrix,
-                                SkBitmapProcState* state);
+        BitmapProcShaderContext(const SkBitmapProcShader&, const ContextRec&, SkBitmapProcState*);
         virtual ~BitmapProcShaderContext();
 
         virtual void shadeSpan(int x, int y, SkPMColor dstC[], int count) SK_OVERRIDE;
@@ -67,15 +57,12 @@ public:
 protected:
     SkBitmapProcShader(SkReadBuffer& );
     virtual void flatten(SkWriteBuffer&) const SK_OVERRIDE;
+    virtual Context* onCreateContext(const ContextRec&, void* storage) const SK_OVERRIDE;
 
     SkBitmap    fRawBitmap;   // experimental for RLE encoding
     uint8_t     fTileModeX, fTileModeY;
 
 private:
-    bool validInternal(const SkBitmap& device, const SkPaint& paint,
-                       const SkMatrix& matrix, SkMatrix* totalInverse,
-                       SkBitmapProcState* state) const;
-
     typedef SkShader INHERITED;
 };
 
@@ -84,10 +71,7 @@ private:
 // an Sk3DBlitter in SkDraw.cpp
 // Note that some contexts may contain other contexts (e.g. for compose shaders), but we've not
 // yet found a situation where the size below isn't big enough.
-typedef SkSmallAllocator<3, sizeof(SkBitmapProcShader) +
-                            sizeof(SkBitmapProcShader::BitmapProcShaderContext) +
-                            sizeof(SkBitmapProcState) +
-                            sizeof(void*) * 2> SkTBlitterAllocator;
+typedef SkSmallAllocator<3, 768> SkTBlitterAllocator;
 
 // If alloc is non-NULL, it will be used to allocate the returned SkShader, and MUST outlive
 // the SkShader.
diff --git a/core/SkBitmapProcState_matrixProcs.cpp b/core/SkBitmapProcState_matrixProcs.cpp
index a05c13ed..02204b67 100644
--- a/core/SkBitmapProcState_matrixProcs.cpp
+++ b/core/SkBitmapProcState_matrixProcs.cpp
@@ -65,7 +65,7 @@ struct ClampTileProcs {
     }
 };
 
-// Referenced in opts_check_SSE2.cpp
+// Referenced in opts_check_x86.cpp
 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
                                   int count, int x, int y) {
     return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
diff --git a/core/SkBlitter.cpp b/core/SkBlitter.cpp
index 41f37e65..81e46c56 100644
--- a/core/SkBlitter.cpp
+++ b/core/SkBlitter.cpp
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2006 The Android Open Source Project
  *
@@ -6,7 +5,6 @@
  * found in the LICENSE file.
  */
 
-
 #include "SkBlitter.h"
 #include "SkAntiRun.h"
 #include "SkColor.h"
@@ -26,8 +24,7 @@ SkBlitter::~SkBlitter() {}
 
 bool SkBlitter::isNullBlitter() const { return false; }
 
-bool SkBlitter::resetShaderContext(const SkBitmap& device, const SkPaint& paint,
-                                   const SkMatrix& matrix) {
+bool SkBlitter::resetShaderContext(const SkShader::ContextRec&) {
     return true;
 }
 
@@ -591,51 +588,29 @@ public:
         return size;
     }
 
-    virtual bool validContext(const SkBitmap& device, const SkPaint& paint,
-                              const SkMatrix& matrix, SkMatrix* totalInverse = NULL) const
-            SK_OVERRIDE
-    {
-        if (!this->INHERITED::validContext(device, paint, matrix, totalInverse)) {
-            return false;
-        }
-        if (fProxy) {
-            return fProxy->validContext(device, paint, matrix);
-        }
-        return true;
-    }
-
-    virtual SkShader::Context* createContext(const SkBitmap& device,
-                                             const SkPaint& paint,
-                                             const SkMatrix& matrix,
-                                             void* storage) const SK_OVERRIDE
-    {
-        if (!this->validContext(device, paint, matrix)) {
-            return NULL;
-        }
-
-        SkShader::Context* proxyContext;
+    virtual Context* onCreateContext(const ContextRec& rec, void* storage) const SK_OVERRIDE {
+        SkShader::Context* proxyContext = NULL;
         if (fProxy) {
             char* proxyContextStorage = (char*) storage + sizeof(Sk3DShaderContext);
-            proxyContext = fProxy->createContext(device, paint, matrix, proxyContextStorage);
-            SkASSERT(proxyContext);
-        } else {
-            proxyContext = NULL;
+            proxyContext = fProxy->createContext(rec, proxyContextStorage);
+            if (!proxyContext) {
+                return NULL;
+            }
         }
-        return SkNEW_PLACEMENT_ARGS(storage, Sk3DShaderContext, (*this, device, paint, matrix,
-                                                                 proxyContext));
+        return SkNEW_PLACEMENT_ARGS(storage, Sk3DShaderContext, (*this, rec, proxyContext));
     }
 
     class Sk3DShaderContext : public SkShader::Context {
     public:
         // Calls proxyContext's destructor but will NOT free its memory.
-        Sk3DShaderContext(const Sk3DShader& shader, const SkBitmap& device, const SkPaint& paint,
-                          const SkMatrix& matrix, SkShader::Context* proxyContext)
-            : INHERITED(shader, device, paint, matrix)
+        Sk3DShaderContext(const Sk3DShader& shader, const ContextRec& rec,
+                          SkShader::Context* proxyContext)
+            : INHERITED(shader, rec)
             , fMask(NULL)
             , fProxyContext(proxyContext)
         {
             if (!fProxyContext) {
-                fPMColor = SkPreMultiplyColor(paint.getColor());
+                fPMColor = SkPreMultiplyColor(rec.fPaint->getColor());
             }
         }
 
@@ -954,9 +929,10 @@ SkBlitter* SkBlitter::Choose(const SkBitmap& device,
      */
     SkShader::Context* shaderContext;
     if (shader) {
+        SkShader::ContextRec rec(device, *paint, matrix);
         // Try to create the ShaderContext
         void* storage = allocator->reserveT<SkShader::Context>(shader->contextSize());
-        shaderContext = shader->createContext(device, *paint, matrix, storage);
+        shaderContext = shader->createContext(rec, storage);
         if (!shaderContext) {
             allocator->freeLast();
             blitter = allocator->createT<SkNullBlitter>();
@@ -1022,6 +998,19 @@ const uint32_t gMask_00FF00FF = 0xFF00FF;
 
 ///////////////////////////////////////////////////////////////////////////////
 
+class SkTransparentShaderContext : public SkShader::Context {
+public:
+    SkTransparentShaderContext(const SkShader& shader, const SkShader::ContextRec& rec)
+        : INHERITED(shader, rec) {}
+
+    virtual void shadeSpan(int x, int y, SkPMColor colors[], int count) SK_OVERRIDE {
+        sk_bzero(colors, count * sizeof(SkPMColor));
+    }
+
+private:
+    typedef SkShader::Context INHERITED;
+};
+
 SkShaderBlitter::SkShaderBlitter(const SkBitmap& device, const SkPaint& paint,
                                  SkShader::Context* shaderContext)
         : INHERITED(device)
@@ -1038,20 +1027,19 @@ SkShaderBlitter::~SkShaderBlitter() {
     fShader->unref();
 }
 
-bool SkShaderBlitter::resetShaderContext(const SkBitmap& device, const SkPaint& paint,
-                                         const SkMatrix& matrix) {
-    if (!fShader->validContext(device, paint, matrix)) {
-        return false;
-    }
-
+bool SkShaderBlitter::resetShaderContext(const SkShader::ContextRec& rec) {
     // Only destroy the old context if we have a new one. We need to ensure to have a
     // live context in fShaderContext because the storage is owned by an SkSmallAllocator
     // outside of this class.
     // The new context will be of the same size as the old one because we use the same
     // shader to create it. It is therefore safe to re-use the storage.
     fShaderContext->~Context();
-    fShaderContext = fShader->createContext(device, paint, matrix, (void*)fShaderContext);
-    SkASSERT(fShaderContext);
-
+    SkShader::Context* ctx = fShader->createContext(rec, (void*)fShaderContext);
+    if (NULL == ctx) {
+        // Need a valid context in fShaderContext's storage, so we can later (or our caller) call
+        // the in-place destructor.
+        SkNEW_PLACEMENT_ARGS(fShaderContext, SkTransparentShaderContext, (*fShader, rec));
+        return false;
+    }
     return true;
 }
diff --git a/core/SkBlitter.h b/core/SkBlitter.h
index f76839e8..a3a21961 100644
--- a/core/SkBlitter.h
+++ b/core/SkBlitter.h
@@ -64,8 +64,7 @@ public:
     /**
      *  Special methods for SkShaderBlitter. On all other classes this is a no-op.
      */
-    virtual bool resetShaderContext(const SkBitmap& device, const SkPaint& paint,
-                                    const SkMatrix& matrix);
+    virtual bool resetShaderContext(const SkShader::ContextRec&);
     virtual SkShader::Context* getShaderContext() const;
 
     ///@name non-virtual helpers
diff --git a/core/SkComposeShader.cpp b/core/SkComposeShader.cpp
index 2c27c9e7..b2f69b4b 100644
--- a/core/SkComposeShader.cpp
+++ b/core/SkComposeShader.cpp
@@ -73,65 +73,46 @@ void SkComposeShader::flatten(SkWriteBuffer& buffer) const {
     buffer.writeFlattenable(fMode);
 }
 
-/*  We call validContext/createContext on our two worker shaders.
-    However, we always let them see opaque alpha, and if the paint
-    really is translucent, then we apply that after the fact.
-
- */
-bool SkComposeShader::validContext(const SkBitmap& device,
-                                   const SkPaint& paint,
-                                   const SkMatrix& matrix,
-                                   SkMatrix* totalInverse) const {
-    if (!this->INHERITED::validContext(device, paint, matrix, totalInverse)) {
-        return false;
+template <typename T> void safe_call_destructor(T* obj) {
+    if (obj) {
+        obj->~T();
     }
-
-    // we preconcat our localMatrix (if any) with the device matrix
-    // before calling our sub-shaders
-
-    SkMatrix tmpM;
-
-    tmpM.setConcat(matrix, this->getLocalMatrix());
-
-    return fShaderA->validContext(device, paint, tmpM) &&
-           fShaderB->validContext(device, paint, tmpM);
 }
 
-SkShader::Context* SkComposeShader::createContext(const SkBitmap& device, const SkPaint& paint,
-                                                  const SkMatrix& matrix, void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
+SkShader::Context* SkComposeShader::onCreateContext(const ContextRec& rec, void* storage) const {
+    char* aStorage = (char*) storage + sizeof(ComposeShaderContext);
+    char* bStorage = aStorage + fShaderA->contextSize();
 
     // we preconcat our localMatrix (if any) with the device matrix
     // before calling our sub-shaders
-
     SkMatrix tmpM;
+    tmpM.setConcat(*rec.fMatrix, this->getLocalMatrix());
+
+    // Our sub-shaders need to see opaque, so by combining them we don't double-alphatize the
+    // result. ComposeShader itself will respect the alpha, and post-apply it after calling the
+    // sub-shaders.
+    SkPaint opaquePaint(*rec.fPaint);
+    opaquePaint.setAlpha(0xFF);
+
+    ContextRec newRec(rec);
+    newRec.fMatrix = &tmpM;
+    newRec.fPaint = &opaquePaint;
+
+    SkShader::Context* contextA = fShaderA->createContext(newRec, aStorage);
+    SkShader::Context* contextB = fShaderB->createContext(newRec, bStorage);
+    if (!contextA || !contextB) {
+        safe_call_destructor(contextA);
+        safe_call_destructor(contextB);
+        return NULL;
+    }
 
-    tmpM.setConcat(matrix, this->getLocalMatrix());
-
-    SkAutoAlphaRestore  restore(const_cast<SkPaint*>(&paint), 0xFF);
-
-    char* aStorage = (char*) storage + sizeof(ComposeShaderContext);
-    char* bStorage = aStorage + fShaderA->contextSize();
-
-    SkShader::Context* contextA = fShaderA->createContext(device, paint, tmpM, aStorage);
-    SkShader::Context* contextB = fShaderB->createContext(device, paint, tmpM, bStorage);
-
-    // Both functions must succeed; otherwise validContext should have returned
-    // false.
-    SkASSERT(contextA);
-    SkASSERT(contextB);
-
-    return SkNEW_PLACEMENT_ARGS(storage, ComposeShaderContext,
-                                (*this, device, paint, matrix, contextA, contextB));
+    return SkNEW_PLACEMENT_ARGS(storage, ComposeShaderContext, (*this, rec, contextA, contextB));
 }
 
 SkComposeShader::ComposeShaderContext::ComposeShaderContext(
-        const SkComposeShader& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix,
+        const SkComposeShader& shader, const ContextRec& rec,
         SkShader::Context* contextA, SkShader::Context* contextB)
-    : INHERITED(shader, device, paint, matrix)
+    : INHERITED(shader, rec)
     , fShaderContextA(contextA)
     , fShaderContextB(contextB) {}
 
@@ -150,6 +131,10 @@ void SkComposeShader::ComposeShaderContext::shadeSpan(int x, int y, SkPMColor re
     SkXfermode*        mode = static_cast<const SkComposeShader&>(fShader).fMode;
     unsigned           scale = SkAlpha255To256(this->getPaintAlpha());
 
+#ifdef SK_BUILD_FOR_ANDROID
+    scale = 256;    // ugh -- maintain old bug/behavior for now
+#endif
+
     SkPMColor   tmp[TMP_COLOR_COUNT];
 
     if (NULL == mode) {   // implied SRC_OVER
@@ -190,7 +175,7 @@ void SkComposeShader::ComposeShaderContext::shadeSpan(int x, int y, SkPMColor re
             shaderContextB->shadeSpan(x, y, tmp, n);
             mode->xfer32(result, tmp, n, NULL);
 
-            if (256 == scale) {
+            if (256 != scale) {
                 for (int i = 0; i < n; i++) {
                     result[i] = SkAlphaMulQ(result[i], scale);
                 }
diff --git a/core/SkCoreBlitters.h b/core/SkCoreBlitters.h
index 2d22d38e..20f9437a 100644
--- a/core/SkCoreBlitters.h
+++ b/core/SkCoreBlitters.h
@@ -41,8 +41,7 @@ public:
       *  Will create the context at the same location as the old one (this is safe
       *  because the shader itself is unchanged).
       */
-    virtual bool resetShaderContext(const SkBitmap& device, const SkPaint& paint,
-                                    const SkMatrix& matrix) SK_OVERRIDE;
+    virtual bool resetShaderContext(const SkShader::ContextRec&) SK_OVERRIDE;
 
     virtual SkShader::Context* getShaderContext() const SK_OVERRIDE { return fShaderContext; }
 
diff --git a/core/SkDraw.cpp b/core/SkDraw.cpp
index f9e06e52..24c80557 100644
--- a/core/SkDraw.cpp
+++ b/core/SkDraw.cpp
@@ -2353,14 +2353,11 @@ class SkTriColorShader : public SkShader {
 public:
     SkTriColorShader() {}
 
-    virtual SkShader::Context* createContext(
-            const SkBitmap&, const SkPaint&, const SkMatrix&, void*) const SK_OVERRIDE;
     virtual size_t contextSize() const SK_OVERRIDE;
 
     class TriColorShaderContext : public SkShader::Context {
     public:
-        TriColorShaderContext(const SkTriColorShader& shader, const SkBitmap& device,
-                              const SkPaint& paint, const SkMatrix& matrix);
+        TriColorShaderContext(const SkTriColorShader& shader, const ContextRec&);
         virtual ~TriColorShaderContext();
 
         bool setup(const SkPoint pts[], const SkColor colors[], int, int, int);
@@ -2380,19 +2377,14 @@ public:
 protected:
     SkTriColorShader(SkReadBuffer& buffer) : SkShader(buffer) {}
 
+    virtual Context* onCreateContext(const ContextRec& rec, void* storage) const SK_OVERRIDE {
+        return SkNEW_PLACEMENT_ARGS(storage, TriColorShaderContext, (*this, rec));
+    }
+
 private:
     typedef SkShader INHERITED;
 };
 
-SkShader::Context* SkTriColorShader::createContext(const SkBitmap& device, const SkPaint& paint,
-                                                   const SkMatrix& matrix, void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
-
-    return SkNEW_PLACEMENT_ARGS(storage, TriColorShaderContext, (*this, device, paint, matrix));
-}
-
 bool SkTriColorShader::TriColorShaderContext::setup(const SkPoint pts[], const SkColor colors[],
                                                     int index0, int index1, int index2) {
 
@@ -2411,7 +2403,13 @@ bool SkTriColorShader::TriColorShaderContext::setup(const SkPoint pts[], const S
     if (!m.invert(&im)) {
         return false;
     }
-    fDstToUnit.setConcat(im, this->getTotalInverse());
+    // We can't call getTotalInverse(), because we explicitly don't want to look at the localmatrix
+    // as our interators are intrinsically tied to the vertices, and nothing else.
+    SkMatrix ctmInv;
+    if (!this->getCTM().invert(&ctmInv)) {
+        return false;
+    }
+    fDstToUnit.setConcat(im, ctmInv);
     return true;
 }
 
@@ -2430,10 +2428,9 @@ static int ScalarTo256(SkScalar v) {
 }
 
 
-SkTriColorShader::TriColorShaderContext::TriColorShaderContext(
-        const SkTriColorShader& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix) {}
+SkTriColorShader::TriColorShaderContext::TriColorShaderContext(const SkTriColorShader& shader,
+                                                               const ContextRec& rec)
+    : INHERITED(shader, rec) {}
 
 SkTriColorShader::TriColorShaderContext::~TriColorShaderContext() {}
 
@@ -2441,6 +2438,8 @@ size_t SkTriColorShader::contextSize() const {
     return sizeof(TriColorShaderContext);
 }
 void SkTriColorShader::TriColorShaderContext::shadeSpan(int x, int y, SkPMColor dstC[], int count) {
+    const int alphaScale = Sk255To256(this->getPaintAlpha());
+
     SkPoint src;
 
     for (int i = 0; i < count; i++) {
@@ -2459,9 +2458,15 @@ void SkTriColorShader::TriColorShaderContext::shadeSpan(int x, int y, SkPMColor
             scale0 = 0;
         }
 
+        if (256 != alphaScale) {
+            scale0 = SkAlphaMul(scale0, alphaScale);
+            scale1 = SkAlphaMul(scale1, alphaScale);
+            scale2 = SkAlphaMul(scale2, alphaScale);
+        }
+
         dstC[i] = SkAlphaMulQ(fColors[0], scale0) +
-        SkAlphaMulQ(fColors[1], scale1) +
-        SkAlphaMulQ(fColors[2], scale2);
+                  SkAlphaMulQ(fColors[1], scale1) +
+                  SkAlphaMulQ(fColors[2], scale2);
     }
 }
 
@@ -2557,18 +2562,13 @@ void SkDraw::drawVertices(SkCanvas::VertexMode vmode, int count,
     VertState::Proc vertProc = state.chooseProc(vmode);
 
     if (NULL != textures || NULL != colors) {
-        SkMatrix  tempM;
-        SkMatrix  savedLocalM;
-        if (shader) {
-            savedLocalM = shader->getLocalMatrix();
-        }
-
         while (vertProc(&state)) {
             if (NULL != textures) {
+                SkMatrix tempM;
                 if (texture_to_matrix(state, vertices, textures, &tempM)) {
-                    tempM.postConcat(savedLocalM);
-                    shader->setLocalMatrix(tempM);
-                    if (!blitter->resetShaderContext(*fBitmap, p, *fMatrix)) {
+                    SkShader::ContextRec rec(*fBitmap, p, *fMatrix);
+                    rec.fLocalMatrix = &tempM;
+                    if (!blitter->resetShaderContext(rec)) {
                         continue;
                     }
                 }
@@ -2604,11 +2604,6 @@ void SkDraw::drawVertices(SkCanvas::VertexMode vmode, int count,
             };
             SkScan::FillTriangle(tmp, *fRC, blitter.get());
         }
-
-        // now restore the shader's original local matrix
-        if (NULL != shader) {
-            shader->setLocalMatrix(savedLocalM);
-        }
     } else {
         // no colors[] and no texture
         HairProc hairProc = ChooseHairProc(paint.isAntiAlias());
diff --git a/core/SkFilterShader.cpp b/core/SkFilterShader.cpp
index 5c5e8f31..5094706e 100644
--- a/core/SkFilterShader.cpp
+++ b/core/SkFilterShader.cpp
@@ -55,41 +55,22 @@ uint32_t SkFilterShader::FilterShaderContext::getFlags() const {
     return shaderF;
 }
 
-SkShader::Context* SkFilterShader::createContext(const SkBitmap& device,
-                                                 const SkPaint& paint,
-                                                 const SkMatrix& matrix,
-                                                 void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
-
+SkShader::Context* SkFilterShader::onCreateContext(const ContextRec& rec, void* storage) const {
     char* shaderContextStorage = (char*)storage + sizeof(FilterShaderContext);
-    SkShader::Context* shaderContext = fShader->createContext(device, paint, matrix,
-                                                              shaderContextStorage);
+    SkShader::Context* shaderContext = fShader->createContext(rec, shaderContextStorage);
     SkASSERT(shaderContext);
 
-    return SkNEW_PLACEMENT_ARGS(storage, FilterShaderContext,
-                                (*this, shaderContext, device, paint, matrix));
+    return SkNEW_PLACEMENT_ARGS(storage, FilterShaderContext, (*this, shaderContext, rec));
 }
 
 size_t SkFilterShader::contextSize() const {
     return sizeof(FilterShaderContext) + fShader->contextSize();
 }
 
-bool SkFilterShader::validContext(const SkBitmap& device,
-                                  const SkPaint& paint,
-                                  const SkMatrix& matrix,
-                                  SkMatrix* totalInverse) const {
-    return this->INHERITED::validContext(device, paint, matrix, totalInverse) &&
-           fShader->validContext(device, paint, matrix);
-}
-
 SkFilterShader::FilterShaderContext::FilterShaderContext(const SkFilterShader& filterShader,
                                                          SkShader::Context* shaderContext,
-                                                         const SkBitmap& device,
-                                                         const SkPaint& paint,
-                                                         const SkMatrix& matrix)
-    : INHERITED(filterShader, device, paint, matrix)
+                                                         const ContextRec& rec)
+    : INHERITED(filterShader, rec)
     , fShaderContext(shaderContext) {}
 
 SkFilterShader::FilterShaderContext::~FilterShaderContext() {
diff --git a/core/SkFilterShader.h b/core/SkFilterShader.h
index 4ef45772..1a4b71fe 100644
--- a/core/SkFilterShader.h
+++ b/core/SkFilterShader.h
@@ -17,17 +17,12 @@ public:
     SkFilterShader(SkShader* shader, SkColorFilter* filter);
     virtual ~SkFilterShader();
 
-    virtual bool validContext(const SkBitmap&, const SkPaint&,
-                              const SkMatrix&, SkMatrix* totalInverse = NULL) const SK_OVERRIDE;
-    virtual SkShader::Context* createContext(const SkBitmap&, const SkPaint&,
-                                             const SkMatrix&, void* storage) const SK_OVERRIDE;
     virtual size_t contextSize() const SK_OVERRIDE;
 
     class FilterShaderContext : public SkShader::Context {
     public:
         // Takes ownership of shaderContext and calls its destructor.
-        FilterShaderContext(const SkFilterShader& filterShader, SkShader::Context* shaderContext,
-                            const SkBitmap& device, const SkPaint& paint, const SkMatrix& matrix);
+        FilterShaderContext(const SkFilterShader&, SkShader::Context*, const ContextRec&);
         virtual ~FilterShaderContext();
 
         virtual uint32_t getFlags() const SK_OVERRIDE;
@@ -47,6 +42,8 @@ public:
 protected:
     SkFilterShader(SkReadBuffer& );
     virtual void flatten(SkWriteBuffer&) const SK_OVERRIDE;
+    virtual Context* onCreateContext(const ContextRec&, void* storage) const SK_OVERRIDE;
+
 
 private:
     SkShader*       fShader;
diff --git a/core/SkPaint.cpp b/core/SkPaint.cpp
index 176992f4..88e5b432 100644
--- a/core/SkPaint.cpp
+++ b/core/SkPaint.cpp
@@ -107,6 +107,11 @@ SkPaint::SkPaint() {
 }
 
 SkPaint::SkPaint(const SkPaint& src) {
+    // Diagnostic. May remove later. See crbug.com/364224
+    if (NULL == &src) {
+        sk_throw();
+    }
+
 #define COPY(field) field = src.field
 #define REF_COPY(field) field = SkSafeRef(src.field)
 
@@ -153,6 +158,10 @@ SkPaint::~SkPaint() {
 }
 
 SkPaint& SkPaint::operator=(const SkPaint& src) {
+    if (this == &src) {
+        return *this;
+    }
+
 #define COPY(field) field = src.field
 #define REF_COPY(field) SkSafeUnref(field); field = SkSafeRef(src.field)
 
diff --git a/core/SkPaintPriv.cpp b/core/SkPaintPriv.cpp
index ce053890..65fd0e75 100644
--- a/core/SkPaintPriv.cpp
+++ b/core/SkPaintPriv.cpp
@@ -76,3 +76,24 @@ bool isPaintOpaque(const SkPaint* paint,
     }
     return false;
 }
+
+bool NeedsDeepCopy(const SkPaint& paint) {
+    /*
+     *  These fields are known to be immutable, and so can be shallow-copied
+     *
+     *  getTypeface()
+     *  getAnnotation()
+     *  paint.getColorFilter()
+     *  getXfermode()
+     *  getPathEffect()
+     *  getMaskFilter()
+     */
+
+    return paint.getShader() ||
+#ifdef SK_SUPPORT_LEGACY_LAYERRASTERIZER_API
+           paint.getRasterizer() ||
+#endif
+           paint.getLooper() || // needs to hide its addLayer...
+           paint.getImageFilter();
+}
+
diff --git a/core/SkPaintPriv.h b/core/SkPaintPriv.h
index 38c9063e..9668fef1 100644
--- a/core/SkPaintPriv.h
+++ b/core/SkPaintPriv.h
@@ -22,4 +22,11 @@ class SkPaint;
 */
 bool isPaintOpaque(const SkPaint* paint,
                    const SkBitmap* bmpReplacesShader = NULL);
+
+/** Returns true if the provided paint has fields which are not
+    immutable (and will thus require deep copying).
+    @param paint the paint to be analyzed
+    @return true if the paint requires a deep copy
+*/
+bool NeedsDeepCopy(const SkPaint& paint);
 #endif
diff --git a/core/SkPicture.cpp b/core/SkPicture.cpp
index 3b04906e..68434303 100644
--- a/core/SkPicture.cpp
+++ b/core/SkPicture.cpp
@@ -15,6 +15,7 @@
 #include "SkBitmapDevice.h"
 #include "SkCanvas.h"
 #include "SkChunkAlloc.h"
+#include "SkPaintPriv.h"
 #include "SkPicture.h"
 #include "SkRegion.h"
 #include "SkStream.h"
@@ -217,26 +218,6 @@ SkPicture* SkPicture::clone() const {
     return clonedPicture;
 }
 
-static bool needs_deep_copy(const SkPaint& paint) {
-    /*
-     *  These fields are known to be immutable, and so can be shallow-copied
-     *
-     *  getTypeface()
-     *  getAnnotation()
-     *  paint.getColorFilter()
-     *  getXfermode()
-     *  getPathEffect()
-     *  getMaskFilter()
-     */
-
-    return paint.getShader() ||
-#ifdef SK_SUPPORT_LEGACY_LAYERRASTERIZER_API
-           paint.getRasterizer() ||
-#endif
-           paint.getLooper() || // needs to hide its addLayer...
-           paint.getImageFilter();
-}
-
 void SkPicture::clone(SkPicture* pictures, int count) const {
     SkPictCopyInfo copyInfo;
     SkPictInfo info;
@@ -282,7 +263,7 @@ void SkPicture::clone(SkPicture* pictures, int count) const {
 
                 SkDEBUGCODE(int heapSize = SafeCount(fPlayback->fBitmapHeap.get());)
                 for (int i = 0; i < paintCount; i++) {
-                    if (needs_deep_copy(fPlayback->fPaints->at(i))) {
+                    if (NeedsDeepCopy(fPlayback->fPaints->at(i))) {
                         copyInfo.paintData[i] =
                             SkFlatData::Create<SkPaint::FlatteningTraits>(&copyInfo.controller,
                                                               fPlayback->fPaints->at(i), 0);
diff --git a/core/SkPictureRecord.cpp b/core/SkPictureRecord.cpp
index f3d108c4..f6da2f27 100644
--- a/core/SkPictureRecord.cpp
+++ b/core/SkPictureRecord.cpp
@@ -15,6 +15,15 @@
 
 #define HEAP_BLOCK_SIZE 4096
 
+// If SK_RECORD_LITERAL_PICTURES is defined, record our inputs as literally as possible.
+// Otherwise, we can be clever and record faster equivalents.  kBeClever is normally true.
+static const bool kBeClever =
+#ifdef SK_RECORD_LITERAL_PICTURES
+    false;
+#else
+    true;
+#endif
+
 enum {
     // just need a value that save or getSaveCount would never return
     kNoInitialSave = -1,
@@ -34,7 +43,7 @@ SkPictureRecord::SkPictureRecord(SkPicture* picture, const SkISize& dimensions,
     , fFlattenableHeap(HEAP_BLOCK_SIZE)
     , fPaints(&fFlattenableHeap)
     , fRecordFlags(flags)
-    , fOptsEnabled(true) {
+    , fOptsEnabled(kBeClever) {
 #ifdef SK_DEBUG_SIZE
     fPointBytes = fRectBytes = fTextBytes = 0;
     fPointWrites = fRectWrites = fTextWrites = 0;
@@ -1031,9 +1040,9 @@ void SkPictureRecord::drawRRect(const SkRRect& rrect, const SkPaint& paint) {
     fMCMgr.call(SkMatrixClipStateMgr::kOther_CallType);
 #endif
 
-    if (rrect.isRect()) {
+    if (rrect.isRect() && kBeClever) {
         this->SkPictureRecord::drawRect(rrect.getBounds(), paint);
-    } else if (rrect.isOval()) {
+    } else if (rrect.isOval() && kBeClever) {
         this->SkPictureRecord::drawOval(rrect.getBounds(), paint);
     } else {
         // op + paint index + rrect
@@ -1089,7 +1098,7 @@ void SkPictureRecord::drawPath(const SkPath& path, const SkPaint& paint) {
 
 void SkPictureRecord::drawBitmap(const SkBitmap& bitmap, SkScalar left, SkScalar top,
                                  const SkPaint* paint = NULL) {
-    if (bitmap.drawsNothing()) {
+    if (bitmap.drawsNothing() && kBeClever) {
         return;
     }
 
@@ -1111,7 +1120,7 @@ void SkPictureRecord::drawBitmap(const SkBitmap& bitmap, SkScalar left, SkScalar
 void SkPictureRecord::drawBitmapRectToRect(const SkBitmap& bitmap, const SkRect* src,
                                            const SkRect& dst, const SkPaint* paint,
                                            DrawBitmapRectFlags flags) {
-    if (bitmap.drawsNothing()) {
+    if (bitmap.drawsNothing() && kBeClever) {
         return;
     }
 
@@ -1138,7 +1147,7 @@ void SkPictureRecord::drawBitmapRectToRect(const SkBitmap& bitmap, const SkRect*
 
 void SkPictureRecord::drawBitmapMatrix(const SkBitmap& bitmap, const SkMatrix& matrix,
                                        const SkPaint* paint) {
-    if (bitmap.drawsNothing()) {
+    if (bitmap.drawsNothing() && kBeClever) {
         return;
     }
 
@@ -1158,7 +1167,7 @@ void SkPictureRecord::drawBitmapMatrix(const SkBitmap& bitmap, const SkMatrix& m
 
 void SkPictureRecord::drawBitmapNine(const SkBitmap& bitmap, const SkIRect& center,
                                      const SkRect& dst, const SkPaint* paint) {
-    if (bitmap.drawsNothing()) {
+    if (bitmap.drawsNothing() && kBeClever) {
         return;
     }
 
@@ -1179,7 +1188,7 @@ void SkPictureRecord::drawBitmapNine(const SkBitmap& bitmap, const SkIRect& cent
 
 void SkPictureRecord::drawSprite(const SkBitmap& bitmap, int left, int top,
                                  const SkPaint* paint = NULL) {
-    if (bitmap.drawsNothing()) {
+    if (bitmap.drawsNothing() && kBeClever) {
         return;
     }
 
@@ -1224,7 +1233,7 @@ void SkPictureRecord::onDrawText(const void* text, size_t byteLength, SkScalar x
     fMCMgr.call(SkMatrixClipStateMgr::kOther_CallType);
 #endif
 
-    bool fast = !paint.isVerticalText() && paint.canComputeFastBounds();
+    bool fast = !paint.isVerticalText() && paint.canComputeFastBounds() && kBeClever;
 
     // op + paint index + length + 'length' worth of chars + x + y
     size_t size = 3 * kUInt32Size + SkAlign4(byteLength) + 2 * sizeof(SkScalar);
@@ -1275,8 +1284,8 @@ void SkPictureRecord::onDrawPosText(const void* text, size_t byteLength, const S
         }
     }
 
-    bool fastBounds = !paint.isVerticalText() && paint.canComputeFastBounds();
-    bool fast = canUseDrawH && fastBounds;
+    bool fastBounds = !paint.isVerticalText() && paint.canComputeFastBounds() && kBeClever;
+    bool fast = canUseDrawH && fastBounds && kBeClever;
 
     // op + paint index + length + 'length' worth of data + num points
     size_t size = 3 * kUInt32Size + SkAlign4(byteLength) + 1 * kUInt32Size;
@@ -1349,10 +1358,11 @@ void SkPictureRecord::drawPosTextHImpl(const void* text, size_t byteLength,
                           const SkScalar xpos[], SkScalar constY,
                           const SkPaint& paint, const SkFlatData* flatPaintData) {
     int points = paint.countText(text, byteLength);
-    if (0 == points)
+    if (0 == points && kBeClever) {
         return;
+    }
 
-    bool fast = !paint.isVerticalText() && paint.canComputeFastBounds();
+    bool fast = !paint.isVerticalText() && paint.canComputeFastBounds() && kBeClever;
 
     // op + paint index + length + 'length' worth of data + num points
     size_t size = 3 * kUInt32Size + SkAlign4(byteLength) + 1 * kUInt32Size;
@@ -1545,7 +1555,7 @@ void SkPictureRecord::onPopCull() {
     fCullOffsetStack.pop();
 
     // Collapse empty push/pop pairs.
-    if ((size_t)(cullSkipOffset + kUInt32Size) == fWriter.bytesWritten()) {
+    if ((size_t)(cullSkipOffset + kUInt32Size) == fWriter.bytesWritten() && kBeClever) {
         SkASSERT(fWriter.bytesWritten() >= kPushCullOpSize);
         SkASSERT(PUSH_CULL == peek_op(&fWriter, fWriter.bytesWritten() - kPushCullOpSize));
         fWriter.rewindToOffset(fWriter.bytesWritten() - kPushCullOpSize);
diff --git a/core/SkPictureShader.cpp b/core/SkPictureShader.cpp
index 466c5e12..9655e85b 100644
--- a/core/SkPictureShader.cpp
+++ b/core/SkPictureShader.cpp
@@ -18,8 +18,10 @@
 #include "GrContext.h"
 #endif
 
-SkPictureShader::SkPictureShader(SkPicture* picture, TileMode tmx, TileMode tmy)
-    : fPicture(SkRef(picture))
+SkPictureShader::SkPictureShader(SkPicture* picture, TileMode tmx, TileMode tmy,
+                                 const SkMatrix* localMatrix)
+    : INHERITED(localMatrix)
+    , fPicture(SkRef(picture))
     , fTmx(tmx)
     , fTmy(tmy) { }
 
@@ -34,11 +36,12 @@ SkPictureShader::~SkPictureShader() {
     fPicture->unref();
 }
 
-SkPictureShader* SkPictureShader::Create(SkPicture* picture, TileMode tmx, TileMode tmy) {
+SkPictureShader* SkPictureShader::Create(SkPicture* picture, TileMode tmx, TileMode tmy,
+                                         const SkMatrix* localMatrix) {
     if (!picture || 0 == picture->width() || 0 == picture->height()) {
         return NULL;
     }
-    return SkNEW_ARGS(SkPictureShader, (picture, tmx, tmy));
+    return SkNEW_ARGS(SkPictureShader, (picture, tmx, tmy, localMatrix));
 }
 
 void SkPictureShader::flatten(SkWriteBuffer& buffer) const {
@@ -49,7 +52,7 @@ void SkPictureShader::flatten(SkWriteBuffer& buffer) const {
     fPicture->flatten(buffer);
 }
 
-SkShader* SkPictureShader::refBitmapShader(const SkMatrix& matrix) const {
+SkShader* SkPictureShader::refBitmapShader(const SkMatrix& matrix, const SkMatrix* localM) const {
     SkASSERT(fPicture && fPicture->width() > 0 && fPicture->height() > 0);
 
     SkMatrix m;
@@ -58,6 +61,9 @@ SkShader* SkPictureShader::refBitmapShader(const SkMatrix& matrix) const {
     } else {
         m = matrix;
     }
+    if (localM) {
+        m.preConcat(*localM);
+    }
 
     // Use a rotation-invariant scale
     SkPoint scale;
@@ -79,6 +85,7 @@ SkShader* SkPictureShader::refBitmapShader(const SkMatrix& matrix) const {
 
     SkAutoMutexAcquire ama(fCachedBitmapShaderMutex);
 
+    // TODO(fmalita): remove fCachedLocalMatrix from this key after getLocalMatrix is removed.
     if (!fCachedBitmapShader || tileScale != fCachedTileScale ||
         this->getLocalMatrix() != fCachedLocalMatrix) {
         SkBitmap bm;
@@ -106,64 +113,55 @@ SkShader* SkPictureShader::refBitmapShader(const SkMatrix& matrix) const {
     return fCachedBitmapShader;
 }
 
-SkShader* SkPictureShader::validInternal(const SkBitmap& device, const SkPaint& paint,
-                                         const SkMatrix& matrix, SkMatrix* totalInverse) const {
-    if (!this->INHERITED::validContext(device, paint, matrix, totalInverse)) {
-        return NULL;
-    }
+size_t SkPictureShader::contextSize() const {
+    return sizeof(PictureShaderContext);
+}
 
-    SkAutoTUnref<SkShader> bitmapShader(this->refBitmapShader(matrix));
-    if (!bitmapShader || !bitmapShader->validContext(device, paint, matrix)) {
+SkShader::Context* SkPictureShader::onCreateContext(const ContextRec& rec, void* storage) const {
+    SkAutoTUnref<SkShader> bitmapShader(this->refBitmapShader(*rec.fMatrix, rec.fLocalMatrix));
+    if (NULL == bitmapShader.get()) {
         return NULL;
     }
-
-    return bitmapShader.detach();
+    return PictureShaderContext::Create(storage, *this, rec, bitmapShader);
 }
 
-bool SkPictureShader::validContext(const SkBitmap& device, const SkPaint& paint,
-                                   const SkMatrix& matrix, SkMatrix* totalInverse) const {
-    SkAutoTUnref<SkShader> shader(this->validInternal(device, paint, matrix, totalInverse));
-    return shader != NULL;
-}
+/////////////////////////////////////////////////////////////////////////////////////////
 
-SkShader::Context* SkPictureShader::createContext(const SkBitmap& device, const SkPaint& paint,
-                                                  const SkMatrix& matrix, void* storage) const {
-    SkAutoTUnref<SkShader> bitmapShader(this->validInternal(device, paint, matrix, NULL));
-    if (!bitmapShader) {
-        return NULL;
+SkShader::Context* SkPictureShader::PictureShaderContext::Create(void* storage,
+                   const SkPictureShader& shader, const ContextRec& rec, SkShader* bitmapShader) {
+    PictureShaderContext* ctx = SkNEW_PLACEMENT_ARGS(storage, PictureShaderContext,
+                                                     (shader, rec, bitmapShader));
+    if (NULL == ctx->fBitmapShaderContext) {
+        ctx->~PictureShaderContext();
+        ctx = NULL;
     }
-
-    return SkNEW_PLACEMENT_ARGS(storage, PictureShaderContext,
-                                (*this, device, paint, matrix, bitmapShader.detach()));
-}
-
-size_t SkPictureShader::contextSize() const {
-    return sizeof(PictureShaderContext);
+    return ctx;
 }
 
 SkPictureShader::PictureShaderContext::PictureShaderContext(
-        const SkPictureShader& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix, SkShader* bitmapShader)
-    : INHERITED(shader, device, paint, matrix)
-    , fBitmapShader(bitmapShader)
+        const SkPictureShader& shader, const ContextRec& rec, SkShader* bitmapShader)
+    : INHERITED(shader, rec)
+    , fBitmapShader(SkRef(bitmapShader))
 {
-    SkASSERT(fBitmapShader);
-    fBitmapShaderContextStorage = sk_malloc_throw(fBitmapShader->contextSize());
-    fBitmapShaderContext = fBitmapShader->createContext(
-            device, paint, matrix, fBitmapShaderContextStorage);
-    SkASSERT(fBitmapShaderContext);
+    fBitmapShaderContextStorage = sk_malloc_throw(bitmapShader->contextSize());
+    fBitmapShaderContext = bitmapShader->createContext(rec, fBitmapShaderContextStorage);
+    // If fBitmapShaderContext is NULL this context is invalid; Create() checks for that.
 }
 
 SkPictureShader::PictureShaderContext::~PictureShaderContext() {
-    fBitmapShaderContext->~Context();
+    if (fBitmapShaderContext) {
+        fBitmapShaderContext->~Context();
+    }
     sk_free(fBitmapShaderContextStorage);
 }
 
 uint32_t SkPictureShader::PictureShaderContext::getFlags() const {
+    SkASSERT(fBitmapShaderContext);
     return fBitmapShaderContext->getFlags();
 }
 
 SkShader::Context::ShadeProc SkPictureShader::PictureShaderContext::asAShadeProc(void** ctx) {
+    SkASSERT(fBitmapShaderContext);
     return fBitmapShaderContext->asAShadeProc(ctx);
 }
 
@@ -195,7 +193,7 @@ void SkPictureShader::toString(SkString* str) const {
 
 #if SK_SUPPORT_GPU
 GrEffectRef* SkPictureShader::asNewEffect(GrContext* context, const SkPaint& paint) const {
-    SkAutoTUnref<SkShader> bitmapShader(this->refBitmapShader(context->getMatrix()));
+    SkAutoTUnref<SkShader> bitmapShader(this->refBitmapShader(context->getMatrix(), NULL));
     if (!bitmapShader) {
         return NULL;
     }
diff --git a/core/SkPictureShader.h b/core/SkPictureShader.h
index d1be0591..27fb674b 100644
--- a/core/SkPictureShader.h
+++ b/core/SkPictureShader.h
@@ -21,37 +21,11 @@ class SkPicture;
  */
 class SkPictureShader : public SkShader {
 public:
-    static SkPictureShader* Create(SkPicture*, TileMode, TileMode);
+    static SkPictureShader* Create(SkPicture*, TileMode, TileMode, const SkMatrix* = NULL);
     virtual ~SkPictureShader();
 
-    virtual bool validContext(const SkBitmap&, const SkPaint&,
-                              const SkMatrix&, SkMatrix* totalInverse = NULL) const SK_OVERRIDE;
-    virtual SkShader::Context* createContext(const SkBitmap& device, const SkPaint& paint,
-                                             const SkMatrix& matrix, void* storage) const
-            SK_OVERRIDE;
     virtual size_t contextSize() const SK_OVERRIDE;
 
-    class PictureShaderContext : public SkShader::Context {
-    public:
-        PictureShaderContext(const SkPictureShader& shader, const SkBitmap& device,
-                             const SkPaint& paint, const SkMatrix& matrix,
-                             SkShader* bitmapShader);
-        virtual ~PictureShaderContext();
-
-        virtual uint32_t getFlags() const SK_OVERRIDE;
-
-        virtual ShadeProc asAShadeProc(void** ctx) SK_OVERRIDE;
-        virtual void shadeSpan(int x, int y, SkPMColor dstC[], int count) SK_OVERRIDE;
-        virtual void shadeSpan16(int x, int y, uint16_t dstC[], int count) SK_OVERRIDE;
-
-    private:
-        SkAutoTUnref<SkShader>  fBitmapShader;
-        SkShader::Context*      fBitmapShaderContext;
-        void*                   fBitmapShaderContextStorage;
-
-        typedef SkShader::Context INHERITED;
-    };
-
     SK_TO_STRING_OVERRIDE()
     SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkPictureShader)
 
@@ -62,14 +36,12 @@ public:
 protected:
     SkPictureShader(SkReadBuffer&);
     virtual void flatten(SkWriteBuffer&) const SK_OVERRIDE;
+    virtual Context* onCreateContext(const ContextRec&, void* storage) const SK_OVERRIDE;
 
 private:
-    SkPictureShader(SkPicture*, TileMode, TileMode);
+    SkPictureShader(SkPicture*, TileMode, TileMode, const SkMatrix* = NULL);
 
-    SkShader* validInternal(const SkBitmap& device, const SkPaint& paint,
-                            const SkMatrix& matrix, SkMatrix* totalInverse) const;
-
-    SkShader* refBitmapShader(const SkMatrix&) const;
+    SkShader* refBitmapShader(const SkMatrix&, const SkMatrix* localMatrix) const;
 
     SkPicture*  fPicture;
     TileMode    fTmx, fTmy;
@@ -79,6 +51,29 @@ private:
     mutable SkSize                  fCachedTileScale;
     mutable SkMatrix                fCachedLocalMatrix;
 
+    class PictureShaderContext : public SkShader::Context {
+    public:
+        static Context* Create(void* storage, const SkPictureShader&, const ContextRec&,
+                               SkShader* bitmapShader);
+
+        virtual ~PictureShaderContext();
+
+        virtual uint32_t getFlags() const SK_OVERRIDE;
+
+        virtual ShadeProc asAShadeProc(void** ctx) SK_OVERRIDE;
+        virtual void shadeSpan(int x, int y, SkPMColor dstC[], int count) SK_OVERRIDE;
+        virtual void shadeSpan16(int x, int y, uint16_t dstC[], int count) SK_OVERRIDE;
+
+    private:
+        PictureShaderContext(const SkPictureShader&, const ContextRec&, SkShader* bitmapShader);
+
+        SkAutoTUnref<SkShader>  fBitmapShader;
+        SkShader::Context*      fBitmapShaderContext;
+        void*                   fBitmapShaderContextStorage;
+
+        typedef SkShader::Context INHERITED;
+    };
+
     typedef SkShader INHERITED;
 };
 
diff --git a/core/SkScan_Path.cpp b/core/SkScan_Path.cpp
index 66e95076..b32d68e7 100644
--- a/core/SkScan_Path.cpp
+++ b/core/SkScan_Path.cpp
@@ -602,7 +602,11 @@ void SkScan::FillPath(const SkPath& path, const SkRegion& origClip,
         // don't reference "origClip" any more, just use clipPtr
 
     SkIRect ir;
-    path.getBounds().round(&ir);
+    // We deliberately call dround() instead of round(), since we can't afford to generate a
+    // bounds that is tighter than the corresponding SkEdges. The edge code basically converts
+    // the floats to fixed, and then "rounds". If we called round() instead of dround() here,
+    // we could generate the wrong ir for values like 0.4999997.
+    path.getBounds().dround(&ir);
     if (ir.isEmpty()) {
         if (path.isInverseFillType()) {
             blitter->blitRegion(*clipPtr);
diff --git a/core/SkShader.cpp b/core/SkShader.cpp
index 4ddd2915..0c954f86 100644
--- a/core/SkShader.cpp
+++ b/core/SkShader.cpp
@@ -6,6 +6,7 @@
  */
 
 #include "SkBitmapProcShader.h"
+#include "SkEmptyShader.h"
 #include "SkReadBuffer.h"
 #include "SkMallocPixelRef.h"
 #include "SkPaint.h"
@@ -44,37 +45,45 @@ void SkShader::flatten(SkWriteBuffer& buffer) const {
     }
 }
 
-bool SkShader::computeTotalInverse(const SkMatrix& matrix, SkMatrix* totalInverse) const {
-    const SkMatrix* m = &matrix;
+bool SkShader::computeTotalInverse(const ContextRec& rec, SkMatrix* totalInverse) const {
+    const SkMatrix* m = rec.fMatrix;
     SkMatrix        total;
 
     if (this->hasLocalMatrix()) {
-        total.setConcat(matrix, this->getLocalMatrix());
+        total.setConcat(*m, this->getLocalMatrix());
+        m = &total;
+    }
+    if (rec.fLocalMatrix) {
+        total.setConcat(*m, *rec.fLocalMatrix);
         m = &total;
     }
-
     return m->invert(totalInverse);
 }
 
-bool SkShader::validContext(const SkBitmap& device,
-                            const SkPaint& paint,
-                            const SkMatrix& matrix,
-                            SkMatrix* totalInverse) const {
-    return this->computeTotalInverse(matrix, totalInverse);
+SkShader::Context* SkShader::createContext(const ContextRec& rec, void* storage) const {
+    if (!this->computeTotalInverse(rec, NULL)) {
+        return NULL;
+    }
+    return this->onCreateContext(rec, storage);
 }
 
-SkShader::Context::Context(const SkShader& shader, const SkBitmap& device,
-                           const SkPaint& paint, const SkMatrix& matrix)
-    : fShader(shader)
-{
-    SkASSERT(fShader.validContext(device, paint, matrix));
+SkShader::Context* SkShader::onCreateContext(const ContextRec& rec, void*) const {
+    return NULL;
+}
 
+size_t SkShader::contextSize() const {
+    return 0;
+}
+
+SkShader::Context::Context(const SkShader& shader, const ContextRec& rec)
+    : fShader(shader), fCTM(*rec.fMatrix)
+{
     // Because the context parameters must be valid at this point, we know that the matrix is
     // invertible.
-    SkAssertResult(fShader.computeTotalInverse(matrix, &fTotalInverse));
+    SkAssertResult(fShader.computeTotalInverse(rec, &fTotalInverse));
     fTotalInverseClass = (uint8_t)ComputeMatrixClass(fTotalInverse);
 
-    fPaintAlpha = paint.getAlpha();
+    fPaintAlpha = rec.fPaint->getAlpha();
 }
 
 SkShader::Context::~Context() {}
@@ -184,13 +193,18 @@ GrEffectRef* SkShader::asNewEffect(GrContext*, const SkPaint&) const {
     return NULL;
 }
 
+SkShader* SkShader::CreateEmptyShader() {
+    return SkNEW(SkEmptyShader);
+}
+
 SkShader* SkShader::CreateBitmapShader(const SkBitmap& src, TileMode tmx, TileMode tmy,
                                        const SkMatrix* localMatrix) {
     return ::CreateBitmapShader(src, tmx, tmy, localMatrix, NULL);
 }
 
-SkShader* SkShader::CreatePictureShader(SkPicture* src, TileMode tmx, TileMode tmy) {
-    return SkPictureShader::Create(src, tmx, tmy);
+SkShader* SkShader::CreatePictureShader(SkPicture* src, TileMode tmx, TileMode tmy,
+                                       const SkMatrix* localMatrix) {
+    return SkPictureShader::Create(src, tmx, tmy, localMatrix);
 }
 
 #ifndef SK_IGNORE_TO_STRING
@@ -241,23 +255,16 @@ uint8_t SkColorShader::ColorShaderContext::getSpan16Alpha() const {
     return SkGetPackedA32(fPMColor);
 }
 
-SkShader::Context* SkColorShader::createContext(const SkBitmap& device, const SkPaint& paint,
-                                                const SkMatrix& matrix, void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
-
-    return SkNEW_PLACEMENT_ARGS(storage, ColorShaderContext, (*this, device, paint, matrix));
+SkShader::Context* SkColorShader::onCreateContext(const ContextRec& rec, void* storage) const {
+    return SkNEW_PLACEMENT_ARGS(storage, ColorShaderContext, (*this, rec));
 }
 
 SkColorShader::ColorShaderContext::ColorShaderContext(const SkColorShader& shader,
-                                                      const SkBitmap& device,
-                                                      const SkPaint& paint,
-                                                      const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix)
+                                                      const ContextRec& rec)
+    : INHERITED(shader, rec)
 {
     SkColor color = shader.fColor;
-    unsigned a = SkAlphaMul(SkColorGetA(color), SkAlpha255To256(paint.getAlpha()));
+    unsigned a = SkAlphaMul(SkColorGetA(color), SkAlpha255To256(rec.fPaint->getAlpha()));
 
     unsigned r = SkColorGetR(color);
     unsigned g = SkColorGetG(color);
@@ -276,7 +283,7 @@ SkColorShader::ColorShaderContext::ColorShaderContext(const SkColorShader& shade
     fFlags = kConstInY32_Flag;
     if (255 == a) {
         fFlags |= kOpaqueAlpha_Flag;
-        if (paint.isDither() == false) {
+        if (rec.fPaint->isDither() == false) {
             fFlags |= kHasSpan16_Flag;
         }
     }
diff --git a/core/SkUtils.cpp b/core/SkUtils.cpp
index e460ac8f..76da23a6 100644
--- a/core/SkUtils.cpp
+++ b/core/SkUtils.cpp
@@ -8,6 +8,7 @@
 
 
 #include "SkUtils.h"
+#include "SkOnce.h"
 
 #if 0
 #define assign_16_longs(dst, value)             \
@@ -37,7 +38,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-void sk_memset16_portable(uint16_t dst[], uint16_t value, int count) {
+static void sk_memset16_portable(uint16_t dst[], uint16_t value, int count) {
     SkASSERT(dst != NULL && count >= 0);
 
     if (count <= 0) {
@@ -90,7 +91,7 @@ void sk_memset16_portable(uint16_t dst[], uint16_t value, int count) {
     }
 }
 
-void sk_memset32_portable(uint32_t dst[], uint32_t value, int count) {
+static void sk_memset32_portable(uint32_t dst[], uint32_t value, int count) {
     SkASSERT(dst != NULL && count >= 0);
 
     int sixteenlongs = count >> 4;
@@ -108,21 +109,37 @@ void sk_memset32_portable(uint32_t dst[], uint32_t value, int count) {
     }
 }
 
-static void sk_memset16_stub(uint16_t dst[], uint16_t value, int count) {
-    SkMemset16Proc proc = SkMemset16GetPlatformProc();
-    sk_memset16 = proc ? proc : sk_memset16_portable;
-    sk_memset16(dst, value, count);
+static void choose_memset16(SkMemset16Proc* proc) {
+    *proc = SkMemset16GetPlatformProc();
+    if (NULL == *proc) {
+        *proc = &sk_memset16_portable;
+    }
 }
 
-SkMemset16Proc sk_memset16 = sk_memset16_stub;
+void sk_memset16(uint16_t dst[], uint16_t value, int count) {
+    SK_DECLARE_STATIC_ONCE(once);
+    static SkMemset16Proc proc = NULL;
+    SkOnce(&once, choose_memset16, &proc);
+    SkASSERT(proc != NULL);
+
+    return proc(dst, value, count);
+}
 
-static void sk_memset32_stub(uint32_t dst[], uint32_t value, int count) {
-    SkMemset32Proc proc = SkMemset32GetPlatformProc();
-    sk_memset32 = proc ? proc : sk_memset32_portable;
-    sk_memset32(dst, value, count);
+static void choose_memset32(SkMemset32Proc* proc) {
+    *proc = SkMemset32GetPlatformProc();
+    if (NULL == *proc) {
+        *proc = &sk_memset32_portable;
+    }
 }
 
-SkMemset32Proc sk_memset32 = sk_memset32_stub;
+void sk_memset32(uint32_t dst[], uint32_t value, int count) {
+    SK_DECLARE_STATIC_ONCE(once);
+    static SkMemset32Proc proc = NULL;
+    SkOnce(&once, choose_memset32, &proc);
+    SkASSERT(proc != NULL);
+
+    return proc(dst, value, count);
+}
 
 ///////////////////////////////////////////////////////////////////////////////
 
diff --git a/core/SkXfermode.cpp b/core/SkXfermode.cpp
index 53c431b7..182d3b71 100644
--- a/core/SkXfermode.cpp
+++ b/core/SkXfermode.cpp
@@ -774,118 +774,6 @@ void SkXfermode::xferA8(SkAlpha* SK_RESTRICT dst,
     }
 }
 
-///////////////////////////////////////////////////////////////////////////////
-#ifdef SK_SUPPORT_LEGACY_PROCXFERMODE
-
-void SkProcXfermode::xfer32(SkPMColor* SK_RESTRICT dst,
-                            const SkPMColor* SK_RESTRICT src, int count,
-                            const SkAlpha* SK_RESTRICT aa) const {
-    SkASSERT(dst && src && count >= 0);
-
-    SkXfermodeProc proc = fProc;
-
-    if (NULL != proc) {
-        if (NULL == aa) {
-            for (int i = count - 1; i >= 0; --i) {
-                dst[i] = proc(src[i], dst[i]);
-            }
-        } else {
-            for (int i = count - 1; i >= 0; --i) {
-                unsigned a = aa[i];
-                if (0 != a) {
-                    SkPMColor dstC = dst[i];
-                    SkPMColor C = proc(src[i], dstC);
-                    if (a != 0xFF) {
-                        C = SkFourByteInterp(C, dstC, a);
-                    }
-                    dst[i] = C;
-                }
-            }
-        }
-    }
-}
-
-void SkProcXfermode::xfer16(uint16_t* SK_RESTRICT dst,
-                            const SkPMColor* SK_RESTRICT src, int count,
-                            const SkAlpha* SK_RESTRICT aa) const {
-    SkASSERT(dst && src && count >= 0);
-
-    SkXfermodeProc proc = fProc;
-
-    if (NULL != proc) {
-        if (NULL == aa) {
-            for (int i = count - 1; i >= 0; --i) {
-                SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
-                dst[i] = SkPixel32ToPixel16_ToU16(proc(src[i], dstC));
-            }
-        } else {
-            for (int i = count - 1; i >= 0; --i) {
-                unsigned a = aa[i];
-                if (0 != a) {
-                    SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
-                    SkPMColor C = proc(src[i], dstC);
-                    if (0xFF != a) {
-                        C = SkFourByteInterp(C, dstC, a);
-                    }
-                    dst[i] = SkPixel32ToPixel16_ToU16(C);
-                }
-            }
-        }
-    }
-}
-
-void SkProcXfermode::xferA8(SkAlpha* SK_RESTRICT dst,
-                            const SkPMColor* SK_RESTRICT src, int count,
-                            const SkAlpha* SK_RESTRICT aa) const {
-    SkASSERT(dst && src && count >= 0);
-
-    SkXfermodeProc proc = fProc;
-
-    if (NULL != proc) {
-        if (NULL == aa) {
-            for (int i = count - 1; i >= 0; --i) {
-                SkPMColor res = proc(src[i], dst[i] << SK_A32_SHIFT);
-                dst[i] = SkToU8(SkGetPackedA32(res));
-            }
-        } else {
-            for (int i = count - 1; i >= 0; --i) {
-                unsigned a = aa[i];
-                if (0 != a) {
-                    SkAlpha dstA = dst[i];
-                    SkPMColor res = proc(src[i], dstA << SK_A32_SHIFT);
-                    unsigned A = SkGetPackedA32(res);
-                    if (0xFF != a) {
-                        A = SkAlphaBlend(A, dstA, SkAlpha255To256(a));
-                    }
-                    dst[i] = SkToU8(A);
-                }
-            }
-        }
-    }
-}
-
-SkProcXfermode::SkProcXfermode(SkReadBuffer& buffer)
-        : SkXfermode(buffer) {
-    fProc = NULL;
-    if (!buffer.isCrossProcess()) {
-        fProc = (SkXfermodeProc)buffer.readFunctionPtr();
-    }
-}
-
-void SkProcXfermode::flatten(SkWriteBuffer& buffer) const {
-    this->INHERITED::flatten(buffer);
-    if (!buffer.isCrossProcess()) {
-        buffer.writeFunctionPtr((void*)fProc);
-    }
-}
-
-#ifndef SK_IGNORE_TO_STRING
-void SkProcXfermode::toString(SkString* str) const {
-    str->appendf("SkProcXfermode: %p", fProc);
-}
-#endif
-
-#endif
 //////////////////////////////////////////////////////////////////////////////
 
 #if SK_SUPPORT_GPU
@@ -1076,7 +964,7 @@ public:
                     break;
                 }
                 default:
-                    GrCrash("Unknown XferEffect mode.");
+                    SkFAIL("Unknown XferEffect mode.");
                     break;
             }
         }
diff --git a/device/xps/SkXPSDevice.cpp b/device/xps/SkXPSDevice.cpp
index 389db1db..62161df2 100644
--- a/device/xps/SkXPSDevice.cpp
+++ b/device/xps/SkXPSDevice.cpp
@@ -2253,7 +2253,7 @@ static void text_draw_init(const SkPaint& paint,
             numGlyphGuess = byteLength / 2;
             break;
         default:
-            SK_DEBUGBREAK(true);
+            SK_ALWAYSBREAK(true);
     }
     procs.xpsGlyphs.setReserve(numGlyphGuess);
     procs.glyphUse = &glyphsUsed;
diff --git a/effects/SkBlurMaskFilter.cpp b/effects/SkBlurMaskFilter.cpp
index 5dffd6fd..2169a42c 100644
--- a/effects/SkBlurMaskFilter.cpp
+++ b/effects/SkBlurMaskFilter.cpp
@@ -28,6 +28,10 @@
 #include "SkDraw.h"
 #endif
 
+SkScalar SkBlurMaskFilter::ConvertRadiusToSigma(SkScalar radius) {
+    return SkBlurMask::ConvertRadiusToSigma(radius);
+}
+
 class SkBlurMaskFilterImpl : public SkMaskFilter {
 public:
     SkBlurMaskFilterImpl(SkScalar sigma, SkBlurStyle, uint32_t flags);
diff --git a/effects/SkColorFilters.cpp b/effects/SkColorFilters.cpp
index 65766c1f..81d70a7f 100644
--- a/effects/SkColorFilters.cpp
+++ b/effects/SkColorFilters.cpp
@@ -145,7 +145,7 @@ static inline ColorExpr blend_term(SkXfermode::Coeff coeff,
                                    const ColorExpr& value) {
     switch (coeff) {
     default:
-        GrCrash("Unexpected xfer coeff.");
+        SkFAIL("Unexpected xfer coeff.");
     case SkXfermode::kZero_Coeff:    /** 0 */
         return ColorExpr(0);
     case SkXfermode::kOne_Coeff:     /** 1 */
diff --git a/effects/SkLightingImageFilter.cpp b/effects/SkLightingImageFilter.cpp
index b485ae1b..24fdd0e9 100644
--- a/effects/SkLightingImageFilter.cpp
+++ b/effects/SkLightingImageFilter.cpp
@@ -1115,7 +1115,7 @@ SkLight* create_random_light(SkRandom* random) {
                                             random->nextU()));
         }
         default:
-            GrCrash();
+            SkFAIL("Unexpected value.");
             return NULL;
     }
 }
diff --git a/effects/SkMatrixConvolutionImageFilter.cpp b/effects/SkMatrixConvolutionImageFilter.cpp
index 3c9fc877..f6bc6a1f 100644
--- a/effects/SkMatrixConvolutionImageFilter.cpp
+++ b/effects/SkMatrixConvolutionImageFilter.cpp
@@ -151,8 +151,12 @@ public:
 template<class PixelFetcher, bool convolveAlpha>
 void SkMatrixConvolutionImageFilter::filterPixels(const SkBitmap& src,
                                                   SkBitmap* result,
-                                                  const SkIRect& rect,
+                                                  const SkIRect& r,
                                                   const SkIRect& bounds) const {
+    SkIRect rect(r);
+    if (!rect.intersect(bounds)) {
+        return;
+    }
     for (int y = rect.fTop; y < rect.fBottom; ++y) {
         SkPMColor* dptr = result->getAddr32(rect.fLeft - bounds.fLeft, y - bounds.fTop);
         for (int x = rect.fLeft; x < rect.fRight; ++x) {
diff --git a/effects/SkMorphologyImageFilter.cpp b/effects/SkMorphologyImageFilter.cpp
index 2350e6c2..8803f8aa 100644
--- a/effects/SkMorphologyImageFilter.cpp
+++ b/effects/SkMorphologyImageFilter.cpp
@@ -369,7 +369,7 @@ void GrGLMorphologyEffect::emitCode(GrGLShaderBuilder* builder,
             func = "max";
             break;
         default:
-            GrCrash("Unexpected type");
+            SkFAIL("Unexpected type");
             func = ""; // suppress warning
             break;
     }
@@ -410,7 +410,7 @@ void GrGLMorphologyEffect::setData(const GrGLUniformManager& uman,
             imageIncrement[1] = 1.0f / texture.height();
             break;
         default:
-            GrCrash("Unknown filter direction.");
+            SkFAIL("Unknown filter direction.");
     }
     uman.set2fv(fImageIncrementUni, 1, imageIncrement);
 }
diff --git a/effects/SkPerlinNoiseShader.cpp b/effects/SkPerlinNoiseShader.cpp
index 5adb5825..c6d61180 100644
--- a/effects/SkPerlinNoiseShader.cpp
+++ b/effects/SkPerlinNoiseShader.cpp
@@ -425,13 +425,9 @@ SkPMColor SkPerlinNoiseShader::PerlinNoiseShaderContext::shade(
     return SkPreMultiplyARGB(rgba[3], rgba[0], rgba[1], rgba[2]);
 }
 
-SkShader::Context* SkPerlinNoiseShader::createContext(const SkBitmap& device, const SkPaint& paint,
-                                                      const SkMatrix& matrix, void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
-
-    return SkNEW_PLACEMENT_ARGS(storage, PerlinNoiseShaderContext, (*this, device, paint, matrix));
+SkShader::Context* SkPerlinNoiseShader::onCreateContext(const ContextRec& rec,
+                                                        void* storage) const {
+    return SkNEW_PLACEMENT_ARGS(storage, PerlinNoiseShaderContext, (*this, rec));
 }
 
 size_t SkPerlinNoiseShader::contextSize() const {
@@ -439,11 +435,10 @@ size_t SkPerlinNoiseShader::contextSize() const {
 }
 
 SkPerlinNoiseShader::PerlinNoiseShaderContext::PerlinNoiseShaderContext(
-        const SkPerlinNoiseShader& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix)
+        const SkPerlinNoiseShader& shader, const ContextRec& rec)
+    : INHERITED(shader, rec)
 {
-    SkMatrix newMatrix = matrix;
+    SkMatrix newMatrix = *rec.fMatrix;
     newMatrix.postConcat(shader.getLocalMatrix());
     SkMatrix invMatrix;
     if (!newMatrix.invert(&invMatrix)) {
diff --git a/effects/SkTransparentShader.cpp b/effects/SkTransparentShader.cpp
index 0997e620..f290d0dc 100644
--- a/effects/SkTransparentShader.cpp
+++ b/effects/SkTransparentShader.cpp
@@ -11,15 +11,9 @@
 #include "SkColorPriv.h"
 #include "SkString.h"
 
-SkShader::Context* SkTransparentShader::createContext(const SkBitmap& device,
-                                                      const SkPaint& paint,
-                                                      const SkMatrix& matrix,
-                                                      void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
-
-    return SkNEW_PLACEMENT_ARGS(storage, TransparentShaderContext, (*this, device, paint, matrix));
+SkShader::Context* SkTransparentShader::onCreateContext(const ContextRec& rec,
+                                                        void* storage) const {
+    return SkNEW_PLACEMENT_ARGS(storage, TransparentShaderContext, (*this, rec));
 }
 
 size_t SkTransparentShader::contextSize() const {
@@ -27,10 +21,9 @@ size_t SkTransparentShader::contextSize() const {
 }
 
 SkTransparentShader::TransparentShaderContext::TransparentShaderContext(
-        const SkTransparentShader& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix)
-    , fDevice(&device) {}
+        const SkTransparentShader& shader, const ContextRec& rec)
+    : INHERITED(shader, rec)
+    , fDevice(rec.fDevice) {}
 
 SkTransparentShader::TransparentShaderContext::~TransparentShaderContext() {}
 
diff --git a/effects/gradients/SkGradientShader.cpp b/effects/gradients/SkGradientShader.cpp
index 6d753a95..d376b222 100644
--- a/effects/gradients/SkGradientShader.cpp
+++ b/effects/gradients/SkGradientShader.cpp
@@ -253,9 +253,8 @@ bool SkGradientShaderBase::isOpaque() const {
 }
 
 SkGradientShaderBase::GradientShaderBaseContext::GradientShaderBaseContext(
-        const SkGradientShaderBase& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix)
+        const SkGradientShaderBase& shader, const ContextRec& rec)
+    : INHERITED(shader, rec)
     , fCache(shader.refCache(getPaintAlpha()))
 {
     const SkMatrix& inverse = this->getTotalInverse();
diff --git a/effects/gradients/SkGradientShaderPriv.h b/effects/gradients/SkGradientShaderPriv.h
index c1e253fd..a699c4ce 100644
--- a/effects/gradients/SkGradientShaderPriv.h
+++ b/effects/gradients/SkGradientShaderPriv.h
@@ -142,9 +142,7 @@ public:
 
     class GradientShaderBaseContext : public SkShader::Context {
     public:
-        GradientShaderBaseContext(const SkGradientShaderBase& shader, const SkBitmap& device,
-                                  const SkPaint& paint, const SkMatrix& matrix);
-        ~GradientShaderBaseContext() {}
+        GradientShaderBaseContext(const SkGradientShaderBase& shader, const ContextRec&);
 
         virtual uint32_t getFlags() const SK_OVERRIDE { return fFlags; }
 
diff --git a/effects/gradients/SkLinearGradient.cpp b/effects/gradients/SkLinearGradient.cpp
index 70bbbf3b..f37759c1 100644
--- a/effects/gradients/SkLinearGradient.cpp
+++ b/effects/gradients/SkLinearGradient.cpp
@@ -76,24 +76,18 @@ size_t SkLinearGradient::contextSize() const {
     return sizeof(LinearGradientContext);
 }
 
-SkShader::Context* SkLinearGradient::createContext(const SkBitmap& device, const SkPaint& paint,
-                                                   const SkMatrix& matrix, void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
-
-    return SkNEW_PLACEMENT_ARGS(storage, LinearGradientContext, (*this, device, paint, matrix));
+SkShader::Context* SkLinearGradient::onCreateContext(const ContextRec& rec, void* storage) const {
+    return SkNEW_PLACEMENT_ARGS(storage, LinearGradientContext, (*this, rec));
 }
 
 SkLinearGradient::LinearGradientContext::LinearGradientContext(
-        const SkLinearGradient& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix)
+        const SkLinearGradient& shader, const ContextRec& rec)
+    : INHERITED(shader, rec)
 {
     unsigned mask = SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask;
     if ((fDstToIndex.getType() & ~mask) == 0) {
         // when we dither, we are (usually) not const-in-Y
-        if ((fFlags & SkShader::kHasSpan16_Flag) && !paint.isDither()) {
+        if ((fFlags & SkShader::kHasSpan16_Flag) && !rec.fPaint->isDither()) {
             // only claim this if we do have a 16bit mode (i.e. none of our
             // colors have alpha), and if we are not dithering (which obviously
             // is not const in Y).
diff --git a/effects/gradients/SkLinearGradient.h b/effects/gradients/SkLinearGradient.h
index 699d76ed..e892fe33 100644
--- a/effects/gradients/SkLinearGradient.h
+++ b/effects/gradients/SkLinearGradient.h
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2012 Google Inc.
  *
@@ -15,14 +14,11 @@ class SkLinearGradient : public SkGradientShaderBase {
 public:
     SkLinearGradient(const SkPoint pts[2], const Descriptor&, const SkMatrix* localMatrix);
 
-    virtual SkShader::Context* createContext(const SkBitmap&, const SkPaint&, const SkMatrix&,
-                                             void* storage) const SK_OVERRIDE;
     virtual size_t contextSize() const SK_OVERRIDE;
 
     class LinearGradientContext : public SkGradientShaderBase::GradientShaderBaseContext {
     public:
-        LinearGradientContext(const SkLinearGradient& shader, const SkBitmap& device,
-                              const SkPaint& paint, const SkMatrix& matrix);
+        LinearGradientContext(const SkLinearGradient&, const ContextRec&);
         ~LinearGradientContext() {}
 
         virtual void shadeSpan(int x, int y, SkPMColor dstC[], int count) SK_OVERRIDE;
@@ -42,6 +38,7 @@ public:
 protected:
     SkLinearGradient(SkReadBuffer& buffer);
     virtual void flatten(SkWriteBuffer& buffer) const SK_OVERRIDE;
+    virtual Context* onCreateContext(const ContextRec&, void* storage) const SK_OVERRIDE;
 
 private:
     typedef SkGradientShaderBase INHERITED;
diff --git a/effects/gradients/SkRadialGradient.cpp b/effects/gradients/SkRadialGradient.cpp
index f13d55c6..e379f36c 100644
--- a/effects/gradients/SkRadialGradient.cpp
+++ b/effects/gradients/SkRadialGradient.cpp
@@ -161,19 +161,13 @@ size_t SkRadialGradient::contextSize() const {
     return sizeof(RadialGradientContext);
 }
 
-SkShader::Context* SkRadialGradient::createContext(const SkBitmap& device, const SkPaint& paint,
-                                                   const SkMatrix& matrix, void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
-
-    return SkNEW_PLACEMENT_ARGS(storage, RadialGradientContext, (*this, device, paint, matrix));
+SkShader::Context* SkRadialGradient::onCreateContext(const ContextRec& rec, void* storage) const {
+    return SkNEW_PLACEMENT_ARGS(storage, RadialGradientContext, (*this, rec));
 }
 
 SkRadialGradient::RadialGradientContext::RadialGradientContext(
-        const SkRadialGradient& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix) {}
+        const SkRadialGradient& shader, const ContextRec& rec)
+    : INHERITED(shader, rec) {}
 
 void SkRadialGradient::RadialGradientContext::shadeSpan16(int x, int y, uint16_t* dstCParam,
                                                           int count) {
diff --git a/effects/gradients/SkRadialGradient.h b/effects/gradients/SkRadialGradient.h
index 7aafe2d1..2c60ba5e 100644
--- a/effects/gradients/SkRadialGradient.h
+++ b/effects/gradients/SkRadialGradient.h
@@ -16,15 +16,11 @@ public:
     SkRadialGradient(const SkPoint& center, SkScalar radius, const Descriptor&,
                      const SkMatrix* localMatrix);
 
-    virtual SkShader::Context* createContext(const SkBitmap&, const SkPaint&, const SkMatrix&,
-                                             void* storage) const SK_OVERRIDE;
     virtual size_t contextSize() const SK_OVERRIDE;
 
     class RadialGradientContext : public SkGradientShaderBase::GradientShaderBaseContext {
     public:
-        RadialGradientContext(const SkRadialGradient& shader, const SkBitmap& device,
-                              const SkPaint& paint, const SkMatrix& matrix);
-        ~RadialGradientContext() {}
+        RadialGradientContext(const SkRadialGradient&, const ContextRec&);
 
         virtual void shadeSpan(int x, int y, SkPMColor dstC[], int count) SK_OVERRIDE;
         virtual void shadeSpan16(int x, int y, uint16_t dstC[], int count) SK_OVERRIDE;
@@ -45,6 +41,7 @@ public:
 protected:
     SkRadialGradient(SkReadBuffer& buffer);
     virtual void flatten(SkWriteBuffer& buffer) const SK_OVERRIDE;
+    virtual Context* onCreateContext(const ContextRec&, void* storage) const SK_OVERRIDE;
 
 private:
     typedef SkGradientShaderBase INHERITED;
diff --git a/effects/gradients/SkSweepGradient.cpp b/effects/gradients/SkSweepGradient.cpp
index a65631c6..81ebb344 100644
--- a/effects/gradients/SkSweepGradient.cpp
+++ b/effects/gradients/SkSweepGradient.cpp
@@ -56,19 +56,13 @@ size_t SkSweepGradient::contextSize() const {
     return sizeof(SweepGradientContext);
 }
 
-SkShader::Context* SkSweepGradient::createContext(const SkBitmap& device, const SkPaint& paint,
-                                                  const SkMatrix& matrix, void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
-
-    return SkNEW_PLACEMENT_ARGS(storage, SweepGradientContext, (*this, device, paint, matrix));
+SkShader::Context* SkSweepGradient::onCreateContext(const ContextRec& rec, void* storage) const {
+    return SkNEW_PLACEMENT_ARGS(storage, SweepGradientContext, (*this, rec));
 }
 
 SkSweepGradient::SweepGradientContext::SweepGradientContext(
-        const SkSweepGradient& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix) {}
+        const SkSweepGradient& shader, const ContextRec& rec)
+    : INHERITED(shader, rec) {}
 
 //  returns angle in a circle [0..2PI) -> [0..255]
 static unsigned SkATan2_255(float y, float x) {
diff --git a/effects/gradients/SkSweepGradient.h b/effects/gradients/SkSweepGradient.h
index 15c5b634..36cdd638 100644
--- a/effects/gradients/SkSweepGradient.h
+++ b/effects/gradients/SkSweepGradient.h
@@ -16,15 +16,11 @@ public:
     SkSweepGradient(SkScalar cx, SkScalar cy, const Descriptor&,
                     const SkMatrix* localMatrix);
 
-    virtual SkShader::Context* createContext(const SkBitmap&, const SkPaint&, const SkMatrix&,
-                                             void* storage) const SK_OVERRIDE;
     virtual size_t contextSize() const SK_OVERRIDE;
 
     class SweepGradientContext : public SkGradientShaderBase::GradientShaderBaseContext {
     public:
-        SweepGradientContext(const SkSweepGradient& shader, const SkBitmap& device,
-                             const SkPaint& paint, const SkMatrix& matrix);
-        ~SweepGradientContext() {}
+        SweepGradientContext(const SkSweepGradient& shader, const ContextRec&);
 
         virtual void shadeSpan(int x, int y, SkPMColor dstC[], int count) SK_OVERRIDE;
         virtual void shadeSpan16(int x, int y, uint16_t dstC[], int count) SK_OVERRIDE;
@@ -47,6 +43,7 @@ public:
 protected:
     SkSweepGradient(SkReadBuffer& buffer);
     virtual void flatten(SkWriteBuffer& buffer) const SK_OVERRIDE;
+    virtual Context* onCreateContext(const ContextRec&, void* storage) const SK_OVERRIDE;
 
 private:
     const SkPoint fCenter;
diff --git a/effects/gradients/SkTwoPointConicalGradient.cpp b/effects/gradients/SkTwoPointConicalGradient.cpp
index 9d1f8f1a..574df082 100644
--- a/effects/gradients/SkTwoPointConicalGradient.cpp
+++ b/effects/gradients/SkTwoPointConicalGradient.cpp
@@ -221,21 +221,14 @@ size_t SkTwoPointConicalGradient::contextSize() const {
     return sizeof(TwoPointConicalGradientContext);
 }
 
-SkShader::Context* SkTwoPointConicalGradient::createContext(
-        const SkBitmap& device, const SkPaint& paint,
-        const SkMatrix& matrix, void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
-        return NULL;
-    }
-
-    return SkNEW_PLACEMENT_ARGS(storage, TwoPointConicalGradientContext,
-                                (*this, device, paint, matrix));
+SkShader::Context* SkTwoPointConicalGradient::onCreateContext(const ContextRec& rec,
+                                                              void* storage) const {
+    return SkNEW_PLACEMENT_ARGS(storage, TwoPointConicalGradientContext, (*this, rec));
 }
 
 SkTwoPointConicalGradient::TwoPointConicalGradientContext::TwoPointConicalGradientContext(
-        const SkTwoPointConicalGradient& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix)
+        const SkTwoPointConicalGradient& shader, const ContextRec& rec)
+    : INHERITED(shader, rec)
 {
     // we don't have a span16 proc
     fFlags &= ~kHasSpan16_Flag;
diff --git a/effects/gradients/SkTwoPointConicalGradient.h b/effects/gradients/SkTwoPointConicalGradient.h
index 13ce3eaf..85e0bc0b 100644
--- a/effects/gradients/SkTwoPointConicalGradient.h
+++ b/effects/gradients/SkTwoPointConicalGradient.h
@@ -48,16 +48,11 @@ public:
                               const SkMatrix* localMatrix);
 
 
-    virtual SkShader::Context* createContext(const SkBitmap&, const SkPaint&, const SkMatrix&,
-                                             void* storage) const SK_OVERRIDE;
     virtual size_t contextSize() const SK_OVERRIDE;
 
     class TwoPointConicalGradientContext : public SkGradientShaderBase::GradientShaderBaseContext {
     public:
-        TwoPointConicalGradientContext(const SkTwoPointConicalGradient& shader,
-                                       const SkBitmap& device,
-                                       const SkPaint& paint,
-                                       const SkMatrix& matrix);
+        TwoPointConicalGradientContext(const SkTwoPointConicalGradient&, const ContextRec&);
         ~TwoPointConicalGradientContext() {}
 
         virtual void shadeSpan(int x, int y, SkPMColor dstC[], int count) SK_OVERRIDE;
@@ -87,6 +82,7 @@ public:
 protected:
     SkTwoPointConicalGradient(SkReadBuffer& buffer);
     virtual void flatten(SkWriteBuffer& buffer) const SK_OVERRIDE;
+    virtual Context* onCreateContext(const ContextRec&, void* storage) const SK_OVERRIDE;
 
 private:
     SkPoint fCenter1;
diff --git a/effects/gradients/SkTwoPointRadialGradient.cpp b/effects/gradients/SkTwoPointRadialGradient.cpp
index 41e577fb..d85be5d3 100644
--- a/effects/gradients/SkTwoPointRadialGradient.cpp
+++ b/effects/gradients/SkTwoPointRadialGradient.cpp
@@ -224,31 +224,18 @@ size_t SkTwoPointRadialGradient::contextSize() const {
     return sizeof(TwoPointRadialGradientContext);
 }
 
-bool SkTwoPointRadialGradient::validContext(const SkBitmap& device, const SkPaint& paint,
-                                            const SkMatrix& matrix, SkMatrix* totalInverse) const {
+SkShader::Context* SkTwoPointRadialGradient::onCreateContext(const ContextRec& rec,
+                                                             void* storage) const {
     // For now, we might have divided by zero, so detect that.
     if (0 == fDiffRadius) {
-        return false;
-    }
-
-    return this->INHERITED::validContext(device, paint, matrix, totalInverse);
-}
-
-SkShader::Context* SkTwoPointRadialGradient::createContext(
-        const SkBitmap& device, const SkPaint& paint,
-        const SkMatrix& matrix, void* storage) const {
-    if (!this->validContext(device, paint, matrix)) {
         return NULL;
     }
-
-    return SkNEW_PLACEMENT_ARGS(storage, TwoPointRadialGradientContext,
-                                (*this, device, paint, matrix));
+    return SkNEW_PLACEMENT_ARGS(storage, TwoPointRadialGradientContext, (*this, rec));
 }
 
 SkTwoPointRadialGradient::TwoPointRadialGradientContext::TwoPointRadialGradientContext(
-        const SkTwoPointRadialGradient& shader, const SkBitmap& device,
-        const SkPaint& paint, const SkMatrix& matrix)
-    : INHERITED(shader, device, paint, matrix)
+        const SkTwoPointRadialGradient& shader, const ContextRec& rec)
+    : INHERITED(shader, rec)
 {
     // we don't have a span16 proc
     fFlags &= ~kHasSpan16_Flag;
diff --git a/effects/gradients/SkTwoPointRadialGradient.h b/effects/gradients/SkTwoPointRadialGradient.h
index 1b387e68..6d36fe44 100644
--- a/effects/gradients/SkTwoPointRadialGradient.h
+++ b/effects/gradients/SkTwoPointRadialGradient.h
@@ -23,20 +23,11 @@ public:
     virtual GradientType asAGradient(GradientInfo* info) const SK_OVERRIDE;
     virtual GrEffectRef* asNewEffect(GrContext* context, const SkPaint&) const SK_OVERRIDE;
 
-
     virtual size_t contextSize() const SK_OVERRIDE;
-    virtual bool validContext(const SkBitmap&, const SkPaint&,
-                              const SkMatrix&, SkMatrix* totalInverse = NULL) const SK_OVERRIDE;
-    virtual SkShader::Context* createContext(const SkBitmap&, const SkPaint&, const SkMatrix&,
-                                             void* storage) const SK_OVERRIDE;
 
     class TwoPointRadialGradientContext : public SkGradientShaderBase::GradientShaderBaseContext {
     public:
-        TwoPointRadialGradientContext(const SkTwoPointRadialGradient& shader,
-                                      const SkBitmap& device,
-                                      const SkPaint& paint,
-                                      const SkMatrix& matrix);
-        ~TwoPointRadialGradientContext() {}
+        TwoPointRadialGradientContext(const SkTwoPointRadialGradient&, const ContextRec&);
 
         virtual void shadeSpan(int x, int y, SkPMColor dstC[], int count) SK_OVERRIDE;
 
@@ -54,6 +45,7 @@ public:
 protected:
     SkTwoPointRadialGradient(SkReadBuffer& buffer);
     virtual void flatten(SkWriteBuffer& buffer) const SK_OVERRIDE;
+    virtual Context* onCreateContext(const ContextRec&, void* storage) const SK_OVERRIDE;
 
 private:
     const SkPoint fCenter1;
diff --git a/gpu/GrAAConvexPathRenderer.cpp b/gpu/GrAAConvexPathRenderer.cpp
index 2af5bb2e..d0f6e0ef 100644
--- a/gpu/GrAAConvexPathRenderer.cpp
+++ b/gpu/GrAAConvexPathRenderer.cpp
@@ -203,7 +203,7 @@ static void update_degenerate_test(DegenerateTestData* data, const SkPoint& pt)
         case DegenerateTestData::kNonDegenerate:
             break;
         default:
-            GrCrash("Unexpected degenerate test stage.");
+            SkFAIL("Unexpected degenerate test stage.");
     }
 }
 
diff --git a/gpu/GrAARectRenderer.cpp b/gpu/GrAARectRenderer.cpp
index d23041f2..eebda01b 100644
--- a/gpu/GrAARectRenderer.cpp
+++ b/gpu/GrAARectRenderer.cpp
@@ -327,7 +327,7 @@ GrIndexBuffer* GrAARectRenderer::aaFillRectIndexBuffer(GrGpu* gpu) {
             }
             if (useTempData) {
                 if (!fAAFillRectIndexBuffer->updateData(data, kAAFillRectIndexBufferSize)) {
-                    GrCrash("Can't get AA Fill Rect indices into buffer!");
+                    SkFAIL("Can't get AA Fill Rect indices into buffer!");
                 }
                 SkDELETE_ARRAY(data);
             } else {
diff --git a/gpu/GrBufferAllocPool.cpp b/gpu/GrBufferAllocPool.cpp
index 2dbf3eb2..7318cd07 100644
--- a/gpu/GrBufferAllocPool.cpp
+++ b/gpu/GrBufferAllocPool.cpp
@@ -109,7 +109,7 @@ void GrBufferAllocPool::unlock() {
         if (block.fBuffer->isLocked()) {
             block.fBuffer->unlock();
         } else {
-            size_t flushSize = block.fBuffer->sizeInBytes() - block.fBytesFree;
+            size_t flushSize = block.fBuffer->gpuMemorySize() - block.fBytesFree;
             flushCpuData(fBlocks.back().fBuffer, flushSize);
         }
         fBufferPtr = NULL;
@@ -135,7 +135,7 @@ void GrBufferAllocPool::validate(bool unusedBlockAllowed) const {
         SkASSERT(!fBlocks[i].fBuffer->isLocked());
     }
     for (int i = 0; i < fBlocks.count(); ++i) {
-        size_t bytes = fBlocks[i].fBuffer->sizeInBytes() - fBlocks[i].fBytesFree;
+        size_t bytes = fBlocks[i].fBuffer->gpuMemorySize() - fBlocks[i].fBytesFree;
         bytesInUse += bytes;
         SkASSERT(bytes || unusedBlockAllowed);
     }
@@ -161,7 +161,7 @@ void* GrBufferAllocPool::makeSpace(size_t size,
 
     if (NULL != fBufferPtr) {
         BufferBlock& back = fBlocks.back();
-        size_t usedBytes = back.fBuffer->sizeInBytes() - back.fBytesFree;
+        size_t usedBytes = back.fBuffer->gpuMemorySize() - back.fBytesFree;
         size_t pad = GrSizeAlignUpPad(usedBytes,
                                       alignment);
         if ((size + pad) <= back.fBytesFree) {
@@ -201,7 +201,7 @@ int GrBufferAllocPool::currentBufferItems(size_t itemSize) const {
     VALIDATE();
     if (NULL != fBufferPtr) {
         const BufferBlock& back = fBlocks.back();
-        size_t usedBytes = back.fBuffer->sizeInBytes() - back.fBytesFree;
+        size_t usedBytes = back.fBuffer->gpuMemorySize() - back.fBytesFree;
         size_t pad = GrSizeAlignUpPad(usedBytes, itemSize);
         return static_cast<int>((back.fBytesFree - pad) / itemSize);
     } else if (fPreallocBuffersInUse < fPreallocBuffers.count()) {
@@ -231,7 +231,7 @@ void GrBufferAllocPool::putBack(size_t bytes) {
         // caller shouldnt try to put back more than they've taken
         SkASSERT(!fBlocks.empty());
         BufferBlock& block = fBlocks.back();
-        size_t bytesUsed = block.fBuffer->sizeInBytes() - block.fBytesFree;
+        size_t bytesUsed = block.fBuffer->gpuMemorySize() - block.fBytesFree;
         if (bytes >= bytesUsed) {
             bytes -= bytesUsed;
             fBytesInUse -= bytesUsed;
@@ -290,7 +290,7 @@ bool GrBufferAllocPool::createBlock(size_t requestSize) {
             prev.fBuffer->unlock();
         } else {
             flushCpuData(prev.fBuffer,
-                         prev.fBuffer->sizeInBytes() - prev.fBytesFree);
+                         prev.fBuffer->gpuMemorySize() - prev.fBytesFree);
         }
         fBufferPtr = NULL;
     }
@@ -303,7 +303,7 @@ bool GrBufferAllocPool::createBlock(size_t requestSize) {
     //      threshold (since we don't expect it is likely that we will see more vertex data)
     //      b) If the hint is not set we lock if the buffer size is greater than the threshold.
     bool attemptLock = block.fBuffer->isCPUBacked();
-    if (!attemptLock && fGpu->caps()->bufferLockSupport()) {
+    if (!attemptLock && GrDrawTargetCaps::kNone_MapFlags != fGpu->caps()->mapBufferFlags()) {
         if (fFrequentResetHint) {
             attemptLock = requestSize > GR_GEOM_BUFFER_LOCK_THRESHOLD;
         } else {
@@ -348,10 +348,10 @@ void GrBufferAllocPool::flushCpuData(GrGeometryBuffer* buffer,
     SkASSERT(NULL != buffer);
     SkASSERT(!buffer->isLocked());
     SkASSERT(fCpuData.get() == fBufferPtr);
-    SkASSERT(flushSize <= buffer->sizeInBytes());
+    SkASSERT(flushSize <= buffer->gpuMemorySize());
     VALIDATE(true);
 
-    if (fGpu->caps()->bufferLockSupport() &&
+    if (GrDrawTargetCaps::kNone_MapFlags != fGpu->caps()->mapBufferFlags() &&
         flushSize > GR_GEOM_BUFFER_LOCK_THRESHOLD) {
         void* data = buffer->lock();
         if (NULL != data) {
diff --git a/gpu/GrCacheID.cpp b/gpu/GrCacheID.cpp
index 87917ac9..8d0be0da 100644
--- a/gpu/GrCacheID.cpp
+++ b/gpu/GrCacheID.cpp
@@ -27,7 +27,7 @@ GrCacheID::Domain GrCacheID::GenerateDomain() {
 
     int32_t domain = sk_atomic_inc(&gNextDomain);
     if (domain >= 1 << (8 * sizeof(Domain))) {
-        GrCrash("Too many Cache Domains");
+        SkFAIL("Too many Cache Domains");
     }
 
     return static_cast<Domain>(domain);
diff --git a/gpu/GrClipMaskManager.cpp b/gpu/GrClipMaskManager.cpp
index 12b3360d..071b9d31 100644
--- a/gpu/GrClipMaskManager.cpp
+++ b/gpu/GrClipMaskManager.cpp
@@ -1000,7 +1000,7 @@ void GrClipMaskManager::adjustStencilParams(GrStencilSettings* settings,
                         funcRef = clipBit;
                         break;
                     default:
-                        GrCrash("Unknown stencil func");
+                        SkFAIL("Unknown stencil func");
                 }
             } else {
                 funcMask &= userBits;
diff --git a/gpu/GrContext.cpp b/gpu/GrContext.cpp
index 90bf8c04..d2664c3b 100644
--- a/gpu/GrContext.cpp
+++ b/gpu/GrContext.cpp
@@ -238,7 +238,7 @@ GrTexture* GrContext::findAndRefTexture(const GrTextureDesc& desc,
                                         const GrCacheID& cacheID,
                                         const GrTextureParams* params) {
     GrResourceKey resourceKey = GrTexture::ComputeKey(fGpu, params, desc, cacheID);
-    GrResource* resource = fTextureCache->find(resourceKey);
+    GrCacheable* resource = fTextureCache->find(resourceKey);
     SkSafeRef(resource);
     return static_cast<GrTexture*>(resource);
 }
@@ -264,7 +264,7 @@ GrStencilBuffer* GrContext::findStencilBuffer(int width, int height,
     GrResourceKey resourceKey = GrStencilBuffer::ComputeKey(width,
                                                             height,
                                                             sampleCnt);
-    GrResource* resource = fTextureCache->find(resourceKey);
+    GrCacheable* resource = fTextureCache->find(resourceKey);
     return static_cast<GrStencilBuffer*>(resource);
 }
 
@@ -397,7 +397,7 @@ GrTexture* GrContext::createTexture(const GrTextureParams* params,
     if (NULL != texture) {
         // Adding a resource could put us overbudget. Try to free up the
         // necessary space before adding it.
-        fTextureCache->purgeAsNeeded(1, texture->sizeInBytes());
+        fTextureCache->purgeAsNeeded(1, texture->gpuMemorySize());
         fTextureCache->addResource(resourceKey, texture);
 
         if (NULL != cacheKey) {
@@ -416,7 +416,7 @@ static GrTexture* create_scratch_texture(GrGpu* gpu,
         GrResourceKey key = GrTexture::ComputeScratchKey(texture->desc());
         // Adding a resource could put us overbudget. Try to free up the
         // necessary space before adding it.
-        textureCache->purgeAsNeeded(1, texture->sizeInBytes());
+        textureCache->purgeAsNeeded(1, texture->gpuMemorySize());
         // Make the resource exclusive so future 'find' calls don't return it
         textureCache->addResource(key, texture, GrResourceCache::kHide_OwnershipFlag);
     }
@@ -448,7 +448,7 @@ GrTexture* GrContext::lockAndRefScratchTexture(const GrTextureDesc& inDesc, Scra
         desc.fHeight = SkTMax(MIN_SIZE, GrNextPow2(desc.fHeight));
     }
 
-    GrResource* resource = NULL;
+    GrCacheable* resource = NULL;
     int origWidth = desc.fWidth;
     int origHeight = desc.fHeight;
 
@@ -1819,12 +1819,23 @@ GrPath* GrContext::createPath(const SkPath& inPath, const SkStrokeRec& stroke) {
         path->ref();
     } else {
         path = fGpu->createPath(inPath, stroke);
-        fTextureCache->purgeAsNeeded(1, path->sizeInBytes());
+        fTextureCache->purgeAsNeeded(1, path->gpuMemorySize());
         fTextureCache->addResource(resourceKey, path);
     }
     return path;
 }
 
+void GrContext::addResourceToCache(const GrResourceKey& resourceKey, GrCacheable* resource) {
+    fTextureCache->purgeAsNeeded(1, resource->gpuMemorySize());
+    fTextureCache->addResource(resourceKey, resource);
+}
+
+GrCacheable* GrContext::findAndRefCachedResource(const GrResourceKey& resourceKey) {
+    GrCacheable* resource = fTextureCache->find(resourceKey);
+    SkSafeRef(resource);
+    return resource;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 #if GR_CACHE_STATS
 void GrContext::printCacheStats() const {
diff --git a/gpu/GrDistanceFieldTextContext.cpp b/gpu/GrDistanceFieldTextContext.cpp
index 238bcca4..512420eb 100755
--- a/gpu/GrDistanceFieldTextContext.cpp
+++ b/gpu/GrDistanceFieldTextContext.cpp
@@ -33,15 +33,15 @@ static const int kLargeDFFontSize = 128;
 SK_CONF_DECLARE(bool, c_DumpFontCache, "gpu.dumpFontCache", false,
                 "Dump the contents of the font cache before every purge.");
 
+GrDistanceFieldTextContext::GrDistanceFieldTextContext(GrContext* context,
+                                                       const SkDeviceProperties& properties,
+                                                       bool enable)
+                                                    : GrTextContext(context, properties) {
 #if SK_FORCE_DISTANCEFIELD_FONTS
-static const bool kForceDistanceFieldFonts = true;
+    fEnableDFRendering = true;
 #else
-static const bool kForceDistanceFieldFonts = false;
+    fEnableDFRendering = enable;
 #endif
-
-GrDistanceFieldTextContext::GrDistanceFieldTextContext(GrContext* context,
-                                                       const SkDeviceProperties& properties)
-                                                    : GrTextContext(context, properties) {
     fStrike = NULL;
 
     fCurrTexture = NULL;
@@ -56,7 +56,7 @@ GrDistanceFieldTextContext::~GrDistanceFieldTextContext() {
 }
 
 bool GrDistanceFieldTextContext::canDraw(const SkPaint& paint) {
-    if (!kForceDistanceFieldFonts && !paint.isDistanceFieldTextTEMP()) {
+    if (!fEnableDFRendering && !paint.isDistanceFieldTextTEMP()) {
         return false;
     }
 
diff --git a/gpu/GrDistanceFieldTextContext.h b/gpu/GrDistanceFieldTextContext.h
index 58c0824e..3dfffd1c 100644
--- a/gpu/GrDistanceFieldTextContext.h
+++ b/gpu/GrDistanceFieldTextContext.h
@@ -17,7 +17,7 @@ class GrTextStrike;
  */
 class GrDistanceFieldTextContext : public GrTextContext {
 public:
-    GrDistanceFieldTextContext(GrContext*, const SkDeviceProperties&);
+    GrDistanceFieldTextContext(GrContext*, const SkDeviceProperties&, bool enable);
     virtual ~GrDistanceFieldTextContext();
 
     virtual void drawText(const GrPaint&, const SkPaint&, const char text[], size_t byteLength,
@@ -33,6 +33,7 @@ private:
     GrTextStrike*           fStrike;
     SkScalar                fTextRatio;
     bool                    fUseLCDText;
+    bool                    fEnableDFRendering;
 
     void init(const GrPaint&, const SkPaint&);
     void drawPackedGlyph(GrGlyph::PackedID, SkFixed left, SkFixed top, GrFontScaler*);
diff --git a/gpu/GrDrawTarget.cpp b/gpu/GrDrawTarget.cpp
index 9cea214a..6d8d1846 100644
--- a/gpu/GrDrawTarget.cpp
+++ b/gpu/GrDrawTarget.cpp
@@ -244,7 +244,7 @@ void GrDrawTarget::releasePreviousVertexSource() {
 #endif
             break;
         default:
-            GrCrash("Unknown Vertex Source Type.");
+            SkFAIL("Unknown Vertex Source Type.");
             break;
     }
 }
@@ -267,7 +267,7 @@ void GrDrawTarget::releasePreviousIndexSource() {
 #endif
             break;
         default:
-            GrCrash("Unknown Index Source Type.");
+            SkFAIL("Unknown Index Source Type.");
             break;
     }
 }
@@ -355,34 +355,34 @@ bool GrDrawTarget::checkDraw(GrPrimitiveType type, int startVertex,
     int maxValidVertex;
     switch (geoSrc.fVertexSrc) {
         case kNone_GeometrySrcType:
-            GrCrash("Attempting to draw without vertex src.");
+            SkFAIL("Attempting to draw without vertex src.");
         case kReserved_GeometrySrcType: // fallthrough
         case kArray_GeometrySrcType:
             maxValidVertex = geoSrc.fVertexCount;
             break;
         case kBuffer_GeometrySrcType:
-            maxValidVertex = static_cast<int>(geoSrc.fVertexBuffer->sizeInBytes() / geoSrc.fVertexSize);
+            maxValidVertex = static_cast<int>(geoSrc.fVertexBuffer->gpuMemorySize() / geoSrc.fVertexSize);
             break;
     }
     if (maxVertex > maxValidVertex) {
-        GrCrash("Drawing outside valid vertex range.");
+        SkFAIL("Drawing outside valid vertex range.");
     }
     if (indexCount > 0) {
         int maxIndex = startIndex + indexCount;
         int maxValidIndex;
         switch (geoSrc.fIndexSrc) {
             case kNone_GeometrySrcType:
-                GrCrash("Attempting to draw indexed geom without index src.");
+                SkFAIL("Attempting to draw indexed geom without index src.");
             case kReserved_GeometrySrcType: // fallthrough
             case kArray_GeometrySrcType:
                 maxValidIndex = geoSrc.fIndexCount;
                 break;
             case kBuffer_GeometrySrcType:
-                maxValidIndex = static_cast<int>(geoSrc.fIndexBuffer->sizeInBytes() / sizeof(uint16_t));
+                maxValidIndex = static_cast<int>(geoSrc.fIndexBuffer->gpuMemorySize() / sizeof(uint16_t));
                 break;
         }
         if (maxIndex > maxValidIndex) {
-            GrCrash("Index reads outside valid index range.");
+            SkFAIL("Index reads outside valid index range.");
         }
     }
 
@@ -1016,13 +1016,14 @@ void GrDrawTargetCaps::reset() {
     fShaderDerivativeSupport = false;
     fGeometryShaderSupport = false;
     fDualSourceBlendingSupport = false;
-    fBufferLockSupport = false;
     fPathRenderingSupport = false;
     fDstReadInShaderSupport = false;
     fDiscardRenderTargetSupport = false;
     fReuseScratchTextures = true;
     fGpuTracingSupport = false;
 
+    fMapBufferFlags = kNone_MapFlags;
+
     fMaxRenderTargetSize = 0;
     fMaxTextureSize = 0;
     fMaxSampleCount = 0;
@@ -1040,13 +1041,14 @@ GrDrawTargetCaps& GrDrawTargetCaps::operator=(const GrDrawTargetCaps& other) {
     fShaderDerivativeSupport = other.fShaderDerivativeSupport;
     fGeometryShaderSupport = other.fGeometryShaderSupport;
     fDualSourceBlendingSupport = other.fDualSourceBlendingSupport;
-    fBufferLockSupport = other.fBufferLockSupport;
     fPathRenderingSupport = other.fPathRenderingSupport;
     fDstReadInShaderSupport = other.fDstReadInShaderSupport;
     fDiscardRenderTargetSupport = other.fDiscardRenderTargetSupport;
     fReuseScratchTextures = other.fReuseScratchTextures;
     fGpuTracingSupport = other.fGpuTracingSupport;
 
+    fMapBufferFlags = other.fMapBufferFlags;
+
     fMaxRenderTargetSize = other.fMaxRenderTargetSize;
     fMaxTextureSize = other.fMaxTextureSize;
     fMaxSampleCount = other.fMaxSampleCount;
@@ -1056,6 +1058,26 @@ GrDrawTargetCaps& GrDrawTargetCaps::operator=(const GrDrawTargetCaps& other) {
     return *this;
 }
 
+static SkString map_flags_to_string(uint32_t flags) {
+    SkString str;
+    if (GrDrawTargetCaps::kNone_MapFlags == flags) {
+        str = "none";
+    } else {
+        SkASSERT(GrDrawTargetCaps::kCanMap_MapFlag & flags);
+        SkDEBUGCODE(flags &= ~GrDrawTargetCaps::kCanMap_MapFlag);
+        str = "can_map";
+
+        if (GrDrawTargetCaps::kSubset_MapFlag & flags) {
+            str.append(" partial");
+        } else {
+            str.append(" full");
+        }
+        SkDEBUGCODE(flags &= ~GrDrawTargetCaps::kSubset_MapFlag);
+    }
+    SkASSERT(0 == flags); // Make sure we handled all the flags.
+    return str;
+}
+
 SkString GrDrawTargetCaps::dump() const {
     SkString r;
     static const char* gNY[] = {"NO", "YES"};
@@ -1068,7 +1090,6 @@ SkString GrDrawTargetCaps::dump() const {
     r.appendf("Shader Derivative Support    : %s\n", gNY[fShaderDerivativeSupport]);
     r.appendf("Geometry Shader Support      : %s\n", gNY[fGeometryShaderSupport]);
     r.appendf("Dual Source Blending Support : %s\n", gNY[fDualSourceBlendingSupport]);
-    r.appendf("Buffer Lock Support          : %s\n", gNY[fBufferLockSupport]);
     r.appendf("Path Rendering Support       : %s\n", gNY[fPathRenderingSupport]);
     r.appendf("Dst Read In Shader Support   : %s\n", gNY[fDstReadInShaderSupport]);
     r.appendf("Discard Render Target Support: %s\n", gNY[fDiscardRenderTargetSupport]);
@@ -1078,6 +1099,8 @@ SkString GrDrawTargetCaps::dump() const {
     r.appendf("Max Render Target Size       : %d\n", fMaxRenderTargetSize);
     r.appendf("Max Sample Count             : %d\n", fMaxSampleCount);
 
+    r.appendf("Map Buffer Support           : %s\n", map_flags_to_string(fMapBufferFlags).c_str());
+
     static const char* kConfigNames[] = {
         "Unknown",  // kUnknown_GrPixelConfig
         "Alpha8",   // kAlpha_8_GrPixelConfig,
diff --git a/gpu/GrDrawTarget.h b/gpu/GrDrawTarget.h
index 732bad07..bbaf5a96 100644
--- a/gpu/GrDrawTarget.h
+++ b/gpu/GrDrawTarget.h
@@ -742,9 +742,9 @@ protected:
             case kArray_GeometrySrcType:
                 return src.fIndexCount;
             case kBuffer_GeometrySrcType:
-                return static_cast<int>(src.fIndexBuffer->sizeInBytes() / sizeof(uint16_t));
+                return static_cast<int>(src.fIndexBuffer->gpuMemorySize() / sizeof(uint16_t));
             default:
-                GrCrash("Unexpected Index Source.");
+                SkFAIL("Unexpected Index Source.");
                 return 0;
         }
     }
diff --git a/gpu/GrDrawTargetCaps.h b/gpu/GrDrawTargetCaps.h
index a77bce44..648b5c36 100644
--- a/gpu/GrDrawTargetCaps.h
+++ b/gpu/GrDrawTargetCaps.h
@@ -37,12 +37,25 @@ public:
     bool shaderDerivativeSupport() const { return fShaderDerivativeSupport; }
     bool geometryShaderSupport() const { return fGeometryShaderSupport; }
     bool dualSourceBlendingSupport() const { return fDualSourceBlendingSupport; }
-    bool bufferLockSupport() const { return fBufferLockSupport; }
     bool pathRenderingSupport() const { return fPathRenderingSupport; }
     bool dstReadInShaderSupport() const { return fDstReadInShaderSupport; }
     bool discardRenderTargetSupport() const { return fDiscardRenderTargetSupport; }
     bool gpuTracingSupport() const { return fGpuTracingSupport; }
 
+    /**
+     * Indicates whether GPU->CPU memory mapping for GPU resources such as vertex buffers and
+     * textures allows partial mappings or full mappings.
+     */
+    enum MapFlags {
+        kNone_MapFlags   = 0x0,       //!< Cannot map the resource.
+
+        kCanMap_MapFlag  = 0x1,       //!< The resource can be mapped. Must be set for any of
+                                      //   the other flags to have meaning.
+        kSubset_MapFlag  = 0x2,       //!< The resource can be partially mapped.
+    };
+
+    uint32_t mapBufferFlags() const { return fMapBufferFlags; }
+
     // Scratch textures not being reused means that those scratch textures
     // that we upload to (i.e., don't have a render target) will not be
     // recycled in the texture cache. This is to prevent ghosting by drivers
@@ -69,13 +82,14 @@ protected:
     bool fShaderDerivativeSupport   : 1;
     bool fGeometryShaderSupport     : 1;
     bool fDualSourceBlendingSupport : 1;
-    bool fBufferLockSupport         : 1;
     bool fPathRenderingSupport      : 1;
     bool fDstReadInShaderSupport    : 1;
     bool fDiscardRenderTargetSupport: 1;
     bool fReuseScratchTextures      : 1;
     bool fGpuTracingSupport         : 1;
 
+    uint32_t fMapBufferFlags;
+
     int fMaxRenderTargetSize;
     int fMaxTextureSize;
     int fMaxSampleCount;
diff --git a/gpu/GrGeometryBuffer.h b/gpu/GrGeometryBuffer.h
index 3bb7118f..2a5aab7a 100644
--- a/gpu/GrGeometryBuffer.h
+++ b/gpu/GrGeometryBuffer.h
@@ -10,14 +10,14 @@
 #ifndef GrGeometryBuffer_DEFINED
 #define GrGeometryBuffer_DEFINED
 
-#include "GrResource.h"
+#include "GrGpuObject.h"
 
 class GrGpu;
 
 /**
  * Parent class for vertex and index buffers
  */
-class GrGeometryBuffer : public GrResource {
+class GrGeometryBuffer : public GrGpuObject {
 public:
     SK_DECLARE_INST_COUNT(GrGeometryBuffer);
 
@@ -82,22 +82,22 @@ public:
      */
     virtual bool updateData(const void* src, size_t srcSizeInBytes) = 0;
 
-    // GrResource overrides
-    virtual size_t sizeInBytes() const { return fSizeInBytes; }
+    // GrGpuObject overrides
+    virtual size_t gpuMemorySize() const { return fGpuMemorySize; }
 
 protected:
-    GrGeometryBuffer(GrGpu* gpu, bool isWrapped, size_t sizeInBytes, bool dynamic, bool cpuBacked)
+    GrGeometryBuffer(GrGpu* gpu, bool isWrapped, size_t gpuMemorySize, bool dynamic, bool cpuBacked)
         : INHERITED(gpu, isWrapped)
-        , fSizeInBytes(sizeInBytes)
+        , fGpuMemorySize(gpuMemorySize)
         , fDynamic(dynamic)
         , fCPUBacked(cpuBacked) {}
 
 private:
-    size_t   fSizeInBytes;
+    size_t   fGpuMemorySize;
     bool     fDynamic;
     bool     fCPUBacked;
 
-    typedef GrResource INHERITED;
+    typedef GrGpuObject INHERITED;
 };
 
 #endif
diff --git a/gpu/GrGpu.cpp b/gpu/GrGpu.cpp
index d8f65d52..bc929525 100644
--- a/gpu/GrGpu.cpp
+++ b/gpu/GrGpu.cpp
@@ -57,11 +57,11 @@ void GrGpu::abandonResources() {
 
     fClipMaskManager.releaseResources();
 
-    while (NULL != fResourceList.head()) {
-        fResourceList.head()->abandon();
+    while (NULL != fObjectList.head()) {
+        fObjectList.head()->abandon();
     }
 
-    SkASSERT(NULL == fQuadIndexBuffer || !fQuadIndexBuffer->isValid());
+    SkASSERT(NULL == fQuadIndexBuffer || fQuadIndexBuffer->wasDestroyed());
     SkSafeSetNull(fQuadIndexBuffer);
     delete fVertexPool;
     fVertexPool = NULL;
@@ -73,11 +73,11 @@ void GrGpu::releaseResources() {
 
     fClipMaskManager.releaseResources();
 
-    while (NULL != fResourceList.head()) {
-        fResourceList.head()->release();
+    while (NULL != fObjectList.head()) {
+        fObjectList.head()->release();
     }
 
-    SkASSERT(NULL == fQuadIndexBuffer || !fQuadIndexBuffer->isValid());
+    SkASSERT(NULL == fQuadIndexBuffer || fQuadIndexBuffer->wasDestroyed());
     SkSafeSetNull(fQuadIndexBuffer);
     delete fVertexPool;
     fVertexPool = NULL;
@@ -85,18 +85,18 @@ void GrGpu::releaseResources() {
     fIndexPool = NULL;
 }
 
-void GrGpu::insertResource(GrResource* resource) {
-    SkASSERT(NULL != resource);
-    SkASSERT(this == resource->getGpu());
+void GrGpu::insertObject(GrGpuObject* object) {
+    SkASSERT(NULL != object);
+    SkASSERT(this == object->getGpu());
 
-    fResourceList.addToHead(resource);
+    fObjectList.addToHead(object);
 }
 
-void GrGpu::removeResource(GrResource* resource) {
-    SkASSERT(NULL != resource);
-    SkASSERT(this == resource->getGpu());
+void GrGpu::removeObject(GrGpuObject* object) {
+    SkASSERT(NULL != object);
+    SkASSERT(this == object->getGpu());
 
-    fResourceList.remove(resource);
+    fObjectList.remove(object);
 }
 
 
@@ -265,7 +265,7 @@ void GrGpu::getPathStencilSettingsForFillType(SkPath::FillType fill, GrStencilSe
 
     switch (fill) {
         default:
-            GrCrash("Unexpected path fill.");
+            SkFAIL("Unexpected path fill.");
             /* fallthrough */;
         case SkPath::kWinding_FillType:
         case SkPath::kInverseWinding_FillType:
@@ -313,7 +313,7 @@ const GrIndexBuffer* GrGpu::getQuadIndexBuffer() const {
                 if (!fQuadIndexBuffer->updateData(indices, SIZE)) {
                     fQuadIndexBuffer->unref();
                     fQuadIndexBuffer = NULL;
-                    GrCrash("Can't get indices into buffer!");
+                    SkFAIL("Can't get indices into buffer!");
                 }
                 sk_free(indices);
             }
diff --git a/gpu/GrGpu.h b/gpu/GrGpu.h
index c051f912..fc162370 100644
--- a/gpu/GrGpu.h
+++ b/gpu/GrGpu.h
@@ -13,11 +13,11 @@
 #include "SkPath.h"
 
 class GrContext;
+class GrGpuObject;
 class GrIndexBufferAllocPool;
 class GrPath;
 class GrPathRenderer;
 class GrPathRendererChain;
-class GrResource;
 class GrStencilBuffer;
 class GrVertexBufferAllocPool;
 
@@ -231,29 +231,28 @@ public:
                             size_t rowBytes);
 
     /**
-     * Called to tell Gpu object that all GrResources have been lost and should
+     * Called to tell GrGpu that all GrGpuObjects have been lost and should
      * be abandoned. Overrides must call INHERITED::abandonResources().
      */
     virtual void abandonResources();
 
     /**
-     * Called to tell Gpu object to release all GrResources. Overrides must call
+     * Called to tell GrGpu to release all GrGpuObjects. Overrides must call
      * INHERITED::releaseResources().
      */
     void releaseResources();
 
     /**
-     * Add resource to list of resources. Should only be called by GrResource.
+     * Add object to list of objects. Should only be called by GrGpuObject.
      * @param resource  the resource to add.
      */
-    void insertResource(GrResource* resource);
+    void insertObject(GrGpuObject* object);
 
     /**
-     * Remove resource from list of resources. Should only be called by
-     * GrResource.
+     * Remove object from list of objects. Should only be called by GrGpuObject.
      * @param resource  the resource to remove.
      */
-    void removeResource(GrResource* resource);
+    void removeObject(GrGpuObject* object);
 
     // GrDrawTarget overrides
     virtual void clear(const SkIRect* rect,
@@ -345,7 +344,7 @@ protected:
             case kLineStrip_GrPrimitiveType:
                 return kDrawLines_DrawType;
             default:
-                GrCrash("Unexpected primitive type");
+                SkFAIL("Unexpected primitive type");
                 return kDrawTriangles_DrawType;
         }
     }
@@ -503,7 +502,7 @@ private:
     enum {
         kPreallocGeomPoolStateStackCnt = 4,
     };
-    typedef SkTInternalLList<GrResource> ResourceList;
+    typedef SkTInternalLList<GrGpuObject> ObjectList;
     SkSTArray<kPreallocGeomPoolStateStackCnt, GeometryPoolState, true>  fGeomPoolStateStack;
     ResetTimestamp                                                      fResetTimestamp;
     uint32_t                                                            fResetBits;
@@ -516,7 +515,7 @@ private:
     mutable GrIndexBuffer*                                              fQuadIndexBuffer;
     // Used to abandon/release all resources created by this GrGpu. TODO: Move this
     // functionality to GrResourceCache.
-    ResourceList                                                        fResourceList;
+    ObjectList                                                          fObjectList;
 
     typedef GrDrawTarget INHERITED;
 };
diff --git a/gpu/GrResource.cpp b/gpu/GrGpuObject.cpp
index e20a30ff..43a86f2d 100644
--- a/gpu/GrResource.cpp
+++ b/gpu/GrGpuObject.cpp
@@ -7,44 +7,43 @@
  */
 
 
-#include "GrResource.h"
+#include "GrGpuObject.h"
 #include "GrGpu.h"
 
-GrResource::GrResource(GrGpu* gpu, bool isWrapped) {
+GrGpuObject::GrGpuObject(GrGpu* gpu, bool isWrapped) {
     fGpu              = gpu;
-    fCacheEntry       = NULL;
     fDeferredRefCount = 0;
     if (isWrapped) {
         fFlags = kWrapped_FlagBit;
     } else {
         fFlags = 0;
     }
-    fGpu->insertResource(this);
+    fGpu->insertObject(this);
 }
 
-GrResource::~GrResource() {
+GrGpuObject::~GrGpuObject() {
     // subclass should have released this.
     SkASSERT(0 == fDeferredRefCount);
-    SkASSERT(!this->isValid());
+    SkASSERT(this->wasDestroyed());
 }
 
-void GrResource::release() {
+void GrGpuObject::release() {
     if (NULL != fGpu) {
         this->onRelease();
-        fGpu->removeResource(this);
+        fGpu->removeObject(this);
         fGpu = NULL;
     }
 }
 
-void GrResource::abandon() {
+void GrGpuObject::abandon() {
     if (NULL != fGpu) {
         this->onAbandon();
-        fGpu->removeResource(this);
+        fGpu->removeObject(this);
         fGpu = NULL;
     }
 }
 
-const GrContext* GrResource::getContext() const {
+const GrContext* GrGpuObject::getContext() const {
     if (NULL != fGpu) {
         return fGpu->getContext();
     } else {
@@ -52,7 +51,7 @@ const GrContext* GrResource::getContext() const {
     }
 }
 
-GrContext* GrResource::getContext() {
+GrContext* GrGpuObject::getContext() {
     if (NULL != fGpu) {
         return fGpu->getContext();
     } else {
diff --git a/gpu/GrInOrderDrawBuffer.cpp b/gpu/GrInOrderDrawBuffer.cpp
index 5b3bc3a7..44d0b1aa 100644
--- a/gpu/GrInOrderDrawBuffer.cpp
+++ b/gpu/GrInOrderDrawBuffer.cpp
@@ -386,7 +386,7 @@ void GrInOrderDrawBuffer::onDraw(const DrawInfo& info) {
             break;
         }
         default:
-            GrCrash("unknown geom src type");
+            SkFAIL("unknown geom src type");
     }
     draw->fVertexBuffer->ref();
 
@@ -404,7 +404,7 @@ void GrInOrderDrawBuffer::onDraw(const DrawInfo& info) {
                 break;
             }
             default:
-                GrCrash("unknown geom src type");
+                SkFAIL("unknown geom src type");
         }
         draw->fIndexBuffer->ref();
     } else {
diff --git a/gpu/GrIndexBuffer.h b/gpu/GrIndexBuffer.h
index e23bc9b1..113b89d3 100644
--- a/gpu/GrIndexBuffer.h
+++ b/gpu/GrIndexBuffer.h
@@ -21,11 +21,11 @@ public:
      * @return the maximum number of quads using full size of index buffer.
      */
     int maxQuads() const {
-        return static_cast<int>(this->sizeInBytes() / (sizeof(uint16_t) * 6));
+        return static_cast<int>(this->gpuMemorySize() / (sizeof(uint16_t) * 6));
     }
 protected:
-    GrIndexBuffer(GrGpu* gpu, bool isWrapped, size_t sizeInBytes, bool dynamic, bool cpuBacked)
-        : INHERITED(gpu, isWrapped, sizeInBytes, dynamic, cpuBacked) {}
+    GrIndexBuffer(GrGpu* gpu, bool isWrapped, size_t gpuMemorySize, bool dynamic, bool cpuBacked)
+        : INHERITED(gpu, isWrapped, gpuMemorySize, dynamic, cpuBacked) {}
 private:
     typedef GrGeometryBuffer INHERITED;
 };
diff --git a/gpu/GrPaint.cpp b/gpu/GrPaint.cpp
index 7499cd0e..35912a94 100644
--- a/gpu/GrPaint.cpp
+++ b/gpu/GrPaint.cpp
@@ -104,7 +104,7 @@ bool GrPaint::getOpaqueAndKnownColor(GrColor* solidColor,
                 case kDA_GrBlendCoeff:
                 case kIDA_GrBlendCoeff:
                 default:
-                    GrCrash("srcCoeff should not refer to src or dst.");
+                    SkFAIL("srcCoeff should not refer to src or dst.");
                     break;
 
                 // TODO: update this once GrPaint actually has a const color.
diff --git a/gpu/GrPath.h b/gpu/GrPath.h
index f481ea42..d324e6a7 100644
--- a/gpu/GrPath.h
+++ b/gpu/GrPath.h
@@ -8,13 +8,13 @@
 #ifndef GrPath_DEFINED
 #define GrPath_DEFINED
 
-#include "GrResource.h"
+#include "GrGpuObject.h"
 #include "GrResourceCache.h"
 #include "SkPath.h"
 #include "SkRect.h"
 #include "SkStrokeRec.h"
 
-class GrPath : public GrResource {
+class GrPath : public GrGpuObject {
 public:
     SK_DECLARE_INST_COUNT(GrPath);
 
@@ -41,7 +41,7 @@ protected:
     SkRect fBounds;
 
 private:
-    typedef GrResource INHERITED;
+    typedef GrGpuObject INHERITED;
 };
 
 #endif
diff --git a/gpu/GrPictureUtils.cpp b/gpu/GrPictureUtils.cpp
index e8c3b504..089e4219 100644
--- a/gpu/GrPictureUtils.cpp
+++ b/gpu/GrPictureUtils.cpp
@@ -7,6 +7,14 @@
 
 #include "GrPictureUtils.h"
 #include "SkDevice.h"
+#include "SkDraw.h"
+#include "SkPaintPriv.h"
+
+SkPicture::AccelData::Key GPUAccelData::ComputeAccelDataKey() {
+    static const SkPicture::AccelData::Key gGPUID = SkPicture::AccelData::GenerateDomain();
+
+    return gGPUID;
+}
 
 // The GrGather device performs GPU-backend-specific preprocessing on
 // a picture. The results are stored in a GPUAccelData.
@@ -20,12 +28,17 @@ class GrGatherDevice : public SkBaseDevice {
 public:
     SK_DECLARE_INST_COUNT(GrGatherDevice)
 
-    GrGatherDevice(int width, int height, SkPicture* picture, GPUAccelData* accelData) {
+    GrGatherDevice(int width, int height, SkPicture* picture, GPUAccelData* accelData,
+                   int saveLayerDepth) {
         fPicture = picture;
+        fSaveLayerDepth = saveLayerDepth;
+        fInfo.fValid = true;
         fInfo.fSize.set(width, height);
+        fInfo.fPaint = NULL;
         fInfo.fSaveLayerOpID = fPicture->EXPERIMENTAL_curOpID();
         fInfo.fRestoreOpID = 0;
         fInfo.fHasNestedLayers = false;
+        fInfo.fIsNested = (2 == fSaveLayerDepth);
 
         fEmptyBitmap.setConfig(SkImageInfo::Make(fInfo.fSize.fWidth,
                                                  fInfo.fSize.fHeight,
@@ -110,7 +123,8 @@ protected:
                               const SkPaint& paint) SK_OVERRIDE {
     }
     virtual void drawDevice(const SkDraw& draw, SkBaseDevice* deviceIn, int x, int y,
-                            const SkPaint&) SK_OVERRIDE {
+                            const SkPaint& paint) SK_OVERRIDE {
+        // deviceIn is the one that is being "restored" back to its parent
         GrGatherDevice* device = static_cast<GrGatherDevice*>(deviceIn);
 
         if (device->fAlreadyDrawn) {
@@ -118,6 +132,29 @@ protected:
         }
 
         device->fInfo.fRestoreOpID = fPicture->EXPERIMENTAL_curOpID();
+        device->fInfo.fCTM = *draw.fMatrix;
+        device->fInfo.fCTM.postTranslate(SkIntToScalar(-device->getOrigin().fX),
+                                         SkIntToScalar(-device->getOrigin().fY));
+
+        // We need the x & y values that will yield 'getOrigin' when transformed
+        // by 'draw.fMatrix'.
+        device->fInfo.fOffset.iset(device->getOrigin());
+
+        SkMatrix invMatrix;
+        if (draw.fMatrix->invert(&invMatrix)) {
+            invMatrix.mapPoints(&device->fInfo.fOffset, 1);
+        } else {
+            device->fInfo.fValid = false;
+        }
+
+        if (NeedsDeepCopy(paint)) {
+            // This NULL acts as a signal that the paint was uncopyable (for now)
+            device->fInfo.fPaint = NULL;
+            device->fInfo.fValid = false;
+        } else {
+            device->fInfo.fPaint = SkNEW_ARGS(SkPaint, (paint));
+        }
+
         fAccelData->addSaveLayerInfo(device->fInfo);
         device->fAlreadyDrawn = true;
     }
@@ -158,6 +195,9 @@ private:
     // The information regarding the saveLayer call this device represents.
     GPUAccelData::SaveLayerInfo fInfo;
 
+    // The depth of this device in the saveLayer stack
+    int fSaveLayerDepth;
+
     virtual void replaceBitmapBackendForRasterSurface(const SkBitmap&) SK_OVERRIDE {
         NotSupported();
     }
@@ -167,7 +207,8 @@ private:
         SkASSERT(kSaveLayer_Usage == usage);
 
         fInfo.fHasNestedLayers = true;
-        return SkNEW_ARGS(GrGatherDevice, (info.width(), info.height(), fPicture, fAccelData));
+        return SkNEW_ARGS(GrGatherDevice, (info.width(), info.height(), fPicture,
+                                           fAccelData, fSaveLayerDepth+1));
     }
 
     virtual void flush() SK_OVERRIDE {}
@@ -239,7 +280,7 @@ void GatherGPUInfo(SkPicture* pict, GPUAccelData* accelData) {
         return ;
     }
 
-    GrGatherDevice device(pict->width(), pict->height(), pict, accelData);
+    GrGatherDevice device(pict->width(), pict->height(), pict, accelData, 0);
     GrGatherCanvas canvas(&device, pict);
 
     canvas.gather();
diff --git a/gpu/GrPictureUtils.h b/gpu/GrPictureUtils.h
index 6b4d901c..c6252986 100644
--- a/gpu/GrPictureUtils.h
+++ b/gpu/GrPictureUtils.h
@@ -17,8 +17,21 @@ class GPUAccelData : public SkPicture::AccelData {
 public:
     // Information about a given saveLayer in an SkPicture
     struct SaveLayerInfo {
+        // True if the SaveLayerInfo is valid. False if either 'fOffset' is
+        // invalid (due to a non-invertible CTM) or 'fPaint' is NULL (due
+        // to a non-copyable paint).
+        bool fValid;
         // The size of the saveLayer
         SkISize fSize;
+        // The CTM in which this layer's draws must occur. It already incorporates
+        // the translation needed to map the layer's top-left point to the origin.
+        SkMatrix fCTM;
+        // The offset that needs to be passed to drawBitmap to correctly
+        // position the pre-rendered layer.
+        SkPoint fOffset;
+        // The paint to use on restore. NULL if the paint was not copyable (and
+        // thus this layer should not be pulled forward).
+        const SkPaint* fPaint;
         // The ID of this saveLayer in the picture. 0 is an invalid ID.
         size_t  fSaveLayerOpID;
         // The ID of the matching restore in the picture. 0 is an invalid ID.
@@ -26,10 +39,18 @@ public:
         // True if this saveLayer has at least one other saveLayer nested within it.
         // False otherwise.
         bool    fHasNestedLayers;
+        // True if this saveLayer is nested within another. False otherwise.
+        bool    fIsNested;
     };
 
     GPUAccelData(Key key) : INHERITED(key) { }
 
+    virtual ~GPUAccelData() {
+        for (int i = 0; i < fSaveLayerInfo.count(); ++i) {
+            SkDELETE(fSaveLayerInfo[i].fPaint);
+        }
+    }
+
     void addSaveLayerInfo(const SaveLayerInfo& info) {
         SkASSERT(info.fSaveLayerOpID < info.fRestoreOpID);
         *fSaveLayerInfo.push() = info;
@@ -43,6 +64,10 @@ public:
         return fSaveLayerInfo[index];
     }
 
+    // We may, in the future, need to pass in the GPUDevice in order to
+    // incorporate the clip and matrix state into the key
+    static SkPicture::AccelData::Key ComputeAccelDataKey();
+
 protected:
     SkTDArray<SaveLayerInfo> fSaveLayerInfo;
 
diff --git a/gpu/GrRenderTarget.cpp b/gpu/GrRenderTarget.cpp
index 9348dc16..13fc2290 100644
--- a/gpu/GrRenderTarget.cpp
+++ b/gpu/GrRenderTarget.cpp
@@ -63,7 +63,7 @@ void GrRenderTarget::discard() {
     context->discardRenderTarget(this);
 }
 
-size_t GrRenderTarget::sizeInBytes() const {
+size_t GrRenderTarget::gpuMemorySize() const {
     size_t colorBits;
     if (kUnknown_GrPixelConfig == fDesc.fConfig) {
         colorBits = 32; // don't know, make a guess
diff --git a/gpu/GrResourceCache.cpp b/gpu/GrResourceCache.cpp
index 938f016e..529c3a5d 100644
--- a/gpu/GrResourceCache.cpp
+++ b/gpu/GrResourceCache.cpp
@@ -9,16 +9,26 @@
 
 
 #include "GrResourceCache.h"
-#include "GrResource.h"
+#include "GrCacheable.h"
 
 DECLARE_SKMESSAGEBUS_MESSAGE(GrResourceInvalidatedMessage);
 
+///////////////////////////////////////////////////////////////////////////////
+
+void GrCacheable::didChangeGpuMemorySize() const {
+    if (this->isInCache()) {
+        fCacheEntry->didChangeResourceSize();
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
 GrResourceKey::ResourceType GrResourceKey::GenerateResourceType() {
     static int32_t gNextType = 0;
 
     int32_t type = sk_atomic_inc(&gNextType);
     if (type >= (1 << 8 * sizeof(ResourceType))) {
-        GrCrash("Too many Resource Types");
+        SkFAIL("Too many Resource Types");
     }
 
     return static_cast<ResourceType>(type);
@@ -26,26 +36,44 @@ GrResourceKey::ResourceType GrResourceKey::GenerateResourceType() {
 
 ///////////////////////////////////////////////////////////////////////////////
 
-GrResourceEntry::GrResourceEntry(const GrResourceKey& key, GrResource* resource)
-        : fKey(key), fResource(resource) {
+GrResourceCacheEntry::GrResourceCacheEntry(GrResourceCache* resourceCache,
+                                           const GrResourceKey& key,
+                                           GrCacheable* resource)
+        : fResourceCache(resourceCache),
+          fKey(key),
+          fResource(resource),
+          fCachedSize(resource->gpuMemorySize()),
+          fIsExclusive(false) {
     // we assume ownership of the resource, and will unref it when we die
     SkASSERT(resource);
     resource->ref();
 }
 
-GrResourceEntry::~GrResourceEntry() {
+GrResourceCacheEntry::~GrResourceCacheEntry() {
     fResource->setCacheEntry(NULL);
     fResource->unref();
 }
 
 #ifdef SK_DEBUG
-void GrResourceEntry::validate() const {
+void GrResourceCacheEntry::validate() const {
+    SkASSERT(fResourceCache);
     SkASSERT(fResource);
     SkASSERT(fResource->getCacheEntry() == this);
+    SkASSERT(fResource->gpuMemorySize() == fCachedSize);
     fResource->validate();
 }
 #endif
 
+void GrResourceCacheEntry::didChangeResourceSize() {
+    size_t oldSize = fCachedSize;
+    fCachedSize = fResource->gpuMemorySize();
+    if (fCachedSize > oldSize) {
+        fResourceCache->didIncreaseResourceSize(this, fCachedSize - oldSize);
+    } else if (fCachedSize < oldSize) {
+        fResourceCache->didDecreaseResourceSize(this, oldSize - fCachedSize);
+    }
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 GrResourceCache::GrResourceCache(int maxCount, size_t maxBytes) :
@@ -75,7 +103,7 @@ GrResourceCache::~GrResourceCache() {
     EntryList::Iter iter;
 
     // Unlike the removeAll, here we really remove everything, including locked resources.
-    while (GrResourceEntry* entry = fList.head()) {
+    while (GrResourceCacheEntry* entry = fList.head()) {
         GrAutoResourceCacheValidate atcv(this);
 
         // remove from our cache
@@ -108,14 +136,14 @@ void GrResourceCache::setLimits(int maxResources, size_t maxResourceBytes) {
     }
 }
 
-void GrResourceCache::internalDetach(GrResourceEntry* entry,
+void GrResourceCache::internalDetach(GrResourceCacheEntry* entry,
                                      BudgetBehaviors behavior) {
     fList.remove(entry);
 
     // update our stats
     if (kIgnore_BudgetBehavior == behavior) {
         fClientDetachedCount += 1;
-        fClientDetachedBytes += entry->resource()->sizeInBytes();
+        fClientDetachedBytes += entry->fCachedSize;
 
 #if GR_CACHE_STATS
         if (fHighWaterClientDetachedCount < fClientDetachedCount) {
@@ -130,23 +158,23 @@ void GrResourceCache::internalDetach(GrResourceEntry* entry,
         SkASSERT(kAccountFor_BudgetBehavior == behavior);
 
         fEntryCount -= 1;
-        fEntryBytes -= entry->resource()->sizeInBytes();
+        fEntryBytes -= entry->fCachedSize;
     }
 }
 
-void GrResourceCache::attachToHead(GrResourceEntry* entry,
+void GrResourceCache::attachToHead(GrResourceCacheEntry* entry,
                                    BudgetBehaviors behavior) {
     fList.addToHead(entry);
 
     // update our stats
     if (kIgnore_BudgetBehavior == behavior) {
         fClientDetachedCount -= 1;
-        fClientDetachedBytes -= entry->resource()->sizeInBytes();
+        fClientDetachedBytes -= entry->fCachedSize;
     } else {
         SkASSERT(kAccountFor_BudgetBehavior == behavior);
 
         fEntryCount += 1;
-        fEntryBytes += entry->resource()->sizeInBytes();
+        fEntryBytes += entry->fCachedSize;
 
 #if GR_CACHE_STATS
         if (fHighWaterEntryCount < fEntryCount) {
@@ -164,15 +192,15 @@ void GrResourceCache::attachToHead(GrResourceEntry* entry,
 // is relying on the texture.
 class GrTFindUnreffedFunctor {
 public:
-    bool operator()(const GrResourceEntry* entry) const {
+    bool operator()(const GrResourceCacheEntry* entry) const {
         return entry->resource()->unique();
     }
 };
 
-GrResource* GrResourceCache::find(const GrResourceKey& key, uint32_t ownershipFlags) {
+GrCacheable* GrResourceCache::find(const GrResourceKey& key, uint32_t ownershipFlags) {
     GrAutoResourceCacheValidate atcv(this);
 
-    GrResourceEntry* entry = NULL;
+    GrResourceCacheEntry* entry = NULL;
 
     if (ownershipFlags & kNoOtherOwners_OwnershipFlag) {
         GrTFindUnreffedFunctor functor;
@@ -198,7 +226,7 @@ GrResource* GrResourceCache::find(const GrResourceKey& key, uint32_t ownershipFl
 }
 
 void GrResourceCache::addResource(const GrResourceKey& key,
-                                  GrResource* resource,
+                                  GrCacheable* resource,
                                   uint32_t ownershipFlags) {
     SkASSERT(NULL == resource->getCacheEntry());
     // we don't expect to create new resources during a purge. In theory
@@ -208,7 +236,7 @@ void GrResourceCache::addResource(const GrResourceKey& key,
     SkASSERT(!fPurging);
     GrAutoResourceCacheValidate atcv(this);
 
-    GrResourceEntry* entry = SkNEW_ARGS(GrResourceEntry, (key, resource));
+    GrResourceCacheEntry* entry = SkNEW_ARGS(GrResourceCacheEntry, (this, key, resource));
     resource->setCacheEntry(entry);
 
     this->attachToHead(entry);
@@ -220,9 +248,12 @@ void GrResourceCache::addResource(const GrResourceKey& key,
 
 }
 
-void GrResourceCache::makeExclusive(GrResourceEntry* entry) {
+void GrResourceCache::makeExclusive(GrResourceCacheEntry* entry) {
     GrAutoResourceCacheValidate atcv(this);
 
+    SkASSERT(!entry->fIsExclusive);
+    entry->fIsExclusive = true;
+
     // When scratch textures are detached (to hide them from future finds) they
     // still count against the resource budget
     this->internalDetach(entry, kIgnore_BudgetBehavior);
@@ -233,37 +264,59 @@ void GrResourceCache::makeExclusive(GrResourceEntry* entry) {
 #endif
 }
 
-void GrResourceCache::removeInvalidResource(GrResourceEntry* entry) {
+void GrResourceCache::removeInvalidResource(GrResourceCacheEntry* entry) {
     // If the resource went invalid while it was detached then purge it
     // This can happen when a 3D context was lost,
     // the client called GrContext::contextDestroyed() to notify Gr,
     // and then later an SkGpuDevice's destructor releases its backing
     // texture (which was invalidated at contextDestroyed time).
+    // TODO: Safely delete the GrResourceCacheEntry as well.
     fClientDetachedCount -= 1;
     fEntryCount -= 1;
-    size_t size = entry->resource()->sizeInBytes();
-    fClientDetachedBytes -= size;
-    fEntryBytes -= size;
+    fClientDetachedBytes -= entry->fCachedSize;
+    fEntryBytes -= entry->fCachedSize;
+    entry->fCachedSize = 0;
 }
 
-void GrResourceCache::makeNonExclusive(GrResourceEntry* entry) {
+void GrResourceCache::makeNonExclusive(GrResourceCacheEntry* entry) {
     GrAutoResourceCacheValidate atcv(this);
 
 #ifdef SK_DEBUG
     fExclusiveList.remove(entry);
 #endif
 
-    if (entry->resource()->isValid()) {
+    if (entry->resource()->isValidOnGpu()) {
         // Since scratch textures still count against the cache budget even
         // when they have been removed from the cache, re-adding them doesn't
         // alter the budget information.
         attachToHead(entry, kIgnore_BudgetBehavior);
         fCache.insert(entry->key(), entry);
+
+        SkASSERT(entry->fIsExclusive);
+        entry->fIsExclusive = false;
     } else {
         this->removeInvalidResource(entry);
     }
 }
 
+void GrResourceCache::didIncreaseResourceSize(const GrResourceCacheEntry* entry, size_t amountInc) {
+    fEntryBytes += amountInc;
+    if (entry->fIsExclusive) {
+        fClientDetachedBytes += amountInc;
+    }
+    this->purgeAsNeeded();
+}
+
+void GrResourceCache::didDecreaseResourceSize(const GrResourceCacheEntry* entry, size_t amountDec) {
+    fEntryBytes -= amountDec;
+    if (entry->fIsExclusive) {
+        fClientDetachedBytes -= amountDec;
+    }
+#ifdef SK_DEBUG
+    this->validate();
+#endif
+}
+
 /**
  * Destroying a resource may potentially trigger the unlock of additional
  * resources which in turn will trigger a nested purge. We block the nested
@@ -313,13 +366,13 @@ void GrResourceCache::purgeInvalidated() {
         //
         // This is complicated and confusing.  May try this in the future.  For
         // now, these resources are just LRU'd as if we never got the message.
-        while (GrResourceEntry* entry = fCache.find(invalidated[i].key, GrTFindUnreffedFunctor())) {
+        while (GrResourceCacheEntry* entry = fCache.find(invalidated[i].key, GrTFindUnreffedFunctor())) {
             this->deleteResource(entry);
         }
     }
 }
 
-void GrResourceCache::deleteResource(GrResourceEntry* entry) {
+void GrResourceCache::deleteResource(GrResourceCacheEntry* entry) {
     SkASSERT(1 == entry->fResource->getRefCnt());
 
     // remove from our cache
@@ -347,7 +400,7 @@ void GrResourceCache::internalPurge(int extraCount, size_t extraBytes) {
         // doubly linked list doesn't invalidate its data/pointers
         // outside of the specific area where a deletion occurs (e.g.,
         // in internalDetach)
-        GrResourceEntry* entry = iter.init(fList, EntryList::Iter::kTail_IterStart);
+        GrResourceCacheEntry* entry = iter.init(fList, EntryList::Iter::kTail_IterStart);
 
         while (NULL != entry) {
             GrAutoResourceCacheValidate atcv(this);
@@ -358,7 +411,7 @@ void GrResourceCache::internalPurge(int extraCount, size_t extraBytes) {
                 break;
             }
 
-            GrResourceEntry* prev = iter.prev();
+            GrResourceCacheEntry* prev = iter.prev();
             if (entry->fResource->unique()) {
                 changed = true;
                 this->deleteResource(entry);
@@ -371,7 +424,7 @@ void GrResourceCache::internalPurge(int extraCount, size_t extraBytes) {
 void GrResourceCache::purgeAllUnlocked() {
     GrAutoResourceCacheValidate atcv(this);
 
-    // we can have one GrResource holding a lock on another
+    // we can have one GrCacheable holding a lock on another
     // so we don't want to just do a simple loop kicking each
     // entry out. Instead change the budget and purge.
 
@@ -406,11 +459,11 @@ size_t GrResourceCache::countBytes(const EntryList& list) {
 
     EntryList::Iter iter;
 
-    const GrResourceEntry* entry = iter.init(const_cast<EntryList&>(list),
-                                             EntryList::Iter::kTail_IterStart);
+    const GrResourceCacheEntry* entry = iter.init(const_cast<EntryList&>(list),
+                                                  EntryList::Iter::kTail_IterStart);
 
     for ( ; NULL != entry; entry = iter.prev()) {
-        bytes += entry->resource()->sizeInBytes();
+        bytes += entry->resource()->gpuMemorySize();
     }
     return bytes;
 }
@@ -431,8 +484,8 @@ void GrResourceCache::validate() const {
     EntryList::Iter iter;
 
     // check that the exclusively held entries are okay
-    const GrResourceEntry* entry = iter.init(const_cast<EntryList&>(fExclusiveList),
-                                             EntryList::Iter::kHead_IterStart);
+    const GrResourceCacheEntry* entry = iter.init(const_cast<EntryList&>(fExclusiveList),
+                                                  EntryList::Iter::kHead_IterStart);
 
     for ( ; NULL != entry; entry = iter.next()) {
         entry->validate();
@@ -468,7 +521,7 @@ void GrResourceCache::printStats() {
 
     EntryList::Iter iter;
 
-    GrResourceEntry* entry = iter.init(fList, EntryList::Iter::kTail_IterStart);
+    GrResourceCacheEntry* entry = iter.init(fList, EntryList::Iter::kTail_IterStart);
 
     for ( ; NULL != entry; entry = iter.prev()) {
         if (entry->fResource->getRefCnt() > 1) {
diff --git a/gpu/GrResourceCache.h b/gpu/GrResourceCache.h
index a8309188..1a81fe61 100644
--- a/gpu/GrResourceCache.h
+++ b/gpu/GrResourceCache.h
@@ -18,8 +18,9 @@
 #include "SkMessageBus.h"
 #include "SkTInternalLList.h"
 
-class GrResource;
-class GrResourceEntry;
+class GrCacheable;
+class GrResourceCache;
+class GrResourceCacheEntry;
 
 class GrResourceKey {
 public:
@@ -28,11 +29,11 @@ public:
         return gDomain;
     }
 
-    /** Uniquely identifies the GrResource subclass in the key to avoid collisions
+    /** Uniquely identifies the GrCacheable subclass in the key to avoid collisions
         across resource types. */
     typedef uint8_t ResourceType;
 
-    /** Flags set by the GrResource subclass. */
+    /** Flags set by the GrCacheable subclass. */
     typedef uint8_t ResourceFlags;
 
     /** Generate a unique ResourceType */
@@ -115,12 +116,12 @@ struct GrResourceInvalidatedMessage {
 
 ///////////////////////////////////////////////////////////////////////////////
 
-class GrResourceEntry {
+class GrResourceCacheEntry {
 public:
-    GrResource* resource() const { return fResource; }
+    GrCacheable* resource() const { return fResource; }
     const GrResourceKey& key() const { return fKey; }
 
-    static const GrResourceKey& GetKey(const GrResourceEntry& e) { return e.key(); }
+    static const GrResourceKey& GetKey(const GrResourceCacheEntry& e) { return e.key(); }
     static uint32_t Hash(const GrResourceKey& key) { return key.getHash(); }
 #ifdef SK_DEBUG
     void validate() const;
@@ -128,15 +129,27 @@ public:
     void validate() const {}
 #endif
 
+    /**
+     *  Update the cached size for this entry and inform the resource cache that
+     *  it has changed. Usually invoked from GrCacheable::didChangeGpuMemorySize,
+     *  not directly from here.
+     */
+    void didChangeResourceSize();
+
 private:
-    GrResourceEntry(const GrResourceKey& key, GrResource* resource);
-    ~GrResourceEntry();
+    GrResourceCacheEntry(GrResourceCache* resourceCache,
+                         const GrResourceKey& key,
+                         GrCacheable* resource);
+    ~GrResourceCacheEntry();
 
+    GrResourceCache* fResourceCache;
     GrResourceKey    fKey;
-    GrResource*      fResource;
+    GrCacheable*     fResource;
+    size_t           fCachedSize;
+    bool             fIsExclusive;
 
     // Linked list for the LRU ordering.
-    SK_DECLARE_INTERNAL_LLIST_INTERFACE(GrResourceEntry);
+    SK_DECLARE_INTERNAL_LLIST_INTERFACE(GrResourceCacheEntry);
 
     friend class GrResourceCache;
 };
@@ -144,7 +157,7 @@ private:
 ///////////////////////////////////////////////////////////////////////////////
 
 /**
- *  Cache of GrResource objects.
+ *  Cache of GrCacheable objects.
  *
  *  These have a corresponding GrResourceKey, built from 128bits identifying the
  *  resource. Multiple resources can map to same GrResourceKey.
@@ -157,7 +170,7 @@ private:
  *  For fast searches, we maintain a hash map based on the GrResourceKey.
  *
  *  It is a goal to make the GrResourceCache the central repository and bookkeeper
- *  of all resources. It should replace the linked list of GrResources that
+ *  of all resources. It should replace the linked list of GrGpuObjects that
  *  GrGpu uses to call abandon/release.
  */
 class GrResourceCache {
@@ -233,8 +246,8 @@ public:
      *  For a resource to be completely exclusive to a caller both kNoOtherOwners
      *  and kHide must be specified.
      */
-    GrResource* find(const GrResourceKey& key,
-                     uint32_t ownershipFlags = 0);
+    GrCacheable* find(const GrResourceKey& key,
+                      uint32_t ownershipFlags = 0);
 
     /**
      *  Add the new resource to the cache (by creating a new cache entry based
@@ -248,7 +261,7 @@ public:
      *  is called.
      */
     void addResource(const GrResourceKey& key,
-                     GrResource* resource,
+                     GrCacheable* resource,
                      uint32_t ownershipFlags = 0);
 
     /**
@@ -263,18 +276,24 @@ public:
      * the cache's budget and should be made non-exclusive when exclusive access
      * is no longer needed.
      */
-    void makeExclusive(GrResourceEntry* entry);
+    void makeExclusive(GrResourceCacheEntry* entry);
 
     /**
      * Restore 'entry' so that it can be found by future searches. 'entry'
      * will also be purgeable (provided its lock count is now 0.)
      */
-    void makeNonExclusive(GrResourceEntry* entry);
+    void makeNonExclusive(GrResourceCacheEntry* entry);
+
+    /**
+     * Notify the cache that the size of a resource has changed.
+     */
+    void didIncreaseResourceSize(const GrResourceCacheEntry*, size_t amountInc);
+    void didDecreaseResourceSize(const GrResourceCacheEntry*, size_t amountDec);
 
     /**
      * Remove a resource from the cache and delete it!
      */
-    void deleteResource(GrResourceEntry* entry);
+    void deleteResource(GrResourceCacheEntry* entry);
 
     /**
      * Removes every resource in the cache that isn't locked.
@@ -310,15 +329,15 @@ private:
         kIgnore_BudgetBehavior
     };
 
-    void internalDetach(GrResourceEntry*, BudgetBehaviors behavior = kAccountFor_BudgetBehavior);
-    void attachToHead(GrResourceEntry*, BudgetBehaviors behavior = kAccountFor_BudgetBehavior);
+    void internalDetach(GrResourceCacheEntry*, BudgetBehaviors behavior = kAccountFor_BudgetBehavior);
+    void attachToHead(GrResourceCacheEntry*, BudgetBehaviors behavior = kAccountFor_BudgetBehavior);
 
-    void removeInvalidResource(GrResourceEntry* entry);
+    void removeInvalidResource(GrResourceCacheEntry* entry);
 
-    GrTMultiMap<GrResourceEntry, GrResourceKey> fCache;
+    GrTMultiMap<GrResourceCacheEntry, GrResourceKey> fCache;
 
     // We're an internal doubly linked list
-    typedef SkTInternalLList<GrResourceEntry> EntryList;
+    typedef SkTInternalLList<GrResourceCacheEntry> EntryList;
     EntryList      fList;
 
 #ifdef SK_DEBUG
@@ -356,7 +375,7 @@ private:
     void purgeInvalidated();
 
 #ifdef SK_DEBUG
-    static size_t countBytes(const SkTInternalLList<GrResourceEntry>& list);
+    static size_t countBytes(const SkTInternalLList<GrResourceCacheEntry>& list);
 #endif
 };
 
diff --git a/gpu/GrStencil.cpp b/gpu/GrStencil.cpp
index 76772608..f37aa317 100644
--- a/gpu/GrStencil.cpp
+++ b/gpu/GrStencil.cpp
@@ -389,7 +389,7 @@ bool GrStencilSettings::GetClipPasses(
             }
             break;
         default:
-            GrCrash("Unknown set op");
+            SkFAIL("Unknown set op");
     }
     return false;
 }
diff --git a/gpu/GrStencilBuffer.h b/gpu/GrStencilBuffer.h
index 37d40f16..696ba839 100644
--- a/gpu/GrStencilBuffer.h
+++ b/gpu/GrStencilBuffer.h
@@ -11,13 +11,12 @@
 #define GrStencilBuffer_DEFINED
 
 #include "GrClipData.h"
-#include "GrResource.h"
+#include "GrGpuObject.h"
 
 class GrRenderTarget;
-class GrResourceEntry;
 class GrResourceKey;
 
-class GrStencilBuffer : public GrResource {
+class GrStencilBuffer : public GrGpuObject {
 public:
     SK_DECLARE_INST_COUNT(GrStencilBuffer);
 
@@ -55,7 +54,7 @@ public:
 
 protected:
     GrStencilBuffer(GrGpu* gpu, bool isWrapped, int width, int height, int bits, int sampleCnt)
-        : GrResource(gpu, isWrapped)
+        : GrGpuObject(gpu, isWrapped)
         , fWidth(width)
         , fHeight(height)
         , fBits(bits)
@@ -75,7 +74,7 @@ private:
     SkIRect     fLastClipStackRect;
     SkIPoint    fLastClipSpaceOffset;
 
-    typedef GrResource INHERITED;
+    typedef GrGpuObject INHERITED;
 };
 
 #endif
diff --git a/gpu/GrTexture.cpp b/gpu/GrTexture.cpp
index f8515153..3186d89d 100644
--- a/gpu/GrTexture.cpp
+++ b/gpu/GrTexture.cpp
@@ -44,6 +44,33 @@ void GrTexture::internal_dispose() const {
     this->INHERITED::internal_dispose();
 }
 
+void GrTexture::dirtyMipMaps(bool mipMapsDirty) {
+    if (mipMapsDirty) {
+        if (kValid_MipMapsStatus == fMipMapsStatus) {
+            fMipMapsStatus = kAllocated_MipMapsStatus;
+        }
+    } else {
+        const bool sizeChanged = kNotAllocated_MipMapsStatus == fMipMapsStatus;
+        fMipMapsStatus = kValid_MipMapsStatus;
+        if (sizeChanged) {
+            // This must not be called until after changing fMipMapsStatus.
+            this->didChangeGpuMemorySize();
+        }
+    }
+}
+
+size_t GrTexture::gpuMemorySize() const {
+    size_t textureSize =  (size_t) fDesc.fWidth *
+                                   fDesc.fHeight *
+                                   GrBytesPerPixel(fDesc.fConfig);
+    if (kNotAllocated_MipMapsStatus != fMipMapsStatus) {
+        // We don't have to worry about the mipmaps being a different size than
+        // we'd expect because we never change fDesc.fWidth/fHeight.
+        textureSize *= 2;
+    }
+    return textureSize;
+}
+
 bool GrTexture::readPixels(int left, int top, int width, int height,
                            GrPixelConfig config, void* buffer,
                            size_t rowBytes, uint32_t pixelOpsFlags) {
diff --git a/gpu/GrTextureAccess.cpp b/gpu/GrTextureAccess.cpp
index e4b07869..91db08be 100644
--- a/gpu/GrTextureAccess.cpp
+++ b/gpu/GrTextureAccess.cpp
@@ -100,7 +100,7 @@ void GrTextureAccess::setSwizzle(const char* swizzle) {
                 fSwizzleMask |= kA_GrColorComponentFlag;
                 break;
             default:
-                GrCrash("Unexpected swizzle string character.");
+                SkFAIL("Unexpected swizzle string character.");
                 break;
         }
     }
diff --git a/gpu/GrVertexBuffer.h b/gpu/GrVertexBuffer.h
index a2bd5a1b..c3cf5348 100644
--- a/gpu/GrVertexBuffer.h
+++ b/gpu/GrVertexBuffer.h
@@ -15,8 +15,8 @@
 
 class GrVertexBuffer : public GrGeometryBuffer {
 protected:
-    GrVertexBuffer(GrGpu* gpu, bool isWrapped, size_t sizeInBytes, bool dynamic, bool cpuBacked)
-        : INHERITED(gpu, isWrapped, sizeInBytes, dynamic, cpuBacked) {}
+    GrVertexBuffer(GrGpu* gpu, bool isWrapped, size_t gpuMemorySize, bool dynamic, bool cpuBacked)
+        : INHERITED(gpu, isWrapped, gpuMemorySize, dynamic, cpuBacked) {}
 private:
     typedef GrGeometryBuffer INHERITED;
 };
diff --git a/gpu/SkGpuDevice.cpp b/gpu/SkGpuDevice.cpp
index 4af16109..517f082a 100644
--- a/gpu/SkGpuDevice.cpp
+++ b/gpu/SkGpuDevice.cpp
@@ -190,7 +190,9 @@ void SkGpuDevice::initFromRenderTarget(GrContext* context,
     fContext = context;
     fContext->ref();
 
-    fMainTextContext = SkNEW_ARGS(GrDistanceFieldTextContext, (fContext, fLeakyProperties));
+    bool useDFFonts = !!(flags & kDFFonts_Flag);
+    fMainTextContext = SkNEW_ARGS(GrDistanceFieldTextContext, (fContext, fLeakyProperties,
+                                                               useDFFonts));
     fFallbackTextContext = SkNEW_ARGS(GrBitmapTextContext, (fContext, fLeakyProperties));
 
     fRenderTarget = NULL;
@@ -1289,6 +1291,11 @@ void SkGpuDevice::drawTiledBitmap(const SkBitmap& bitmap,
                                   SkCanvas::DrawBitmapRectFlags flags,
                                   int tileSize,
                                   bool bicubic) {
+    // The following pixel lock is technically redundant, but it is desirable
+    // to lock outside of the tile loop to prevent redecoding the whole image
+    // at each tile in cases where 'bitmap' holds an SkDiscardablePixelRef that
+    // is larger than the limit of the discardable memory pool.
+    SkAutoLockPixels alp(bitmap);
     SkRect clippedSrcRect = SkRect::Make(clippedSrcIRect);
 
     int nx = bitmap.width() / tileSize;
@@ -1903,18 +1910,10 @@ SkSurface* SkGpuDevice::newSurface(const SkImageInfo& info) {
     return SkSurface::NewRenderTarget(fContext, info, fRenderTarget->numSamples());
 }
 
-// In the future this may not be a static method if we need to incorporate the
-// clip and matrix state into the key
-SkPicture::AccelData::Key SkGpuDevice::ComputeAccelDataKey() {
-    static const SkPicture::AccelData::Key gGPUID = SkPicture::AccelData::GenerateDomain();
-
-    return gGPUID;
-}
-
 void SkGpuDevice::EXPERIMENTAL_optimize(SkPicture* picture) {
-    SkPicture::AccelData::Key key = ComputeAccelDataKey();
+    SkPicture::AccelData::Key key = GPUAccelData::ComputeAccelDataKey();
 
-    GPUAccelData* data = SkNEW_ARGS(GPUAccelData, (key));
+    SkAutoTUnref<GPUAccelData> data(SkNEW_ARGS(GPUAccelData, (key)));
 
     picture->EXPERIMENTAL_addAccelData(data);
 
@@ -1927,7 +1926,7 @@ void SkGpuDevice::EXPERIMENTAL_purge(SkPicture* picture) {
 
 bool SkGpuDevice::EXPERIMENTAL_drawPicture(SkCanvas* canvas, SkPicture* picture) {
 
-    SkPicture::AccelData::Key key = ComputeAccelDataKey();
+    SkPicture::AccelData::Key key = GPUAccelData::ComputeAccelDataKey();
 
     const SkPicture::AccelData* data = picture->EXPERIMENTAL_getAccelData(key);
     if (NULL == data) {
@@ -1936,27 +1935,6 @@ bool SkGpuDevice::EXPERIMENTAL_drawPicture(SkCanvas* canvas, SkPicture* picture)
 
     const GPUAccelData *gpuData = static_cast<const GPUAccelData*>(data);
 
-//#define SK_PRINT_PULL_FORWARD_INFO 1
-
-#ifdef SK_PRINT_PULL_FORWARD_INFO
-    static bool gPrintedAccelData = false;
-
-    if (!gPrintedAccelData) {
-        for (int i = 0; i < gpuData->numSaveLayers(); ++i) {
-            const GPUAccelData::SaveLayerInfo& info = gpuData->saveLayerInfo(i);
-
-            SkDebugf("%d: Width: %d Height: %d SL: %d R: %d hasNestedLayers: %s\n",
-                                            i,
-                                            info.fSize.fWidth,
-                                            info.fSize.fHeight,
-                                            info.fSaveLayerOpID,
-                                            info.fRestoreOpID,
-                                            info.fHasNestedLayers ? "T" : "F");
-        }
-        gPrintedAccelData = true;
-    }
-#endif
-
     SkAutoTArray<bool> pullForward(gpuData->numSaveLayers());
     for (int i = 0; i < gpuData->numSaveLayers(); ++i) {
         pullForward[i] = false;
@@ -1977,10 +1955,6 @@ bool SkGpuDevice::EXPERIMENTAL_drawPicture(SkCanvas* canvas, SkPicture* picture)
 
     const SkPicture::OperationList& ops = picture->EXPERIMENTAL_getActiveOps(clip);
 
-#ifdef SK_PRINT_PULL_FORWARD_INFO
-    SkDebugf("rect: %d %d %d %d\n", clip.fLeft, clip.fTop, clip.fRight, clip.fBottom);
-#endif
-
     for (int i = 0; i < ops.numOps(); ++i) {
         for (int j = 0; j < gpuData->numSaveLayers(); ++j) {
             const GPUAccelData::SaveLayerInfo& info = gpuData->saveLayerInfo(j);
@@ -1991,17 +1965,5 @@ bool SkGpuDevice::EXPERIMENTAL_drawPicture(SkCanvas* canvas, SkPicture* picture)
         }
     }
 
-#ifdef SK_PRINT_PULL_FORWARD_INFO
-    SkDebugf("Need SaveLayers: ");
-    for (int i = 0; i < gpuData->numSaveLayers(); ++i) {
-        if (pullForward[i]) {
-            const GrCachedLayer* layer = fContext->getLayerCache()->findLayerOrCreate(picture, i);
-
-            SkDebugf("%d (%d), ", i, layer->layerID());
-        }
-    }
-    SkDebugf("\n");
-#endif
-
     return false;
 }
diff --git a/gpu/SkGrFontScaler.cpp b/gpu/SkGrFontScaler.cpp
index c0be4ff7..44856906 100644
--- a/gpu/SkGrFontScaler.cpp
+++ b/gpu/SkGrFontScaler.cpp
@@ -185,7 +185,7 @@ bool SkGrFontScaler::getPackedGlyphImage(GrGlyph::PackedID packed,
                 break;
             }
             default:
-                GrCrash("Invalid GrMaskFormat");
+                SkFAIL("Invalid GrMaskFormat");
         }
     } else if (srcRB == dstRB) {
         memcpy(dst, src, dstRB * height);
diff --git a/gpu/SkGrPixelRef.cpp b/gpu/SkGrPixelRef.cpp
index 18fefcc7..fd21f107 100644
--- a/gpu/SkGrPixelRef.cpp
+++ b/gpu/SkGrPixelRef.cpp
@@ -167,7 +167,7 @@ SkPixelRef* SkGrPixelRef::deepCopy(SkBitmap::Config dstConfig, const SkIRect* su
 }
 
 bool SkGrPixelRef::onReadPixels(SkBitmap* dst, const SkIRect* subset) {
-    if (NULL == fSurface || !fSurface->isValid()) {
+    if (NULL == fSurface || fSurface->wasDestroyed()) {
         return false;
     }
 
diff --git a/gpu/effects/GrBezierEffect.cpp b/gpu/effects/GrBezierEffect.cpp
index 78633e59..862c1d2c 100644
--- a/gpu/effects/GrBezierEffect.cpp
+++ b/gpu/effects/GrBezierEffect.cpp
@@ -109,7 +109,7 @@ void GrGLConicEffect::emitCode(GrGLFullShaderBuilder* builder,
             break;
         }
         default:
-            GrCrash("Shouldn't get here");
+            SkFAIL("Shouldn't get here");
     }
 
     builder->fsCodeAppendf("\t%s = %s;\n", outputColor,
@@ -244,7 +244,7 @@ void GrGLQuadEffect::emitCode(GrGLFullShaderBuilder* builder,
             break;
         }
         default:
-            GrCrash("Shouldn't get here");
+            SkFAIL("Shouldn't get here");
     }
 
     builder->fsCodeAppendf("\t%s = %s;\n", outputColor,
@@ -395,7 +395,7 @@ void GrGLCubicEffect::emitCode(GrGLFullShaderBuilder* builder,
             break;
         }
         default:
-            GrCrash("Shouldn't get here");
+            SkFAIL("Shouldn't get here");
     }
 
     builder->fsCodeAppendf("\t%s = %s;\n", outputColor,
diff --git a/gpu/effects/GrConfigConversionEffect.cpp b/gpu/effects/GrConfigConversionEffect.cpp
index 9b342fb4..f33ad239 100644
--- a/gpu/effects/GrConfigConversionEffect.cpp
+++ b/gpu/effects/GrConfigConversionEffect.cpp
@@ -61,7 +61,7 @@ public:
                         outputColor, outputColor, outputColor, swiz, outputColor, outputColor);
                     break;
                 default:
-                    GrCrash("Unknown conversion op.");
+                    SkFAIL("Unknown conversion op.");
                     break;
             }
         }
diff --git a/gpu/effects/GrConvolutionEffect.cpp b/gpu/effects/GrConvolutionEffect.cpp
index 57cdece9..aad7c878 100644
--- a/gpu/effects/GrConvolutionEffect.cpp
+++ b/gpu/effects/GrConvolutionEffect.cpp
@@ -118,7 +118,7 @@ void GrGLConvolutionEffect::setData(const GrGLUniformManager& uman,
             imageIncrement[1] = ySign / texture.height();
             break;
         default:
-            GrCrash("Unknown filter direction.");
+            SkFAIL("Unknown filter direction.");
     }
     uman.set2fv(fImageIncrementUni, 1, imageIncrement);
     if (conv.useBounds()) {
diff --git a/gpu/effects/GrOvalEffect.cpp b/gpu/effects/GrOvalEffect.cpp
index 40870e27..f2ee2788 100644
--- a/gpu/effects/GrOvalEffect.cpp
+++ b/gpu/effects/GrOvalEffect.cpp
@@ -342,7 +342,7 @@ void GLEllipseEffect::emitCode(GrGLShaderBuilder* builder,
             builder->fsCodeAppend("\t\tfloat alpha = approx_dist > 0.0 ? 1.0 : 0.0;\n");
             break;
         case kHairlineAA_GrEffectEdgeType:
-            GrCrash("Hairline not expected here.");
+            SkFAIL("Hairline not expected here.");
     }
 
     builder->fsCodeAppendf("\t\t%s = %s;\n", outputColor,
diff --git a/gpu/effects/GrRRectEffect.cpp b/gpu/effects/GrRRectEffect.cpp
index bc790574..11d8a18e 100644
--- a/gpu/effects/GrRRectEffect.cpp
+++ b/gpu/effects/GrRRectEffect.cpp
@@ -369,7 +369,7 @@ void GLCircularRRectEffect::setData(const GrGLUniformManager& uman,
                 rect.fBottom -= radius;
                 break;
             default:
-                GrCrash("Should have been one of the above cases.");
+                SkFAIL("Should have been one of the above cases.");
         }
         uman.set4f(fInnerRectUniform, rect.fLeft, rect.fTop, rect.fRight, rect.fBottom);
         uman.set1f(fRadiusPlusHalfUniform, radius + 0.5f);
@@ -566,7 +566,7 @@ void GLEllipticalRRectEffect::emitCode(GrGLShaderBuilder* builder,
             break;
         }
         default:
-            GrCrash("RRect should always be simple or nine-patch.");
+            SkFAIL("RRect should always be simple or nine-patch.");
     }
     // implicit is the evaluation of (x/a)^2 + (y/b)^2 - 1.
     builder->fsCodeAppend("\t\tfloat implicit = dot(Z, dxy) - 1.0;\n");
@@ -623,7 +623,7 @@ void GLEllipticalRRectEffect::setData(const GrGLUniformManager& uman,
                 break;
             }
         default:
-            GrCrash("RRect should always be simple or nine-patch.");
+            SkFAIL("RRect should always be simple or nine-patch.");
         }
         uman.set4f(fInnerRectUniform, rect.fLeft, rect.fTop, rect.fRight, rect.fBottom);
         fPrevRRect = rrect;
diff --git a/gpu/gl/GrGLAssembleInterface.cpp b/gpu/gl/GrGLAssembleInterface.cpp
index aed11e53..e4337259 100644
--- a/gpu/gl/GrGLAssembleInterface.cpp
+++ b/gpu/gl/GrGLAssembleInterface.cpp
@@ -173,6 +173,11 @@ const GrGLInterface* GrGLAssembleGLInterface(void* ctx, GrGLGetProc get) {
         GET_PROC(DeleteVertexArrays);
     }
 
+    if (glVer >= GR_GL_VER(3,0) || extensions.has("GL_ARB_map_buffer_range")) {
+        GET_PROC(MapBufferRange);
+        GET_PROC(FlushMappedBufferRange);
+    }
+
     // First look for GL3.0 FBO or GL_ARB_framebuffer_object (same since
     // GL_ARB_framebuffer_object doesn't use ARB suffix.)
     if (glVer >= GR_GL_VER(3,0) || extensions.has("GL_ARB_framebuffer_object")) {
diff --git a/gpu/gl/GrGLBufferImpl.cpp b/gpu/gl/GrGLBufferImpl.cpp
index 3c75b9fe..46e1f797 100644
--- a/gpu/gl/GrGLBufferImpl.cpp
+++ b/gpu/gl/GrGLBufferImpl.cpp
@@ -26,20 +26,22 @@ GrGLBufferImpl::GrGLBufferImpl(GrGpuGL* gpu, const Desc& desc, GrGLenum bufferTy
     , fLockPtr(NULL) {
     if (0 == desc.fID) {
         fCPUData = sk_malloc_flags(desc.fSizeInBytes, SK_MALLOC_THROW);
+        fGLSizeInBytes = 0;
     } else {
         fCPUData = NULL;
+        // We assume that the GL buffer was created at the desc's size initially.
+        fGLSizeInBytes = fDesc.fSizeInBytes;
     }
     VALIDATE();
 }
 
 void GrGLBufferImpl::release(GrGpuGL* gpu) {
+    VALIDATE();
     // make sure we've not been abandoned or already released
     if (NULL != fCPUData) {
-        VALIDATE();
         sk_free(fCPUData);
         fCPUData = NULL;
     } else if (fDesc.fID && !fDesc.fIsWrapped) {
-        VALIDATE();
         GL_CALL(gpu, DeleteBuffers(1, &fDesc.fID));
         if (GR_GL_ARRAY_BUFFER == fBufferType) {
             gpu->notifyVertexBufferDelete(fDesc.fID);
@@ -48,15 +50,19 @@ void GrGLBufferImpl::release(GrGpuGL* gpu) {
             gpu->notifyIndexBufferDelete(fDesc.fID);
         }
         fDesc.fID = 0;
+        fGLSizeInBytes = 0;
     }
     fLockPtr = NULL;
+    VALIDATE();
 }
 
 void GrGLBufferImpl::abandon() {
     fDesc.fID = 0;
+    fGLSizeInBytes = 0;
     fLockPtr = NULL;
     sk_free(fCPUData);
     fCPUData = NULL;
+    VALIDATE();
 }
 
 void GrGLBufferImpl::bind(GrGpuGL* gpu) const {
@@ -67,6 +73,7 @@ void GrGLBufferImpl::bind(GrGpuGL* gpu) const {
         SkASSERT(GR_GL_ELEMENT_ARRAY_BUFFER == fBufferType);
         gpu->bindIndexBufferAndDefaultVertexArray(fDesc.fID);
     }
+    VALIDATE();
 }
 
 void* GrGLBufferImpl::lock(GrGpuGL* gpu) {
@@ -74,17 +81,55 @@ void* GrGLBufferImpl::lock(GrGpuGL* gpu) {
     SkASSERT(!this->isLocked());
     if (0 == fDesc.fID) {
         fLockPtr = fCPUData;
-    } else if (gpu->caps()->bufferLockSupport()) {
-        this->bind(gpu);
-        // Let driver know it can discard the old data
-        GL_CALL(gpu, BufferData(fBufferType,
-                                (GrGLsizeiptr) fDesc.fSizeInBytes,
-                                NULL,
-                                fDesc.fDynamic ? DYNAMIC_USAGE_PARAM : GR_GL_STATIC_DRAW));
-        GR_GL_CALL_RET(gpu->glInterface(),
-                       fLockPtr,
-                       MapBuffer(fBufferType, GR_GL_WRITE_ONLY));
+    } else {
+        switch (gpu->glCaps().mapBufferType()) {
+            case GrGLCaps::kNone_MapBufferType:
+                VALIDATE();
+                return NULL;
+            case GrGLCaps::kMapBuffer_MapBufferType:
+                this->bind(gpu);
+                // Let driver know it can discard the old data
+                if (GR_GL_USE_BUFFER_DATA_NULL_HINT || fDesc.fSizeInBytes != fGLSizeInBytes) {
+                    fGLSizeInBytes = fDesc.fSizeInBytes;
+                    GL_CALL(gpu,
+                            BufferData(fBufferType, fGLSizeInBytes, NULL,
+                                       fDesc.fDynamic ? DYNAMIC_USAGE_PARAM : GR_GL_STATIC_DRAW));
+                }
+                GR_GL_CALL_RET(gpu->glInterface(), fLockPtr,
+                               MapBuffer(fBufferType, GR_GL_WRITE_ONLY));
+                break;
+            case GrGLCaps::kMapBufferRange_MapBufferType: {
+                this->bind(gpu);
+                // Make sure the GL buffer size agrees with fDesc before mapping.
+                if (fDesc.fSizeInBytes != fGLSizeInBytes) {
+                    fGLSizeInBytes = fDesc.fSizeInBytes;
+                    GL_CALL(gpu,
+                            BufferData(fBufferType, fGLSizeInBytes, NULL,
+                                       fDesc.fDynamic ? DYNAMIC_USAGE_PARAM : GR_GL_STATIC_DRAW));
+                }
+                static const GrGLbitfield kAccess = GR_GL_MAP_INVALIDATE_BUFFER_BIT |
+                                                    GR_GL_MAP_WRITE_BIT;
+                GR_GL_CALL_RET(gpu->glInterface(),
+                               fLockPtr,
+                               MapBufferRange(fBufferType, 0, fGLSizeInBytes, kAccess));
+                break;
+            }
+            case GrGLCaps::kChromium_MapBufferType:
+                this->bind(gpu);
+                // Make sure the GL buffer size agrees with fDesc before mapping.
+                if (fDesc.fSizeInBytes != fGLSizeInBytes) {
+                    fGLSizeInBytes = fDesc.fSizeInBytes;
+                    GL_CALL(gpu,
+                            BufferData(fBufferType, fGLSizeInBytes, NULL,
+                                       fDesc.fDynamic ? DYNAMIC_USAGE_PARAM : GR_GL_STATIC_DRAW));
+                }
+                GR_GL_CALL_RET(gpu->glInterface(),
+                               fLockPtr,
+                               MapBufferSubData(fBufferType, 0, fGLSizeInBytes, GR_GL_WRITE_ONLY));
+                break;
+        }
     }
+    VALIDATE();
     return fLockPtr;
 }
 
@@ -92,9 +137,20 @@ void GrGLBufferImpl::unlock(GrGpuGL* gpu) {
     VALIDATE();
     SkASSERT(this->isLocked());
     if (0 != fDesc.fID) {
-        SkASSERT(gpu->caps()->bufferLockSupport());
-        this->bind(gpu);
-        GL_CALL(gpu, UnmapBuffer(fBufferType));
+        switch (gpu->glCaps().mapBufferType()) {
+            case GrGLCaps::kNone_MapBufferType:
+                SkDEBUGFAIL("Shouldn't get here.");
+                return;
+            case GrGLCaps::kMapBuffer_MapBufferType: // fall through
+            case GrGLCaps::kMapBufferRange_MapBufferType:
+                this->bind(gpu);
+                GL_CALL(gpu, UnmapBuffer(fBufferType));
+                break;
+            case GrGLCaps::kChromium_MapBufferType:
+                this->bind(gpu);
+                GR_GL_CALL(gpu->glInterface(), UnmapBufferSubData(fLockPtr));
+                break;
+        }
     }
     fLockPtr = NULL;
 }
@@ -127,7 +183,8 @@ bool GrGLBufferImpl::updateData(GrGpuGL* gpu, const void* src, size_t srcSizeInB
         // draws that reference the old contents. With this hint it can
         // assign a different allocation for the new contents to avoid
         // flushing the gpu past draws consuming the old contents.
-        GL_CALL(gpu, BufferData(fBufferType, (GrGLsizeiptr) fDesc.fSizeInBytes, NULL, usage));
+        fGLSizeInBytes = fDesc.fSizeInBytes;
+        GL_CALL(gpu, BufferData(fBufferType, fGLSizeInBytes, NULL, usage));
         GL_CALL(gpu, BufferSubData(fBufferType, 0, (GrGLsizeiptr) srcSizeInBytes, src));
     }
 #else
@@ -147,10 +204,12 @@ bool GrGLBufferImpl::updateData(GrGpuGL* gpu, const void* src, size_t srcSizeInB
         // Chromium's command buffer may turn a glBufferSubData where the size
         // exactly matches the buffer size into a glBufferData. So we tack 1
         // extra byte onto the glBufferData.
-        GL_CALL(gpu, BufferData(fBufferType, srcSizeInBytes + 1, NULL, usage));
+        fGLSizeInBytes = srcSizeInBytes + 1;
+        GL_CALL(gpu, BufferData(fBufferType, fGLSizeInBytes, NULL, usage));
         GL_CALL(gpu, BufferSubData(fBufferType, 0, srcSizeInBytes, src));
     } else {
-        GL_CALL(gpu, BufferData(fBufferType, srcSizeInBytes, src, usage));
+        fGLSizeInBytes = srcSizeInBytes;
+        GL_CALL(gpu, BufferData(fBufferType, fGLSizeInBytes, src, usage));
     }
 #endif
     return true;
@@ -161,5 +220,7 @@ void GrGLBufferImpl::validate() const {
     // The following assert isn't valid when the buffer has been abandoned:
     // SkASSERT((0 == fDesc.fID) == (NULL != fCPUData));
     SkASSERT(0 != fDesc.fID || !fDesc.fIsWrapped);
+    SkASSERT(NULL == fCPUData || 0 == fGLSizeInBytes);
+    SkASSERT(NULL == fLockPtr || NULL != fCPUData || fGLSizeInBytes == fDesc.fSizeInBytes);
     SkASSERT(NULL == fCPUData || NULL == fLockPtr || fCPUData == fLockPtr);
 }
diff --git a/gpu/gl/GrGLBufferImpl.h b/gpu/gl/GrGLBufferImpl.h
index 148ca1b2..19d23e0d 100644
--- a/gpu/gl/GrGLBufferImpl.h
+++ b/gpu/gl/GrGLBufferImpl.h
@@ -53,6 +53,8 @@ private:
     GrGLenum     fBufferType; // GL_ARRAY_BUFFER or GL_ELEMENT_ARRAY_BUFFER
     void*        fCPUData;
     void*        fLockPtr;
+    size_t       fGLSizeInBytes;     // In certain cases we make the size of the GL buffer object
+                                     // smaller or larger than the size in fDesc.
 
     typedef SkNoncopyable INHERITED;
 };
diff --git a/gpu/gl/GrGLCaps.cpp b/gpu/gl/GrGLCaps.cpp
index 501411c0..f577e9d7 100644
--- a/gpu/gl/GrGLCaps.cpp
+++ b/gpu/gl/GrGLCaps.cpp
@@ -24,6 +24,7 @@ void GrGLCaps::reset() {
     fMSFBOType = kNone_MSFBOType;
     fFBFetchType = kNone_FBFetchType;
     fInvalidateFBType = kNone_InvalidateFBType;
+    fMapBufferType = kNone_MapBufferType;
     fMaxFragmentUniformVectors = 0;
     fMaxVertexAttributes = 0;
     fMaxFragmentTextureUnits = 0;
@@ -47,7 +48,6 @@ void GrGLCaps::reset() {
     fIsCoreProfile = false;
     fFullClearIsFree = false;
     fDropsTileOnZeroDivide = false;
-    fMapSubSupport = false;
 }
 
 GrGLCaps::GrGLCaps(const GrGLCaps& caps) : GrDrawTargetCaps() {
@@ -66,6 +66,7 @@ GrGLCaps& GrGLCaps::operator= (const GrGLCaps& caps) {
     fMSFBOType = caps.fMSFBOType;
     fFBFetchType = caps.fFBFetchType;
     fInvalidateFBType = caps.fInvalidateFBType;
+    fMapBufferType = caps.fMapBufferType;
     fRGBA8RenderbufferSupport = caps.fRGBA8RenderbufferSupport;
     fBGRAFormatSupport = caps.fBGRAFormatSupport;
     fBGRAIsInternalFormat = caps.fBGRAIsInternalFormat;
@@ -85,7 +86,6 @@ GrGLCaps& GrGLCaps::operator= (const GrGLCaps& caps) {
     fIsCoreProfile = caps.fIsCoreProfile;
     fFullClearIsFree = caps.fFullClearIsFree;
     fDropsTileOnZeroDivide = caps.fDropsTileOnZeroDivide;
-    fMapSubSupport = caps.fMapSubSupport;
 
     return *this;
 }
@@ -290,12 +290,27 @@ bool GrGLCaps::init(const GrGLContextInfo& ctxInfo, const GrGLInterface* gli) {
     }
 
     if (kGL_GrGLStandard == standard) {
-        fBufferLockSupport = true; // we require VBO support and the desktop VBO extension includes
-                                   // glMapBuffer.
-        fMapSubSupport = false;
+        fMapBufferFlags = kCanMap_MapFlag; // we require VBO support and the desktop VBO
+                                            // extension includes glMapBuffer.
+        if (version >= GR_GL_VER(3, 0) || ctxInfo.hasExtension("GL_ARB_map_buffer_range")) {
+            fMapBufferFlags |= kSubset_MapFlag;
+            fMapBufferType = kMapBufferRange_MapBufferType;
+        } else {
+            fMapBufferType = kMapBuffer_MapBufferType;
+        }
     } else {
-        fBufferLockSupport = ctxInfo.hasExtension("GL_OES_mapbuffer");
-        fMapSubSupport = ctxInfo.hasExtension("GL_CHROMIUM_map_sub");
+        // Unextended GLES2 doesn't have any buffer mapping.
+        fMapBufferFlags = kNone_MapBufferType;
+        if (ctxInfo.hasExtension("GL_CHROMIUM_map_sub")) {
+            fMapBufferFlags = kCanMap_MapFlag | kSubset_MapFlag;
+            fMapBufferType = kChromium_MapBufferType;
+        } else if (version >= GR_GL_VER(3, 0) || ctxInfo.hasExtension("GL_EXT_map_buffer_range")) {
+            fMapBufferFlags = kCanMap_MapFlag | kSubset_MapFlag;
+            fMapBufferType = kMapBufferRange_MapBufferType;
+        } else if (ctxInfo.hasExtension("GL_OES_mapbuffer")) {
+            fMapBufferFlags = kCanMap_MapFlag;
+            fMapBufferType = kMapBuffer_MapBufferType;
+        }
     }
 
     if (kGL_GrGLStandard == standard) {
@@ -579,7 +594,7 @@ void GrGLCaps::markColorConfigAndStencilFormatAsVerified(
             return;
         }
     }
-    GrCrash("Why are we seeing a stencil format that "
+    SkFAIL("Why are we seeing a stencil format that "
             "GrGLCaps doesn't know about.");
 }
 
@@ -600,7 +615,7 @@ bool GrGLCaps::isColorConfigAndStencilFormatVerified(
             return fStencilVerifiedColorConfigs[i].isVerified(config);
         }
     }
-    GrCrash("Why are we seeing a stencil format that "
+    SkFAIL("Why are we seeing a stencil format that "
             "GLCaps doesn't know about.");
     return false;
 }
@@ -655,10 +670,23 @@ SkString GrGLCaps::dump() const {
     GR_STATIC_ASSERT(2 == kInvalidate_InvalidateFBType);
     GR_STATIC_ASSERT(SK_ARRAY_COUNT(kInvalidateFBTypeStr) == kLast_InvalidateFBType + 1);
 
+    static const char* kMapBufferTypeStr[] = {
+        "None",
+        "MapBuffer",
+        "MapBufferRange",
+        "Chromium",
+    };
+    GR_STATIC_ASSERT(0 == kNone_MapBufferType);
+    GR_STATIC_ASSERT(1 == kMapBuffer_MapBufferType);
+    GR_STATIC_ASSERT(2 == kMapBufferRange_MapBufferType);
+    GR_STATIC_ASSERT(3 == kChromium_MapBufferType);
+    GR_STATIC_ASSERT(SK_ARRAY_COUNT(kMapBufferTypeStr) == kLast_MapBufferType + 1);
+
     r.appendf("Core Profile: %s\n", (fIsCoreProfile ? "YES" : "NO"));
     r.appendf("MSAA Type: %s\n", kMSFBOExtStr[fMSFBOType]);
     r.appendf("FB Fetch Type: %s\n", kFBFetchTypeStr[fFBFetchType]);
     r.appendf("Invalidate FB Type: %s\n", kInvalidateFBTypeStr[fInvalidateFBType]);
+    r.appendf("Map Buffer Type: %s\n", kMapBufferTypeStr[fMapBufferType]);
     r.appendf("Max FS Uniform Vectors: %d\n", fMaxFragmentUniformVectors);
     r.appendf("Max FS Texture Units: %d\n", fMaxFragmentTextureUnits);
     if (!fIsCoreProfile) {
diff --git a/gpu/gl/GrGLCaps.h b/gpu/gl/GrGLCaps.h
index 48925d48..ea0f4124 100644
--- a/gpu/gl/GrGLCaps.h
+++ b/gpu/gl/GrGLCaps.h
@@ -86,6 +86,15 @@ public:
         kLast_InvalidateFBType = kInvalidate_InvalidateFBType
     };
 
+    enum MapBufferType {
+        kNone_MapBufferType,
+        kMapBuffer_MapBufferType,         // glMapBuffer()
+        kMapBufferRange_MapBufferType,    // glMapBufferRange()
+        kChromium_MapBufferType,          // GL_CHROMIUM_map_sub
+
+        kLast_MapBufferType = kChromium_MapBufferType,
+    };
+
     /**
      * Creates a GrGLCaps that advertises no support for any extensions,
      * formats, etc. Call init to initialize from a GrGLContextInfo.
@@ -169,10 +178,8 @@ public:
 
     InvalidateFBType invalidateFBType() const { return fInvalidateFBType; }
 
-    /**
-     * Returs a string containeng the caps info.
-     */
-    virtual SkString dump() const SK_OVERRIDE;
+    /// What type of buffer mapping is supported?
+    MapBufferType mapBufferType() const { return fMapBufferType; }
 
     /**
      * Gets an array of legal stencil formats. These formats are not guaranteed
@@ -258,8 +265,10 @@ public:
 
     bool dropsTileOnZeroDivide() const { return fDropsTileOnZeroDivide; }
 
-    /// Is GL_CHROMIUM_map_sub supported?
-    bool mapSubSupport() const { return fMapSubSupport; }
+    /**
+     * Returns a string containing the caps info.
+     */
+    virtual SkString dump() const SK_OVERRIDE;
 
 private:
     /**
@@ -322,6 +331,7 @@ private:
     MSFBOType           fMSFBOType;
     FBFetchType         fFBFetchType;
     InvalidateFBType    fInvalidateFBType;
+    MapBufferType       fMapBufferType;
 
     bool fRGBA8RenderbufferSupport : 1;
     bool fBGRAFormatSupport : 1;
@@ -342,7 +352,6 @@ private:
     bool fIsCoreProfile : 1;
     bool fFullClearIsFree : 1;
     bool fDropsTileOnZeroDivide : 1;
-    bool fMapSubSupport : 1;
 
     typedef GrDrawTargetCaps INHERITED;
 };
diff --git a/gpu/gl/GrGLCreateNullInterface.cpp b/gpu/gl/GrGLCreateNullInterface.cpp
index 18a9d726..6cfa8c29 100644
--- a/gpu/gl/GrGLCreateNullInterface.cpp
+++ b/gpu/gl/GrGLCreateNullInterface.cpp
@@ -125,7 +125,7 @@ GrGLvoid GR_GL_FUNCTION_TYPE nullGLBufferData(GrGLenum target,
         id = gCurrElementArrayBuffer;
         break;
     default:
-        GrCrash("Unexpected target to nullGLBufferData");
+        SkFAIL("Unexpected target to nullGLBufferData");
         break;
     }
 
@@ -186,8 +186,29 @@ GrGLvoid GR_GL_FUNCTION_TYPE nullGLDeleteBuffers(GrGLsizei n, const GrGLuint* id
     }
 }
 
-GrGLvoid* GR_GL_FUNCTION_TYPE nullGLMapBuffer(GrGLenum target, GrGLenum access) {
+GrGLvoid* GR_GL_FUNCTION_TYPE nullGLMapBufferRange(GrGLenum target, GrGLintptr offset,
+                                                   GrGLsizeiptr length, GrGLbitfield access) {
+    GrGLuint id = 0;
+    switch (target) {
+        case GR_GL_ARRAY_BUFFER:
+            id = gCurrArrayBuffer;
+            break;
+        case GR_GL_ELEMENT_ARRAY_BUFFER:
+            id = gCurrElementArrayBuffer;
+            break;
+    }
 
+    if (id > 0) {
+        // We just ignore the offset and length here.
+        GrBufferObj* buffer = look_up(id);
+        SkASSERT(!buffer->mapped());
+        buffer->setMapped(true);
+        return buffer->dataPtr();
+    }
+    return NULL;
+}
+
+GrGLvoid* GR_GL_FUNCTION_TYPE nullGLMapBuffer(GrGLenum target, GrGLenum access) {
     GrGLuint id = 0;
     switch (target) {
         case GR_GL_ARRAY_BUFFER:
@@ -209,6 +230,11 @@ GrGLvoid* GR_GL_FUNCTION_TYPE nullGLMapBuffer(GrGLenum target, GrGLenum access)
     return NULL;            // no buffer bound to target
 }
 
+GrGLvoid GR_GL_FUNCTION_TYPE nullGLFlushMappedBufferRange(GrGLenum target,
+                                                          GrGLintptr offset,
+                                                          GrGLsizeiptr length) {}
+
+
 GrGLboolean GR_GL_FUNCTION_TYPE nullGLUnmapBuffer(GrGLenum target) {
     GrGLuint id = 0;
     switch (target) {
@@ -251,7 +277,7 @@ GrGLvoid GR_GL_FUNCTION_TYPE nullGLGetBufferParameteriv(GrGLenum target, GrGLenu
             }
             break; }
         default:
-            GrCrash("Unexpected pname to GetBufferParamateriv");
+            SkFAIL("Unexpected pname to GetBufferParamateriv");
             break;
     }
 };
@@ -304,6 +330,7 @@ const GrGLInterface* GrGLCreateNullInterface() {
     functions->fEndQuery = noOpGLEndQuery;
     functions->fFinish = noOpGLFinish;
     functions->fFlush = noOpGLFlush;
+    functions->fFlushMappedBufferRange = nullGLFlushMappedBufferRange;
     functions->fFrontFace = noOpGLFrontFace;
     functions->fGenBuffers = nullGLGenBuffers;
     functions->fGenerateMipmap = nullGLGenerateMipmap;
@@ -329,6 +356,8 @@ const GrGLInterface* GrGLCreateNullInterface() {
     functions->fInsertEventMarker = noOpGLInsertEventMarker;
     functions->fLineWidth = noOpGLLineWidth;
     functions->fLinkProgram = noOpGLLinkProgram;
+    functions->fMapBuffer = nullGLMapBuffer;
+    functions->fMapBufferRange = nullGLMapBufferRange;
     functions->fPixelStorei = nullGLPixelStorei;
     functions->fPopGroupMarker = noOpGLPopGroupMarker;
     functions->fPushGroupMarker = noOpGLPushGroupMarker;
@@ -368,6 +397,7 @@ const GrGLInterface* GrGLCreateNullInterface() {
     functions->fUniformMatrix2fv = noOpGLUniformMatrix2fv;
     functions->fUniformMatrix3fv = noOpGLUniformMatrix3fv;
     functions->fUniformMatrix4fv = noOpGLUniformMatrix4fv;
+    functions->fUnmapBuffer = nullGLUnmapBuffer;
     functions->fUseProgram = nullGLUseProgram;
     functions->fVertexAttrib4fv = noOpGLVertexAttrib4fv;
     functions->fVertexAttribPointer = noOpGLVertexAttribPointer;
@@ -387,10 +417,8 @@ const GrGLInterface* GrGLCreateNullInterface() {
     functions->fRenderbufferStorageMultisample = noOpGLRenderbufferStorageMultisample;
     functions->fBlitFramebuffer = noOpGLBlitFramebuffer;
     functions->fResolveMultisampleFramebuffer = noOpGLResolveMultisampleFramebuffer;
-    functions->fMapBuffer = nullGLMapBuffer;
     functions->fMatrixLoadf = noOpGLMatrixLoadf;
     functions->fMatrixLoadIdentity = noOpGLMatrixLoadIdentity;
-    functions->fUnmapBuffer = nullGLUnmapBuffer;
     functions->fBindFragDataLocationIndexed = noOpGLBindFragDataLocationIndexed;
 
     interface->fExtensions.init(kGL_GrGLStandard, functions->fGetString, functions->fGetStringi,
diff --git a/gpu/gl/GrGLDefines.h b/gpu/gl/GrGLDefines.h
index a4dc2f78..73f3d2e1 100644
--- a/gpu/gl/GrGLDefines.h
+++ b/gpu/gl/GrGLDefines.h
@@ -601,6 +601,14 @@
 /* Vertex Buffer Object */
 #define GR_GL_WRITE_ONLY                         0x88B9
 #define GR_GL_BUFFER_MAPPED                      0x88BC
+
+#define GR_GL_MAP_READ_BIT                       0x0001
+#define GR_GL_MAP_WRITE_BIT                      0x0002
+#define GR_GL_MAP_INVALIDATE_RANGE_BIT           0x0004
+#define GR_GL_MAP_INVALIDATE_BUFFER_BIT          0x0008
+#define GR_GL_MAP_FLUSH_EXPLICIT_BIT             0x0010
+#define GR_GL_MAP_UNSYNCHRONIZED_BIT             0x0020
+
 /* Read Format */
 #define GR_GL_IMPLEMENTATION_COLOR_READ_TYPE   0x8B9A
 #define GR_GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B
diff --git a/gpu/gl/GrGLIndexBuffer.cpp b/gpu/gl/GrGLIndexBuffer.cpp
index b6290b18..4e7f989c 100644
--- a/gpu/gl/GrGLIndexBuffer.cpp
+++ b/gpu/gl/GrGLIndexBuffer.cpp
@@ -14,7 +14,7 @@ GrGLIndexBuffer::GrGLIndexBuffer(GrGpuGL* gpu, const Desc& desc)
 }
 
 void GrGLIndexBuffer::onRelease() {
-    if (this->isValid()) {
+    if (!this->wasDestroyed()) {
         fImpl.release(this->getGpuGL());
     }
 
@@ -27,7 +27,7 @@ void GrGLIndexBuffer::onAbandon() {
 }
 
 void* GrGLIndexBuffer::lock() {
-    if (this->isValid()) {
+    if (!this->wasDestroyed()) {
         return fImpl.lock(this->getGpuGL());
     } else {
         return NULL;
@@ -39,7 +39,7 @@ void* GrGLIndexBuffer::lockPtr() const {
 }
 
 void GrGLIndexBuffer::unlock() {
-    if (this->isValid()) {
+    if (!this->wasDestroyed()) {
         fImpl.unlock(this->getGpuGL());
     }
 }
@@ -49,7 +49,7 @@ bool GrGLIndexBuffer::isLocked() const {
 }
 
 bool GrGLIndexBuffer::updateData(const void* src, size_t srcSizeInBytes) {
-    if (this->isValid()) {
+    if (!this->wasDestroyed()) {
         return fImpl.updateData(this->getGpuGL(), src, srcSizeInBytes);
     } else {
         return false;
diff --git a/gpu/gl/GrGLIndexBuffer.h b/gpu/gl/GrGLIndexBuffer.h
index 32a80860..893e3571 100644
--- a/gpu/gl/GrGLIndexBuffer.h
+++ b/gpu/gl/GrGLIndexBuffer.h
@@ -26,7 +26,7 @@ public:
     size_t baseOffset() const { return fImpl.baseOffset(); }
 
     void bind() const {
-        if (this->isValid()) {
+        if (!this->wasDestroyed()) {
             fImpl.bind(this->getGpuGL());
         }
     }
@@ -45,7 +45,7 @@ protected:
 
 private:
     GrGpuGL* getGpuGL() const {
-        SkASSERT(this->isValid());
+        SkASSERT(!this->wasDestroyed());
         return (GrGpuGL*)(this->getGpu());
     }
 
diff --git a/gpu/gl/GrGLInterface.cpp b/gpu/gl/GrGLInterface.cpp
index 7efa067d..ee184d0a 100644
--- a/gpu/gl/GrGLInterface.cpp
+++ b/gpu/gl/GrGLInterface.cpp
@@ -116,9 +116,15 @@ GrGLInterface* GrGLInterface::NewClone(const GrGLInterface* interface) {
     return clone;
 }
 
-#define RETURN_FALSE_INTERFACE                             \
-    GrDebugCrash("GrGLInterface::validate() failed.");     \
-    return false;                                          \
+#ifdef SK_DEBUG
+    static int kIsDebug = 1;
+#else
+    static int kIsDebug = 0;
+#endif
+
+#define RETURN_FALSE_INTERFACE                                                                   \
+    if (kIsDebug) { SkDebugf("%s:%d GrGLInterface::validate() failed.\n", __FILE__, __LINE__); } \
+    return false;
 
 bool GrGLInterface::validate() const {
 
@@ -480,8 +486,8 @@ bool GrGLInterface::validate() const {
         }
     }
 
-#if 0 // This can be enabled once Chromium is updated to set these functions pointers.
-    if ((kGL_GrGLStandard == fStandard) || fExtensions.has("GL_ARB_invalidate_subdata")) {
+    if ((kGL_GrGLStandard == fStandard && glVer >= GR_GL_VER(4,3)) ||
+        fExtensions.has("GL_ARB_invalidate_subdata")) {
         if (NULL == fFunctions.fInvalidateBufferData ||
             NULL == fFunctions.fInvalidateBufferSubData ||
             NULL == fFunctions.fInvalidateFramebuffer ||
@@ -490,7 +496,7 @@ bool GrGLInterface::validate() const {
             NULL == fFunctions.fInvalidateTexSubImage) {
             RETURN_FALSE_INTERFACE;
         }
-    } else if (glVer >= GR_GL_VER(3,0)) {
+    } else if (kGLES_GrGLStandard == fStandard && glVer >= GR_GL_VER(3,0)) {
         // ES 3.0 adds the framebuffer functions but not the others.
         if (NULL == fFunctions.fInvalidateFramebuffer ||
             NULL == fFunctions.fInvalidateSubFramebuffer) {
@@ -506,7 +512,15 @@ bool GrGLInterface::validate() const {
             RETURN_FALSE_INTERFACE;
         }
     }
-#endif
 
+    // These functions are added to the 3.0 version of both GLES and GL.
+    if (glVer >= GR_GL_VER(3,0) ||
+        (kGLES_GrGLStandard == fStandard && fExtensions.has("GL_EXT_map_buffer_range")) ||
+        (kGL_GrGLStandard == fStandard && fExtensions.has("GL_ARB_map_buffer_range"))) {
+        if (NULL == fFunctions.fMapBufferRange ||
+            NULL == fFunctions.fFlushMappedBufferRange) {
+            RETURN_FALSE_INTERFACE;
+        }
+    }
     return true;
 }
diff --git a/gpu/gl/GrGLNoOpInterface.cpp b/gpu/gl/GrGLNoOpInterface.cpp
index 2b84b280..a433c0e1 100644
--- a/gpu/gl/GrGLNoOpInterface.cpp
+++ b/gpu/gl/GrGLNoOpInterface.cpp
@@ -500,7 +500,7 @@ GrGLvoid GR_GL_FUNCTION_TYPE noOpGLGetIntegerv(GrGLenum pname, GrGLint* params)
             *params = SK_ARRAY_COUNT(kExtensions);
             break;
         default:
-            GrCrash("Unexpected pname to GetIntegerv");
+            SkFAIL("Unexpected pname to GetIntegerv");
    }
 }
 
@@ -529,7 +529,7 @@ GrGLvoid GR_GL_FUNCTION_TYPE noOpGLGetShaderOrProgramiv(GrGLuint program,
             break;
         // we don't expect any other pnames
         default:
-            GrCrash("Unexpected pname to GetProgramiv");
+            SkFAIL("Unexpected pname to GetProgramiv");
             break;
    }
 }
@@ -545,7 +545,7 @@ void query_result(GrGLenum GLtarget, GrGLenum pname, T *params) {
             *params = 0;
             break;
         default:
-            GrCrash("Unexpected pname passed to GetQueryObject.");
+            SkFAIL("Unexpected pname passed to GetQueryObject.");
             break;
    }
 }
@@ -562,7 +562,7 @@ GrGLvoid GR_GL_FUNCTION_TYPE noOpGLGetQueryiv(GrGLenum GLtarget,
             *params = 32;
             break;
         default:
-            GrCrash("Unexpected pname passed GetQueryiv.");
+            SkFAIL("Unexpected pname passed GetQueryiv.");
    }
 }
 
@@ -603,7 +603,7 @@ const GrGLubyte* GR_GL_FUNCTION_TYPE noOpGLGetString(GrGLenum name) {
         case GR_GL_RENDERER:
             return (const GrGLubyte*)"The Debug (Non-)Renderer";
         default:
-            GrCrash("Unexpected name passed to GetString");
+            SkFAIL("Unexpected name passed to GetString");
             return NULL;
    }
 }
@@ -617,7 +617,7 @@ const GrGLubyte* GR_GL_FUNCTION_TYPE noOpGLGetStringi(GrGLenum name, GrGLuint i)
                 return NULL;
             }
         default:
-            GrCrash("Unexpected name passed to GetStringi");
+            SkFAIL("Unexpected name passed to GetStringi");
             return NULL;
     }
 }
@@ -628,7 +628,7 @@ GrGLvoid GR_GL_FUNCTION_TYPE noOpGLGetTexLevelParameteriv(GrGLenum target,
                                                           GrGLint* params) {
     // we used to use this to query stuff about externally created textures,
     // now we just require clients to tell us everything about the texture.
-    GrCrash("Should never query texture parameters.");
+    SkFAIL("Should never query texture parameters.");
 }
 
 GrGLint GR_GL_FUNCTION_TYPE noOpGLGetUniformLocation(GrGLuint program, const char* name) {
diff --git a/gpu/gl/GrGLPath.h b/gpu/gl/GrGLPath.h
index 3647d4d6..3409547b 100644
--- a/gpu/gl/GrGLPath.h
+++ b/gpu/gl/GrGLPath.h
@@ -27,7 +27,7 @@ public:
     GrGLuint pathID() const { return fPathID; }
     // TODO: Figure out how to get an approximate size of the path in Gpu
     // memory.
-    virtual size_t sizeInBytes() const SK_OVERRIDE { return 100; }
+    virtual size_t gpuMemorySize() const SK_OVERRIDE { return 100; }
 
 protected:
     virtual void onRelease() SK_OVERRIDE;
diff --git a/gpu/gl/GrGLProgram.cpp b/gpu/gl/GrGLProgram.cpp
index 9b997c85..aa46aeda 100644
--- a/gpu/gl/GrGLProgram.cpp
+++ b/gpu/gl/GrGLProgram.cpp
@@ -89,7 +89,7 @@ void GrGLProgram::overrideBlend(GrBlendCoeff* srcCoeff,
             SkASSERT(kOne_GrBlendCoeff == *srcCoeff && kZero_GrBlendCoeff == *dstCoeff);
             break;
         default:
-            GrCrash("Unexpected coverage output");
+            SkFAIL("Unexpected coverage output");
             break;
     }
 }
@@ -270,7 +270,7 @@ void GrGLProgram::setColor(const GrDrawState& drawState,
                 sharedState->fConstAttribColorIndex = -1;
                 break;
             default:
-                GrCrash("Unknown color type.");
+                SkFAIL("Unknown color type.");
         }
     } else {
         sharedState->fConstAttribColorIndex = -1;
@@ -309,7 +309,7 @@ void GrGLProgram::setCoverage(const GrDrawState& drawState,
                 sharedState->fConstAttribCoverageIndex = -1;
                 break;
             default:
-                GrCrash("Unknown coverage type.");
+                SkFAIL("Unknown coverage type.");
         }
     } else {
         sharedState->fConstAttribCoverageIndex = -1;
diff --git a/gpu/gl/GrGLProgramEffects.cpp b/gpu/gl/GrGLProgramEffects.cpp
index 1695a8e3..04cebf85 100644
--- a/gpu/gl/GrGLProgramEffects.cpp
+++ b/gpu/gl/GrGLProgramEffects.cpp
@@ -341,7 +341,7 @@ void GrGLVertexProgramEffects::emitTransforms(GrGLFullShaderBuilder* builder,
                 varyingType = kVec3f_GrSLType;
                 break;
             default:
-                GrCrash("Unexpected key.");
+                SkFAIL("Unexpected key.");
         }
         SkString suffixedUniName;
         if (kVoid_GrSLType != transforms[t].fType) {
@@ -393,7 +393,7 @@ void GrGLVertexProgramEffects::emitTransforms(GrGLFullShaderBuilder* builder,
                 break;
             }
             default:
-                GrCrash("Unexpected uniform type.");
+                SkFAIL("Unexpected uniform type.");
         }
         SkNEW_APPEND_TO_TARRAY(outCoords, TransformedCoords,
                                (SkString(fsVaryingName), varyingType));
@@ -446,7 +446,7 @@ void GrGLVertexProgramEffects::setTransformData(const GrGLUniformManager& unifor
                 break;
             }
             default:
-                GrCrash("Unexpected uniform type.");
+                SkFAIL("Unexpected uniform type.");
         }
     }
 }
@@ -572,7 +572,7 @@ void GrGLPathTexGenProgramEffects::setPathTexGenState(GrGpuGL* gpu,
                 break;
             }
             default:
-                GrCrash("Unexpected matrixs type.");
+                SkFAIL("Unexpected matrixs type.");
         }
     }
 }
diff --git a/gpu/gl/GrGLSL.cpp b/gpu/gl/GrGLSL.cpp
index 7587fe8d..468b13b1 100644
--- a/gpu/gl/GrGLSL.cpp
+++ b/gpu/gl/GrGLSL.cpp
@@ -34,7 +34,7 @@ bool GrGetGLSLGeneration(const GrGLInterface* gl, GrGLSLGeneration* generation)
             *generation = k110_GrGLSLGeneration;
             return true;
         default:
-            GrCrash("Unknown GL Standard");
+            SkFAIL("Unknown GL Standard");
             return false;
     }
 }
@@ -64,7 +64,7 @@ const char* GrGetGLSLVersionDecl(const GrGLContextInfo& info) {
                 return "#version 150 compatibility\n";
             }
         default:
-            GrCrash("Unknown GL version.");
+            SkFAIL("Unknown GL version.");
             return ""; // suppress warning
     }
 }
diff --git a/gpu/gl/GrGLSL.h b/gpu/gl/GrGLSL.h
index 8234be9c..ff39c2b6 100644
--- a/gpu/gl/GrGLSL.h
+++ b/gpu/gl/GrGLSL.h
@@ -70,7 +70,7 @@ static inline const char* GrGLSLTypeString(GrSLType t) {
         case kSampler2D_GrSLType:
             return "sampler2D";
         default:
-            GrCrash("Unknown shader var type.");
+            SkFAIL("Unknown shader var type.");
             return ""; // suppress warning
     }
 }
diff --git a/gpu/gl/GrGLShaderBuilder.cpp b/gpu/gl/GrGLShaderBuilder.cpp
index b72e23f9..c5df4c80 100644
--- a/gpu/gl/GrGLShaderBuilder.cpp
+++ b/gpu/gl/GrGLShaderBuilder.cpp
@@ -187,7 +187,7 @@ bool GrGLShaderBuilder::enableFeature(GLSLFeature feature) {
             }
             return true;
         default:
-            GrCrash("Unexpected GLSLFeature requested.");
+            SkFAIL("Unexpected GLSLFeature requested.");
             return false;
     }
 }
@@ -218,7 +218,7 @@ bool GrGLShaderBuilder::enablePrivateFeature(GLSLPrivateFeature feature) {
                                "GL_NV_shader_framebuffer_fetch");
             return true;
         default:
-            GrCrash("Unexpected GLSLPrivateFeature requested.");
+            SkFAIL("Unexpected GLSLPrivateFeature requested.");
             return false;
     }
 }
@@ -249,7 +249,7 @@ const char* GrGLShaderBuilder::dstColor() {
     if (fCodeStage.inStageCode()) {
         const GrEffectRef& effect = *fCodeStage.effectStage()->getEffect();
         if (!effect->willReadDstColor()) {
-            GrDebugCrash("GrGLEffect asked for dst color but its generating GrEffect "
+            SkDEBUGFAIL("GrGLEffect asked for dst color but its generating GrEffect "
                          "did not request access.");
             return "";
         }
@@ -399,7 +399,7 @@ const char* GrGLShaderBuilder::fragmentPosition() {
     if (fCodeStage.inStageCode()) {
         const GrEffectRef& effect = *fCodeStage.effectStage()->getEffect();
         if (!effect->willReadFragmentPosition()) {
-            GrDebugCrash("GrGLEffect asked for frag position but its generating GrEffect "
+            SkDEBUGFAIL("GrGLEffect asked for frag position but its generating GrEffect "
                          "did not request access.");
             return "";
         }
@@ -483,9 +483,9 @@ inline void append_default_precision_qualifier(GrGLShaderVar::Precision p,
                 str->append("precision lowp float;\n");
                 break;
             case GrGLShaderVar::kDefault_Precision:
-                GrCrash("Default precision now allowed.");
+                SkFAIL("Default precision now allowed.");
             default:
-                GrCrash("Unknown precision value.");
+                SkFAIL("Unknown precision value.");
         }
     }
 }
diff --git a/gpu/gl/GrGLShaderVar.h b/gpu/gl/GrGLShaderVar.h
index 7862abdb..68c4bbd2 100644
--- a/gpu/gl/GrGLShaderVar.h
+++ b/gpu/gl/GrGLShaderVar.h
@@ -315,7 +315,7 @@ public:
                 case kDefault_Precision:
                     return "";
                 default:
-                    GrCrash("Unexpected precision type.");
+                    SkFAIL("Unexpected precision type.");
             }
         }
         return "";
@@ -341,7 +341,7 @@ private:
             case kVaryingOut_TypeModifier:
                 return k110_GrGLSLGeneration == gen ? "varying" : "out";
             default:
-                GrCrash("Unknown shader variable type modifier.");
+                SkFAIL("Unknown shader variable type modifier.");
                 return ""; // suppress warning
         }
     }
diff --git a/gpu/gl/GrGLStencilBuffer.cpp b/gpu/gl/GrGLStencilBuffer.cpp
index 33e346c6..abcb3c4b 100644
--- a/gpu/gl/GrGLStencilBuffer.cpp
+++ b/gpu/gl/GrGLStencilBuffer.cpp
@@ -13,7 +13,7 @@ GrGLStencilBuffer::~GrGLStencilBuffer() {
     this->release();
 }
 
-size_t GrGLStencilBuffer::sizeInBytes() const {
+size_t GrGLStencilBuffer::gpuMemorySize() const {
     uint64_t size = this->width();
     size *= this->height();
     size *= fFormat.fTotalBits;
diff --git a/gpu/gl/GrGLStencilBuffer.h b/gpu/gl/GrGLStencilBuffer.h
index 2bf33ef7..1cb0a330 100644
--- a/gpu/gl/GrGLStencilBuffer.h
+++ b/gpu/gl/GrGLStencilBuffer.h
@@ -36,7 +36,7 @@ public:
 
     virtual ~GrGLStencilBuffer();
 
-    virtual size_t sizeInBytes() const SK_OVERRIDE;
+    virtual size_t gpuMemorySize() const SK_OVERRIDE;
 
     GrGLuint renderbufferID() const {
         return fRenderbufferID;
diff --git a/gpu/gl/GrGLUtil.cpp b/gpu/gl/GrGLUtil.cpp
index ddfcfbf0..0fa2d2ce 100644
--- a/gpu/gl/GrGLUtil.cpp
+++ b/gpu/gl/GrGLUtil.cpp
@@ -99,7 +99,7 @@ bool get_gl_version_for_mesa(int mesaMajorVersion, int* major, int* minor) {
 
 GrGLStandard GrGLGetStandardInUseFromString(const char* versionString) {
     if (NULL == versionString) {
-        SkDEBUGFAIL("NULL GL version string.");
+        SkDebugf("NULL GL version string.");
         return kNone_GrGLStandard;
     }
 
@@ -139,7 +139,7 @@ bool GrGLIsChromiumFromRendererString(const char* rendererString) {
 
 GrGLVersion GrGLGetVersionFromString(const char* versionString) {
     if (NULL == versionString) {
-        SkDEBUGFAIL("NULL GL version string.");
+        SkDebugf("NULL GL version string.");
         return GR_GL_INVALID_VER;
     }
 
@@ -178,7 +178,7 @@ GrGLVersion GrGLGetVersionFromString(const char* versionString) {
 
 GrGLSLVersion GrGLGetGLSLVersionFromString(const char* versionString) {
     if (NULL == versionString) {
-        SkDEBUGFAIL("NULL GLSL version string.");
+        SkDebugf("NULL GLSL version string.");
         return GR_GLSL_INVALID_VER;
     }
 
diff --git a/gpu/gl/GrGLVertexArray.cpp b/gpu/gl/GrGLVertexArray.cpp
index abd337a8..66feb820 100644
--- a/gpu/gl/GrGLVertexArray.cpp
+++ b/gpu/gl/GrGLVertexArray.cpp
@@ -69,7 +69,7 @@ void GrGLAttribArrayState::disableUnusedArrays(const GrGpuGL* gpu, uint64_t used
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 GrGLVertexArray::GrGLVertexArray(GrGpuGL* gpu, GrGLint id, int attribCount)
-    : GrResource(gpu, false)
+    : INHERITED(gpu, false)
     , fID(id)
     , fAttribArrays(attribCount)
     , fIndexBufferIDIsValid(false) {
diff --git a/gpu/gl/GrGLVertexArray.h b/gpu/gl/GrGLVertexArray.h
index 8a61f1a2..0e5bffe4 100644
--- a/gpu/gl/GrGLVertexArray.h
+++ b/gpu/gl/GrGLVertexArray.h
@@ -8,7 +8,7 @@
 #ifndef GrGLVertexArray_DEFINED
 #define GrGLVertexArray_DEFINED
 
-#include "GrResource.h"
+#include "GrGpuObject.h"
 #include "GrTypesPriv.h"
 #include "gl/GrGLDefines.h"
 #include "gl/GrGLFunctions.h"
@@ -130,7 +130,7 @@ private:
  * This class represents an OpenGL vertex array object. It manages the lifetime of the vertex array
  * and is used to track the state of the vertex array to avoid redundant GL calls.
  */
-class GrGLVertexArray : public GrResource {
+class GrGLVertexArray : public GrGpuObject {
 public:
     GrGLVertexArray(GrGpuGL* gpu, GrGLint id, int attribCount);
 
@@ -157,7 +157,7 @@ public:
 
     void invalidateCachedState();
 
-    virtual size_t sizeInBytes() const SK_OVERRIDE { return 0; }
+    virtual size_t gpuMemorySize() const SK_OVERRIDE { return 0; }
 
 protected:
     virtual void onAbandon() SK_OVERRIDE;
@@ -170,7 +170,7 @@ private:
     GrGLuint                fIndexBufferID;
     bool                    fIndexBufferIDIsValid;
 
-    typedef GrResource INHERITED;
+    typedef GrGpuObject INHERITED;
 };
 
 #endif
diff --git a/gpu/gl/GrGLVertexBuffer.cpp b/gpu/gl/GrGLVertexBuffer.cpp
index 685166c9..8bfe1f0c 100644
--- a/gpu/gl/GrGLVertexBuffer.cpp
+++ b/gpu/gl/GrGLVertexBuffer.cpp
@@ -14,7 +14,7 @@ GrGLVertexBuffer::GrGLVertexBuffer(GrGpuGL* gpu, const Desc& desc)
 }
 
 void GrGLVertexBuffer::onRelease() {
-    if (this->isValid()) {
+    if (!this->wasDestroyed()) {
         fImpl.release(this->getGpuGL());
     }
 
@@ -28,7 +28,7 @@ void GrGLVertexBuffer::onAbandon() {
 }
 
 void* GrGLVertexBuffer::lock() {
-    if (this->isValid()) {
+    if (!this->wasDestroyed()) {
         return fImpl.lock(this->getGpuGL());
     } else {
         return NULL;
@@ -40,7 +40,7 @@ void* GrGLVertexBuffer::lockPtr() const {
 }
 
 void GrGLVertexBuffer::unlock() {
-    if (this->isValid()) {
+    if (!this->wasDestroyed()) {
         fImpl.unlock(this->getGpuGL());
     }
 }
@@ -50,7 +50,7 @@ bool GrGLVertexBuffer::isLocked() const {
 }
 
 bool GrGLVertexBuffer::updateData(const void* src, size_t srcSizeInBytes) {
-    if (this->isValid()) {
+    if (!this->wasDestroyed()) {
         return fImpl.updateData(this->getGpuGL(), src, srcSizeInBytes);
     } else {
         return false;
diff --git a/gpu/gl/GrGLVertexBuffer.h b/gpu/gl/GrGLVertexBuffer.h
index 1741adc2..1b9c4f17 100644
--- a/gpu/gl/GrGLVertexBuffer.h
+++ b/gpu/gl/GrGLVertexBuffer.h
@@ -26,7 +26,7 @@ public:
     size_t baseOffset() const { return fImpl.baseOffset(); }
 
     void bind() const {
-        if (this->isValid()) {
+        if (!this->wasDestroyed()) {
             fImpl.bind(this->getGpuGL());
         }
     }
@@ -45,7 +45,7 @@ protected:
 
 private:
     GrGpuGL* getGpuGL() const {
-        SkASSERT(this->isValid());
+        SkASSERT(!this->wasDestroyed());
         return (GrGpuGL*)(this->getGpu());
     }
 
diff --git a/gpu/gl/GrGLVertexEffect.h b/gpu/gl/GrGLVertexEffect.h
index 1b4c7444..40b4b340 100644
--- a/gpu/gl/GrGLVertexEffect.h
+++ b/gpu/gl/GrGLVertexEffect.h
@@ -42,7 +42,7 @@ public:
                           const char* inputColor,
                           const TransformedCoordsArray& coords,
                           const TextureSamplerArray& samplers) SK_OVERRIDE {
-        GrCrash("GrGLVertexEffect requires GrGLFullShaderBuilder* overload for emitCode().");
+        SkFAIL("GrGLVertexEffect requires GrGLFullShaderBuilder* overload for emitCode().");
     }
 
 private:
diff --git a/gpu/gl/GrGpuGL.cpp b/gpu/gl/GrGpuGL.cpp
index 4b39a163..1a1bad7f 100644
--- a/gpu/gl/GrGpuGL.cpp
+++ b/gpu/gl/GrGpuGL.cpp
@@ -730,7 +730,7 @@ static bool renderbuffer_storage_msaa(GrGLContext& ctx,
                                                                 width, height));
             break;
         case GrGLCaps::kNone_MSFBOType:
-            GrCrash("Shouldn't be here if we don't support multisampled renderbuffers.");
+            SkFAIL("Shouldn't be here if we don't support multisampled renderbuffers.");
             break;
     }
     return (GR_GL_NO_ERROR == CHECK_ALLOC_ERROR(ctx.interface()));;
@@ -1303,7 +1303,7 @@ void GrGpuGL::discard(GrRenderTarget* renderTarget) {
     }
     switch (this->glCaps().invalidateFBType()) {
         case GrGLCaps::kNone_FBFetchType:
-            GrCrash("Should never get here.");
+            SkFAIL("Should never get here.");
             break;
         case GrGLCaps::kInvalidate_InvalidateFBType:
             if (0 == glRT->renderFBOID()) {
@@ -1463,7 +1463,7 @@ bool GrGpuGL::onReadPixels(GrRenderTarget* target,
                                     tgt->textureFBOID()));
             break;
         default:
-            GrCrash("Unknown resolve type");
+            SkFAIL("Unknown resolve type");
     }
 
     const GrGLIRect& glvp = tgt->getViewport();
@@ -1656,7 +1656,7 @@ void GrGpuGL::onGpuDraw(const DrawInfo& info) {
 static GrGLenum gr_stencil_op_to_gl_path_rendering_fill_mode(GrStencilOp op) {
     switch (op) {
         default:
-            GrCrash("Unexpected path fill.");
+            SkFAIL("Unexpected path fill.");
             /* fallthrough */;
         case kIncClamp_StencilOp:
             return GR_GL_COUNT_UP;
@@ -2357,7 +2357,7 @@ void GrGpuGL::flushMiscFixedFunctionState() {
                 GL_CALL(Disable(GR_GL_CULL_FACE));
                 break;
             default:
-                GrCrash("Unknown draw face.");
+                SkFAIL("Unknown draw face.");
         }
         fHWDrawFace = drawState.getDrawFace();
     }
@@ -2788,7 +2788,7 @@ GrGLAttribArrayState* GrGpuGL::HWGeometryState::bindArrayAndBuffersToDraw(
 
     // We use a vertex array if we're on a core profile and the verts are in a VBO.
     if (gpu->glCaps().isCoreProfile() && !vbuffer->isCPUBacked()) {
-        if (NULL == fVBOVertexArray || !fVBOVertexArray->isValid()) {
+        if (NULL == fVBOVertexArray || fVBOVertexArray->wasDestroyed()) {
             SkSafeUnref(fVBOVertexArray);
             GrGLuint arrayID;
             GR_GL_CALL(gpu->glInterface(), GenVertexArrays(1, &arrayID));
diff --git a/gpu/gl/GrGpuGL_program.cpp b/gpu/gl/GrGpuGL_program.cpp
index 0a7bb0e3..b9b09847 100644
--- a/gpu/gl/GrGpuGL_program.cpp
+++ b/gpu/gl/GrGpuGL_program.cpp
@@ -309,7 +309,7 @@ void GrGpuGL::setupGeometry(const DrawInfo& info, size_t* indexOffsetInBytes) {
             break;
         default:
             vbuf = NULL; // suppress warning
-            GrCrash("Unknown geometry src type!");
+            SkFAIL("Unknown geometry src type!");
     }
 
     SkASSERT(NULL != vbuf);
@@ -333,7 +333,7 @@ void GrGpuGL::setupGeometry(const DrawInfo& info, size_t* indexOffsetInBytes) {
             break;
         default:
             ibuf = NULL; // suppress warning
-            GrCrash("Unknown geometry src type!");
+            SkFAIL("Unknown geometry src type!");
         }
 
         SkASSERT(NULL != ibuf);
diff --git a/gpu/gl/android/GrGLCreateNativeInterface_android.cpp b/gpu/gl/android/GrGLCreateNativeInterface_android.cpp
index b50063fb..312299ad 100644
--- a/gpu/gl/android/GrGLCreateNativeInterface_android.cpp
+++ b/gpu/gl/android/GrGLCreateNativeInterface_android.cpp
@@ -75,7 +75,7 @@ static GrGLInterface* create_es_interface(GrGLVersion version,
     functions->fGetShaderInfoLog = glGetShaderInfoLog;
     functions->fGetShaderiv = glGetShaderiv;
     functions->fGetString = glGetString;
-#if GL_ES_VERSION_30
+#if GL_ES_VERSION_3_0
     functions->fGetStringi = glGetStringi;
 #else
     functions->fGetStringi = (GrGLGetStringiProc) eglGetProcAddress("glGetStringi");
@@ -183,12 +183,24 @@ static GrGLInterface* create_es_interface(GrGLVersion version,
     functions->fGetFramebufferAttachmentParameteriv = glGetFramebufferAttachmentParameteriv;
     functions->fGetRenderbufferParameteriv = glGetRenderbufferParameteriv;
     functions->fRenderbufferStorage = glRenderbufferStorage;
+
 #if GL_OES_mapbuffer
     functions->fMapBuffer = glMapBufferOES;
     functions->fUnmapBuffer = glUnmapBufferOES;
 #else
     functions->fMapBuffer = (GrGLMapBufferProc) eglGetProcAddress("glMapBufferOES");
     functions->fUnmapBuffer = (GrGLUnmapBufferProc) eglGetProcAddress("glUnmapBufferOES");
+
+#endif
+
+#if GL_ES_VERSION_3_0 || GL_EXT_map_buffer_range
+    functions->fMapBufferRange = glMapBufferRange;
+    functions->fFlushMappedBufferRange = glFlushMappedBufferRange;
+#else
+    if (version >= GR_GL_VER(3,0) || extensions->has("GL_EXT_map_buffer_range")) {
+        functions->fMapBufferRange = (GrGLMapBufferRangeProc) eglGetProcAddress("glMapBufferRange");
+        functions->fFlushMappedBufferRange = (GrGLFlushMappedBufferRangeProc) eglGetProcAddress("glFlushMappedBufferRange");
+    }
 #endif
 
     if (extensions->has("GL_EXT_debug_marker")) {
diff --git a/gpu/gl/angle/GrGLCreateANGLEInterface.cpp b/gpu/gl/angle/GrGLCreateANGLEInterface.cpp
index a316ff1c..cb2fc953 100644
--- a/gpu/gl/angle/GrGLCreateANGLEInterface.cpp
+++ b/gpu/gl/angle/GrGLCreateANGLEInterface.cpp
@@ -154,6 +154,14 @@ const GrGLInterface* GrGLCreateANGLEInterface() {
     functions->fMapBuffer = (GrGLMapBufferProc) eglGetProcAddress("glMapBufferOES");
     functions->fUnmapBuffer = (GrGLUnmapBufferProc) eglGetProcAddress("glUnmapBufferOES");
 
+#if GL_ES_VERSION_3_0
+    functions->fMapBufferRange = GET_PROC(glMapBufferRange);
+    functions->fFlushMappedBufferRange = GET_PROC(glFlushMappedBufferRange);
+#else
+    functions->fMapBufferRange = (GrGLMapBufferRangeProc) eglGetProcAddress("glMapBufferRange");
+    functions->fFlushMappedBufferRange = (GrGLFlushMappedBufferRangeProc) eglGetProcAddress("glFlushMappedBufferRange");
+#endif
+
     functions->fInsertEventMarker = (GrGLInsertEventMarkerProc) eglGetProcAddress("glInsertEventMarkerEXT");
     functions->fPushGroupMarker = (GrGLInsertEventMarkerProc) eglGetProcAddress("glPushGroupMarkerEXT");
     functions->fPopGroupMarker = (GrGLPopGroupMarkerProc) eglGetProcAddress("glPopGroupMarkerEXT");
diff --git a/gpu/gl/debug/GrBufferObj.h b/gpu/gl/debug/GrBufferObj.h
index fecfeb5e..05d3cfdd 100644
--- a/gpu/gl/debug/GrBufferObj.h
+++ b/gpu/gl/debug/GrBufferObj.h
@@ -34,9 +34,15 @@ public:
         GrAlwaysAssert(!fMapped);
     }
 
-    void setMapped()             { fMapped = true; }
+    void setMapped(GrGLintptr offset, GrGLsizeiptr length) {
+        fMapped = true;
+        fMappedOffset = offset;
+        fMappedLength = length;
+    }
     void resetMapped()           { fMapped = false; }
     bool getMapped() const       { return fMapped; }
+    GrGLintptr getMappedOffset() const { return fMappedOffset; }   // offset given to setMapped()
+    GrGLsizeiptr getMappedLength() const { return fMappedLength; } // length given to setMapped()
 
     void setBound()              { fBound = true; }
     void resetBound()            { fBound = false; }
@@ -55,7 +61,9 @@ protected:
 private:
 
     GrGLchar*    fDataPtr;
-    bool         fMapped;       // is the buffer object mapped via "glMapBuffer"?
+    bool         fMapped;       // is the buffer object mapped via "glMapBuffer[Range]"?
+    GrGLintptr   fMappedOffset; // the offset of the buffer range that is mapped
+    GrGLsizeiptr fMappedLength; // the size of the buffer range that is mapped
     bool         fBound;        // is the buffer object bound via "glBindBuffer"?
     GrGLsizeiptr fSize;         // size in bytes
     GrGLint      fUsage;        // one of: GL_STREAM_DRAW,
diff --git a/gpu/gl/debug/GrGLCreateDebugInterface.cpp b/gpu/gl/debug/GrGLCreateDebugInterface.cpp
index 0a8333b8..7c430b4b 100644
--- a/gpu/gl/debug/GrGLCreateDebugInterface.cpp
+++ b/gpu/gl/debug/GrGLCreateDebugInterface.cpp
@@ -93,7 +93,7 @@ GrGLvoid GR_GL_FUNCTION_TYPE debugGLBufferData(GrGLenum target,
             buffer = GrDebugGL::getInstance()->getElementArrayBuffer();
             break;
         default:
-            GrCrash("Unexpected target to glBufferData");
+            SkFAIL("Unexpected target to glBufferData");
             break;
     }
 
@@ -586,7 +586,7 @@ GrGLvoid GR_GL_FUNCTION_TYPE debugGLBindBuffer(GrGLenum target, GrGLuint bufferI
             GrDebugGL::getInstance()->setElementArrayBuffer(buffer);
             break;
         default:
-            GrCrash("Unexpected target to glBindBuffer");
+            SkFAIL("Unexpected target to glBindBuffer");
             break;
     }
 }
@@ -622,12 +622,14 @@ GrGLvoid GR_GL_FUNCTION_TYPE debugGLDeleteBuffers(GrGLsizei n, const GrGLuint* i
 }
 
 // map a buffer to the caller's address space
-GrGLvoid* GR_GL_FUNCTION_TYPE debugGLMapBuffer(GrGLenum target, GrGLenum access) {
-
+GrGLvoid* GR_GL_FUNCTION_TYPE debugGLMapBufferRange(GrGLenum target, GrGLintptr offset,
+                                                    GrGLsizeiptr length, GrGLbitfield access) {
     GrAlwaysAssert(GR_GL_ARRAY_BUFFER == target ||
                    GR_GL_ELEMENT_ARRAY_BUFFER == target);
-    // GR_GL_READ_ONLY == access ||  || GR_GL_READ_WRIT == access);
-    GrAlwaysAssert(GR_GL_WRITE_ONLY == access);
+
+    // We only expect write access and we expect that the buffer or range is always invalidated.
+    GrAlwaysAssert(!SkToBool(GR_GL_MAP_READ_BIT & access));
+    GrAlwaysAssert((GR_GL_MAP_INVALIDATE_BUFFER_BIT | GR_GL_MAP_INVALIDATE_RANGE_BIT) & access);
 
     GrBufferObj *buffer = NULL;
     switch (target) {
@@ -638,20 +640,41 @@ GrGLvoid* GR_GL_FUNCTION_TYPE debugGLMapBuffer(GrGLenum target, GrGLenum access)
             buffer = GrDebugGL::getInstance()->getElementArrayBuffer();
             break;
         default:
-            GrCrash("Unexpected target to glMapBuffer");
+            SkFAIL("Unexpected target to glMapBufferRange");
             break;
     }
 
-    if (buffer) {
+    if (NULL != buffer) {
+        GrAlwaysAssert(offset >= 0 && offset + length <= buffer->getSize());
         GrAlwaysAssert(!buffer->getMapped());
-        buffer->setMapped();
-        return buffer->getDataPtr();
+        buffer->setMapped(offset, length);
+        return buffer->getDataPtr() + offset;
     }
 
     GrAlwaysAssert(false);
     return NULL;        // no buffer bound to the target
 }
 
+// Maps the whole buffer bound to 'target' by forwarding to debugGLMapBufferRange().
+GrGLvoid* GR_GL_FUNCTION_TYPE debugGLMapBuffer(GrGLenum target, GrGLenum access) {
+    GrAlwaysAssert(GR_GL_WRITE_ONLY == access);
+    GrBufferObj *buffer = NULL;
+    switch (target) {
+        case GR_GL_ARRAY_BUFFER:
+            buffer = GrDebugGL::getInstance()->getArrayBuffer();
+            break;
+        case GR_GL_ELEMENT_ARRAY_BUFFER:
+            buffer = GrDebugGL::getInstance()->getElementArrayBuffer();
+            break;
+        default:
+            SkFAIL("Unexpected target to glMapBuffer");
+            break;
+    }
+    // Never dereference a NULL buffer here; the range variant reports the unbound target.
+    return debugGLMapBufferRange(target, 0, NULL != buffer ? buffer->getSize() : 0,
+                                 GR_GL_MAP_WRITE_BIT | GR_GL_MAP_INVALIDATE_BUFFER_BIT);
+}
+
 // remove a buffer from the caller's address space
 // TODO: check if the "access" method from "glMapBuffer" was honored
 GrGLboolean GR_GL_FUNCTION_TYPE debugGLUnmapBuffer(GrGLenum target) {
@@ -668,11 +691,11 @@ GrGLboolean GR_GL_FUNCTION_TYPE debugGLUnmapBuffer(GrGLenum target) {
             buffer = GrDebugGL::getInstance()->getElementArrayBuffer();
             break;
         default:
-            GrCrash("Unexpected target to glUnmapBuffer");
+            SkFAIL("Unexpected target to glUnmapBuffer");
             break;
     }
 
-    if (buffer) {
+    if (NULL != buffer) {
         GrAlwaysAssert(buffer->getMapped());
         buffer->resetMapped();
         return GR_GL_TRUE;
@@ -682,6 +705,34 @@ GrGLboolean GR_GL_FUNCTION_TYPE debugGLUnmapBuffer(GrGLenum target) {
     return GR_GL_FALSE; // GR_GL_INVALID_OPERATION;
 }
 
+// Debug stand-in for glFlushMappedBufferRange(): validates the arguments, flushes nothing.
+GrGLvoid GR_GL_FUNCTION_TYPE debugGLFlushMappedBufferRange(GrGLenum target,
+                                                           GrGLintptr offset,
+                                                           GrGLsizeiptr length) {
+    GrAlwaysAssert(GR_GL_ARRAY_BUFFER == target ||
+                   GR_GL_ELEMENT_ARRAY_BUFFER == target);
+    GrBufferObj *buffer = NULL;
+    switch (target) {
+        case GR_GL_ARRAY_BUFFER:
+            buffer = GrDebugGL::getInstance()->getArrayBuffer();
+            break;
+        case GR_GL_ELEMENT_ARRAY_BUFFER:
+            buffer = GrDebugGL::getInstance()->getElementArrayBuffer();
+            break;
+        default:
+            SkFAIL("Unexpected target to glFlushMappedBufferRange");
+            break;
+    }
+
+    if (NULL != buffer) {
+        GrAlwaysAssert(buffer->getMapped());
+        GrAlwaysAssert(offset >= 0 && (offset + length) <= buffer->getMappedLength());
+    } else {
+        GrAlwaysAssert(false);
+    }
+}
+
+
 GrGLvoid GR_GL_FUNCTION_TYPE debugGLGetBufferParameteriv(GrGLenum target,
                                                          GrGLenum value,
                                                          GrGLint* params) {
@@ -706,21 +757,21 @@ GrGLvoid GR_GL_FUNCTION_TYPE debugGLGetBufferParameteriv(GrGLenum target,
     switch (value) {
         case GR_GL_BUFFER_MAPPED:
             *params = GR_GL_FALSE;
-            if (buffer)
+            if (NULL != buffer)
                 *params = buffer->getMapped() ? GR_GL_TRUE : GR_GL_FALSE;
             break;
         case GR_GL_BUFFER_SIZE:
             *params = 0;
-            if (buffer)
+            if (NULL != buffer)
                 *params = SkToInt(buffer->getSize());
             break;
         case GR_GL_BUFFER_USAGE:
             *params = GR_GL_STATIC_DRAW;
-            if (buffer)
+            if (NULL != buffer)
                 *params = buffer->getUsage();
             break;
         default:
-            GrCrash("Unexpected value to glGetBufferParamateriv");
+            SkFAIL("Unexpected value to glGetBufferParamateriv");
             break;
     }
 };
@@ -826,6 +877,7 @@ const GrGLInterface* GrGLCreateDebugInterface() {
     functions->fEndQuery = noOpGLEndQuery;
     functions->fFinish = noOpGLFinish;
     functions->fFlush = noOpGLFlush;
+    functions->fFlushMappedBufferRange = debugGLFlushMappedBufferRange;
     functions->fFrontFace = noOpGLFrontFace;
     functions->fGenerateMipmap = debugGLGenerateMipmap;
     functions->fGenBuffers = debugGLGenBuffers;
@@ -850,6 +902,8 @@ const GrGLInterface* GrGLCreateDebugInterface() {
     functions->fGenVertexArrays = debugGLGenVertexArrays;
     functions->fLineWidth = noOpGLLineWidth;
     functions->fLinkProgram = noOpGLLinkProgram;
+    functions->fMapBuffer = debugGLMapBuffer;
+    functions->fMapBufferRange = debugGLMapBufferRange;
     functions->fPixelStorei = debugGLPixelStorei;
     functions->fQueryCounter = noOpGLQueryCounter;
     functions->fReadBuffer = noOpGLReadBuffer;
@@ -887,6 +941,7 @@ const GrGLInterface* GrGLCreateDebugInterface() {
     functions->fUniformMatrix2fv = noOpGLUniformMatrix2fv;
     functions->fUniformMatrix3fv = noOpGLUniformMatrix3fv;
     functions->fUniformMatrix4fv = noOpGLUniformMatrix4fv;
+    functions->fUnmapBuffer = debugGLUnmapBuffer;
     functions->fUseProgram = debugGLUseProgram;
     functions->fVertexAttrib4fv = noOpGLVertexAttrib4fv;
     functions->fVertexAttribPointer = noOpGLVertexAttribPointer;
@@ -909,10 +964,9 @@ const GrGLInterface* GrGLCreateDebugInterface() {
     functions->fBlitFramebuffer = noOpGLBlitFramebuffer;
     functions->fResolveMultisampleFramebuffer =
                                     noOpGLResolveMultisampleFramebuffer;
-    functions->fMapBuffer = debugGLMapBuffer;
     functions->fMatrixLoadf = noOpGLMatrixLoadf;
     functions->fMatrixLoadIdentity = noOpGLMatrixLoadIdentity;
-    functions->fUnmapBuffer = debugGLUnmapBuffer;
+
     functions->fBindFragDataLocationIndexed =
                                     noOpGLBindFragDataLocationIndexed;
 
diff --git a/gpu/gl/iOS/GrGLCreateNativeInterface_iOS.cpp b/gpu/gl/iOS/GrGLCreateNativeInterface_iOS.cpp
index 6af04715..08e7ac8a 100644
--- a/gpu/gl/iOS/GrGLCreateNativeInterface_iOS.cpp
+++ b/gpu/gl/iOS/GrGLCreateNativeInterface_iOS.cpp
@@ -132,6 +132,11 @@ const GrGLInterface* GrGLCreateNativeInterface() {
     functions->fUnmapBuffer = glUnmapBufferOES;
 #endif
 
+#if GL_EXT_map_buffer_range || GL_ES_VERSION_3_0
+    functions->fMapBufferRange = glMapBufferRangeEXT;
+    functions->fFlushMappedBufferRange = glFlushMappedBufferRangeEXT;
+#endif
+
 #if GL_APPLE_framebuffer_multisample
     functions->fRenderbufferStorageMultisample = glRenderbufferStorageMultisampleAPPLE;
     functions->fResolveMultisampleFramebuffer = glResolveMultisampleFramebufferAPPLE;
diff --git a/image/SkSurface_Gpu.cpp b/image/SkSurface_Gpu.cpp
index 6f018bf2..a34b7743 100644
--- a/image/SkSurface_Gpu.cpp
+++ b/image/SkSurface_Gpu.cpp
@@ -14,7 +14,7 @@ class SkSurface_Gpu : public SkSurface_Base {
 public:
     SK_DECLARE_INST_COUNT(SkSurface_Gpu)
 
-    SkSurface_Gpu(GrRenderTarget*, bool cached);
+    SkSurface_Gpu(GrRenderTarget*, bool cached, TextRenderMode trm);
     virtual ~SkSurface_Gpu();
 
     virtual SkCanvas* onNewCanvas() SK_OVERRIDE;
@@ -33,9 +33,12 @@ private:
 
 ///////////////////////////////////////////////////////////////////////////////
 
-SkSurface_Gpu::SkSurface_Gpu(GrRenderTarget* renderTarget, bool cached)
+SkSurface_Gpu::SkSurface_Gpu(GrRenderTarget* renderTarget, bool cached, TextRenderMode trm)
         : INHERITED(renderTarget->width(), renderTarget->height()) {
-    fDevice = SkGpuDevice::Create(renderTarget, cached ? SkGpuDevice::kCached_Flag : 0);
+    int flags = 0;
+    flags |= cached ? SkGpuDevice::kCached_Flag : 0;
+    flags |= (kDistanceField_TextRenderMode == trm) ? SkGpuDevice::kDFFonts_Flag : 0;
+    fDevice = SkGpuDevice::Create(renderTarget, flags);
 
     if (kRGB_565_GrPixelConfig != renderTarget->config()) {
         fDevice->clear(0x0);
@@ -98,14 +101,15 @@ void SkSurface_Gpu::onDiscard() {
 
 ///////////////////////////////////////////////////////////////////////////////
 
-SkSurface* SkSurface::NewRenderTargetDirect(GrRenderTarget* target) {
+SkSurface* SkSurface::NewRenderTargetDirect(GrRenderTarget* target, TextRenderMode trm) {
     if (NULL == target) {
         return NULL;
     }
-    return SkNEW_ARGS(SkSurface_Gpu, (target, false));
+    return SkNEW_ARGS(SkSurface_Gpu, (target, false, trm));
 }
 
-SkSurface* SkSurface::NewRenderTarget(GrContext* ctx, const SkImageInfo& info, int sampleCount) {
+SkSurface* SkSurface::NewRenderTarget(GrContext* ctx, const SkImageInfo& info, int sampleCount,
+                                      TextRenderMode trm) {
     if (NULL == ctx) {
         return NULL;
     }
@@ -122,10 +126,11 @@ SkSurface* SkSurface::NewRenderTarget(GrContext* ctx, const SkImageInfo& info, i
         return NULL;
     }
 
-    return SkNEW_ARGS(SkSurface_Gpu, (tex->asRenderTarget(), false));
+    return SkNEW_ARGS(SkSurface_Gpu, (tex->asRenderTarget(), false, trm));
 }
 
-SkSurface* SkSurface::NewScratchRenderTarget(GrContext* ctx, const SkImageInfo& info, int sampleCount) {
+SkSurface* SkSurface::NewScratchRenderTarget(GrContext* ctx, const SkImageInfo& info,
+                                             int sampleCount, TextRenderMode trm) {
     if (NULL == ctx) {
         return NULL;
     }
@@ -143,5 +148,5 @@ SkSurface* SkSurface::NewScratchRenderTarget(GrContext* ctx, const SkImageInfo&
         return NULL;
     }
 
-    return SkNEW_ARGS(SkSurface_Gpu, (tex->asRenderTarget(), true));
+    return SkNEW_ARGS(SkSurface_Gpu, (tex->asRenderTarget(), true, trm));
 }
diff --git a/opts/SkBitmapFilter_opts_SSE2.cpp b/opts/SkBitmapFilter_opts_SSE2.cpp
index 259e2efc..b0405669 100644
--- a/opts/SkBitmapFilter_opts_SSE2.cpp
+++ b/opts/SkBitmapFilter_opts_SSE2.cpp
@@ -5,17 +5,15 @@
  * found in the LICENSE file.
  */
 
-#include "SkBitmapProcState.h"
+#include <emmintrin.h>
 #include "SkBitmap.h"
+#include "SkBitmapFilter_opts_SSE2.h"
+#include "SkBitmapProcState.h"
 #include "SkColor.h"
 #include "SkColorPriv.h"
-#include "SkUnPreMultiply.h"
-#include "SkShader.h"
 #include "SkConvolver.h"
-
-#include "SkBitmapFilter_opts_SSE2.h"
-
-#include <emmintrin.h>
+#include "SkShader.h"
+#include "SkUnPreMultiply.h"
 
 #if 0
 static inline void print128i(__m128i value) {
@@ -175,7 +173,6 @@ void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y,
 
         s.fInvProc(s.fInvMatrix, SkIntToScalar(x),
                     SkIntToScalar(y), &srcPt);
-
     }
 }
 
@@ -185,126 +182,126 @@ void convolveHorizontally_SSE2(const unsigned char* src_data,
                                const SkConvolutionFilter1D& filter,
                                unsigned char* out_row,
                                bool /*has_alpha*/) {
-  int num_values = filter.numValues();
-
-  int filter_offset, filter_length;
-  __m128i zero = _mm_setzero_si128();
-  __m128i mask[4];
-  // |mask| will be used to decimate all extra filter coefficients that are
-  // loaded by SIMD when |filter_length| is not divisible by 4.
-  // mask[0] is not used in following algorithm.
-  mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
-  mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1);
-  mask[3] = _mm_set_epi16(0, 0, 0, 0, 0, -1, -1, -1);
-
-  // Output one pixel each iteration, calculating all channels (RGBA) together.
-  for (int out_x = 0; out_x < num_values; out_x++) {
-    const SkConvolutionFilter1D::ConvolutionFixed* filter_values =
-        filter.FilterForValue(out_x, &filter_offset, &filter_length);
-
-    __m128i accum = _mm_setzero_si128();
-
-    // Compute the first pixel in this row that the filter affects. It will
-    // touch |filter_length| pixels (4 bytes each) after this.
-    const __m128i* row_to_filter =
-        reinterpret_cast<const __m128i*>(&src_data[filter_offset << 2]);
-
-    // We will load and accumulate with four coefficients per iteration.
-    for (int filter_x = 0; filter_x < filter_length >> 2; filter_x++) {
-
-      // Load 4 coefficients => duplicate 1st and 2nd of them for all channels.
-      __m128i coeff, coeff16;
-      // [16] xx xx xx xx c3 c2 c1 c0
-      coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
-      // [16] xx xx xx xx c1 c1 c0 c0
-      coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
-      // [16] c1 c1 c1 c1 c0 c0 c0 c0
-      coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
-
-      // Load four pixels => unpack the first two pixels to 16 bits =>
-      // multiply with coefficients => accumulate the convolution result.
-      // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-      __m128i src8 = _mm_loadu_si128(row_to_filter);
-      // [16] a1 b1 g1 r1 a0 b0 g0 r0
-      __m128i src16 = _mm_unpacklo_epi8(src8, zero);
-      __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
-      __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
-      // [32]  a0*c0 b0*c0 g0*c0 r0*c0
-      __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-      accum = _mm_add_epi32(accum, t);
-      // [32]  a1*c1 b1*c1 g1*c1 r1*c1
-      t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-      accum = _mm_add_epi32(accum, t);
-
-      // Duplicate 3rd and 4th coefficients for all channels =>
-      // unpack the 3rd and 4th pixels to 16 bits => multiply with coefficients
-      // => accumulate the convolution results.
-      // [16] xx xx xx xx c3 c3 c2 c2
-      coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
-      // [16] c3 c3 c3 c3 c2 c2 c2 c2
-      coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
-      // [16] a3 g3 b3 r3 a2 g2 b2 r2
-      src16 = _mm_unpackhi_epi8(src8, zero);
-      mul_hi = _mm_mulhi_epi16(src16, coeff16);
-      mul_lo = _mm_mullo_epi16(src16, coeff16);
-      // [32]  a2*c2 b2*c2 g2*c2 r2*c2
-      t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-      accum = _mm_add_epi32(accum, t);
-      // [32]  a3*c3 b3*c3 g3*c3 r3*c3
-      t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-      accum = _mm_add_epi32(accum, t);
-
-      // Advance the pixel and coefficients pointers.
-      row_to_filter += 1;
-      filter_values += 4;
-    }
+    int num_values = filter.numValues();
+
+    int filter_offset, filter_length;
+    __m128i zero = _mm_setzero_si128();
+    __m128i mask[4];
+    // |mask| will be used to decimate all extra filter coefficients that are
+    // loaded by SIMD when |filter_length| is not divisible by 4.
+    // mask[0] is not used in following algorithm.
+    mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
+    mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1);
+    mask[3] = _mm_set_epi16(0, 0, 0, 0, 0, -1, -1, -1);
+
+    // Output one pixel each iteration, calculating all channels (RGBA) together.
+    for (int out_x = 0; out_x < num_values; out_x++) {
+        const SkConvolutionFilter1D::ConvolutionFixed* filter_values =
+            filter.FilterForValue(out_x, &filter_offset, &filter_length);
+
+        __m128i accum = _mm_setzero_si128();
+
+        // Compute the first pixel in this row that the filter affects. It will
+        // touch |filter_length| pixels (4 bytes each) after this.
+        const __m128i* row_to_filter =
+            reinterpret_cast<const __m128i*>(&src_data[filter_offset << 2]);
+
+        // We will load and accumulate with four coefficients per iteration.
+        for (int filter_x = 0; filter_x < filter_length >> 2; filter_x++) {
+
+            // Load 4 coefficients => duplicate 1st and 2nd of them for all channels.
+            __m128i coeff, coeff16;
+            // [16] xx xx xx xx c3 c2 c1 c0
+            coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
+            // [16] xx xx xx xx c1 c1 c0 c0
+            coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
+            // [16] c1 c1 c1 c1 c0 c0 c0 c0
+            coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
+
+            // Load four pixels => unpack the first two pixels to 16 bits =>
+            // multiply with coefficients => accumulate the convolution result.
+            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
+            __m128i src8 = _mm_loadu_si128(row_to_filter);
+            // [16] a1 b1 g1 r1 a0 b0 g0 r0
+            __m128i src16 = _mm_unpacklo_epi8(src8, zero);
+            __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
+            __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
+            // [32]  a0*c0 b0*c0 g0*c0 r0*c0
+            __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
+            accum = _mm_add_epi32(accum, t);
+            // [32]  a1*c1 b1*c1 g1*c1 r1*c1
+            t = _mm_unpackhi_epi16(mul_lo, mul_hi);
+            accum = _mm_add_epi32(accum, t);
+
+            // Duplicate 3rd and 4th coefficients for all channels =>
+            // unpack the 3rd and 4th pixels to 16 bits => multiply with coefficients
+            // => accumulate the convolution results.
+            // [16] xx xx xx xx c3 c3 c2 c2
+            coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
+            // [16] c3 c3 c3 c3 c2 c2 c2 c2
+            coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
+            // [16] a3 b3 g3 r3 a2 b2 g2 r2
+            src16 = _mm_unpackhi_epi8(src8, zero);
+            mul_hi = _mm_mulhi_epi16(src16, coeff16);
+            mul_lo = _mm_mullo_epi16(src16, coeff16);
+            // [32]  a2*c2 b2*c2 g2*c2 r2*c2
+            t = _mm_unpacklo_epi16(mul_lo, mul_hi);
+            accum = _mm_add_epi32(accum, t);
+            // [32]  a3*c3 b3*c3 g3*c3 r3*c3
+            t = _mm_unpackhi_epi16(mul_lo, mul_hi);
+            accum = _mm_add_epi32(accum, t);
+
+            // Advance the pixel and coefficients pointers.
+            row_to_filter += 1;
+            filter_values += 4;
+        }
 
-    // When |filter_length| is not divisible by 4, we need to decimate some of
-    // the filter coefficient that was loaded incorrectly to zero; Other than
-    // that the algorithm is same with above, exceot that the 4th pixel will be
-    // always absent.
-    int r = filter_length&3;
-    if (r) {
-      // Note: filter_values must be padded to align_up(filter_offset, 8).
-      __m128i coeff, coeff16;
-      coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
-      // Mask out extra filter taps.
-      coeff = _mm_and_si128(coeff, mask[r]);
-      coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
-      coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
-
-      // Note: line buffer must be padded to align_up(filter_offset, 16).
-      // We resolve this by use C-version for the last horizontal line.
-      __m128i src8 = _mm_loadu_si128(row_to_filter);
-      __m128i src16 = _mm_unpacklo_epi8(src8, zero);
-      __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
-      __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
-      __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-      accum = _mm_add_epi32(accum, t);
-      t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-      accum = _mm_add_epi32(accum, t);
-
-      src16 = _mm_unpackhi_epi8(src8, zero);
-      coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
-      coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
-      mul_hi = _mm_mulhi_epi16(src16, coeff16);
-      mul_lo = _mm_mullo_epi16(src16, coeff16);
-      t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-      accum = _mm_add_epi32(accum, t);
-    }
+        // When |filter_length| is not divisible by 4, we need to zero out the
+        // extra filter coefficients that were picked up by the SIMD load. Other
+        // than that the algorithm is the same as above, except that the 4th pixel
+        // is always absent.
+        int r = filter_length&3;
+        if (r) {
+            // Note: filter_values must be padded to align_up(filter_offset, 8).
+            __m128i coeff, coeff16;
+            coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
+            // Mask out extra filter taps.
+            coeff = _mm_and_si128(coeff, mask[r]);
+            coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
+            coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
+
+            // Note: line buffer must be padded to align_up(filter_offset, 16).
+            // We resolve this by using the C version for the last horizontal line.
+            __m128i src8 = _mm_loadu_si128(row_to_filter);
+            __m128i src16 = _mm_unpacklo_epi8(src8, zero);
+            __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
+            __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
+            __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
+            accum = _mm_add_epi32(accum, t);
+            t = _mm_unpackhi_epi16(mul_lo, mul_hi);
+            accum = _mm_add_epi32(accum, t);
+
+            src16 = _mm_unpackhi_epi8(src8, zero);
+            coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
+            coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
+            mul_hi = _mm_mulhi_epi16(src16, coeff16);
+            mul_lo = _mm_mullo_epi16(src16, coeff16);
+            t = _mm_unpacklo_epi16(mul_lo, mul_hi);
+            accum = _mm_add_epi32(accum, t);
+        }
 
-    // Shift right for fixed point implementation.
-    accum = _mm_srai_epi32(accum, SkConvolutionFilter1D::kShiftBits);
+        // Shift right for fixed point implementation.
+        accum = _mm_srai_epi32(accum, SkConvolutionFilter1D::kShiftBits);
 
-    // Packing 32 bits |accum| to 16 bits per channel (signed saturation).
-    accum = _mm_packs_epi32(accum, zero);
-    // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
-    accum = _mm_packus_epi16(accum, zero);
+        // Packing 32 bits |accum| to 16 bits per channel (signed saturation).
+        accum = _mm_packs_epi32(accum, zero);
+        // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
+        accum = _mm_packus_epi16(accum, zero);
 
-    // Store the pixel value of 32 bits.
-    *(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum);
-    out_row += 4;
-  }
+        // Store the pixel value of 32 bits.
+        *(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum);
+        out_row += 4;
+    }
 }
 
 // Convolves horizontally along four rows. The row data is given in
@@ -314,116 +311,116 @@ void convolveHorizontally_SSE2(const unsigned char* src_data,
 void convolve4RowsHorizontally_SSE2(const unsigned char* src_data[4],
                                     const SkConvolutionFilter1D& filter,
                                     unsigned char* out_row[4]) {
-  int num_values = filter.numValues();
-
-  int filter_offset, filter_length;
-  __m128i zero = _mm_setzero_si128();
-  __m128i mask[4];
-  // |mask| will be used to decimate all extra filter coefficients that are
-  // loaded by SIMD when |filter_length| is not divisible by 4.
-  // mask[0] is not used in following algorithm.
-  mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
-  mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1);
-  mask[3] = _mm_set_epi16(0, 0, 0, 0, 0, -1, -1, -1);
-
-  // Output one pixel each iteration, calculating all channels (RGBA) together.
-  for (int out_x = 0; out_x < num_values; out_x++) {
-    const SkConvolutionFilter1D::ConvolutionFixed* filter_values =
-        filter.FilterForValue(out_x, &filter_offset, &filter_length);
-
-    // four pixels in a column per iteration.
-    __m128i accum0 = _mm_setzero_si128();
-    __m128i accum1 = _mm_setzero_si128();
-    __m128i accum2 = _mm_setzero_si128();
-    __m128i accum3 = _mm_setzero_si128();
-    int start = (filter_offset<<2);
-    // We will load and accumulate with four coefficients per iteration.
-    for (int filter_x = 0; filter_x < (filter_length >> 2); filter_x++) {
-      __m128i coeff, coeff16lo, coeff16hi;
-      // [16] xx xx xx xx c3 c2 c1 c0
-      coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
-      // [16] xx xx xx xx c1 c1 c0 c0
-      coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
-      // [16] c1 c1 c1 c1 c0 c0 c0 c0
-      coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
-      // [16] xx xx xx xx c3 c3 c2 c2
-      coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
-      // [16] c3 c3 c3 c3 c2 c2 c2 c2
-      coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
-
-      __m128i src8, src16, mul_hi, mul_lo, t;
-
-#define ITERATION(src, accum)                                          \
-      src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));   \
-      src16 = _mm_unpacklo_epi8(src8, zero);                           \
-      mul_hi = _mm_mulhi_epi16(src16, coeff16lo);                      \
-      mul_lo = _mm_mullo_epi16(src16, coeff16lo);                      \
-      t = _mm_unpacklo_epi16(mul_lo, mul_hi);                          \
-      accum = _mm_add_epi32(accum, t);                                 \
-      t = _mm_unpackhi_epi16(mul_lo, mul_hi);                          \
-      accum = _mm_add_epi32(accum, t);                                 \
-      src16 = _mm_unpackhi_epi8(src8, zero);                           \
-      mul_hi = _mm_mulhi_epi16(src16, coeff16hi);                      \
-      mul_lo = _mm_mullo_epi16(src16, coeff16hi);                      \
-      t = _mm_unpacklo_epi16(mul_lo, mul_hi);                          \
-      accum = _mm_add_epi32(accum, t);                                 \
-      t = _mm_unpackhi_epi16(mul_lo, mul_hi);                          \
-      accum = _mm_add_epi32(accum, t)
-
-      ITERATION(src_data[0] + start, accum0);
-      ITERATION(src_data[1] + start, accum1);
-      ITERATION(src_data[2] + start, accum2);
-      ITERATION(src_data[3] + start, accum3);
-
-      start += 16;
-      filter_values += 4;
-    }
+    int num_values = filter.numValues();
+
+    int filter_offset, filter_length;
+    __m128i zero = _mm_setzero_si128();
+    __m128i mask[4];
+    // |mask| will be used to decimate all extra filter coefficients that are
+    // loaded by SIMD when |filter_length| is not divisible by 4.
+    // mask[0] is not used in following algorithm.
+    mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
+    mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1);
+    mask[3] = _mm_set_epi16(0, 0, 0, 0, 0, -1, -1, -1);
+
+    // Output one pixel each iteration, calculating all channels (RGBA) together.
+    for (int out_x = 0; out_x < num_values; out_x++) {
+        const SkConvolutionFilter1D::ConvolutionFixed* filter_values =
+            filter.FilterForValue(out_x, &filter_offset, &filter_length);
+
+        // four pixels in a column per iteration.
+        __m128i accum0 = _mm_setzero_si128();
+        __m128i accum1 = _mm_setzero_si128();
+        __m128i accum2 = _mm_setzero_si128();
+        __m128i accum3 = _mm_setzero_si128();
+        int start = (filter_offset<<2);
+        // We will load and accumulate with four coefficients per iteration.
+        for (int filter_x = 0; filter_x < (filter_length >> 2); filter_x++) {
+            __m128i coeff, coeff16lo, coeff16hi;
+            // [16] xx xx xx xx c3 c2 c1 c0
+            coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
+            // [16] xx xx xx xx c1 c1 c0 c0
+            coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
+            // [16] c1 c1 c1 c1 c0 c0 c0 c0
+            coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
+            // [16] xx xx xx xx c3 c3 c2 c2
+            coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
+            // [16] c3 c3 c3 c3 c2 c2 c2 c2
+            coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
+
+            __m128i src8, src16, mul_hi, mul_lo, t;
+
+#define ITERATION(src, accum)                                                \
+            src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));   \
+            src16 = _mm_unpacklo_epi8(src8, zero);                           \
+            mul_hi = _mm_mulhi_epi16(src16, coeff16lo);                      \
+            mul_lo = _mm_mullo_epi16(src16, coeff16lo);                      \
+            t = _mm_unpacklo_epi16(mul_lo, mul_hi);                          \
+            accum = _mm_add_epi32(accum, t);                                 \
+            t = _mm_unpackhi_epi16(mul_lo, mul_hi);                          \
+            accum = _mm_add_epi32(accum, t);                                 \
+            src16 = _mm_unpackhi_epi8(src8, zero);                           \
+            mul_hi = _mm_mulhi_epi16(src16, coeff16hi);                      \
+            mul_lo = _mm_mullo_epi16(src16, coeff16hi);                      \
+            t = _mm_unpacklo_epi16(mul_lo, mul_hi);                          \
+            accum = _mm_add_epi32(accum, t);                                 \
+            t = _mm_unpackhi_epi16(mul_lo, mul_hi);                          \
+            accum = _mm_add_epi32(accum, t)
+
+            ITERATION(src_data[0] + start, accum0);
+            ITERATION(src_data[1] + start, accum1);
+            ITERATION(src_data[2] + start, accum2);
+            ITERATION(src_data[3] + start, accum3);
+
+            start += 16;
+            filter_values += 4;
+        }
 
-    int r = filter_length & 3;
-    if (r) {
-      // Note: filter_values must be padded to align_up(filter_offset, 8);
-      __m128i coeff;
-      coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
-      // Mask out extra filter taps.
-      coeff = _mm_and_si128(coeff, mask[r]);
-
-      __m128i coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
-      /* c1 c1 c1 c1 c0 c0 c0 c0 */
-      coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
-      __m128i coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
-      coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
-
-      __m128i src8, src16, mul_hi, mul_lo, t;
-
-      ITERATION(src_data[0] + start, accum0);
-      ITERATION(src_data[1] + start, accum1);
-      ITERATION(src_data[2] + start, accum2);
-      ITERATION(src_data[3] + start, accum3);
-    }
+        int r = filter_length & 3;
+        if (r) {
+            // Note: filter_values must be padded to align_up(filter_offset, 8);
+            __m128i coeff;
+            coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
+            // Mask out extra filter taps.
+            coeff = _mm_and_si128(coeff, mask[r]);
+
+            __m128i coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
+            /* c1 c1 c1 c1 c0 c0 c0 c0 */
+            coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
+            __m128i coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
+            coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
+
+            __m128i src8, src16, mul_hi, mul_lo, t;
+
+            ITERATION(src_data[0] + start, accum0);
+            ITERATION(src_data[1] + start, accum1);
+            ITERATION(src_data[2] + start, accum2);
+            ITERATION(src_data[3] + start, accum3);
+        }
 
-    accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
-    accum0 = _mm_packs_epi32(accum0, zero);
-    accum0 = _mm_packus_epi16(accum0, zero);
-    accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
-    accum1 = _mm_packs_epi32(accum1, zero);
-    accum1 = _mm_packus_epi16(accum1, zero);
-    accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
-    accum2 = _mm_packs_epi32(accum2, zero);
-    accum2 = _mm_packus_epi16(accum2, zero);
-    accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
-    accum3 = _mm_packs_epi32(accum3, zero);
-    accum3 = _mm_packus_epi16(accum3, zero);
-
-    *(reinterpret_cast<int*>(out_row[0])) = _mm_cvtsi128_si32(accum0);
-    *(reinterpret_cast<int*>(out_row[1])) = _mm_cvtsi128_si32(accum1);
-    *(reinterpret_cast<int*>(out_row[2])) = _mm_cvtsi128_si32(accum2);
-    *(reinterpret_cast<int*>(out_row[3])) = _mm_cvtsi128_si32(accum3);
-
-    out_row[0] += 4;
-    out_row[1] += 4;
-    out_row[2] += 4;
-    out_row[3] += 4;
-  }
+        accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
+        accum0 = _mm_packs_epi32(accum0, zero);
+        accum0 = _mm_packus_epi16(accum0, zero);
+        accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
+        accum1 = _mm_packs_epi32(accum1, zero);
+        accum1 = _mm_packus_epi16(accum1, zero);
+        accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
+        accum2 = _mm_packs_epi32(accum2, zero);
+        accum2 = _mm_packus_epi16(accum2, zero);
+        accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
+        accum3 = _mm_packs_epi32(accum3, zero);
+        accum3 = _mm_packus_epi16(accum3, zero);
+
+        *(reinterpret_cast<int*>(out_row[0])) = _mm_cvtsi128_si32(accum0);
+        *(reinterpret_cast<int*>(out_row[1])) = _mm_cvtsi128_si32(accum1);
+        *(reinterpret_cast<int*>(out_row[2])) = _mm_cvtsi128_si32(accum2);
+        *(reinterpret_cast<int*>(out_row[3])) = _mm_cvtsi128_si32(accum3);
+
+        out_row[0] += 4;
+        out_row[1] += 4;
+        out_row[2] += 4;
+        out_row[3] += 4;
+    }
 }
 
 // Does vertical convolution to produce one output row. The filter values and
@@ -438,166 +435,166 @@ void convolveVertically_SSE2(const SkConvolutionFilter1D::ConvolutionFixed* filt
                              unsigned char* const* source_data_rows,
                              int pixel_width,
                              unsigned char* out_row) {
-  int width = pixel_width & ~3;
-
-  __m128i zero = _mm_setzero_si128();
-  __m128i accum0, accum1, accum2, accum3, coeff16;
-  const __m128i* src;
-  // Output four pixels per iteration (16 bytes).
-  for (int out_x = 0; out_x < width; out_x += 4) {
-
-    // Accumulated result for each pixel. 32 bits per RGBA channel.
-    accum0 = _mm_setzero_si128();
-    accum1 = _mm_setzero_si128();
-    accum2 = _mm_setzero_si128();
-    accum3 = _mm_setzero_si128();
-
-    // Convolve with one filter coefficient per iteration.
-    for (int filter_y = 0; filter_y < filter_length; filter_y++) {
-
-      // Duplicate the filter coefficient 8 times.
-      // [16] cj cj cj cj cj cj cj cj
-      coeff16 = _mm_set1_epi16(filter_values[filter_y]);
-
-      // Load four pixels (16 bytes) together.
-      // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-      src = reinterpret_cast<const __m128i*>(
-          &source_data_rows[filter_y][out_x << 2]);
-      __m128i src8 = _mm_loadu_si128(src);
-
-      // Unpack 1st and 2nd pixels from 8 bits to 16 bits for each channels =>
-      // multiply with current coefficient => accumulate the result.
-      // [16] a1 b1 g1 r1 a0 b0 g0 r0
-      __m128i src16 = _mm_unpacklo_epi8(src8, zero);
-      __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
-      __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
-      // [32] a0 b0 g0 r0
-      __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-      accum0 = _mm_add_epi32(accum0, t);
-      // [32] a1 b1 g1 r1
-      t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-      accum1 = _mm_add_epi32(accum1, t);
-
-      // Unpack 3rd and 4th pixels from 8 bits to 16 bits for each channels =>
-      // multiply with current coefficient => accumulate the result.
-      // [16] a3 b3 g3 r3 a2 b2 g2 r2
-      src16 = _mm_unpackhi_epi8(src8, zero);
-      mul_hi = _mm_mulhi_epi16(src16, coeff16);
-      mul_lo = _mm_mullo_epi16(src16, coeff16);
-      // [32] a2 b2 g2 r2
-      t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-      accum2 = _mm_add_epi32(accum2, t);
-      // [32] a3 b3 g3 r3
-      t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-      accum3 = _mm_add_epi32(accum3, t);
-    }
-
-    // Shift right for fixed point implementation.
-    accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
-    accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
-    accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
-    accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
-
-    // Packing 32 bits |accum| to 16 bits per channel (signed saturation).
-    // [16] a1 b1 g1 r1 a0 b0 g0 r0
-    accum0 = _mm_packs_epi32(accum0, accum1);
-    // [16] a3 b3 g3 r3 a2 b2 g2 r2
-    accum2 = _mm_packs_epi32(accum2, accum3);
+    int width = pixel_width & ~3;
+
+    __m128i zero = _mm_setzero_si128();
+    __m128i accum0, accum1, accum2, accum3, coeff16;
+    const __m128i* src;
+    // Output four pixels per iteration (16 bytes).
+    for (int out_x = 0; out_x < width; out_x += 4) {
+
+        // Accumulated result for each pixel. 32 bits per RGBA channel.
+        accum0 = _mm_setzero_si128();
+        accum1 = _mm_setzero_si128();
+        accum2 = _mm_setzero_si128();
+        accum3 = _mm_setzero_si128();
+
+        // Convolve with one filter coefficient per iteration.
+        for (int filter_y = 0; filter_y < filter_length; filter_y++) {
+
+            // Duplicate the filter coefficient 8 times.
+            // [16] cj cj cj cj cj cj cj cj
+            coeff16 = _mm_set1_epi16(filter_values[filter_y]);
+
+            // Load four pixels (16 bytes) together.
+            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
+            src = reinterpret_cast<const __m128i*>(
+                &source_data_rows[filter_y][out_x << 2]);
+            __m128i src8 = _mm_loadu_si128(src);
+
+            // Unpack 1st and 2nd pixels from 8 bits to 16 bits for each channel =>
+            // multiply with current coefficient => accumulate the result.
+            // [16] a1 b1 g1 r1 a0 b0 g0 r0
+            __m128i src16 = _mm_unpacklo_epi8(src8, zero);
+            __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
+            __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
+            // [32] a0 b0 g0 r0
+            __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
+            accum0 = _mm_add_epi32(accum0, t);
+            // [32] a1 b1 g1 r1
+            t = _mm_unpackhi_epi16(mul_lo, mul_hi);
+            accum1 = _mm_add_epi32(accum1, t);
+
+            // Unpack 3rd and 4th pixels from 8 bits to 16 bits for each channel =>
+            // multiply with current coefficient => accumulate the result.
+            // [16] a3 b3 g3 r3 a2 b2 g2 r2
+            src16 = _mm_unpackhi_epi8(src8, zero);
+            mul_hi = _mm_mulhi_epi16(src16, coeff16);
+            mul_lo = _mm_mullo_epi16(src16, coeff16);
+            // [32] a2 b2 g2 r2
+            t = _mm_unpacklo_epi16(mul_lo, mul_hi);
+            accum2 = _mm_add_epi32(accum2, t);
+            // [32] a3 b3 g3 r3
+            t = _mm_unpackhi_epi16(mul_lo, mul_hi);
+            accum3 = _mm_add_epi32(accum3, t);
+        }
 
-    // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
-    // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-    accum0 = _mm_packus_epi16(accum0, accum2);
+        // Shift right for fixed point implementation.
+        accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
+        accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
+        accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
+        accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
+
+        // Packing 32 bits |accum| to 16 bits per channel (signed saturation).
+        // [16] a1 b1 g1 r1 a0 b0 g0 r0
+        accum0 = _mm_packs_epi32(accum0, accum1);
+        // [16] a3 b3 g3 r3 a2 b2 g2 r2
+        accum2 = _mm_packs_epi32(accum2, accum3);
+
+        // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
+        // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
+        accum0 = _mm_packus_epi16(accum0, accum2);
+
+        if (has_alpha) {
+            // Compute the max(ri, gi, bi) for each pixel.
+            // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
+            __m128i a = _mm_srli_epi32(accum0, 8);
+            // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
+            __m128i b = _mm_max_epu8(a, accum0);  // Max of r and g.
+            // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
+            a = _mm_srli_epi32(accum0, 16);
+            // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
+            b = _mm_max_epu8(a, b);  // Max of r and g and b.
+            // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
+            b = _mm_slli_epi32(b, 24);
+
+            // Make sure the value of the alpha channel is never smaller than the
+            // maximum value of the color channels.
+            accum0 = _mm_max_epu8(b, accum0);
+        } else {
+            // Set value of alpha channels to 0xFF.
+            __m128i mask = _mm_set1_epi32(0xff000000);
+            accum0 = _mm_or_si128(accum0, mask);
+        }
 
-    if (has_alpha) {
-      // Compute the max(ri, gi, bi) for each pixel.
-      // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
-      __m128i a = _mm_srli_epi32(accum0, 8);
-      // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-      __m128i b = _mm_max_epu8(a, accum0);  // Max of r and g.
-      // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
-      a = _mm_srli_epi32(accum0, 16);
-      // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-      b = _mm_max_epu8(a, b);  // Max of r and g and b.
-      // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
-      b = _mm_slli_epi32(b, 24);
-
-      // Make sure the value of alpha channel is always larger than maximum
-      // value of color channels.
-      accum0 = _mm_max_epu8(b, accum0);
-    } else {
-      // Set value of alpha channels to 0xFF.
-      __m128i mask = _mm_set1_epi32(0xff000000);
-      accum0 = _mm_or_si128(accum0, mask);
+        // Store the convolution result (16 bytes) and advance the pixel pointers.
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(out_row), accum0);
+        out_row += 16;
     }
 
-    // Store the convolution result (16 bytes) and advance the pixel pointers.
-    _mm_storeu_si128(reinterpret_cast<__m128i*>(out_row), accum0);
-    out_row += 16;
-  }
-
-  // When the width of the output is not divisible by 4, We need to save one
-  // pixel (4 bytes) each time. And also the fourth pixel is always absent.
-  if (pixel_width & 3) {
-    accum0 = _mm_setzero_si128();
-    accum1 = _mm_setzero_si128();
-    accum2 = _mm_setzero_si128();
-    for (int filter_y = 0; filter_y < filter_length; ++filter_y) {
-      coeff16 = _mm_set1_epi16(filter_values[filter_y]);
-      // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-      src = reinterpret_cast<const __m128i*>(
-          &source_data_rows[filter_y][width<<2]);
-      __m128i src8 = _mm_loadu_si128(src);
-      // [16] a1 b1 g1 r1 a0 b0 g0 r0
-      __m128i src16 = _mm_unpacklo_epi8(src8, zero);
-      __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
-      __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
-      // [32] a0 b0 g0 r0
-      __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-      accum0 = _mm_add_epi32(accum0, t);
-      // [32] a1 b1 g1 r1
-      t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-      accum1 = _mm_add_epi32(accum1, t);
-      // [16] a3 b3 g3 r3 a2 b2 g2 r2
-      src16 = _mm_unpackhi_epi8(src8, zero);
-      mul_hi = _mm_mulhi_epi16(src16, coeff16);
-      mul_lo = _mm_mullo_epi16(src16, coeff16);
-      // [32] a2 b2 g2 r2
-      t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-      accum2 = _mm_add_epi32(accum2, t);
-    }
+    // When the width of the output is not divisible by 4, we need to save one
+    // pixel (4 bytes) at a time. Also, the fourth pixel is always absent.
+    if (pixel_width & 3) {
+        accum0 = _mm_setzero_si128();
+        accum1 = _mm_setzero_si128();
+        accum2 = _mm_setzero_si128();
+        for (int filter_y = 0; filter_y < filter_length; ++filter_y) {
+            coeff16 = _mm_set1_epi16(filter_values[filter_y]);
+            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
+            src = reinterpret_cast<const __m128i*>(
+                &source_data_rows[filter_y][width<<2]);
+            __m128i src8 = _mm_loadu_si128(src);
+            // [16] a1 b1 g1 r1 a0 b0 g0 r0
+            __m128i src16 = _mm_unpacklo_epi8(src8, zero);
+            __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
+            __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
+            // [32] a0 b0 g0 r0
+            __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
+            accum0 = _mm_add_epi32(accum0, t);
+            // [32] a1 b1 g1 r1
+            t = _mm_unpackhi_epi16(mul_lo, mul_hi);
+            accum1 = _mm_add_epi32(accum1, t);
+            // [16] a3 b3 g3 r3 a2 b2 g2 r2
+            src16 = _mm_unpackhi_epi8(src8, zero);
+            mul_hi = _mm_mulhi_epi16(src16, coeff16);
+            mul_lo = _mm_mullo_epi16(src16, coeff16);
+            // [32] a2 b2 g2 r2
+            t = _mm_unpacklo_epi16(mul_lo, mul_hi);
+            accum2 = _mm_add_epi32(accum2, t);
+        }
 
-    accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
-    accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
-    accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
-    // [16] a1 b1 g1 r1 a0 b0 g0 r0
-    accum0 = _mm_packs_epi32(accum0, accum1);
-    // [16] a3 b3 g3 r3 a2 b2 g2 r2
-    accum2 = _mm_packs_epi32(accum2, zero);
-    // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-    accum0 = _mm_packus_epi16(accum0, accum2);
-    if (has_alpha) {
-      // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
-      __m128i a = _mm_srli_epi32(accum0, 8);
-      // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-      __m128i b = _mm_max_epu8(a, accum0);  // Max of r and g.
-      // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
-      a = _mm_srli_epi32(accum0, 16);
-      // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-      b = _mm_max_epu8(a, b);  // Max of r and g and b.
-      // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
-      b = _mm_slli_epi32(b, 24);
-      accum0 = _mm_max_epu8(b, accum0);
-    } else {
-      __m128i mask = _mm_set1_epi32(0xff000000);
-      accum0 = _mm_or_si128(accum0, mask);
-    }
+        accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
+        accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
+        accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
+        // [16] a1 b1 g1 r1 a0 b0 g0 r0
+        accum0 = _mm_packs_epi32(accum0, accum1);
+        // [16] 00 00 00 00 a2 b2 g2 r2 (upper half is packed from |zero|)
+        accum2 = _mm_packs_epi32(accum2, zero);
+        // [8] 00 00 00 00 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
+        accum0 = _mm_packus_epi16(accum0, accum2);
+        if (has_alpha) {
+            // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
+            __m128i a = _mm_srli_epi32(accum0, 8);
+            // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
+            __m128i b = _mm_max_epu8(a, accum0);  // Max of r and g.
+            // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
+            a = _mm_srli_epi32(accum0, 16);
+            // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
+            b = _mm_max_epu8(a, b);  // Max of r and g and b.
+            // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
+            b = _mm_slli_epi32(b, 24);
+            accum0 = _mm_max_epu8(b, accum0);
+        } else {
+            __m128i mask = _mm_set1_epi32(0xff000000);
+            accum0 = _mm_or_si128(accum0, mask);
+        }
 
-    for (int out_x = width; out_x < pixel_width; out_x++) {
-      *(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum0);
-      accum0 = _mm_srli_si128(accum0, 4);
-      out_row += 4;
+        for (int out_x = width; out_x < pixel_width; out_x++) {
+            *(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum0);
+            accum0 = _mm_srli_si128(accum0, 4);
+            out_row += 4;
+        }
     }
-  }
 }
 
 void convolveVertically_SSE2(const SkConvolutionFilter1D::ConvolutionFixed* filter_values,
@@ -606,19 +603,19 @@ void convolveVertically_SSE2(const SkConvolutionFilter1D::ConvolutionFixed* filt
                              int pixel_width,
                              unsigned char* out_row,
                              bool has_alpha) {
-  if (has_alpha) {
-    convolveVertically_SSE2<true>(filter_values,
-                                  filter_length,
-                                  source_data_rows,
-                                  pixel_width,
-                                  out_row);
-  } else {
-    convolveVertically_SSE2<false>(filter_values,
-                                   filter_length,
-                                   source_data_rows,
-                                   pixel_width,
-                                   out_row);
-  }
+    if (has_alpha) {
+        convolveVertically_SSE2<true>(filter_values,
+                                      filter_length,
+                                      source_data_rows,
+                                      pixel_width,
+                                      out_row);
+    } else {
+        convolveVertically_SSE2<false>(filter_values,
+                                       filter_length,
+                                       source_data_rows,
+                                       pixel_width,
+                                       out_row);
+    }
 }
 
 void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) {
diff --git a/opts/SkBitmapFilter_opts_SSE2.h b/opts/SkBitmapFilter_opts_SSE2.h
index 588f4ef1..661a824e 100644
--- a/opts/SkBitmapFilter_opts_SSE2.h
+++ b/opts/SkBitmapFilter_opts_SSE2.h
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2013 Google Inc.
  *
@@ -6,7 +5,6 @@
  * found in the LICENSE file.
  */
 
-
 #ifndef SkBitmapFilter_opts_sse2_DEFINED
 #define SkBitmapFilter_opts_sse2_DEFINED
 
@@ -14,9 +12,9 @@
 #include "SkConvolver.h"
 
 void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y,
-                          SkPMColor *SK_RESTRICT colors, int count);
+                                      SkPMColor *SK_RESTRICT colors, int count);
 void highQualityFilter_SSE2(const SkBitmapProcState &s, int x, int y,
-                SkPMColor *SK_RESTRICT colors, int count);
+                            SkPMColor *SK_RESTRICT colors, int count);
 
 
 void convolveVertically_SSE2(const SkConvolutionFilter1D::ConvolutionFixed* filter_values,
diff --git a/opts/SkBitmapProcState_opts_SSE2.cpp b/opts/SkBitmapProcState_opts_SSE2.cpp
index 54a2f2da..2279b9d1 100644
--- a/opts/SkBitmapProcState_opts_SSE2.cpp
+++ b/opts/SkBitmapProcState_opts_SSE2.cpp
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2009 The Android Open Source Project
  *
@@ -6,7 +5,6 @@
  * found in the LICENSE file.
  */
 
-
 #include <emmintrin.h>
 #include "SkBitmapProcState_opts_SSE2.h"
 #include "SkColorPriv.h"
diff --git a/opts/SkBitmapProcState_opts_SSE2.h b/opts/SkBitmapProcState_opts_SSE2.h
index 46e35a0f..82c5cc8d 100644
--- a/opts/SkBitmapProcState_opts_SSE2.h
+++ b/opts/SkBitmapProcState_opts_SSE2.h
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2009 The Android Open Source Project
  *
@@ -6,6 +5,8 @@
  * found in the LICENSE file.
  */
 
+#ifndef SkBitmapProcState_opts_SSE2_DEFINED
+#define SkBitmapProcState_opts_SSE2_DEFINED
 
 #include "SkBitmapProcState.h"
 
@@ -24,7 +25,9 @@ void ClampX_ClampY_nofilter_scale_SSE2(const SkBitmapProcState& s,
 void ClampX_ClampY_filter_affine_SSE2(const SkBitmapProcState& s,
                                       uint32_t xy[], int count, int x, int y);
 void ClampX_ClampY_nofilter_affine_SSE2(const SkBitmapProcState& s,
-                                       uint32_t xy[], int count, int x, int y);
+                                        uint32_t xy[], int count, int x, int y);
 void S32_D16_filter_DX_SSE2(const SkBitmapProcState& s,
-                                  const uint32_t* xy,
-                                  int count, uint16_t* colors);
+                            const uint32_t* xy,
+                            int count, uint16_t* colors);
+
+#endif
diff --git a/opts/SkBitmapProcState_opts_SSSE3.cpp b/opts/SkBitmapProcState_opts_SSSE3.cpp
index ddc8ccc5..4622937c 100644
--- a/opts/SkBitmapProcState_opts_SSSE3.cpp
+++ b/opts/SkBitmapProcState_opts_SSSE3.cpp
@@ -425,9 +425,10 @@ void S32_generic_D32_filter_DX_SSSE3(const SkBitmapProcState& s,
     const __m128i zero = _mm_setzero_si128();
 
     __m128i alpha = _mm_setzero_si128();
-    if (has_alpha)
+    if (has_alpha) {
         // 8x(alpha)
         alpha = _mm_set1_epi16(s.fAlphaScale);
+    }
 
     if (sub_y == 0) {
         // Unroll 4x, interleave bytes, use pmaddubsw (all_x is small)
@@ -705,7 +706,7 @@ void S32_generic_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
         *colors++ = _mm_cvtsi128_si32(sum0);
     }
 }
-}  // namepace
+}  // namespace
 
 void S32_opaque_D32_filter_DX_SSSE3(const SkBitmapProcState& s,
                                     const uint32_t* xy,
diff --git a/opts/SkBitmapProcState_opts_SSSE3.h b/opts/SkBitmapProcState_opts_SSSE3.h
index 176f2bfb..9fd074aa 100644
--- a/opts/SkBitmapProcState_opts_SSSE3.h
+++ b/opts/SkBitmapProcState_opts_SSSE3.h
@@ -5,6 +5,9 @@
  * found in the LICENSE file.
  */
 
+#ifndef SkBitmapProcState_opts_SSSE3_DEFINED
+#define SkBitmapProcState_opts_SSSE3_DEFINED
+
 #include "SkBitmapProcState.h"
 
 void S32_opaque_D32_filter_DX_SSSE3(const SkBitmapProcState& s,
@@ -19,3 +22,5 @@ void S32_opaque_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
 void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
                                    const uint32_t* xy,
                                    int count, uint32_t* colors);
+
+#endif
diff --git a/opts/SkBlitRect_opts_SSE2.cpp b/opts/SkBlitRect_opts_SSE2.cpp
index 3cb2b9c6..d65a313d 100644
--- a/opts/SkBlitRect_opts_SSE2.cpp
+++ b/opts/SkBlitRect_opts_SSE2.cpp
@@ -5,15 +5,14 @@
  * found in the LICENSE file.
  */
 
+#include <emmintrin.h>
 #include "SkBlitRect_opts_SSE2.h"
 #include "SkBlitRow.h"
 #include "SkColorPriv.h"
 
-#include <emmintrin.h>
-
-/** Simple blitting of opaque rectangles less than 31 pixels wide:
-    inlines and merges sections of Color32_SSE2 and sk_memset32_SSE2.
-*/
+/* Simple blitting of opaque rectangles less than 31 pixels wide:
+ * inlines and merges sections of Color32_SSE2 and sk_memset32_SSE2.
+ */
 static void BlitRect32_OpaqueNarrow_SSE2(SkPMColor* SK_RESTRICT destination,
                                   int width, int height,
                                   size_t rowBytes, uint32_t color) {
@@ -42,12 +41,12 @@ static void BlitRect32_OpaqueNarrow_SSE2(SkPMColor* SK_RESTRICT destination,
     }
 }
 
-/**
-  Fast blitting of opaque rectangles at least 31 pixels wide:
-  inlines and merges sections of Color32_SSE2 and sk_memset32_SSE2.
-  A 31 pixel rectangle is guaranteed to have at least one
-  16-pixel aligned span that can take advantage of mm_store.
-*/
+/*
+ * Fast blitting of opaque rectangles at least 31 pixels wide:
+ * inlines and merges sections of Color32_SSE2 and sk_memset32_SSE2.
+ * A 31 pixel rectangle is guaranteed to have at least one
+ * 16-pixel aligned span that can take advantage of mm_store.
+ */
 static void BlitRect32_OpaqueWide_SSE2(SkPMColor* SK_RESTRICT destination,
                                 int width, int height,
                                 size_t rowBytes, uint32_t color) {
diff --git a/opts/SkBlitRect_opts_SSE2.h b/opts/SkBlitRect_opts_SSE2.h
index 4d2f74a4..3d09f5c3 100644
--- a/opts/SkBlitRect_opts_SSE2.h
+++ b/opts/SkBlitRect_opts_SSE2.h
@@ -8,13 +8,11 @@
 #ifndef SkBlitRect_opts_SSE2_DEFINED
 #define SkBlitRect_opts_SSE2_DEFINED
 
-/*
-  These functions' implementations copy sections of both
-  SkBlitRow_opts_SSE2 and SkUtils_opts_SSE2.
-*/
-
 #include "SkColor.h"
 
+/* These functions' implementations copy sections of both
+ * SkBlitRow_opts_SSE2 and SkUtils_opts_SSE2.
+ */
 void ColorRect32_SSE2(SkPMColor* SK_RESTRICT dst,
                       int width, int height,
                       size_t rowBytes, uint32_t color);
diff --git a/opts/SkBlitRow_opts_SSE2.cpp b/opts/SkBlitRow_opts_SSE2.cpp
index d1474f4a..391b24c8 100644
--- a/opts/SkBlitRow_opts_SSE2.cpp
+++ b/opts/SkBlitRow_opts_SSE2.cpp
@@ -5,16 +5,14 @@
  * found in the LICENSE file.
  */
 
-
-#include "SkBlitRow_opts_SSE2.h"
+#include <emmintrin.h>
 #include "SkBitmapProcState_opts_SSE2.h"
+#include "SkBlitRow_opts_SSE2.h"
 #include "SkColorPriv.h"
 #include "SkColor_opts_SSE2.h"
 #include "SkDither.h"
 #include "SkUtils.h"
 
-#include <emmintrin.h>
-
 /* SSE2 version of S32_Blend_BlitRow32()
  * portable version is in core/SkBlitRow_D32.cpp
  */
@@ -179,7 +177,7 @@ void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
             d++;
             count -= 4;
         }
-    #else
+#else
         __m128i rb_mask = _mm_set1_epi32(0x00FF00FF);
         __m128i c_256 = _mm_set1_epi16(0x0100);  // 8 copies of 256 (16-bit)
         while (count >= 4) {
@@ -342,7 +340,6 @@ void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
  */
 void Color32_SSE2(SkPMColor dst[], const SkPMColor src[], int count,
                   SkPMColor color) {
-
     if (count <= 0) {
         return;
     }
@@ -406,7 +403,7 @@ void Color32_SSE2(SkPMColor dst[], const SkPMColor src[], int count,
             }
             src = reinterpret_cast<const SkPMColor*>(s);
             dst = reinterpret_cast<SkPMColor*>(d);
-         }
+        }
 
         while (count > 0) {
             *dst = color + SkAlphaMulQ(*src, scale);
@@ -504,7 +501,7 @@ void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
             }
             dst = reinterpret_cast<SkPMColor *>(d);
         }
-        while(count > 0) {
+        while (count > 0) {
             *dst= SkBlendARGB32(color, *dst, *mask);
             dst += 1;
             mask++;
diff --git a/opts/SkBlitRow_opts_SSE2.h b/opts/SkBlitRow_opts_SSE2.h
index fcf82d08..29fd96e5 100644
--- a/opts/SkBlitRow_opts_SSE2.h
+++ b/opts/SkBlitRow_opts_SSE2.h
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2009 The Android Open Source Project
  *
@@ -6,6 +5,8 @@
  * found in the LICENSE file.
  */
 
+#ifndef SkBlitRow_opts_SSE2_DEFINED
+#define SkBlitRow_opts_SSE2_DEFINED
 
 #include "SkBlitRow.h"
 
@@ -41,3 +42,5 @@ void S32_D565_Opaque_Dither_SSE2(uint16_t* SK_RESTRICT dst,
 void S32A_D565_Opaque_Dither_SSE2(uint16_t* SK_RESTRICT dst,
                                   const SkPMColor* SK_RESTRICT src,
                                   int count, U8CPU alpha, int x, int y);
+
+#endif
diff --git a/opts/SkBlurImage_opts_SSE2.cpp b/opts/SkBlurImage_opts_SSE2.cpp
index 93830d78..bbc6a664 100644
--- a/opts/SkBlurImage_opts_SSE2.cpp
+++ b/opts/SkBlurImage_opts_SSE2.cpp
@@ -5,36 +5,31 @@
  * found in the LICENSE file.
  */
 
-
+#include <emmintrin.h>
 #include "SkBitmap.h"
-#include "SkColorPriv.h"
 #include "SkBlurImage_opts_SSE2.h"
+#include "SkColorPriv.h"
 #include "SkRect.h"
 
-#include <emmintrin.h>
-
 namespace {
-
 enum BlurDirection {
     kX, kY
 };
 
-/**
- * Helper function to spread the components of a 32-bit integer into the
+/* Helper function to spread the components of a 32-bit integer into the
  * lower 8 bits of each 32-bit element of an SSE register.
  */
-
 inline __m128i expand(int a) {
-      const __m128i zero = _mm_setzero_si128();
+    const __m128i zero = _mm_setzero_si128();
 
-      // 0 0 0 0   0 0 0 0   0 0 0 0   A R G B
-      __m128i result = _mm_cvtsi32_si128(a);
+    // 0 0 0 0   0 0 0 0   0 0 0 0   A R G B
+    __m128i result = _mm_cvtsi32_si128(a);
 
-      // 0 0 0 0   0 0 0 0   0 A 0 R   0 G 0 B
-      result = _mm_unpacklo_epi8(result, zero);
+    // 0 0 0 0   0 0 0 0   0 A 0 R   0 G 0 B
+    result = _mm_unpacklo_epi8(result, zero);
 
-      // 0 0 0 A   0 0 0 R   0 0 0 G   0 0 0 B
-      return _mm_unpacklo_epi16(result, zero);
+    // 0 0 0 A   0 0 0 R   0 0 0 G   0 0 0 B
+    return _mm_unpacklo_epi16(result, zero);
 }
 
 template<BlurDirection srcDirection, BlurDirection dstDirection>
diff --git a/opts/SkBlurImage_opts_SSE2.h b/opts/SkBlurImage_opts_SSE2.h
index c8deea4b..db104bac 100644
--- a/opts/SkBlurImage_opts_SSE2.h
+++ b/opts/SkBlurImage_opts_SSE2.h
@@ -5,9 +5,14 @@
  * found in the LICENSE file.
  */
 
+#ifndef SkBlurImage_opts_SSE2_DEFINED
+#define SkBlurImage_opts_SSE2_DEFINED
+
 #include "SkBlurImage_opts.h"
 
 bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX,
                                     SkBoxBlurProc* boxBlurY,
                                     SkBoxBlurProc* boxBlurXY,
                                     SkBoxBlurProc* boxBlurYX);
+
+#endif
diff --git a/opts/SkMorphology_opts_SSE2.cpp b/opts/SkMorphology_opts_SSE2.cpp
index b58fced2..e7829509 100644
--- a/opts/SkMorphology_opts_SSE2.cpp
+++ b/opts/SkMorphology_opts_SSE2.cpp
@@ -5,12 +5,10 @@
  * found in the LICENSE file.
  */
 
-
+#include <emmintrin.h>
 #include "SkColorPriv.h"
 #include "SkMorphology_opts_SSE2.h"
 
-#include <emmintrin.h>
-
 /* SSE2 version of dilateX, dilateY, erodeX, erodeY.
  * portable versions are in src/effects/SkMorphologyImageFilter.cpp.
  */
@@ -48,8 +46,12 @@ static void SkMorph_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
             lp += srcStrideY;
             up += srcStrideY;
         }
-        if (x >= radius) src += srcStrideX;
-        if (x + radius < width - 1) upperSrc += srcStrideX;
+        if (x >= radius) {
+            src += srcStrideX;
+        }
+        if (x + radius < width - 1) {
+            upperSrc += srcStrideX;
+        }
         dst += dstStrideX;
     }
 }
diff --git a/opts/SkMorphology_opts_SSE2.h b/opts/SkMorphology_opts_SSE2.h
index bd103e6e..bf5aa03b 100644
--- a/opts/SkMorphology_opts_SSE2.h
+++ b/opts/SkMorphology_opts_SSE2.h
@@ -5,6 +5,11 @@
  * found in the LICENSE file.
  */
 
+#ifndef SkMorphology_opts_SSE2_DEFINED
+#define SkMorphology_opts_SSE2_DEFINED
+
+#include "SkColor.h"
+
 void SkDilateX_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
                     int width, int height, int srcStride, int dstStride);
 void SkDilateY_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
@@ -13,3 +18,5 @@ void SkErodeX_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
                    int width, int height, int srcStride, int dstStride);
 void SkErodeY_SSE2(const SkPMColor* src, SkPMColor* dst, int radius,
                    int width, int height, int srcStride, int dstStride);
+
+#endif
diff --git a/opts/SkUtils_opts_SSE2.cpp b/opts/SkUtils_opts_SSE2.cpp
index e22044d3..a3c5aa5d 100644
--- a/opts/SkUtils_opts_SSE2.cpp
+++ b/opts/SkUtils_opts_SSE2.cpp
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2009 The Android Open Source Project
  *
@@ -6,7 +5,6 @@
  * found in the LICENSE file.
  */
 
-
 #include <emmintrin.h>
 #include "SkUtils_opts_SSE2.h"
 
diff --git a/opts/SkUtils_opts_SSE2.h b/opts/SkUtils_opts_SSE2.h
index ed24c1ff..5f0bc329 100644
--- a/opts/SkUtils_opts_SSE2.h
+++ b/opts/SkUtils_opts_SSE2.h
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2009 The Android Open Source Project
  *
@@ -6,8 +5,12 @@
  * found in the LICENSE file.
  */
 
+#ifndef SkUtils_opts_SSE2_DEFINED
+#define SkUtils_opts_SSE2_DEFINED
 
 #include "SkTypes.h"
 
 void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count);
 void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count);
+
+#endif
diff --git a/opts/SkXfermode_opts_SSE2.cpp b/opts/SkXfermode_opts_SSE2.cpp
index 4e4532b7..ec76ab3c 100644
--- a/opts/SkXfermode_opts_SSE2.cpp
+++ b/opts/SkXfermode_opts_SSE2.cpp
@@ -1,3 +1,10 @@
+/*
+ * Copyright 2014 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
 #include "SkColorPriv.h"
 #include "SkColor_opts_SSE2.h"
 #include "SkMathPriv.h"
diff --git a/opts/SkXfermode_opts_SSE2.h b/opts/SkXfermode_opts_SSE2.h
index 9f17f8b3..bfc14393 100644
--- a/opts/SkXfermode_opts_SSE2.h
+++ b/opts/SkXfermode_opts_SSE2.h
@@ -1,3 +1,10 @@
+/*
+ * Copyright 2014 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
 #ifndef SkXfermode_opts_SSE2_DEFINED
 #define SkXfermode_opts_SSE2_DEFINED
 
diff --git a/opts/opts_check_SSE2.cpp b/opts/opts_check_x86.cpp
index 6c684c27..0b0debb2 100644
--- a/opts/opts_check_SSE2.cpp
+++ b/opts/opts_check_x86.cpp
@@ -5,23 +5,22 @@
  * found in the LICENSE file.
  */
 
+#include "SkBitmapFilter_opts_SSE2.h"
 #include "SkBitmapProcState_opts_SSE2.h"
 #include "SkBitmapProcState_opts_SSSE3.h"
-#include "SkBitmapFilter_opts_SSE2.h"
 #include "SkBlitMask.h"
-#include "SkBlitRow.h"
 #include "SkBlitRect_opts_SSE2.h"
+#include "SkBlitRow.h"
 #include "SkBlitRow_opts_SSE2.h"
 #include "SkBlurImage_opts_SSE2.h"
-#include "SkUtils_opts_SSE2.h"
-#include "SkUtils.h"
 #include "SkMorphology_opts.h"
 #include "SkMorphology_opts_SSE2.h"
+#include "SkRTConf.h"
+#include "SkUtils.h"
+#include "SkUtils_opts_SSE2.h"
 #include "SkXfermode.h"
 #include "SkXfermode_proccoeff.h"
 
-#include "SkRTConf.h"
-
 #if defined(_MSC_VER) && defined(_WIN64)
 #include <intrin.h>
 #endif
@@ -32,6 +31,7 @@
    in this directory should be compiled with -msse2. */
 
 
+/* Function to get the CPU SSE-level at runtime, for different compilers. */
 #ifdef _MSC_VER
 static inline void getcpuid(int info_type, int info[4]) {
 #if defined(_WIN64)
@@ -72,6 +72,8 @@ static inline void getcpuid(int info_type, int info[4]) {
 #endif
 #endif
 
+////////////////////////////////////////////////////////////////////////////////
+
 #if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
 /* All x86_64 machines have SSE2, or we know it's supported at compile time,  so don't even bother checking. */
 static inline bool hasSSE2() {
@@ -120,6 +122,8 @@ static bool cachedHasSSSE3() {
     return gHasSSSE3;
 }
 
+////////////////////////////////////////////////////////////////////////////////
+
 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
 
 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
@@ -132,6 +136,8 @@ void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
     }
 }
 
+////////////////////////////////////////////////////////////////////////////////
+
 void SkBitmapProcState::platformProcs() {
     /* Every optimization in the function requires at least SSE2 */
     if (!cachedHasSSE2()) {
@@ -185,6 +191,8 @@ void SkBitmapProcState::platformProcs() {
     }
 }
 
+////////////////////////////////////////////////////////////////////////////////
+
 static SkBlitRow::Proc platform_16_procs[] = {
     S32_D565_Opaque_SSE2,               // S32_D565_Opaque
     NULL,                               // S32_D565_Blend
@@ -196,6 +204,14 @@ static SkBlitRow::Proc platform_16_procs[] = {
     NULL,                               // S32A_D565_Blend_Dither
 };
 
+SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
+    if (cachedHasSSE2()) {
+        return platform_16_procs[flags];
+    } else {
+        return NULL;
+    }
+}
+
 static SkBlitRow::Proc32 platform_32_procs[] = {
     NULL,                               // S32_Opaque,
     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
@@ -203,9 +219,9 @@ static SkBlitRow::Proc32 platform_32_procs[] = {
     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
 };
 
-SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
+SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
     if (cachedHasSSE2()) {
-        return platform_16_procs[flags];
+        return platform_32_procs[flags];
     } else {
         return NULL;
     }
@@ -219,14 +235,20 @@ SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
     }
 }
 
-SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
+SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
+
+SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
+/* Return NULL for now, since the optimized path in ColorRect32_SSE2 is disabled.
     if (cachedHasSSE2()) {
-        return platform_32_procs[flags];
+        return ColorRect32_SSE2;
     } else {
         return NULL;
     }
+*/
+    return NULL;
 }
 
+////////////////////////////////////////////////////////////////////////////////
 
 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
                                                      SkMask::Format maskFormat,
@@ -264,12 +286,15 @@ SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
     }
 
 }
+
 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
                                                  SkMask::Format maskFormat,
                                                  RowFlags flags) {
     return NULL;
 }
 
+////////////////////////////////////////////////////////////////////////////////
+
 SkMemset16Proc SkMemset16GetPlatformProc() {
     if (cachedHasSSE2()) {
         return sk_memset16_SSE2;
@@ -286,6 +311,8 @@ SkMemset32Proc SkMemset32GetPlatformProc() {
     }
 }
 
+////////////////////////////////////////////////////////////////////////////////
+
 SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
     if (!cachedHasSSE2()) {
         return NULL;
@@ -304,6 +331,8 @@ SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType t
     }
 }
 
+////////////////////////////////////////////////////////////////////////////////
+
 bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
                                SkBoxBlurProc* boxBlurY,
                                SkBoxBlurProc* boxBlurXY,
@@ -318,15 +347,7 @@ bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
 #endif
 }
 
-SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
-
-SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
-    if (cachedHasSSE2()) {
-        return ColorRect32_SSE2;
-    } else {
-        return NULL;
-    }
-}
+////////////////////////////////////////////////////////////////////////////////
 
 extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
                                                                 SkXfermode::Mode mode);
diff --git a/pathops/SkDCubicLineIntersection.cpp b/pathops/SkDCubicLineIntersection.cpp
index be38ddbf..da4b983d 100644
--- a/pathops/SkDCubicLineIntersection.cpp
+++ b/pathops/SkDCubicLineIntersection.cpp
@@ -307,7 +307,7 @@ public:
         if (!lPt.moreRoughlyEqual(cPt)) {
             return false;
         }
-        // FIXME: if points are roughly equal but not approximately equal, need to do 
+        // FIXME: if points are roughly equal but not approximately equal, need to do
         // a binary search like quad/quad intersection to find more precise t values
         if (lT == 0 || lT == 1 || (ptSet == kPointUninitialized && cT != 0 && cT != 1)) {
             *pt = lPt;
diff --git a/pathops/SkPathOpsDebug.cpp b/pathops/SkPathOpsDebug.cpp
index 1f2b0133..56813b8b 100644
--- a/pathops/SkPathOpsDebug.cpp
+++ b/pathops/SkPathOpsDebug.cpp
@@ -171,15 +171,15 @@ void SkOpAngle::debugOne(bool functionHeader) const {
 
 #if DEBUG_ANGLE
 void SkOpAngle::debugSameAs(const SkOpAngle* compare) const {
-    SK_DEBUGBREAK(fSegment == compare->fSegment);
+    SK_ALWAYSBREAK(fSegment == compare->fSegment);
     const SkOpSpan& startSpan = fSegment->span(fStart);
     const SkOpSpan& oStartSpan = fSegment->span(compare->fStart);
-    SK_DEBUGBREAK(startSpan.fToAngleIndex == oStartSpan.fToAngleIndex);
-    SK_DEBUGBREAK(startSpan.fFromAngleIndex == oStartSpan.fFromAngleIndex);
+    SK_ALWAYSBREAK(startSpan.fToAngleIndex == oStartSpan.fToAngleIndex);
+    SK_ALWAYSBREAK(startSpan.fFromAngleIndex == oStartSpan.fFromAngleIndex);
     const SkOpSpan& endSpan = fSegment->span(fEnd);
     const SkOpSpan& oEndSpan = fSegment->span(compare->fEnd);
-    SK_DEBUGBREAK(endSpan.fToAngleIndex == oEndSpan.fToAngleIndex);
-    SK_DEBUGBREAK(endSpan.fFromAngleIndex == oEndSpan.fFromAngleIndex);
+    SK_ALWAYSBREAK(endSpan.fToAngleIndex == oEndSpan.fToAngleIndex);
+    SK_ALWAYSBREAK(endSpan.fFromAngleIndex == oEndSpan.fFromAngleIndex);
 }
 #endif
 
@@ -189,13 +189,13 @@ void SkOpAngle::debugValidateNext() const {
     const SkOpAngle* next = first;
     SkTDArray<const SkOpAngle*>(angles);
     do {
-        SK_DEBUGBREAK(next->fSegment->debugContains(next));
+        SK_ALWAYSBREAK(next->fSegment->debugContains(next));
         angles.push(next);
         next = next->next();
         if (next == first) {
             break;
         }
-        SK_DEBUGBREAK(!angles.contains(next));
+        SK_ALWAYSBREAK(!angles.contains(next));
         if (!next) {
             return;
         }
@@ -205,7 +205,7 @@ void SkOpAngle::debugValidateNext() const {
 void SkOpAngle::debugValidateLoop() const {
     const SkOpAngle* first = this;
     const SkOpAngle* next = first;
-    SK_DEBUGBREAK(first->next() != first);
+    SK_ALWAYSBREAK(first->next() != first);
     int signSum = 0;
     int oppSum = 0;
     bool firstOperand = fSegment->operand();
@@ -218,12 +218,12 @@ void SkOpAngle::debugValidateLoop() const {
         oppSum += operandsMatch ? segment->oppSign(next) : segment->spanSign(next);
         const SkOpSpan& span = segment->span(SkMin32(next->fStart, next->fEnd));
         if (segment->_xor()) {
-//            SK_DEBUGBREAK(span.fWindValue == 1);
-//            SK_DEBUGBREAK(span.fWindSum == SK_MinS32 || span.fWindSum == 1);
+//            SK_ALWAYSBREAK(span.fWindValue == 1);
+//            SK_ALWAYSBREAK(span.fWindSum == SK_MinS32 || span.fWindSum == 1);
         }
         if (segment->oppXor()) {
-            SK_DEBUGBREAK(span.fOppValue == 0 || abs(span.fOppValue) == 1);
-//            SK_DEBUGBREAK(span.fOppSum == SK_MinS32 || span.fOppSum == 0 || abs(span.fOppSum) == 1);
+            SK_ALWAYSBREAK(span.fOppValue == 0 || abs(span.fOppValue) == 1);
+//            SK_ALWAYSBREAK(span.fOppSum == SK_MinS32 || span.fOppSum == 0 || abs(span.fOppSum) == 1);
         }
         next = next->next();
         if (!next) {
@@ -233,8 +233,8 @@ void SkOpAngle::debugValidateLoop() const {
     if (unorderable) {
         return;
     }
-    SK_DEBUGBREAK(!signSum || fSegment->_xor());
-    SK_DEBUGBREAK(!oppSum || fSegment->oppXor());
+    SK_ALWAYSBREAK(!signSum || fSegment->_xor());
+    SK_ALWAYSBREAK(!oppSum || fSegment->oppXor());
     int lastWinding;
     int lastOppWinding;
     int winding;
@@ -244,16 +244,16 @@ void SkOpAngle::debugValidateLoop() const {
         const SkOpSpan& span = segment->span(SkMin32(next->fStart, next->fEnd));
         winding = span.fWindSum;
         if (winding != SK_MinS32) {
-//            SK_DEBUGBREAK(winding != 0);
-            SK_DEBUGBREAK(SkPathOpsDebug::ValidWind(winding));
+//            SK_ALWAYSBREAK(winding != 0);
+            SK_ALWAYSBREAK(SkPathOpsDebug::ValidWind(winding));
             lastWinding = winding;
             int diffWinding = segment->spanSign(next);
             if (!segment->_xor()) {
-                SK_DEBUGBREAK(diffWinding != 0);
+                SK_ALWAYSBREAK(diffWinding != 0);
                 bool sameSign = (winding > 0) == (diffWinding > 0);
                 winding -= sameSign ? diffWinding : -diffWinding;
-                SK_DEBUGBREAK(SkPathOpsDebug::ValidWind(winding));
-                SK_DEBUGBREAK(abs(winding) <= abs(lastWinding));
+                SK_ALWAYSBREAK(SkPathOpsDebug::ValidWind(winding));
+                SK_ALWAYSBREAK(abs(winding) <= abs(lastWinding));
                 if (!sameSign) {
                     SkTSwap(winding, lastWinding);
                 }
@@ -261,12 +261,12 @@ void SkOpAngle::debugValidateLoop() const {
             lastOppWinding = oppWinding = span.fOppSum;
             if (oppWinding != SK_MinS32 && !segment->oppXor()) {
                 int oppDiffWinding = segment->oppSign(next);
-//                SK_DEBUGBREAK(abs(oppDiffWinding) <= abs(diffWinding) || segment->_xor());
+//                SK_ALWAYSBREAK(abs(oppDiffWinding) <= abs(diffWinding) || segment->_xor());
                 if (oppDiffWinding) {
                     bool oppSameSign = (oppWinding > 0) == (oppDiffWinding > 0);
                     oppWinding -= oppSameSign ? oppDiffWinding : -oppDiffWinding;
-                    SK_DEBUGBREAK(SkPathOpsDebug::ValidWind(oppWinding));
-                    SK_DEBUGBREAK(abs(oppWinding) <= abs(lastOppWinding));
+                    SK_ALWAYSBREAK(SkPathOpsDebug::ValidWind(oppWinding));
+                    SK_ALWAYSBREAK(abs(oppWinding) <= abs(lastOppWinding));
                     if (!oppSameSign) {
                         SkTSwap(oppWinding, lastOppWinding);
                     }
@@ -275,13 +275,13 @@ void SkOpAngle::debugValidateLoop() const {
             firstOperand = segment->operand();
             break;
         }
-        SK_DEBUGBREAK(span.fOppSum == SK_MinS32);
+        SK_ALWAYSBREAK(span.fOppSum == SK_MinS32);
         next = next->next();
     } while (next != first);
     if (winding == SK_MinS32) {
         return;
     }
-    SK_DEBUGBREAK(oppWinding == SK_MinS32 || SkPathOpsDebug::ValidWind(oppWinding));
+    SK_ALWAYSBREAK(oppWinding == SK_MinS32 || SkPathOpsDebug::ValidWind(oppWinding));
     first = next;
     next = next->next();
     do {
@@ -292,27 +292,27 @@ void SkOpAngle::debugValidateLoop() const {
         if (operandsMatch) {
             if (!segment->_xor()) {
                 winding -= segment->spanSign(next);
-                SK_DEBUGBREAK(winding != lastWinding);
-                SK_DEBUGBREAK(SkPathOpsDebug::ValidWind(winding));
+                SK_ALWAYSBREAK(winding != lastWinding);
+                SK_ALWAYSBREAK(SkPathOpsDebug::ValidWind(winding));
             }
             if (!segment->oppXor()) {
                 int oppDiffWinding = segment->oppSign(next);
                 if (oppWinding != SK_MinS32) {
                     oppWinding -= oppDiffWinding;
-                    SK_DEBUGBREAK(SkPathOpsDebug::ValidWind(oppWinding));
+                    SK_ALWAYSBREAK(SkPathOpsDebug::ValidWind(oppWinding));
                 } else {
-                    SK_DEBUGBREAK(oppDiffWinding == 0);
+                    SK_ALWAYSBREAK(oppDiffWinding == 0);
                 }
             }
         } else {
             if (!segment->oppXor()) {
                 winding -= segment->oppSign(next);
-                SK_DEBUGBREAK(SkPathOpsDebug::ValidWind(winding));
+                SK_ALWAYSBREAK(SkPathOpsDebug::ValidWind(winding));
             }
             if (!segment->_xor()) {
                 oppWinding -= segment->spanSign(next);
-                SK_DEBUGBREAK(oppWinding != lastOppWinding);
-                SK_DEBUGBREAK(SkPathOpsDebug::ValidWind(oppWinding));
+                SK_ALWAYSBREAK(oppWinding != lastOppWinding);
+                SK_ALWAYSBREAK(SkPathOpsDebug::ValidWind(oppWinding));
             }
         }
         bool useInner = SkOpSegment::UseInnerWinding(lastWinding, winding);
@@ -333,12 +333,12 @@ void SkOpAngle::debugValidateLoop() const {
         }
         if (oppWinding != SK_MinS32) {
             if (span.fOppSum != SK_MinS32) {
-                SK_DEBUGBREAK(span.fOppSum == oppSumWinding || segment->oppXor() || segment->_xor());
+                SK_ALWAYSBREAK(span.fOppSum == oppSumWinding || segment->oppXor() || segment->_xor());
             }
         } else {
-            SK_DEBUGBREAK(!firstOperand);
-            SK_DEBUGBREAK(!segment->operand());
-            SK_DEBUGBREAK(!span.fOppValue);
+            SK_ALWAYSBREAK(!firstOperand);
+            SK_ALWAYSBREAK(!segment->operand());
+            SK_ALWAYSBREAK(!span.fOppValue);
         }
         next = next->next();
     } while (next != first);
@@ -356,14 +356,14 @@ bool SkOpSegment::controlsContainedByEnds(int tStart, int tEnd) const {
 #endif
 
 #if DEBUG_CONCIDENT
-// SK_DEBUGBREAK if pair has not already been added
+// SK_ALWAYSBREAK if pair has not already been added
 void SkOpSegment::debugAddTPair(double t, const SkOpSegment& other, double otherT) const {
     for (int i = 0; i < fTs.count(); ++i) {
         if (fTs[i].fT == t && fTs[i].fOther == &other && fTs[i].fOtherT == otherT) {
             return;
         }
     }
-    SK_DEBUGBREAK(0);
+    SK_ALWAYSBREAK(0);
 }
 #endif
 
@@ -372,7 +372,7 @@ void SkOpSegment::debugCheckPointsEqualish(int tStart, int tEnd) const {
     const SkPoint& basePt = fTs[tStart].fPt;
     while (++tStart < tEnd) {
        const SkPoint& cmpPt = fTs[tStart].fPt;
-       SK_DEBUGBREAK(SkDPoint::ApproximatelyEqual(basePt, cmpPt));
+       SK_ALWAYSBREAK(SkDPoint::ApproximatelyEqual(basePt, cmpPt));
     }
 }
 #endif
@@ -461,7 +461,7 @@ void SkOpSegment::debugShowActiveSpans() const {
         if (fTs[i].fDone) {
             continue;
         }
-        SK_DEBUGBREAK(i < fTs.count() - 1);
+        SK_ALWAYSBREAK(i < fTs.count() - 1);
 #if DEBUG_ACTIVE_SPANS_SHORT_FORM
         if (lastId == fID && lastT == fTs[i].fT) {
             continue;
@@ -502,7 +502,7 @@ void SkOpSegment::debugShowNewWinding(const char* fun, const SkOpSpan& span, int
     for (int vIndex = 1; vIndex <= SkPathOpsVerbToPoints(fVerb); ++vIndex) {
         SkDebugf(" %1.9g,%1.9g", fPts[vIndex].fX, fPts[vIndex].fY);
     }
-    SK_DEBUGBREAK(&span == &span.fOther->fTs[span.fOtherIndex].fOther->
+    SK_ALWAYSBREAK(&span == &span.fOther->fTs[span.fOtherIndex].fOther->
             fTs[span.fOther->fTs[span.fOtherIndex].fOtherIndex]);
     SkDebugf(") t=%1.9g [%d] (%1.9g,%1.9g) tEnd=%1.9g newWindSum=%d windSum=",
             span.fT, span.fOther->fTs[span.fOtherIndex].fOtherIndex, pt.fX, pt.fY,
@@ -523,7 +523,7 @@ void SkOpSegment::debugShowNewWinding(const char* fun, const SkOpSpan& span, int
     for (int vIndex = 1; vIndex <= SkPathOpsVerbToPoints(fVerb); ++vIndex) {
         SkDebugf(" %1.9g,%1.9g", fPts[vIndex].fX, fPts[vIndex].fY);
     }
-    SK_DEBUGBREAK(&span == &span.fOther->fTs[span.fOtherIndex].fOther->
+    SK_ALWAYSBREAK(&span == &span.fOther->fTs[span.fOtherIndex].fOther->
             fTs[span.fOther->fTs[span.fOtherIndex].fOtherIndex]);
     SkDebugf(") t=%1.9g [%d] (%1.9g,%1.9g) tEnd=%1.9g newWindSum=%d newOppSum=%d oppSum=",
             span.fT, span.fOther->fTs[span.fOtherIndex].fOtherIndex, pt.fX, pt.fY,
@@ -569,9 +569,9 @@ int SkOpSegment::debugShowWindingValues(int slotCount, int ofInterest) const {
 void SkOpSegment::debugValidate() const {
 #if DEBUG_VALIDATE
     int count = fTs.count();
-    SK_DEBUGBREAK(count >= 2);
-    SK_DEBUGBREAK(fTs[0].fT == 0);
-    SK_DEBUGBREAK(fTs[count - 1].fT == 1);
+    SK_ALWAYSBREAK(count >= 2);
+    SK_ALWAYSBREAK(fTs[0].fT == 0);
+    SK_ALWAYSBREAK(fTs[count - 1].fT == 1);
     int done = 0;
     double t = -1;
     const SkOpSpan* last = NULL;
@@ -579,33 +579,33 @@ void SkOpSegment::debugValidate() const {
     bool hasLoop = false;
     for (int i = 0; i < count; ++i) {
         const SkOpSpan& span = fTs[i];
-        SK_DEBUGBREAK(t <= span.fT);
+        SK_ALWAYSBREAK(t <= span.fT);
         t = span.fT;
         int otherIndex = span.fOtherIndex;
         const SkOpSegment* other = span.fOther;
-        SK_DEBUGBREAK(other != this || fVerb == SkPath::kCubic_Verb);
+        SK_ALWAYSBREAK(other != this || fVerb == SkPath::kCubic_Verb);
         const SkOpSpan& otherSpan = other->fTs[otherIndex];
-        SK_DEBUGBREAK(otherSpan.fPt == span.fPt);
-        SK_DEBUGBREAK(otherSpan.fOtherT == t);
-        SK_DEBUGBREAK(&fTs[i] == &otherSpan.fOther->fTs[otherSpan.fOtherIndex]);
+        SK_ALWAYSBREAK(otherSpan.fPt == span.fPt);
+        SK_ALWAYSBREAK(otherSpan.fOtherT == t);
+        SK_ALWAYSBREAK(&fTs[i] == &otherSpan.fOther->fTs[otherSpan.fOtherIndex]);
         done += span.fDone;
         if (last) {
             bool tsEqual = last->fT == span.fT;
             bool tsPreciselyEqual = precisely_equal(last->fT, span.fT);
-            SK_DEBUGBREAK(!tsEqual || tsPreciselyEqual);
+            SK_ALWAYSBREAK(!tsEqual || tsPreciselyEqual);
             bool pointsEqual = last->fPt == span.fPt;
             bool pointsNearlyEqual = AlmostEqualUlps(last->fPt, span.fPt);
 #if 0  // bufferOverflow test triggers this
-            SK_DEBUGBREAK(!tsPreciselyEqual || pointsNearlyEqual);
+            SK_ALWAYSBREAK(!tsPreciselyEqual || pointsNearlyEqual);
 #endif
-//            SK_DEBUGBREAK(!last->fTiny || !tsPreciselyEqual || span.fTiny || tinyTFound);
-            SK_DEBUGBREAK(last->fTiny || tsPreciselyEqual || !pointsEqual || hasLoop);
-            SK_DEBUGBREAK(!last->fTiny || pointsEqual);
-            SK_DEBUGBREAK(!last->fTiny || last->fDone);
-            SK_DEBUGBREAK(!last->fSmall || pointsNearlyEqual);
-            SK_DEBUGBREAK(!last->fSmall || last->fDone);
-//            SK_DEBUGBREAK(!last->fSmall || last->fTiny);
-//            SK_DEBUGBREAK(last->fTiny || !pointsEqual || last->fDone == span.fDone);
+//            SK_ALWAYSBREAK(!last->fTiny || !tsPreciselyEqual || span.fTiny || tinyTFound);
+            SK_ALWAYSBREAK(last->fTiny || tsPreciselyEqual || !pointsEqual || hasLoop);
+            SK_ALWAYSBREAK(!last->fTiny || pointsEqual);
+            SK_ALWAYSBREAK(!last->fTiny || last->fDone);
+            SK_ALWAYSBREAK(!last->fSmall || pointsNearlyEqual);
+            SK_ALWAYSBREAK(!last->fSmall || last->fDone);
+//            SK_ALWAYSBREAK(!last->fSmall || last->fTiny);
+//            SK_ALWAYSBREAK(last->fTiny || !pointsEqual || last->fDone == span.fDone);
             if (last->fTiny) {
                 tinyTFound |= !tsPreciselyEqual;
             } else {
@@ -615,7 +615,7 @@ void SkOpSegment::debugValidate() const {
         last = &span;
         hasLoop |= last->fLoop;
     }
-    SK_DEBUGBREAK(done == fDoneSpans);
+    SK_ALWAYSBREAK(done == fDoneSpans);
     if (fAngles.count() ) {
         fAngles.begin()->debugValidateLoop();
     }
diff --git a/ports/SkFontConfigInterface_direct.cpp b/ports/SkFontConfigInterface_direct.cpp
index 13993f10..80ee56e8 100644
--- a/ports/SkFontConfigInterface_direct.cpp
+++ b/ports/SkFontConfigInterface_direct.cpp
@@ -15,6 +15,7 @@
 
 #include "SkBuffer.h"
 #include "SkFontConfigInterface.h"
+#include "SkOnce.h"
 #include "SkStream.h"
 
 size_t SkFontConfigInterface::FontIdentity::writeToMemory(void* addr) const {
@@ -123,16 +124,13 @@ private:
     SkMutex mutex_;
 };
 
+static void create_singleton_direct_interface(SkFontConfigInterface** singleton) {
+    *singleton = new SkFontConfigInterfaceDirect;
+}
 SkFontConfigInterface* SkFontConfigInterface::GetSingletonDirectInterface() {
     static SkFontConfigInterface* gDirect;
-    if (NULL == gDirect) {
-        static SkMutex gMutex;
-        SkAutoMutexAcquire ac(gMutex);
-
-        if (NULL == gDirect) {
-            gDirect = new SkFontConfigInterfaceDirect;
-        }
-    }
+    SK_DECLARE_STATIC_ONCE(once);
+    SkOnce(&once, create_singleton_direct_interface, &gDirect);
     return gDirect;
 }
 
diff --git a/ports/SkFontHost_win_dw.cpp b/ports/SkFontHost_win_dw.cpp
index 36ed4d4f..cd32fdbf 100644
--- a/ports/SkFontHost_win_dw.cpp
+++ b/ports/SkFontHost_win_dw.cpp
@@ -22,7 +22,10 @@
 #include "SkGlyph.h"
 #include "SkHRESULT.h"
 #include "SkMaskGamma.h"
+#include "SkMatrix22.h"
 #include "SkOnce.h"
+#include "SkOTTable_EBLC.h"
+#include "SkOTTable_EBSC.h"
 #include "SkOTTable_head.h"
 #include "SkOTTable_hhea.h"
 #include "SkOTTable_OS_2.h"
@@ -449,7 +452,22 @@ private:
     const void* drawDWMask(const SkGlyph& glyph);
 
     SkTDArray<uint8_t> fBits;
+    /** The total matrix without the text height scale. */
+    SkMatrix fSkXform;
+    /** The total matrix without the text height scale, as a DWRITE_MATRIX. */
     DWRITE_MATRIX fXform;
+    /** The non-rotational part of total matrix without the text height scale.
+     *  This is used to find the magnitude of gdi compatible advances.
+     */
+    DWRITE_MATRIX fGsA;
+    /** The inverse of the rotational part of the total matrix.
+     *  This is used to find the direction of gdi compatible advances.
+     */
+    SkMatrix fG_inv;
+    /** The text size to render with. */
+    SkScalar fTextSizeRender;
+    /** The text size to measure with. */
+    SkScalar fTextSizeMeasure;
     SkAutoTUnref<DWriteFontTypeface> fTypeface;
     int fGlyphCount;
     DWRITE_RENDERING_MODE fRenderingMode;
@@ -570,32 +588,224 @@ static bool FindByDWriteFont(SkTypeface* face, SkTypeface::Style requestedStyle,
            wcscmp(dwFaceFontNameChar.get(), dwFontNameChar.get()) == 0;
 }
 
+class AutoDWriteTable {
+public:
+    AutoDWriteTable(IDWriteFontFace* fontFace, UINT32 beTag) : fFontFace(fontFace), fExists(FALSE) {
+        // Any errors are ignored, user must check fExists anyway.
+        fontFace->TryGetFontTable(beTag,
+            reinterpret_cast<const void **>(&fData), &fSize, &fLock, &fExists);
+    }
+    ~AutoDWriteTable() {
+        if (fExists) {
+            fFontFace->ReleaseFontTable(fLock);
+        }
+    }
+
+    const uint8_t* fData;
+    UINT32 fSize;
+    BOOL fExists;
+private:
+    // Borrowed reference, the user must ensure the fontFace stays alive.
+    IDWriteFontFace* fFontFace;
+    void* fLock;
+};
+template<typename T> class AutoTDWriteTable : public AutoDWriteTable {
+public:
+    static const UINT32 tag = DWRITE_MAKE_OPENTYPE_TAG(T::TAG0, T::TAG1, T::TAG2, T::TAG3);
+    AutoTDWriteTable(IDWriteFontFace* fontFace) : AutoDWriteTable(fontFace, tag) { }
+
+    const T* get() const { return reinterpret_cast<const T*>(fData); }
+    const T* operator->() const { return reinterpret_cast<const T*>(fData); }
+};
+
+static bool hasBitmapStrike(DWriteFontTypeface* typeface, int size) {
+    {
+        AutoTDWriteTable<SkOTTableEmbeddedBitmapLocation> eblc(typeface->fDWriteFontFace.get());
+        if (!eblc.fExists) {
+            return false;
+        }
+        if (eblc.fSize < sizeof(SkOTTableEmbeddedBitmapLocation)) {
+            return false;
+        }
+        if (eblc->version != SkOTTableEmbeddedBitmapLocation::version_initial) {
+            return false;
+        }
+
+        uint32_t numSizes = SkEndianSwap32(eblc->numSizes);
+        if (eblc.fSize < sizeof(SkOTTableEmbeddedBitmapLocation) +
+                         sizeof(SkOTTableEmbeddedBitmapLocation::BitmapSizeTable) * numSizes)
+        {
+            return false;
+        }
+
+        const SkOTTableEmbeddedBitmapLocation::BitmapSizeTable* sizeTable =
+                SkTAfter<const SkOTTableEmbeddedBitmapLocation::BitmapSizeTable>(eblc.get());
+        for (uint32_t i = 0; i < numSizes; ++i, ++sizeTable) {
+            if (sizeTable->ppemX == size && sizeTable->ppemY == size) {
+                // TODO: determine if we should dig through IndexSubTableArray/IndexSubTable
+                // to determine the actual number of glyphs with bitmaps.
+
+                // TODO: Ensure that the bitmaps actually cover a significant portion of the strike.
+
+                // TODO: Ensure that the bitmaps are bi-level.
+                if (sizeTable->endGlyphIndex >= sizeTable->startGlyphIndex + 3) {
+                    return true;
+                }
+            }
+        }
+    }
+
+    {
+        AutoTDWriteTable<SkOTTableEmbeddedBitmapScaling> ebsc(typeface->fDWriteFontFace.get());
+        if (!ebsc.fExists) {
+            return false;
+        }
+        if (ebsc.fSize < sizeof(SkOTTableEmbeddedBitmapScaling)) {
+            return false;
+        }
+        if (ebsc->version != SkOTTableEmbeddedBitmapScaling::version_initial) {
+            return false;
+        }
+
+        uint32_t numSizes = SkEndianSwap32(ebsc->numSizes);
+        if (ebsc.fSize < sizeof(SkOTTableEmbeddedBitmapScaling) +
+                         sizeof(SkOTTableEmbeddedBitmapScaling::BitmapScaleTable) * numSizes)
+        {
+            return false;
+        }
+
+        const SkOTTableEmbeddedBitmapScaling::BitmapScaleTable* scaleTable =
+                SkTAfter<const SkOTTableEmbeddedBitmapScaling::BitmapScaleTable>(ebsc.get());
+        for (uint32_t i = 0; i < numSizes; ++i, ++scaleTable) {
+            if (scaleTable->ppemX == size && scaleTable->ppemY == size) {
+                // EBSC tables are normally only found in bitmap only fonts.
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+static bool bothZero(SkScalar a, SkScalar b) {
+    return 0 == a && 0 == b;
+}
+
+// returns false if there is any non-90-rotation or skew
+static bool isAxisAligned(const SkScalerContext::Rec& rec) {
+    return 0 == rec.fPreSkewX &&
+           (bothZero(rec.fPost2x2[0][1], rec.fPost2x2[1][0]) ||
+            bothZero(rec.fPost2x2[0][0], rec.fPost2x2[1][1]));
+}
+
 SkScalerContext_DW::SkScalerContext_DW(DWriteFontTypeface* typeface,
                                        const SkDescriptor* desc)
         : SkScalerContext(typeface, desc)
         , fTypeface(SkRef(typeface))
         , fGlyphCount(-1) {
 
-    fXform.m11 = SkScalarToFloat(fRec.fPost2x2[0][0]);
-    fXform.m12 = SkScalarToFloat(fRec.fPost2x2[1][0]);
-    fXform.m21 = SkScalarToFloat(fRec.fPost2x2[0][1]);
-    fXform.m22 = SkScalarToFloat(fRec.fPost2x2[1][1]);
-    fXform.dx = 0;
-    fXform.dy = 0;
-
-    if (SkMask::kBW_Format == fRec.fMaskFormat) {
+    // In general, all glyphs should use CLEARTYPE_NATURAL_SYMMETRIC
+    // except when bi-level rendering is requested or there are embedded
+    // bi-level bitmaps (and the embedded bitmap flag is set and no rotation).
+    //
+    // DirectWrite's IDWriteFontFace::GetRecommendedRenderingMode does not do
+    // this. As a result, determine the actual size of the text and then see if
+    // there are any embedded bi-level bitmaps of that size. If there are, then
+    // force bitmaps by requesting bi-level rendering.
+    //
+    // FreeType allows for separate ppemX and ppemY, but DirectWrite assumes
+    // square pixels and only uses ppemY. Therefore the transform must track any
+    // non-uniform x-scale.
+    //
+    // Also, rotated glyphs should have the same absolute advance widths as
+    // horizontal glyphs and the subpixel flag should not affect glyph shapes.
+
+    // A is the total matrix.
+    SkMatrix A;
+    fRec.getSingleMatrix(&A);
+
+    // h is where A maps the horizontal baseline.
+    SkPoint h = SkPoint::Make(SK_Scalar1, 0);
+    A.mapPoints(&h, 1);
+
+    // G is the Givens Matrix for A (rotational matrix where GA[0][1] == 0).
+    SkMatrix G;
+    SkComputeGivensRotation(h, &G);
+
+    // GA is the matrix A with rotation removed.
+    SkMatrix GA(G);
+    GA.preConcat(A);
+
+    // realTextSize is the actual device size we want (as opposed to the size the user requested).
+    // gdiTextSize is the size we request when GDI compatible.
+    // If the scale is negative, this means the matrix will do the flip anyway.
+    SkScalar realTextSize = SkScalarAbs(GA.get(SkMatrix::kMScaleY));
+    // Due to floating point math, the lower bits are suspect. Round carefully.
+    SkScalar roundedTextSize = SkScalarRoundToScalar(realTextSize * 64.0f) / 64.0f;
+    SkScalar gdiTextSize = SkScalarFloorToScalar(roundedTextSize);
+    if (gdiTextSize == 0) {
+        gdiTextSize = SK_Scalar1;
+    }
+
+    bool hasBitmap = fRec.fFlags & SkScalerContext::kEmbeddedBitmapText_Flag &&
+                     hasBitmapStrike(typeface, SkScalarTruncToInt(gdiTextSize));
+    bool axisAligned = isAxisAligned(fRec);
+    bool isBiLevel = SkMask::kBW_Format == fRec.fMaskFormat || (hasBitmap && axisAligned);
+
+    if (isBiLevel) {
+        fTextSizeRender = gdiTextSize;
         fRenderingMode = DWRITE_RENDERING_MODE_ALIASED;
         fTextureType = DWRITE_TEXTURE_ALIASED_1x1;
+        fTextSizeMeasure = gdiTextSize;
+        fMeasuringMode = DWRITE_MEASURING_MODE_GDI_CLASSIC;
+    } else if (hasBitmap) {
+        // If rotated but the horizontal text would have used a bitmap,
+        // render high quality rotated glyphs using the bitmap metrics.
+        fTextSizeRender = gdiTextSize;
+        fRenderingMode = DWRITE_RENDERING_MODE_CLEARTYPE_NATURAL_SYMMETRIC;
+        fTextureType = DWRITE_TEXTURE_CLEARTYPE_3x1;
+        fTextSizeMeasure = gdiTextSize;
         fMeasuringMode = DWRITE_MEASURING_MODE_GDI_CLASSIC;
     } else {
+        fTextSizeRender = realTextSize;
         fRenderingMode = DWRITE_RENDERING_MODE_CLEARTYPE_NATURAL_SYMMETRIC;
         fTextureType = DWRITE_TEXTURE_CLEARTYPE_3x1;
+        fTextSizeMeasure = realTextSize;
         fMeasuringMode = DWRITE_MEASURING_MODE_NATURAL;
     }
 
     if (this->isSubpixel()) {
+        fTextSizeMeasure = realTextSize;
         fMeasuringMode = DWRITE_MEASURING_MODE_NATURAL;
     }
+
+    // Remove the realTextSize, as that is the text height scale currently in A.
+    SkScalar scale = SkScalarInvert(realTextSize);
+
+    // fSkXform is the total matrix A without the text height scale.
+    fSkXform = A;
+    fSkXform.preScale(scale, scale); //remove the text height scale.
+
+    fXform.m11 = SkScalarToFloat(fSkXform.getScaleX());
+    fXform.m12 = SkScalarToFloat(fSkXform.getSkewY());
+    fXform.m21 = SkScalarToFloat(fSkXform.getSkewX());
+    fXform.m22 = SkScalarToFloat(fSkXform.getScaleY());
+    fXform.dx = 0;
+    fXform.dy = 0;
+
+    // GsA is the non-rotational part of A without the text height scale.
+    SkMatrix GsA(GA);
+    GsA.preScale(scale, scale); //remove text height scale, G is rotational so reorders with scale.
+
+    fGsA.m11 = SkScalarToFloat(GsA.get(SkMatrix::kMScaleX));
+    fGsA.m12 = SkScalarToFloat(GsA.get(SkMatrix::kMSkewY)); // This should be ~0.
+    fGsA.m21 = SkScalarToFloat(GsA.get(SkMatrix::kMSkewX));
+    fGsA.m22 = SkScalarToFloat(GsA.get(SkMatrix::kMScaleY));
+
+    // fG_inv is G inverse, which is fairly simple since G is 2x2 rotational.
+    fG_inv.setAll(G.get(SkMatrix::kMScaleX), -G.get(SkMatrix::kMSkewX), G.get(SkMatrix::kMTransX),
+                  -G.get(SkMatrix::kMSkewY), G.get(SkMatrix::kMScaleY), G.get(SkMatrix::kMTransY),
+                  G.get(SkMatrix::kMPersp0), G.get(SkMatrix::kMPersp1), G.get(SkMatrix::kMPersp2));
 }
 
 SkScalerContext_DW::~SkScalerContext_DW() {
@@ -631,9 +841,9 @@ void SkScalerContext_DW::generateAdvance(SkGlyph* glyph) {
         DWRITE_MEASURING_MODE_GDI_NATURAL == fMeasuringMode)
     {
         HRVM(fTypeface->fDWriteFontFace->GetGdiCompatibleGlyphMetrics(
-                 fRec.fTextSize,
+                 fTextSizeMeasure,
                  1.0f, // pixelsPerDip
-                 &fXform,
+                 &fGsA,
                  DWRITE_MEASURING_MODE_GDI_NATURAL == fMeasuringMode,
                  &glyphId, 1,
                  &gm),
@@ -645,7 +855,7 @@ void SkScalerContext_DW::generateAdvance(SkGlyph* glyph) {
 
     DWRITE_FONT_METRICS dwfm;
     fTypeface->fDWriteFontFace->GetMetrics(&dwfm);
-    SkScalar advanceX = SkScalarMulDiv(fRec.fTextSize,
+    SkScalar advanceX = SkScalarMulDiv(fTextSizeMeasure,
                                        SkIntToScalar(gm.advanceWidth),
                                        SkIntToScalar(dwfm.designUnitsPerEm));
 
@@ -654,9 +864,13 @@ void SkScalerContext_DW::generateAdvance(SkGlyph* glyph) {
     }
 
     SkVector vecs[1] = { { advanceX, 0 } };
-    SkMatrix mat;
-    fRec.getMatrixFrom2x2(&mat);
-    mat.mapVectors(vecs, SK_ARRAY_COUNT(vecs));
+    if (DWRITE_MEASURING_MODE_GDI_CLASSIC == fMeasuringMode ||
+        DWRITE_MEASURING_MODE_GDI_NATURAL == fMeasuringMode)
+    {
+        fG_inv.mapVectors(vecs, SK_ARRAY_COUNT(vecs));
+    } else {
+        fSkXform.mapVectors(vecs, SK_ARRAY_COUNT(vecs));
+    }
 
     glyph->fAdvanceX = SkScalarToFixed(vecs[0].fX);
     glyph->fAdvanceY = SkScalarToFixed(vecs[0].fY);
@@ -683,7 +897,7 @@ void SkScalerContext_DW::generateMetrics(SkGlyph* glyph) {
     run.glyphCount = 1;
     run.glyphAdvances = &advance;
     run.fontFace = fTypeface->fDWriteFontFace.get();
-    run.fontEmSize = SkScalarToFloat(fRec.fTextSize);
+    run.fontEmSize = SkScalarToFloat(fTextSizeRender);
     run.bidiLevel = 0;
     run.glyphIndices = &glyphId;
     run.isSideways = FALSE;
@@ -728,7 +942,7 @@ void SkScalerContext_DW::generateFontMetrics(SkPaint::FontMetrics* mx,
         DWRITE_MEASURING_MODE_GDI_NATURAL == fMeasuringMode)
     {
         fTypeface->fDWriteFontFace->GetGdiCompatibleMetrics(
-             fRec.fTextSize,
+             fTextSizeRender,
              1.0f, // pixelsPerDip
              &fXform,
              &dwfm);
@@ -738,28 +952,28 @@ void SkScalerContext_DW::generateFontMetrics(SkPaint::FontMetrics* mx,
 
     SkScalar upem = SkIntToScalar(dwfm.designUnitsPerEm);
     if (mx) {
-        mx->fTop = -fRec.fTextSize * SkIntToScalar(dwfm.ascent) / upem;
+        mx->fTop = -fTextSizeRender * SkIntToScalar(dwfm.ascent) / upem;
         mx->fAscent = mx->fTop;
-        mx->fDescent = fRec.fTextSize * SkIntToScalar(dwfm.descent) / upem;
+        mx->fDescent = fTextSizeRender * SkIntToScalar(dwfm.descent) / upem;
         mx->fBottom = mx->fDescent;
-        mx->fLeading = fRec.fTextSize * SkIntToScalar(dwfm.lineGap) / upem;
-        mx->fXHeight = fRec.fTextSize * SkIntToScalar(dwfm.xHeight) / upem;
-        mx->fUnderlineThickness = fRec.fTextSize * SkIntToScalar(dwfm.underlinePosition) / upem;
-        mx->fUnderlinePosition = -(fRec.fTextSize * SkIntToScalar(dwfm.underlineThickness) / upem);
+        mx->fLeading = fTextSizeRender * SkIntToScalar(dwfm.lineGap) / upem;
+        mx->fXHeight = fTextSizeRender * SkIntToScalar(dwfm.xHeight) / upem;
+        mx->fUnderlineThickness = fTextSizeRender * SkIntToScalar(dwfm.underlinePosition) / upem;
+        mx->fUnderlinePosition = -(fTextSizeRender * SkIntToScalar(dwfm.underlineThickness) / upem);
 
         mx->fFlags |= SkPaint::FontMetrics::kUnderlineThinknessIsValid_Flag;
         mx->fFlags |= SkPaint::FontMetrics::kUnderlinePositionIsValid_Flag;
     }
 
     if (my) {
-        my->fTop = -fRec.fTextSize * SkIntToScalar(dwfm.ascent) / upem;
+        my->fTop = -fTextSizeRender * SkIntToScalar(dwfm.ascent) / upem;
         my->fAscent = my->fTop;
-        my->fDescent = fRec.fTextSize * SkIntToScalar(dwfm.descent) / upem;
+        my->fDescent = fTextSizeRender * SkIntToScalar(dwfm.descent) / upem;
         my->fBottom = my->fDescent;
-        my->fLeading = fRec.fTextSize * SkIntToScalar(dwfm.lineGap) / upem;
-        my->fXHeight = fRec.fTextSize * SkIntToScalar(dwfm.xHeight) / upem;
-        my->fUnderlineThickness = fRec.fTextSize * SkIntToScalar(dwfm.underlinePosition) / upem;
-        my->fUnderlinePosition = -(fRec.fTextSize * SkIntToScalar(dwfm.underlineThickness) / upem);
+        my->fLeading = fTextSizeRender * SkIntToScalar(dwfm.lineGap) / upem;
+        my->fXHeight = fTextSizeRender * SkIntToScalar(dwfm.xHeight) / upem;
+        my->fUnderlineThickness = fTextSizeRender * SkIntToScalar(dwfm.underlinePosition) / upem;
+        my->fUnderlinePosition = -(fTextSizeRender * SkIntToScalar(dwfm.underlineThickness) / upem);
 
         my->fFlags |= SkPaint::FontMetrics::kUnderlineThinknessIsValid_Flag;
         my->fFlags |= SkPaint::FontMetrics::kUnderlinePositionIsValid_Flag;
@@ -888,7 +1102,7 @@ const void* SkScalerContext_DW::drawDWMask(const SkGlyph& glyph) {
     run.glyphCount = 1;
     run.glyphAdvances = &advance;
     run.fontFace = fTypeface->fDWriteFontFace.get();
-    run.fontEmSize = SkScalarToFloat(fRec.fTextSize);
+    run.fontEmSize = SkScalarToFloat(fTextSizeRender);
     run.bidiLevel = 0;
     run.glyphIndices = &index;
     run.isSideways = FALSE;
@@ -966,7 +1180,7 @@ void SkScalerContext_DW::generatePath(const SkGlyph& glyph, SkPath* path) {
     uint16_t glyphId = glyph.getGlyphID();
     //TODO: convert to<->from DIUs? This would make a difference if hinting.
     //It may not be needed, it appears that DirectWrite only hints at em size.
-    HRVM(fTypeface->fDWriteFontFace->GetGlyphRunOutline(SkScalarToFloat(fRec.fTextSize),
+    HRVM(fTypeface->fDWriteFontFace->GetGlyphRunOutline(SkScalarToFloat(fTextSizeRender),
                                        &glyphId,
                                        NULL, //advances
                                        NULL, //offsets
@@ -976,9 +1190,7 @@ void SkScalerContext_DW::generatePath(const SkGlyph& glyph, SkPath* path) {
                                        geometryToPath.get()),
          "Could not create glyph outline.");
 
-    SkMatrix mat;
-    fRec.getMatrixFrom2x2(&mat);
-    path->transform(mat);
+    path->transform(fSkXform);
 }
 
 void DWriteFontTypeface::onGetFontDescriptor(SkFontDescriptor* desc,
@@ -1146,28 +1358,6 @@ int DWriteFontTypeface::onGetTableTags(SkFontTableTag tags[]) const {
     return stream.get() ? SkFontStream::GetTableTags(stream, ttcIndex, tags) : 0;
 }
 
-class AutoDWriteTable {
-public:
-    AutoDWriteTable(IDWriteFontFace* fontFace, UINT32 beTag) : fFontFace(fontFace), fExists(FALSE) {
-        // Any errors are ignored, user must check fExists anyway.
-        fontFace->TryGetFontTable(beTag,
-            reinterpret_cast<const void **>(&fData), &fSize, &fLock, &fExists);
-    }
-    ~AutoDWriteTable() {
-        if (fExists) {
-            fFontFace->ReleaseFontTable(fLock);
-        }
-    }
-
-    const uint8_t* fData;
-    UINT32 fSize;
-    BOOL fExists;
-private:
-    // Borrowed reference, the user must ensure the fontFace stays alive.
-    IDWriteFontFace* fFontFace;
-    void* fLock;
-};
-
 size_t DWriteFontTypeface::onGetTableData(SkFontTableTag tag, size_t offset,
                                           size_t length, void* data) const
 {
@@ -1260,7 +1450,6 @@ void DWriteFontTypeface::onFilterRec(SkScalerContext::Rec* rec) const {
 
     unsigned flagsWeDontSupport = SkScalerContext::kDevKernText_Flag |
                                   SkScalerContext::kForceAutohinting_Flag |
-                                  SkScalerContext::kEmbeddedBitmapText_Flag |
                                   SkScalerContext::kEmbolden_Flag |
                                   SkScalerContext::kLCD_BGROrder_Flag |
                                   SkScalerContext::kLCD_Vertical_Flag;
@@ -1362,14 +1551,6 @@ static bool getWidthAdvance(IDWriteFontFace* fontFace, int gId, int16_t* advance
     return true;
 }
 
-template<typename T> class AutoTDWriteTable : public AutoDWriteTable {
-public:
-    static const UINT32 tag = DWRITE_MAKE_OPENTYPE_TAG(T::TAG0, T::TAG1, T::TAG2, T::TAG3);
-    AutoTDWriteTable(IDWriteFontFace* fontFace) : AutoDWriteTable(fontFace, tag) { }
-
-    const T* operator->() const { return reinterpret_cast<const T*>(fData); }
-};
-
 SkAdvancedTypefaceMetrics* DWriteFontTypeface::onGetAdvancedTypefaceMetrics(
         SkAdvancedTypefaceMetrics::PerGlyphInfo perGlyphInfo,
         const uint32_t* glyphIDs,
diff --git a/record/SkRecordOpts.cpp b/record/SkRecordOpts.cpp
index 5b537de0..aaa611cf 100644
--- a/record/SkRecordOpts.cpp
+++ b/record/SkRecordOpts.cpp
@@ -7,10 +7,12 @@
 
 #include "SkRecordOpts.h"
 
-#include "SkRecordTraits.h"
+#include "SkRecordPattern.h"
 #include "SkRecords.h"
 #include "SkTDArray.h"
 
+using namespace SkRecords;
+
 void SkRecordOptimize(SkRecord* record) {
     // TODO(mtklein): fuse independent optimizations to reduce number of passes?
     SkRecordNoopSaveRestores(record);
@@ -19,205 +21,180 @@ void SkRecordOptimize(SkRecord* record) {
     SkRecordBoundDrawPosTextH(record);
 }
 
-namespace {
-
-// Convenience base class to share some common implementation code.
-class Common : SkNoncopyable {
-public:
-    explicit Common(SkRecord* record) : fRecord(record), fIndex(0) {}
-
-    unsigned index() const { return fIndex; }
-    void next() { ++fIndex; }
-
-protected:
-    SkRecord* fRecord;
-    unsigned fIndex;
-};
-
-// Turns logical no-op Save-[non-drawing command]*-Restore patterns into actual no-ops.
-// TODO(mtklein): state machine diagram
-class SaveRestoreNooper : public Common {
-public:
-    explicit SaveRestoreNooper(SkRecord* record)
-        : Common(record), fSave(kInactive), fChanged(false) {}
-
-    // Drawing commands reset state to inactive without nooping.
-    template <typename T>
-    SK_WHEN(SkRecords::IsDraw<T>, void) operator()(T*) { fSave = kInactive; }
+// Most of the optimizations in this file are pattern-based.  These are all defined as structs with:
+//   - a Pattern typedef
+//   - a bool onMatch(SkRecord*, Pattern*, unsigned begin, unsigned end) method,
+//     which returns true if it made changes and false if not.
 
-    // Most non-drawing commands can be ignored.
-    template <typename T>
-    SK_WHEN(!SkRecords::IsDraw<T>, void) operator()(T*) {}
+// Run a pattern-based optimization once across the SkRecord, returning true if it made any changes.
+// It looks for spans which match Pass::Pattern, and when found calls onMatch() with the record,
+// pattern, and [begin,end) span of the commands that matched.
+template <typename Pass>
+static bool apply(Pass* pass, SkRecord* record) {
+    typename Pass::Pattern pattern;
+    bool changed = false;
+    unsigned begin, end = 0;
 
-    void operator()(SkRecords::Save* r) {
-        fSave = SkCanvas::kMatrixClip_SaveFlag == r->flags ? this->index() : kInactive;
+    while (pattern.search(record, &begin, &end)) {
+        changed |= pass->onMatch(record, &pattern, begin, end);
     }
+    return changed;
+}
 
-    void operator()(SkRecords::Restore* r) {
-        if (fSave != kInactive) {
-            // Remove everything between the save and restore, inclusive on both sides.
-            fChanged = true;
-            for (unsigned i = fSave; i <= this->index(); i++) {
-                fRecord->replace<SkRecords::NoOp>(i);
-            }
-            fSave = kInactive;
+// Turns logical no-op Save-[non-drawing command]*-Restore patterns into actual no-ops.
+struct SaveRestoreNooper {
+    // Star matches greedily, so we also have to exclude Save and Restore.
+    typedef Pattern3<Is<Save>,
+                     Star<Not<Or3<Is<Save>,
+                                  Is<Restore>,
+                                  IsDraw> > >,
+                     Is<Restore> >
+        Pattern;
+
+    bool onMatch(SkRecord* record, Pattern* pattern, unsigned begin, unsigned end) {
+        // If restore doesn't revert both matrix and clip, this isn't safe to noop away.
+        if (pattern->first<Save>()->flags != SkCanvas::kMatrixClip_SaveFlag) {
+            return false;
         }
-    }
-
-    bool changed() const { return fChanged; }
-
-private:
-    static const unsigned kInactive = ~0;
-    unsigned fSave;
-    bool fChanged;
-};
-
-// Tries to replace PushCull with PairedPushCull, which lets us skip to the paired PopCull
-// when the canvas can quickReject the cull rect.
-class CullAnnotator : public Common {
-public:
-    explicit CullAnnotator(SkRecord* record) : Common(record) {}
-
-    // Do nothing to most ops.
-    template <typename T> void operator()(T*) {}
-
-    void operator()(SkRecords::PushCull* push) {
-        Pair pair = { this->index(), push };
-        fPushStack.push(pair);
-    }
-
-    void operator()(SkRecords::PopCull* pop) {
-        Pair push = fPushStack.top();
-        fPushStack.pop();
-
-        SkASSERT(this->index() > push.index);
-        unsigned skip = this->index() - push.index;
 
-        SkRecords::Adopted<SkRecords::PushCull> adopted(push.command);
-        SkNEW_PLACEMENT_ARGS(fRecord->replace<SkRecords::PairedPushCull>(push.index, adopted),
-                             SkRecords::PairedPushCull, (&adopted, skip));
+        // The entire span between Save and Restore (inclusively) does nothing.
+        for (unsigned i = begin; i < end; i++) {
+            record->replace<NoOp>(i);
+        }
+        return true;
     }
-
-private:
-    struct Pair {
-        unsigned index;
-        SkRecords::PushCull* command;
-    };
-
-    SkTDArray<Pair> fPushStack;
 };
+void SkRecordNoopSaveRestores(SkRecord* record) {
+    SaveRestoreNooper pass;
+    while (apply(&pass, record));  // Run until it stops changing things.
+}
 
 // Replaces DrawPosText with DrawPosTextH when all Y coordinates are equal.
-class StrengthReducer : public Common {
-public:
-    explicit StrengthReducer(SkRecord* record) : Common(record) {}
+struct StrengthReducer {
+    typedef Pattern1<Is<DrawPosText> > Pattern;
 
-    // Do nothing to most ops.
-    template <typename T> void operator()(T*) {}
+    bool onMatch(SkRecord* record, Pattern* pattern, unsigned begin, unsigned end) {
+        SkASSERT(end == begin + 1);
+        DrawPosText* draw = pattern->first<DrawPosText>();
 
-    void operator()(SkRecords::DrawPosText* r) {
-        const unsigned points = r->paint.countText(r->text, r->byteLength);
+        const unsigned points = draw->paint.countText(draw->text, draw->byteLength);
         if (points == 0) {
-            // No point (ha!).
-            return;
+            return false;  // No point (ha!).
         }
 
-        const SkScalar firstY = r->pos[0].fY;
+        const SkScalar firstY = draw->pos[0].fY;
         for (unsigned i = 1; i < points; i++) {
-            if (r->pos[i].fY != firstY) {
-                // Needs the full strength of DrawPosText.
-                return;
+            if (draw->pos[i].fY != firstY) {
+                return false;  // Needs full power of DrawPosText.
             }
         }
         // All ys are the same.  We can replace DrawPosText with DrawPosTextH.
 
-        // r->pos is points SkPoints, [(x,y),(x,y),(x,y),(x,y), ... ].
+        // draw->pos is points SkPoints, [(x,y),(x,y),(x,y),(x,y), ... ].
         // We're going to squint and look at that as 2*points SkScalars, [x,y,x,y,x,y,x,y, ...].
         // Then we'll rearrange things so all the xs are in order up front, clobbering the ys.
         SK_COMPILE_ASSERT(sizeof(SkPoint) == 2 * sizeof(SkScalar), SquintingIsNotSafe);
-        SkScalar* scalars = &r->pos[0].fX;
+        SkScalar* scalars = &draw->pos[0].fX;
         for (unsigned i = 0; i < 2*points; i += 2) {
             scalars[i/2] = scalars[i];
         }
 
-        // Extend lifetime of r to the end of the method so we can copy its parts.
-        SkRecords::Adopted<SkRecords::DrawPosText> adopted(r);
-        SkNEW_PLACEMENT_ARGS(fRecord->replace<SkRecords::DrawPosTextH>(this->index(), adopted),
-                             SkRecords::DrawPosTextH,
-                             (r->text, r->byteLength, scalars, firstY, r->paint));
+        // Extend lifetime of draw to the end of this method so we can copy its parts.
+        Adopted<DrawPosText> adopted(draw);
+        SkNEW_PLACEMENT_ARGS(record->replace<DrawPosTextH>(begin, adopted),
+                             DrawPosTextH,
+                             (draw->text, draw->byteLength, scalars, firstY, draw->paint));
+        return true;
     }
 };
+void SkRecordReduceDrawPosTextStrength(SkRecord* record) {
+    StrengthReducer pass;
+    apply(&pass, record);
+}
 
 // Tries to replace DrawPosTextH with BoundedDrawPosTextH, which knows conservative upper and lower
 // bounds to use with SkCanvas::quickRejectY.
-class TextBounder : public Common {
-public:
-    explicit TextBounder(SkRecord* record) : Common(record) {}
+struct TextBounder {
+    typedef Pattern1<Is<DrawPosTextH> > Pattern;
 
-    // Do nothing to most ops.
-    template <typename T> void operator()(T*) {}
+    bool onMatch(SkRecord* record, Pattern* pattern, unsigned begin, unsigned end) {
+        SkASSERT(end == begin + 1);
+        DrawPosTextH* draw = pattern->first<DrawPosTextH>();
 
-    void operator()(SkRecords::DrawPosTextH* r) {
         // If we're drawing vertical text, none of the checks we're about to do make any sense.
         // We'll need to call SkPaint::computeFastBounds() later, so bail if that's not possible.
-        if (r->paint.isVerticalText() || !r->paint.canComputeFastBounds()) {
-            return;
+        if (draw->paint.isVerticalText() || !draw->paint.canComputeFastBounds()) {
+            return false;
         }
 
         // Rather than checking the top and bottom font metrics, we guess.  Actually looking up the
         // top and bottom metrics is slow, and this overapproximation should be good enough.
-        const SkScalar buffer = r->paint.getTextSize() * 1.5f;
+        const SkScalar buffer = draw->paint.getTextSize() * 1.5f;
         SkDEBUGCODE(SkPaint::FontMetrics metrics;)
-        SkDEBUGCODE(r->paint.getFontMetrics(&metrics);)
+        SkDEBUGCODE(draw->paint.getFontMetrics(&metrics);)
         SkASSERT(-buffer <= metrics.fTop);
         SkASSERT(+buffer >= metrics.fBottom);
 
         // Let the paint adjust the text bounds.  We don't care about left and right here, so we use
         // 0 and 1 respectively just so the bounds rectangle isn't empty.
         SkRect bounds;
-        bounds.set(0, r->y - buffer, SK_Scalar1, r->y + buffer);
-        SkRect adjusted = r->paint.computeFastBounds(bounds, &bounds);
-
-        SkRecords::Adopted<SkRecords::DrawPosTextH> adopted(r);
-        SkNEW_PLACEMENT_ARGS(
-                fRecord->replace<SkRecords::BoundedDrawPosTextH>(this->index(), adopted),
-                SkRecords::BoundedDrawPosTextH,
-                (&adopted, adjusted.fTop, adjusted.fBottom));
+        bounds.set(0, draw->y - buffer, SK_Scalar1, draw->y + buffer);
+        SkRect adjusted = draw->paint.computeFastBounds(bounds, &bounds);
+
+        Adopted<DrawPosTextH> adopted(draw);
+        SkNEW_PLACEMENT_ARGS(record->replace<BoundedDrawPosTextH>(begin, adopted),
+                             BoundedDrawPosTextH,
+                             (&adopted, adjusted.fTop, adjusted.fBottom));
+        return true;
     }
 };
+void SkRecordBoundDrawPosTextH(SkRecord* record) {
+    TextBounder pass;
+    apply(&pass, record);
+}
 
+// Replaces PushCull with PairedPushCull, which lets us skip to the paired PopCull when the canvas
+// can quickReject the cull rect.
+// There's no efficient way (yet?) to express this one as a pattern, so we write a custom pass.
+class CullAnnotator {
+public:
+    // Do nothing to most ops.
+    template <typename T> void operator()(T*) {}
 
-template <typename Pass>
-static void run_pass(Pass& pass, SkRecord* record) {
-    for (; pass.index() < record->count(); pass.next()) {
-        record->mutate(pass.index(), pass);
+    void operator()(PushCull* push) {
+        Pair pair = { fIndex, push };
+        fPushStack.push(pair);
     }
-}
 
-}  // namespace
+    void operator()(PopCull* pop) {
+        Pair push = fPushStack.top();
+        fPushStack.pop();
 
+        SkASSERT(fIndex > push.index);
+        unsigned skip = fIndex - push.index;
 
-void SkRecordNoopSaveRestores(SkRecord* record) {
-    // Run SaveRestoreNooper until it doesn't make any more changes.
-    bool changed;
-    do {
-        SaveRestoreNooper nooper(record);
-        run_pass(nooper, record);
-        changed = nooper.changed();
-    } while (changed);
-}
+        Adopted<PushCull> adopted(push.command);
+        SkNEW_PLACEMENT_ARGS(fRecord->replace<PairedPushCull>(push.index, adopted),
+                             PairedPushCull, (&adopted, skip));
+    }
 
-void SkRecordAnnotateCullingPairs(SkRecord* record) {
-    CullAnnotator annotator(record);
-    run_pass(annotator, record);
-}
+    void apply(SkRecord* record) {
+        for (fRecord = record, fIndex = 0; fIndex < record->count(); fIndex++) {
+            fRecord->mutate(fIndex, *this);
+        }
+    }
 
-void SkRecordReduceDrawPosTextStrength(SkRecord* record) {
-    StrengthReducer reducer(record);
-    run_pass(reducer, record);
-}
+private:
+    struct Pair {
+        unsigned index;
+        PushCull* command;
+    };
 
-void SkRecordBoundDrawPosTextH(SkRecord* record) {
-    TextBounder bounder(record);
-    run_pass(bounder, record);
+    SkTDArray<Pair> fPushStack;
+    SkRecord* fRecord;
+    unsigned fIndex;
+};
+void SkRecordAnnotateCullingPairs(SkRecord* record) {
+    CullAnnotator pass;
+    pass.apply(record);
 }
diff --git a/record/SkRecordPattern.h b/record/SkRecordPattern.h
new file mode 100644
index 00000000..2023a905
--- /dev/null
+++ b/record/SkRecordPattern.h
@@ -0,0 +1,219 @@
+#ifndef SkRecordPattern_DEFINED
+#define SkRecordPattern_DEFINED
+
+#include "SkTLogic.h"
+
+namespace SkRecords {
+
+// First, some matchers.  These match a single command in the SkRecord,
+// and may hang onto some data from it.  If so, you can get the data by calling .get().
+
+// Matches a command of type T, and stores that command.
+template <typename T>
+class Is {
+public:
+    Is() : fPtr(NULL) {}
+
+    typedef T type;
+    type* get() { return fPtr; }
+
+    bool match(T* ptr) {
+        fPtr = ptr;
+        return true;
+    }
+
+    template <typename U>
+    bool match(U*) {
+        fPtr = NULL;
+        return false;
+    }
+
+private:
+    type* fPtr;
+};
+
+// Matches any command that draws, and stores its paint.
+class IsDraw {
+    SK_CREATE_MEMBER_DETECTOR(paint);
+public:
+    IsDraw() : fPaint(NULL) {}
+
+    typedef SkPaint type;
+    type* get() { return fPaint; }
+
+    template <typename T>
+    SK_WHEN(HasMember_paint<T>, bool) match(T* draw) {
+        fPaint = AsPtr(draw->paint);
+        return true;
+    }
+
+    template <typename T>
+    SK_WHEN(!HasMember_paint<T>, bool) match(T*) {
+        fPaint = NULL;
+        return false;
+    }
+
+private:
+    // Abstracts away whether the paint is always part of the command or optional.
+    template <typename T> static T* AsPtr(SkRecords::Optional<T>& x) { return x; }
+    template <typename T> static T* AsPtr(T& x) { return &x; }
+
+    type* fPaint;
+};
+
+// Matches if Matcher doesn't.  Stores nothing.
+template <typename Matcher>
+struct Not {
+    template <typename T>
+    bool match(T* ptr) { return !Matcher().match(ptr); }
+};
+
+// Matches if either of A or B does.  Stores nothing.
+template <typename A, typename B>
+struct Or {
+    template <typename T>
+    bool match(T* ptr) { return A().match(ptr) || B().match(ptr); }
+};
+
+// Matches if any of A, B or C does.  Stores nothing.
+template <typename A, typename B, typename C>
+struct Or3 : Or<A, Or<B, C> > {};
+
+// We'll use this to choose which implementation of Star suits each Matcher.
+SK_CREATE_TYPE_DETECTOR(type);
+
+// Star is a special matcher that matches Matcher 0 or more times _greedily_ in the SkRecord.
+// This version stores nothing.  It's enabled when Matcher stores nothing.
+template <typename Matcher, typename = void>
+class Star {
+public:
+    void reset() {}
+
+    template <typename T>
+    bool match(T* ptr) { return Matcher().match(ptr); }
+};
+
+// This version stores a list of matches.  It's enabled if Matcher stores something.
+template <typename Matcher>
+class Star<Matcher, SK_WHEN(HasType_type<Matcher>, void)> {
+public:
+    typedef SkTDArray<typename Matcher::type*> type;
+    type* get() { return &fMatches; }
+
+    void reset() { fMatches.rewind(); }
+
+    template <typename T>
+    bool match(T* ptr) {
+        Matcher matcher;
+        if (matcher.match(ptr)) {
+            fMatches.push(matcher.get());
+            return true;
+        }
+        return false;
+    }
+
+private:
+    type fMatches;
+};
+
+
+// Cons builds a list of Matchers.
+// It first matches Matcher (something from above), then Pattern (another Cons or Nil).
+//
+// This is the main entry point to pattern matching, and so provides a couple of extra API bits:
+//  - search scans through the record to look for matches;
+//  - first, second, and third return the data stored by their respective matchers in the pattern.
+//
+// These Cons build lists analogously to Lisp's "cons".  See Pattern# for the "list" equivalent.
+template <typename Matcher, typename Pattern>
+class Cons {
+public:
+    // If this pattern matches the SkRecord starting at i,
+    // return the index just past the end of the pattern, otherwise return 0.
+    SK_ALWAYS_INLINE unsigned match(SkRecord* record, unsigned i) {
+        i = this->matchHead(&fHead, record, i);
+        return i == 0 ? 0 : fTail.match(record, i);
+    }
+
+    // Starting from *end, walk through the SkRecord to find the first span matching this pattern.
+    // If there is no such span, return false.  If there is, return true and set [*begin, *end).
+    SK_ALWAYS_INLINE bool search(SkRecord* record, unsigned* begin, unsigned* end) {
+        for (*begin = *end; *begin < record->count(); ++(*begin)) {
+            *end = this->match(record, *begin);
+            if (*end != 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // Once either match or search has succeeded, access the stored data of the first, second,
+    // or third matcher in this pattern.  Add as needed for longer patterns.
+    // T is checked statically at compile time; no casting is involved.  It's just an API wart.
+    template <typename T> T* first()  { return fHead.get(); }
+    template <typename T> T* second() { return fTail.fHead.get(); }
+    template <typename T> T* third()  { return fTail.fTail.fHead.get(); }
+
+private:
+    template <typename T>
+    void operator()(T* r) { fHeadMatched = fHead.match(r); }
+
+    // If head isn't a Star, try to match at i once.
+    template <typename T>
+    unsigned matchHead(T*, SkRecord* record, unsigned i) {
+        if (i < record->count()) {
+            fHeadMatched = false;
+            record->mutate(i, *this);
+            if (fHeadMatched) {
+                return i+1;
+            }
+        }
+        return 0;
+    }
+
+    // If head is a Star, walk i until it doesn't match.
+    template <typename T>
+    unsigned matchHead(Star<T>*, SkRecord* record, unsigned i) {
+        fHead.reset();
+        while (i < record->count()) {
+            fHeadMatched = false;
+            record->mutate(i, *this);
+            if (!fHeadMatched) {
+                return i;
+            }
+            i++;
+        }
+        return 0;
+    }
+
+    Matcher fHead;
+    Pattern fTail;
+    bool fHeadMatched;
+
+    friend class ::SkRecord;  // So operator() can otherwise stay private.
+
+    // All Cons are friends with each other.  This lets first, second, and third work.
+    template <typename, typename> friend class Cons;
+};
+
+// Nil is the end of every pattern Cons chain.
+struct Nil {
+    // Bottoms out recursion down the fTail chain.  Just return whatever i the front decided on.
+    unsigned match(SkRecord*, unsigned i) { return i; }
+};
+
+// These Pattern# types are syntax sugar over Cons and Nil, just to help eliminate some of the
+// template noise.  Use these if you can.  Feel free to add more for longer patterns.
+// All types A, B, C, ... are Matchers.
+template <typename A>
+struct Pattern1 : Cons<A, Nil> {};
+
+template <typename A, typename B>
+struct Pattern2 : Cons<A, Pattern1<B> > {};
+
+template <typename A, typename B, typename C>
+struct Pattern3 : Cons<A, Pattern2<B, C> > {};
+
+}  // namespace SkRecords
+
+#endif//SkRecordPattern_DEFINED
diff --git a/record/SkRecordTraits.h b/record/SkRecordTraits.h
deleted file mode 100644
index 570a717e..00000000
--- a/record/SkRecordTraits.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#include "SkRecords.h"
-#include "SkTLogic.h"
-
-// Type traits that are useful for working with SkRecords.
-
-namespace SkRecords {
-
-namespace {
-
-// Abstracts away whether the T is optional or not.
-template <typename T> const T* as_ptr(const SkRecords::Optional<T>& x) { return x; }
-template <typename T> const T* as_ptr(const T& x) { return &x; }
-
-}  // namespace
-
-// Gets the paint from any command that may have one.
-template <typename Command> const SkPaint* GetPaint(const Command& x) { return as_ptr(x.paint); }
-
-// Have a paint?  You are a draw command!
-template <typename Command> struct IsDraw {
-    SK_CREATE_MEMBER_DETECTOR(paint);
-    static const bool value = HasMember_paint<Command>::value;
-};
-
-// Have a clip op?  You are a clip command.
-template <typename Command> struct IsClip {
-    SK_CREATE_MEMBER_DETECTOR(op);
-    static const bool value = HasMember_op<Command>::value;
-};
-
-}  // namespace SkRecords
diff --git a/sfnt/SkOTTable_EBDT.h b/sfnt/SkOTTable_EBDT.h
new file mode 100644
index 00000000..89d7a3ab
--- /dev/null
+++ b/sfnt/SkOTTable_EBDT.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2014 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkOTTable_EBDT_DEFINED
+#define SkOTTable_EBDT_DEFINED
+
+#include "SkEndian.h"
+#include "SkOTTableTypes.h"
+#include "SkOTTable_head.h"
+#include "SkOTTable_loca.h"
+#include "SkTypedEnum.h"
+
+#pragma pack(push, 1)
+
+struct SkOTTableEmbeddedBitmapData {
+    static const SK_OT_CHAR TAG0 = 'E';
+    static const SK_OT_CHAR TAG1 = 'B';
+    static const SK_OT_CHAR TAG2 = 'D';
+    static const SK_OT_CHAR TAG3 = 'T';
+    static const SK_OT_ULONG TAG = SkOTTableTAG<SkOTTableEmbeddedBitmapData>::value;
+
+    SK_OT_Fixed version;
+    static const SK_OT_Fixed version_initial = SkTEndian_SwapBE32(0x00020000);
+
+    struct BigGlyphMetrics {
+        SK_OT_BYTE height;
+        SK_OT_BYTE width;
+        SK_OT_CHAR horiBearingX;
+        SK_OT_CHAR horiBearingY;
+        SK_OT_BYTE horiAdvance;
+        SK_OT_CHAR vertBearingX;
+        SK_OT_CHAR vertBearingY;
+        SK_OT_BYTE vertAdvance;
+    };
+
+    struct SmallGlyphMetrics {
+        SK_OT_BYTE height;
+        SK_OT_BYTE width;
+        SK_OT_CHAR bearingX;
+        SK_OT_CHAR bearingY;
+        SK_OT_BYTE advance;
+    };
+
+    // Small metrics, byte-aligned data.
+    struct Format1 {
+        SmallGlyphMetrics smallGlyphMetrics;
+        //SK_OT_BYTE[] byteAlignedBitmap;
+    };
+
+    // Small metrics, bit-aligned data.
+    struct Format2 {
+        SmallGlyphMetrics smallGlyphMetrics;
+        //SK_OT_BYTE[] bitAlignedBitmap;
+    };
+
+    // Format 3 is not used.
+
+    // EBLC metrics (IndexSubTable::header::indexFormat 2 or 5), compressed data.
+    // Only used on Mac.
+    struct Format4 {
+        SK_OT_ULONG whiteTreeOffset;
+        SK_OT_ULONG blackTreeOffset;
+        SK_OT_ULONG glyphDataOffset;
+    };
+
+    // EBLC metrics (IndexSubTable::header::indexFormat 2 or 5), bit-aligned data.
+    struct Format5 {
+        //SK_OT_BYTE[] bitAlignedBitmap;
+    };
+
+    // Big metrics, byte-aligned data.
+    struct Format6 {
+        BigGlyphMetrics bigGlyphMetrics;
+        //SK_OT_BYTE[] byteAlignedBitmap;
+    };
+
+    // Big metrics, bit-aligned data.
+    struct Format7 {
+        BigGlyphMetrics bigGlyphMetrics;
+        //SK_OT_BYTE[] bitAlignedBitmap;
+    };
+
+    struct EBDTComponent {
+        SK_OT_USHORT glyphCode; // Component glyph code
+        SK_OT_CHAR xOffset; // Position of component left
+        SK_OT_CHAR yOffset; // Position of component top
+    };
+
+    struct Format8 {
+        SmallGlyphMetrics smallMetrics; // Metrics information for the glyph
+        SK_OT_BYTE pad; // Pad to short boundary
+        SK_OT_USHORT numComponents; // Number of components
+        //EBDTComponent componentArray[numComponents]; // Glyph code, offset array
+    };
+
+    struct Format9 {
+        BigGlyphMetrics bigMetrics; // Metrics information for the glyph
+        SK_OT_USHORT numComponents; // Number of components
+        //EBDTComponent componentArray[numComponents]; // Glyph code, offset array
+    };
+};
+
+#pragma pack(pop)
+
+#endif
diff --git a/sfnt/SkOTTable_EBLC.h b/sfnt/SkOTTable_EBLC.h
new file mode 100644
index 00000000..845418d3
--- /dev/null
+++ b/sfnt/SkOTTable_EBLC.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2014 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkOTTable_EBLC_DEFINED
+#define SkOTTable_EBLC_DEFINED
+
+#include "SkEndian.h"
+#include "SkOTTable_EBDT.h"
+#include "SkOTTableTypes.h"
+#include "SkTypedEnum.h"
+
+#pragma pack(push, 1)
+
+struct SkOTTableEmbeddedBitmapLocation { // Byte-packed (see #pragma pack above) layout of the 'EBLC' table; the tag is spelled by TAG0..TAG3.
+    static const SK_OT_CHAR TAG0 = 'E';
+    static const SK_OT_CHAR TAG1 = 'B';
+    static const SK_OT_CHAR TAG2 = 'L';
+    static const SK_OT_CHAR TAG3 = 'C';
+    static const SK_OT_ULONG TAG = SkOTTableTAG<SkOTTableEmbeddedBitmapLocation>::value;
+
+    SK_OT_Fixed version;
+    static const SK_OT_Fixed version_initial = SkTEndian_SwapBE32(0x00020000); // presumably 2.0 in 16.16 fixed point, pre-swapped to big-endian -- confirm against SK_OT_Fixed
+
+    SK_OT_ULONG numSizes; //number of BitmapSizeTable entries (bitmapSizeTable[numSizes])
+
+    struct SbitLineMetrics { //line metrics record; BitmapSizeTable holds one for 'hori' and one for 'vert'
+        SK_OT_CHAR ascender;
+        SK_OT_CHAR descender;
+        SK_OT_BYTE widthMax;
+        SK_OT_CHAR caretSlopeNumerator;
+        SK_OT_CHAR caretSlopeDenominator;
+        SK_OT_CHAR caretOffset;
+        SK_OT_CHAR minOriginSB;
+        SK_OT_CHAR minAdvanceSB;
+        SK_OT_CHAR maxBeforeBL;
+        SK_OT_CHAR minAfterBL;
+        SK_OT_CHAR pad1;
+        SK_OT_CHAR pad2;
+    };
+
+    struct BitmapSizeTable {
+        SK_OT_ULONG indexSubTableArrayOffset; //offset to indexSubtableArray from beginning of EBLC.
+        SK_OT_ULONG indexTablesSize; //number of bytes in corresponding index subtables and array
+        SK_OT_ULONG numberOfIndexSubTables; //an index subtable for each range or format change
+        SK_OT_ULONG colorRef; //not used; set to 0.
+        SbitLineMetrics hori; //line metrics for text rendered horizontally
+        SbitLineMetrics vert; //line metrics for text rendered vertically
+        SK_OT_USHORT startGlyphIndex; //lowest glyph index for this size
+        SK_OT_USHORT endGlyphIndex; //highest glyph index for this size
+        SK_OT_BYTE ppemX; //horizontal pixels per Em
+        SK_OT_BYTE ppemY; //vertical pixels per Em
+        struct BitDepth {
+            SK_TYPED_ENUM(Value, SK_OT_BYTE,
+                ((BW, 1))
+                ((Gray4, 2))
+                ((Gray16, 4))
+                ((Gray256, 8))
+                SK_SEQ_END,
+            SK_SEQ_END)
+            SK_OT_BYTE value;
+        } bitDepth; //the Microsoft rasterizer v.1.7 or greater supports the bit depths listed in Value above (1, 2, 4 and 8)
+        union Flags { //one byte, readable either as named bits (field) or as a raw value with mask constants (raw)
+            struct Field {
+                //0-7
+                SK_OT_BYTE_BITFIELD(
+                    Horizontal, // Horizontal small glyph metrics
+                    Vertical,  // Vertical small glyph metrics
+                    Reserved02,
+                    Reserved03,
+                    Reserved04,
+                    Reserved05,
+                    Reserved06,
+                    Reserved07)
+            } field;
+            struct Raw {
+                static const SK_OT_CHAR Horizontal = 1u << 0;
+                static const SK_OT_CHAR Vertical = 1u << 1;
+                SK_OT_CHAR value;
+            } raw;
+        } flags;
+    }; //bitmapSizeTable[numSizes];
+
+    struct IndexSubTableArray {
+        SK_OT_USHORT firstGlyphIndex; //first glyph code of this range
+        SK_OT_USHORT lastGlyphIndex; //last glyph code of this range (inclusive)
+        SK_OT_ULONG additionalOffsetToIndexSubtable; //add to BitmapSizeTable::indexSubTableArrayOffset to get offset from beginning of 'EBLC'
+    }; //indexSubTableArray[BitmapSizeTable::numberOfIndexSubTables];
+
+    struct IndexSubHeader { //common prefix: first member of every IndexSubTableN below
+        SK_OT_USHORT indexFormat; //format of this indexSubTable
+        SK_OT_USHORT imageFormat; //format of 'EBDT' image data
+        SK_OT_ULONG imageDataOffset; //offset to image data in 'EBDT' table
+    };
+
+    // Variable metrics glyphs with 4 byte offsets
+    struct IndexSubTable1 {
+        IndexSubHeader header;
+        //SK_OT_ULONG offsetArray[lastGlyphIndex - firstGlyphIndex + 1 + 1]; //last element points to one past end of last glyph
+        //glyphData = offsetArray[glyphIndex - firstGlyphIndex] + imageDataOffset
+    };
+
+    // All Glyphs have identical metrics
+    struct IndexSubTable2 {
+        IndexSubHeader header;
+        SK_OT_ULONG imageSize; // all glyphs are of the same size
+        SkOTTableEmbeddedBitmapData::BigGlyphMetrics bigMetrics; // all glyphs have the same metrics; glyph data may be compressed, byte-aligned, or bit-aligned
+    };
+
+    // Variable metrics glyphs with 2 byte offsets
+    struct IndexSubTable3 {
+        IndexSubHeader header;
+        //SK_OT_USHORT offsetArray[lastGlyphIndex - firstGlyphIndex + 1 + 1]; //last element points to one past end of last glyph, may have extra element to force even number of elements
+        //glyphData = offsetArray[glyphIndex - firstGlyphIndex] + imageDataOffset
+    };
+
+    // Variable metrics glyphs with sparse glyph codes
+    struct IndexSubTable4 {
+        IndexSubHeader header;
+        SK_OT_ULONG numGlyphs;
+        struct CodeOffsetPair { //type declaration only; IndexSubTable4 declares no member of this type
+            SK_OT_USHORT glyphCode;
+            SK_OT_USHORT offset; //location in EBDT
+        }; //glyphArray[numGlyphs+1]
+    };
+
+    // Constant metrics glyphs with sparse glyph codes
+    struct IndexSubTable5 {
+        IndexSubHeader header;
+        SK_OT_ULONG imageSize; //all glyphs have the same data size
+        SkOTTableEmbeddedBitmapData::BigGlyphMetrics bigMetrics; //all glyphs have the same metrics
+        SK_OT_ULONG numGlyphs;
+        //SK_OT_USHORT glyphCodeArray[numGlyphs] //must have even number of entries (set pad to 0)
+    };
+
+    union IndexSubTable { //every member starts with an IndexSubHeader; presumably header.indexFormat tells which formatN applies
+        IndexSubHeader header;
+        IndexSubTable1 format1;
+        IndexSubTable2 format2;
+        IndexSubTable3 format3;
+        IndexSubTable4 format4;
+        IndexSubTable5 format5;
+    };
+
+};
+
+#pragma pack(pop)
+
+#endif
diff --git a/sfnt/SkOTTable_EBSC.h b/sfnt/SkOTTable_EBSC.h
new file mode 100644
index 00000000..316c45d1
--- /dev/null
+++ b/sfnt/SkOTTable_EBSC.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkOTTable_EBSC_DEFINED
+#define SkOTTable_EBSC_DEFINED
+
+#include "SkEndian.h"
+#include "SkOTTable_EBLC.h"
+#include "SkOTTableTypes.h"
+
+#pragma pack(push, 1)
+
+struct SkOTTableEmbeddedBitmapScaling { // Byte-packed (see #pragma pack above) layout of the OpenType 'EBSC' (embedded bitmap scaling) table.
+    static const SK_OT_CHAR TAG0 = 'E';
+    static const SK_OT_CHAR TAG1 = 'B'; // TAG0..TAG3 must spell the table tag 'EBSC' in order
+    static const SK_OT_CHAR TAG2 = 'S';
+    static const SK_OT_CHAR TAG3 = 'C';
+    static const SK_OT_ULONG TAG = SkOTTableTAG<SkOTTableEmbeddedBitmapScaling>::value;
+
+    SK_OT_Fixed version;
+    static const SK_OT_Fixed version_initial = SkTEndian_SwapBE32(0x00020000); // presumably 2.0 in 16.16 fixed point, pre-swapped to big-endian -- confirm against SK_OT_Fixed
+
+    SK_OT_ULONG numSizes; //number of BitmapScaleTable entries (bitmapScaleTable[numSizes])
+
+    struct BitmapScaleTable {
+        SkOTTableEmbeddedBitmapLocation::SbitLineMetrics hori;
+        SkOTTableEmbeddedBitmapLocation::SbitLineMetrics vert;
+        SK_OT_BYTE ppemX; //target horizontal pixels per EM
+        SK_OT_BYTE ppemY; //target vertical pixels per EM
+        SK_OT_BYTE substitutePpemX; //use bitmaps of this size
+        SK_OT_BYTE substitutePpemY; //use bitmaps of this size
+    }; //bitmapScaleTable[numSizes];
+};
+
+#pragma pack(pop)
+
+#endif
diff --git a/utils/SkTLogic.h b/utils/SkTLogic.h
index 62952ad1..925d4bdc 100644
--- a/utils/SkTLogic.h
+++ b/utils/SkTLogic.h
@@ -89,4 +89,14 @@ public:
     static const bool value = sizeof(func<Derived>(NULL)) == sizeof(uint16_t);      \
 }
 
+// Like SK_CREATE_MEMBER_DETECTOR, but detects a nested type: HasType_<type><T>::value is true when T::<type> names a type. The macro emits no trailing ';', so the caller supplies it.
+#define SK_CREATE_TYPE_DETECTOR(type)                                   \
+template <typename T>                                                   \
+class HasType_##type {                                                  \
+    template <typename U> static uint8_t func(typename U::type*);       \
+    template <typename U> static uint16_t func(...);                    \
+public:                                                                 \
+    static const bool value = sizeof(func<T>(NULL)) == sizeof(uint8_t); \
+}
+
 #endif
diff --git a/utils/debugger/SkDebugCanvas.cpp b/utils/debugger/SkDebugCanvas.cpp
index ec201789..14fbf888 100644
--- a/utils/debugger/SkDebugCanvas.cpp
+++ b/utils/debugger/SkDebugCanvas.cpp
@@ -125,8 +125,6 @@ public:
 #endif
 };
 
-// The OverdrawFilter modifies every paint to use an SkProcXfermode which
-// in turn invokes OverdrawXferModeProc
 class SkOverdrawFilter : public SkDrawFilter {
 public:
     SkOverdrawFilter() {
author	Torne (Richard Coles) <torne@google.com>	2014-05-14 12:13:15 +0100
committer	Torne (Richard Coles) <torne@google.com>	2014-05-14 12:13:15 +0100
commit	6b0bad3aba066dc8d7d47a5f6738ebf773ffc2e9 (patch)
tree	ad988be7ad87928b7b98c44f2def4cce7d3330b6
parent	27ab20dffff01006f5d20fdb2b3f4ea503d69114 (diff)
parent	d60f7edf0fa7eb2eb7c99de486abfe61ad3dcd69 (diff)
download	src-master.tar.gz