summary | refs | log | tree | commit | diff
path: root/cpu_ref/rsCpuIntrinsicBlur.cpp
diff options
context:
space:
mode:
author Jason Sams <jsams@google.com> 2014-08-21 17:40:26 -0700
committer Jason Sams <jsams@google.com> 2014-08-21 17:40:26 -0700
commit 4c513c12f0f620c336efce7b92b8f26aae39ffdd (patch)
tree cbac651e9e338c36603c1e0bf5ae261bc7f76b65 /cpu_ref/rsCpuIntrinsicBlur.cpp
parent 50067eface1865235fb90cecc84bd585dec219f5 (diff)
parent 5f95a8abfbd11aed8d6e1061e46c6d7e09847c7d (diff)
download rs-4c513c12f0f620c336efce7b92b8f26aae39ffdd.tar.gz
resolved conflicts for merge of 5f95a8ab to lmp-dev-plus-aosp
Change-Id: I3044361ad6d5af09906e4a32e47efc3f29486514
Diffstat (limited to 'cpu_ref/rsCpuIntrinsicBlur.cpp')
-rw-r--r-- cpu_ref/rsCpuIntrinsicBlur.cpp 20
1 files changed, 12 insertions, 8 deletions
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index 7f888e91..eb5f7481 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -128,7 +128,7 @@ static void OneVU4(const RsExpandKernelParams *p, float4 *out, int32_t x, int32_
gPtr++;
}
- out->xyzw = blurredPixel;
+ out[0] = blurredPixel;
}
static void OneVU1(const RsExpandKernelParams *p, float *out, int32_t x, int32_t y,
@@ -163,6 +163,7 @@ extern "C" void rsdIntrinsicBlurHFU1_K(void *dst, const void *pin, const void *g
static void OneVFU4(float4 *out,
const uchar *ptrIn, int iStride, const float* gPtr, int ct,
int x1, int x2) {
+ out += x1;
#if defined(ARCH_X86_HAVE_SSSE3)
if (gArchUseSIMD) {
int t = (x2 - x1);
@@ -195,6 +196,7 @@ static void OneVFU1(float *out,
const uchar *ptrIn, int iStride, const float* gPtr, int ct, int x1, int x2) {
int len = x2 - x1;
+ out += x1;
while((x2 > x1) && (((uintptr_t)ptrIn) & 0x3)) {
const uchar *pi = ptrIn;
@@ -293,7 +295,7 @@ void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelParams *p,
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS)
- if (gArchUseSIMD) {
+ if (gArchUseSIMD && !xstart && (xend == p->dimX)) {
rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * p->y), p->dimX, p->dimY,
stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
return;
@@ -313,9 +315,10 @@ void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelParams *p,
int y = p->y;
if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius))) {
const uchar *pi = pin + (y - cp->mIradius) * stride;
- OneVFU4(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, x1, x2);
+ OneVFU4(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, p->dimX);
} else {
- while(x2 > x1) {
+ x1 = 0;
+ while(p->dimX > x1) {
OneVU4(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
fout++;
x1++;
@@ -362,9 +365,9 @@ void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelParams *p,
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS)
- if (gArchUseSIMD) {
+ if (gArchUseSIMD && !xstart && (xend == p->dimX)) {
rsdIntrinsicBlurU1_K(out, pin + stride * p->y, p->dimX, p->dimY,
- stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
+ stride, 0, p->y, p->dimX, cp->mIradius, cp->mIp + cp->mIradius);
return;
}
#endif
@@ -373,9 +376,10 @@ void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelParams *p,
int y = p->y;
if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius -1))) {
const uchar *pi = pin + (y - cp->mIradius) * stride;
- OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, x1, x2);
+ OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, p->dimX);
} else {
- while(x2 > x1) {
+ x1 = 0;
+ while(p->dimX > x1) {
OneVU1(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
fout++;
x1++;