diff options
author | Stephen Hines <srhines@google.com> | 2013-01-14 20:44:09 -0800 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2013-01-14 20:46:46 -0800 |
commit | 2913f381a554c28abb44f49eddd1ee4c68a72578 (patch) | |
tree | 67dbc011e26a9e80b557fc913e2aa32b5aa9b449 /cpu_ref/rsCpuIntrinsicBlur.cpp | |
parent | 5e3fb0b3cfadcb44a74cf4b6ec9ec65c11ba811e (diff) | |
download | rs-2913f381a554c28abb44f49eddd1ee4c68a72578.tar.gz |
Align all allocations to a 16-byte boundary.
This change also fixes an issue in the Blur intrinsic, where we mis-cast a
float array to float4 (and thus encountered some new alignment errors with
the updated LLVM).
Change-Id: I3955b38f156c35f4d160652c75ab416bae09b2c8
Diffstat (limited to 'cpu_ref/rsCpuIntrinsicBlur.cpp')
-rw-r--r-- | cpu_ref/rsCpuIntrinsicBlur.cpp | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index 0d9fde86..d44b07ae 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -272,8 +272,8 @@
 void RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p,
                                          uint32_t xstart, uint32_t xend,
                                          uint32_t instep, uint32_t outstep) {
-    float stackbuf[4 * 2048];
-    float *buf = &stackbuf[0];
+    float4 stackbuf[2048];
+    float4 *buf = &stackbuf[0];
     RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr;
     if (!cp->mAlloc.get()) {
         ALOGE("Blur executed without input, skipping");
@@ -291,7 +291,7 @@ void RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p,
             cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], p->dimX * 16);
             cp->mScratchSize[p->lid] = p->dimX;
         }
-        buf = (float *)cp->mScratch[p->lid];
+        buf = (float4 *)cp->mScratch[p->lid];
     }
     float4 *fout = (float4 *)buf;
     int y = p->y;
@@ -308,20 +308,20 @@ void RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p,
 
     x1 = xstart;
     while ((x1 < (uint32_t)cp->mIradius) && (x1 < x2)) {
-        OneHU4(p, out, x1, (float4 *)buf, cp->mFp, cp->mIradius);
+        OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
         out++;
         x1++;
     }
 #if defined(ARCH_ARM_HAVE_NEON)
     if ((x1 + cp->mIradius) < x2) {
-        rsdIntrinsicBlurHFU4_K(out, ((float4 *)buf) - cp->mIradius, cp->mFp,
+        rsdIntrinsicBlurHFU4_K(out, buf - cp->mIradius, cp->mFp,
                                cp->mIradius * 2 + 1, x1, x2 - cp->mIradius);
         out += (x2 - cp->mIradius) - x1;
         x1 = x2 - cp->mIradius;
     }
 #endif
     while(x2 > x1) {
-        OneHU4(p, out, x1, (float4 *)buf, cp->mFp, cp->mIradius);
+        OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
         out++;
         x1++;
     }