summary | refs | log | tree | commit | diff
path: root/cpu_ref/rsCpuIntrinsicBlur.cpp
diff options
context:
space:
mode:
author: Stephen Hines <srhines@google.com> 2013-01-14 20:44:09 -0800
committer: Stephen Hines <srhines@google.com> 2013-01-14 20:46:46 -0800
commit: 2913f381a554c28abb44f49eddd1ee4c68a72578 (patch)
tree: 67dbc011e26a9e80b557fc913e2aa32b5aa9b449 /cpu_ref/rsCpuIntrinsicBlur.cpp
parent: 5e3fb0b3cfadcb44a74cf4b6ec9ec65c11ba811e (diff)
download: rs-2913f381a554c28abb44f49eddd1ee4c68a72578.tar.gz
Align all allocations to a 16-byte boundary.

This change also fixes an issue in the Blur intrinsic, where we mis-cast a float array to float4 (and thus encountered some new alignment errors with the updated LLVM).

Change-Id: I3955b38f156c35f4d160652c75ab416bae09b2c8
Diffstat (limited to 'cpu_ref/rsCpuIntrinsicBlur.cpp')
-rw-r--r-- cpu_ref/rsCpuIntrinsicBlur.cpp | 12
1 file changed, 6 insertions, 6 deletions
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index 0d9fde86..d44b07ae 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -272,8 +272,8 @@ void RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
- float stackbuf[4 * 2048];
- float *buf = &stackbuf[0];
+ float4 stackbuf[2048];
+ float4 *buf = &stackbuf[0];
RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr;
if (!cp->mAlloc.get()) {
ALOGE("Blur executed without input, skipping");
@@ -291,7 +291,7 @@ void RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p,
cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], p->dimX * 16);
cp->mScratchSize[p->lid] = p->dimX;
}
- buf = (float *)cp->mScratch[p->lid];
+ buf = (float4 *)cp->mScratch[p->lid];
}
float4 *fout = (float4 *)buf;
int y = p->y;
@@ -308,20 +308,20 @@ void RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p,
x1 = xstart;
while ((x1 < (uint32_t)cp->mIradius) && (x1 < x2)) {
- OneHU4(p, out, x1, (float4 *)buf, cp->mFp, cp->mIradius);
+ OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
#if defined(ARCH_ARM_HAVE_NEON)
if ((x1 + cp->mIradius) < x2) {
- rsdIntrinsicBlurHFU4_K(out, ((float4 *)buf) - cp->mIradius, cp->mFp,
+ rsdIntrinsicBlurHFU4_K(out, buf - cp->mIradius, cp->mFp,
cp->mIradius * 2 + 1, x1, x2 - cp->mIradius);
out += (x2 - cp->mIradius) - x1;
x1 = x2 - cp->mIradius;
}
#endif
while(x2 > x1) {
- OneHU4(p, out, x1, (float4 *)buf, cp->mFp, cp->mIradius);
+ OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}