diff options
author | Simon Hosie <simon.hosie@arm.com> | 2016-04-20 16:20:25 -0700 |
---|---|---|
committer | Simon Hosie <simon.hosie@arm.com> | 2016-04-21 18:07:50 -0700 |
commit | 5a1f196d68d54513c081958adf4ce3dcafed9ea2 (patch) | |
tree | b03f130f848dfb1ddc1663a81f16c43bdaf276dc /cpu_ref/rsCpuIntrinsicBlur.cpp | |
parent | 18ca8ae8797d9b948a5bda0f1974e24ffdb2c339 (diff) | |
download | rs-5a1f196d68d54513c081958adf4ce3dcafed9ea2.tar.gz |
Refactor ARM Blur prefill logic.
Refactor the prefill logic for ARM (and improve documentation along the way) so
as to fix some cases where data is read outside of the source image, and to
minimise the remaining cases which must fall back to the C implementation.
Change-Id: I3d06416b40c48dea06258e9f7bb5ddc246d7c710
Diffstat (limited to 'cpu_ref/rsCpuIntrinsicBlur.cpp')
-rw-r--r-- | cpu_ref/rsCpuIntrinsicBlur.cpp | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index cac10d81..9d51e68b 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -297,7 +297,7 @@ void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelDriverInfo *info,
     uint32_t x2 = xend;
 
 #if defined(ARCH_ARM_USE_INTRINSICS)
-    if (gArchUseSIMD) {
+    if (gArchUseSIMD && info->dim.x >= 4) {
         rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * info->current.y),
                  info->dim.x, info->dim.y,
                  stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
@@ -368,10 +368,15 @@ void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelDriverInfo *info,
     uint32_t x2 = xend;
 
 #if defined(ARCH_ARM_USE_INTRINSICS)
-    if (gArchUseSIMD) {
-        rsdIntrinsicBlurU1_K(out, pin + stride * info->current.y, info->dim.x, info->dim.y,
-                 stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
-        return;
+    if (gArchUseSIMD && info->dim.x >= 16) {
+        // The specialisation for r<=8 has an awkward prefill case, which is
+        // fiddly to resolve, where starting close to the right edge can cause
+        // a read beyond the end of input. So avoid that case here.
+        if (cp->mIradius > 8 || (info->dim.x - rsMax(0, (int32_t)x1 - 8)) >= 16) {
+            rsdIntrinsicBlurU1_K(out, pin + stride * info->current.y, info->dim.x, info->dim.y,
+                    stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
+            return;
+        }
     }
 #endif
 