summary | refs | log | tree | commit | diff
path: root/cpu_ref/rsCpuIntrinsicBlur.cpp
diff options
context:
space:
mode:
author: Simon Hosie <simon.hosie@arm.com> 2016-04-20 16:20:25 -0700
committer: Simon Hosie <simon.hosie@arm.com> 2016-04-21 18:07:50 -0700
commit: 5a1f196d68d54513c081958adf4ce3dcafed9ea2 (patch)
tree: b03f130f848dfb1ddc1663a81f16c43bdaf276dc /cpu_ref/rsCpuIntrinsicBlur.cpp
parent: 18ca8ae8797d9b948a5bda0f1974e24ffdb2c339 (diff)
download: rs-5a1f196d68d54513c081958adf4ce3dcafed9ea2.tar.gz
Refactor ARM Blur prefill logic.
Refactor the prefill logic for ARM (and improve documentation along the way) to fix some cases where data is read outside of the source image, and to minimise the remaining cases which must fall back to the C implementation.

Change-Id: I3d06416b40c48dea06258e9f7bb5ddc246d7c710
Diffstat (limited to 'cpu_ref/rsCpuIntrinsicBlur.cpp')
-rw-r--r-- cpu_ref/rsCpuIntrinsicBlur.cpp 15
1 file changed, 10 insertions, 5 deletions
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index cac10d81..9d51e68b 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -297,7 +297,7 @@ void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS)
- if (gArchUseSIMD) {
+ if (gArchUseSIMD && info->dim.x >= 4) {
rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * info->current.y),
info->dim.x, info->dim.y,
stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
@@ -368,10 +368,15 @@ void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS)
- if (gArchUseSIMD) {
- rsdIntrinsicBlurU1_K(out, pin + stride * info->current.y, info->dim.x, info->dim.y,
- stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
- return;
+ if (gArchUseSIMD && info->dim.x >= 16) {
+ // The specialisation for r<=8 has an awkward prefill case, which is
+ // fiddly to resolve, where starting close to the right edge can cause
+ // a read beyond the end of input. So avoid that case here.
+ if (cp->mIradius > 8 || (info->dim.x - rsMax(0, (int32_t)x1 - 8)) >= 16) {
+ rsdIntrinsicBlurU1_K(out, pin + stride * info->current.y, info->dim.x, info->dim.y,
+ stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
+ return;
+ }
}
#endif