summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Simon Hosie <simon.hosie@arm.com> 2014-05-01 23:28:45 -0700
committer Simon Hosie <simon.hosie@arm.com> 2014-05-07 15:03:01 -0700
commit 1d9c887c58d115975e01c9d500595f503803dc8c (patch)
tree 7238184201d2cca382d88f93fd26c68c9ed3f817
parent 9bd08e84ba2c9bd0708de5600877162126c4467c (diff)
download rs-1d9c887c58d115975e01c9d500595f503803dc8c.tar.gz
YuvToRGB sub-rectangle handling.
Fix some difficult edge cases when processing only a portion of the image.
Also fix a register-marshalling bug in the AArch64 assembly.

Change-Id: I8cd67f394fb42b216b2c3c7401e90eb2b86fca3d
-rw-r--r-- cpu_ref/rsCpuIntrinsicYuvToRGB.cpp 17
-rw-r--r-- cpu_ref/rsCpuIntrinsics_advsimd_YuvToRGB.S 6
-rw-r--r-- cpu_ref/rsCpuIntrinsics_neon_YuvToRGB.S 4
3 files changed, 18 insertions, 9 deletions
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index 2d905de3..f65d6aa2 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -123,7 +123,7 @@ void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p,
}
const uchar *Y = pinY + (p->y * strideY);
- uchar4 *out = (uchar4 *)p->out;
+ uchar4 *out = (uchar4 *)p->out + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -152,11 +152,20 @@ void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p,
cstep = 2;
}
+ /* If we start on an odd pixel then deal with it here and bump things along
+ * so that subsequent code can carry on with even-odd pairing assumptions.
+ */
+ if((x1 & 1) && (x2 > x1)) {
+ int cx = (x1 >> 1) * cstep;
+ *out = rsYuvToRGBA_uchar4(Y[x1], u[cx], v[cx]);
+ out++;
+ x1++;
+ }
#if defined(ARCH_ARM_HAVE_VFP)
if((x2 > x1) && gArchUseSIMD) {
int32_t len = x2 - x1;
if (cstep == 1) {
- rsdIntrinsicYuv2_K(out, Y, u, v, x1, x2);
+ rsdIntrinsicYuv2_K(p->out, Y, u, v, x1, x2);
x1 += len;
out += len;
} else if (cstep == 2) {
@@ -165,11 +174,11 @@ void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p,
intptr_t ipv = (intptr_t)v;
if (ipu == (ipv + 1)) {
- rsdIntrinsicYuv_K(out, Y, v, x1, x2);
+ rsdIntrinsicYuv_K(p->out, Y, v, x1, x2);
x1 += len;
out += len;
} else if (ipu == (ipv - 1)) {
- rsdIntrinsicYuvR_K(out, Y, u, x1, x2);
+ rsdIntrinsicYuvR_K(p->out, Y, u, x1, x2);
x1 += len;
out += len;
}
diff --git a/cpu_ref/rsCpuIntrinsics_advsimd_YuvToRGB.S b/cpu_ref/rsCpuIntrinsics_advsimd_YuvToRGB.S
index 9232a796..632ef7a4 100644
--- a/cpu_ref/rsCpuIntrinsics_advsimd_YuvToRGB.S
+++ b/cpu_ref/rsCpuIntrinsics_advsimd_YuvToRGB.S
@@ -165,7 +165,7 @@
1: tbz x2, #0, 1f
ld1 {v8.b}[1], [x1], #1
.if \interleaved
- ld1 {v16.b}[1], [x3], #1
+ ld1 {v16.h}[0], [x3], #2
.else
ld1 {v16.b}[0], [x3], #1
ld1 {v17.b}[0], [x4], #1
@@ -247,7 +247,7 @@ END(rsdIntrinsicYuv2_K)
* size_t xend); // x4
*/
ENTRY(rsdIntrinsicYuv_K)
- bic x5, x4, #1
+ bic x5, x3, #1
add x0, x0, x5, LSL #2
add x1, x1, x5
add x3, x2, x5
@@ -273,7 +273,7 @@ END(rsdIntrinsicYuv_K)
* size_t xend); // x4
*/
ENTRY(rsdIntrinsicYuvR_K)
- bic x5, x4, #1
+ bic x5, x3, #1
add x0, x0, x5, LSL #2
add x1, x1, x5
add x3, x2, x5
diff --git a/cpu_ref/rsCpuIntrinsics_neon_YuvToRGB.S b/cpu_ref/rsCpuIntrinsics_neon_YuvToRGB.S
index da4cdedb..5c3bce41 100644
--- a/cpu_ref/rsCpuIntrinsics_neon_YuvToRGB.S
+++ b/cpu_ref/rsCpuIntrinsics_neon_YuvToRGB.S
@@ -117,7 +117,7 @@
\kernel
- subs r2, #16
+ subs r2, #16
vst4.u8 {d0,d2,d4,d6}, [r0]!
vst4.u8 {d1,d3,d5,d7}, [r0]!
@@ -169,7 +169,7 @@
beq 1f
vld1.u8 d16[1], [r1]!
.if \interleaved
- vld1.u8 d20[1], [r3]!
+ vld1.u16 d20[0], [r3]!
.else
vld1.u8 d20[0], [r3]!
vld1.u8 d21[0], [r4]!