summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cpu_ref/Android.mk3
-rw-r--r--cpu_ref/rsCpuCore.cpp242
-rw-r--r--cpu_ref/rsCpuCore.h65
-rw-r--r--cpu_ref/rsCpuIntrinsic.cpp49
-rw-r--r--cpu_ref/rsCpuIntrinsic.h55
-rw-r--r--cpu_ref/rsCpuIntrinsic3DLUT.cpp10
-rw-r--r--cpu_ref/rsCpuIntrinsicBlend.cpp5
-rw-r--r--cpu_ref/rsCpuIntrinsicColorMatrix.cpp38
-rw-r--r--cpu_ref/rsCpuIntrinsicHistogram.cpp58
-rw-r--r--cpu_ref/rsCpuIntrinsicLUT.cpp4
-rw-r--r--cpu_ref/rsCpuIntrinsicResize.cpp14
-rw-r--r--cpu_ref/rsCpuScript.cpp214
-rw-r--r--cpu_ref/rsCpuScript.h36
-rw-r--r--cpu_ref/rsCpuScriptGroup.cpp103
-rw-r--r--cpu_ref/rsd_cpu.h22
-rw-r--r--driver/rsdBcc.cpp21
-rw-r--r--rsRuntime.h2
-rw-r--r--rsScript.cpp31
-rw-r--r--rsScript.h10
-rw-r--r--rsScriptC.cpp46
-rw-r--r--rsScriptC.h8
-rw-r--r--rsScriptC_Lib.cpp12
-rw-r--r--rsScriptGroup.cpp11
-rw-r--r--rsScriptIntrinsic.cpp27
-rw-r--r--rsScriptIntrinsic.h12
25 files changed, 464 insertions, 634 deletions
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index aeb75a65..ad7cef73 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -1,7 +1,8 @@
LOCAL_PATH:=$(call my-dir)
-rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable -fno-exceptions
+rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable \
+ -fno-exceptions -std=c++11
ifeq ($(TARGET_BUILD_PDK), true)
rs_base_CFLAGS += -D__RS_PDK__
endif
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index a0564fc1..db3cc7fa 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -350,180 +350,134 @@ RsdCpuReferenceImpl::~RsdCpuReferenceImpl() {
}
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
+typedef void (*walk_loop_t)(MTLaunchStruct*,
+ RsExpandKernelParams&,
+ outer_foreach_t);
-static void wc_xy(void *usr, uint32_t idx) {
+
+static void walk_wrapper(void* usr, uint32_t idx, walk_loop_t walk_loop) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+ uint32_t inLen = mtls->fep.inLen;
+
RsExpandKernelParams kparams;
kparams.takeFields(mtls->fep);
// Used by CpuScriptGroup, IntrinsicBlur, and IntrinsicHistogram
kparams.lid = idx;
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- while (1) {
- uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
- uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
- uint32_t yEnd = yStart + mtls->mSliceSize;
-
- yEnd = rsMin(yEnd, mtls->yEnd);
-
- if (yEnd <= yStart) {
- return;
- }
-
- //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
- //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
-
- for (kparams.y = yStart; kparams.y < yEnd; kparams.y++) {
- kparams.out = mtls->fep.ptrOut +
- (mtls->fep.yStrideOut * kparams.y) +
- (mtls->fep.eStrideOut * mtls->xStart);
-
- kparams.in = mtls->fep.ptrIn +
- (mtls->fep.yStrideIn * kparams.y) +
- (mtls->fep.eStrideIn * mtls->xStart);
+ if (inLen > 0) {
+ // Allocate space for our input base pointers.
+ kparams.ins = (const void**)alloca(inLen * sizeof(void*));
+ // Allocate space for our input stride information.
+ kparams.inEStrides = (uint32_t*)alloca(inLen * sizeof(uint32_t));
- fn(&kparams, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn,
- mtls->fep.eStrideOut);
+ // Fill our stride information.
+ for (int inIndex = inLen; --inIndex >= 0;) {
+ kparams.inEStrides[inIndex] = mtls->fep.inStrides[inIndex].eStride;
}
}
-}
-
-static void wc_x(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
-
- RsExpandKernelParams kparams;
- kparams.takeFields(mtls->fep);
-
- // Used by CpuScriptGroup, IntrinsicBlur, and IntrisicHistogram
- kparams.lid = idx;
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- while (1) {
- uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
- uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
- uint32_t xEnd = xStart + mtls->mSliceSize;
-
- xEnd = rsMin(xEnd, mtls->xEnd);
-
- if (xEnd <= xStart) {
- return;
- }
-
- //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
- //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
- kparams.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
- kparams.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
-
- fn(&kparams, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
- }
+ walk_loop(mtls, kparams, fn);
}
-void RsdCpuReferenceImpl::launchThreads(const Allocation * ain, Allocation * aout,
- const RsScriptCall *sc, MTLaunchStruct *mtls) {
+static void walk_2d(void *usr, uint32_t idx) {
+ walk_wrapper(usr, idx, [](MTLaunchStruct *mtls,
+ RsExpandKernelParams &kparams,
+ outer_foreach_t fn) {
- //android::StopWatch kernel_time("kernel time");
+ while (1) {
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
+ uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
+ uint32_t yEnd = yStart + mtls->mSliceSize;
- if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
- const size_t targetByteChunk = 16 * 1024;
- mInForEach = true;
- if (mtls->fep.dimY > 1) {
- uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4);
- uint32_t s2 = 0;
+ yEnd = rsMin(yEnd, mtls->yEnd);
- // This chooses our slice size to rate limit atomic ops to
- // one per 16k bytes of reads/writes.
- if (mtls->fep.yStrideOut) {
- s2 = targetByteChunk / mtls->fep.yStrideOut;
- } else {
- s2 = targetByteChunk / mtls->fep.yStrideIn;
+ if (yEnd <= yStart) {
+ return;
}
- mtls->mSliceSize = rsMin(s1, s2);
- if(mtls->mSliceSize < 1) {
- mtls->mSliceSize = 1;
- }
+ for (kparams.y = yStart; kparams.y < yEnd; kparams.y++) {
+ kparams.out = mtls->fep.outPtr +
+ (mtls->fep.outStride.yStride * kparams.y) +
+ (mtls->fep.outStride.eStride * mtls->xStart);
- // mtls->mSliceSize = 2;
- launchThreads(wc_xy, mtls);
- } else {
- uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4);
- uint32_t s2 = 0;
+ for (int inIndex = mtls->fep.inLen; --inIndex >= 0;) {
+ StridePair &strides = mtls->fep.inStrides[inIndex];
- // This chooses our slice size to rate limit atomic ops to
- // one per 16k bytes of reads/writes.
- if (mtls->fep.eStrideOut) {
- s2 = targetByteChunk / mtls->fep.eStrideOut;
- } else {
- s2 = targetByteChunk / mtls->fep.eStrideIn;
- }
- mtls->mSliceSize = rsMin(s1, s2);
+ kparams.ins[inIndex] =
+ mtls->fep.inPtrs[inIndex] +
+ (strides.yStride * kparams.y) +
+ (strides.eStride * mtls->xStart);
+ }
- if(mtls->mSliceSize < 1) {
- mtls->mSliceSize = 1;
+ // Kernels now get their input strides from kparams.
+ fn(&kparams, mtls->xStart, mtls->xEnd, 0,
+ mtls->fep.outStride.eStride);
}
-
- launchThreads(wc_x, mtls);
}
- mInForEach = false;
-
- //ALOGE("launch 1");
- } else {
- RsExpandKernelParams kparams;
- kparams.takeFields(mtls->fep);
+ });
+}
- //ALOGE("launch 3");
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- for (uint32_t arrayIndex = mtls->arrayStart;
- arrayIndex < mtls->arrayEnd; arrayIndex++) {
+static void walk_1d(void *usr, uint32_t idx) {
+ walk_wrapper(usr, idx, [](MTLaunchStruct *mtls,
+ RsExpandKernelParams &kparams,
+ outer_foreach_t fn) {
- for (kparams.z = mtls->zStart; kparams.z < mtls->zEnd;
- kparams.z++) {
+ while (1) {
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
+ uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
+ uint32_t xEnd = xStart + mtls->mSliceSize;
- for (kparams.y = mtls->yStart; kparams.y < mtls->yEnd;
- kparams.y++) {
+ xEnd = rsMin(xEnd, mtls->xEnd);
- uint32_t offset =
- kparams.dimY * kparams.dimZ * arrayIndex +
- kparams.dimY * kparams.z + kparams.y;
+ if (xEnd <= xStart) {
+ return;
+ }
- kparams.out = mtls->fep.ptrOut +
- (mtls->fep.yStrideOut * offset) +
- (mtls->fep.eStrideOut * mtls->xStart);
+ kparams.out = mtls->fep.outPtr +
+ (mtls->fep.outStride.eStride * xStart);
- kparams.in = mtls->fep.ptrIn +
- (mtls->fep.yStrideIn * offset) +
- (mtls->fep.eStrideIn * mtls->xStart);
+ for (int inIndex = mtls->fep.inLen; --inIndex >= 0;) {
+ StridePair &strides = mtls->fep.inStrides[inIndex];
- fn(&kparams, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn,
- mtls->fep.eStrideOut);
- }
+ kparams.ins[inIndex] =
+ mtls->fep.inPtrs[inIndex] + (strides.eStride * xStart);
}
+
+ // Kernels now get their input strides from kparams.
+ fn(&kparams, xStart, xEnd, 0, mtls->fep.outStride.eStride);
}
- }
+ });
}
-void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen, Allocation* aout,
- const RsScriptCall* sc, MTLaunchStruct* mtls) {
+
+void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains,
+ uint32_t inLen,
+ Allocation* aout,
+ const RsScriptCall* sc,
+ MTLaunchStruct* mtls) {
//android::StopWatch kernel_time("kernel time");
if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
const size_t targetByteChunk = 16 * 1024;
mInForEach = true;
+
if (mtls->fep.dimY > 1) {
uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4);
uint32_t s2 = 0;
// This chooses our slice size to rate limit atomic ops to
// one per 16k bytes of reads/writes.
- if (mtls->fep.yStrideOut) {
- s2 = targetByteChunk / mtls->fep.yStrideOut;
+ if (mtls->fep.outStride.yStride) {
+ s2 = targetByteChunk / mtls->fep.outStride.yStride;
} else {
- s2 = targetByteChunk / mtls->fep.yStrideIn;
+ // We know that there is either an output or an input.
+ s2 = targetByteChunk / mtls->fep.inStrides[0].yStride;
}
mtls->mSliceSize = rsMin(s1, s2);
@@ -531,18 +485,18 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen,
mtls->mSliceSize = 1;
}
- // mtls->mSliceSize = 2;
- launchThreads(wc_xy, mtls);
+ launchThreads(walk_2d, mtls);
} else {
uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4);
uint32_t s2 = 0;
// This chooses our slice size to rate limit atomic ops to
// one per 16k bytes of reads/writes.
- if (mtls->fep.eStrideOut) {
- s2 = targetByteChunk / mtls->fep.eStrideOut;
+ if (mtls->fep.outStride.eStride) {
+ s2 = targetByteChunk / mtls->fep.outStride.eStride;
} else {
- s2 = targetByteChunk / mtls->fep.eStrideIn;
+ // We know that there is either an output or an input.
+ s2 = targetByteChunk / mtls->fep.inStrides[0].eStride;
}
mtls->mSliceSize = rsMin(s1, s2);
@@ -550,24 +504,26 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen,
mtls->mSliceSize = 1;
}
- launchThreads(wc_x, mtls);
+ launchThreads(walk_1d, mtls);
}
mInForEach = false;
- //ALOGE("launch 1");
} else {
RsExpandKernelParams kparams;
kparams.takeFields(mtls->fep);
- // Allocate space for our input base pointers.
- kparams.ins = new const void*[inLen];
+ if (inLen > 0) {
+ // Allocate space for our input base pointers.
+ kparams.ins = (const void**)alloca(inLen * sizeof(void*));
- // Allocate space for our input stride information.
- kparams.eStrideIns = new uint32_t[inLen];
+ // Allocate space for our input stride information.
+ kparams.inEStrides = (uint32_t*)alloca(inLen * sizeof(uint32_t));
- // Fill our stride information.
- for (int inIndex = inLen; --inIndex >= 0;) {
- kparams.eStrideIns[inIndex] = mtls->fep.inStrides[inIndex].eStride;
+ // Fill our stride information.
+ for (int inIndex = inLen; --inIndex >= 0;) {
+ kparams.inEStrides[inIndex] =
+ mtls->fep.inStrides[inIndex].eStride;
+ }
}
//ALOGE("launch 3");
@@ -585,15 +541,15 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen,
mtls->fep.dimY * mtls->fep.dimZ * arrayIndex +
mtls->fep.dimY * kparams.z + kparams.y;
- kparams.out = mtls->fep.ptrOut +
- (mtls->fep.yStrideOut * offset) +
- (mtls->fep.eStrideOut * mtls->xStart);
+ kparams.out = mtls->fep.outPtr +
+ (mtls->fep.outStride.yStride * offset) +
+ (mtls->fep.outStride.eStride * mtls->xStart);
for (int inIndex = inLen; --inIndex >= 0;) {
StridePair &strides = mtls->fep.inStrides[inIndex];
kparams.ins[inIndex] =
- mtls->fep.ptrIns[inIndex] +
+ mtls->fep.inPtrs[inIndex] +
(strides.yStride * offset) +
(strides.eStride * mtls->xStart);
}
@@ -604,14 +560,10 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen,
* that points to an array.
*/
fn(&kparams, mtls->xStart, mtls->xEnd, 0,
- mtls->fep.eStrideOut);
+ mtls->fep.outStride.eStride);
}
}
}
-
- // Free our arrays.
- delete[] kparams.ins;
- delete[] kparams.eStrideIns;
}
}
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index 5d4b6cc5..2fea3fcd 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -25,6 +25,8 @@
#include <string>
+#define RS_KERNEL_INPUT_THRESHOLD 32
+
namespace bcc {
class BCCContext;
class RSCompilerDriver;
@@ -40,31 +42,36 @@ struct StridePair {
};
struct RsExpandKernelDriverInfo {
- const void *usr;
- uint32_t usrLen;
+ const uint8_t **inPtrs;
+ uint32_t inLen;
+
+ uint8_t *outPtr;
+
+ StridePair *inStrides;
+ StridePair outStride;
uint32_t dimX;
uint32_t dimY;
uint32_t dimZ;
- const uint8_t *ptrIn;
- uint8_t *ptrOut;
- uint32_t eStrideIn;
- uint32_t eStrideOut;
- uint32_t yStrideIn;
- uint32_t yStrideOut;
uint32_t slot;
- const uint8_t** ptrIns;
- StridePair* inStrides;
+ const void *usr;
+ uint32_t usrLen;
- ~RsExpandKernelDriverInfo() {
- if (ptrIns != NULL) {
- delete[] ptrIns;
- }
+ bool heapAllocatedArrays;
- if (inStrides != NULL) {
- delete[] inStrides;
+ RsExpandKernelDriverInfo() : heapAllocatedArrays(false) {}
+
+ ~RsExpandKernelDriverInfo() {
+ if (heapAllocatedArrays) {
+ if (inPtrs != NULL) {
+ delete[] inPtrs;
+ }
+
+ if (inStrides != NULL) {
+ delete[] inStrides;
+ }
}
}
};
@@ -72,15 +79,13 @@ struct RsExpandKernelDriverInfo {
struct RsExpandKernelParams {
// Used by kernels
- const void *in;
+ const void **ins;
+ uint32_t *inEStrides;
void *out;
uint32_t y;
uint32_t z;
uint32_t lid;
- const void **ins;
- uint32_t *eStrideIns;
-
// Used by ScriptGroup and user kernels.
const void *usr;
@@ -115,13 +120,13 @@ typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
class RsdCpuScriptImpl;
class RsdCpuReferenceImpl;
-typedef struct ScriptTLSStructRec {
+struct ScriptTLSStruct {
android::renderscript::Context * mContext;
const android::renderscript::Script * mScript;
RsdCpuScriptImpl *mImpl;
-} ScriptTLSStruct;
+};
-typedef struct {
+struct MTLaunchStruct {
RsExpandKernelDriverInfo fep;
RsdCpuReferenceImpl *rsc;
@@ -129,7 +134,7 @@ typedef struct {
ForEachFunc_t kernel;
uint32_t sig;
- const Allocation * ain;
+ const Allocation ** ains;
Allocation * aout;
uint32_t mSliceSize;
@@ -145,12 +150,9 @@ typedef struct {
uint32_t arrayStart;
uint32_t arrayEnd;
- // Multi-input data.
- const Allocation ** ains;
-} MTLaunchStruct;
-
-
-
+ const uint8_t *inPtrsBuff[RS_KERNEL_INPUT_THRESHOLD];
+ StridePair inStridesBuff[RS_KERNEL_INPUT_THRESHOLD];
+};
class RsdCpuReferenceImpl : public RsdCpuReference {
public:
@@ -171,9 +173,6 @@ public:
return mWorkers.mCount + 1;
}
- void launchThreads(const Allocation * ain, Allocation * aout,
- const RsScriptCall *sc, MTLaunchStruct *mtls);
-
void launchThreads(const Allocation** ains, uint32_t inLen, Allocation* aout,
const RsScriptCall* sc, MTLaunchStruct* mtls);
diff --git a/cpu_ref/rsCpuIntrinsic.cpp b/cpu_ref/rsCpuIntrinsic.cpp
index 5a7fffd5..8437c998 100644
--- a/cpu_ref/rsCpuIntrinsic.cpp
+++ b/cpu_ref/rsCpuIntrinsic.cpp
@@ -73,54 +73,29 @@ void RsdCpuScriptIntrinsic::invokeFreeChildren() {
}
-void RsdCpuScriptIntrinsic::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc) {
+void RsdCpuScriptIntrinsic::preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {
}
-void RsdCpuScriptIntrinsic::postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc) {
+void RsdCpuScriptIntrinsic::postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {
}
void RsdCpuScriptIntrinsic::invokeForEach(uint32_t slot,
- const Allocation * ain,
+ const Allocation ** ains,
+ uint32_t inLen,
Allocation * aout,
const void * usr,
uint32_t usrLen,
const RsScriptCall *sc) {
MTLaunchStruct mtls;
- preLaunch(slot, ain, aout, usr, usrLen, sc);
- forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
- mtls.script = this;
- mtls.fep.slot = slot;
-
- mtls.kernel = (void (*)())mRootPtr;
- mtls.fep.usr = this;
-
- RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
- mCtx->launchThreads(ain, aout, sc, &mtls);
- mCtx->setTLS(oldTLS);
-
- postLaunch(slot, ain, aout, usr, usrLen, sc);
-}
-
-void RsdCpuScriptIntrinsic::invokeForEachMulti(uint32_t slot,
- const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) {
-
- MTLaunchStruct mtls;
- /*
- * FIXME: Possibly create new preLaunch and postLaunch functions that take
- * all of the input allocation pointers.
- */
- preLaunch(slot, ains[0], aout, usr, usrLen, sc);
+ preLaunch(slot, ains, inLen, aout, usr, usrLen, sc);
forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
mtls.script = this;
@@ -133,7 +108,7 @@ void RsdCpuScriptIntrinsic::invokeForEachMulti(uint32_t slot,
mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
mCtx->setTLS(oldTLS);
- postLaunch(slot, ains[0], aout, usr, usrLen, sc);
+ postLaunch(slot, ains, inLen, aout, usr, usrLen, sc);
}
void RsdCpuScriptIntrinsic::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
diff --git a/cpu_ref/rsCpuIntrinsic.h b/cpu_ref/rsCpuIntrinsic.h
index bf6a8acd..95aaa141 100644
--- a/cpu_ref/rsCpuIntrinsic.h
+++ b/cpu_ref/rsCpuIntrinsic.h
@@ -28,43 +28,42 @@ class RsdCpuScriptIntrinsic : public RsdCpuScriptImpl {
public:
virtual void populateScript(Script *) = 0;
- virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength);
+ virtual void invokeFunction(uint32_t slot, const void * params,
+ size_t paramLength);
virtual int invokeRoot();
+
virtual void invokeForEach(uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc);
-
- virtual void invokeForEachMulti(uint32_t slot,
- const Allocation ** ain,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc);
-
- virtual void forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls);
+ const Allocation ** ain,
+ uint32_t inLen,
+ Allocation * aout,
+ const void * usr,
+ uint32_t usrLen,
+ const RsScriptCall *sc);
+
+ virtual void forEachKernelSetup(uint32_t slot, MTLaunchStruct * mtls);
virtual void invokeInit();
virtual void invokeFreeChildren();
- virtual void preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc);
- virtual void postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc);
-
- virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
- virtual void setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
- const Element *e, const uint32_t *dims, size_t dimLength);
+ virtual void preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout, const void * usr,
+ uint32_t usrLen, const RsScriptCall * sc);
+ virtual void postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall * sc);
+
+ virtual void setGlobalVar(uint32_t slot, const void * data,
+ size_t dataLength);
+ virtual void setGlobalVarWithElemDims(uint32_t slot, const void * data,
+ size_t dataLength, const Element * e,
+ const uint32_t * dims,
+ size_t dimLength);
virtual void setGlobalBind(uint32_t slot, Allocation *data);
virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
virtual ~RsdCpuScriptIntrinsic();
- RsdCpuScriptIntrinsic(RsdCpuReferenceImpl *ctx, const Script *s, const Element *,
- RsScriptIntrinsicID iid);
+ RsdCpuScriptIntrinsic(RsdCpuReferenceImpl * ctx, const Script * s,
+ const Element * e, RsScriptIntrinsicID iid);
protected:
RsScriptIntrinsicID mID;
diff --git a/cpu_ref/rsCpuIntrinsic3DLUT.cpp b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
index c839c19d..a19d8851 100644
--- a/cpu_ref/rsCpuIntrinsic3DLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
@@ -64,7 +64,7 @@ void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p,
RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)p->usr;
uchar4 *out = (uchar4 *)p->out + xstart;
- uchar4 *in = (uchar4 *)p->in + xstart;
+ uchar4 *in = (uchar4 *)p->ins[0] + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -161,9 +161,9 @@ void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p,
}
}
-RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx,
- const Script *s, const Element *e)
- : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) {
+RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT(
+ RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) :
+ RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) {
mRootPtr = &kernel;
}
@@ -185,5 +185,3 @@ RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
return new RsdCpuScriptIntrinsic3DLUT(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index b6046584..0378e076 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -117,7 +117,7 @@ void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelParams *p,
// instep/outstep can be ignored--sizeof(uchar4) known at compile time
uchar4 *out = (uchar4 *)p->out;
- uchar4 *in = (uchar4 *)p->in;
+ uchar4 *in = (uchar4 *)p->ins[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -509,6 +509,3 @@ RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
const Script *s, const Element *e) {
return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
}
-
-
-
diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
index bf78eb3e..4e90ad72 100644
--- a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
+++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
@@ -169,10 +169,9 @@ public:
virtual ~RsdCpuScriptIntrinsicColorMatrix();
RsdCpuScriptIntrinsicColorMatrix(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
- virtual void preLaunch(uint32_t slot, const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen, const RsScriptCall *sc);
- virtual void postLaunch(uint32_t slot, const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen, const RsScriptCall *sc);
+ virtual void preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout, const void * usr,
+ uint32_t usrLen, const RsScriptCall *sc);
protected:
float fp[16];
@@ -883,8 +882,13 @@ void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr;
- uchar *out = (uchar *)p->out + outstep * xstart;
- uchar *in = (uchar *)p->in + instep * xstart;
+
+ // Update the instep due to change in parameter passing.
+ instep = p->inEStrides[0];
+
+ uchar *out = (uchar *)p->out + outstep * xstart;
+ uchar *in = (uchar *)p->ins[0] + instep * xstart;
+
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -932,11 +936,15 @@ void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p,
}
}
-void RsdCpuScriptIntrinsicColorMatrix::preLaunch(
- uint32_t slot, const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen, const RsScriptCall *sc) {
+void RsdCpuScriptIntrinsicColorMatrix::preLaunch(uint32_t slot,
+ const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
+ const void * usr,
+ uint32_t usrLen,
+ const RsScriptCall *sc) {
- const Element *ein = ain->mHal.state.type->getElement();
+ const Element *ein = ains[0]->mHal.state.type->getElement();
const Element *eout = aout->mHal.state.type->getElement();
if (ein->getType() == eout->getType()) {
@@ -953,8 +961,8 @@ void RsdCpuScriptIntrinsicColorMatrix::preLaunch(
}
}
- Key_t key = computeKey(ain->mHal.state.type->getElement(),
- aout->mHal.state.type->getElement());
+ Key_t key = computeKey(ein, eout);
+
#if defined(ARCH_X86_HAVE_SSSE3)
if ((mOptKernel == NULL) || (mLastKey.key != key.key)) {
// FIXME: Disable mOptKernel to pass RS color matrix CTS cases
@@ -996,12 +1004,6 @@ void RsdCpuScriptIntrinsicColorMatrix::preLaunch(
#endif //if !defined(ARCH_X86_HAVE_SSSE3)
}
-void RsdCpuScriptIntrinsicColorMatrix::postLaunch(
- uint32_t slot, const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen, const RsScriptCall *sc) {
-
-}
-
RsdCpuScriptIntrinsicColorMatrix::RsdCpuScriptIntrinsicColorMatrix(
RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
: RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX) {
diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp
index 1c430b72..b5dbfa80 100644
--- a/cpu_ref/rsCpuIntrinsicHistogram.cpp
+++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp
@@ -36,10 +36,10 @@ public:
RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
protected:
- void preLaunch(uint32_t slot, const Allocation * ain,
+ void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
Allocation * aout, const void * usr,
uint32_t usrLen, const RsScriptCall *sc);
- void postLaunch(uint32_t slot, const Allocation * ain,
+ void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
Allocation * aout, const void * usr,
uint32_t usrLen, const RsScriptCall *sc);
@@ -97,9 +97,12 @@ void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *dat
-void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc) {
+void
+RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot,
+ const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {
const uint32_t threads = mCtx->getThreadCount();
uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
@@ -123,7 +126,7 @@ void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation *
}
break;
case 1:
- switch(ain->getType()->getElement()->getVectorSize()) {
+ switch(ains[0]->getType()->getElement()->getVectorSize()) {
case 1:
mRootPtr = &kernelP1L1;
break;
@@ -142,9 +145,12 @@ void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation *
memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
}
-void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc) {
+void
+RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot,
+ const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {
unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
uint32_t threads = mCtx->getThreadCount();
@@ -165,7 +171,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * 4 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
@@ -173,7 +179,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p,
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
sums[(in[3] << 2) + 3] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
@@ -182,14 +188,14 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * 4 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 2) ] ++;
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
@@ -198,13 +204,13 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * 2 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 1) ] ++;
sums[(in[1] << 1) + 1] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
@@ -213,7 +219,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
@@ -222,7 +228,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p,
(cp->mDotI[2] * in[2]) +
(cp->mDotI[3] * in[3]);
sums[(t + 0x7f) >> 8] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
@@ -231,7 +237,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
@@ -239,7 +245,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p,
(cp->mDotI[1] * in[1]) +
(cp->mDotI[2] * in[2]);
sums[(t + 0x7f) >> 8] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
@@ -248,14 +254,14 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]) +
(cp->mDotI[1] * in[1]);
sums[(t + 0x7f) >> 8] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
@@ -264,13 +270,13 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]);
sums[(t + 0x7f) >> 8] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
@@ -279,12 +285,12 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[in[0]] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
@@ -323,5 +329,3 @@ RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script
return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuIntrinsicLUT.cpp b/cpu_ref/rsCpuIntrinsicLUT.cpp
index db73a838..9d3b4003 100644
--- a/cpu_ref/rsCpuIntrinsicLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsicLUT.cpp
@@ -59,7 +59,7 @@ void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelParams *p,
RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)p->usr;
uchar *out = (uchar *)p->out;
- const uchar *in = (uchar *)p->in;
+ const uchar *in = (uchar *)p->ins[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -103,5 +103,3 @@ RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx,
return new RsdCpuScriptIntrinsicLUT(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp
index af1127e7..3a307d63 100644
--- a/cpu_ref/rsCpuIntrinsicResize.cpp
+++ b/cpu_ref/rsCpuIntrinsicResize.cpp
@@ -35,8 +35,8 @@ public:
virtual ~RsdCpuScriptIntrinsicResize();
RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
- virtual void preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
+ virtual void preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout, const void * usr,
uint32_t usrLen, const RsScriptCall *sc);
float scaleX;
@@ -308,9 +308,11 @@ RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
}
-void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
+void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
+ const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc)
{
if (!mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -351,5 +353,3 @@ RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s
return new RsdCpuScriptIntrinsicResize(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index a11fda19..05984207 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -789,144 +789,33 @@ void RsdCpuScriptImpl::populateScript(Script *script) {
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
+void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
const void * usr, uint32_t usrLen,
const RsScriptCall *sc,
MTLaunchStruct *mtls) {
memset(mtls, 0, sizeof(MTLaunchStruct));
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
- if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
- return;
- }
-
- if (ain != NULL) {
- const Type *inType = ain->getType();
+ for (int index = inLen; --index >= 0;) {
+ const Allocation* ain = ains[index];
- mtls->fep.dimX = inType->getDimX();
- mtls->fep.dimY = inType->getDimY();
- mtls->fep.dimZ = inType->getDimZ();
-
- } else if (aout != NULL) {
- const Type *outType = aout->getType();
-
- mtls->fep.dimX = outType->getDimX();
- mtls->fep.dimY = outType->getDimY();
- mtls->fep.dimZ = outType->getDimZ();
-
- } else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
- return;
- }
-
- if (ain != NULL && aout != NULL) {
- if (!ain->hasSameDims(aout)) {
+ // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
+ if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; dimensions of input and output allocations do not match.");
-
+ "rsForEach called with null in allocations");
return;
}
}
- if (!sc || (sc->xEnd == 0)) {
- mtls->xEnd = mtls->fep.dimX;
- } else {
- rsAssert(sc->xStart < mtls->fep.dimX);
- rsAssert(sc->xEnd <= mtls->fep.dimX);
- rsAssert(sc->xStart < sc->xEnd);
- mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
- mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
- if (mtls->xStart >= mtls->xEnd) return;
- }
-
- if (!sc || (sc->yEnd == 0)) {
- mtls->yEnd = mtls->fep.dimY;
- } else {
- rsAssert(sc->yStart < mtls->fep.dimY);
- rsAssert(sc->yEnd <= mtls->fep.dimY);
- rsAssert(sc->yStart < sc->yEnd);
- mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
- mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
- if (mtls->yStart >= mtls->yEnd) return;
- }
-
- if (!sc || (sc->zEnd == 0)) {
- mtls->zEnd = mtls->fep.dimZ;
- } else {
- rsAssert(sc->zStart < mtls->fep.dimZ);
- rsAssert(sc->zEnd <= mtls->fep.dimZ);
- rsAssert(sc->zStart < sc->zEnd);
- mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
- mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
- if (mtls->zStart >= mtls->zEnd) return;
- }
-
- mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
- mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
- mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
- mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
-
- rsAssert(!ain || (ain->getType()->getDimZ() == 0));
-
- mtls->rsc = mCtx;
- mtls->ain = ain;
- mtls->aout = aout;
- mtls->fep.usr = usr;
- mtls->fep.usrLen = usrLen;
- mtls->mSliceSize = 1;
- mtls->mSliceNum = 0;
-
- mtls->fep.ptrIn = NULL;
- mtls->fep.eStrideIn = 0;
- mtls->isThreadable = mIsThreadable;
-
- if (ain) {
- mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
- mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
- }
-
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
- }
-}
-
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen,
- Allocation * aout,
- const void * usr, uint32_t usrLen,
- const RsScriptCall *sc,
- MTLaunchStruct *mtls) {
-
- memset(mtls, 0, sizeof(MTLaunchStruct));
-
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ains != NULL) {
- for (int index = inLen; --index >= 0;) {
- const Allocation* ain = ains[index];
-
- if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
- }
- }
-
if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null out allocations");
return;
}
- if (ains != NULL) {
+ if (inLen > 0) {
const Allocation *ain0 = ains[0];
const Type *inType = ain0->getType();
@@ -951,11 +840,12 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->fep.dimZ = outType->getDimZ();
} else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null allocations");
return;
}
- if (ains != NULL && aout != NULL) {
+ if (inLen > 0 && aout != NULL) {
if (!ains[0]->hasSameDims(aout)) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
"Failed to launch kernel; dimensions of input and output allocations do not match.");
@@ -1002,7 +892,7 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
- rsAssert(!ains || (ains[0]->getType()->getDimZ() == 0));
+ rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0));
mtls->rsc = mCtx;
mtls->ains = ains;
@@ -1012,18 +902,28 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->mSliceSize = 1;
mtls->mSliceNum = 0;
- mtls->fep.ptrIns = NULL;
- mtls->fep.eStrideIn = 0;
+ mtls->fep.inPtrs = NULL;
+ mtls->fep.inStrides = NULL;
mtls->isThreadable = mIsThreadable;
- if (ains) {
- mtls->fep.ptrIns = new const uint8_t*[inLen];
- mtls->fep.inStrides = new StridePair[inLen];
+ if (inLen > 0) {
+
+ if (inLen <= RS_KERNEL_INPUT_THRESHOLD) {
+ mtls->fep.inPtrs = (const uint8_t**)mtls->inPtrsBuff;
+ mtls->fep.inStrides = mtls->inStridesBuff;
+ } else {
+ mtls->fep.heapAllocatedArrays = true;
+
+ mtls->fep.inPtrs = new const uint8_t*[inLen];
+ mtls->fep.inStrides = new StridePair[inLen];
+ }
+
+ mtls->fep.inLen = inLen;
for (int index = inLen; --index >= 0;) {
const Allocation *ain = ains[index];
- mtls->fep.ptrIns[index] =
+ mtls->fep.inPtrs[index] =
(const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr;
mtls->fep.inStrides[index].eStride =
@@ -1033,41 +933,27 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
}
}
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
+ mtls->fep.outPtr = NULL;
+ mtls->fep.outStride.eStride = 0;
+ mtls->fep.outStride.yStride = 0;
+ if (aout != NULL) {
+ mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
+
+ mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes();
+ mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride;
}
}
void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
- const Allocation * ain,
+ const Allocation ** ains,
+ uint32_t inLen,
Allocation * aout,
const void * usr,
uint32_t usrLen,
const RsScriptCall *sc) {
MTLaunchStruct mtls;
- forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
- forEachKernelSetup(slot, &mtls);
-
- RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
- mCtx->launchThreads(ain, aout, sc, &mtls);
- mCtx->setTLS(oldTLS);
-}
-
-void RsdCpuScriptImpl::invokeForEachMulti(uint32_t slot,
- const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) {
-
- MTLaunchStruct mtls;
forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
forEachKernelSetup(slot, &mtls);
@@ -1338,17 +1224,15 @@ Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
return NULL;
}
-void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
-void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
}
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index d51e9e3f..f0843cc0 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -64,26 +64,22 @@ public:
virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength);
virtual int invokeRoot();
- virtual void preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
+ virtual void preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout, const void * usr,
uint32_t usrLen, const RsScriptCall *sc);
- virtual void postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc);
+ virtual void postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc);
+
virtual void invokeForEach(uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc);
-
- virtual void invokeForEachMulti(uint32_t slot,
- const Allocation** ains,
- uint32_t inLen,
- Allocation* aout,
- const void* usr,
- uint32_t usrLen,
- const RsScriptCall* sc);
+ const Allocation ** ains,
+ uint32_t inLen,
+ Allocation* aout,
+ const void* usr,
+ uint32_t usrLen,
+ const RsScriptCall* sc);
+
virtual void invokeInit();
virtual void invokeFreeChildren();
@@ -100,10 +96,6 @@ public:
const Script * getScript() {return mScript;}
- void forEachMtlsSetup(const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen,
- const RsScriptCall *sc, MTLaunchStruct *mtls);
-
void forEachMtlsSetup(const Allocation ** ains, uint32_t inLen,
Allocation * aout, const void * usr, uint32_t usrLen,
const RsScriptCall *sc, MTLaunchStruct *mtls);
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 08785523..20ee09db 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -53,38 +53,45 @@ void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
uint32_t instep, uint32_t outstep) {
- const ScriptList *sl = (const ScriptList *)kparams->usr;
+ const ScriptList *sl = (const ScriptList *)kparams->usr;
RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
+ const void **oldIns = mkparams->ins;
+ uint32_t *oldStrides = mkparams->inEStrides;
+
+ void *localIns[1];
+ uint32_t localStride[1];
+
+ mkparams->ins = (const void**)localIns;
+ mkparams->inEStrides = localStride;
+
for (size_t ct = 0; ct < sl->count; ct++) {
ScriptGroupRootFunc_t func;
func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
mkparams->usr = sl->usrPtrs[ct];
- mkparams->in = NULL;
- mkparams->out = NULL;
-
- uint32_t istep = 0;
- uint32_t ostep = 0;
-
if (sl->ins[ct]) {
- mkparams->in =
- (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+ localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
- istep = sl->ins[ct]->mHal.state.elementSizeBytes;
+ localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
if (sl->inExts[ct]) {
- mkparams->in =
- (const uint8_t *)mkparams->in +
- sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y;
+ localIns[0] = (void*)
+ ((const uint8_t *)localIns[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y);
} else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
- mkparams->in =
- (const uint8_t *)mkparams->in +
- sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid;
+ localIns[0] = (void*)
+ ((const uint8_t *)localIns[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid);
}
+
+ } else {
+ localIns[0] = NULL;
+ localStride[0] = 0;
}
+ uint32_t ostep;
if (sl->outs[ct]) {
mkparams->out =
(uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
@@ -101,14 +108,23 @@ void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
(uint8_t *)mkparams->out +
sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
}
+ } else {
+ mkparams->out = NULL;
+ ostep = 0;
}
//ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
- func(kparams, xstart, xend, istep, ostep);
+ /*
+ * The fourth argument is zero here because kernels get their stride
+ * information from a member of p that points to an array.
+ */
+ func(kparams, xstart, xend, 0, ostep);
}
//ALOGE("script group root");
- mkparams->usr = sl;
+ mkparams->ins = oldIns;
+ mkparams->inEStrides = oldStrides;
+ mkparams->usr = sl;
}
@@ -195,17 +211,33 @@ void CpuScriptGroupImpl::execute() {
MTLaunchStruct mtls;
- if(fieldDep) {
+ if (fieldDep) {
for (size_t ct=0; ct < ins.size(); ct++) {
Script *s = kernels[ct]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
uint32_t slot = kernels[ct]->mSlot;
- si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
+ uint32_t inLen;
+ const Allocation **ains;
+
+ if (ins[ct] == NULL) {
+ inLen = 0;
+ ains = NULL;
+
+ } else {
+ inLen = 1;
+ ains = const_cast<const Allocation**>(&ins[ct]);
+ }
+
+ si->forEachMtlsSetup(ains, inLen, outs[ct], NULL, 0, NULL, &mtls);
+
si->forEachKernelSetup(slot, &mtls);
- si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
- mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
- si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
+ si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
+ mtls.fep.usrLen, NULL);
+
+ mCtx->launchThreads(ains, inLen, outs[ct], NULL, &mtls);
+
+ si->postLaunch(slot, ains, inLen, outs[ct], NULL, 0, NULL);
}
} else {
ScriptList sl;
@@ -214,6 +246,18 @@ void CpuScriptGroupImpl::execute() {
sl.kernels = kernels.array();
sl.count = kernels.size();
+ uint32_t inLen;
+ const Allocation **ains;
+
+ if (ins[0] == NULL) {
+ inLen = 0;
+ ains = NULL;
+
+ } else {
+ inLen = 1;
+ ains = const_cast<const Allocation**>(&ins[0]);
+ }
+
Vector<const void *> usrPtrs;
Vector<const void *> fnPtrs;
Vector<uint32_t> sigs;
@@ -225,7 +269,8 @@ void CpuScriptGroupImpl::execute() {
fnPtrs.add((void *)mtls.kernel);
usrPtrs.add(mtls.fep.usr);
sigs.add(mtls.fep.usrLen);
- si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
+ si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
+ mtls.fep.usr, mtls.fep.usrLen, NULL);
}
sl.sigs = sigs.array();
sl.usrPtrs = usrPtrs.array();
@@ -235,16 +280,20 @@ void CpuScriptGroupImpl::execute() {
Script *s = kernels[0]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
- si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
+
+ si->forEachMtlsSetup(ains, inLen, outs[0], NULL, 0, NULL, &mtls);
+
mtls.script = NULL;
mtls.kernel = (void (*)())&scriptGroupRoot;
mtls.fep.usr = &sl;
- mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
+
+ mCtx->launchThreads(ains, inLen, outs[0], NULL, &mtls);
for (size_t ct=0; ct < kernels.size(); ct++) {
Script *s = kernels[ct]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
- si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
+ si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], NULL, 0,
+ NULL);
}
}
}
diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h
index 0076cb98..4728b7c8 100644
--- a/cpu_ref/rsd_cpu.h
+++ b/cpu_ref/rsd_cpu.h
@@ -69,21 +69,15 @@ public:
virtual void populateScript(Script *) = 0;
virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength) = 0;
virtual int invokeRoot() = 0;
+
virtual void invokeForEach(uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) = 0;
-
- virtual void invokeForEachMulti(uint32_t slot,
- const Allocation** ains,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) = 0;
-
+ const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
+ const void * usr,
+ uint32_t usrLen,
+ const RsScriptCall *sc) = 0;
+
virtual void invokeInit() = 0;
virtual void invokeFreeChildren() = 0;
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index 27029cf1..b7c7f2e5 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -43,8 +43,9 @@ bool rsdScriptInit(const Context *rsc,
size_t bitcodeSize,
uint32_t flags) {
RsdHal *dc = (RsdHal *)rsc->mHal.drv;
- RsdCpuReference::CpuScript * cs = dc->mCpuRef->createScript(script, resName, cacheDir,
- bitcode, bitcodeSize, flags);
+ RsdCpuReference::CpuScript * cs =
+ dc->mCpuRef->createScript(script, resName, cacheDir, bitcode,
+ bitcodeSize, flags);
if (cs == NULL) {
return false;
}
@@ -53,7 +54,8 @@ bool rsdScriptInit(const Context *rsc,
return true;
}
-bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) {
+bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid,
+ Element *e) {
RsdHal *dc = (RsdHal *)rsc->mHal.drv;
RsdCpuReference::CpuScript * cs = dc->mCpuRef->createIntrinsic(s, iid, e);
if (cs == NULL) {
@@ -73,8 +75,15 @@ void rsdScriptInvokeForEach(const Context *rsc,
size_t usrLen,
const RsScriptCall *sc) {
- RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
- cs->invokeForEach(slot, ain, aout, usr, usrLen, sc);
+ if (ain == NULL) {
+ rsdScriptInvokeForEachMulti(rsc, s, slot, NULL, 0, aout, usr, usrLen,
+ sc);
+ } else {
+ const Allocation *ains[1] = {ain};
+
+ rsdScriptInvokeForEachMulti(rsc, s, slot, ains, 1, aout, usr, usrLen,
+ sc);
+ }
}
void rsdScriptInvokeForEachMulti(const Context *rsc,
@@ -88,7 +97,7 @@ void rsdScriptInvokeForEachMulti(const Context *rsc,
const RsScriptCall *sc) {
RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
- cs->invokeForEachMulti(slot, ains, inLen, aout, usr, usrLen, sc);
+ cs->invokeForEach(slot, ains, inLen, aout, usr, usrLen, sc);
}
diff --git a/rsRuntime.h b/rsRuntime.h
index eb93e252..5a058830 100644
--- a/rsRuntime.h
+++ b/rsRuntime.h
@@ -158,7 +158,7 @@ void rsrForEach(Context *, Script *target,
Allocation *in,
Allocation *out,
const void *usr,
- uint32_t usrBytes,
+ uint32_t usrBytes,
const RsScriptCall *call);
diff --git a/rsScript.cpp b/rsScript.cpp
index ea1b3ac9..a4fa1966 100644
--- a/rsScript.cpp
+++ b/rsScript.cpp
@@ -187,23 +187,13 @@ void rsi_ScriptSetTimeZone(Context * rsc, RsScript vs, const char * timeZone, si
free(tz);
}
-void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot,
- RsAllocation vain, RsAllocation vaout,
- const void *params, size_t paramLen,
- const RsScriptCall *sc, size_t scLen) {
- Script *s = static_cast<Script *>(vs);
- s->runForEach(rsc, slot,
- static_cast<const Allocation *>(vain), static_cast<Allocation *>(vaout),
- params, paramLen, sc);
-
-}
-
void rsi_ScriptForEachMulti(Context *rsc, RsScript vs, uint32_t slot,
RsAllocation *vains, size_t inLen,
RsAllocation vaout, const void *params,
size_t paramLen, const RsScriptCall *sc,
size_t scLen) {
- Script *s = static_cast<Script *>(vs);
+
+ Script *s = static_cast<Script *>(vs);
Allocation **ains = (Allocation**)(vains);
s->runForEach(rsc, slot,
@@ -212,6 +202,23 @@ void rsi_ScriptForEachMulti(Context *rsc, RsScript vs, uint32_t slot,
}
+void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot,
+ RsAllocation vain, RsAllocation vaout,
+ const void *params, size_t paramLen,
+ const RsScriptCall *sc, size_t scLen) {
+
+ if (vain == NULL) {
+ rsi_ScriptForEachMulti(rsc, vs, slot, NULL, 0, vaout, params, paramLen,
+ sc, scLen);
+ } else {
+ RsAllocation ains[1] = {vain};
+
+ rsi_ScriptForEachMulti(rsc, vs, slot, ains,
+ sizeof(ains) / sizeof(RsAllocation), vaout,
+ params, paramLen, sc, scLen);
+ }
+}
+
void rsi_ScriptInvoke(Context *rsc, RsScript vs, uint32_t slot) {
Script *s = static_cast<Script *>(vs);
s->Invoke(rsc, slot, NULL, 0);
diff --git a/rsScript.h b/rsScript.h
index 1ad013f9..2e232f0f 100644
--- a/rsScript.h
+++ b/rsScript.h
@@ -108,17 +108,9 @@ public:
virtual bool freeChildren();
- virtual void runForEach(Context *rsc,
- uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc = NULL) = 0;
-
virtual void runForEach(Context* rsc,
uint32_t slot,
- const Allocation** ains,
+ const Allocation ** ains,
size_t inLen,
Allocation* aout,
const void* usr,
diff --git a/rsScriptC.cpp b/rsScriptC.cpp
index e7ff8c7d..892807bd 100644
--- a/rsScriptC.cpp
+++ b/rsScriptC.cpp
@@ -156,36 +156,6 @@ uint32_t ScriptC::run(Context *rsc) {
void ScriptC::runForEach(Context *rsc,
uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc) {
- // Trace this function call.
- // To avoid overhead, we only build the string, if tracing is actually
- // enabled.
- String8 *AString = NULL;
- const char *String = "";
- if (ATRACE_ENABLED()) {
- AString = new String8("runForEach_");
- AString->append(mHal.info.exportedForeachFuncList[slot].first);
- String = AString->string();
- }
- ATRACE_NAME(String);
- (void)String;
-
- Context::PushState ps(rsc);
-
- setupGLState(rsc);
- setupScript(rsc);
- rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ain, aout, usr, usrBytes, sc);
-
- if (AString)
- delete AString;
-}
-
-void ScriptC::runForEach(Context *rsc,
- uint32_t slot,
const Allocation ** ains,
size_t inLen,
Allocation * aout,
@@ -210,10 +180,22 @@ void ScriptC::runForEach(Context *rsc,
setupGLState(rsc);
setupScript(rsc);
- rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen, aout, usr, usrBytes, sc);
+ if (rsc->mHal.funcs.script.invokeForEachMulti != NULL) {
+ rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen,
+ aout, usr, usrBytes, sc);
+
+ } else if (inLen == 1) {
+ rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ains[0], aout,
+ usr, usrBytes, sc);
- if (AString)
+ } else {
+ rsc->setError(RS_ERROR_FATAL_DRIVER,
+ "Driver support for multi-input not present");
+ }
+
+ if (AString) {
delete AString;
+ }
}
void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) {
diff --git a/rsScriptC.h b/rsScriptC.h
index d3d9d513..5735bea3 100644
--- a/rsScriptC.h
+++ b/rsScriptC.h
@@ -44,14 +44,6 @@ public:
virtual void runForEach(Context *rsc,
uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc = NULL);
-
- virtual void runForEach(Context *rsc,
- uint32_t slot,
const Allocation ** ains,
size_t inLen,
Allocation * aout,
diff --git a/rsScriptC_Lib.cpp b/rsScriptC_Lib.cpp
index a41f4a79..cacb37ad 100644
--- a/rsScriptC_Lib.cpp
+++ b/rsScriptC_Lib.cpp
@@ -230,7 +230,17 @@ void rsrForEach(Context *rsc,
Allocation *in, Allocation *out,
const void *usr, uint32_t usrBytes,
const RsScriptCall *call) {
- target->runForEach(rsc, /* root slot */ 0, in, out, usr, usrBytes, call);
+
+ if (in == NULL) {
+ target->runForEach(rsc, /* root slot */ 0, NULL, 0, out, usr,
+ usrBytes, call);
+
+ } else {
+ const Allocation *ins[1] = {in};
+ target->runForEach(rsc, /* root slot */ 0, ins,
+ sizeof(ins) / sizeof(RsAllocation), out, usr,
+ usrBytes, call);
+ }
}
void rsrAllocationSyncAll(Context *rsc, Allocation *a, RsAllocationUsageType usage) {
diff --git a/rsScriptGroup.cpp b/rsScriptGroup.cpp
index d1dd9d81..a03cb786 100644
--- a/rsScriptGroup.cpp
+++ b/rsScriptGroup.cpp
@@ -346,7 +346,15 @@ void ScriptGroup::execute(Context *rsc) {
}
}
- n->mScript->runForEach(rsc, k->mSlot, ain, aout, NULL, 0);
+ if (ain == NULL) {
+ n->mScript->runForEach(rsc, k->mSlot, NULL, 0, aout, NULL, 0);
+
+ } else {
+ const Allocation *ains[1] = {ain};
+ n->mScript->runForEach(rsc, k->mSlot, ains,
+ sizeof(ains) / sizeof(RsAllocation),
+ aout, NULL, 0);
+ }
}
}
@@ -409,4 +417,3 @@ void rsi_ScriptGroupExecute(Context *rsc, RsScriptGroup sg) {
}
}
-
diff --git a/rsScriptIntrinsic.cpp b/rsScriptIntrinsic.cpp
index 86f1c504..7461d34a 100644
--- a/rsScriptIntrinsic.cpp
+++ b/rsScriptIntrinsic.cpp
@@ -55,18 +55,6 @@ uint32_t ScriptIntrinsic::run(Context *rsc) {
return 0;
}
-
-void ScriptIntrinsic::runForEach(Context *rsc,
- uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc) {
-
- rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ain, aout, usr, usrBytes, sc);
-}
-
void ScriptIntrinsic::runForEach(Context* rsc,
uint32_t slot,
const Allocation** ains,
@@ -76,7 +64,18 @@ void ScriptIntrinsic::runForEach(Context* rsc,
size_t usrBytes,
const RsScriptCall* sc) {
- rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen, aout, usr, usrBytes, sc);
+ if (rsc->mHal.funcs.script.invokeForEachMulti != NULL) {
+ rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen,
+ aout, usr, usrBytes, sc);
+
+ } else if (inLen == 1) {
+ rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ains[0], aout,
+ usr, usrBytes, sc);
+
+ } else {
+ rsc->setError(RS_ERROR_FATAL_DRIVER,
+ "Driver support for multi-input not present");
+ }
}
void ScriptIntrinsic::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) {
@@ -107,5 +106,3 @@ RsScript rsi_ScriptIntrinsicCreate(Context *rsc, uint32_t id, RsElement ve) {
}
}
-
-
diff --git a/rsScriptIntrinsic.h b/rsScriptIntrinsic.h
index 66b60318..87b73539 100644
--- a/rsScriptIntrinsic.h
+++ b/rsScriptIntrinsic.h
@@ -40,17 +40,9 @@ public:
virtual RsA3DClassID getClassId() const;
virtual bool freeChildren();
- virtual void runForEach(Context *rsc,
- uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc = NULL);
-
virtual void runForEach(Context* rsc,
uint32_t slot,
- const Allocation** ains,
+ const Allocation ** ains,
size_t inLen,
Allocation* aout,
const void* usr,
@@ -69,5 +61,3 @@ protected:
}
}
#endif
-
-