diff options
-rw-r--r-- | cpu_ref/Android.mk | 3 | ||||
-rw-r--r-- | cpu_ref/rsCpuCore.cpp | 242 | ||||
-rw-r--r-- | cpu_ref/rsCpuCore.h | 65 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsic.cpp | 49 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsic.h | 55 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsic3DLUT.cpp | 10 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsicBlend.cpp | 5 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsicColorMatrix.cpp | 38 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsicHistogram.cpp | 58 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsicLUT.cpp | 4 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsicResize.cpp | 14 | ||||
-rw-r--r-- | cpu_ref/rsCpuScript.cpp | 214 | ||||
-rw-r--r-- | cpu_ref/rsCpuScript.h | 36 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup.cpp | 103 | ||||
-rw-r--r-- | cpu_ref/rsd_cpu.h | 22 | ||||
-rw-r--r-- | driver/rsdBcc.cpp | 21 | ||||
-rw-r--r-- | rsRuntime.h | 2 | ||||
-rw-r--r-- | rsScript.cpp | 31 | ||||
-rw-r--r-- | rsScript.h | 10 | ||||
-rw-r--r-- | rsScriptC.cpp | 46 | ||||
-rw-r--r-- | rsScriptC.h | 8 | ||||
-rw-r--r-- | rsScriptC_Lib.cpp | 12 | ||||
-rw-r--r-- | rsScriptGroup.cpp | 11 | ||||
-rw-r--r-- | rsScriptIntrinsic.cpp | 27 | ||||
-rw-r--r-- | rsScriptIntrinsic.h | 12 |
25 files changed, 464 insertions, 634 deletions
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk index aeb75a65..ad7cef73 100644 --- a/cpu_ref/Android.mk +++ b/cpu_ref/Android.mk @@ -1,7 +1,8 @@ LOCAL_PATH:=$(call my-dir) -rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable -fno-exceptions +rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable \ + -fno-exceptions -std=c++11 ifeq ($(TARGET_BUILD_PDK), true) rs_base_CFLAGS += -D__RS_PDK__ endif diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp index a0564fc1..db3cc7fa 100644 --- a/cpu_ref/rsCpuCore.cpp +++ b/cpu_ref/rsCpuCore.cpp @@ -350,180 +350,134 @@ RsdCpuReferenceImpl::~RsdCpuReferenceImpl() { } typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); +typedef void (*walk_loop_t)(MTLaunchStruct*, + RsExpandKernelParams&, + outer_foreach_t); -static void wc_xy(void *usr, uint32_t idx) { + +static void walk_wrapper(void* usr, uint32_t idx, walk_loop_t walk_loop) { MTLaunchStruct *mtls = (MTLaunchStruct *)usr; + uint32_t inLen = mtls->fep.inLen; + RsExpandKernelParams kparams; kparams.takeFields(mtls->fep); // Used by CpuScriptGroup, IntrinsicBlur, and IntrinsicHistogram kparams.lid = idx; - outer_foreach_t fn = (outer_foreach_t) mtls->kernel; - while (1) { - uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); - uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; - uint32_t yEnd = yStart + mtls->mSliceSize; - - yEnd = rsMin(yEnd, mtls->yEnd); - - if (yEnd <= yStart) { - return; - } - - //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); - //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut); - - for (kparams.y = yStart; kparams.y < yEnd; kparams.y++) { - kparams.out = mtls->fep.ptrOut + - (mtls->fep.yStrideOut * kparams.y) + - (mtls->fep.eStrideOut * mtls->xStart); - - kparams.in = mtls->fep.ptrIn + - (mtls->fep.yStrideIn * kparams.y) + - (mtls->fep.eStrideIn * mtls->xStart); + if (inLen > 0) { + // Allocate space for our input base pointers. + kparams.ins = (const void**)alloca(inLen * sizeof(void*)); + // Allocate space for our input stride information. + kparams.inEStrides = (uint32_t*)alloca(inLen * sizeof(uint32_t)); - fn(&kparams, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, - mtls->fep.eStrideOut); + // Fill our stride information. + for (int inIndex = inLen; --inIndex >= 0;) { + kparams.inEStrides[inIndex] = mtls->fep.inStrides[inIndex].eStride; } } -} - -static void wc_x(void *usr, uint32_t idx) { - MTLaunchStruct *mtls = (MTLaunchStruct *)usr; - - RsExpandKernelParams kparams; - kparams.takeFields(mtls->fep); - - // Used by CpuScriptGroup, IntrinsicBlur, and IntrisicHistogram - kparams.lid = idx; outer_foreach_t fn = (outer_foreach_t) mtls->kernel; - while (1) { - uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); - uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; - uint32_t xEnd = xStart + mtls->mSliceSize; - - xEnd = rsMin(xEnd, mtls->xEnd); - - if (xEnd <= xStart) { - return; - } - - //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); - //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut); - kparams.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart); - kparams.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart); - - fn(&kparams, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut); - } + walk_loop(mtls, kparams, fn); } -void RsdCpuReferenceImpl::launchThreads(const Allocation * ain, Allocation * aout, - const RsScriptCall *sc, MTLaunchStruct *mtls) { +static void walk_2d(void *usr, uint32_t idx) { + walk_wrapper(usr, idx, [](MTLaunchStruct *mtls, + RsExpandKernelParams &kparams, + outer_foreach_t fn) { - //android::StopWatch kernel_time("kernel time"); + while (1) { + uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); + uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; + uint32_t yEnd = yStart + mtls->mSliceSize; - if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) { - const size_t targetByteChunk = 16 * 1024; - mInForEach = true; - if (mtls->fep.dimY > 1) { - uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4); - uint32_t s2 = 0; + yEnd = rsMin(yEnd, mtls->yEnd); - // This chooses our slice size to rate limit atomic ops to - // one per 16k bytes of reads/writes. - if (mtls->fep.yStrideOut) { - s2 = targetByteChunk / mtls->fep.yStrideOut; - } else { - s2 = targetByteChunk / mtls->fep.yStrideIn; + if (yEnd <= yStart) { + return; } - mtls->mSliceSize = rsMin(s1, s2); - if(mtls->mSliceSize < 1) { - mtls->mSliceSize = 1; - } + for (kparams.y = yStart; kparams.y < yEnd; kparams.y++) { + kparams.out = mtls->fep.outPtr + + (mtls->fep.outStride.yStride * kparams.y) + + (mtls->fep.outStride.eStride * mtls->xStart); - // mtls->mSliceSize = 2; - launchThreads(wc_xy, mtls); - } else { - uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4); - uint32_t s2 = 0; + for (int inIndex = mtls->fep.inLen; --inIndex >= 0;) { + StridePair &strides = mtls->fep.inStrides[inIndex]; - // This chooses our slice size to rate limit atomic ops to - // one per 16k bytes of reads/writes. - if (mtls->fep.eStrideOut) { - s2 = targetByteChunk / mtls->fep.eStrideOut; - } else { - s2 = targetByteChunk / mtls->fep.eStrideIn; - } - mtls->mSliceSize = rsMin(s1, s2); + kparams.ins[inIndex] = + mtls->fep.inPtrs[inIndex] + + (strides.yStride * kparams.y) + + (strides.eStride * mtls->xStart); + } - if(mtls->mSliceSize < 1) { - mtls->mSliceSize = 1; + // Kernels now get their input strides from kparams. + fn(&kparams, mtls->xStart, mtls->xEnd, 0, + mtls->fep.outStride.eStride); } - - launchThreads(wc_x, mtls); } - mInForEach = false; - - //ALOGE("launch 1"); - } else { - RsExpandKernelParams kparams; - kparams.takeFields(mtls->fep); + }); +} - //ALOGE("launch 3"); - outer_foreach_t fn = (outer_foreach_t) mtls->kernel; - for (uint32_t arrayIndex = mtls->arrayStart; - arrayIndex < mtls->arrayEnd; arrayIndex++) { +static void walk_1d(void *usr, uint32_t idx) { + walk_wrapper(usr, idx, [](MTLaunchStruct *mtls, + RsExpandKernelParams &kparams, + outer_foreach_t fn) { - for (kparams.z = mtls->zStart; kparams.z < mtls->zEnd; - kparams.z++) { + while (1) { + uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); + uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; + uint32_t xEnd = xStart + mtls->mSliceSize; - for (kparams.y = mtls->yStart; kparams.y < mtls->yEnd; - kparams.y++) { + xEnd = rsMin(xEnd, mtls->xEnd); - uint32_t offset = - kparams.dimY * kparams.dimZ * arrayIndex + - kparams.dimY * kparams.z + kparams.y; + if (xEnd <= xStart) { + return; + } - kparams.out = mtls->fep.ptrOut + - (mtls->fep.yStrideOut * offset) + - (mtls->fep.eStrideOut * mtls->xStart); + kparams.out = mtls->fep.outPtr + + (mtls->fep.outStride.eStride * xStart); - kparams.in = mtls->fep.ptrIn + - (mtls->fep.yStrideIn * offset) + - (mtls->fep.eStrideIn * mtls->xStart); + for (int inIndex = mtls->fep.inLen; --inIndex >= 0;) { + StridePair &strides = mtls->fep.inStrides[inIndex]; - fn(&kparams, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, - mtls->fep.eStrideOut); - } + kparams.ins[inIndex] = + mtls->fep.inPtrs[inIndex] + (strides.eStride * xStart); } + + // Kernels now get their input strides from kparams. + fn(&kparams, xStart, xEnd, 0, mtls->fep.outStride.eStride); } - } + }); } -void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen, Allocation* aout, - const RsScriptCall* sc, MTLaunchStruct* mtls) { + +void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains, + uint32_t inLen, + Allocation* aout, + const RsScriptCall* sc, + MTLaunchStruct* mtls) { //android::StopWatch kernel_time("kernel time"); if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) { const size_t targetByteChunk = 16 * 1024; mInForEach = true; + if (mtls->fep.dimY > 1) { uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4); uint32_t s2 = 0; // This chooses our slice size to rate limit atomic ops to // one per 16k bytes of reads/writes. - if (mtls->fep.yStrideOut) { - s2 = targetByteChunk / mtls->fep.yStrideOut; + if (mtls->fep.outStride.yStride) { + s2 = targetByteChunk / mtls->fep.outStride.yStride; } else { - s2 = targetByteChunk / mtls->fep.yStrideIn; + // We know that there is either an output or an input. + s2 = targetByteChunk / mtls->fep.inStrides[0].yStride; } mtls->mSliceSize = rsMin(s1, s2); @@ -531,18 +485,18 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen, mtls->mSliceSize = 1; } - // mtls->mSliceSize = 2; - launchThreads(wc_xy, mtls); + launchThreads(walk_2d, mtls); } else { uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4); uint32_t s2 = 0; // This chooses our slice size to rate limit atomic ops to // one per 16k bytes of reads/writes. - if (mtls->fep.eStrideOut) { - s2 = targetByteChunk / mtls->fep.eStrideOut; + if (mtls->fep.outStride.eStride) { + s2 = targetByteChunk / mtls->fep.outStride.eStride; } else { - s2 = targetByteChunk / mtls->fep.eStrideIn; + // We know that there is either an output or an input. + s2 = targetByteChunk / mtls->fep.inStrides[0].eStride; } mtls->mSliceSize = rsMin(s1, s2); @@ -550,24 +504,26 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen, mtls->mSliceSize = 1; } - launchThreads(wc_x, mtls); + launchThreads(walk_1d, mtls); } mInForEach = false; - //ALOGE("launch 1"); } else { RsExpandKernelParams kparams; kparams.takeFields(mtls->fep); - // Allocate space for our input base pointers. - kparams.ins = new const void*[inLen]; + if (inLen > 0) { + // Allocate space for our input base pointers. + kparams.ins = (const void**)alloca(inLen * sizeof(void*)); - // Allocate space for our input stride information. - kparams.eStrideIns = new uint32_t[inLen]; + // Allocate space for our input stride information. + kparams.inEStrides = (uint32_t*)alloca(inLen * sizeof(uint32_t)); - // Fill our stride information. - for (int inIndex = inLen; --inIndex >= 0;) { - kparams.eStrideIns[inIndex] = mtls->fep.inStrides[inIndex].eStride; + // Fill our stride information. + for (int inIndex = inLen; --inIndex >= 0;) { + kparams.inEStrides[inIndex] = + mtls->fep.inStrides[inIndex].eStride; + } } //ALOGE("launch 3"); @@ -585,15 +541,15 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen, mtls->fep.dimY * mtls->fep.dimZ * arrayIndex + mtls->fep.dimY * kparams.z + kparams.y; - kparams.out = mtls->fep.ptrOut + - (mtls->fep.yStrideOut * offset) + - (mtls->fep.eStrideOut * mtls->xStart); + kparams.out = mtls->fep.outPtr + + (mtls->fep.outStride.yStride * offset) + + (mtls->fep.outStride.eStride * mtls->xStart); for (int inIndex = inLen; --inIndex >= 0;) { StridePair &strides = mtls->fep.inStrides[inIndex]; kparams.ins[inIndex] = - mtls->fep.ptrIns[inIndex] + + mtls->fep.inPtrs[inIndex] + (strides.yStride * offset) + (strides.eStride * mtls->xStart); } @@ -604,14 +560,10 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen, * that points to an array. */ fn(&kparams, mtls->xStart, mtls->xEnd, 0, - mtls->fep.eStrideOut); + mtls->fep.outStride.eStride); } } } - - // Free our arrays. - delete[] kparams.ins; - delete[] kparams.eStrideIns; } } diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h index 5d4b6cc5..2fea3fcd 100644 --- a/cpu_ref/rsCpuCore.h +++ b/cpu_ref/rsCpuCore.h @@ -25,6 +25,8 @@ #include <string> +#define RS_KERNEL_INPUT_THRESHOLD 32 + namespace bcc { class BCCContext; class RSCompilerDriver; @@ -40,31 +42,36 @@ struct StridePair { }; struct RsExpandKernelDriverInfo { - const void *usr; - uint32_t usrLen; + const uint8_t **inPtrs; + uint32_t inLen; + + uint8_t *outPtr; + + StridePair *inStrides; + StridePair outStride; uint32_t dimX; uint32_t dimY; uint32_t dimZ; - const uint8_t *ptrIn; - uint8_t *ptrOut; - uint32_t eStrideIn; - uint32_t eStrideOut; - uint32_t yStrideIn; - uint32_t yStrideOut; uint32_t slot; - const uint8_t** ptrIns; - StridePair* inStrides; + const void *usr; + uint32_t usrLen; - ~RsExpandKernelDriverInfo() { - if (ptrIns != NULL) { - delete[] ptrIns; - } + bool heapAllocatedArrays; - if (inStrides != NULL) { - delete[] inStrides; + RsExpandKernelDriverInfo() : heapAllocatedArrays(false) {} + + ~RsExpandKernelDriverInfo() { + if (heapAllocatedArrays) { + if (inPtrs != NULL) { + delete[] inPtrs; + } + + if (inStrides != NULL) { + delete[] inStrides; + } } } }; @@ -72,15 +79,13 @@ struct RsExpandKernelDriverInfo { struct RsExpandKernelParams { // Used by kernels - const void *in; + const void **ins; + uint32_t *inEStrides; void *out; uint32_t y; uint32_t z; uint32_t lid; - const void **ins; - uint32_t *eStrideIns; - // Used by ScriptGroup and user kernels. const void *usr; @@ -115,13 +120,13 @@ typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); class RsdCpuScriptImpl; class RsdCpuReferenceImpl; -typedef struct ScriptTLSStructRec { +struct ScriptTLSStruct { android::renderscript::Context * mContext; const android::renderscript::Script * mScript; RsdCpuScriptImpl *mImpl; -} ScriptTLSStruct; +}; -typedef struct { +struct MTLaunchStruct { RsExpandKernelDriverInfo fep; RsdCpuReferenceImpl *rsc; @@ -129,7 +134,7 @@ typedef struct { ForEachFunc_t kernel; uint32_t sig; - const Allocation * ain; + const Allocation ** ains; Allocation * aout; uint32_t mSliceSize; @@ -145,12 +150,9 @@ typedef struct { uint32_t arrayStart; uint32_t arrayEnd; - // Multi-input data. - const Allocation ** ains; -} MTLaunchStruct; - - - + const uint8_t *inPtrsBuff[RS_KERNEL_INPUT_THRESHOLD]; + StridePair inStridesBuff[RS_KERNEL_INPUT_THRESHOLD]; +}; class RsdCpuReferenceImpl : public RsdCpuReference { public: @@ -171,9 +173,6 @@ public: return mWorkers.mCount + 1; } - void launchThreads(const Allocation * ain, Allocation * aout, - const RsScriptCall *sc, MTLaunchStruct *mtls); - void launchThreads(const Allocation** ains, uint32_t inLen, Allocation* aout, const RsScriptCall* sc, MTLaunchStruct* mtls); diff --git a/cpu_ref/rsCpuIntrinsic.cpp b/cpu_ref/rsCpuIntrinsic.cpp index 5a7fffd5..8437c998 100644 --- a/cpu_ref/rsCpuIntrinsic.cpp +++ b/cpu_ref/rsCpuIntrinsic.cpp @@ -73,54 +73,29 @@ void RsdCpuScriptIntrinsic::invokeFreeChildren() { } -void RsdCpuScriptIntrinsic::preLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc) { +void RsdCpuScriptIntrinsic::preLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc) { } -void RsdCpuScriptIntrinsic::postLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc) { +void RsdCpuScriptIntrinsic::postLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc) { } void RsdCpuScriptIntrinsic::invokeForEach(uint32_t slot, - const Allocation * ain, + const Allocation ** ains, + uint32_t inLen, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc) { MTLaunchStruct mtls; - preLaunch(slot, ain, aout, usr, usrLen, sc); - forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls); - mtls.script = this; - mtls.fep.slot = slot; - - mtls.kernel = (void (*)())mRootPtr; - mtls.fep.usr = this; - - RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); - mCtx->launchThreads(ain, aout, sc, &mtls); - mCtx->setTLS(oldTLS); - - postLaunch(slot, ain, aout, usr, usrLen, sc); -} - -void RsdCpuScriptIntrinsic::invokeForEachMulti(uint32_t slot, - const Allocation ** ains, - uint32_t inLen, - Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc) { - - MTLaunchStruct mtls; - /* - * FIXME: Possibly create new preLaunch and postLaunch functions that take - * all of the input allocation pointers. - */ - preLaunch(slot, ains[0], aout, usr, usrLen, sc); + preLaunch(slot, ains, inLen, aout, usr, usrLen, sc); forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls); mtls.script = this; @@ -133,7 +108,7 @@ void RsdCpuScriptIntrinsic::invokeForEachMulti(uint32_t slot, mCtx->launchThreads(ains, inLen, aout, sc, &mtls); mCtx->setTLS(oldTLS); - postLaunch(slot, ains[0], aout, usr, usrLen, sc); + postLaunch(slot, ains, inLen, aout, usr, usrLen, sc); } void RsdCpuScriptIntrinsic::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { diff --git a/cpu_ref/rsCpuIntrinsic.h b/cpu_ref/rsCpuIntrinsic.h index bf6a8acd..95aaa141 100644 --- a/cpu_ref/rsCpuIntrinsic.h +++ b/cpu_ref/rsCpuIntrinsic.h @@ -28,43 +28,42 @@ class RsdCpuScriptIntrinsic : public RsdCpuScriptImpl { public: virtual void populateScript(Script *) = 0; - virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength); + virtual void invokeFunction(uint32_t slot, const void * params, + size_t paramLength); virtual int invokeRoot(); + virtual void invokeForEach(uint32_t slot, - const Allocation * ain, - Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc); - - virtual void invokeForEachMulti(uint32_t slot, - const Allocation ** ain, - uint32_t inLen, - Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc); - - virtual void forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls); + const Allocation ** ain, + uint32_t inLen, + Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc); + + virtual void forEachKernelSetup(uint32_t slot, MTLaunchStruct * mtls); virtual void invokeInit(); virtual void invokeFreeChildren(); - virtual void preLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc); - virtual void postLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc); - - virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); - virtual void setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, - const Element *e, const uint32_t *dims, size_t dimLength); + virtual void preLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, const void * usr, + uint32_t usrLen, const RsScriptCall * sc); + virtual void postLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall * sc); + + virtual void setGlobalVar(uint32_t slot, const void * data, + size_t dataLength); + virtual void setGlobalVarWithElemDims(uint32_t slot, const void * data, + size_t dataLength, const Element * e, + const uint32_t * dims, + size_t dimLength); virtual void setGlobalBind(uint32_t slot, Allocation *data); virtual void setGlobalObj(uint32_t slot, ObjectBase *data); virtual ~RsdCpuScriptIntrinsic(); - RsdCpuScriptIntrinsic(RsdCpuReferenceImpl *ctx, const Script *s, const Element *, - RsScriptIntrinsicID iid); + RsdCpuScriptIntrinsic(RsdCpuReferenceImpl * ctx, const Script * s, + const Element * e, RsScriptIntrinsicID iid); protected: RsScriptIntrinsicID mID; diff --git a/cpu_ref/rsCpuIntrinsic3DLUT.cpp b/cpu_ref/rsCpuIntrinsic3DLUT.cpp index c839c19d..a19d8851 100644 --- a/cpu_ref/rsCpuIntrinsic3DLUT.cpp +++ b/cpu_ref/rsCpuIntrinsic3DLUT.cpp @@ -64,7 +64,7 @@ void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p, RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)p->usr; uchar4 *out = (uchar4 *)p->out + xstart; - uchar4 *in = (uchar4 *)p->in + xstart; + uchar4 *in = (uchar4 *)p->ins[0] + xstart; uint32_t x1 = xstart; uint32_t x2 = xend; @@ -161,9 +161,9 @@ void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p, } } -RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx, - const Script *s, const Element *e) - : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) { +RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT( + RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) : + RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) { mRootPtr = &kernel; } @@ -185,5 +185,3 @@ RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx, return new RsdCpuScriptIntrinsic3DLUT(ctx, s, e); } - - diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp index b6046584..0378e076 100644 --- a/cpu_ref/rsCpuIntrinsicBlend.cpp +++ b/cpu_ref/rsCpuIntrinsicBlend.cpp @@ -117,7 +117,7 @@ void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelParams *p, // instep/outstep can be ignored--sizeof(uchar4) known at compile time uchar4 *out = (uchar4 *)p->out; - uchar4 *in = (uchar4 *)p->in; + uchar4 *in = (uchar4 *)p->ins[0]; uint32_t x1 = xstart; uint32_t x2 = xend; @@ -509,6 +509,3 @@ RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { return new RsdCpuScriptIntrinsicBlend(ctx, s, e); } - - - diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp index bf78eb3e..4e90ad72 100644 --- a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp +++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp @@ -169,10 +169,9 @@ public: virtual ~RsdCpuScriptIntrinsicColorMatrix(); RsdCpuScriptIntrinsicColorMatrix(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); - virtual void preLaunch(uint32_t slot, const Allocation * ain, Allocation * aout, - const void * usr, uint32_t usrLen, const RsScriptCall *sc); - virtual void postLaunch(uint32_t slot, const Allocation * ain, Allocation * aout, - const void * usr, uint32_t usrLen, const RsScriptCall *sc); + virtual void preLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, const void * usr, + uint32_t usrLen, const RsScriptCall *sc); protected: float fp[16]; @@ -883,8 +882,13 @@ void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p, uint32_t xstart, uint32_t xend, uint32_t instep, uint32_t outstep) { RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr; - uchar *out = (uchar *)p->out + outstep * xstart; - uchar *in = (uchar *)p->in + instep * xstart; + + // Update the instep due to change in parameter passing. + instep = p->inEStrides[0]; + + uchar *out = (uchar *)p->out + outstep * xstart; + uchar *in = (uchar *)p->ins[0] + instep * xstart; + uint32_t x1 = xstart; uint32_t x2 = xend; @@ -932,11 +936,15 @@ void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p, } } -void RsdCpuScriptIntrinsicColorMatrix::preLaunch( - uint32_t slot, const Allocation * ain, Allocation * aout, - const void * usr, uint32_t usrLen, const RsScriptCall *sc) { +void RsdCpuScriptIntrinsicColorMatrix::preLaunch(uint32_t slot, + const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc) { - const Element *ein = ain->mHal.state.type->getElement(); + const Element *ein = ains[0]->mHal.state.type->getElement(); const Element *eout = aout->mHal.state.type->getElement(); if (ein->getType() == eout->getType()) { @@ -953,8 +961,8 @@ void RsdCpuScriptIntrinsicColorMatrix::preLaunch( } } - Key_t key = computeKey(ain->mHal.state.type->getElement(), - aout->mHal.state.type->getElement()); + Key_t key = computeKey(ein, eout); + #if defined(ARCH_X86_HAVE_SSSE3) if ((mOptKernel == NULL) || (mLastKey.key != key.key)) { // FIXME: Disable mOptKernel to pass RS color matrix CTS cases @@ -996,12 +1004,6 @@ void RsdCpuScriptIntrinsicColorMatrix::preLaunch( #endif //if !defined(ARCH_X86_HAVE_SSSE3) } -void RsdCpuScriptIntrinsicColorMatrix::postLaunch( - uint32_t slot, const Allocation * ain, Allocation * aout, - const void * usr, uint32_t usrLen, const RsScriptCall *sc) { - -} - RsdCpuScriptIntrinsicColorMatrix::RsdCpuScriptIntrinsicColorMatrix( RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX) { diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp index 1c430b72..b5dbfa80 100644 --- a/cpu_ref/rsCpuIntrinsicHistogram.cpp +++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp @@ -36,10 +36,10 @@ public: RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); protected: - void preLaunch(uint32_t slot, const Allocation * ain, + void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc); - void postLaunch(uint32_t slot, const Allocation * ain, + void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc); @@ -97,9 +97,12 @@ void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *dat -void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc) { +void +RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, + const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc) { const uint32_t threads = mCtx->getThreadCount(); uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); @@ -123,7 +126,7 @@ void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * } break; case 1: - switch(ain->getType()->getElement()->getVectorSize()) { + switch(ains[0]->getType()->getElement()->getVectorSize()) { case 1: mRootPtr = &kernelP1L1; break; @@ -142,9 +145,12 @@ void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize); } -void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc) { +void +RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, + const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc) { unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr; uint32_t threads = mCtx->getThreadCount(); @@ -165,7 +171,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p, uint32_t instep, uint32_t outstep) { RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; - uchar *in = (uchar *)p->in; + uchar *in = (uchar *)p->ins[0]; int * sums = &cp->mSums[256 * 4 * p->lid]; for (uint32_t x = xstart; x < xend; x++) { @@ -173,7 +179,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p, sums[(in[1] << 2) + 1] ++; sums[(in[2] << 2) + 2] ++; sums[(in[3] << 2) + 3] ++; - in += instep; + in += p->inEStrides[0]; } } @@ -182,14 +188,14 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p, uint32_t instep, uint32_t outstep) { RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; - uchar *in = (uchar *)p->in; + uchar *in = (uchar *)p->ins[0]; int * sums = &cp->mSums[256 * 4 * p->lid]; for (uint32_t x = xstart; x < xend; x++) { sums[(in[0] << 2) ] ++; sums[(in[1] << 2) + 1] ++; sums[(in[2] << 2) + 2] ++; - in += instep; + in += p->inEStrides[0]; } } @@ -198,13 +204,13 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p, uint32_t instep, uint32_t outstep) { RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; - uchar *in = (uchar *)p->in; + uchar *in = (uchar *)p->ins[0]; int * sums = &cp->mSums[256 * 2 * p->lid]; for (uint32_t x = xstart; x < xend; x++) { sums[(in[0] << 1) ] ++; sums[(in[1] << 1) + 1] ++; - in += instep; + in += p->inEStrides[0]; } } @@ -213,7 +219,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p, uint32_t instep, uint32_t outstep) { RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; - uchar *in = (uchar *)p->in; + uchar *in = (uchar *)p->ins[0]; int * sums = &cp->mSums[256 * p->lid]; for (uint32_t x = xstart; x < xend; x++) { @@ -222,7 +228,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p, (cp->mDotI[2] * in[2]) + (cp->mDotI[3] * in[3]); sums[(t + 0x7f) >> 8] ++; - in += instep; + in += p->inEStrides[0]; } } @@ -231,7 +237,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p, uint32_t instep, uint32_t outstep) { RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; - uchar *in = (uchar *)p->in; + uchar *in = (uchar *)p->ins[0]; int * sums = &cp->mSums[256 * p->lid]; for (uint32_t x = xstart; x < xend; x++) { @@ -239,7 +245,7 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p, (cp->mDotI[1] * in[1]) + (cp->mDotI[2] * in[2]); sums[(t + 0x7f) >> 8] ++; - in += instep; + in += p->inEStrides[0]; } } @@ -248,14 +254,14 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p, uint32_t instep, uint32_t outstep) { RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; - uchar *in = (uchar *)p->in; + uchar *in = (uchar *)p->ins[0]; int * sums = &cp->mSums[256 * p->lid]; for (uint32_t x = xstart; x < xend; x++) { int t = (cp->mDotI[0] * in[0]) + (cp->mDotI[1] * in[1]); sums[(t + 0x7f) >> 8] ++; - in += instep; + in += p->inEStrides[0]; } } @@ -264,13 +270,13 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p, uint32_t instep, uint32_t outstep) { RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; - uchar *in = (uchar *)p->in; + uchar *in = (uchar *)p->ins[0]; int * sums = &cp->mSums[256 * p->lid]; for (uint32_t x = xstart; x < xend; x++) { int t = (cp->mDotI[0] * in[0]); sums[(t + 0x7f) >> 8] ++; - in += instep; + in += p->inEStrides[0]; } } @@ -279,12 +285,12 @@ void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p, uint32_t instep, uint32_t outstep) { RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; - uchar *in = (uchar *)p->in; + uchar *in = (uchar *)p->ins[0]; int * sums = &cp->mSums[256 * p->lid]; for (uint32_t x = xstart; x < xend; x++) { sums[in[0]] ++; - in += instep; + in += p->inEStrides[0]; } } @@ -323,5 +329,3 @@ RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script return new RsdCpuScriptIntrinsicHistogram(ctx, s, e); } - - diff --git a/cpu_ref/rsCpuIntrinsicLUT.cpp b/cpu_ref/rsCpuIntrinsicLUT.cpp index db73a838..9d3b4003 100644 --- a/cpu_ref/rsCpuIntrinsicLUT.cpp +++ b/cpu_ref/rsCpuIntrinsicLUT.cpp @@ -59,7 +59,7 @@ void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelParams *p, RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)p->usr; uchar *out = (uchar *)p->out; - const uchar *in = (uchar *)p->in; + const uchar *in = (uchar *)p->ins[0]; uint32_t x1 = xstart; uint32_t x2 = xend; @@ -103,5 +103,3 @@ RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx, return new RsdCpuScriptIntrinsicLUT(ctx, s, e); } - - diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp index af1127e7..3a307d63 100644 --- a/cpu_ref/rsCpuIntrinsicResize.cpp +++ b/cpu_ref/rsCpuIntrinsicResize.cpp @@ -35,8 +35,8 @@ public: virtual ~RsdCpuScriptIntrinsicResize(); RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *); - virtual void preLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, + virtual void preLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc); float scaleX; @@ -308,9 +308,11 @@ RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize ( RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() { } -void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc) +void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, + const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc) { if (!mAlloc.get()) { ALOGE("Resize executed without input, skipping"); @@ -351,5 +353,3 @@ RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s return new RsdCpuScriptIntrinsicResize(ctx, s, e); } - - diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp index a11fda19..05984207 100644 --- a/cpu_ref/rsCpuScript.cpp +++ b/cpu_ref/rsCpuScript.cpp @@ -789,144 +789,33 @@ void RsdCpuScriptImpl::populateScript(Script *script) { typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); -void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout, +void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, + uint32_t inLen, + Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc, MTLaunchStruct *mtls) { memset(mtls, 0, sizeof(MTLaunchStruct)); - // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface - if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations"); - return; - } - if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations"); - return; - } - - if (ain != NULL) { - const Type *inType = ain->getType(); + for (int index = inLen; --index >= 0;) { + const Allocation* ain = ains[index]; - mtls->fep.dimX = inType->getDimX(); - mtls->fep.dimY = inType->getDimY(); - mtls->fep.dimZ = inType->getDimZ(); - - } else if (aout != NULL) { - const Type *outType = aout->getType(); - - mtls->fep.dimX = outType->getDimX(); - mtls->fep.dimY = outType->getDimY(); - mtls->fep.dimZ = outType->getDimZ(); - - } else { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); - return; - } - - if (ain != NULL && aout != NULL) { - if (!ain->hasSameDims(aout)) { + // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface + if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, - "Failed to launch kernel; dimensions of input and output allocations do not match."); - + "rsForEach called with null in allocations"); return; } } - if (!sc || (sc->xEnd == 0)) { - mtls->xEnd = mtls->fep.dimX; - } else { - rsAssert(sc->xStart < mtls->fep.dimX); - rsAssert(sc->xEnd <= mtls->fep.dimX); - rsAssert(sc->xStart < sc->xEnd); - mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart); - mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd); - if (mtls->xStart >= mtls->xEnd) return; - } - - if (!sc || (sc->yEnd == 0)) { - mtls->yEnd = mtls->fep.dimY; - } else { - rsAssert(sc->yStart < mtls->fep.dimY); - rsAssert(sc->yEnd <= mtls->fep.dimY); - rsAssert(sc->yStart < sc->yEnd); - mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart); - mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd); - if (mtls->yStart >= mtls->yEnd) return; - } - - if (!sc || (sc->zEnd == 0)) { - mtls->zEnd = mtls->fep.dimZ; - } else { - rsAssert(sc->zStart < mtls->fep.dimZ); - rsAssert(sc->zEnd <= mtls->fep.dimZ); - rsAssert(sc->zStart < sc->zEnd); - mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart); - mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd); - if (mtls->zStart >= mtls->zEnd) return; - } - - mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); - mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); - mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); - mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); - - rsAssert(!ain || (ain->getType()->getDimZ() == 0)); - - mtls->rsc = mCtx; - mtls->ain = ain; - mtls->aout = aout; - mtls->fep.usr = usr; - mtls->fep.usrLen = usrLen; - mtls->mSliceSize = 1; - mtls->mSliceNum = 0; - - mtls->fep.ptrIn = NULL; - mtls->fep.eStrideIn = 0; - mtls->isThreadable = mIsThreadable; - - if (ain) { - mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr; - mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes(); - mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride; - } - - mtls->fep.ptrOut = NULL; - mtls->fep.eStrideOut = 0; - if (aout) { - mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; - mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); - mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; - } -} - -void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen, - Allocation * aout, - const void * usr, uint32_t usrLen, - const RsScriptCall *sc, - MTLaunchStruct *mtls) { - - memset(mtls, 0, sizeof(MTLaunchStruct)); - - // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface - if (ains != NULL) { - for (int index = inLen; --index >= 0;) { - const Allocation* ain = ains[index]; - - if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations"); - return; - } - } - } - if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations"); + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, + "rsForEach called with null out allocations"); return; } - if (ains != NULL) { + if (inLen > 0) { const Allocation *ain0 = ains[0]; const Type *inType = ain0->getType(); @@ -951,11 +840,12 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen mtls->fep.dimZ = outType->getDimZ(); } else { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, + "rsForEach called with null allocations"); return; } - if (ains != NULL && aout != NULL) { + if (inLen > 0 && aout != NULL) { if (!ains[0]->hasSameDims(aout)) { mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "Failed to launch kernel; dimensions of input and output allocations do not match."); @@ -1002,7 +892,7 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); - rsAssert(!ains || (ains[0]->getType()->getDimZ() == 0)); + rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0)); mtls->rsc = mCtx; mtls->ains = ains; @@ -1012,18 +902,28 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen mtls->mSliceSize = 1; mtls->mSliceNum = 0; - mtls->fep.ptrIns = NULL; - mtls->fep.eStrideIn = 0; + mtls->fep.inPtrs = NULL; + mtls->fep.inStrides = NULL; mtls->isThreadable = mIsThreadable; - if (ains) { - mtls->fep.ptrIns = new const uint8_t*[inLen]; - mtls->fep.inStrides = new StridePair[inLen]; + if (inLen > 0) { + + if (inLen <= RS_KERNEL_INPUT_THRESHOLD) { + mtls->fep.inPtrs = (const uint8_t**)mtls->inPtrsBuff; + mtls->fep.inStrides = mtls->inStridesBuff; + } else { + mtls->fep.heapAllocatedArrays = true; + + mtls->fep.inPtrs = new const uint8_t*[inLen]; + mtls->fep.inStrides = new StridePair[inLen]; + } + + mtls->fep.inLen = inLen; for (int index = inLen; --index >= 0;) { const Allocation *ain = ains[index]; - mtls->fep.ptrIns[index] = + mtls->fep.inPtrs[index] = (const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr; mtls->fep.inStrides[index].eStride = @@ -1033,41 +933,27 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen } } - mtls->fep.ptrOut = NULL; - mtls->fep.eStrideOut = 0; - if (aout) { - mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; - mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); - mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; + mtls->fep.outPtr = NULL; + mtls->fep.outStride.eStride = 0; + mtls->fep.outStride.yStride = 0; + if (aout != NULL) { + mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; + + mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes(); + mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride; } } void RsdCpuScriptImpl::invokeForEach(uint32_t slot, - const Allocation * ain, + const Allocation ** ains, + uint32_t inLen, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc) { MTLaunchStruct mtls; - forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls); - forEachKernelSetup(slot, &mtls); - - RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); - mCtx->launchThreads(ain, aout, sc, &mtls); - mCtx->setTLS(oldTLS); -} - -void RsdCpuScriptImpl::invokeForEachMulti(uint32_t slot, - const Allocation ** ains, - uint32_t inLen, - Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc) { - - MTLaunchStruct mtls; forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls); forEachKernelSetup(slot, &mtls); @@ -1338,17 +1224,15 @@ Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { return NULL; } -void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc) -{ -} +void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc) {} -void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc) -{ -} +void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc) {} } diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h index d51e9e3f..f0843cc0 100644 --- a/cpu_ref/rsCpuScript.h +++ b/cpu_ref/rsCpuScript.h @@ -64,26 +64,22 @@ public: virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength); virtual int invokeRoot(); - virtual void preLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, + virtual void preLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc); - virtual void postLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc); + virtual void postLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc); + virtual void invokeForEach(uint32_t slot, - const Allocation * ain, - Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc); - - virtual void invokeForEachMulti(uint32_t slot, - const Allocation** ains, - uint32_t inLen, - Allocation* aout, - const void* usr, - uint32_t usrLen, - const RsScriptCall* sc); + const Allocation ** ains, + uint32_t inLen, + Allocation* aout, + const void* usr, + uint32_t usrLen, + const RsScriptCall* sc); + virtual void invokeInit(); virtual void invokeFreeChildren(); @@ -100,10 +96,6 @@ public: const Script * getScript() {return mScript;} - void forEachMtlsSetup(const Allocation * ain, Allocation * aout, - const void * usr, uint32_t usrLen, - const RsScriptCall *sc, MTLaunchStruct *mtls); - void forEachMtlsSetup(const Allocation ** ains, uint32_t inLen, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc, MTLaunchStruct *mtls); diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp index 08785523..20ee09db 100644 --- a/cpu_ref/rsCpuScriptGroup.cpp +++ b/cpu_ref/rsCpuScriptGroup.cpp @@ -53,38 +53,45 @@ void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams, uint32_t instep, uint32_t outstep) { - const ScriptList *sl = (const ScriptList *)kparams->usr; + const ScriptList *sl = (const ScriptList *)kparams->usr; RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams; + const void **oldIns = mkparams->ins; + uint32_t *oldStrides = mkparams->inEStrides; + + void *localIns[1]; + uint32_t localStride[1]; + + mkparams->ins = (const void**)localIns; + mkparams->inEStrides = localStride; + for (size_t ct = 0; ct < sl->count; ct++) { ScriptGroupRootFunc_t func; func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; mkparams->usr = sl->usrPtrs[ct]; - mkparams->in = NULL; - mkparams->out = NULL; - - uint32_t istep = 0; - uint32_t ostep = 0; - if (sl->ins[ct]) { - mkparams->in = - (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; + localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; - istep = sl->ins[ct]->mHal.state.elementSizeBytes; + localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes; if (sl->inExts[ct]) { - mkparams->in = - (const uint8_t *)mkparams->in + - sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y; + localIns[0] = (void*) + ((const uint8_t *)localIns[0] + + sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y); } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) { - mkparams->in = - (const uint8_t *)mkparams->in + - sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid; + localIns[0] = (void*) + ((const uint8_t *)localIns[0] + + sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid); } + + } else { + localIns[0] = NULL; + localStride[0] = 0; } + uint32_t ostep; if (sl->outs[ct]) { mkparams->out = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; @@ -101,14 +108,23 @@ void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams, (uint8_t *)mkparams->out + sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid; } + } else { + mkparams->out = NULL; + ostep = 0; } //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); - func(kparams, xstart, xend, istep, ostep); + /* + * The fourth argument is zero here because kernels get their stride + * information from a member of p that points to an array. + */ + func(kparams, xstart, xend, 0, ostep); } //ALOGE("script group root"); - mkparams->usr = sl; + mkparams->ins = oldIns; + mkparams->inEStrides = oldStrides; + mkparams->usr = sl; } @@ -195,17 +211,33 @@ void CpuScriptGroupImpl::execute() { MTLaunchStruct mtls; - if(fieldDep) { + if (fieldDep) { for (size_t ct=0; ct < ins.size(); ct++) { Script *s = kernels[ct]->mScript; RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); uint32_t slot = kernels[ct]->mSlot; - si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls); + uint32_t inLen; + const Allocation **ains; + + if (ins[ct] == NULL) { + inLen = 0; + ains = NULL; + + } else { + inLen = 1; + ains = const_cast<const Allocation**>(&ins[ct]); + } + + si->forEachMtlsSetup(ains, inLen, outs[ct], NULL, 0, NULL, &mtls); + si->forEachKernelSetup(slot, &mtls); - si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); - mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls); - si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL); + si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr, + mtls.fep.usrLen, NULL); + + mCtx->launchThreads(ains, inLen, outs[ct], NULL, &mtls); + + si->postLaunch(slot, ains, inLen, outs[ct], NULL, 0, NULL); } } else { ScriptList sl; @@ -214,6 +246,18 @@ void CpuScriptGroupImpl::execute() { sl.kernels = kernels.array(); sl.count = kernels.size(); + uint32_t inLen; + const Allocation **ains; + + if (ins[0] == NULL) { + inLen = 0; + ains = NULL; + + } else { + inLen = 1; + ains = const_cast<const Allocation**>(&ins[0]); + } + Vector<const void *> usrPtrs; Vector<const void *> fnPtrs; Vector<uint32_t> sigs; @@ -225,7 +269,8 @@ void CpuScriptGroupImpl::execute() { fnPtrs.add((void *)mtls.kernel); usrPtrs.add(mtls.fep.usr); sigs.add(mtls.fep.usrLen); - si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); + si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], + mtls.fep.usr, mtls.fep.usrLen, NULL); } sl.sigs = sigs.array(); sl.usrPtrs = usrPtrs.array(); @@ -235,16 +280,20 @@ void CpuScriptGroupImpl::execute() { Script *s = kernels[0]->mScript; RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); - si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls); + + si->forEachMtlsSetup(ains, inLen, outs[0], NULL, 0, NULL, &mtls); + mtls.script = NULL; mtls.kernel = (void (*)())&scriptGroupRoot; mtls.fep.usr = &sl; - mCtx->launchThreads(ins[0], outs[0], NULL, &mtls); + + mCtx->launchThreads(ains, inLen, outs[0], NULL, &mtls); for (size_t ct=0; ct < kernels.size(); ct++) { Script *s = kernels[ct]->mScript; RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); - si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL); + si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], NULL, 0, + NULL); } } } diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h index 0076cb98..4728b7c8 100644 --- a/cpu_ref/rsd_cpu.h +++ b/cpu_ref/rsd_cpu.h @@ -69,21 +69,15 @@ public: virtual void populateScript(Script *) = 0; virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength) = 0; virtual int invokeRoot() = 0; + virtual void invokeForEach(uint32_t slot, - const Allocation * ain, - Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc) = 0; - - virtual void invokeForEachMulti(uint32_t slot, - const Allocation** ains, - uint32_t inLen, - Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc) = 0; - + const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc) = 0; + virtual void invokeInit() = 0; virtual void invokeFreeChildren() = 0; diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp index 27029cf1..b7c7f2e5 100644 --- a/driver/rsdBcc.cpp +++ b/driver/rsdBcc.cpp @@ -43,8 +43,9 @@ bool rsdScriptInit(const Context *rsc, size_t bitcodeSize, uint32_t flags) { RsdHal *dc = (RsdHal *)rsc->mHal.drv; - RsdCpuReference::CpuScript * cs = dc->mCpuRef->createScript(script, resName, cacheDir, - bitcode, bitcodeSize, flags); + RsdCpuReference::CpuScript * cs = + dc->mCpuRef->createScript(script, resName, cacheDir, bitcode, + bitcodeSize, flags); if (cs == NULL) { return false; } @@ -53,7 +54,8 @@ bool rsdScriptInit(const Context *rsc, return true; } -bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) { +bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, + Element *e) { RsdHal *dc = (RsdHal *)rsc->mHal.drv; RsdCpuReference::CpuScript * cs = dc->mCpuRef->createIntrinsic(s, iid, e); if (cs == NULL) { @@ -73,8 +75,15 @@ void rsdScriptInvokeForEach(const Context *rsc, size_t usrLen, const RsScriptCall *sc) { - RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; - cs->invokeForEach(slot, ain, aout, usr, usrLen, sc); + if (ain == NULL) { + rsdScriptInvokeForEachMulti(rsc, s, slot, NULL, 0, aout, usr, usrLen, + sc); + } else { + const Allocation *ains[1] = {ain}; + + rsdScriptInvokeForEachMulti(rsc, s, slot, ains, 1, aout, usr, usrLen, + sc); + } } void rsdScriptInvokeForEachMulti(const Context *rsc, @@ -88,7 +97,7 @@ void rsdScriptInvokeForEachMulti(const Context *rsc, const RsScriptCall *sc) { RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; - cs->invokeForEachMulti(slot, ains, inLen, aout, usr, usrLen, sc); + cs->invokeForEach(slot, ains, inLen, aout, usr, usrLen, sc); } diff --git a/rsRuntime.h b/rsRuntime.h index eb93e252..5a058830 100644 --- a/rsRuntime.h +++ b/rsRuntime.h @@ -158,7 +158,7 @@ void rsrForEach(Context *, Script *target, Allocation *in, Allocation *out, const void *usr, - uint32_t usrBytes, + uint32_t usrBytes, const RsScriptCall *call); diff --git a/rsScript.cpp b/rsScript.cpp index ea1b3ac9..a4fa1966 100644 --- a/rsScript.cpp +++ b/rsScript.cpp @@ -187,23 +187,13 @@ void rsi_ScriptSetTimeZone(Context * rsc, RsScript vs, const char * timeZone, si free(tz); } -void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot, - RsAllocation vain, RsAllocation vaout, - const void *params, size_t paramLen, - const RsScriptCall *sc, size_t scLen) { - Script *s = static_cast<Script *>(vs); - s->runForEach(rsc, slot, - static_cast<const Allocation *>(vain), static_cast<Allocation *>(vaout), - params, paramLen, sc); - -} - void rsi_ScriptForEachMulti(Context *rsc, RsScript vs, uint32_t slot, RsAllocation *vains, size_t inLen, RsAllocation vaout, const void *params, size_t paramLen, const RsScriptCall *sc, size_t scLen) { - Script *s = static_cast<Script *>(vs); + + Script *s = static_cast<Script *>(vs); Allocation **ains = (Allocation**)(vains); s->runForEach(rsc, slot, @@ -212,6 +202,23 @@ void rsi_ScriptForEachMulti(Context *rsc, RsScript vs, uint32_t slot, } +void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot, + RsAllocation vain, RsAllocation vaout, + const void *params, size_t paramLen, + const RsScriptCall *sc, size_t scLen) { + + if (vain == NULL) { + rsi_ScriptForEachMulti(rsc, vs, slot, NULL, 0, vaout, params, paramLen, + sc, scLen); + } else { + RsAllocation ains[1] = {vain}; + + rsi_ScriptForEachMulti(rsc, vs, slot, ains, + sizeof(ains) / sizeof(RsAllocation), vaout, + params, paramLen, sc, scLen); + } +} + void rsi_ScriptInvoke(Context *rsc, RsScript vs, uint32_t slot) { Script *s = static_cast<Script *>(vs); s->Invoke(rsc, slot, NULL, 0); @@ -108,17 +108,9 @@ public: virtual bool freeChildren(); - virtual void runForEach(Context *rsc, - uint32_t slot, - const Allocation * ain, - Allocation * aout, - const void * usr, - size_t usrBytes, - const RsScriptCall *sc = NULL) = 0; - virtual void runForEach(Context* rsc, uint32_t slot, - const Allocation** ains, + const Allocation ** ains, size_t inLen, Allocation* aout, const void* usr, diff --git a/rsScriptC.cpp b/rsScriptC.cpp index e7ff8c7d..892807bd 100644 --- a/rsScriptC.cpp +++ b/rsScriptC.cpp @@ -156,36 +156,6 @@ uint32_t ScriptC::run(Context *rsc) { void ScriptC::runForEach(Context *rsc, uint32_t slot, - const Allocation * ain, - Allocation * aout, - const void * usr, - size_t usrBytes, - const RsScriptCall *sc) { - // Trace this function call. - // To avoid overhead, we only build the string, if tracing is actually - // enabled. - String8 *AString = NULL; - const char *String = ""; - if (ATRACE_ENABLED()) { - AString = new String8("runForEach_"); - AString->append(mHal.info.exportedForeachFuncList[slot].first); - String = AString->string(); - } - ATRACE_NAME(String); - (void)String; - - Context::PushState ps(rsc); - - setupGLState(rsc); - setupScript(rsc); - rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ain, aout, usr, usrBytes, sc); - - if (AString) - delete AString; -} - -void ScriptC::runForEach(Context *rsc, - uint32_t slot, const Allocation ** ains, size_t inLen, Allocation * aout, @@ -210,10 +180,22 @@ void ScriptC::runForEach(Context *rsc, setupGLState(rsc); setupScript(rsc); - rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen, aout, usr, usrBytes, sc); + if (rsc->mHal.funcs.script.invokeForEachMulti != NULL) { + rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen, + aout, usr, usrBytes, sc); + + } else if (inLen == 1) { + rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ains[0], aout, + usr, usrBytes, sc); - if (AString) + } else { + rsc->setError(RS_ERROR_FATAL_DRIVER, + "Driver support for multi-input not present"); + } + + if (AString) { delete AString; + } } void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) { diff --git a/rsScriptC.h b/rsScriptC.h index d3d9d513..5735bea3 100644 --- a/rsScriptC.h +++ b/rsScriptC.h @@ -44,14 +44,6 @@ public: virtual void runForEach(Context *rsc, uint32_t slot, - const Allocation * ain, - Allocation * aout, - const void * usr, - size_t usrBytes, - const RsScriptCall *sc = NULL); - - virtual void runForEach(Context *rsc, - uint32_t slot, const Allocation ** ains, size_t inLen, Allocation * aout, diff --git a/rsScriptC_Lib.cpp b/rsScriptC_Lib.cpp index a41f4a79..cacb37ad 100644 --- a/rsScriptC_Lib.cpp +++ b/rsScriptC_Lib.cpp @@ -230,7 +230,17 @@ void rsrForEach(Context *rsc, Allocation *in, Allocation *out, const void *usr, uint32_t usrBytes, const RsScriptCall *call) { - target->runForEach(rsc, /* root slot */ 0, in, out, usr, usrBytes, call); + + if (in == NULL) { + target->runForEach(rsc, /* root slot */ 0, NULL, 0, out, usr, + usrBytes, call); + + } else { + const Allocation *ins[1] = {in}; + target->runForEach(rsc, /* root slot */ 0, ins, + sizeof(ins) / sizeof(RsAllocation), out, usr, + usrBytes, call); + } } void rsrAllocationSyncAll(Context *rsc, Allocation *a, RsAllocationUsageType usage) { diff --git a/rsScriptGroup.cpp b/rsScriptGroup.cpp index d1dd9d81..a03cb786 100644 --- a/rsScriptGroup.cpp +++ b/rsScriptGroup.cpp @@ -346,7 +346,15 @@ void ScriptGroup::execute(Context *rsc) { } } - n->mScript->runForEach(rsc, k->mSlot, ain, aout, NULL, 0); + if (ain == NULL) { + n->mScript->runForEach(rsc, k->mSlot, NULL, 0, aout, NULL, 0); + + } else { + const Allocation *ains[1] = {ain}; + n->mScript->runForEach(rsc, k->mSlot, ains, + sizeof(ains) / sizeof(RsAllocation), + aout, NULL, 0); + } } } @@ -409,4 +417,3 @@ void rsi_ScriptGroupExecute(Context *rsc, RsScriptGroup sg) { } } - diff --git a/rsScriptIntrinsic.cpp b/rsScriptIntrinsic.cpp index 86f1c504..7461d34a 100644 --- a/rsScriptIntrinsic.cpp +++ b/rsScriptIntrinsic.cpp @@ -55,18 +55,6 @@ uint32_t ScriptIntrinsic::run(Context *rsc) { return 0; } - -void ScriptIntrinsic::runForEach(Context *rsc, - uint32_t slot, - const Allocation * ain, - Allocation * aout, - const void * usr, - size_t usrBytes, - const RsScriptCall *sc) { - - rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ain, aout, usr, usrBytes, sc); -} - void ScriptIntrinsic::runForEach(Context* rsc, uint32_t slot, const Allocation** ains, @@ -76,7 +64,18 @@ void ScriptIntrinsic::runForEach(Context* rsc, size_t usrBytes, const RsScriptCall* sc) { - rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen, aout, usr, usrBytes, sc); + if (rsc->mHal.funcs.script.invokeForEachMulti != NULL) { + rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen, + aout, usr, usrBytes, sc); + + } else if (inLen == 1) { + rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ains[0], aout, + usr, usrBytes, sc); + + } else { + rsc->setError(RS_ERROR_FATAL_DRIVER, + "Driver support for multi-input not present"); + } } void ScriptIntrinsic::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) { @@ -107,5 +106,3 @@ RsScript rsi_ScriptIntrinsicCreate(Context *rsc, uint32_t id, RsElement ve) { } } - - diff --git a/rsScriptIntrinsic.h b/rsScriptIntrinsic.h index 66b60318..87b73539 100644 --- a/rsScriptIntrinsic.h +++ b/rsScriptIntrinsic.h @@ -40,17 +40,9 @@ public: virtual RsA3DClassID getClassId() const; virtual bool freeChildren(); - virtual void runForEach(Context *rsc, - uint32_t slot, - const Allocation * ain, - Allocation * aout, - const void * usr, - size_t usrBytes, - const RsScriptCall *sc = NULL); - virtual void runForEach(Context* rsc, uint32_t slot, - const Allocation** ains, + const Allocation ** ains, size_t inLen, Allocation* aout, const void* usr, @@ -69,5 +61,3 @@ protected: } } #endif - - |