diff options
author | David Gross <dgross@google.com> | 2016-06-01 14:45:47 -0700 |
---|---|---|
committer | David Gross <dgross@google.com> | 2016-06-10 10:45:29 -0700 |
commit | ae2ec3febedfc29376b9104413fb4042028f1265 (patch) | |
tree | 36d8f632058bc158f48b4e148bcf41c177802d1b | |
parent | 8e70791ff732ce244077310bdfdaf75dc19baabc (diff) | |
download | rs-ae2ec3febedfc29376b9104413fb4042028f1265.tar.gz |
Delete simple reduction implementation. [nougat-dev]
Bug: 27298560
Change-Id: I8c3d568e98aaf0b7d86881c985d13ed5b8e95338
-rw-r--r-- | cpp/Script.cpp | 16 | ||||
-rw-r--r-- | cpp/rsCppStructs.h | 2 | ||||
-rw-r--r-- | cpp/rsDispatch.cpp | 13 | ||||
-rw-r--r-- | cpp/rsDispatch.h | 4 | ||||
-rw-r--r-- | cpu_ref/rsCpuCore.cpp | 139 | ||||
-rw-r--r-- | cpu_ref/rsCpuCore.h | 55 | ||||
-rw-r--r-- | cpu_ref/rsCpuExecutable.cpp | 77 | ||||
-rw-r--r-- | cpu_ref/rsCpuExecutable.h | 22 | ||||
-rw-r--r-- | cpu_ref/rsCpuScript.cpp | 81 | ||||
-rw-r--r-- | cpu_ref/rsCpuScript.h | 19 | ||||
-rw-r--r-- | cpu_ref/rsd_cpu.h | 7 | ||||
-rw-r--r-- | driver/rsdBcc.cpp | 13 | ||||
-rw-r--r-- | driver/rsdBcc.h | 11 | ||||
-rw-r--r-- | driver/rsdCore.cpp | 2 | ||||
-rw-r--r-- | libRS.map | 1 | ||||
-rw-r--r-- | rs.spec | 8 | ||||
-rw-r--r-- | rsDriverLoader.cpp | 1 | ||||
-rw-r--r-- | rsScript.cpp | 20 | ||||
-rw-r--r-- | rsScript.h | 8 | ||||
-rw-r--r-- | rsScriptC.cpp | 30 | ||||
-rw-r--r-- | rsScriptC.h | 7 | ||||
-rw-r--r-- | rsScriptIntrinsic.cpp | 8 | ||||
-rw-r--r-- | rsScriptIntrinsic.h | 7 | ||||
-rw-r--r-- | rs_hal.h | 9 |
24 files changed, 149 insertions, 411 deletions
diff --git a/cpp/Script.cpp b/cpp/Script.cpp index acea0c8a..52933f27 100644 --- a/cpp/Script.cpp +++ b/cpp/Script.cpp @@ -36,21 +36,6 @@ void Script::forEach(uint32_t slot, sp<const Allocation> ain, sp<const Allocatio tryDispatch(mRS, RS::dispatch->ScriptForEach(mRS->getContext(), getID(), slot, in_id, out_id, usr, usrLen, nullptr, 0)); } -void Script::reduce(uint32_t slot, sp<const Allocation> ain, sp<const Allocation> aout, - const RsScriptCall *sc) const { - if (RS::dispatch->ScriptReduce == nullptr) { - mRS->throwError(RS_ERROR_RUNTIME_ERROR, "Reduce is not supported at the current API level"); - return; - } - if (ain == nullptr || aout == nullptr) { - mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Both ain and aout are required to be non-null."); - return; - } - void *in_id = BaseObj::getObjID(ain); - void *out_id = BaseObj::getObjID(aout); - tryDispatch(mRS, RS::dispatch->ScriptReduce(mRS->getContext(), getID(), slot, in_id, out_id, sc, sc == nullptr ? 0 : sizeof(*sc))); -} - Script::Script(void *id, sp<RS> rs) : BaseObj(id, rs) { } @@ -71,4 +56,3 @@ void Script::setVar(uint32_t index, const void *v, size_t len) const { void Script::FieldBase::init(sp<RS> rs, uint32_t dimx, uint32_t usages) { mAllocation = Allocation::createSized(rs, mElement, dimx, RS_ALLOCATION_USAGE_SCRIPT | usages); } - diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h index 95d190b0..18023864 100644 --- a/cpp/rsCppStructs.h +++ b/cpp/rsCppStructs.h @@ -1700,8 +1700,6 @@ protected: Script(void *id, sp<RS> rs); void forEach(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out, const void *v, size_t) const; - void reduce(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out, - const RsScriptCall *sc) const; void bindAllocation(sp<Allocation> va, uint32_t slot) const; void setVar(uint32_t index, const void *, size_t len) const; void setVar(uint32_t index, sp<const BaseObj> o) const; diff --git a/cpp/rsDispatch.cpp b/cpp/rsDispatch.cpp index f6121459..5773903c 100644 
--- a/cpp/rsDispatch.cpp +++ b/cpp/rsDispatch.cpp @@ -21,8 +21,7 @@ #include <limits.h> #define LOG_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "RS Dispatch", __VA_ARGS__); -#define REDUCE_API_LEVEL INT_MAX -#define REDUCE_NEW_API_LEVEL 24 +#define REDUCE_API_LEVEL 24 bool loadSymbols(void* handle, dispatchTable& dispatchTab, int targetApiLevel) { #ifdef __LP64__ @@ -101,7 +100,6 @@ bool loadSymbols(void* handle, dispatchTable& dispatchTab, int targetApiLevel) { dispatchTab.ScriptInvokeV = (ScriptInvokeVFnPtr)dlsym(handle, "rsScriptInvokeV"); dispatchTab.ScriptKernelIDCreate = (ScriptKernelIDCreateFnPtr)dlsym(handle, "rsScriptKernelIDCreate"); dispatchTab.ScriptReduce = (ScriptReduceFnPtr)dlsym(handle, "rsScriptReduce"); - dispatchTab.ScriptReduceNew = (ScriptReduceNewFnPtr)dlsym(handle, "rsScriptReduceNew"); dispatchTab.ScriptSetTimeZone = (ScriptSetTimeZoneFnPtr)dlsym(handle, "rsScriptSetTimeZone"); dispatchTab.ScriptSetVarD = (ScriptSetVarDFnPtr)dlsym(handle, "rsScriptSetVarD"); dispatchTab.ScriptSetVarF = (ScriptSetVarFFnPtr)dlsym(handle, "rsScriptSetVarF"); @@ -427,7 +425,7 @@ bool loadSymbols(void* handle, dispatchTable& dispatchTab, int targetApiLevel) { return false; } } - // TODO: Update the API level when reduce is added. 
+ if (targetApiLevel >= REDUCE_API_LEVEL) { if (dispatchTab.ScriptReduce == nullptr) { LOG_ERR("Couldn't initialize dispatchTab.ScriptReduce"); @@ -435,13 +433,6 @@ bool loadSymbols(void* handle, dispatchTable& dispatchTab, int targetApiLevel) { } } - if (targetApiLevel >= REDUCE_NEW_API_LEVEL) { - if (dispatchTab.ScriptReduceNew == nullptr) { - LOG_ERR("Couldn't initialize dispatchTab.ScriptReduceNew"); - return false; - } - } - return true; } diff --git a/cpp/rsDispatch.h b/cpp/rsDispatch.h index 8f2df705..df12f320 100644 --- a/cpp/rsDispatch.h +++ b/cpp/rsDispatch.h @@ -77,8 +77,7 @@ typedef void (*ScriptInvokeFnPtr) (RsContext, RsScript, uint32_t); typedef void (*ScriptInvokeVFnPtr) (RsContext, RsScript, uint32_t, const void*, size_t); typedef void (*ScriptForEachFnPtr) (RsContext, RsScript, uint32_t, RsAllocation, RsAllocation, const void*, size_t, const RsScriptCall*, size_t); typedef void (*ScriptForEachMultiFnPtr) (RsContext, RsScript, uint32_t, RsAllocation*, size_t, RsAllocation, const void*, size_t, const RsScriptCall*, size_t); -typedef void (*ScriptReduceFnPtr) (RsContext, RsScript, uint32_t, RsAllocation, RsAllocation, const RsScriptCall*, size_t); -typedef void (*ScriptReduceNewFnPtr) (RsContext, RsScript, uint32_t, RsAllocation*, size_t, RsAllocation, const RsScriptCall*, size_t); +typedef void (*ScriptReduceFnPtr) (RsContext, RsScript, uint32_t, RsAllocation*, size_t, RsAllocation, const RsScriptCall*, size_t); typedef void (*ScriptSetVarIFnPtr) (RsContext, RsScript, uint32_t, int); typedef void (*ScriptSetVarObjFnPtr) (RsContext, RsScript, uint32_t, RsObjectBase); typedef void (*ScriptSetVarJFnPtr) (RsContext, RsScript, uint32_t, int64_t); @@ -173,7 +172,6 @@ struct dispatchTable { ScriptInvokeVFnPtr ScriptInvokeV; ScriptKernelIDCreateFnPtr ScriptKernelIDCreate; ScriptReduceFnPtr ScriptReduce; - ScriptReduceNewFnPtr ScriptReduceNew; ScriptSetTimeZoneFnPtr ScriptSetTimeZone; ScriptSetVarDFnPtr ScriptSetVarD; ScriptSetVarFFnPtr ScriptSetVarF; diff 
--git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp index 011b8e3d..8fefe882 100644 --- a/cpu_ref/rsCpuCore.cpp +++ b/cpu_ref/rsCpuCore.cpp @@ -45,7 +45,7 @@ static pid_t gettid() { using namespace android; using namespace android::renderscript; -#define REDUCE_NEW_ALOGV(mtls, level, ...) do { if ((mtls)->logReduce >= (level)) ALOGV(__VA_ARGS__); } while(0) +#define REDUCE_ALOGV(mtls, level, ...) do { if ((mtls)->logReduce >= (level)) ALOGV(__VA_ARGS__); } while(0) static pthread_key_t gThreadTLSKey = 0; static uint32_t gThreadTLSKeyCount = 0; @@ -354,7 +354,7 @@ static inline void FepPtrSetup(const MTLaunchStructForEach *mtls, RsExpandKernel // mtls - The MTLaunchStruct holding information about the kernel launch // redp - The reduce parameters (driver info structure) // x, y, z - The start offsets into each dimension -static inline void RedpPtrSetup(const MTLaunchStructReduceNew *mtls, RsExpandKernelDriverInfo *redp, +static inline void RedpPtrSetup(const MTLaunchStructReduce *mtls, RsExpandKernelDriverInfo *redp, uint32_t x, uint32_t y, uint32_t z) { for (uint32_t i = 0; i < redp->inLen; i++) { redp->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z); @@ -508,8 +508,8 @@ static const char *format_bytes(FormatBuf *outBuf, const uint8_t *inBuf, const i return *outBuf; } -static void reduce_new_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructReduceNew *mtls, - const char *walkerName, uint32_t threadIdx) { +static void reduce_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructReduce *mtls, + const char *walkerName, uint32_t threadIdx) { rsAssert(!accumPtr); uint32_t accumIdx = (uint32_t)__sync_fetch_and_add(&mtls->accumCount, 1); @@ -522,8 +522,8 @@ static void reduce_new_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructR accumPtr = mtls->accumAlloc + mtls->accumStride * (accumIdx - 1); } } - REDUCE_NEW_ALOGV(mtls, 2, "%s(%p): idx = %u got accumCount %u and accumPtr %p", - walkerName, mtls->accumFunc, threadIdx, accumIdx, 
accumPtr); + REDUCE_ALOGV(mtls, 2, "%s(%p): idx = %u got accumCount %u and accumPtr %p", + walkerName, mtls->accumFunc, threadIdx, accumIdx, accumPtr); // initialize accumulator if (mtls->initFunc) { mtls->initFunc(accumPtr); @@ -532,18 +532,18 @@ static void reduce_new_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructR } } -static void walk_1d_reduce_new(void *usr, uint32_t idx) { - const MTLaunchStructReduceNew *mtls = (const MTLaunchStructReduceNew *)usr; +static void walk_1d_reduce(void *usr, uint32_t idx) { + const MTLaunchStructReduce *mtls = (const MTLaunchStructReduce *)usr; RsExpandKernelDriverInfo redp = mtls->redp; // find accumulator uint8_t *&accumPtr = mtls->accumPtr[idx]; if (!accumPtr) { - reduce_new_get_accumulator(accumPtr, mtls, __func__, idx); + reduce_get_accumulator(accumPtr, mtls, __func__, idx); } // accumulate - const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc; + const ReduceAccumulatorFunc_t fn = mtls->accumFunc; while (1) { uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); uint32_t xStart = mtls->start.x + slice * mtls->mSliceSize; @@ -566,23 +566,23 @@ static void walk_1d_reduce_new(void *usr, uint32_t idx) { } else { fmt[0] = 0; } - REDUCE_NEW_ALOGV(mtls, 2, "walk_1d_reduce_new(%p): idx = %u, x in [%u, %u)%s", - mtls->accumFunc, idx, xStart, xEnd, fmt); + REDUCE_ALOGV(mtls, 2, "walk_1d_reduce(%p): idx = %u, x in [%u, %u)%s", + mtls->accumFunc, idx, xStart, xEnd, fmt); } } -static void walk_2d_reduce_new(void *usr, uint32_t idx) { - const MTLaunchStructReduceNew *mtls = (const MTLaunchStructReduceNew *)usr; +static void walk_2d_reduce(void *usr, uint32_t idx) { + const MTLaunchStructReduce *mtls = (const MTLaunchStructReduce *)usr; RsExpandKernelDriverInfo redp = mtls->redp; // find accumulator uint8_t *&accumPtr = mtls->accumPtr[idx]; if (!accumPtr) { - reduce_new_get_accumulator(accumPtr, mtls, __func__, idx); + reduce_get_accumulator(accumPtr, mtls, __func__, idx); } // accumulate - const 
ReduceNewAccumulatorFunc_t fn = mtls->accumFunc; + const ReduceAccumulatorFunc_t fn = mtls->accumFunc; while (1) { uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); uint32_t yStart = mtls->start.y + slice * mtls->mSliceSize; @@ -605,23 +605,23 @@ static void walk_2d_reduce_new(void *usr, uint32_t idx) { } else { fmt[0] = 0; } - REDUCE_NEW_ALOGV(mtls, 2, "walk_2d_reduce_new(%p): idx = %u, y in [%u, %u)%s", - mtls->accumFunc, idx, yStart, yEnd, fmt); + REDUCE_ALOGV(mtls, 2, "walk_2d_reduce(%p): idx = %u, y in [%u, %u)%s", + mtls->accumFunc, idx, yStart, yEnd, fmt); } } -static void walk_3d_reduce_new(void *usr, uint32_t idx) { - const MTLaunchStructReduceNew *mtls = (const MTLaunchStructReduceNew *)usr; +static void walk_3d_reduce(void *usr, uint32_t idx) { + const MTLaunchStructReduce *mtls = (const MTLaunchStructReduce *)usr; RsExpandKernelDriverInfo redp = mtls->redp; // find accumulator uint8_t *&accumPtr = mtls->accumPtr[idx]; if (!accumPtr) { - reduce_new_get_accumulator(accumPtr, mtls, __func__, idx); + reduce_get_accumulator(accumPtr, mtls, __func__, idx); } // accumulate - const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc; + const ReduceAccumulatorFunc_t fn = mtls->accumFunc; while (1) { uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); @@ -640,44 +640,25 @@ static void walk_3d_reduce_new(void *usr, uint32_t idx) { } else { fmt[0] = 0; } - REDUCE_NEW_ALOGV(mtls, 2, "walk_3d_reduce_new(%p): idx = %u, z = %u%s", - mtls->accumFunc, idx, redp.current.z, fmt); + REDUCE_ALOGV(mtls, 2, "walk_3d_reduce(%p): idx = %u, z = %u%s", + mtls->accumFunc, idx, redp.current.z, fmt); } } -// Launch a simple reduce-style kernel. 
-// Inputs: -// ain: The allocation that contains the input -// aout: The allocation that will hold the output -// mtls: Holds launch parameters -void RsdCpuReferenceImpl::launchReduce(const Allocation *ain, - Allocation *aout, - MTLaunchStructReduce *mtls) { - const uint32_t xStart = mtls->start.x; - const uint32_t xEnd = mtls->end.x; - - if (xStart >= xEnd) { - return; - } - - const uint32_t startOffset = ain->getType()->getElementSizeBytes() * xStart; - mtls->kernel(&mtls->inBuf[startOffset], mtls->outBuf, xEnd - xStart); -} - // Launch a general reduce-style kernel. // Inputs: // ains[0..inLen-1]: Array of allocations that contain the inputs // aout: The allocation that will hold the output // mtls: Holds launch parameters -void RsdCpuReferenceImpl::launchReduceNew(const Allocation ** ains, - uint32_t inLen, - Allocation * aout, - MTLaunchStructReduceNew *mtls) { +void RsdCpuReferenceImpl::launchReduce(const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + MTLaunchStructReduce *mtls) { mtls->logReduce = mRSC->props.mLogReduce; if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInKernel) { - launchReduceNewParallel(ains, inLen, aout, mtls); + launchReduceParallel(ains, inLen, aout, mtls); } else { - launchReduceNewSerial(ains, inLen, aout, mtls); + launchReduceSerial(ains, inLen, aout, mtls); } } @@ -686,12 +667,12 @@ void RsdCpuReferenceImpl::launchReduceNew(const Allocation ** ains, // ains[0..inLen-1]: Array of allocations that contain the inputs // aout: The allocation that will hold the output // mtls: Holds launch parameters -void RsdCpuReferenceImpl::launchReduceNewSerial(const Allocation ** ains, - uint32_t inLen, - Allocation * aout, - MTLaunchStructReduceNew *mtls) { - REDUCE_NEW_ALOGV(mtls, 1, "launchReduceNewSerial(%p): %u x %u x %u", mtls->accumFunc, - mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z); +void RsdCpuReferenceImpl::launchReduceSerial(const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + 
MTLaunchStructReduce *mtls) { + REDUCE_ALOGV(mtls, 1, "launchReduceSerial(%p): %u x %u x %u", mtls->accumFunc, + mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z); // In the presence of outconverter, we allocate temporary memory for // the accumulator. @@ -710,7 +691,7 @@ void RsdCpuReferenceImpl::launchReduceNewSerial(const Allocation ** ains, } // accumulate - const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc; + const ReduceAccumulatorFunc_t fn = mtls->accumFunc; uint32_t slice = 0; while (SelectOuterSlice(mtls, &mtls->redp, slice++)) { for (mtls->redp.current.y = mtls->start.y; @@ -733,13 +714,13 @@ void RsdCpuReferenceImpl::launchReduceNewSerial(const Allocation ** ains, // ains[0..inLen-1]: Array of allocations that contain the inputs // aout: The allocation that will hold the output // mtls: Holds launch parameters -void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, - uint32_t inLen, - Allocation * aout, - MTLaunchStructReduceNew *mtls) { +void RsdCpuReferenceImpl::launchReduceParallel(const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + MTLaunchStructReduce *mtls) { // For now, we don't know how to go parallel in the absence of a combiner. 
if (!mtls->combFunc) { - launchReduceNewSerial(ains, inLen, aout, mtls); + launchReduceSerial(ains, inLen, aout, mtls); return; } @@ -777,19 +758,19 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, rsAssert(!mInKernel); mInKernel = true; - REDUCE_NEW_ALOGV(mtls, 1, "launchReduceNewParallel(%p): %u x %u x %u, %u threads, accumAlloc = %p", - mtls->accumFunc, - mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z, - numThreads, mtls->accumAlloc); + REDUCE_ALOGV(mtls, 1, "launchReduceParallel(%p): %u x %u x %u, %u threads, accumAlloc = %p", + mtls->accumFunc, + mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z, + numThreads, mtls->accumAlloc); if (mtls->redp.dim.z > 1) { mtls->mSliceSize = 1; - launchThreads(walk_3d_reduce_new, mtls); + launchThreads(walk_3d_reduce, mtls); } else if (mtls->redp.dim.y > 1) { mtls->mSliceSize = rsMax(1U, mtls->redp.dim.y / (numThreads * 4)); - launchThreads(walk_2d_reduce_new, mtls); + launchThreads(walk_2d_reduce, mtls); } else { mtls->mSliceSize = rsMax(1U, mtls->redp.dim.x / (numThreads * 4)); - launchThreads(walk_1d_reduce_new, mtls); + launchThreads(walk_1d_reduce, mtls); } mInKernel = false; @@ -804,12 +785,12 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, if (mtls->combFunc) { if (mtls->logReduce >= 3) { FormatBuf fmt; - REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): accumulating into%s", - mtls->accumFunc, - format_bytes(&fmt, finalAccumPtr, mtls->accumSize)); - REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): accumulator[%d]%s", - mtls->accumFunc, idx, - format_bytes(&fmt, thisAccumPtr, mtls->accumSize)); + REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): accumulating into%s", + mtls->accumFunc, + format_bytes(&fmt, finalAccumPtr, mtls->accumSize)); + REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): accumulator[%d]%s", + mtls->accumFunc, idx, + format_bytes(&fmt, thisAccumPtr, mtls->accumSize)); } mtls->combFunc(finalAccumPtr, thisAccumPtr); } else 
{ @@ -823,8 +804,8 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, rsAssert(finalAccumPtr != nullptr); if (mtls->logReduce >= 3) { FormatBuf fmt; - REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): final accumulator%s", - mtls->accumFunc, format_bytes(&fmt, finalAccumPtr, mtls->accumSize)); + REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): final accumulator%s", + mtls->accumFunc, format_bytes(&fmt, finalAccumPtr, mtls->accumSize)); } // Outconvert @@ -832,9 +813,9 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, mtls->outFunc(mtls->redp.outPtr[0], finalAccumPtr); if (mtls->logReduce >= 3) { FormatBuf fmt; - REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): final outconverted result%s", - mtls->accumFunc, - format_bytes(&fmt, mtls->redp.outPtr[0], mtls->redp.outStride[0])); + REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): final outconverted result%s", + mtls->accumFunc, + format_bytes(&fmt, mtls->redp.outPtr[0], mtls->redp.outStride[0])); } } diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h index 62882aa8..1515b77c 100644 --- a/cpu_ref/rsCpuCore.h +++ b/cpu_ref/rsCpuCore.h @@ -32,22 +32,21 @@ namespace renderscript { extern bool gArchUseSIMD; // Function types found in RenderScript code -typedef void (*ReduceFunc_t)(const uint8_t *inBuf, uint8_t *outBuf, uint32_t len); -typedef void (*ReduceNewAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum); -typedef void (*ReduceNewCombinerFunc_t)(uint8_t *accum, const uint8_t *other); -typedef void (*ReduceNewInitializerFunc_t)(uint8_t *accum); -typedef void (*ReduceNewOutConverterFunc_t)(uint8_t *out, const uint8_t *accum); +typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum); +typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other); +typedef void (*ReduceInitializerFunc_t)(uint8_t *accum); +typedef void 
(*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum); typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride); typedef void (*InvokeFunc_t)(void *params); typedef void (*InitOrDtorFunc_t)(void); typedef int (*RootFunc_t)(void); -struct ReduceNewDescription { - ReduceNewAccumulatorFunc_t accumFunc; // expanded accumulator function - ReduceNewInitializerFunc_t initFunc; // user initializer function - ReduceNewCombinerFunc_t combFunc; // user combiner function - ReduceNewOutConverterFunc_t outFunc; // user outconverter function - size_t accumSize; // accumulator datum size, in bytes +struct ReduceDescription { + ReduceAccumulatorFunc_t accumFunc; // expanded accumulator function + ReduceInitializerFunc_t initFunc; // user initializer function + ReduceCombinerFunc_t combFunc; // user combiner function + ReduceOutConverterFunc_t outFunc; // user outconverter function + size_t accumSize; // accumulator datum size, in bytes }; // Internal driver callback used to execute a kernel @@ -75,8 +74,7 @@ struct MTLaunchStructCommon { RsLaunchDimensions start; RsLaunchDimensions end; // Points to MTLaunchStructForEach::fep::dim or - // MTLaunchStructReduce::inputDim or - // MTLaunchStructReduceNew::redp::dim. + // MTLaunchStructReduce::redp::dim. 
RsLaunchDimensions *dimPtr; }; @@ -90,22 +88,15 @@ struct MTLaunchStructForEach : public MTLaunchStructCommon { }; struct MTLaunchStructReduce : public MTLaunchStructCommon { - ReduceFunc_t kernel; - const uint8_t *inBuf; - uint8_t *outBuf; - RsLaunchDimensions inputDim; -}; - -struct MTLaunchStructReduceNew : public MTLaunchStructCommon { // Driver info structure RsExpandKernelDriverInfo redp; const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; - ReduceNewAccumulatorFunc_t accumFunc; - ReduceNewInitializerFunc_t initFunc; - ReduceNewCombinerFunc_t combFunc; - ReduceNewOutConverterFunc_t outFunc; + ReduceAccumulatorFunc_t accumFunc; + ReduceInitializerFunc_t initFunc; + ReduceCombinerFunc_t combFunc; + ReduceOutConverterFunc_t outFunc; size_t accumSize; // accumulator datum size in bytes @@ -174,13 +165,9 @@ public: void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout, const RsScriptCall *sc, MTLaunchStructForEach *mtls); - // Launch a simple reduce kernel - void launchReduce(const Allocation *ain, Allocation *aout, - MTLaunchStructReduce *mtls); - // Launch a general reduce kernel - void launchReduceNew(const Allocation ** ains, uint32_t inLen, Allocation *aout, - MTLaunchStructReduceNew *mtls); + void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout, + MTLaunchStructReduce *mtls); CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir, uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override; @@ -271,10 +258,10 @@ protected: long mPageSize; // Launch a general reduce kernel - void launchReduceNewSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout, - MTLaunchStructReduceNew *mtls); - void launchReduceNewParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout, - MTLaunchStructReduceNew *mtls); + void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout, + MTLaunchStructReduce *mtls); + void launchReduceParallel(const Allocation ** 
ains, uint32_t inLen, Allocation *aout, + MTLaunchStructReduce *mtls); }; diff --git a/cpu_ref/rsCpuExecutable.cpp b/cpu_ref/rsCpuExecutable.cpp index ca9a4b62..3d5e6350 100644 --- a/cpu_ref/rsCpuExecutable.cpp +++ b/cpu_ref/rsCpuExecutable.cpp @@ -272,7 +272,6 @@ void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDi #define EXPORT_FUNC_STR "exportFuncCount: " #define EXPORT_FOREACH_STR "exportForEachCount: " #define EXPORT_REDUCE_STR "exportReduceCount: " -#define EXPORT_REDUCE_NEW_STR "exportReduceNewCount: " #define OBJECT_SLOT_STR "objectSlotCount: " #define PRAGMA_STR "pragmaCount: " #define THREADABLE_STR "isThreadable: " @@ -311,7 +310,6 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( size_t funcCount = 0; size_t forEachCount = 0; size_t reduceCount = 0; - size_t reduceNewCount = 0; size_t objectSlotCount = 0; size_t pragmaCount = 0; bool isThreadable = true; @@ -322,8 +320,7 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( InvokeFunc_t* invokeFunctions = nullptr; ForEachFunc_t* forEachFunctions = nullptr; uint32_t* forEachSignatures = nullptr; - ReduceFunc_t* reduceFunctions = nullptr; - ReduceNewDescription* reduceNewDescriptions = nullptr; + ReduceDescription* reduceDescriptions = nullptr; const char ** pragmaKeys = nullptr; const char ** pragmaValues = nullptr; uint32_t checksum = 0; @@ -455,56 +452,21 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( } } - // Read simple reduce kernels - if (strgets(line, MAXLINE, &rsInfo) == nullptr) { - goto error; - } - if (sscanf(line, EXPORT_REDUCE_STR "%zu", &reduceCount) != 1) { - ALOGE("Invalid export reduce count!: %s", line); - goto error; - } - - reduceFunctions = new ReduceFunc_t[reduceCount]; - if (reduceFunctions == nullptr) { - goto error; - } - - for (size_t i = 0; i < reduceCount; ++i) { - if (strgets(line, MAXLINE, &rsInfo) == nullptr) { - goto error; - } - char *c = strrchr(line, '\n'); - if (c) { - *c = '\0'; - } - - // Lookup the 
expanded reduce kernel. - strncat(line, ".expand", MAXLINESTR-strlen(line)); - - reduceFunctions[i] = - reinterpret_cast<ReduceFunc_t>(dlsym(sharedObj, line)); - if (reduceFunctions[i] == nullptr) { - ALOGE("Failed to get function address for %s(): %s", - line, dlerror()); - goto error; - } - } - // Read general reduce kernels if (strgets(line, MAXLINE, &rsInfo) == nullptr) { goto error; } - if (sscanf(line, EXPORT_REDUCE_NEW_STR "%zu", &reduceNewCount) != 1) { + if (sscanf(line, EXPORT_REDUCE_STR "%zu", &reduceCount) != 1) { ALOGE("Invalid export reduce new count!: %s", line); goto error; } - reduceNewDescriptions = new ReduceNewDescription[reduceNewCount]; - if (reduceNewDescriptions == nullptr) { + reduceDescriptions = new ReduceDescription[reduceCount]; + if (reduceDescriptions == nullptr) { goto error; } - for (size_t i = 0; i < reduceNewCount; ++i) { + for (size_t i = 0; i < reduceCount; ++i) { static const char kNoName[] = "."; unsigned int tmpSig = 0; @@ -545,25 +507,25 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( // The current implementation does not use the signature // or reduce name. - reduceNewDescriptions[i].accumSize = tmpSize; + reduceDescriptions[i].accumSize = tmpSize; // Process the (optional) initializer. if (strcmp(tmpNameInitializer, kNoName)) { // Lookup the original user-written initializer. - if (!(reduceNewDescriptions[i].initFunc = - (ReduceNewInitializerFunc_t) dlsym(sharedObj, tmpNameInitializer))) { + if (!(reduceDescriptions[i].initFunc = + (ReduceInitializerFunc_t) dlsym(sharedObj, tmpNameInitializer))) { ALOGE("Failed to find initializer function address for %s(): %s", tmpNameInitializer, dlerror()); goto error; } } else { - reduceNewDescriptions[i].initFunc = nullptr; + reduceDescriptions[i].initFunc = nullptr; } // Lookup the expanded accumulator. 
strncat(tmpNameAccumulator, ".expand", MAXLINESTR-strlen(tmpNameAccumulator)); - if (!(reduceNewDescriptions[i].accumFunc = - (ReduceNewAccumulatorFunc_t) dlsym(sharedObj, tmpNameAccumulator))) { + if (!(reduceDescriptions[i].accumFunc = + (ReduceAccumulatorFunc_t) dlsym(sharedObj, tmpNameAccumulator))) { ALOGE("Failed to find accumulator function address for %s(): %s", tmpNameAccumulator, dlerror()); goto error; @@ -572,27 +534,27 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( // Process the (optional) combiner. if (strcmp(tmpNameCombiner, kNoName)) { // Lookup the original user-written combiner. - if (!(reduceNewDescriptions[i].combFunc = - (ReduceNewCombinerFunc_t) dlsym(sharedObj, tmpNameCombiner))) { + if (!(reduceDescriptions[i].combFunc = + (ReduceCombinerFunc_t) dlsym(sharedObj, tmpNameCombiner))) { ALOGE("Failed to find combiner function address for %s(): %s", tmpNameCombiner, dlerror()); goto error; } } else { - reduceNewDescriptions[i].combFunc = nullptr; + reduceDescriptions[i].combFunc = nullptr; } // Process the (optional) outconverter. if (strcmp(tmpNameOutConverter, kNoName)) { // Lookup the original user-written outconverter. 
- if (!(reduceNewDescriptions[i].outFunc = - (ReduceNewOutConverterFunc_t) dlsym(sharedObj, tmpNameOutConverter))) { + if (!(reduceDescriptions[i].outFunc = + (ReduceOutConverterFunc_t) dlsym(sharedObj, tmpNameOutConverter))) { ALOGE("Failed to find outconverter function address for %s(): %s", tmpNameOutConverter, dlerror()); goto error; } } else { - reduceNewDescriptions[i].outFunc = nullptr; + reduceDescriptions[i].outFunc = nullptr; } } @@ -726,8 +688,7 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( fieldAddress, fieldIsObject, fieldName, varCount, invokeFunctions, funcCount, forEachFunctions, forEachSignatures, forEachCount, - reduceFunctions, reduceCount, - reduceNewDescriptions, reduceNewCount, + reduceDescriptions, reduceCount, pragmaKeys, pragmaValues, pragmaCount, rsGlobalNames, rsGlobalAddresses, rsGlobalSizes, rsGlobalProperties, numEntries, isThreadable, checksum); @@ -745,8 +706,6 @@ error: delete[] pragmaKeys; #endif // RS_COMPATIBILITY_LIB - delete[] reduceFunctions; - delete[] forEachSignatures; delete[] forEachFunctions; diff --git a/cpu_ref/rsCpuExecutable.h b/cpu_ref/rsCpuExecutable.h index 72c352c2..90d37591 100644 --- a/cpu_ref/rsCpuExecutable.h +++ b/cpu_ref/rsCpuExecutable.h @@ -67,8 +67,7 @@ public: InvokeFunc_t* invokeFunctions, size_t funcCount, ForEachFunc_t* forEachFunctions, uint32_t* forEachSignatures, size_t forEachCount, - ReduceFunc_t* reduceFunctions, size_t reduceCount, - ReduceNewDescription *reduceNewDescriptions, size_t reduceNewCount, + ReduceDescription *reduceDescriptions, size_t reduceCount, const char** pragmaKeys, const char** pragmaValues, size_t pragmaCount, const char **globalNames, const void **globalAddresses, @@ -80,8 +79,7 @@ public: mInvokeFunctions(invokeFunctions), mFuncCount(funcCount), mForEachFunctions(forEachFunctions), mForEachSignatures(forEachSignatures), mForEachCount(forEachCount), - mReduceFunctions(reduceFunctions), mReduceCount(reduceCount), - 
mReduceNewDescriptions(reduceNewDescriptions), mReduceNewCount(reduceNewCount), + mReduceDescriptions(reduceDescriptions), mReduceCount(reduceCount), mPragmaKeys(pragmaKeys), mPragmaValues(pragmaValues), mPragmaCount(pragmaCount), mGlobalNames(globalNames), mGlobalAddresses(globalAddresses), mGlobalSizes(globalSizes), @@ -107,9 +105,7 @@ public: delete[] mPragmaValues; delete[] mPragmaKeys; - delete[] mReduceFunctions; - - delete[] mReduceNewDescriptions; + delete[] mReduceDescriptions; delete[] mForEachSignatures; delete[] mForEachFunctions; @@ -136,7 +132,6 @@ public: size_t getExportedFunctionCount() const { return mFuncCount; } size_t getExportedForEachCount() const { return mForEachCount; } size_t getExportedReduceCount() const { return mReduceCount; } - size_t getExportedReduceNewCount() const { return mReduceNewCount; } size_t getPragmaCount() const { return mPragmaCount; } void* getFieldAddress(int slot) const { return mFieldAddress[slot]; } @@ -149,10 +144,8 @@ public: ForEachFunc_t getForEachFunction(int slot) const { return mForEachFunctions[slot]; } uint32_t getForEachSignature(int slot) const { return mForEachSignatures[slot]; } - ReduceFunc_t getReduceFunction(int slot) const { return mReduceFunctions[slot]; } - - const ReduceNewDescription* getReduceNewDescription(int slot) const { - return &mReduceNewDescriptions[slot]; + const ReduceDescription* getReduceDescription(int slot) const { + return &mReduceDescriptions[slot]; } const char ** getPragmaKeys() const { return mPragmaKeys; } @@ -207,12 +200,9 @@ private: uint32_t* mForEachSignatures; size_t mForEachCount; - ReduceFunc_t* mReduceFunctions; + ReduceDescription* mReduceDescriptions; size_t mReduceCount; - ReduceNewDescription* mReduceNewDescriptions; - size_t mReduceNewCount; - const char ** mPragmaKeys; const char ** mPragmaValues; size_t mPragmaCount; diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp index 0400fab7..582b3424 100644 --- a/cpu_ref/rsCpuScript.cpp +++ 
b/cpu_ref/rsCpuScript.cpp @@ -500,7 +500,6 @@ void RsdCpuScriptImpl::populateScript(Script *script) { // Copy info over to runtime script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount(); script->mHal.info.exportedReduceCount = mScriptExec->getExportedReduceCount(); - script->mHal.info.exportedReduceNewCount = mScriptExec->getExportedReduceNewCount(); script->mHal.info.exportedForEachCount = mScriptExec->getExportedForEachCount(); script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount(); script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();; @@ -555,52 +554,14 @@ bool RsdCpuScriptImpl::setUpMtlsDimensions(MTLaunchStructCommon *mtls, return true; } -// Preliminary work to prepare a simple reduce-style kernel for launch. -bool RsdCpuScriptImpl::reduceMtlsSetup(const Allocation *ain, - const Allocation *aout, +// Preliminary work to prepare a general reduce-style kernel for launch. +bool RsdCpuScriptImpl::reduceMtlsSetup(const Allocation ** ains, + uint32_t inLen, + const Allocation * aout, const RsScriptCall *sc, MTLaunchStructReduce *mtls) { - rsAssert(ain && aout); - memset(mtls, 0, sizeof(MTLaunchStructReduce)); - mtls->dimPtr = &mtls->inputDim; - - if (allocationLODIsNull(ain) || allocationLODIsNull(aout)) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, - "reduce called with a null allocation"); - return false; - } - - // Set up the dimensions of the input. - const Type *inType = ain->getType(); - mtls->inputDim.x = inType->getDimX(); - rsAssert(inType->getDimY() == 0); - - if (!setUpMtlsDimensions(mtls, mtls->inputDim, sc)) { - return false; - } - - mtls->rs = mCtx; - // Currently not threaded. - mtls->isThreadable = false; - mtls->mSliceNum = -1; - - // Set up input and output. 
- mtls->inBuf = static_cast<uint8_t *>(ain->getPointerUnchecked(0, 0)); - mtls->outBuf = static_cast<uint8_t *>(aout->getPointerUnchecked(0, 0)); - - rsAssert(mtls->inBuf && mtls->outBuf); - - return true; -} - -// Preliminary work to prepare a general reduce-style kernel for launch. -bool RsdCpuScriptImpl::reduceNewMtlsSetup(const Allocation ** ains, - uint32_t inLen, - const Allocation * aout, - const RsScriptCall *sc, - MTLaunchStructReduceNew *mtls) { rsAssert(ains && (inLen >= 1) && aout); - memset(mtls, 0, sizeof(MTLaunchStructReduceNew)); + memset(mtls, 0, sizeof(MTLaunchStructReduce)); mtls->dimPtr = &mtls->redp.dim; for (int index = inLen; --index >= 0;) { @@ -793,29 +754,15 @@ void RsdCpuScriptImpl::invokeForEach(uint32_t slot, } void RsdCpuScriptImpl::invokeReduce(uint32_t slot, - const Allocation *ain, + const Allocation ** ains, uint32_t inLen, Allocation *aout, const RsScriptCall *sc) { - MTLaunchStructReduce mtls; - - if (reduceMtlsSetup(ain, aout, sc, &mtls)) { - reduceKernelSetup(slot, &mtls); - RsdCpuScriptImpl *oldTLS = mCtx->setTLS(this); - mCtx->launchReduce(ain, aout, &mtls); - mCtx->setTLS(oldTLS); - } -} + MTLaunchStructReduce mtls; -void RsdCpuScriptImpl::invokeReduceNew(uint32_t slot, - const Allocation ** ains, uint32_t inLen, - Allocation *aout, - const RsScriptCall *sc) { - MTLaunchStructReduceNew mtls; - - if (reduceNewMtlsSetup(ains, inLen, aout, sc, &mtls)) { - reduceNewKernelSetup(slot, &mtls); + if (reduceMtlsSetup(ains, inLen, aout, sc, &mtls)) { + reduceKernelSetup(slot, &mtls); RsdCpuScriptImpl *oldTLS = mCtx->setTLS(this); - mCtx->launchReduceNew(ains, inLen, aout, &mtls); + mCtx->launchReduce(ains, inLen, aout, &mtls); mCtx->setTLS(oldTLS); } } @@ -829,15 +776,9 @@ void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStructForEach * void RsdCpuScriptImpl::reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls) { mtls->script = this; - mtls->kernel = mScriptExec->getReduceFunction(slot); - rsAssert(mtls->kernel 
!= nullptr); -} - -void RsdCpuScriptImpl::reduceNewKernelSetup(uint32_t slot, MTLaunchStructReduceNew *mtls) { - mtls->script = this; mtls->redp.slot = slot; - const ReduceNewDescription *desc = mScriptExec->getReduceNewDescription(slot); + const ReduceDescription *desc = mScriptExec->getReduceDescription(slot); mtls->accumFunc = desc->accumFunc; mtls->initFunc = desc->initFunc; // might legally be nullptr mtls->combFunc = desc->combFunc; // might legally be nullptr diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h index 2909dab1..94345bd5 100644 --- a/cpu_ref/rsCpuScript.h +++ b/cpu_ref/rsCpuScript.h @@ -61,15 +61,10 @@ public: const RsScriptCall* sc) override; void invokeReduce(uint32_t slot, - const Allocation* ain, + const Allocation ** ains, uint32_t inLen, Allocation* aout, const RsScriptCall* sc) override; - void invokeReduceNew(uint32_t slot, - const Allocation ** ains, uint32_t inLen, - Allocation* aout, - const RsScriptCall* sc) override; - void invokeInit() override; void invokeFreeChildren() override; @@ -94,17 +89,11 @@ public: virtual void forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *mtls); - // Build an MTLaunchStruct suitable for launching a simple reduce-style kernel. - bool reduceMtlsSetup(const Allocation *ain, const Allocation *aout, - const RsScriptCall *sc, MTLaunchStructReduce *mtls); - // Finalize an MTLaunchStruct for launching a simple reduce-style kernel. - virtual void reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls); - // Build an MTLaunchStruct suitable for launching a general reduce-style kernel. - bool reduceNewMtlsSetup(const Allocation ** ains, uint32_t inLen, const Allocation *aout, - const RsScriptCall *sc, MTLaunchStructReduceNew *mtls); + bool reduceMtlsSetup(const Allocation ** ains, uint32_t inLen, const Allocation *aout, + const RsScriptCall *sc, MTLaunchStructReduce *mtls); // Finalize an MTLaunchStruct for launching a general reduce-style kernel. 
- virtual void reduceNewKernelSetup(uint32_t slot, MTLaunchStructReduceNew *mtls); + virtual void reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls); const RsdCpuReference::CpuSymbol * lookupSymbolMath(const char *sym); static void * lookupRuntimeStub(void* pContext, char const* name); diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h index e226b934..a8d980e1 100644 --- a/cpu_ref/rsd_cpu.h +++ b/cpu_ref/rsd_cpu.h @@ -59,15 +59,10 @@ public: const RsScriptCall *sc) = 0; virtual void invokeReduce(uint32_t slot, - const Allocation *ain, + const Allocation ** ains, uint32_t inLen, Allocation *aout, const RsScriptCall *sc) = 0; - virtual void invokeReduceNew(uint32_t slot, - const Allocation ** ains, uint32_t inLen, - Allocation *aout, - const RsScriptCall *sc) = 0; - virtual void invokeInit() = 0; virtual void invokeFreeChildren() = 0; diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp index af8d6adf..5aa1c949 100644 --- a/driver/rsdBcc.cpp +++ b/driver/rsdBcc.cpp @@ -126,20 +126,11 @@ void rsdScriptInvokeFunction(const Context *dc, Script *s, void rsdScriptInvokeReduce(const Context *dc, Script *s, uint32_t slot, - const Allocation *ain, + const Allocation ** ains, size_t inLen, Allocation *aout, const RsScriptCall *sc) { RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; - cs->invokeReduce(slot, ain, aout, sc); -} - -void rsdScriptInvokeReduceNew(const Context *dc, Script *s, - uint32_t slot, - const Allocation ** ains, size_t inLen, - Allocation *aout, - const RsScriptCall *sc) { - RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; - cs->invokeReduceNew(slot, ains, inLen, aout, sc); + cs->invokeReduce(slot, ains, inLen, aout, sc); } void rsdScriptSetGlobalVar(const Context *dc, const Script *s, diff --git a/driver/rsdBcc.h b/driver/rsdBcc.h index a2bf8be0..c79f445a 100644 --- a/driver/rsdBcc.h +++ b/driver/rsdBcc.h @@ -46,18 +46,11 @@ void rsdScriptInvokeForEach(const android::renderscript::Context *rsc, 
void rsdScriptInvokeReduce(const android::renderscript::Context *rsc, android::renderscript::Script *s, uint32_t slot, - const android::renderscript::Allocation *ain, + const android::renderscript::Allocation ** ains, + size_t inLen, android::renderscript::Allocation *aout, const RsScriptCall *sc); -void rsdScriptInvokeReduceNew(const android::renderscript::Context *rsc, - android::renderscript::Script *s, - uint32_t slot, - const android::renderscript::Allocation ** ains, - size_t inLen, - android::renderscript::Allocation *aout, - const RsScriptCall *sc); - void rsdScriptInvokeForEachMulti(const android::renderscript::Context *rsc, android::renderscript::Script *s, uint32_t slot, diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp index 503da5af..a5e942a9 100644 --- a/driver/rsdCore.cpp +++ b/driver/rsdCore.cpp @@ -101,8 +101,6 @@ extern "C" bool rsdHalQueryHal(RsHalInitEnums entry, void **fnPtr) { fnPtr[0] = (void *)rsdScriptUpdateCachedObject; break; case RS_HAL_SCRIPT_INVOKE_REDUCE: fnPtr[0] = (void *)rsdScriptInvokeReduce; break; - case RS_HAL_SCRIPT_INVOKE_REDUCE_NEW: - fnPtr[0] = (void *)rsdScriptInvokeReduceNew; break; case RS_HAL_ALLOCATION_INIT: fnPtr[0] = (void *)rsdAllocationInit; break; @@ -94,7 +94,6 @@ libRS { rsScriptInvokeV; rsScriptKernelIDCreate; rsScriptReduce; - rsScriptReduceNew; rsScriptSetTimeZone; rsScriptSetVarD; rsScriptSetVarF; @@ -414,14 +414,6 @@ ScriptForEachMulti { ScriptReduce { param RsScript s param uint32_t slot - param RsAllocation ain - param RsAllocation aout - param const RsScriptCall * sc -} - -ScriptReduceNew { - param RsScript s - param uint32_t slot param RsAllocation * ains param RsAllocation aout param const RsScriptCall * sc diff --git a/rsDriverLoader.cpp b/rsDriverLoader.cpp index 83c6cf62..16efa0d7 100644 --- a/rsDriverLoader.cpp +++ b/rsDriverLoader.cpp @@ -71,7 +71,6 @@ static bool LoadHalTable(Context *rsc, HalQueryHal fn, bool loadGraphics) { ret &= fn(RS_HAL_SCRIPT_INVOKE_ROOT, (void 
**)&rsc->mHal.funcs.script.invokeRoot); ret &= fn(RS_HAL_SCRIPT_INVOKE_FOR_EACH, (void **)&rsc->mHal.funcs.script.invokeForEach); ret &= fn(RS_HAL_SCRIPT_INVOKE_REDUCE, (void **)&rsc->mHal.funcs.script.invokeReduce); - ret &= fn(RS_HAL_SCRIPT_INVOKE_REDUCE_NEW, (void **)&rsc->mHal.funcs.script.invokeReduceNew); ret &= fn(RS_HAL_SCRIPT_INVOKE_INIT, (void **)&rsc->mHal.funcs.script.invokeInit); ret &= fn(RS_HAL_SCRIPT_INVOKE_FREE_CHILDREN, (void **)&rsc->mHal.funcs.script.invokeFreeChildren); ret &= fn(RS_HAL_SCRIPT_SET_GLOBAL_VAR, (void **)&rsc->mHal.funcs.script.setGlobalVar); diff --git a/rsScript.cpp b/rsScript.cpp index bf28328b..4c2f52f0 100644 --- a/rsScript.cpp +++ b/rsScript.cpp @@ -225,23 +225,15 @@ void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot, } void rsi_ScriptReduce(Context *rsc, RsScript vs, uint32_t slot, - RsAllocation vain, RsAllocation vaout, - const RsScriptCall *sc, size_t scLen) { - Script *s = static_cast<Script *>(vs); - s->runReduce(rsc, slot, static_cast<const Allocation *>(vain), - static_cast<Allocation *>(vaout), sc); -} - -void rsi_ScriptReduceNew(Context *rsc, RsScript vs, uint32_t slot, - RsAllocation *vains, size_t inLen, - RsAllocation vaout, const RsScriptCall *sc, - size_t scLen) { + RsAllocation *vains, size_t inLen, + RsAllocation vaout, const RsScriptCall *sc, + size_t scLen) { Script *s = static_cast<Script *>(vs); Allocation **ains = (Allocation**)(vains); - s->runReduceNew(rsc, slot, - const_cast<const Allocation **>(ains), inLen, - static_cast<Allocation *>(vaout), sc); + s->runReduce(rsc, slot, + const_cast<const Allocation **>(ains), inLen, + static_cast<Allocation *>(vaout), sc); } void rsi_ScriptInvoke(Context *rsc, RsScript vs, uint32_t slot) { @@ -86,7 +86,6 @@ public: size_t exportedVariableCount; size_t exportedForEachCount; size_t exportedReduceCount; - size_t exportedReduceNewCount; size_t exportedFunctionCount; size_t exportedPragmaCount; char const **exportedPragmaKeyList; @@ -133,13 +132,10 @@ 
public: size_t usrBytes, const RsScriptCall *sc = nullptr) = 0; - virtual void runReduce(Context *rsc, uint32_t slot, const Allocation *ain, + virtual void runReduce(Context *rsc, uint32_t slot, + const Allocation **ains, size_t inLen, Allocation *aout, const RsScriptCall *sc) = 0; - virtual void runReduceNew(Context *rsc, uint32_t slot, - const Allocation **ains, size_t inLen, - Allocation *aout, const RsScriptCall *sc) = 0; - virtual void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) = 0; virtual void setupScript(Context *rsc) = 0; virtual uint32_t run(Context *) = 0; diff --git a/rsScriptC.cpp b/rsScriptC.cpp index d2d5b1fd..c0f858a2 100644 --- a/rsScriptC.cpp +++ b/rsScriptC.cpp @@ -239,33 +239,13 @@ void ScriptC::runForEach(Context *rsc, } } -void ScriptC::runReduce(Context *rsc, uint32_t slot, const Allocation *ain, +void ScriptC::runReduce(Context *rsc, uint32_t slot, + const Allocation ** ains, size_t inLen, Allocation *aout, const RsScriptCall *sc) { - // TODO: Record the name of the kernel in the tracing information. - ATRACE_CALL(); - - if (slot >= mHal.info.exportedReduceCount) { - rsc->setError(RS_ERROR_BAD_SCRIPT, "The simple reduce kernel index is out of bounds"); - return; - } - if (mRSC->hadFatalError()) return; - - setupScript(rsc); - - if (rsc->props.mLogScripts) { - ALOGV("%p ScriptC::runReduce invoking slot %i, ptr %p", rsc, slot, this); - } - - rsc->mHal.funcs.script.invokeReduce(rsc, this, slot, ain, aout, sc); -} - -void ScriptC::runReduceNew(Context *rsc, uint32_t slot, - const Allocation ** ains, size_t inLen, - Allocation *aout, const RsScriptCall *sc) { // TODO: Record the name of the kernel in the tracing information. 
ATRACE_CALL(); - if (slot >= mHal.info.exportedReduceNewCount) { + if (slot >= mHal.info.exportedReduceCount) { rsc->setError(RS_ERROR_BAD_SCRIPT, "The general reduce kernel index is out of bounds"); return; } @@ -274,10 +254,10 @@ void ScriptC::runReduceNew(Context *rsc, uint32_t slot, setupScript(rsc); if (rsc->props.mLogScripts) { - ALOGV("%p ScriptC::runReduceNew invoking slot %i, ptr %p", rsc, slot, this); + ALOGV("%p ScriptC::runReduce invoking slot %i, ptr %p", rsc, slot, this); } - rsc->mHal.funcs.script.invokeReduceNew(rsc, this, slot, ains, inLen, aout, sc); + rsc->mHal.funcs.script.invokeReduce(rsc, this, slot, ains, inLen, aout, sc); } void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) { diff --git a/rsScriptC.h b/rsScriptC.h index c8881a4b..6c34215b 100644 --- a/rsScriptC.h +++ b/rsScriptC.h @@ -47,13 +47,10 @@ public: size_t usrBytes, const RsScriptCall *sc = nullptr) override; - void runReduce(Context *rsc, uint32_t slot, const Allocation *ain, + void runReduce(Context *rsc, uint32_t slot, + const Allocation ** ains, size_t inLen, Allocation *aout, const RsScriptCall *sc) override; - void runReduceNew(Context *rsc, uint32_t slot, - const Allocation ** ains, size_t inLen, - Allocation *aout, const RsScriptCall *sc) override; - virtual void serialize(Context *rsc, OStream *stream) const { } virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_SCRIPT_C; } static Type *createFromStream(Context *rsc, IStream *stream) { return nullptr; } diff --git a/rsScriptIntrinsic.cpp b/rsScriptIntrinsic.cpp index 6e0f6ae3..0122a718 100644 --- a/rsScriptIntrinsic.cpp +++ b/rsScriptIntrinsic.cpp @@ -68,15 +68,11 @@ void ScriptIntrinsic::runForEach(Context* rsc, aout, usr, usrBytes, sc); } -void ScriptIntrinsic::runReduce(Context *rsc, uint32_t slot, const Allocation *ain, +void ScriptIntrinsic::runReduce(Context *rsc, uint32_t slot, + const Allocation ** ains, size_t inLen, Allocation *aout, const RsScriptCall *sc) { } -void 
ScriptIntrinsic::runReduceNew(Context *rsc, uint32_t slot, - const Allocation ** ains, size_t inLen, - Allocation *aout, const RsScriptCall *sc) { -} - void ScriptIntrinsic::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) { } diff --git a/rsScriptIntrinsic.h b/rsScriptIntrinsic.h index e2b04b86..9b4f9d3b 100644 --- a/rsScriptIntrinsic.h +++ b/rsScriptIntrinsic.h @@ -49,13 +49,10 @@ public: size_t usrBytes, const RsScriptCall* sc = nullptr) override; - void runReduce(Context *rsc, uint32_t slot, const Allocation *ain, + void runReduce(Context *rsc, uint32_t slot, + const Allocation ** ains, size_t inLen, Allocation *aout, const RsScriptCall *sc) override; - void runReduceNew(Context *rsc, uint32_t slot, - const Allocation ** ains, size_t inLen, - Allocation *aout, const RsScriptCall *sc) override; - void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) override; void setupScript(Context *rsc) override; uint32_t run(Context *) override; @@ -172,14 +172,10 @@ typedef struct { size_t usrLen, const RsScriptCall *sc); void (*invokeReduce)(const Context *rsc, Script *s, - uint32_t slot, const Allocation *ain, + uint32_t slot, + const Allocation ** ains, size_t inLen, Allocation *aout, const RsScriptCall *sc); - void (*invokeReduceNew)(const Context *rsc, Script *s, - uint32_t slot, - const Allocation ** ains, size_t inLen, - Allocation *aout, - const RsScriptCall *sc); void (*invokeInit)(const Context *rsc, Script *s); void (*invokeFreeChildren)(const Context *rsc, Script *s); @@ -412,7 +408,6 @@ enum RsHalInitEnums { RS_HAL_SCRIPT_INVOKE_FOR_EACH_MULTI = 1013, RS_HAL_SCRIPT_UPDATE_CACHED_OBJECT = 1014, RS_HAL_SCRIPT_INVOKE_REDUCE = 1015, - RS_HAL_SCRIPT_INVOKE_REDUCE_NEW = 1016, RS_HAL_ALLOCATION_INIT = 2000, RS_HAL_ALLOCATION_INIT_ADAPTER = 2001, |