From ae2ec3febedfc29376b9104413fb4042028f1265 Mon Sep 17 00:00:00 2001 From: David Gross Date: Wed, 1 Jun 2016 14:45:47 -0700 Subject: Delete simple reduction implementation. Bug: 27298560 Change-Id: I8c3d568e98aaf0b7d86881c985d13ed5b8e95338 --- cpu_ref/rsCpuCore.cpp | 139 ++++++++++++++++++++++---------------------------- 1 file changed, 60 insertions(+), 79 deletions(-) (limited to 'cpu_ref/rsCpuCore.cpp') diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp index 011b8e3d..8fefe882 100644 --- a/cpu_ref/rsCpuCore.cpp +++ b/cpu_ref/rsCpuCore.cpp @@ -45,7 +45,7 @@ static pid_t gettid() { using namespace android; using namespace android::renderscript; -#define REDUCE_NEW_ALOGV(mtls, level, ...) do { if ((mtls)->logReduce >= (level)) ALOGV(__VA_ARGS__); } while(0) +#define REDUCE_ALOGV(mtls, level, ...) do { if ((mtls)->logReduce >= (level)) ALOGV(__VA_ARGS__); } while(0) static pthread_key_t gThreadTLSKey = 0; static uint32_t gThreadTLSKeyCount = 0; @@ -354,7 +354,7 @@ static inline void FepPtrSetup(const MTLaunchStructForEach *mtls, RsExpandKernel // mtls - The MTLaunchStruct holding information about the kernel launch // redp - The reduce parameters (driver info structure) // x, y, z - The start offsets into each dimension -static inline void RedpPtrSetup(const MTLaunchStructReduceNew *mtls, RsExpandKernelDriverInfo *redp, +static inline void RedpPtrSetup(const MTLaunchStructReduce *mtls, RsExpandKernelDriverInfo *redp, uint32_t x, uint32_t y, uint32_t z) { for (uint32_t i = 0; i < redp->inLen; i++) { redp->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z); @@ -508,8 +508,8 @@ static const char *format_bytes(FormatBuf *outBuf, const uint8_t *inBuf, const i return *outBuf; } -static void reduce_new_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructReduceNew *mtls, - const char *walkerName, uint32_t threadIdx) { +static void reduce_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructReduce *mtls, + const char *walkerName, uint32_t threadIdx) { rsAssert(!accumPtr); uint32_t accumIdx = (uint32_t)__sync_fetch_and_add(&mtls->accumCount, 1); @@ -522,8 +522,8 @@ static void reduce_new_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructR accumPtr = mtls->accumAlloc + mtls->accumStride * (accumIdx - 1); } } - REDUCE_NEW_ALOGV(mtls, 2, "%s(%p): idx = %u got accumCount %u and accumPtr %p", - walkerName, mtls->accumFunc, threadIdx, accumIdx, accumPtr); + REDUCE_ALOGV(mtls, 2, "%s(%p): idx = %u got accumCount %u and accumPtr %p", + walkerName, mtls->accumFunc, threadIdx, accumIdx, accumPtr); // initialize accumulator if (mtls->initFunc) { mtls->initFunc(accumPtr); @@ -532,18 +532,18 @@ static void reduce_new_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructR } } -static void walk_1d_reduce_new(void *usr, uint32_t idx) { - const MTLaunchStructReduceNew *mtls = (const MTLaunchStructReduceNew *)usr; +static void walk_1d_reduce(void *usr, uint32_t idx) { + const MTLaunchStructReduce *mtls = (const MTLaunchStructReduce *)usr; RsExpandKernelDriverInfo redp = mtls->redp; // find accumulator uint8_t *&accumPtr = mtls->accumPtr[idx]; if (!accumPtr) { - reduce_new_get_accumulator(accumPtr, mtls, __func__, idx); + reduce_get_accumulator(accumPtr, mtls, __func__, idx); } // accumulate - const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc; + const ReduceAccumulatorFunc_t fn = mtls->accumFunc; while (1) { uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); uint32_t xStart = mtls->start.x + slice * mtls->mSliceSize; @@ -566,23 +566,23 @@ static void walk_1d_reduce_new(void *usr, uint32_t idx) { } else { fmt[0] = 0; } - REDUCE_NEW_ALOGV(mtls, 2, "walk_1d_reduce_new(%p): idx = %u, x in [%u, %u)%s", - mtls->accumFunc, idx, xStart, xEnd, fmt); + REDUCE_ALOGV(mtls, 2, "walk_1d_reduce(%p): idx = %u, x in [%u, %u)%s", + mtls->accumFunc, idx, xStart, xEnd, fmt); } } -static void walk_2d_reduce_new(void *usr, uint32_t idx) { - const MTLaunchStructReduceNew *mtls = (const MTLaunchStructReduceNew *)usr; +static void walk_2d_reduce(void *usr, uint32_t idx) { + const MTLaunchStructReduce *mtls = (const MTLaunchStructReduce *)usr; RsExpandKernelDriverInfo redp = mtls->redp; // find accumulator uint8_t *&accumPtr = mtls->accumPtr[idx]; if (!accumPtr) { - reduce_new_get_accumulator(accumPtr, mtls, __func__, idx); + reduce_get_accumulator(accumPtr, mtls, __func__, idx); } // accumulate - const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc; + const ReduceAccumulatorFunc_t fn = mtls->accumFunc; while (1) { uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); uint32_t yStart = mtls->start.y + slice * mtls->mSliceSize; @@ -605,23 +605,23 @@ static void walk_2d_reduce_new(void *usr, uint32_t idx) { } else { fmt[0] = 0; } - REDUCE_NEW_ALOGV(mtls, 2, "walk_2d_reduce_new(%p): idx = %u, y in [%u, %u)%s", - mtls->accumFunc, idx, yStart, yEnd, fmt); + REDUCE_ALOGV(mtls, 2, "walk_2d_reduce(%p): idx = %u, y in [%u, %u)%s", + mtls->accumFunc, idx, yStart, yEnd, fmt); } } -static void walk_3d_reduce_new(void *usr, uint32_t idx) { - const MTLaunchStructReduceNew *mtls = (const MTLaunchStructReduceNew *)usr; +static void walk_3d_reduce(void *usr, uint32_t idx) { + const MTLaunchStructReduce *mtls = (const MTLaunchStructReduce *)usr; RsExpandKernelDriverInfo redp = mtls->redp; // find accumulator uint8_t *&accumPtr = mtls->accumPtr[idx]; if (!accumPtr) { - reduce_new_get_accumulator(accumPtr, mtls, __func__, idx); + reduce_get_accumulator(accumPtr, mtls, __func__, idx); } // accumulate - const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc; + const ReduceAccumulatorFunc_t fn = mtls->accumFunc; while (1) { uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); @@ -640,44 +640,25 @@ static void walk_3d_reduce_new(void *usr, uint32_t idx) { } else { fmt[0] = 0; } - REDUCE_NEW_ALOGV(mtls, 2, "walk_3d_reduce_new(%p): idx = %u, z = %u%s", - mtls->accumFunc, idx, redp.current.z, fmt); + REDUCE_ALOGV(mtls, 2, "walk_3d_reduce(%p): idx = %u, z = %u%s", + mtls->accumFunc, idx, redp.current.z, fmt); } } -// Launch a simple reduce-style kernel. -// Inputs: -// ain: The allocation that contains the input -// aout: The allocation that will hold the output -// mtls: Holds launch parameters -void RsdCpuReferenceImpl::launchReduce(const Allocation *ain, - Allocation *aout, - MTLaunchStructReduce *mtls) { - const uint32_t xStart = mtls->start.x; - const uint32_t xEnd = mtls->end.x; - - if (xStart >= xEnd) { - return; - } - - const uint32_t startOffset = ain->getType()->getElementSizeBytes() * xStart; - mtls->kernel(&mtls->inBuf[startOffset], mtls->outBuf, xEnd - xStart); -} - // Launch a general reduce-style kernel. // Inputs: // ains[0..inLen-1]: Array of allocations that contain the inputs // aout: The allocation that will hold the output // mtls: Holds launch parameters -void RsdCpuReferenceImpl::launchReduceNew(const Allocation ** ains, - uint32_t inLen, - Allocation * aout, - MTLaunchStructReduceNew *mtls) { +void RsdCpuReferenceImpl::launchReduce(const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + MTLaunchStructReduce *mtls) { mtls->logReduce = mRSC->props.mLogReduce; if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInKernel) { - launchReduceNewParallel(ains, inLen, aout, mtls); + launchReduceParallel(ains, inLen, aout, mtls); } else { - launchReduceNewSerial(ains, inLen, aout, mtls); + launchReduceSerial(ains, inLen, aout, mtls); } } @@ -686,12 +667,12 @@ void RsdCpuReferenceImpl::launchReduceNew(const Allocation ** ains, // ains[0..inLen-1]: Array of allocations that contain the inputs // aout: The allocation that will hold the output // mtls: Holds launch parameters -void RsdCpuReferenceImpl::launchReduceNewSerial(const Allocation ** ains, - uint32_t inLen, - Allocation * aout, - MTLaunchStructReduceNew *mtls) { - REDUCE_NEW_ALOGV(mtls, 1, "launchReduceNewSerial(%p): %u x %u x %u", mtls->accumFunc, - mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z); +void RsdCpuReferenceImpl::launchReduceSerial(const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + MTLaunchStructReduce *mtls) { + REDUCE_ALOGV(mtls, 1, "launchReduceSerial(%p): %u x %u x %u", mtls->accumFunc, + mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z); // In the presence of outconverter, we allocate temporary memory for // the accumulator. @@ -710,7 +691,7 @@ void RsdCpuReferenceImpl::launchReduceNewSerial(const Allocation ** ains, } // accumulate - const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc; + const ReduceAccumulatorFunc_t fn = mtls->accumFunc; uint32_t slice = 0; while (SelectOuterSlice(mtls, &mtls->redp, slice++)) { for (mtls->redp.current.y = mtls->start.y; @@ -733,13 +714,13 @@ void RsdCpuReferenceImpl::launchReduceNewSerial(const Allocation ** ains, // ains[0..inLen-1]: Array of allocations that contain the inputs // aout: The allocation that will hold the output // mtls: Holds launch parameters -void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, - uint32_t inLen, - Allocation * aout, - MTLaunchStructReduceNew *mtls) { +void RsdCpuReferenceImpl::launchReduceParallel(const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + MTLaunchStructReduce *mtls) { // For now, we don't know how to go parallel in the absence of a combiner. if (!mtls->combFunc) { - launchReduceNewSerial(ains, inLen, aout, mtls); + launchReduceSerial(ains, inLen, aout, mtls); return; } @@ -777,19 +758,19 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, rsAssert(!mInKernel); mInKernel = true; - REDUCE_NEW_ALOGV(mtls, 1, "launchReduceNewParallel(%p): %u x %u x %u, %u threads, accumAlloc = %p", - mtls->accumFunc, - mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z, - numThreads, mtls->accumAlloc); + REDUCE_ALOGV(mtls, 1, "launchReduceParallel(%p): %u x %u x %u, %u threads, accumAlloc = %p", + mtls->accumFunc, + mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z, + numThreads, mtls->accumAlloc); if (mtls->redp.dim.z > 1) { mtls->mSliceSize = 1; - launchThreads(walk_3d_reduce_new, mtls); + launchThreads(walk_3d_reduce, mtls); } else if (mtls->redp.dim.y > 1) { mtls->mSliceSize = rsMax(1U, mtls->redp.dim.y / (numThreads * 4)); - launchThreads(walk_2d_reduce_new, mtls); + launchThreads(walk_2d_reduce, mtls); } else { mtls->mSliceSize = rsMax(1U, mtls->redp.dim.x / (numThreads * 4)); - launchThreads(walk_1d_reduce_new, mtls); + launchThreads(walk_1d_reduce, mtls); } mInKernel = false; @@ -804,12 +785,12 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, if (mtls->combFunc) { if (mtls->logReduce >= 3) { FormatBuf fmt; - REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): accumulating into%s", - mtls->accumFunc, - format_bytes(&fmt, finalAccumPtr, mtls->accumSize)); - REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): accumulator[%d]%s", - mtls->accumFunc, idx, - format_bytes(&fmt, thisAccumPtr, mtls->accumSize)); + REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): accumulating into%s", + mtls->accumFunc, + format_bytes(&fmt, finalAccumPtr, mtls->accumSize)); + REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): accumulator[%d]%s", + mtls->accumFunc, idx, + format_bytes(&fmt, thisAccumPtr, mtls->accumSize)); } mtls->combFunc(finalAccumPtr, thisAccumPtr); } else { @@ -823,8 +804,8 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, rsAssert(finalAccumPtr != nullptr); if (mtls->logReduce >= 3) { FormatBuf fmt; - REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): final accumulator%s", - mtls->accumFunc, format_bytes(&fmt, finalAccumPtr, mtls->accumSize)); + REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): final accumulator%s", + mtls->accumFunc, format_bytes(&fmt, finalAccumPtr, mtls->accumSize)); } // Outconvert @@ -832,9 +813,9 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains, mtls->outFunc(mtls->redp.outPtr[0], finalAccumPtr); if (mtls->logReduce >= 3) { FormatBuf fmt; - REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): final outconverted result%s", - mtls->accumFunc, - format_bytes(&fmt, mtls->redp.outPtr[0], mtls->redp.outStride[0])); + REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): final outconverted result%s", + mtls->accumFunc, + format_bytes(&fmt, mtls->redp.outPtr[0], mtls->redp.outStride[0])); } } -- cgit v1.2.3