diff options
Diffstat (limited to 'cpu_ref/rsCpuCore.cpp')
-rw-r--r-- | cpu_ref/rsCpuCore.cpp | 60 |
1 files changed, 39 insertions, 21 deletions
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp index 4367bd4a..48e8dbb1 100644 --- a/cpu_ref/rsCpuCore.cpp +++ b/cpu_ref/rsCpuCore.cpp @@ -45,11 +45,6 @@ static pid_t gettid() { using namespace android; using namespace android::renderscript; -typedef void (*outer_foreach_t)( - const RsExpandKernelDriverInfo *, - uint32_t x1, uint32_t x2, uint32_t outstep); - - static pthread_key_t gThreadTLSKey = 0; static uint32_t gThreadTLSKeyCount = 0; static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER; @@ -153,13 +148,15 @@ void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) { return nullptr; } +// Launch a kernel. +// The callback function is called to execute the kernel. void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) { mWorkers.mLaunchData = data; mWorkers.mLaunchCallback = cbk; // fast path for very small launches - MTLaunchStruct *mtls = (MTLaunchStruct *)data; - if (mtls && mtls->fep.dim.y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) { + MTLaunchStructCommon *mtls = (MTLaunchStructCommon *)data; + if (mtls && mtls->dimPtr->y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) { if (mWorkers.mLaunchCallback) { mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0); } @@ -220,7 +217,6 @@ static void GetCpuInfo() { bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn) { - mSymLookupFn = lfn; mScriptLookupFn = slfn; @@ -328,16 +324,19 @@ RsdCpuReferenceImpl::~RsdCpuReferenceImpl() { } -static inline void FepPtrSetup(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo *fep, +// Set up the appropriate input and output pointers to the kernel driver info structure. +// Inputs: +// mtls - The MTLaunchStruct holding information about the kernel launch +// fep - The forEach parameters (driver info structure) +// x, y, z, lod, face, a1, a2, a3, a4 - The start offsets into each dimension +static inline void FepPtrSetup(const MTLaunchStructForEach *mtls, RsExpandKernelDriverInfo *fep, uint32_t x, uint32_t y, uint32_t z = 0, uint32_t lod = 0, RsAllocationCubemapFace face = RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X, uint32_t a1 = 0, uint32_t a2 = 0, uint32_t a3 = 0, uint32_t a4 = 0) { - for (uint32_t i = 0; i < fep->inLen; i++) { fep->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4); } - if (mtls->aout[0] != nullptr) { fep->outPtr[0] = (uint8_t *)mtls->aout[0]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4); } @@ -356,7 +355,7 @@ static uint32_t sliceInt(uint32_t *p, uint32_t val, uint32_t start, uint32_t end return n; } -static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) { +static bool SelectOuterSlice(const MTLaunchStructForEach *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) { uint32_t r = sliceNum; r = sliceInt(&fep->current.z, r, mtls->start.z, mtls->end.z); @@ -371,10 +370,10 @@ static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInf static void walk_general(void *usr, uint32_t idx) { - MTLaunchStruct *mtls = (MTLaunchStruct *)usr; + MTLaunchStructForEach *mtls = (MTLaunchStructForEach *)usr; RsExpandKernelDriverInfo fep = mtls->fep; fep.lid = idx; - outer_foreach_t fn = (outer_foreach_t) mtls->kernel; + ForEachFunc_t fn = mtls->kernel; while(1) { @@ -400,10 +399,10 @@ static void walk_general(void *usr, uint32_t idx) { } static void walk_2d(void *usr, uint32_t idx) { - MTLaunchStruct *mtls = (MTLaunchStruct *)usr; + MTLaunchStructForEach *mtls = (MTLaunchStructForEach *)usr; RsExpandKernelDriverInfo fep = mtls->fep; fep.lid = idx; - outer_foreach_t fn = (outer_foreach_t) mtls->kernel; + ForEachFunc_t fn = mtls->kernel; while (1) { uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); @@ -425,10 +424,10 @@ static void walk_2d(void *usr, uint32_t idx) { } static void walk_1d(void *usr, uint32_t idx) { - MTLaunchStruct *mtls = (MTLaunchStruct *)usr; + MTLaunchStructForEach *mtls = (MTLaunchStructForEach *)usr; RsExpandKernelDriverInfo fep = mtls->fep; fep.lid = idx; - outer_foreach_t fn = (outer_foreach_t) mtls->kernel; + ForEachFunc_t fn = mtls->kernel; while (1) { uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); @@ -447,11 +446,30 @@ static void walk_1d(void *usr, uint32_t idx) { } } -void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains, +// Launch a reduce-style kernel. +// Inputs: +// ain: The allocation that contains the input +// aout: The allocation that will hold the output +// mtls: Holds launch parameters +void RsdCpuReferenceImpl::launchReduce(const Allocation *ain, + Allocation *aout, + MTLaunchStructReduce *mtls) { + const uint32_t xStart = mtls->start.x; + const uint32_t xEnd = mtls->end.x; + + if (xStart >= xEnd) { + return; + } + + const uint32_t startOffset = ain->getType()->getElementSizeBytes() * xStart; + mtls->kernel(&mtls->inBuf[startOffset], mtls->outBuf, xEnd - xStart); +} + +void RsdCpuReferenceImpl::launchForEach(const Allocation ** ains, uint32_t inLen, Allocation* aout, const RsScriptCall* sc, - MTLaunchStruct* mtls) { + MTLaunchStructForEach* mtls) { //android::StopWatch kernel_time("kernel time"); @@ -519,7 +537,7 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains, mInForEach = false; } else { - outer_foreach_t fn = (outer_foreach_t) mtls->kernel; + ForEachFunc_t fn = mtls->kernel; uint32_t slice = 0; |