summaryrefslogtreecommitdiff
path: root/cpu_ref/rsCpuCore.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'cpu_ref/rsCpuCore.cpp')
-rw-r--r--cpu_ref/rsCpuCore.cpp60
1 files changed, 39 insertions, 21 deletions
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 4367bd4a..48e8dbb1 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -45,11 +45,6 @@ static pid_t gettid() {
using namespace android;
using namespace android::renderscript;
-typedef void (*outer_foreach_t)(
- const RsExpandKernelDriverInfo *,
- uint32_t x1, uint32_t x2, uint32_t outstep);
-
-
static pthread_key_t gThreadTLSKey = 0;
static uint32_t gThreadTLSKeyCount = 0;
static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER;
@@ -153,13 +148,15 @@ void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) {
return nullptr;
}
+// Launch a kernel.
+// The callback function is called to execute the kernel.
void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) {
mWorkers.mLaunchData = data;
mWorkers.mLaunchCallback = cbk;
// fast path for very small launches
- MTLaunchStruct *mtls = (MTLaunchStruct *)data;
- if (mtls && mtls->fep.dim.y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) {
+ MTLaunchStructCommon *mtls = (MTLaunchStructCommon *)data;
+ if (mtls && mtls->dimPtr->y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) {
if (mWorkers.mLaunchCallback) {
mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
}
@@ -220,7 +217,6 @@ static void GetCpuInfo() {
bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor,
sym_lookup_t lfn, script_lookup_t slfn) {
-
mSymLookupFn = lfn;
mScriptLookupFn = slfn;
@@ -328,16 +324,19 @@ RsdCpuReferenceImpl::~RsdCpuReferenceImpl() {
}
-static inline void FepPtrSetup(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo *fep,
+// Set up the appropriate input and output pointers to the kernel driver info structure.
+// Inputs:
+// mtls - The MTLaunchStruct holding information about the kernel launch
+// fep - The forEach parameters (driver info structure)
+// x, y, z, lod, face, a1, a2, a3, a4 - The start offsets into each dimension
+static inline void FepPtrSetup(const MTLaunchStructForEach *mtls, RsExpandKernelDriverInfo *fep,
uint32_t x, uint32_t y,
uint32_t z = 0, uint32_t lod = 0,
RsAllocationCubemapFace face = RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
uint32_t a1 = 0, uint32_t a2 = 0, uint32_t a3 = 0, uint32_t a4 = 0) {
-
for (uint32_t i = 0; i < fep->inLen; i++) {
fep->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
}
-
if (mtls->aout[0] != nullptr) {
fep->outPtr[0] = (uint8_t *)mtls->aout[0]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
}
@@ -356,7 +355,7 @@ static uint32_t sliceInt(uint32_t *p, uint32_t val, uint32_t start, uint32_t end
return n;
}
-static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) {
+static bool SelectOuterSlice(const MTLaunchStructForEach *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) {
uint32_t r = sliceNum;
r = sliceInt(&fep->current.z, r, mtls->start.z, mtls->end.z);
@@ -371,10 +370,10 @@ static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInf
static void walk_general(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+ MTLaunchStructForEach *mtls = (MTLaunchStructForEach *)usr;
RsExpandKernelDriverInfo fep = mtls->fep;
fep.lid = idx;
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+ ForEachFunc_t fn = mtls->kernel;
while(1) {
@@ -400,10 +399,10 @@ static void walk_general(void *usr, uint32_t idx) {
}
static void walk_2d(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+ MTLaunchStructForEach *mtls = (MTLaunchStructForEach *)usr;
RsExpandKernelDriverInfo fep = mtls->fep;
fep.lid = idx;
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+ ForEachFunc_t fn = mtls->kernel;
while (1) {
uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
@@ -425,10 +424,10 @@ static void walk_2d(void *usr, uint32_t idx) {
}
static void walk_1d(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+ MTLaunchStructForEach *mtls = (MTLaunchStructForEach *)usr;
RsExpandKernelDriverInfo fep = mtls->fep;
fep.lid = idx;
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+ ForEachFunc_t fn = mtls->kernel;
while (1) {
uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
@@ -447,11 +446,30 @@ static void walk_1d(void *usr, uint32_t idx) {
}
}
-void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains,
+// Launch a reduce-style kernel.
+// Inputs:
+// ain: The allocation that contains the input
+// aout: The allocation that will hold the output
+// mtls: Holds launch parameters
+void RsdCpuReferenceImpl::launchReduce(const Allocation *ain,
+ Allocation *aout,
+ MTLaunchStructReduce *mtls) {
+ const uint32_t xStart = mtls->start.x;
+ const uint32_t xEnd = mtls->end.x;
+
+ if (xStart >= xEnd) {
+ return;
+ }
+
+ const uint32_t startOffset = ain->getType()->getElementSizeBytes() * xStart;
+ mtls->kernel(&mtls->inBuf[startOffset], mtls->outBuf, xEnd - xStart);
+}
+
+void RsdCpuReferenceImpl::launchForEach(const Allocation ** ains,
uint32_t inLen,
Allocation* aout,
const RsScriptCall* sc,
- MTLaunchStruct* mtls) {
+ MTLaunchStructForEach* mtls) {
//android::StopWatch kernel_time("kernel time");
@@ -519,7 +537,7 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains,
mInForEach = false;
} else {
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+ ForEachFunc_t fn = mtls->kernel;
uint32_t slice = 0;