summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matt Wala <wala@google.com> 2015-07-30 17:30:25 -0700
committer: Matt Wala <wala@google.com> 2015-08-14 12:24:24 -0700
commit: 14ce007a633b10e3b9a3fae29d8f53a7e8c9b59f (patch)
tree: d9089fc8c60a65d071cb817b156cc0edc3bb5015
parent: 2b4632b580c756af046ee4b9a6ecc77a01388d4e (diff)
download: rs-14ce007a633b10e3b9a3fae29d8f53a7e8c9b59f.tar.gz
Add a basic implementation of the reduce kernel API to the CPU
reference implementation.

Bug: 22631253

For now, this just runs a serial reduction on one thread.

Change-Id: I34c96d24bb6f44274de72bb53160abcf79d143b0
-rw-r--r-- cpu_ref/rsCpuCore.cpp 60
-rw-r--r-- cpu_ref/rsCpuCore.h 54
-rw-r--r-- cpu_ref/rsCpuExecutable.cpp 43
-rw-r--r-- cpu_ref/rsCpuExecutable.h 10
-rw-r--r-- cpu_ref/rsCpuIntrinsic.cpp 10
-rw-r--r-- cpu_ref/rsCpuIntrinsic.h 4
-rw-r--r-- cpu_ref/rsCpuScript.cpp 223
-rw-r--r-- cpu_ref/rsCpuScript.h 34
-rw-r--r-- cpu_ref/rsCpuScriptGroup.cpp 8
-rw-r--r-- cpu_ref/rsCpuScriptGroup2.cpp 12
-rw-r--r-- cpu_ref/rsd_cpu.h 5
-rw-r--r-- driver/rsdBcc.cpp 9
-rw-r--r-- driver/rsdBcc.h 7
-rw-r--r-- driver/rsdCore.cpp 2
-rw-r--r-- rsDriverLoader.cpp 1
-rw-r--r-- rsScript.cpp 4
-rw-r--r-- rsScript.h 5
-rw-r--r-- rsScriptC.cpp 32
-rw-r--r-- rsScriptC.h 3
-rw-r--r-- rsScriptIntrinsic.cpp 5
-rw-r--r-- rsScriptIntrinsic.h 35
-rw-r--r-- rs_hal.h 5
22 files changed, 382 insertions, 189 deletions
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 4367bd4a..48e8dbb1 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -45,11 +45,6 @@ static pid_t gettid() {
using namespace android;
using namespace android::renderscript;
-typedef void (*outer_foreach_t)(
- const RsExpandKernelDriverInfo *,
- uint32_t x1, uint32_t x2, uint32_t outstep);
-
-
static pthread_key_t gThreadTLSKey = 0;
static uint32_t gThreadTLSKeyCount = 0;
static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER;
@@ -153,13 +148,15 @@ void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) {
return nullptr;
}
+// Launch a kernel.
+// The callback function is called to execute the kernel.
void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) {
mWorkers.mLaunchData = data;
mWorkers.mLaunchCallback = cbk;
// fast path for very small launches
- MTLaunchStruct *mtls = (MTLaunchStruct *)data;
- if (mtls && mtls->fep.dim.y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) {
+ MTLaunchStructCommon *mtls = (MTLaunchStructCommon *)data;
+ if (mtls && mtls->dimPtr->y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) {
if (mWorkers.mLaunchCallback) {
mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
}
@@ -220,7 +217,6 @@ static void GetCpuInfo() {
bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor,
sym_lookup_t lfn, script_lookup_t slfn) {
-
mSymLookupFn = lfn;
mScriptLookupFn = slfn;
@@ -328,16 +324,19 @@ RsdCpuReferenceImpl::~RsdCpuReferenceImpl() {
}
-static inline void FepPtrSetup(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo *fep,
+// Set up the appropriate input and output pointers to the kernel driver info structure.
+// Inputs:
+// mtls - The MTLaunchStruct holding information about the kernel launch
+// fep - The forEach parameters (driver info structure)
+// x, y, z, lod, face, a1, a2, a3, a4 - The start offsets into each dimension
+static inline void FepPtrSetup(const MTLaunchStructForEach *mtls, RsExpandKernelDriverInfo *fep,
uint32_t x, uint32_t y,
uint32_t z = 0, uint32_t lod = 0,
RsAllocationCubemapFace face = RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
uint32_t a1 = 0, uint32_t a2 = 0, uint32_t a3 = 0, uint32_t a4 = 0) {
-
for (uint32_t i = 0; i < fep->inLen; i++) {
fep->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
}
-
if (mtls->aout[0] != nullptr) {
fep->outPtr[0] = (uint8_t *)mtls->aout[0]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
}
@@ -356,7 +355,7 @@ static uint32_t sliceInt(uint32_t *p, uint32_t val, uint32_t start, uint32_t end
return n;
}
-static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) {
+static bool SelectOuterSlice(const MTLaunchStructForEach *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) {
uint32_t r = sliceNum;
r = sliceInt(&fep->current.z, r, mtls->start.z, mtls->end.z);
@@ -371,10 +370,10 @@ static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInf
static void walk_general(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+ MTLaunchStructForEach *mtls = (MTLaunchStructForEach *)usr;
RsExpandKernelDriverInfo fep = mtls->fep;
fep.lid = idx;
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+ ForEachFunc_t fn = mtls->kernel;
while(1) {
@@ -400,10 +399,10 @@ static void walk_general(void *usr, uint32_t idx) {
}
static void walk_2d(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+ MTLaunchStructForEach *mtls = (MTLaunchStructForEach *)usr;
RsExpandKernelDriverInfo fep = mtls->fep;
fep.lid = idx;
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+ ForEachFunc_t fn = mtls->kernel;
while (1) {
uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
@@ -425,10 +424,10 @@ static void walk_2d(void *usr, uint32_t idx) {
}
static void walk_1d(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+ MTLaunchStructForEach *mtls = (MTLaunchStructForEach *)usr;
RsExpandKernelDriverInfo fep = mtls->fep;
fep.lid = idx;
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+ ForEachFunc_t fn = mtls->kernel;
while (1) {
uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
@@ -447,11 +446,30 @@ static void walk_1d(void *usr, uint32_t idx) {
}
}
-void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains,
+// Launch a reduce-style kernel.
+// Inputs:
+// ain: The allocation that contains the input
+// aout: The allocation that will hold the output
+// mtls: Holds launch parameters
+void RsdCpuReferenceImpl::launchReduce(const Allocation *ain,
+ Allocation *aout,
+ MTLaunchStructReduce *mtls) {
+ const uint32_t xStart = mtls->start.x;
+ const uint32_t xEnd = mtls->end.x;
+
+ if (xStart >= xEnd) {
+ return;
+ }
+
+ const uint32_t startOffset = ain->getType()->getElementSizeBytes() * xStart;
+ mtls->kernel(&mtls->inBuf[startOffset], mtls->outBuf, xEnd - xStart);
+}
+
+void RsdCpuReferenceImpl::launchForEach(const Allocation ** ains,
uint32_t inLen,
Allocation* aout,
const RsScriptCall* sc,
- MTLaunchStruct* mtls) {
+ MTLaunchStructForEach* mtls) {
//android::StopWatch kernel_time("kernel time");
@@ -519,7 +537,7 @@ void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains,
mInForEach = false;
} else {
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+ ForEachFunc_t fn = mtls->kernel;
uint32_t slice = 0;
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index 0f784382..cfdb29a6 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -31,8 +31,14 @@ namespace renderscript {
// Whether the CPU we're running on supports SIMD instructions
extern bool gArchUseSIMD;
-typedef void (* InvokeFunc_t)(void);
-typedef void (* ForEachFunc_t)(void);
+// Function types found in RenderScript code
+typedef void (*ReduceFunc_t)(const uint8_t *inBuf, uint8_t *outBuf, uint32_t len);
+typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride);
+typedef void (*InvokeFunc_t)(void *params);
+typedef void (*InitOrDtorFunc_t)(void);
+typedef int (*RootFunc_t)(void);
+
+// Internal driver callback used to execute a kernel
typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
class RsdCpuScriptImpl;
@@ -44,23 +50,38 @@ struct ScriptTLSStruct {
RsdCpuScriptImpl *mImpl;
};
-struct MTLaunchStruct {
- RsExpandKernelDriverInfo fep;
-
- RsdCpuReferenceImpl *rsc;
+// MTLaunchStruct passes information about a multithreaded kernel launch.
+struct MTLaunchStructCommon {
+ RsdCpuReferenceImpl *rs;
RsdCpuScriptImpl *script;
- ForEachFunc_t kernel;
- uint32_t sig;
- const Allocation * ains[RS_KERNEL_INPUT_LIMIT];
- Allocation * aout[RS_KERNEL_INPUT_LIMIT];
-
uint32_t mSliceSize;
volatile int mSliceNum;
bool isThreadable;
+ // Boundary information about the launch
RsLaunchDimensions start;
RsLaunchDimensions end;
+ // Points to MTLaunchStructForEach::fep::dim or
+ // MTLaunchStructReduce::inputDim.
+ RsLaunchDimensions *dimPtr;
+};
+
+struct MTLaunchStructForEach : public MTLaunchStructCommon {
+ // Driver info structure
+ RsExpandKernelDriverInfo fep;
+
+ ForEachFunc_t kernel;
+ uint32_t sig;
+ const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
+ Allocation *aout[RS_KERNEL_INPUT_LIMIT];
+};
+
+struct MTLaunchStructReduce : public MTLaunchStructCommon {
+ ReduceFunc_t kernel;
+ const uint8_t *inBuf;
+ uint8_t *outBuf;
+ RsLaunchDimensions inputDim;
};
class RsdCpuReferenceImpl : public RsdCpuReference {
@@ -82,8 +103,13 @@ public:
return mWorkers.mCount + 1;
}
- void launchThreads(const Allocation** ains, uint32_t inLen, Allocation* aout,
- const RsScriptCall* sc, MTLaunchStruct* mtls);
+ // Launch foreach kernel
+ void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout,
+ const RsScriptCall *sc, MTLaunchStructForEach *mtls);
+
+ // Launch a reduce kernel
+ void launchReduce(const Allocation *ain, Allocation *aout,
+ MTLaunchStructReduce *mtls);
CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir,
uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override;
@@ -92,7 +118,7 @@ public:
const RsdCpuReference::CpuSymbol *symLookup(const char *);
- RsdCpuReference::CpuScript * lookupScript(const Script *s) {
+ RsdCpuReference::CpuScript *lookupScript(const Script *s) {
return mScriptLookupFn(mRSC, s);
}
diff --git a/cpu_ref/rsCpuExecutable.cpp b/cpu_ref/rsCpuExecutable.cpp
index 867a2cd6..74d400f9 100644
--- a/cpu_ref/rsCpuExecutable.cpp
+++ b/cpu_ref/rsCpuExecutable.cpp
@@ -267,6 +267,7 @@ void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDi
#define EXPORT_VAR_STR "exportVarCount: "
#define EXPORT_FUNC_STR "exportFuncCount: "
#define EXPORT_FOREACH_STR "exportForEachCount: "
+#define EXPORT_REDUCE_STR "exportReduceCount: "
#define OBJECT_SLOT_STR "objectSlotCount: "
#define PRAGMA_STR "pragmaCount: "
#define THREADABLE_STR "isThreadable: "
@@ -304,6 +305,7 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
size_t varCount = 0;
size_t funcCount = 0;
size_t forEachCount = 0;
+ size_t reduceCount = 0;
size_t objectSlotCount = 0;
size_t pragmaCount = 0;
bool isThreadable = true;
@@ -314,6 +316,7 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
InvokeFunc_t* invokeFunctions = nullptr;
ForEachFunc_t* forEachFunctions = nullptr;
uint32_t* forEachSignatures = nullptr;
+ ReduceFunc_t* reduceFunctions = nullptr;
const char ** pragmaKeys = nullptr;
const char ** pragmaValues = nullptr;
uint32_t checksum = 0;
@@ -439,12 +442,47 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
strcmp(tmpName, "root.expand")) {
// Ignore missing root.expand functions.
// root() is always specified at location 0.
- ALOGE("Failed to find forEach function address for %s: %s",
+ ALOGE("Failed to find forEach function address for %s(): %s",
tmpName, dlerror());
goto error;
}
}
+ // Read reduce kernels
+ if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+ goto error;
+ }
+ if (sscanf(line, EXPORT_REDUCE_STR "%zu", &reduceCount) != 1) {
+ ALOGE("Invalid export reduce count!: %s", line);
+ goto error;
+ }
+
+ reduceFunctions = new ReduceFunc_t[reduceCount];
+ if (reduceFunctions == nullptr) {
+ goto error;
+ }
+
+ for (size_t i = 0; i < reduceCount; ++i) {
+ if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+ goto error;
+ }
+ char *c = strrchr(line, '\n');
+ if (c) {
+ *c = '\0';
+ }
+
+ // Lookup the expanded reduce kernel.
+ strncat(line, ".expand", MAXLINE-1-strlen(line));
+
+ reduceFunctions[i] =
+ reinterpret_cast<ReduceFunc_t>(dlsym(sharedObj, line));
+ if (reduceFunctions[i] == nullptr) {
+ ALOGE("Failed to get function address for %s(): %s",
+ line, dlerror());
+ goto error;
+ }
+ }
+
if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
goto error;
}
@@ -577,6 +615,7 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
RSContext, fieldAddress, fieldIsObject, fieldName, varCount,
invokeFunctions, funcCount,
forEachFunctions, forEachSignatures, forEachCount,
+ reduceFunctions, reduceCount,
pragmaKeys, pragmaValues, pragmaCount,
rsGlobalNames, rsGlobalAddresses, rsGlobalSizes, rsGlobalProperties,
numEntries, isThreadable, checksum);
@@ -594,6 +633,8 @@ error:
delete[] pragmaKeys;
#endif // RS_COMPATIBILITY_LIB
+ delete[] reduceFunctions;
+
delete[] forEachSignatures;
delete[] forEachFunctions;
diff --git a/cpu_ref/rsCpuExecutable.h b/cpu_ref/rsCpuExecutable.h
index 68809706..fe9c2ad5 100644
--- a/cpu_ref/rsCpuExecutable.h
+++ b/cpu_ref/rsCpuExecutable.h
@@ -68,6 +68,7 @@ public:
InvokeFunc_t* invokeFunctions, size_t funcCount,
ForEachFunc_t* forEachFunctions, uint32_t* forEachSignatures,
size_t forEachCount,
+ ReduceFunc_t* reduceFunctions, size_t reduceCount,
const char** pragmaKeys, const char** pragmaValues,
size_t pragmaCount,
const char **globalNames, const void **globalAddresses,
@@ -79,6 +80,7 @@ public:
mInvokeFunctions(invokeFunctions), mFuncCount(funcCount),
mForEachFunctions(forEachFunctions), mForEachSignatures(forEachSignatures),
mForEachCount(forEachCount),
+ mReduceFunctions(reduceFunctions), mReduceCount(reduceCount),
mPragmaKeys(pragmaKeys), mPragmaValues(pragmaValues),
mPragmaCount(pragmaCount), mGlobalNames(globalNames),
mGlobalAddresses(globalAddresses), mGlobalSizes(globalSizes),
@@ -105,6 +107,8 @@ public:
delete[] mPragmaValues;
delete[] mPragmaKeys;
+ delete[] mReduceFunctions;
+
delete[] mForEachSignatures;
delete[] mForEachFunctions;
@@ -129,6 +133,7 @@ public:
size_t getExportedVariableCount() const { return mExportedVarCount; }
size_t getExportedFunctionCount() const { return mFuncCount; }
size_t getExportedForEachCount() const { return mForEachCount; }
+ size_t getExportedReduceCount() const { return mReduceCount; }
size_t getPragmaCount() const { return mPragmaCount; }
void* getFieldAddress(int slot) const { return mFieldAddress[slot]; }
@@ -141,6 +146,8 @@ public:
ForEachFunc_t getForEachFunction(int slot) const { return mForEachFunctions[slot]; }
uint32_t getForEachSignature(int slot) const { return mForEachSignatures[slot]; }
+ ReduceFunc_t getReduceFunction(int slot) const { return mReduceFunctions[slot]; }
+
const char ** getPragmaKeys() const { return mPragmaKeys; }
const char ** getPragmaValues() const { return mPragmaValues; }
@@ -193,6 +200,9 @@ private:
uint32_t* mForEachSignatures;
size_t mForEachCount;
+ ReduceFunc_t* mReduceFunctions;
+ size_t mReduceCount;
+
const char ** mPragmaKeys;
const char ** mPragmaValues;
size_t mPragmaCount;
diff --git a/cpu_ref/rsCpuIntrinsic.cpp b/cpu_ref/rsCpuIntrinsic.cpp
index 16363697..4cb3f9ff 100644
--- a/cpu_ref/rsCpuIntrinsic.cpp
+++ b/cpu_ref/rsCpuIntrinsic.cpp
@@ -93,7 +93,7 @@ void RsdCpuScriptIntrinsic::invokeForEach(uint32_t slot,
uint32_t usrLen,
const RsScriptCall *sc) {
- MTLaunchStruct mtls;
+ MTLaunchStructForEach mtls;
preLaunch(slot, ains, inLen, aout, usr, usrLen, sc);
@@ -101,21 +101,21 @@ void RsdCpuScriptIntrinsic::invokeForEach(uint32_t slot,
mtls.script = this;
mtls.fep.slot = slot;
- mtls.kernel = (void (*)())mRootPtr;
+ mtls.kernel = mRootPtr;
mtls.fep.usr = this;
RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
- mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
+ mCtx->launchForEach(ains, inLen, aout, sc, &mtls);
mCtx->setTLS(oldTLS);
}
postLaunch(slot, ains, inLen, aout, usr, usrLen, sc);
}
-void RsdCpuScriptIntrinsic::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
+void RsdCpuScriptIntrinsic::forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *mtls) {
mtls->script = this;
mtls->fep.slot = slot;
- mtls->kernel = (void (*)())mRootPtr;
+ mtls->kernel = mRootPtr;
mtls->fep.usr = this;
}
diff --git a/cpu_ref/rsCpuIntrinsic.h b/cpu_ref/rsCpuIntrinsic.h
index 0ac8e6e7..9c7e1726 100644
--- a/cpu_ref/rsCpuIntrinsic.h
+++ b/cpu_ref/rsCpuIntrinsic.h
@@ -39,7 +39,7 @@ public:
uint32_t usrLen,
const RsScriptCall *sc) override;
- void forEachKernelSetup(uint32_t slot, MTLaunchStruct * mtls) override;
+ void forEachKernelSetup(uint32_t slot, MTLaunchStructForEach * mtls) override;
void invokeInit() override;
void invokeFreeChildren() override;
@@ -65,7 +65,7 @@ public:
protected:
RsScriptIntrinsicID mID;
- outer_foreach_t mRootPtr;
+ ForEachFunc_t mRootPtr;
ObjectBaseRef<const Element> mElement;
};
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index 09e7ab79..5adca544 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -50,6 +50,12 @@ namespace {
static const bool kDebugGlobalVariables = false;
+static bool allocationLODIsNull(const android::renderscript::Allocation *alloc) {
+ // Even if alloc != nullptr, mallocPtr could be null if
+ // IO_OUTPUT/IO_INPUT with no bound surface.
+ return alloc && alloc->mHal.drvState.lod[0].mallocPtr == nullptr;
+}
+
#ifndef RS_COMPATIBILITY_LIB
static bool is_force_recompile() {
@@ -282,11 +288,11 @@ bool RsdCpuScriptImpl::storeRSInfoFromSO() {
if (mRootExpand) {
//ALOGE("Found root.expand(): %p", mRootExpand);
}
- mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
+ mInit = (InitOrDtorFunc_t) dlsym(mScriptSO, "init");
if (mInit) {
//ALOGE("Found init(): %p", mInit);
}
- mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
+ mFreeChildren = (InitOrDtorFunc_t) dlsym(mScriptSO, ".rs.dtor");
if (mFreeChildren) {
//ALOGE("Found .rs.dtor(): %p", mFreeChildren);
}
@@ -490,6 +496,8 @@ const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, c
void RsdCpuScriptImpl::populateScript(Script *script) {
// Copy info over to runtime
script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
+ script->mHal.info.exportedReduceCount = mScriptExec->getExportedReduceCount();
+ script->mHal.info.exportedForEachCount = mScriptExec->getExportedForEachCount();
script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
@@ -503,32 +511,105 @@ void RsdCpuScriptImpl::populateScript(Script *script) {
}
}
+// Set up the launch dimensions, and write the values of the launch
+// dimensions into the mtls start/end fields.
+//
+// Inputs:
+// baseDim - base shape of the input
+// sc - used to constrain the launch dimensions
+//
+// Returns:
+// True on success, false on failure to set up
+bool RsdCpuScriptImpl::setUpMtlsDimensions(MTLaunchStructCommon *mtls,
+ const RsLaunchDimensions &baseDim,
+ const RsScriptCall *sc) {
+ rsAssert(mtls);
+
+#define SET_UP_DIMENSION(DIM_FIELD, SC_FIELD) do { \
+ if (!sc || (sc->SC_FIELD##End == 0)) { \
+ mtls->end.DIM_FIELD = baseDim.DIM_FIELD; \
+ } else { \
+ mtls->start.DIM_FIELD = \
+ rsMin(baseDim.DIM_FIELD, sc->SC_FIELD##Start); \
+ mtls->end.DIM_FIELD = \
+ rsMin(baseDim.DIM_FIELD, sc->SC_FIELD##End); \
+ if (mtls->start.DIM_FIELD >= mtls->end.DIM_FIELD) { \
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, \
+ "Failed to launch kernel; Invalid " \
+ #SC_FIELD "Start or " #SC_FIELD "End."); \
+ return false; \
+ } \
+ }} while(0)
+
+ SET_UP_DIMENSION(x, x);
+ SET_UP_DIMENSION(y, y);
+ SET_UP_DIMENSION(z, z);
+ SET_UP_DIMENSION(array[0], array);
+ SET_UP_DIMENSION(array[1], array2);
+ SET_UP_DIMENSION(array[2], array3);
+ SET_UP_DIMENSION(array[3], array4);
+#undef SET_UP_DIMENSION
+
+ return true;
+}
+
+// Preliminary work to prepare a reduce-style kernel for launch.
+bool RsdCpuScriptImpl::reduceMtlsSetup(const Allocation *ain,
+ const Allocation *aout,
+ const RsScriptCall *sc,
+ MTLaunchStructReduce *mtls) {
+ rsAssert(ain && aout);
+ memset(mtls, 0, sizeof(MTLaunchStructReduce));
+ mtls->dimPtr = &mtls->inputDim;
+
+ if (allocationLODIsNull(ain) || allocationLODIsNull(aout)) {
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "reduce called with a null allocation");
+ return false;
+ }
+
+ // Set up the dimensions of the input.
+ const Type *inType = ain->getType();
+ mtls->inputDim.x = inType->getDimX();
+ rsAssert(inType->getDimY() == 0);
+
+ if (!setUpMtlsDimensions(mtls, mtls->inputDim, sc)) {
+ return false;
+ }
+
+ mtls->rs = mCtx;
+ // Currently not threaded.
+ mtls->isThreadable = false;
+ mtls->mSliceNum = -1;
+
+ // Set up input and output.
+ mtls->inBuf = static_cast<uint8_t *>(ain->getPointerUnchecked(0, 0));
+ mtls->outBuf = static_cast<uint8_t *>(aout->getPointerUnchecked(0, 0));
+
+ rsAssert(mtls->inBuf && mtls->outBuf);
+
+ return true;
+}
+// Preliminary work to prepare a forEach-style kernel for launch.
bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
uint32_t inLen,
Allocation * aout,
const void * usr, uint32_t usrLen,
const RsScriptCall *sc,
- MTLaunchStruct *mtls) {
-
- memset(mtls, 0, sizeof(MTLaunchStruct));
+ MTLaunchStructForEach *mtls) {
+ memset(mtls, 0, sizeof(MTLaunchStructForEach));
+ mtls->dimPtr = &mtls->fep.dim;
for (int index = inLen; --index >= 0;) {
- const Allocation* ain = ains[index];
-
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ain != nullptr &&
- (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
-
+ if (allocationLODIsNull(ains[index])) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
"rsForEach called with null in allocations");
return false;
}
}
- if (aout &&
- (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
-
+ if (allocationLODIsNull(aout)) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
"rsForEach called with null out allocations");
return false;
@@ -578,96 +659,14 @@ bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
}
}
- if (!sc || (sc->xEnd == 0)) {
- mtls->end.x = mtls->fep.dim.x;
- } else {
- mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart);
- mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd);
- if (mtls->start.x >= mtls->end.x) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; Invalid xStart or xEnd.");
- return false;
- }
- }
-
- if (!sc || (sc->yEnd == 0)) {
- mtls->end.y = mtls->fep.dim.y;
- } else {
- mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart);
- mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd);
- if (mtls->start.y >= mtls->end.y) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; Invalid yStart or yEnd.");
- return false;
- }
- }
-
- if (!sc || (sc->zEnd == 0)) {
- mtls->end.z = mtls->fep.dim.z;
- } else {
- mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart);
- mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd);
- if (mtls->start.z >= mtls->end.z) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; Invalid zStart or zEnd.");
- return false;
- }
- }
-
- if (!sc || (sc->arrayEnd == 0)) {
- mtls->end.array[0] = mtls->fep.dim.array[0];
- } else {
- mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart);
- mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd);
- if (mtls->start.array[0] >= mtls->end.array[0]) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; Invalid arrayStart or arrayEnd.");
- return false;
- }
- }
-
- if (!sc || (sc->array2End == 0)) {
- mtls->end.array[1] = mtls->fep.dim.array[1];
- } else {
- mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start);
- mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End);
- if (mtls->start.array[1] >= mtls->end.array[1]) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; Invalid array2Start or array2End.");
- return false;
- }
- }
-
- if (!sc || (sc->array3End == 0)) {
- mtls->end.array[2] = mtls->fep.dim.array[2];
- } else {
- mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start);
- mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End);
- if (mtls->start.array[2] >= mtls->end.array[2]) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; Invalid array3Start or array3End.");
- return false;
- }
- }
-
- if (!sc || (sc->array4End == 0)) {
- mtls->end.array[3] = mtls->fep.dim.array[3];
- } else {
- mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start);
- mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End);
- if (mtls->start.array[3] >= mtls->end.array[3]) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; Invalid array4Start or array4End.");
- return false;
- }
+ if (!setUpMtlsDimensions(mtls, mtls->fep.dim, sc)) {
+ return false;
}
-
// The X & Y walkers always want 0-1 min even if dim is not present
mtls->end.x = rsMax((uint32_t)1, mtls->end.x);
mtls->end.y = rsMax((uint32_t)1, mtls->end.y);
-
- mtls->rsc = mCtx;
+ mtls->rs = mCtx;
if (ains) {
memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
}
@@ -705,18 +704,32 @@ void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
uint32_t usrLen,
const RsScriptCall *sc) {
- MTLaunchStruct mtls;
+ MTLaunchStructForEach mtls;
if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
forEachKernelSetup(slot, &mtls);
RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
- mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
+ mCtx->launchForEach(ains, inLen, aout, sc, &mtls);
mCtx->setTLS(oldTLS);
}
}
-void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
+void RsdCpuScriptImpl::invokeReduce(uint32_t slot,
+ const Allocation *ain,
+ Allocation *aout,
+ const RsScriptCall *sc) {
+ MTLaunchStructReduce mtls;
+
+ if (reduceMtlsSetup(ain, aout, sc, &mtls)) {
+ reduceKernelSetup(slot, &mtls);
+ RsdCpuScriptImpl *oldTLS = mCtx->setTLS(this);
+ mCtx->launchReduce(ain, aout, &mtls);
+ mCtx->setTLS(oldTLS);
+ }
+}
+
+void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *mtls) {
mtls->script = this;
mtls->fep.slot = slot;
mtls->kernel = mScriptExec->getForEachFunction(slot);
@@ -724,6 +737,12 @@ void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
mtls->sig = mScriptExec->getForEachSignature(slot);
}
+void RsdCpuScriptImpl::reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls) {
+ mtls->script = this;
+ mtls->kernel = mScriptExec->getReduceFunction(slot);
+ rsAssert(mtls->kernel != nullptr);
+}
+
int RsdCpuScriptImpl::invokeRoot() {
RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
int ret = mRoot();
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index 6059825a..248e5c73 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -37,18 +37,6 @@ class ScriptExecutable;
class RsdCpuScriptImpl : public RsdCpuReferenceImpl::CpuScript {
public:
- typedef void (*outer_foreach_t)(
- const RsExpandKernelDriverInfo *,
- uint32_t x1, uint32_t x2,
- uint32_t outstep);
-
- typedef void (* InvokeFunc_t)(void);
- typedef void (* ForEachFunc_t)(void);
- typedef int (* RootFunc_t)(void);
-#ifdef RS_COMPATIBILITY_LIB
- typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
-#endif
-
bool init(char const *resName, char const *cacheDir,
uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags,
char const *bccPluginName = nullptr);
@@ -72,6 +60,11 @@ public:
uint32_t usrLen,
const RsScriptCall* sc) override;
+ void invokeReduce(uint32_t slot,
+ const Allocation* ain,
+ Allocation* aout,
+ const RsScriptCall* sc) override;
+
void invokeInit() override;
void invokeFreeChildren() override;
@@ -92,10 +85,15 @@ public:
bool forEachMtlsSetup(const Allocation ** ains, uint32_t inLen,
Allocation * aout, const void * usr, uint32_t usrLen,
- const RsScriptCall *sc, MTLaunchStruct *mtls);
+ const RsScriptCall *sc, MTLaunchStructForEach *mtls);
- virtual void forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls);
+ virtual void forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *mtls);
+ // Build an MTLaunchStruct suitable for launching a reduce-style kernel.
+ bool reduceMtlsSetup(const Allocation *ain, const Allocation *aout,
+ const RsScriptCall *sc, MTLaunchStructReduce *mtls);
+ // Finalize an MTLaunchStruct for launching a reduce-style kernel.
+ virtual void reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls);
const RsdCpuReference::CpuSymbol * lookupSymbolMath(const char *sym);
static void * lookupRuntimeStub(void* pContext, char const* name);
@@ -122,8 +120,8 @@ protected:
RootFunc_t mRoot;
RootFunc_t mRootExpand;
- InvokeFunc_t mInit;
- InvokeFunc_t mFreeChildren;
+ InitOrDtorFunc_t mInit;
+ InitOrDtorFunc_t mFreeChildren;
ScriptExecutable* mScriptExec;
Allocation **mBoundAllocs;
@@ -135,6 +133,10 @@ public:
const char* getBitcodeFilePath() const { return mBitcodeFilePath.string(); }
private:
+ bool setUpMtlsDimensions(MTLaunchStructCommon *mtls,
+ const RsLaunchDimensions &baseDim,
+ const RsScriptCall *sc);
+
String8 mBitcodeFilePath;
uint32_t mBuildChecksum;
bool mChecksumNeeded;
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 82208db9..9cc9b69d 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -203,7 +203,7 @@ void CpuScriptGroupImpl::execute() {
}
- MTLaunchStruct mtls;
+ MTLaunchStructForEach mtls;
if (fieldDep) {
for (size_t ct=0; ct < ins.size(); ct++) {
@@ -230,7 +230,7 @@ void CpuScriptGroupImpl::execute() {
mtls.fep.usrLen, nullptr);
if (launchOK) {
- mCtx->launchThreads(ains, inLen, outs[ct], nullptr, &mtls);
+ mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls);
}
si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
@@ -280,10 +280,10 @@ void CpuScriptGroupImpl::execute() {
if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
mtls.script = nullptr;
- mtls.kernel = (void (*)())&scriptGroupRoot;
+ mtls.kernel = &scriptGroupRoot;
mtls.fep.usr = &sl;
- mCtx->launchThreads(ains, inLen, outs[0], nullptr, &mtls);
+ mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls);
}
for (size_t ct=0; ct < kernels.size(); ct++) {
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index 50b203d6..f0b657a2 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -165,7 +165,7 @@ CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
RsdCpuScriptImpl* si =
(RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
if (closure->mIsKernel) {
- MTLaunchStruct mtls;
+ MTLaunchStructForEach mtls;
si->forEachKernelSetup(funcID->mSlot, &mtls);
cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
} else {
@@ -568,7 +568,7 @@ void Batch::run() {
}
if (mFunc != nullptr) {
- MTLaunchStruct mtls;
+ MTLaunchStructForEach mtls;
const CPUClosure* firstCpuClosure = mClosures.front();
const CPUClosure* lastCpuClosure = mClosures.back();
@@ -582,7 +582,7 @@ void Batch::run() {
mtls.fep.usr = nullptr;
mtls.kernel = (ForEachFunc_t)mFunc;
- mGroup->getCpuRefImpl()->launchThreads(
+ mGroup->getCpuRefImpl()->launchForEach(
(const Allocation**)firstCpuClosure->mClosure->mArgs,
firstCpuClosure->mClosure->mNumArg,
lastCpuClosure->mClosure->mReturnValue,
@@ -603,7 +603,7 @@ void Batch::run() {
const CPUClosure* cpuClosure = mClosures.front();
const Closure* closure = cpuClosure->mClosure;
- MTLaunchStruct mtls;
+ MTLaunchStructForEach mtls;
if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
closure->mNumArg,
@@ -611,10 +611,10 @@ void Batch::run() {
nullptr, 0, nullptr, &mtls)) {
mtls.script = nullptr;
- mtls.kernel = (void (*)())&groupRoot;
+ mtls.kernel = &groupRoot;
mtls.fep.usr = &mClosures;
- mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
+ mGroup->getCpuRefImpl()->launchForEach(nullptr, 0, nullptr, nullptr, &mtls);
}
for (CPUClosure* cpuClosure : mClosures) {
diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h
index 8e205d85..f2c7f19a 100644
--- a/cpu_ref/rsd_cpu.h
+++ b/cpu_ref/rsd_cpu.h
@@ -58,6 +58,11 @@ public:
uint32_t usrLen,
const RsScriptCall *sc) = 0;
+ virtual void invokeReduce(uint32_t slot,
+ const Allocation *ain,
+ Allocation *aout,
+ const RsScriptCall *sc) = 0;
+
virtual void invokeInit() = 0;
virtual void invokeFreeChildren() = 0;
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index a57409d9..25659d86 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -124,6 +124,15 @@ void rsdScriptInvokeFunction(const Context *dc, Script *s,
cs->invokeFunction(slot, params, paramLength);
}
+void rsdScriptInvokeReduce(const Context *dc, Script *s,
+ uint32_t slot,
+ const Allocation *ain,
+ Allocation *aout,
+ const RsScriptCall *sc) {
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ cs->invokeReduce(slot, ain, aout, sc);
+}
+
void rsdScriptSetGlobalVar(const Context *dc, const Script *s,
uint32_t slot, void *data, size_t dataLength) {
RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
diff --git a/driver/rsdBcc.h b/driver/rsdBcc.h
index d51fb80f..e95529b2 100644
--- a/driver/rsdBcc.h
+++ b/driver/rsdBcc.h
@@ -43,6 +43,13 @@ void rsdScriptInvokeForEach(const android::renderscript::Context *rsc,
size_t usrLen,
const RsScriptCall *sc);
+void rsdScriptInvokeReduce(const android::renderscript::Context *rsc,
+ android::renderscript::Script *s,
+ uint32_t slot,
+ const android::renderscript::Allocation *ain,
+ android::renderscript::Allocation *aout,
+ const RsScriptCall *sc);
+
void rsdScriptInvokeForEachMulti(const android::renderscript::Context *rsc,
android::renderscript::Script *s,
uint32_t slot,
diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp
index 1fcfcc2e..f70b79b6 100644
--- a/driver/rsdCore.cpp
+++ b/driver/rsdCore.cpp
@@ -99,6 +99,8 @@ extern "C" bool rsdHalQueryHal(RsHalInitEnums entry, void **fnPtr) {
fnPtr[0] = (void *)rsdScriptInvokeForEachMulti; break;
case RS_HAL_SCRIPT_UPDATE_CACHED_OBJECT:
fnPtr[0] = (void *)rsdScriptUpdateCachedObject; break;
+ case RS_HAL_SCRIPT_INVOKE_REDUCE:
+ fnPtr[0] = (void *)rsdScriptInvokeReduce; break;
case RS_HAL_ALLOCATION_INIT:
fnPtr[0] = (void *)rsdAllocationInit; break;
diff --git a/rsDriverLoader.cpp b/rsDriverLoader.cpp
index 125a6df4..43e42949 100644
--- a/rsDriverLoader.cpp
+++ b/rsDriverLoader.cpp
@@ -70,6 +70,7 @@ static bool LoadHalTable(Context *rsc, HalQueryHal fn, bool loadGraphics) {
ret &= fn(RS_HAL_SCRIPT_INVOKE_FUNCTION, (void **)&rsc->mHal.funcs.script.invokeFunction);
ret &= fn(RS_HAL_SCRIPT_INVOKE_ROOT, (void **)&rsc->mHal.funcs.script.invokeRoot);
ret &= fn(RS_HAL_SCRIPT_INVOKE_FOR_EACH, (void **)&rsc->mHal.funcs.script.invokeForEach);
+ ret &= fn(RS_HAL_SCRIPT_INVOKE_REDUCE, (void **)&rsc->mHal.funcs.script.invokeReduce);
ret &= fn(RS_HAL_SCRIPT_INVOKE_INIT, (void **)&rsc->mHal.funcs.script.invokeInit);
ret &= fn(RS_HAL_SCRIPT_INVOKE_FREE_CHILDREN, (void **)&rsc->mHal.funcs.script.invokeFreeChildren);
ret &= fn(RS_HAL_SCRIPT_SET_GLOBAL_VAR, (void **)&rsc->mHal.funcs.script.setGlobalVar);
diff --git a/rsScript.cpp b/rsScript.cpp
index 483789cb..bc242921 100644
--- a/rsScript.cpp
+++ b/rsScript.cpp
@@ -227,7 +227,9 @@ void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot,
void rsi_ScriptReduce(Context *rsc, RsScript vs, uint32_t slot,
RsAllocation vain, RsAllocation vaout,
const RsScriptCall *sc, size_t scLen) {
- // TODO(wala)
+ Script *s = static_cast<Script *>(vs);
+ s->runReduce(rsc, slot, static_cast<const Allocation *>(vain),
+ static_cast<Allocation *>(vaout), sc);
}
void rsi_ScriptInvoke(Context *rsc, RsScript vs, uint32_t slot) {
diff --git a/rsScript.h b/rsScript.h
index 6ca4fc19..bd6622d5 100644
--- a/rsScript.h
+++ b/rsScript.h
@@ -84,6 +84,8 @@ public:
int mVersionMinor;
size_t exportedVariableCount;
+ size_t exportedForEachCount;
+ size_t exportedReduceCount;
size_t exportedFunctionCount;
size_t exportedPragmaCount;
char const **exportedPragmaKeyList;
@@ -130,6 +132,9 @@ public:
size_t usrBytes,
const RsScriptCall *sc = nullptr) = 0;
+ virtual void runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+ Allocation *aout, const RsScriptCall *sc) = 0;
+
virtual void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) = 0;
virtual void setupScript(Context *rsc) = 0;
virtual uint32_t run(Context *) = 0;
diff --git a/rsScriptC.cpp b/rsScriptC.cpp
index a24334e3..618c0c94 100644
--- a/rsScriptC.cpp
+++ b/rsScriptC.cpp
@@ -201,6 +201,12 @@ void ScriptC::runForEach(Context *rsc,
sc = &sc_copy;
}
+ if (slot >= mHal.info.exportedForEachCount) {
+ rsc->setError(RS_ERROR_BAD_SCRIPT,
+ "The forEach kernel index is out of bounds");
+ return;
+ }
+
// Trace this function call.
// To avoid overhead we only build the string if tracing is actually
// enabled.
@@ -220,6 +226,10 @@ void ScriptC::runForEach(Context *rsc,
setupGLState(rsc);
setupScript(rsc);
+ if (rsc->props.mLogScripts) {
+ ALOGV("%p ScriptC::runForEach invoking slot %i, ptr %p", rsc, slot, this);
+ }
+
if (rsc->mHal.funcs.script.invokeForEachMulti != nullptr) {
rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen,
aout, usr, usrBytes, sc);
@@ -238,11 +248,31 @@ void ScriptC::runForEach(Context *rsc,
}
}
+void ScriptC::runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+ Allocation *aout, const RsScriptCall *sc) {
+ // TODO: Record the name of the kernel in the tracing information.
+ ATRACE_CALL();
+
+ if (slot >= mHal.info.exportedReduceCount) {
+ rsc->setError(RS_ERROR_BAD_SCRIPT, "The reduce kernel index is out of bounds");
+ return;
+ }
+ if (mRSC->hadFatalError()) return;
+
+ setupScript(rsc);
+
+ if (rsc->props.mLogScripts) {
+ ALOGV("%p ScriptC::runReduce invoking slot %i, ptr %p", rsc, slot, this);
+ }
+
+ rsc->mHal.funcs.script.invokeReduce(rsc, this, slot, ain, aout, sc);
+}
+
void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) {
ATRACE_CALL();
if (slot >= mHal.info.exportedFunctionCount) {
- rsc->setError(RS_ERROR_BAD_SCRIPT, "Calling invoke on bad script");
+ rsc->setError(RS_ERROR_BAD_SCRIPT, "The invokable index is out of bounds");
return;
}
if (mRSC->hadFatalError()) return;
diff --git a/rsScriptC.h b/rsScriptC.h
index e2a6c7c3..09bf82b9 100644
--- a/rsScriptC.h
+++ b/rsScriptC.h
@@ -51,6 +51,9 @@ public:
size_t usrBytes,
const RsScriptCall *sc = nullptr);
+ virtual void runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+ Allocation *aout, const RsScriptCall *sc);
+
virtual void serialize(Context *rsc, OStream *stream) const { }
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_SCRIPT_C; }
static Type *createFromStream(Context *rsc, IStream *stream) { return nullptr; }
diff --git a/rsScriptIntrinsic.cpp b/rsScriptIntrinsic.cpp
index 38965862..223e93bc 100644
--- a/rsScriptIntrinsic.cpp
+++ b/rsScriptIntrinsic.cpp
@@ -68,6 +68,11 @@ void ScriptIntrinsic::runForEach(Context* rsc,
aout, usr, usrBytes, sc);
}
+void ScriptIntrinsic::runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+ Allocation *aout, const RsScriptCall *sc) {
+}
+
+
void ScriptIntrinsic::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) {
}
diff --git a/rsScriptIntrinsic.h b/rsScriptIntrinsic.h
index cfbc8aa4..fd48bdff 100644
--- a/rsScriptIntrinsic.h
+++ b/rsScriptIntrinsic.h
@@ -31,28 +31,31 @@ public:
ObjectBaseRef<const Element> mElement;
ScriptIntrinsic(Context *);
- virtual ~ScriptIntrinsic();
+ ~ScriptIntrinsic() override;
bool init(Context *rsc, RsScriptIntrinsicID iid, Element *e);
- virtual void serialize(Context *rsc, OStream *stream) const;
- virtual RsA3DClassID getClassId() const;
- virtual bool freeChildren();
+ void serialize(Context *rsc, OStream *stream) const override;
+ RsA3DClassID getClassId() const override;
+ bool freeChildren() override;
- virtual void runForEach(Context* rsc,
- uint32_t slot,
- const Allocation ** ains,
- size_t inLen,
- Allocation* aout,
- const void* usr,
- size_t usrBytes,
- const RsScriptCall* sc = nullptr);
+ void runForEach(Context* rsc,
+ uint32_t slot,
+ const Allocation ** ains,
+ size_t inLen,
+ Allocation* aout,
+ const void* usr,
+ size_t usrBytes,
+ const RsScriptCall* sc = nullptr) override;
- virtual void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len);
- virtual void setupScript(Context *rsc);
- virtual uint32_t run(Context *);
- virtual bool isInstrinsic() const { return true; }
+ void runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+ Allocation *aout, const RsScriptCall *sc) override;
+
+ void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) override;
+ void setupScript(Context *rsc) override;
+ uint32_t run(Context *) override;
+ bool isIntrinsic() const override { return true; }
protected:
uint32_t mIntrinsicID;
diff --git a/rs_hal.h b/rs_hal.h
index 6bc7d7a0..390e90df 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -150,6 +150,10 @@ typedef struct {
const void * usr,
size_t usrLen,
const RsScriptCall *sc);
+ void (*invokeReduce)(const Context *rsc, Script *s,
+ uint32_t slot, const Allocation *ain,
+ Allocation *aout,
+ const RsScriptCall *sc);
void (*invokeInit)(const Context *rsc, Script *s);
void (*invokeFreeChildren)(const Context *rsc, Script *s);
@@ -381,6 +385,7 @@ enum RsHalInitEnums {
RS_HAL_SCRIPT_DESTROY = 1012,
RS_HAL_SCRIPT_INVOKE_FOR_EACH_MULTI = 1013,
RS_HAL_SCRIPT_UPDATE_CACHED_OBJECT = 1014,
+ RS_HAL_SCRIPT_INVOKE_REDUCE = 1015,
RS_HAL_ALLOCATION_INIT = 2000,
RS_HAL_ALLOCATION_INIT_ADAPTER = 2001,