summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Gross <dgross@google.com> 2016-06-01 14:45:47 -0700
committer: David Gross <dgross@google.com> 2016-06-10 10:45:29 -0700
commit: ae2ec3febedfc29376b9104413fb4042028f1265 (patch)
tree: 36d8f632058bc158f48b4e148bcf41c177802d1b
parent: 8e70791ff732ce244077310bdfdaf75dc19baabc (diff)
download: rs-ae2ec3febedfc29376b9104413fb4042028f1265.tar.gz
Delete simple reduction implementation. (branch: nougat-dev)
Bug: 27298560
Change-Id: I8c3d568e98aaf0b7d86881c985d13ed5b8e95338
-rw-r--r-- cpp/Script.cpp 16
-rw-r--r-- cpp/rsCppStructs.h 2
-rw-r--r-- cpp/rsDispatch.cpp 13
-rw-r--r-- cpp/rsDispatch.h 4
-rw-r--r-- cpu_ref/rsCpuCore.cpp 139
-rw-r--r-- cpu_ref/rsCpuCore.h 55
-rw-r--r-- cpu_ref/rsCpuExecutable.cpp 77
-rw-r--r-- cpu_ref/rsCpuExecutable.h 22
-rw-r--r-- cpu_ref/rsCpuScript.cpp 81
-rw-r--r-- cpu_ref/rsCpuScript.h 19
-rw-r--r-- cpu_ref/rsd_cpu.h 7
-rw-r--r-- driver/rsdBcc.cpp 13
-rw-r--r-- driver/rsdBcc.h 11
-rw-r--r-- driver/rsdCore.cpp 2
-rw-r--r-- libRS.map 1
-rw-r--r-- rs.spec 8
-rw-r--r-- rsDriverLoader.cpp 1
-rw-r--r-- rsScript.cpp 20
-rw-r--r-- rsScript.h 8
-rw-r--r-- rsScriptC.cpp 30
-rw-r--r-- rsScriptC.h 7
-rw-r--r-- rsScriptIntrinsic.cpp 8
-rw-r--r-- rsScriptIntrinsic.h 7
-rw-r--r-- rs_hal.h 9
24 files changed, 149 insertions, 411 deletions
diff --git a/cpp/Script.cpp b/cpp/Script.cpp
index acea0c8a..52933f27 100644
--- a/cpp/Script.cpp
+++ b/cpp/Script.cpp
@@ -36,21 +36,6 @@ void Script::forEach(uint32_t slot, sp<const Allocation> ain, sp<const Allocatio
tryDispatch(mRS, RS::dispatch->ScriptForEach(mRS->getContext(), getID(), slot, in_id, out_id, usr, usrLen, nullptr, 0));
}
-void Script::reduce(uint32_t slot, sp<const Allocation> ain, sp<const Allocation> aout,
- const RsScriptCall *sc) const {
- if (RS::dispatch->ScriptReduce == nullptr) {
- mRS->throwError(RS_ERROR_RUNTIME_ERROR, "Reduce is not supported at the current API level");
- return;
- }
- if (ain == nullptr || aout == nullptr) {
- mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Both ain and aout are required to be non-null.");
- return;
- }
- void *in_id = BaseObj::getObjID(ain);
- void *out_id = BaseObj::getObjID(aout);
- tryDispatch(mRS, RS::dispatch->ScriptReduce(mRS->getContext(), getID(), slot, in_id, out_id, sc, sc == nullptr ? 0 : sizeof(*sc)));
-}
-
Script::Script(void *id, sp<RS> rs) : BaseObj(id, rs) {
}
@@ -71,4 +56,3 @@ void Script::setVar(uint32_t index, const void *v, size_t len) const {
void Script::FieldBase::init(sp<RS> rs, uint32_t dimx, uint32_t usages) {
mAllocation = Allocation::createSized(rs, mElement, dimx, RS_ALLOCATION_USAGE_SCRIPT | usages);
}
-
diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h
index 95d190b0..18023864 100644
--- a/cpp/rsCppStructs.h
+++ b/cpp/rsCppStructs.h
@@ -1700,8 +1700,6 @@ protected:
Script(void *id, sp<RS> rs);
void forEach(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out,
const void *v, size_t) const;
- void reduce(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out,
- const RsScriptCall *sc) const;
void bindAllocation(sp<Allocation> va, uint32_t slot) const;
void setVar(uint32_t index, const void *, size_t len) const;
void setVar(uint32_t index, sp<const BaseObj> o) const;
diff --git a/cpp/rsDispatch.cpp b/cpp/rsDispatch.cpp
index f6121459..5773903c 100644
--- a/cpp/rsDispatch.cpp
+++ b/cpp/rsDispatch.cpp
@@ -21,8 +21,7 @@
#include <limits.h>
#define LOG_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "RS Dispatch", __VA_ARGS__);
-#define REDUCE_API_LEVEL INT_MAX
-#define REDUCE_NEW_API_LEVEL 24
+#define REDUCE_API_LEVEL 24
bool loadSymbols(void* handle, dispatchTable& dispatchTab, int targetApiLevel) {
#ifdef __LP64__
@@ -101,7 +100,6 @@ bool loadSymbols(void* handle, dispatchTable& dispatchTab, int targetApiLevel) {
dispatchTab.ScriptInvokeV = (ScriptInvokeVFnPtr)dlsym(handle, "rsScriptInvokeV");
dispatchTab.ScriptKernelIDCreate = (ScriptKernelIDCreateFnPtr)dlsym(handle, "rsScriptKernelIDCreate");
dispatchTab.ScriptReduce = (ScriptReduceFnPtr)dlsym(handle, "rsScriptReduce");
- dispatchTab.ScriptReduceNew = (ScriptReduceNewFnPtr)dlsym(handle, "rsScriptReduceNew");
dispatchTab.ScriptSetTimeZone = (ScriptSetTimeZoneFnPtr)dlsym(handle, "rsScriptSetTimeZone");
dispatchTab.ScriptSetVarD = (ScriptSetVarDFnPtr)dlsym(handle, "rsScriptSetVarD");
dispatchTab.ScriptSetVarF = (ScriptSetVarFFnPtr)dlsym(handle, "rsScriptSetVarF");
@@ -427,7 +425,7 @@ bool loadSymbols(void* handle, dispatchTable& dispatchTab, int targetApiLevel) {
return false;
}
}
- // TODO: Update the API level when reduce is added.
+
if (targetApiLevel >= REDUCE_API_LEVEL) {
if (dispatchTab.ScriptReduce == nullptr) {
LOG_ERR("Couldn't initialize dispatchTab.ScriptReduce");
@@ -435,13 +433,6 @@ bool loadSymbols(void* handle, dispatchTable& dispatchTab, int targetApiLevel) {
}
}
- if (targetApiLevel >= REDUCE_NEW_API_LEVEL) {
- if (dispatchTab.ScriptReduceNew == nullptr) {
- LOG_ERR("Couldn't initialize dispatchTab.ScriptReduceNew");
- return false;
- }
- }
-
return true;
}
diff --git a/cpp/rsDispatch.h b/cpp/rsDispatch.h
index 8f2df705..df12f320 100644
--- a/cpp/rsDispatch.h
+++ b/cpp/rsDispatch.h
@@ -77,8 +77,7 @@ typedef void (*ScriptInvokeFnPtr) (RsContext, RsScript, uint32_t);
typedef void (*ScriptInvokeVFnPtr) (RsContext, RsScript, uint32_t, const void*, size_t);
typedef void (*ScriptForEachFnPtr) (RsContext, RsScript, uint32_t, RsAllocation, RsAllocation, const void*, size_t, const RsScriptCall*, size_t);
typedef void (*ScriptForEachMultiFnPtr) (RsContext, RsScript, uint32_t, RsAllocation*, size_t, RsAllocation, const void*, size_t, const RsScriptCall*, size_t);
-typedef void (*ScriptReduceFnPtr) (RsContext, RsScript, uint32_t, RsAllocation, RsAllocation, const RsScriptCall*, size_t);
-typedef void (*ScriptReduceNewFnPtr) (RsContext, RsScript, uint32_t, RsAllocation*, size_t, RsAllocation, const RsScriptCall*, size_t);
+typedef void (*ScriptReduceFnPtr) (RsContext, RsScript, uint32_t, RsAllocation*, size_t, RsAllocation, const RsScriptCall*, size_t);
typedef void (*ScriptSetVarIFnPtr) (RsContext, RsScript, uint32_t, int);
typedef void (*ScriptSetVarObjFnPtr) (RsContext, RsScript, uint32_t, RsObjectBase);
typedef void (*ScriptSetVarJFnPtr) (RsContext, RsScript, uint32_t, int64_t);
@@ -173,7 +172,6 @@ struct dispatchTable {
ScriptInvokeVFnPtr ScriptInvokeV;
ScriptKernelIDCreateFnPtr ScriptKernelIDCreate;
ScriptReduceFnPtr ScriptReduce;
- ScriptReduceNewFnPtr ScriptReduceNew;
ScriptSetTimeZoneFnPtr ScriptSetTimeZone;
ScriptSetVarDFnPtr ScriptSetVarD;
ScriptSetVarFFnPtr ScriptSetVarF;
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 011b8e3d..8fefe882 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -45,7 +45,7 @@ static pid_t gettid() {
using namespace android;
using namespace android::renderscript;
-#define REDUCE_NEW_ALOGV(mtls, level, ...) do { if ((mtls)->logReduce >= (level)) ALOGV(__VA_ARGS__); } while(0)
+#define REDUCE_ALOGV(mtls, level, ...) do { if ((mtls)->logReduce >= (level)) ALOGV(__VA_ARGS__); } while(0)
static pthread_key_t gThreadTLSKey = 0;
static uint32_t gThreadTLSKeyCount = 0;
@@ -354,7 +354,7 @@ static inline void FepPtrSetup(const MTLaunchStructForEach *mtls, RsExpandKernel
// mtls - The MTLaunchStruct holding information about the kernel launch
// redp - The reduce parameters (driver info structure)
// x, y, z - The start offsets into each dimension
-static inline void RedpPtrSetup(const MTLaunchStructReduceNew *mtls, RsExpandKernelDriverInfo *redp,
+static inline void RedpPtrSetup(const MTLaunchStructReduce *mtls, RsExpandKernelDriverInfo *redp,
uint32_t x, uint32_t y, uint32_t z) {
for (uint32_t i = 0; i < redp->inLen; i++) {
redp->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z);
@@ -508,8 +508,8 @@ static const char *format_bytes(FormatBuf *outBuf, const uint8_t *inBuf, const i
return *outBuf;
}
-static void reduce_new_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructReduceNew *mtls,
- const char *walkerName, uint32_t threadIdx) {
+static void reduce_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructReduce *mtls,
+ const char *walkerName, uint32_t threadIdx) {
rsAssert(!accumPtr);
uint32_t accumIdx = (uint32_t)__sync_fetch_and_add(&mtls->accumCount, 1);
@@ -522,8 +522,8 @@ static void reduce_new_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructR
accumPtr = mtls->accumAlloc + mtls->accumStride * (accumIdx - 1);
}
}
- REDUCE_NEW_ALOGV(mtls, 2, "%s(%p): idx = %u got accumCount %u and accumPtr %p",
- walkerName, mtls->accumFunc, threadIdx, accumIdx, accumPtr);
+ REDUCE_ALOGV(mtls, 2, "%s(%p): idx = %u got accumCount %u and accumPtr %p",
+ walkerName, mtls->accumFunc, threadIdx, accumIdx, accumPtr);
// initialize accumulator
if (mtls->initFunc) {
mtls->initFunc(accumPtr);
@@ -532,18 +532,18 @@ static void reduce_new_get_accumulator(uint8_t *&accumPtr, const MTLaunchStructR
}
}
-static void walk_1d_reduce_new(void *usr, uint32_t idx) {
- const MTLaunchStructReduceNew *mtls = (const MTLaunchStructReduceNew *)usr;
+static void walk_1d_reduce(void *usr, uint32_t idx) {
+ const MTLaunchStructReduce *mtls = (const MTLaunchStructReduce *)usr;
RsExpandKernelDriverInfo redp = mtls->redp;
// find accumulator
uint8_t *&accumPtr = mtls->accumPtr[idx];
if (!accumPtr) {
- reduce_new_get_accumulator(accumPtr, mtls, __func__, idx);
+ reduce_get_accumulator(accumPtr, mtls, __func__, idx);
}
// accumulate
- const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc;
+ const ReduceAccumulatorFunc_t fn = mtls->accumFunc;
while (1) {
uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
uint32_t xStart = mtls->start.x + slice * mtls->mSliceSize;
@@ -566,23 +566,23 @@ static void walk_1d_reduce_new(void *usr, uint32_t idx) {
} else {
fmt[0] = 0;
}
- REDUCE_NEW_ALOGV(mtls, 2, "walk_1d_reduce_new(%p): idx = %u, x in [%u, %u)%s",
- mtls->accumFunc, idx, xStart, xEnd, fmt);
+ REDUCE_ALOGV(mtls, 2, "walk_1d_reduce(%p): idx = %u, x in [%u, %u)%s",
+ mtls->accumFunc, idx, xStart, xEnd, fmt);
}
}
-static void walk_2d_reduce_new(void *usr, uint32_t idx) {
- const MTLaunchStructReduceNew *mtls = (const MTLaunchStructReduceNew *)usr;
+static void walk_2d_reduce(void *usr, uint32_t idx) {
+ const MTLaunchStructReduce *mtls = (const MTLaunchStructReduce *)usr;
RsExpandKernelDriverInfo redp = mtls->redp;
// find accumulator
uint8_t *&accumPtr = mtls->accumPtr[idx];
if (!accumPtr) {
- reduce_new_get_accumulator(accumPtr, mtls, __func__, idx);
+ reduce_get_accumulator(accumPtr, mtls, __func__, idx);
}
// accumulate
- const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc;
+ const ReduceAccumulatorFunc_t fn = mtls->accumFunc;
while (1) {
uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
uint32_t yStart = mtls->start.y + slice * mtls->mSliceSize;
@@ -605,23 +605,23 @@ static void walk_2d_reduce_new(void *usr, uint32_t idx) {
} else {
fmt[0] = 0;
}
- REDUCE_NEW_ALOGV(mtls, 2, "walk_2d_reduce_new(%p): idx = %u, y in [%u, %u)%s",
- mtls->accumFunc, idx, yStart, yEnd, fmt);
+ REDUCE_ALOGV(mtls, 2, "walk_2d_reduce(%p): idx = %u, y in [%u, %u)%s",
+ mtls->accumFunc, idx, yStart, yEnd, fmt);
}
}
-static void walk_3d_reduce_new(void *usr, uint32_t idx) {
- const MTLaunchStructReduceNew *mtls = (const MTLaunchStructReduceNew *)usr;
+static void walk_3d_reduce(void *usr, uint32_t idx) {
+ const MTLaunchStructReduce *mtls = (const MTLaunchStructReduce *)usr;
RsExpandKernelDriverInfo redp = mtls->redp;
// find accumulator
uint8_t *&accumPtr = mtls->accumPtr[idx];
if (!accumPtr) {
- reduce_new_get_accumulator(accumPtr, mtls, __func__, idx);
+ reduce_get_accumulator(accumPtr, mtls, __func__, idx);
}
// accumulate
- const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc;
+ const ReduceAccumulatorFunc_t fn = mtls->accumFunc;
while (1) {
uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
@@ -640,44 +640,25 @@ static void walk_3d_reduce_new(void *usr, uint32_t idx) {
} else {
fmt[0] = 0;
}
- REDUCE_NEW_ALOGV(mtls, 2, "walk_3d_reduce_new(%p): idx = %u, z = %u%s",
- mtls->accumFunc, idx, redp.current.z, fmt);
+ REDUCE_ALOGV(mtls, 2, "walk_3d_reduce(%p): idx = %u, z = %u%s",
+ mtls->accumFunc, idx, redp.current.z, fmt);
}
}
-// Launch a simple reduce-style kernel.
-// Inputs:
-// ain: The allocation that contains the input
-// aout: The allocation that will hold the output
-// mtls: Holds launch parameters
-void RsdCpuReferenceImpl::launchReduce(const Allocation *ain,
- Allocation *aout,
- MTLaunchStructReduce *mtls) {
- const uint32_t xStart = mtls->start.x;
- const uint32_t xEnd = mtls->end.x;
-
- if (xStart >= xEnd) {
- return;
- }
-
- const uint32_t startOffset = ain->getType()->getElementSizeBytes() * xStart;
- mtls->kernel(&mtls->inBuf[startOffset], mtls->outBuf, xEnd - xStart);
-}
-
// Launch a general reduce-style kernel.
// Inputs:
// ains[0..inLen-1]: Array of allocations that contain the inputs
// aout: The allocation that will hold the output
// mtls: Holds launch parameters
-void RsdCpuReferenceImpl::launchReduceNew(const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- MTLaunchStructReduceNew *mtls) {
+void RsdCpuReferenceImpl::launchReduce(const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
+ MTLaunchStructReduce *mtls) {
mtls->logReduce = mRSC->props.mLogReduce;
if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInKernel) {
- launchReduceNewParallel(ains, inLen, aout, mtls);
+ launchReduceParallel(ains, inLen, aout, mtls);
} else {
- launchReduceNewSerial(ains, inLen, aout, mtls);
+ launchReduceSerial(ains, inLen, aout, mtls);
}
}
@@ -686,12 +667,12 @@ void RsdCpuReferenceImpl::launchReduceNew(const Allocation ** ains,
// ains[0..inLen-1]: Array of allocations that contain the inputs
// aout: The allocation that will hold the output
// mtls: Holds launch parameters
-void RsdCpuReferenceImpl::launchReduceNewSerial(const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- MTLaunchStructReduceNew *mtls) {
- REDUCE_NEW_ALOGV(mtls, 1, "launchReduceNewSerial(%p): %u x %u x %u", mtls->accumFunc,
- mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z);
+void RsdCpuReferenceImpl::launchReduceSerial(const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
+ MTLaunchStructReduce *mtls) {
+ REDUCE_ALOGV(mtls, 1, "launchReduceSerial(%p): %u x %u x %u", mtls->accumFunc,
+ mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z);
// In the presence of outconverter, we allocate temporary memory for
// the accumulator.
@@ -710,7 +691,7 @@ void RsdCpuReferenceImpl::launchReduceNewSerial(const Allocation ** ains,
}
// accumulate
- const ReduceNewAccumulatorFunc_t fn = mtls->accumFunc;
+ const ReduceAccumulatorFunc_t fn = mtls->accumFunc;
uint32_t slice = 0;
while (SelectOuterSlice(mtls, &mtls->redp, slice++)) {
for (mtls->redp.current.y = mtls->start.y;
@@ -733,13 +714,13 @@ void RsdCpuReferenceImpl::launchReduceNewSerial(const Allocation ** ains,
// ains[0..inLen-1]: Array of allocations that contain the inputs
// aout: The allocation that will hold the output
// mtls: Holds launch parameters
-void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- MTLaunchStructReduceNew *mtls) {
+void RsdCpuReferenceImpl::launchReduceParallel(const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
+ MTLaunchStructReduce *mtls) {
// For now, we don't know how to go parallel in the absence of a combiner.
if (!mtls->combFunc) {
- launchReduceNewSerial(ains, inLen, aout, mtls);
+ launchReduceSerial(ains, inLen, aout, mtls);
return;
}
@@ -777,19 +758,19 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains,
rsAssert(!mInKernel);
mInKernel = true;
- REDUCE_NEW_ALOGV(mtls, 1, "launchReduceNewParallel(%p): %u x %u x %u, %u threads, accumAlloc = %p",
- mtls->accumFunc,
- mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z,
- numThreads, mtls->accumAlloc);
+ REDUCE_ALOGV(mtls, 1, "launchReduceParallel(%p): %u x %u x %u, %u threads, accumAlloc = %p",
+ mtls->accumFunc,
+ mtls->redp.dim.x, mtls->redp.dim.y, mtls->redp.dim.z,
+ numThreads, mtls->accumAlloc);
if (mtls->redp.dim.z > 1) {
mtls->mSliceSize = 1;
- launchThreads(walk_3d_reduce_new, mtls);
+ launchThreads(walk_3d_reduce, mtls);
} else if (mtls->redp.dim.y > 1) {
mtls->mSliceSize = rsMax(1U, mtls->redp.dim.y / (numThreads * 4));
- launchThreads(walk_2d_reduce_new, mtls);
+ launchThreads(walk_2d_reduce, mtls);
} else {
mtls->mSliceSize = rsMax(1U, mtls->redp.dim.x / (numThreads * 4));
- launchThreads(walk_1d_reduce_new, mtls);
+ launchThreads(walk_1d_reduce, mtls);
}
mInKernel = false;
@@ -804,12 +785,12 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains,
if (mtls->combFunc) {
if (mtls->logReduce >= 3) {
FormatBuf fmt;
- REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): accumulating into%s",
- mtls->accumFunc,
- format_bytes(&fmt, finalAccumPtr, mtls->accumSize));
- REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): accumulator[%d]%s",
- mtls->accumFunc, idx,
- format_bytes(&fmt, thisAccumPtr, mtls->accumSize));
+ REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): accumulating into%s",
+ mtls->accumFunc,
+ format_bytes(&fmt, finalAccumPtr, mtls->accumSize));
+ REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): accumulator[%d]%s",
+ mtls->accumFunc, idx,
+ format_bytes(&fmt, thisAccumPtr, mtls->accumSize));
}
mtls->combFunc(finalAccumPtr, thisAccumPtr);
} else {
@@ -823,8 +804,8 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains,
rsAssert(finalAccumPtr != nullptr);
if (mtls->logReduce >= 3) {
FormatBuf fmt;
- REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): final accumulator%s",
- mtls->accumFunc, format_bytes(&fmt, finalAccumPtr, mtls->accumSize));
+ REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): final accumulator%s",
+ mtls->accumFunc, format_bytes(&fmt, finalAccumPtr, mtls->accumSize));
}
// Outconvert
@@ -832,9 +813,9 @@ void RsdCpuReferenceImpl::launchReduceNewParallel(const Allocation ** ains,
mtls->outFunc(mtls->redp.outPtr[0], finalAccumPtr);
if (mtls->logReduce >= 3) {
FormatBuf fmt;
- REDUCE_NEW_ALOGV(mtls, 3, "launchReduceNewParallel(%p): final outconverted result%s",
- mtls->accumFunc,
- format_bytes(&fmt, mtls->redp.outPtr[0], mtls->redp.outStride[0]));
+ REDUCE_ALOGV(mtls, 3, "launchReduceParallel(%p): final outconverted result%s",
+ mtls->accumFunc,
+ format_bytes(&fmt, mtls->redp.outPtr[0], mtls->redp.outStride[0]));
}
}
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index 62882aa8..1515b77c 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -32,22 +32,21 @@ namespace renderscript {
extern bool gArchUseSIMD;
// Function types found in RenderScript code
-typedef void (*ReduceFunc_t)(const uint8_t *inBuf, uint8_t *outBuf, uint32_t len);
-typedef void (*ReduceNewAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum);
-typedef void (*ReduceNewCombinerFunc_t)(uint8_t *accum, const uint8_t *other);
-typedef void (*ReduceNewInitializerFunc_t)(uint8_t *accum);
-typedef void (*ReduceNewOutConverterFunc_t)(uint8_t *out, const uint8_t *accum);
+typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum);
+typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other);
+typedef void (*ReduceInitializerFunc_t)(uint8_t *accum);
+typedef void (*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum);
typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride);
typedef void (*InvokeFunc_t)(void *params);
typedef void (*InitOrDtorFunc_t)(void);
typedef int (*RootFunc_t)(void);
-struct ReduceNewDescription {
- ReduceNewAccumulatorFunc_t accumFunc; // expanded accumulator function
- ReduceNewInitializerFunc_t initFunc; // user initializer function
- ReduceNewCombinerFunc_t combFunc; // user combiner function
- ReduceNewOutConverterFunc_t outFunc; // user outconverter function
- size_t accumSize; // accumulator datum size, in bytes
+struct ReduceDescription {
+ ReduceAccumulatorFunc_t accumFunc; // expanded accumulator function
+ ReduceInitializerFunc_t initFunc; // user initializer function
+ ReduceCombinerFunc_t combFunc; // user combiner function
+ ReduceOutConverterFunc_t outFunc; // user outconverter function
+ size_t accumSize; // accumulator datum size, in bytes
};
// Internal driver callback used to execute a kernel
@@ -75,8 +74,7 @@ struct MTLaunchStructCommon {
RsLaunchDimensions start;
RsLaunchDimensions end;
// Points to MTLaunchStructForEach::fep::dim or
- // MTLaunchStructReduce::inputDim or
- // MTLaunchStructReduceNew::redp::dim.
+ // MTLaunchStructReduce::redp::dim.
RsLaunchDimensions *dimPtr;
};
@@ -90,22 +88,15 @@ struct MTLaunchStructForEach : public MTLaunchStructCommon {
};
struct MTLaunchStructReduce : public MTLaunchStructCommon {
- ReduceFunc_t kernel;
- const uint8_t *inBuf;
- uint8_t *outBuf;
- RsLaunchDimensions inputDim;
-};
-
-struct MTLaunchStructReduceNew : public MTLaunchStructCommon {
// Driver info structure
RsExpandKernelDriverInfo redp;
const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
- ReduceNewAccumulatorFunc_t accumFunc;
- ReduceNewInitializerFunc_t initFunc;
- ReduceNewCombinerFunc_t combFunc;
- ReduceNewOutConverterFunc_t outFunc;
+ ReduceAccumulatorFunc_t accumFunc;
+ ReduceInitializerFunc_t initFunc;
+ ReduceCombinerFunc_t combFunc;
+ ReduceOutConverterFunc_t outFunc;
size_t accumSize; // accumulator datum size in bytes
@@ -174,13 +165,9 @@ public:
void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout,
const RsScriptCall *sc, MTLaunchStructForEach *mtls);
- // Launch a simple reduce kernel
- void launchReduce(const Allocation *ain, Allocation *aout,
- MTLaunchStructReduce *mtls);
-
// Launch a general reduce kernel
- void launchReduceNew(const Allocation ** ains, uint32_t inLen, Allocation *aout,
- MTLaunchStructReduceNew *mtls);
+ void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout,
+ MTLaunchStructReduce *mtls);
CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir,
uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override;
@@ -271,10 +258,10 @@ protected:
long mPageSize;
// Launch a general reduce kernel
- void launchReduceNewSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout,
- MTLaunchStructReduceNew *mtls);
- void launchReduceNewParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout,
- MTLaunchStructReduceNew *mtls);
+ void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout,
+ MTLaunchStructReduce *mtls);
+ void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout,
+ MTLaunchStructReduce *mtls);
};
diff --git a/cpu_ref/rsCpuExecutable.cpp b/cpu_ref/rsCpuExecutable.cpp
index ca9a4b62..3d5e6350 100644
--- a/cpu_ref/rsCpuExecutable.cpp
+++ b/cpu_ref/rsCpuExecutable.cpp
@@ -272,7 +272,6 @@ void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDi
#define EXPORT_FUNC_STR "exportFuncCount: "
#define EXPORT_FOREACH_STR "exportForEachCount: "
#define EXPORT_REDUCE_STR "exportReduceCount: "
-#define EXPORT_REDUCE_NEW_STR "exportReduceNewCount: "
#define OBJECT_SLOT_STR "objectSlotCount: "
#define PRAGMA_STR "pragmaCount: "
#define THREADABLE_STR "isThreadable: "
@@ -311,7 +310,6 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
size_t funcCount = 0;
size_t forEachCount = 0;
size_t reduceCount = 0;
- size_t reduceNewCount = 0;
size_t objectSlotCount = 0;
size_t pragmaCount = 0;
bool isThreadable = true;
@@ -322,8 +320,7 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
InvokeFunc_t* invokeFunctions = nullptr;
ForEachFunc_t* forEachFunctions = nullptr;
uint32_t* forEachSignatures = nullptr;
- ReduceFunc_t* reduceFunctions = nullptr;
- ReduceNewDescription* reduceNewDescriptions = nullptr;
+ ReduceDescription* reduceDescriptions = nullptr;
const char ** pragmaKeys = nullptr;
const char ** pragmaValues = nullptr;
uint32_t checksum = 0;
@@ -455,56 +452,21 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
}
}
- // Read simple reduce kernels
- if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
- goto error;
- }
- if (sscanf(line, EXPORT_REDUCE_STR "%zu", &reduceCount) != 1) {
- ALOGE("Invalid export reduce count!: %s", line);
- goto error;
- }
-
- reduceFunctions = new ReduceFunc_t[reduceCount];
- if (reduceFunctions == nullptr) {
- goto error;
- }
-
- for (size_t i = 0; i < reduceCount; ++i) {
- if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
- goto error;
- }
- char *c = strrchr(line, '\n');
- if (c) {
- *c = '\0';
- }
-
- // Lookup the expanded reduce kernel.
- strncat(line, ".expand", MAXLINESTR-strlen(line));
-
- reduceFunctions[i] =
- reinterpret_cast<ReduceFunc_t>(dlsym(sharedObj, line));
- if (reduceFunctions[i] == nullptr) {
- ALOGE("Failed to get function address for %s(): %s",
- line, dlerror());
- goto error;
- }
- }
-
// Read general reduce kernels
if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
goto error;
}
- if (sscanf(line, EXPORT_REDUCE_NEW_STR "%zu", &reduceNewCount) != 1) {
+ if (sscanf(line, EXPORT_REDUCE_STR "%zu", &reduceCount) != 1) {
ALOGE("Invalid export reduce new count!: %s", line);
goto error;
}
- reduceNewDescriptions = new ReduceNewDescription[reduceNewCount];
- if (reduceNewDescriptions == nullptr) {
+ reduceDescriptions = new ReduceDescription[reduceCount];
+ if (reduceDescriptions == nullptr) {
goto error;
}
- for (size_t i = 0; i < reduceNewCount; ++i) {
+ for (size_t i = 0; i < reduceCount; ++i) {
static const char kNoName[] = ".";
unsigned int tmpSig = 0;
@@ -545,25 +507,25 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
// The current implementation does not use the signature
// or reduce name.
- reduceNewDescriptions[i].accumSize = tmpSize;
+ reduceDescriptions[i].accumSize = tmpSize;
// Process the (optional) initializer.
if (strcmp(tmpNameInitializer, kNoName)) {
// Lookup the original user-written initializer.
- if (!(reduceNewDescriptions[i].initFunc =
- (ReduceNewInitializerFunc_t) dlsym(sharedObj, tmpNameInitializer))) {
+ if (!(reduceDescriptions[i].initFunc =
+ (ReduceInitializerFunc_t) dlsym(sharedObj, tmpNameInitializer))) {
ALOGE("Failed to find initializer function address for %s(): %s",
tmpNameInitializer, dlerror());
goto error;
}
} else {
- reduceNewDescriptions[i].initFunc = nullptr;
+ reduceDescriptions[i].initFunc = nullptr;
}
// Lookup the expanded accumulator.
strncat(tmpNameAccumulator, ".expand", MAXLINESTR-strlen(tmpNameAccumulator));
- if (!(reduceNewDescriptions[i].accumFunc =
- (ReduceNewAccumulatorFunc_t) dlsym(sharedObj, tmpNameAccumulator))) {
+ if (!(reduceDescriptions[i].accumFunc =
+ (ReduceAccumulatorFunc_t) dlsym(sharedObj, tmpNameAccumulator))) {
ALOGE("Failed to find accumulator function address for %s(): %s",
tmpNameAccumulator, dlerror());
goto error;
@@ -572,27 +534,27 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
// Process the (optional) combiner.
if (strcmp(tmpNameCombiner, kNoName)) {
// Lookup the original user-written combiner.
- if (!(reduceNewDescriptions[i].combFunc =
- (ReduceNewCombinerFunc_t) dlsym(sharedObj, tmpNameCombiner))) {
+ if (!(reduceDescriptions[i].combFunc =
+ (ReduceCombinerFunc_t) dlsym(sharedObj, tmpNameCombiner))) {
ALOGE("Failed to find combiner function address for %s(): %s",
tmpNameCombiner, dlerror());
goto error;
}
} else {
- reduceNewDescriptions[i].combFunc = nullptr;
+ reduceDescriptions[i].combFunc = nullptr;
}
// Process the (optional) outconverter.
if (strcmp(tmpNameOutConverter, kNoName)) {
// Lookup the original user-written outconverter.
- if (!(reduceNewDescriptions[i].outFunc =
- (ReduceNewOutConverterFunc_t) dlsym(sharedObj, tmpNameOutConverter))) {
+ if (!(reduceDescriptions[i].outFunc =
+ (ReduceOutConverterFunc_t) dlsym(sharedObj, tmpNameOutConverter))) {
ALOGE("Failed to find outconverter function address for %s(): %s",
tmpNameOutConverter, dlerror());
goto error;
}
} else {
- reduceNewDescriptions[i].outFunc = nullptr;
+ reduceDescriptions[i].outFunc = nullptr;
}
}
@@ -726,8 +688,7 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
fieldAddress, fieldIsObject, fieldName, varCount,
invokeFunctions, funcCount,
forEachFunctions, forEachSignatures, forEachCount,
- reduceFunctions, reduceCount,
- reduceNewDescriptions, reduceNewCount,
+ reduceDescriptions, reduceCount,
pragmaKeys, pragmaValues, pragmaCount,
rsGlobalNames, rsGlobalAddresses, rsGlobalSizes, rsGlobalProperties,
numEntries, isThreadable, checksum);
@@ -745,8 +706,6 @@ error:
delete[] pragmaKeys;
#endif // RS_COMPATIBILITY_LIB
- delete[] reduceFunctions;
-
delete[] forEachSignatures;
delete[] forEachFunctions;
diff --git a/cpu_ref/rsCpuExecutable.h b/cpu_ref/rsCpuExecutable.h
index 72c352c2..90d37591 100644
--- a/cpu_ref/rsCpuExecutable.h
+++ b/cpu_ref/rsCpuExecutable.h
@@ -67,8 +67,7 @@ public:
InvokeFunc_t* invokeFunctions, size_t funcCount,
ForEachFunc_t* forEachFunctions, uint32_t* forEachSignatures,
size_t forEachCount,
- ReduceFunc_t* reduceFunctions, size_t reduceCount,
- ReduceNewDescription *reduceNewDescriptions, size_t reduceNewCount,
+ ReduceDescription *reduceDescriptions, size_t reduceCount,
const char** pragmaKeys, const char** pragmaValues,
size_t pragmaCount,
const char **globalNames, const void **globalAddresses,
@@ -80,8 +79,7 @@ public:
mInvokeFunctions(invokeFunctions), mFuncCount(funcCount),
mForEachFunctions(forEachFunctions), mForEachSignatures(forEachSignatures),
mForEachCount(forEachCount),
- mReduceFunctions(reduceFunctions), mReduceCount(reduceCount),
- mReduceNewDescriptions(reduceNewDescriptions), mReduceNewCount(reduceNewCount),
+ mReduceDescriptions(reduceDescriptions), mReduceCount(reduceCount),
mPragmaKeys(pragmaKeys), mPragmaValues(pragmaValues),
mPragmaCount(pragmaCount), mGlobalNames(globalNames),
mGlobalAddresses(globalAddresses), mGlobalSizes(globalSizes),
@@ -107,9 +105,7 @@ public:
delete[] mPragmaValues;
delete[] mPragmaKeys;
- delete[] mReduceFunctions;
-
- delete[] mReduceNewDescriptions;
+ delete[] mReduceDescriptions;
delete[] mForEachSignatures;
delete[] mForEachFunctions;
@@ -136,7 +132,6 @@ public:
size_t getExportedFunctionCount() const { return mFuncCount; }
size_t getExportedForEachCount() const { return mForEachCount; }
size_t getExportedReduceCount() const { return mReduceCount; }
- size_t getExportedReduceNewCount() const { return mReduceNewCount; }
size_t getPragmaCount() const { return mPragmaCount; }
void* getFieldAddress(int slot) const { return mFieldAddress[slot]; }
@@ -149,10 +144,8 @@ public:
ForEachFunc_t getForEachFunction(int slot) const { return mForEachFunctions[slot]; }
uint32_t getForEachSignature(int slot) const { return mForEachSignatures[slot]; }
- ReduceFunc_t getReduceFunction(int slot) const { return mReduceFunctions[slot]; }
-
- const ReduceNewDescription* getReduceNewDescription(int slot) const {
- return &mReduceNewDescriptions[slot];
+ const ReduceDescription* getReduceDescription(int slot) const {
+ return &mReduceDescriptions[slot];
}
const char ** getPragmaKeys() const { return mPragmaKeys; }
@@ -207,12 +200,9 @@ private:
uint32_t* mForEachSignatures;
size_t mForEachCount;
- ReduceFunc_t* mReduceFunctions;
+ ReduceDescription* mReduceDescriptions;
size_t mReduceCount;
- ReduceNewDescription* mReduceNewDescriptions;
- size_t mReduceNewCount;
-
const char ** mPragmaKeys;
const char ** mPragmaValues;
size_t mPragmaCount;
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index 0400fab7..582b3424 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -500,7 +500,6 @@ void RsdCpuScriptImpl::populateScript(Script *script) {
// Copy info over to runtime
script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
script->mHal.info.exportedReduceCount = mScriptExec->getExportedReduceCount();
- script->mHal.info.exportedReduceNewCount = mScriptExec->getExportedReduceNewCount();
script->mHal.info.exportedForEachCount = mScriptExec->getExportedForEachCount();
script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
@@ -555,52 +554,14 @@ bool RsdCpuScriptImpl::setUpMtlsDimensions(MTLaunchStructCommon *mtls,
return true;
}
-// Preliminary work to prepare a simple reduce-style kernel for launch.
-bool RsdCpuScriptImpl::reduceMtlsSetup(const Allocation *ain,
- const Allocation *aout,
+// Preliminary work to prepare a general reduce-style kernel for launch.
+bool RsdCpuScriptImpl::reduceMtlsSetup(const Allocation ** ains,
+ uint32_t inLen,
+ const Allocation * aout,
const RsScriptCall *sc,
MTLaunchStructReduce *mtls) {
- rsAssert(ain && aout);
- memset(mtls, 0, sizeof(MTLaunchStructReduce));
- mtls->dimPtr = &mtls->inputDim;
-
- if (allocationLODIsNull(ain) || allocationLODIsNull(aout)) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "reduce called with a null allocation");
- return false;
- }
-
- // Set up the dimensions of the input.
- const Type *inType = ain->getType();
- mtls->inputDim.x = inType->getDimX();
- rsAssert(inType->getDimY() == 0);
-
- if (!setUpMtlsDimensions(mtls, mtls->inputDim, sc)) {
- return false;
- }
-
- mtls->rs = mCtx;
- // Currently not threaded.
- mtls->isThreadable = false;
- mtls->mSliceNum = -1;
-
- // Set up input and output.
- mtls->inBuf = static_cast<uint8_t *>(ain->getPointerUnchecked(0, 0));
- mtls->outBuf = static_cast<uint8_t *>(aout->getPointerUnchecked(0, 0));
-
- rsAssert(mtls->inBuf && mtls->outBuf);
-
- return true;
-}
-
-// Preliminary work to prepare a general reduce-style kernel for launch.
-bool RsdCpuScriptImpl::reduceNewMtlsSetup(const Allocation ** ains,
- uint32_t inLen,
- const Allocation * aout,
- const RsScriptCall *sc,
- MTLaunchStructReduceNew *mtls) {
rsAssert(ains && (inLen >= 1) && aout);
- memset(mtls, 0, sizeof(MTLaunchStructReduceNew));
+ memset(mtls, 0, sizeof(MTLaunchStructReduce));
mtls->dimPtr = &mtls->redp.dim;
for (int index = inLen; --index >= 0;) {
@@ -793,29 +754,15 @@ void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
}
void RsdCpuScriptImpl::invokeReduce(uint32_t slot,
- const Allocation *ain,
+ const Allocation ** ains, uint32_t inLen,
Allocation *aout,
const RsScriptCall *sc) {
- MTLaunchStructReduce mtls;
-
- if (reduceMtlsSetup(ain, aout, sc, &mtls)) {
- reduceKernelSetup(slot, &mtls);
- RsdCpuScriptImpl *oldTLS = mCtx->setTLS(this);
- mCtx->launchReduce(ain, aout, &mtls);
- mCtx->setTLS(oldTLS);
- }
-}
+ MTLaunchStructReduce mtls;
-void RsdCpuScriptImpl::invokeReduceNew(uint32_t slot,
- const Allocation ** ains, uint32_t inLen,
- Allocation *aout,
- const RsScriptCall *sc) {
- MTLaunchStructReduceNew mtls;
-
- if (reduceNewMtlsSetup(ains, inLen, aout, sc, &mtls)) {
- reduceNewKernelSetup(slot, &mtls);
+ if (reduceMtlsSetup(ains, inLen, aout, sc, &mtls)) {
+ reduceKernelSetup(slot, &mtls);
RsdCpuScriptImpl *oldTLS = mCtx->setTLS(this);
- mCtx->launchReduceNew(ains, inLen, aout, &mtls);
+ mCtx->launchReduce(ains, inLen, aout, &mtls);
mCtx->setTLS(oldTLS);
}
}
@@ -829,15 +776,9 @@ void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *
void RsdCpuScriptImpl::reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls) {
mtls->script = this;
- mtls->kernel = mScriptExec->getReduceFunction(slot);
- rsAssert(mtls->kernel != nullptr);
-}
-
-void RsdCpuScriptImpl::reduceNewKernelSetup(uint32_t slot, MTLaunchStructReduceNew *mtls) {
- mtls->script = this;
mtls->redp.slot = slot;
- const ReduceNewDescription *desc = mScriptExec->getReduceNewDescription(slot);
+ const ReduceDescription *desc = mScriptExec->getReduceDescription(slot);
mtls->accumFunc = desc->accumFunc;
mtls->initFunc = desc->initFunc; // might legally be nullptr
mtls->combFunc = desc->combFunc; // might legally be nullptr
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index 2909dab1..94345bd5 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -61,15 +61,10 @@ public:
const RsScriptCall* sc) override;
void invokeReduce(uint32_t slot,
- const Allocation* ain,
+ const Allocation ** ains, uint32_t inLen,
Allocation* aout,
const RsScriptCall* sc) override;
- void invokeReduceNew(uint32_t slot,
- const Allocation ** ains, uint32_t inLen,
- Allocation* aout,
- const RsScriptCall* sc) override;
-
void invokeInit() override;
void invokeFreeChildren() override;
@@ -94,17 +89,11 @@ public:
virtual void forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *mtls);
- // Build an MTLaunchStruct suitable for launching a simple reduce-style kernel.
- bool reduceMtlsSetup(const Allocation *ain, const Allocation *aout,
- const RsScriptCall *sc, MTLaunchStructReduce *mtls);
- // Finalize an MTLaunchStruct for launching a simple reduce-style kernel.
- virtual void reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls);
-
// Build an MTLaunchStruct suitable for launching a general reduce-style kernel.
- bool reduceNewMtlsSetup(const Allocation ** ains, uint32_t inLen, const Allocation *aout,
- const RsScriptCall *sc, MTLaunchStructReduceNew *mtls);
+ bool reduceMtlsSetup(const Allocation ** ains, uint32_t inLen, const Allocation *aout,
+ const RsScriptCall *sc, MTLaunchStructReduce *mtls);
// Finalize an MTLaunchStruct for launching a general reduce-style kernel.
- virtual void reduceNewKernelSetup(uint32_t slot, MTLaunchStructReduceNew *mtls);
+ virtual void reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls);
const RsdCpuReference::CpuSymbol * lookupSymbolMath(const char *sym);
static void * lookupRuntimeStub(void* pContext, char const* name);
diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h
index e226b934..a8d980e1 100644
--- a/cpu_ref/rsd_cpu.h
+++ b/cpu_ref/rsd_cpu.h
@@ -59,15 +59,10 @@ public:
const RsScriptCall *sc) = 0;
virtual void invokeReduce(uint32_t slot,
- const Allocation *ain,
+ const Allocation ** ains, uint32_t inLen,
Allocation *aout,
const RsScriptCall *sc) = 0;
- virtual void invokeReduceNew(uint32_t slot,
- const Allocation ** ains, uint32_t inLen,
- Allocation *aout,
- const RsScriptCall *sc) = 0;
-
virtual void invokeInit() = 0;
virtual void invokeFreeChildren() = 0;
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index af8d6adf..5aa1c949 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -126,20 +126,11 @@ void rsdScriptInvokeFunction(const Context *dc, Script *s,
void rsdScriptInvokeReduce(const Context *dc, Script *s,
uint32_t slot,
- const Allocation *ain,
+ const Allocation ** ains, size_t inLen,
Allocation *aout,
const RsScriptCall *sc) {
RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
- cs->invokeReduce(slot, ain, aout, sc);
-}
-
-void rsdScriptInvokeReduceNew(const Context *dc, Script *s,
- uint32_t slot,
- const Allocation ** ains, size_t inLen,
- Allocation *aout,
- const RsScriptCall *sc) {
- RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
- cs->invokeReduceNew(slot, ains, inLen, aout, sc);
+ cs->invokeReduce(slot, ains, inLen, aout, sc);
}
void rsdScriptSetGlobalVar(const Context *dc, const Script *s,
diff --git a/driver/rsdBcc.h b/driver/rsdBcc.h
index a2bf8be0..c79f445a 100644
--- a/driver/rsdBcc.h
+++ b/driver/rsdBcc.h
@@ -46,18 +46,11 @@ void rsdScriptInvokeForEach(const android::renderscript::Context *rsc,
void rsdScriptInvokeReduce(const android::renderscript::Context *rsc,
android::renderscript::Script *s,
uint32_t slot,
- const android::renderscript::Allocation *ain,
+ const android::renderscript::Allocation ** ains,
+ size_t inLen,
android::renderscript::Allocation *aout,
const RsScriptCall *sc);
-void rsdScriptInvokeReduceNew(const android::renderscript::Context *rsc,
- android::renderscript::Script *s,
- uint32_t slot,
- const android::renderscript::Allocation ** ains,
- size_t inLen,
- android::renderscript::Allocation *aout,
- const RsScriptCall *sc);
-
void rsdScriptInvokeForEachMulti(const android::renderscript::Context *rsc,
android::renderscript::Script *s,
uint32_t slot,
diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp
index 503da5af..a5e942a9 100644
--- a/driver/rsdCore.cpp
+++ b/driver/rsdCore.cpp
@@ -101,8 +101,6 @@ extern "C" bool rsdHalQueryHal(RsHalInitEnums entry, void **fnPtr) {
fnPtr[0] = (void *)rsdScriptUpdateCachedObject; break;
case RS_HAL_SCRIPT_INVOKE_REDUCE:
fnPtr[0] = (void *)rsdScriptInvokeReduce; break;
- case RS_HAL_SCRIPT_INVOKE_REDUCE_NEW:
- fnPtr[0] = (void *)rsdScriptInvokeReduceNew; break;
case RS_HAL_ALLOCATION_INIT:
fnPtr[0] = (void *)rsdAllocationInit; break;
diff --git a/libRS.map b/libRS.map
index c288bf1d..64b6cb6d 100644
--- a/libRS.map
+++ b/libRS.map
@@ -94,7 +94,6 @@ libRS {
rsScriptInvokeV;
rsScriptKernelIDCreate;
rsScriptReduce;
- rsScriptReduceNew;
rsScriptSetTimeZone;
rsScriptSetVarD;
rsScriptSetVarF;
diff --git a/rs.spec b/rs.spec
index 608f3247..efae43e8 100644
--- a/rs.spec
+++ b/rs.spec
@@ -414,14 +414,6 @@ ScriptForEachMulti {
ScriptReduce {
param RsScript s
param uint32_t slot
- param RsAllocation ain
- param RsAllocation aout
- param const RsScriptCall * sc
-}
-
-ScriptReduceNew {
- param RsScript s
- param uint32_t slot
param RsAllocation * ains
param RsAllocation aout
param const RsScriptCall * sc
diff --git a/rsDriverLoader.cpp b/rsDriverLoader.cpp
index 83c6cf62..16efa0d7 100644
--- a/rsDriverLoader.cpp
+++ b/rsDriverLoader.cpp
@@ -71,7 +71,6 @@ static bool LoadHalTable(Context *rsc, HalQueryHal fn, bool loadGraphics) {
ret &= fn(RS_HAL_SCRIPT_INVOKE_ROOT, (void **)&rsc->mHal.funcs.script.invokeRoot);
ret &= fn(RS_HAL_SCRIPT_INVOKE_FOR_EACH, (void **)&rsc->mHal.funcs.script.invokeForEach);
ret &= fn(RS_HAL_SCRIPT_INVOKE_REDUCE, (void **)&rsc->mHal.funcs.script.invokeReduce);
- ret &= fn(RS_HAL_SCRIPT_INVOKE_REDUCE_NEW, (void **)&rsc->mHal.funcs.script.invokeReduceNew);
ret &= fn(RS_HAL_SCRIPT_INVOKE_INIT, (void **)&rsc->mHal.funcs.script.invokeInit);
ret &= fn(RS_HAL_SCRIPT_INVOKE_FREE_CHILDREN, (void **)&rsc->mHal.funcs.script.invokeFreeChildren);
ret &= fn(RS_HAL_SCRIPT_SET_GLOBAL_VAR, (void **)&rsc->mHal.funcs.script.setGlobalVar);
diff --git a/rsScript.cpp b/rsScript.cpp
index bf28328b..4c2f52f0 100644
--- a/rsScript.cpp
+++ b/rsScript.cpp
@@ -225,23 +225,15 @@ void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot,
}
void rsi_ScriptReduce(Context *rsc, RsScript vs, uint32_t slot,
- RsAllocation vain, RsAllocation vaout,
- const RsScriptCall *sc, size_t scLen) {
- Script *s = static_cast<Script *>(vs);
- s->runReduce(rsc, slot, static_cast<const Allocation *>(vain),
- static_cast<Allocation *>(vaout), sc);
-}
-
-void rsi_ScriptReduceNew(Context *rsc, RsScript vs, uint32_t slot,
- RsAllocation *vains, size_t inLen,
- RsAllocation vaout, const RsScriptCall *sc,
- size_t scLen) {
+ RsAllocation *vains, size_t inLen,
+ RsAllocation vaout, const RsScriptCall *sc,
+ size_t scLen) {
Script *s = static_cast<Script *>(vs);
Allocation **ains = (Allocation**)(vains);
- s->runReduceNew(rsc, slot,
- const_cast<const Allocation **>(ains), inLen,
- static_cast<Allocation *>(vaout), sc);
+ s->runReduce(rsc, slot,
+ const_cast<const Allocation **>(ains), inLen,
+ static_cast<Allocation *>(vaout), sc);
}
void rsi_ScriptInvoke(Context *rsc, RsScript vs, uint32_t slot) {
diff --git a/rsScript.h b/rsScript.h
index c3241abf..e336f07b 100644
--- a/rsScript.h
+++ b/rsScript.h
@@ -86,7 +86,6 @@ public:
size_t exportedVariableCount;
size_t exportedForEachCount;
size_t exportedReduceCount;
- size_t exportedReduceNewCount;
size_t exportedFunctionCount;
size_t exportedPragmaCount;
char const **exportedPragmaKeyList;
@@ -133,13 +132,10 @@ public:
size_t usrBytes,
const RsScriptCall *sc = nullptr) = 0;
- virtual void runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+ virtual void runReduce(Context *rsc, uint32_t slot,
+ const Allocation **ains, size_t inLen,
Allocation *aout, const RsScriptCall *sc) = 0;
- virtual void runReduceNew(Context *rsc, uint32_t slot,
- const Allocation **ains, size_t inLen,
- Allocation *aout, const RsScriptCall *sc) = 0;
-
virtual void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) = 0;
virtual void setupScript(Context *rsc) = 0;
virtual uint32_t run(Context *) = 0;
diff --git a/rsScriptC.cpp b/rsScriptC.cpp
index d2d5b1fd..c0f858a2 100644
--- a/rsScriptC.cpp
+++ b/rsScriptC.cpp
@@ -239,33 +239,13 @@ void ScriptC::runForEach(Context *rsc,
}
}
-void ScriptC::runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+void ScriptC::runReduce(Context *rsc, uint32_t slot,
+ const Allocation ** ains, size_t inLen,
Allocation *aout, const RsScriptCall *sc) {
- // TODO: Record the name of the kernel in the tracing information.
- ATRACE_CALL();
-
- if (slot >= mHal.info.exportedReduceCount) {
- rsc->setError(RS_ERROR_BAD_SCRIPT, "The simple reduce kernel index is out of bounds");
- return;
- }
- if (mRSC->hadFatalError()) return;
-
- setupScript(rsc);
-
- if (rsc->props.mLogScripts) {
- ALOGV("%p ScriptC::runReduce invoking slot %i, ptr %p", rsc, slot, this);
- }
-
- rsc->mHal.funcs.script.invokeReduce(rsc, this, slot, ain, aout, sc);
-}
-
-void ScriptC::runReduceNew(Context *rsc, uint32_t slot,
- const Allocation ** ains, size_t inLen,
- Allocation *aout, const RsScriptCall *sc) {
// TODO: Record the name of the kernel in the tracing information.
ATRACE_CALL();
- if (slot >= mHal.info.exportedReduceNewCount) {
+ if (slot >= mHal.info.exportedReduceCount) {
rsc->setError(RS_ERROR_BAD_SCRIPT, "The general reduce kernel index is out of bounds");
return;
}
@@ -274,10 +254,10 @@ void ScriptC::runReduceNew(Context *rsc, uint32_t slot,
setupScript(rsc);
if (rsc->props.mLogScripts) {
- ALOGV("%p ScriptC::runReduceNew invoking slot %i, ptr %p", rsc, slot, this);
+ ALOGV("%p ScriptC::runReduce invoking slot %i, ptr %p", rsc, slot, this);
}
- rsc->mHal.funcs.script.invokeReduceNew(rsc, this, slot, ains, inLen, aout, sc);
+ rsc->mHal.funcs.script.invokeReduce(rsc, this, slot, ains, inLen, aout, sc);
}
void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) {
diff --git a/rsScriptC.h b/rsScriptC.h
index c8881a4b..6c34215b 100644
--- a/rsScriptC.h
+++ b/rsScriptC.h
@@ -47,13 +47,10 @@ public:
size_t usrBytes,
const RsScriptCall *sc = nullptr) override;
- void runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+ void runReduce(Context *rsc, uint32_t slot,
+ const Allocation ** ains, size_t inLen,
Allocation *aout, const RsScriptCall *sc) override;
- void runReduceNew(Context *rsc, uint32_t slot,
- const Allocation ** ains, size_t inLen,
- Allocation *aout, const RsScriptCall *sc) override;
-
virtual void serialize(Context *rsc, OStream *stream) const { }
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_SCRIPT_C; }
static Type *createFromStream(Context *rsc, IStream *stream) { return nullptr; }
diff --git a/rsScriptIntrinsic.cpp b/rsScriptIntrinsic.cpp
index 6e0f6ae3..0122a718 100644
--- a/rsScriptIntrinsic.cpp
+++ b/rsScriptIntrinsic.cpp
@@ -68,15 +68,11 @@ void ScriptIntrinsic::runForEach(Context* rsc,
aout, usr, usrBytes, sc);
}
-void ScriptIntrinsic::runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+void ScriptIntrinsic::runReduce(Context *rsc, uint32_t slot,
+ const Allocation ** ains, size_t inLen,
Allocation *aout, const RsScriptCall *sc) {
}
-void ScriptIntrinsic::runReduceNew(Context *rsc, uint32_t slot,
- const Allocation ** ains, size_t inLen,
- Allocation *aout, const RsScriptCall *sc) {
-}
-
void ScriptIntrinsic::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) {
}
diff --git a/rsScriptIntrinsic.h b/rsScriptIntrinsic.h
index e2b04b86..9b4f9d3b 100644
--- a/rsScriptIntrinsic.h
+++ b/rsScriptIntrinsic.h
@@ -49,13 +49,10 @@ public:
size_t usrBytes,
const RsScriptCall* sc = nullptr) override;
- void runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+ void runReduce(Context *rsc, uint32_t slot,
+ const Allocation ** ains, size_t inLen,
Allocation *aout, const RsScriptCall *sc) override;
- void runReduceNew(Context *rsc, uint32_t slot,
- const Allocation ** ains, size_t inLen,
- Allocation *aout, const RsScriptCall *sc) override;
-
void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) override;
void setupScript(Context *rsc) override;
uint32_t run(Context *) override;
diff --git a/rs_hal.h b/rs_hal.h
index faee684a..7e07ddd9 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -172,14 +172,10 @@ typedef struct {
size_t usrLen,
const RsScriptCall *sc);
void (*invokeReduce)(const Context *rsc, Script *s,
- uint32_t slot, const Allocation *ain,
+ uint32_t slot,
+ const Allocation ** ains, size_t inLen,
Allocation *aout,
const RsScriptCall *sc);
- void (*invokeReduceNew)(const Context *rsc, Script *s,
- uint32_t slot,
- const Allocation ** ains, size_t inLen,
- Allocation *aout,
- const RsScriptCall *sc);
void (*invokeInit)(const Context *rsc, Script *s);
void (*invokeFreeChildren)(const Context *rsc, Script *s);
@@ -412,7 +408,6 @@ enum RsHalInitEnums {
RS_HAL_SCRIPT_INVOKE_FOR_EACH_MULTI = 1013,
RS_HAL_SCRIPT_UPDATE_CACHED_OBJECT = 1014,
RS_HAL_SCRIPT_INVOKE_REDUCE = 1015,
- RS_HAL_SCRIPT_INVOKE_REDUCE_NEW = 1016,
RS_HAL_ALLOCATION_INIT = 2000,
RS_HAL_ALLOCATION_INIT_ADAPTER = 2001,