summaryrefslogtreecommitdiff
path: root/cpu_ref
diff options
context:
space:
mode:
Diffstat (limited to 'cpu_ref')
-rw-r--r--cpu_ref/Android.mk1
-rw-r--r--cpu_ref/rsCpuCore.cpp17
-rw-r--r--cpu_ref/rsCpuCore.h2
-rw-r--r--cpu_ref/rsCpuScriptGroup.cpp6
-rw-r--r--cpu_ref/rsCpuScriptGroup.h2
-rw-r--r--cpu_ref/rsCpuScriptGroup2.cpp192
-rw-r--r--cpu_ref/rsCpuScriptGroup2.h60
-rw-r--r--cpu_ref/rsd_cpu.h18
8 files changed, 285 insertions, 13 deletions
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index 5de964fc..27243f9f 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -29,6 +29,7 @@ LOCAL_SRC_FILES:= \
rsCpuRuntimeMath.cpp \
rsCpuRuntimeStubs.cpp \
rsCpuScriptGroup.cpp \
+ rsCpuScriptGroup2.cpp \
rsCpuIntrinsic.cpp \
rsCpuIntrinsic3DLUT.cpp \
rsCpuIntrinsicBlend.cpp \
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 47bc1c01..84c24169 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -17,6 +17,7 @@
#include "rsCpuCore.h"
#include "rsCpuScript.h"
#include "rsCpuScriptGroup.h"
+#include "rsCpuScriptGroup2.h"
#include <malloc.h>
#include "rsContext.h"
@@ -660,11 +661,19 @@ RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *
return i;
}
-RsdCpuReference::CpuScriptGroup * RsdCpuReferenceImpl::createScriptGroup(const ScriptGroup *sg) {
- CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg);
- if (!sgi->init()) {
+void* RsdCpuReferenceImpl::createScriptGroup(const ScriptGroupBase *sg) {
+ switch (sg->getApiVersion()) {
+ case ScriptGroupBase::SG_V1: {
+ CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg);
+ if (!sgi->init()) {
delete sgi;
return nullptr;
+ }
+ return sgi;
}
- return sgi;
+ case ScriptGroupBase::SG_V2: {
+ return new CpuScriptGroup2Impl(this, sg);
+ }
+ }
+ return nullptr;
}
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index bfd5e512..e0696583 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -182,7 +182,7 @@ public:
uint32_t flags);
virtual CpuScript * createIntrinsic(const Script *s,
RsScriptIntrinsicID iid, Element *e);
- virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg);
+ virtual void* createScriptGroup(const ScriptGroupBase *sg);
const RsdCpuReference::CpuSymbol *symLookup(const char *);
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 751bafb8..3d32a512 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -18,15 +18,13 @@
#include "rsCpuScript.h"
#include "rsScriptGroup.h"
#include "rsCpuScriptGroup.h"
-//#include "rsdBcc.h"
-//#include "rsdAllocation.h"
using namespace android;
using namespace android::renderscript;
-CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
+CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
mCtx = ctx;
- mSG = sg;
+ mSG = (ScriptGroup*)sg;
}
CpuScriptGroupImpl::~CpuScriptGroupImpl() {
diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h
index 1a4af058..50ba2acc 100644
--- a/cpu_ref/rsCpuScriptGroup.h
+++ b/cpu_ref/rsCpuScriptGroup.h
@@ -30,7 +30,7 @@ public:
virtual void execute();
virtual ~CpuScriptGroupImpl();
- CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg);
+ CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg);
bool init();
static void scriptGroupRoot(const RsExpandKernelParams *p,
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
new file mode 100644
index 00000000..9dc4d900
--- /dev/null
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -0,0 +1,192 @@
+#include "rsCpuScriptGroup2.h"
+
+#include "cpu_ref/rsCpuCore.h"
+#include "rsClosure.h"
+#include "rsContext.h"
+#include "rsCpuCore.h"
+#include "rsCpuScript.h"
+#include "rsScript.h"
+#include "rsScriptGroup2.h"
+
+namespace android {
+namespace renderscript {
+
+namespace {
+
+static const size_t DefaultKernelArgCount = 2;
+
+// Expanded-kernel entry point used when a whole batch of closures is launched
+// as one kernel. For the slice [xstart, xend) it invokes every closure in the
+// batch in order, temporarily re-pointing the kernel params at that closure's
+// input allocations, output allocation and user pointer.
+//
+// kparams  launch parameters; kparams->usr is read as a pointer to the batch
+//          (list<CPUClosure*>). The object is mutated through a cast while the
+//          closures run; ins, inEStrides and usr are restored before returning
+//          (out is left pointing at the last closure's output, as before).
+// xstart   first x index of the slice; used to offset every data pointer.
+// xend     forwarded unchanged to each closure's expanded function.
+// outstep  unused here; each closure is called with its own output element
+//          size instead.
+void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
+               uint32_t xend, uint32_t outstep) {
+    const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr;
+    RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
+    const void **oldIns = kparams->ins;
+    uint32_t *oldStrides = kparams->inEStrides;
+
+    // Scratch buffers reused across closures. They start at the common-case
+    // size and are grown on demand below, so they are never empty and taking
+    // &ins[0] stays valid.
+    std::vector<const void*> ins(DefaultKernelArgCount);
+    std::vector<uint32_t> strides(DefaultKernelArgCount);
+
+    for (CPUClosure* cpuClosure : closures) {
+        const Closure* closure = cpuClosure->mClosure;
+
+        // A closure may take more than DefaultKernelArgCount inputs; writing
+        // through fixed-size buffers would then run off the end of both
+        // vectors. Grow them first (never shrink, to avoid reallocation churn).
+        const size_t argCount = closure->mArgs.size();
+        if (ins.size() < argCount) {
+            ins.resize(argCount);
+            strides.resize(argCount);
+        }
+
+        size_t argIndex = 0;
+        for (const auto& arg : closure->mArgs) {
+            const Allocation* a = (const Allocation*)arg;
+            const uint32_t eStride = a->mHal.state.elementSizeBytes;
+            // Start of this slice within the allocation's first LOD.
+            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
+                                 eStride * xstart;
+            if (kparams->dimY > 1) {
+                // 2D launch: advance to the current row.
+                ptr += a->mHal.drvState.lod[0].stride * kparams->y;
+            }
+            ins[argIndex] = ptr;
+            strides[argIndex] = eStride;
+            ++argIndex;
+        }
+
+        mutable_kparams->ins = &ins[0];
+        mutable_kparams->inEStrides = &strides[0];
+
+        // Same addressing for the closure's output allocation.
+        const Allocation* out = closure->mReturnValue;
+        const uint32_t ostep = out->mHal.state.elementSizeBytes;
+        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
+                             ostep * xstart;
+        if (kparams->dimY > 1) {
+            ptr += out->mHal.drvState.lod[0].stride * kparams->y;
+        }
+
+        mutable_kparams->out = (void*)ptr;
+
+        // The closure's own function expects its own user pointer in usr.
+        mutable_kparams->usr = cpuClosure->mUsrPtr;
+
+        cpuClosure->mFunc(kparams, xstart, xend, ostep);
+    }
+
+    // Put back what the launcher handed us so the next slice sees the batch.
+    mutable_kparams->ins = oldIns;
+    mutable_kparams->inEStrides = oldStrides;
+    mutable_kparams->usr = &closures;
+}
+
+/*
+ Returns true if closure depends on any closure already in batch, either via a
+ global variable (an entry in mGlobalDeps) or via an argument dependency (an
+ entry in mArgDeps that holds at least one non-null value).
+ TODO: this probably should go into class Closure.
+ */
+bool conflict(const list<CPUClosure*> &batch, CPUClosure* closure) {
+ // Any global-variable dependency on a batch member is a conflict.
+ for (const auto &p : closure->mClosure->mGlobalDeps) {
+ const Closure* dep = p.first;
+ for (CPUClosure* c : batch) {
+ if (c->mClosure == dep) {
+ return true;
+ }
+ }
+ }
+ // An argument dependency on a batch member only conflicts when one of its
+ // entries carries a non-null second element.
+ // NOTE(review): the meaning of that second element is defined in Closure,
+ // which is not visible here -- confirm before relying on it.
+ for (const auto &p : closure->mClosure->mArgDeps) {
+ const Closure* dep = p.first;
+ for (CPUClosure* c : batch) {
+ if (c->mClosure == dep) {
+ for (const auto &p1 : *p.second) {
+ if (p1.second != nullptr) {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+} // namespace
+
+// Builds the batch list for a script group: walks the group's closures in
+// order, wraps each one in a CPUClosure, and starts a new batch whenever the
+// new closure conflicts with something already in the current batch.
+CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
+                                         const ScriptGroupBase *sg) :
+    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
+    list<CPUClosure*>* current = new list<CPUClosure*>();
+
+    for (Closure* source : mGroup->mClosures) {
+        const ScriptKernelID* kid = source->mKernelID.get();
+        RsdCpuScriptImpl* scriptImpl =
+            (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kid->mScript);
+
+        // Ask the script for the kernel's expanded function and user data.
+        MTLaunchStruct launch;
+        scriptImpl->forEachKernelSetup(kid->mSlot, &launch);
+
+        // TODO: Is mtls.fep.usrLen ever used?
+        CPUClosure* wrapped = new CPUClosure(source, scriptImpl,
+                                             (ExpandFuncTy)launch.kernel,
+                                             launch.fep.usr, launch.fep.usrLen);
+
+        // A dependency on the current batch closes it; the new closure then
+        // opens the next one.
+        if (conflict(*current, wrapped)) {
+            mBatches.push_back(current);
+            current = new list<CPUClosure*>();
+        }
+        current->push_back(wrapped);
+    }
+
+    // The trailing batch is always recorded, even if it ended up empty.
+    mBatches.push_back(current);
+}
+
+// Releases everything the constructor allocated: each CPUClosure and the
+// heap-allocated batch list that holds it.
+CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
+    for (list<CPUClosure*>* batch : mBatches) {
+        for (CPUClosure* c : *batch) {
+            delete c;
+        }
+        // The lists themselves were created with new in the constructor;
+        // without this they leak on every script group destruction.
+        delete batch;
+    }
+}
+
+// Runs the group: batches are processed in the order they were built, each
+// one first having its script globals set and then being launched.
+void CpuScriptGroup2Impl::execute() {
+    for (auto it = mBatches.begin(); it != mBatches.end(); ++it) {
+        const list<CPUClosure*>& current = **it;
+        setGlobalsForBatch(current);
+        runBatch(current);
+    }
+}
+
+// Applies the global-variable bindings recorded on every closure of the batch
+// to the script that owns that closure's kernel.
+void CpuScriptGroup2Impl::setGlobalsForBatch(const list<CPUClosure*>& batch) {
+    for (CPUClosure* wrapped : batch) {
+        const Closure* source = wrapped->mClosure;
+        const ScriptKernelID* kid = source->mKernelID.get();
+        Script* script = kid->mScript;
+
+        for (const auto& binding : source->mGlobals) {
+            const void* payload = binding.second.first;
+            int byteCount = binding.second.second;
+
+            // A non-negative size means the value itself is stored in the
+            // pointer-sized slot, so its address is what gets copied.
+            if (byteCount >= 0) {
+                script->setVar(binding.first->mSlot, (const void*)&payload,
+                               byteCount);
+                continue;
+            }
+
+            // We use -1 size to indicate an ObjectBase rather than a primitive
+            // type.
+            script->setVarObj(binding.first->mSlot, (ObjectBase*)payload);
+        }
+    }
+}
+
+// Launches one batch as a single kernel: calls preLaunch on every closure's
+// script, sets up a launch from the first closure's arguments and return
+// value, runs groupRoot across the worker threads with the batch as user
+// data, then calls postLaunch on every closure's script.
+void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) {
+    // The constructor records its trailing batch unconditionally, so a group
+    // with no closures yields an empty batch. batch.front() on an empty list
+    // is undefined behaviour, and there is nothing to launch anyway.
+    if (batch.empty()) {
+        return;
+    }
+
+    for (CPUClosure* cpuClosure : batch) {
+        const Closure* closure = cpuClosure->mClosure;
+        const ScriptKernelID* kernelID = closure->mKernelID.get();
+        cpuClosure->mSi->preLaunch(kernelID->mSlot,
+                                   (const Allocation**)&closure->mArgs[0],
+                                   closure->mArgs.size(), closure->mReturnValue,
+                                   cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
+                                   nullptr);
+    }
+
+    // The launch structure is filled in from the first closure only.
+    // NOTE(review): presumably every closure in a batch shares the same launch
+    // dimensions -- confirm against the batching rules.
+    const CPUClosure* cpuClosure = batch.front();
+    const Closure* closure = cpuClosure->mClosure;
+    MTLaunchStruct mtls;
+
+    cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0],
+                                      closure->mArgs.size(),
+                                      closure->mReturnValue,
+                                      nullptr, 0, nullptr, &mtls);
+
+    // Redirect the launch to groupRoot, which fans out to each closure and
+    // reads the batch back from the user pointer.
+    mtls.script = nullptr;
+    mtls.kernel = (void (*)())&groupRoot;
+    mtls.fep.usr = &batch;
+
+    mCpuRefImpl->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
+
+    for (CPUClosure* cpuClosure : batch) {
+        const Closure* closure = cpuClosure->mClosure;
+        const ScriptKernelID* kernelID = closure->mKernelID.get();
+        cpuClosure->mSi->postLaunch(kernelID->mSlot,
+                                    (const Allocation**)&closure->mArgs[0],
+                                    closure->mArgs.size(), closure->mReturnValue,
+                                    nullptr, 0, nullptr);
+    }
+}
+
+} // namespace renderscript
+} // namespace android
diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h
new file mode 100644
index 00000000..6cb72a65
--- /dev/null
+++ b/cpu_ref/rsCpuScriptGroup2.h
@@ -0,0 +1,60 @@
+#ifndef CPU_REF_CPUSCRIPTGROUP2IMPL_H_
+#define CPU_REF_CPUSCRIPTGROUP2IMPL_H_
+
+#include <list>
+
+#include "rsd_cpu.h"
+
+using std::list;
+
+namespace android {
+namespace renderscript {
+
+class Closure;
+class RsdCpuScriptImpl;
+class RsdCpuReferenceImpl;
+class ScriptGroup2;
+
+struct RsExpandKernelParams;
+
+typedef void (*ExpandFuncTy)(const RsExpandKernelParams*, uint32_t, uint32_t,
+ uint32_t);
+
+// CPU-side companion of a Closure: pairs the closure with the script
+// implementation that owns its kernel, the kernel's expanded function, and the
+// user pointer/size to pass to that function. Holds plain pointers only and
+// defines no destructor.
+class CPUClosure {
+ public:
+ // Stores all five values as given; nothing is copied or validated.
+ CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func,
+ const void* usrPtr, const size_t usrSize) :
+ mClosure(closure), mSi(si), mFunc(func), mUsrPtr(usrPtr),
+ mUsrSize(usrSize) {}
+
+ // It's important to use forwarding here rather than inheritance for unbound
+ // value binding to work.
+ const Closure* mClosure;
+ // Script implementation the closure's kernel belongs to.
+ RsdCpuScriptImpl* mSi;
+ // Expanded kernel function invoked for each slice.
+ const ExpandFuncTy mFunc;
+ // User data pointer and its size handed to the kernel.
+ const void* mUsrPtr;
+ const size_t mUsrSize;
+};
+
+// CPU reference implementation of a version-2 script group. Holds the group's
+// closures partitioned into batches and executes them batch by batch.
+class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 {
+ public:
+ // group is stored as a ScriptGroup2; cpuRefImpl is kept for later use.
+ CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase* group);
+ virtual ~CpuScriptGroup2Impl();
+
+ // NOTE(review): no definition of init() accompanies this declaration in
+ // this change -- confirm it is defined elsewhere or remove it.
+ bool init();
+ virtual void execute();
+
+ private:
+ // Per-batch helpers used by execute().
+ void setGlobalsForBatch(const list<CPUClosure*>& batch);
+ void runBatch(const list<CPUClosure*>& batch);
+
+ RsdCpuReferenceImpl* mCpuRefImpl;
+ const ScriptGroup2* mGroup;
+
+ // Batches in execution order; each entry is a heap-allocated list of
+ // heap-allocated CPUClosure objects.
+ list<list<CPUClosure*>*> mBatches;
+};
+
+} // namespace renderscript
+} // namespace android
+
+#endif // CPU_REF_CPUSCRIPTGROUP2IMPL_H_
diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h
index b0e924e3..d886cef0 100644
--- a/cpu_ref/rsd_cpu.h
+++ b/cpu_ref/rsd_cpu.h
@@ -45,7 +45,7 @@ namespace renderscript {
class ScriptC;
class Script;
-class ScriptGroup;
+class ScriptGroupBase;
class ScriptKernelID;
@@ -97,7 +97,13 @@ public:
};
typedef CpuScript * (* script_lookup_t)(Context *, const Script *s);
- class CpuScriptGroup {
+ // Common interface of the CPU script group implementations: something that
+ // can be executed and destroyed through a base pointer.
+ class CpuScriptGroupBase {
+ public:
+ virtual void execute() = 0;
+ virtual ~CpuScriptGroupBase() {}
+ };
+
+ class CpuScriptGroup : public CpuScriptGroupBase {
public:
virtual void setInput(const ScriptKernelID *kid, Allocation *) = 0;
virtual void setOutput(const ScriptKernelID *kid, Allocation *) = 0;
@@ -105,6 +111,12 @@ public:
virtual ~CpuScriptGroup() {};
};
+ // Interface for the second script group flavour. Adds nothing beyond
+ // CpuScriptGroupBase; it redeclares execute() as pure virtual.
+ class CpuScriptGroup2 : public CpuScriptGroupBase {
+ public:
+ virtual void execute() = 0;
+ virtual ~CpuScriptGroup2() {}
+ };
+
static Context * getTlsContext();
static const Script * getTlsScript();
static pthread_key_t getThreadTLSKey();
@@ -124,7 +136,7 @@ public:
uint8_t const *bitcode, size_t bitcodeSize,
uint32_t flags) = 0;
virtual CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) = 0;
- virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg) = 0;
+ virtual void* createScriptGroup(const ScriptGroupBase *sg) = 0;
virtual bool getInForEach() = 0;
#ifndef RS_COMPATIBILITY_LIB