summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYang Ni <yangni@google.com>2015-01-07 09:16:40 -0800
committerYang Ni <yangni@google.com>2015-01-07 09:16:40 -0800
commit1ffd86b448d78366190c540f98f8b6d641cdb6cf (patch)
tree65dc9b2f0c6d53ce4858ae990ab82f34cc26fd06
parent82f515b5a40d030f88bf622b8c05a03ec80083ee (diff)
downloadrs-1ffd86b448d78366190c540f98f8b6d641cdb6cf.tar.gz
New Script Group API: runtime and cpu driver support.
Change-Id: I9c612cf8874aabaf0ca7d1640567464c71ed3070
-rw-r--r--Android.mk4
-rw-r--r--cpp/rsDispatch.h9
-rw-r--r--cpu_ref/Android.mk1
-rw-r--r--cpu_ref/rsCpuCore.cpp17
-rw-r--r--cpu_ref/rsCpuCore.h2
-rw-r--r--cpu_ref/rsCpuScriptGroup.cpp6
-rw-r--r--cpu_ref/rsCpuScriptGroup.h2
-rw-r--r--cpu_ref/rsCpuScriptGroup2.cpp192
-rw-r--r--cpu_ref/rsCpuScriptGroup2.h60
-rw-r--r--cpu_ref/rsd_cpu.h18
-rw-r--r--driver/rsdScriptGroup.cpp14
-rw-r--r--driver/rsdScriptGroup.h6
-rw-r--r--rs.spec31
-rw-r--r--rsClosure.cpp147
-rw-r--r--rsClosure.h78
-rw-r--r--rsDefines.h6
-rw-r--r--rsFileA3D.cpp4
-rw-r--r--rsInternalDefines.h8
-rw-r--r--rsScriptGroup.cpp18
-rw-r--r--rsScriptGroup.h29
-rw-r--r--rsScriptGroup2.cpp27
-rw-r--r--rsScriptGroup2.h36
-rw-r--r--rsScriptGroupBase.h41
-rw-r--r--rs_hal.h7
24 files changed, 698 insertions, 65 deletions
diff --git a/Android.mk b/Android.mk
index d9af8cca..36f97309 100644
--- a/Android.mk
+++ b/Android.mk
@@ -141,6 +141,7 @@ LOCAL_SRC_FILES:= \
rsAnimation.cpp \
rsComponent.cpp \
rsContext.cpp \
+ rsClosure.cpp \
rsCppUtils.cpp \
rsDevice.cpp \
rsElement.cpp \
@@ -167,6 +168,7 @@ LOCAL_SRC_FILES:= \
rsScriptC_Lib.cpp \
rsScriptC_LibGL.cpp \
rsScriptGroup.cpp \
+ rsScriptGroup2.cpp \
rsScriptIntrinsic.cpp \
rsSignal.cpp \
rsStream.cpp \
@@ -245,6 +247,7 @@ LOCAL_SRC_FILES:= \
rsAnimation.cpp \
rsComponent.cpp \
rsContext.cpp \
+ rsClosure.cpp \
rsDevice.cpp \
rsElement.cpp \
rsFBOCache.cpp \
@@ -269,6 +272,7 @@ LOCAL_SRC_FILES:= \
rsScriptC_Lib.cpp \
rsScriptC_LibGL.cpp \
rsScriptGroup.cpp \
+ rsScriptGroup2.cpp \
rsScriptIntrinsic.cpp \
rsSignal.cpp \
rsStream.cpp \
diff --git a/cpp/rsDispatch.h b/cpp/rsDispatch.h
index 659591b0..53d72d4e 100644
--- a/cpp/rsDispatch.h
+++ b/cpp/rsDispatch.h
@@ -28,7 +28,9 @@ typedef void (*DeviceDestroyFnPtr) (RsDevice dev);
typedef void (*DeviceSetConfigFnPtr) (RsDevice dev, RsDeviceParam p, int32_t value);
typedef RsContext (*ContextCreateFnPtr)(RsDevice vdev, uint32_t version, uint32_t sdkVersion, RsContextType ct, uint32_t flags);
typedef void (*GetNameFnPtr)(RsContext, void * obj, const char **name);
-
+typedef RsClosure (*ClosureCreateFnPtr)(RsContext, RsScriptKernelID, RsAllocation, RsScriptFieldID*, size_t, uintptr_t*, size_t, size_t*, size_t, RsClosure*, size_t, RsScriptFieldID*, size_t);
+typedef void (*ClosureSetArgFnPtr)(RsContext, RsClosure, uint32_t, uintptr_t, size_t);
+typedef void (*ClosureSetGlobalFnPtr)(RsContext, RsClosure, RsScriptFieldID, uintptr_t, size_t);
typedef void (*ContextDestroyFnPtr) (RsContext);
typedef RsMessageToClientType (*ContextGetMessageFnPtr) (RsContext, void*, size_t, size_t*, size_t, uint32_t*, size_t);
typedef RsMessageToClientType (*ContextPeekMessageFnPtr) (RsContext, size_t*, size_t, uint32_t*, size_t);
@@ -80,6 +82,7 @@ typedef RsScript (*ScriptIntrinsicCreateFnPtr) (RsContext, uint32_t id, RsElemen
typedef RsScriptKernelID (*ScriptKernelIDCreateFnPtr) (RsContext, RsScript, int, int);
typedef RsScriptFieldID (*ScriptFieldIDCreateFnPtr) (RsContext, RsScript, int);
typedef RsScriptGroup (*ScriptGroupCreateFnPtr) (RsContext, RsScriptKernelID*, size_t, RsScriptKernelID*, size_t, RsScriptKernelID*, size_t, RsScriptFieldID*, size_t, const RsType*, size_t);
+typedef RsScriptGroup2 (*ScriptGroup2CreateFnPtr)(RsContext, RsClosure*, size_t);
typedef void (*ScriptGroupSetOutputFnPtr) (RsContext, RsScriptGroup, RsScriptKernelID, RsAllocation);
typedef void (*ScriptGroupSetInputFnPtr) (RsContext, RsScriptGroup, RsScriptKernelID, RsAllocation);
typedef void (*ScriptGroupExecuteFnPtr) (RsContext, RsScriptGroup);
@@ -113,6 +116,9 @@ struct dispatchTable {
AllocationCubeCreateFromBitmapFnPtr AllocationCubeCreateFromBitmap;
AllocationGetSurfaceFnPtr AllocationGetSurface;
AllocationSetSurfaceFnPtr AllocationSetSurface;
+ ClosureCreateFnPtr ClosureCreate;
+ ClosureSetArgFnPtr ClosureSetArg;
+ ClosureSetGlobalFnPtr ClosureSetGlobal;
ContextFinishFnPtr ContextFinish;
ContextDumpFnPtr ContextDump;
ContextSetPriorityFnPtr ContextSetPriority;
@@ -152,6 +158,7 @@ struct dispatchTable {
ScriptKernelIDCreateFnPtr ScriptKernelIDCreate;
ScriptFieldIDCreateFnPtr ScriptFieldIDCreate;
ScriptGroupCreateFnPtr ScriptGroupCreate;
+ ScriptGroup2CreateFnPtr ScriptGroup2Create;
ScriptGroupSetOutputFnPtr ScriptGroupSetOutput;
ScriptGroupSetInputFnPtr ScriptGroupSetInput;
ScriptGroupExecuteFnPtr ScriptGroupExecute;
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index 5de964fc..27243f9f 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -29,6 +29,7 @@ LOCAL_SRC_FILES:= \
rsCpuRuntimeMath.cpp \
rsCpuRuntimeStubs.cpp \
rsCpuScriptGroup.cpp \
+ rsCpuScriptGroup2.cpp \
rsCpuIntrinsic.cpp \
rsCpuIntrinsic3DLUT.cpp \
rsCpuIntrinsicBlend.cpp \
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 47bc1c01..84c24169 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -17,6 +17,7 @@
#include "rsCpuCore.h"
#include "rsCpuScript.h"
#include "rsCpuScriptGroup.h"
+#include "rsCpuScriptGroup2.h"
#include <malloc.h>
#include "rsContext.h"
@@ -660,11 +661,19 @@ RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *
return i;
}
-RsdCpuReference::CpuScriptGroup * RsdCpuReferenceImpl::createScriptGroup(const ScriptGroup *sg) {
- CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg);
- if (!sgi->init()) {
+void* RsdCpuReferenceImpl::createScriptGroup(const ScriptGroupBase *sg) {
+ switch (sg->getApiVersion()) {
+ case ScriptGroupBase::SG_V1: {
+ CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg);
+ if (!sgi->init()) {
delete sgi;
return nullptr;
+ }
+ return sgi;
}
- return sgi;
+ case ScriptGroupBase::SG_V2: {
+ return new CpuScriptGroup2Impl(this, sg);
+ }
+ }
+ return nullptr;
}
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index bfd5e512..e0696583 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -182,7 +182,7 @@ public:
uint32_t flags);
virtual CpuScript * createIntrinsic(const Script *s,
RsScriptIntrinsicID iid, Element *e);
- virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg);
+ virtual void* createScriptGroup(const ScriptGroupBase *sg);
const RsdCpuReference::CpuSymbol *symLookup(const char *);
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 751bafb8..3d32a512 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -18,15 +18,13 @@
#include "rsCpuScript.h"
#include "rsScriptGroup.h"
#include "rsCpuScriptGroup.h"
-//#include "rsdBcc.h"
-//#include "rsdAllocation.h"
using namespace android;
using namespace android::renderscript;
-CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
+CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
mCtx = ctx;
- mSG = sg;
+ mSG = (ScriptGroup*)sg;
}
CpuScriptGroupImpl::~CpuScriptGroupImpl() {
diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h
index 1a4af058..50ba2acc 100644
--- a/cpu_ref/rsCpuScriptGroup.h
+++ b/cpu_ref/rsCpuScriptGroup.h
@@ -30,7 +30,7 @@ public:
virtual void execute();
virtual ~CpuScriptGroupImpl();
- CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg);
+ CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg);
bool init();
static void scriptGroupRoot(const RsExpandKernelParams *p,
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
new file mode 100644
index 00000000..9dc4d900
--- /dev/null
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -0,0 +1,192 @@
+#include "rsCpuScriptGroup2.h"
+
+#include "cpu_ref/rsCpuCore.h"
+#include "rsClosure.h"
+#include "rsContext.h"
+#include "rsCpuCore.h"
+#include "rsCpuScript.h"
+#include "rsScript.h"
+#include "rsScriptGroup2.h"
+
+namespace android {
+namespace renderscript {
+
+namespace {
+
+static const size_t DefaultKernelArgCount = 2;
+
+/*
+ * Kernel trampoline used when a whole batch of closures is launched as one
+ * pass. kparams->usr carries the batch (a list<CPUClosure*>). For each closure
+ * in the batch the input pointers, input strides, output pointer and usr
+ * pointer in kparams are rewritten, then the closure's own expanded kernel is
+ * called with the same xstart/xend. The original ins, inEStrides and usr
+ * values are restored before returning; kparams->out is left pointing at the
+ * last closure's output. outstep is not used: each closure's output element
+ * size is passed as the step instead.
+ */
+void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
+               uint32_t xend, uint32_t outstep) {
+    const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr;
+    RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
+    const void **oldIns = kparams->ins;
+    uint32_t *oldStrides = kparams->inEStrides;
+
+    std::vector<const void*> ins(DefaultKernelArgCount);
+    std::vector<uint32_t> strides(DefaultKernelArgCount);
+
+    for (CPUClosure* cpuClosure : closures) {
+        const Closure* closure = cpuClosure->mClosure;
+
+        // Grow the scratch arrays when a closure has more arguments than the
+        // default capacity; otherwise the writes below would run past the end.
+        const size_t argCount = closure->mArgs.size();
+        if (ins.size() < argCount) {
+            ins.resize(argCount);
+            strides.resize(argCount);
+        }
+
+        auto in_iter = ins.begin();
+        auto stride_iter = strides.begin();
+
+        for (const auto& arg : closure->mArgs) {
+            // Every argument is treated as an Allocation here.
+            const Allocation* a = (const Allocation*)arg;
+            const uint32_t eStride = a->mHal.state.elementSizeBytes;
+            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
+                    eStride * xstart;
+            if (kparams->dimY > 1) {
+                // Advance to the row currently being processed.
+                ptr += a->mHal.drvState.lod[0].stride * kparams->y;
+            }
+            *in_iter++ = ptr;
+            *stride_iter++ = eStride;
+        }
+
+        mutable_kparams->ins = &ins[0];
+        mutable_kparams->inEStrides = &strides[0];
+
+        const Allocation* out = closure->mReturnValue;
+        const uint32_t ostep = out->mHal.state.elementSizeBytes;
+        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
+                ostep * xstart;
+        if (kparams->dimY > 1) {
+            ptr += out->mHal.drvState.lod[0].stride * kparams->y;
+        }
+
+        mutable_kparams->out = (void*)ptr;
+
+        mutable_kparams->usr = cpuClosure->mUsrPtr;
+
+        cpuClosure->mFunc(kparams, xstart, xend, ostep);
+    }
+
+    // Put back what the caller handed in so the next invocation sees the batch.
+    mutable_kparams->ins = oldIns;
+    mutable_kparams->inEStrides = oldStrides;
+    mutable_kparams->usr = &closures;
+}
+
+/*
+  Reports whether closure depends on a closure that is already in batch:
+  either through any global variable, or through an argument dependence that
+  carries at least one non-null field ID.
+  TODO: this probably should go into class Closure.
+ */
+bool conflict(const list<CPUClosure*> &batch, CPUClosure* closure) {
+    const Closure* candidate = closure->mClosure;
+
+    // True when the given closure is wrapped by some member of the batch.
+    auto inBatch = [&batch](const Closure* target) -> bool {
+        for (auto it = batch.begin(); it != batch.end(); ++it) {
+            if ((*it)->mClosure == target) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    for (const auto &globalDep : candidate->mGlobalDeps) {
+        if (inBatch(globalDep.first)) {
+            return true;
+        }
+    }
+
+    for (const auto &argDep : candidate->mArgDeps) {
+        if (!inBatch(argDep.first)) {
+            continue;
+        }
+        for (const auto &fieldEntry : *argDep.second) {
+            if (fieldEntry.second != nullptr) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+} // namespace
+
+CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,  // wraps each closure of the group and partitions them into batches
+ const ScriptGroupBase *sg) :
+ mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {  // unchecked downcast: sg is used as a ScriptGroup2
+ list<CPUClosure*>* batch = new list<CPUClosure*>();  // batch currently being filled; stored in mBatches once closed
+ for (Closure* closure: mGroup->mClosures) {
+ const ScriptKernelID* kernelID = closure->mKernelID.get();
+ RsdCpuScriptImpl* si =
+ (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
+
+ MTLaunchStruct mtls;  // only used to read back the kernel pointer and usr data set up below
+ si->forEachKernelSetup(kernelID->mSlot, &mtls);
+ // TODO: Is mtls.fep.usrLen ever used?
+ CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
+ mtls.fep.usr, mtls.fep.usrLen);
+ if (conflict(*batch, cc)) {  // cc depends on a closure already in this batch: close it and start a new one
+ mBatches.push_back(batch);
+ batch = new list<CPUClosure*>();
+ }
+ batch->push_back(cc);
+ }
+ mBatches.push_back(batch);  // the last batch is always stored, even when the group has no closures
+}
+
+// Releases every CPUClosure wrapper and the heap-allocated batch lists held in
+// mBatches. The Closure objects the wrappers point at are not deleted here.
+CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
+    for (list<CPUClosure*>* batch : mBatches) {
+        for (CPUClosure* c : *batch) {
+            delete c;
+        }
+        // Each list was allocated with new when the batches were built, so it
+        // has to be freed as well or it leaks.
+        delete batch;
+    }
+}
+
+// Runs the group one batch at a time, in the order the batches are stored:
+// a batch's globals are set first, then the batch itself is run.
+void CpuScriptGroup2Impl::execute() {
+    for (auto batchIt = mBatches.begin(); batchIt != mBatches.end(); ++batchIt) {
+        const list<CPUClosure*>& current = **batchIt;
+        setGlobalsForBatch(current);
+        runBatch(current);
+    }
+}
+
+// For every closure in the batch, writes the closure's recorded global values
+// into the script that owns the closure's kernel.
+void CpuScriptGroup2Impl::setGlobalsForBatch(const list<CPUClosure*>& batch) {
+    for (auto it = batch.begin(); it != batch.end(); ++it) {
+        const Closure* boundClosure = (*it)->mClosure;
+        const ScriptKernelID* kernel = boundClosure->mKernelID.get();
+        Script* script = kernel->mScript;
+        for (const auto& global : boundClosure->mGlobals) {
+            const void* payload = global.second.first;
+            int byteCount = global.second.second;
+            // A negative size marks the value as an ObjectBase pointer;
+            // otherwise the bytes of the stored value itself are copied.
+            if (byteCount >= 0) {
+                script->setVar(global.first->mSlot, (const void*)&payload, byteCount);
+            } else {
+                script->setVarObj(global.first->mSlot, (ObjectBase*)payload);
+            }
+        }
+    }
+}
+
+// Launches all closures of one batch in a single pass: calls preLaunch on each
+// closure's script, sets up the launch structure from the first closure's
+// arguments and return value, hands groupRoot (with the batch as its usr data)
+// to launchThreads, and finally calls postLaunch on each closure's script.
+void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) {
+    // An empty batch has nothing to launch, and batch.front() below is
+    // undefined on an empty list.
+    if (batch.empty()) {
+        return;
+    }
+
+    for (CPUClosure* cpuClosure : batch) {
+        const Closure* closure = cpuClosure->mClosure;
+        const ScriptKernelID* kernelID = closure->mKernelID.get();
+        cpuClosure->mSi->preLaunch(kernelID->mSlot,
+                                   (const Allocation**)&closure->mArgs[0],
+                                   closure->mArgs.size(), closure->mReturnValue,
+                                   cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
+                                   nullptr);
+    }
+
+    // The launch dimensions are derived from the first closure only.
+    const CPUClosure* cpuClosure = batch.front();
+    const Closure* closure = cpuClosure->mClosure;
+    MTLaunchStruct mtls;
+
+    cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0],
+                                      closure->mArgs.size(),
+                                      closure->mReturnValue,
+                                      nullptr, 0, nullptr, &mtls);
+
+    // Replace the per-script kernel with the batch trampoline.
+    mtls.script = nullptr;
+    mtls.kernel = (void (*)())&groupRoot;
+    mtls.fep.usr = &batch;
+
+    mCpuRefImpl->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
+
+    for (CPUClosure* cpuClosure : batch) {
+        const Closure* closure = cpuClosure->mClosure;
+        const ScriptKernelID* kernelID = closure->mKernelID.get();
+        cpuClosure->mSi->postLaunch(kernelID->mSlot,
+                                    (const Allocation**)&closure->mArgs[0],
+                                    closure->mArgs.size(), closure->mReturnValue,
+                                    nullptr, 0, nullptr);
+    }
+}
+
+} // namespace renderscript
+} // namespace android
diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h
new file mode 100644
index 00000000..6cb72a65
--- /dev/null
+++ b/cpu_ref/rsCpuScriptGroup2.h
@@ -0,0 +1,60 @@
+#ifndef CPU_REF_CPUSCRIPTGROUP2IMPL_H_
+#define CPU_REF_CPUSCRIPTGROUP2IMPL_H_
+
+#include <list>
+
+#include "rsd_cpu.h"
+
+using std::list;
+
+namespace android {
+namespace renderscript {
+
+class Closure;
+class RsdCpuScriptImpl;
+class RsdCpuReferenceImpl;
+class ScriptGroup2;
+
+struct RsExpandKernelParams;
+
+typedef void (*ExpandFuncTy)(const RsExpandKernelParams*, uint32_t, uint32_t,
+ uint32_t);
+
+class CPUClosure {  // CPU-driver wrapper pairing a runtime Closure with the data needed to run its kernel
+ public:
+ CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func,
+ const void* usrPtr, const size_t usrSize) :
+ mClosure(closure), mSi(si), mFunc(func), mUsrPtr(usrPtr),
+ mUsrSize(usrSize) {}
+
+ // It's important to use forwarding here rather than inheritance for unbound
+ // value binding to work.
+ const Closure* mClosure;  // the wrapped runtime closure; not owned
+ RsdCpuScriptImpl* mSi;  // CPU script implementation that holds the kernel
+ const ExpandFuncTy mFunc;  // expanded kernel entry point
+ const void* mUsrPtr;  // usr data pointer passed to the kernel
+ const size_t mUsrSize;  // length associated with mUsrPtr
+};
+
+class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 {  // CPU driver implementation of a closure-based script group
+ public:
+ CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase* group);
+ virtual ~CpuScriptGroup2Impl();
+
+ bool init();  // NOTE(review): no definition is visible in this change — confirm it is defined or unused
+ virtual void execute();  // runs every batch in order
+
+ private:
+ void setGlobalsForBatch(const list<CPUClosure*>& batch);
+ void runBatch(const list<CPUClosure*>& batch);
+
+ RsdCpuReferenceImpl* mCpuRefImpl;
+ const ScriptGroup2* mGroup;
+
+ list<list<CPUClosure*>*> mBatches;  // heap-allocated batches of closure wrappers, in execution order
+};
+
+} // namespace renderscript
+} // namespace android
+
+#endif // CPU_REF_CPUSCRIPTGROUP2IMPL_H_
diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h
index b0e924e3..d886cef0 100644
--- a/cpu_ref/rsd_cpu.h
+++ b/cpu_ref/rsd_cpu.h
@@ -45,7 +45,7 @@ namespace renderscript {
class ScriptC;
class Script;
-class ScriptGroup;
+class ScriptGroupBase;
class ScriptKernelID;
@@ -97,7 +97,13 @@ public:
};
typedef CpuScript * (* script_lookup_t)(Context *, const Script *s);
- class CpuScriptGroup {
+ class CpuScriptGroupBase {
+ public:
+ virtual void execute() = 0;
+ virtual ~CpuScriptGroupBase() {}
+ };
+
+ class CpuScriptGroup : public CpuScriptGroupBase {
public:
virtual void setInput(const ScriptKernelID *kid, Allocation *) = 0;
virtual void setOutput(const ScriptKernelID *kid, Allocation *) = 0;
@@ -105,6 +111,12 @@ public:
virtual ~CpuScriptGroup() {};
};
+ class CpuScriptGroup2 : public CpuScriptGroupBase {
+ public:
+ virtual void execute() = 0;
+ virtual ~CpuScriptGroup2() {}
+ };
+
static Context * getTlsContext();
static const Script * getTlsScript();
static pthread_key_t getThreadTLSKey();
@@ -124,7 +136,7 @@ public:
uint8_t const *bitcode, size_t bitcodeSize,
uint32_t flags) = 0;
virtual CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) = 0;
- virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg) = 0;
+ virtual void* createScriptGroup(const ScriptGroupBase *sg) = 0;
virtual bool getInForEach() = 0;
#ifndef RS_COMPATIBILITY_LIB
diff --git a/driver/rsdScriptGroup.cpp b/driver/rsdScriptGroup.cpp
index a7b2e774..ed800a36 100644
--- a/driver/rsdScriptGroup.cpp
+++ b/driver/rsdScriptGroup.cpp
@@ -28,7 +28,7 @@ using namespace android;
using namespace android::renderscript;
-bool rsdScriptGroupInit(const Context *rsc, ScriptGroup *sg) {
+bool rsdScriptGroupInit(const Context *rsc, ScriptGroupBase *sg) {
RsdHal *dc = (RsdHal *)rsc->mHal.drv;
sg->mHal.drv = dc->mCpuRef->createScriptGroup(sg);
@@ -43,13 +43,15 @@ void rsdScriptGroupSetOutput(const Context *rsc, const ScriptGroup *sg,
const ScriptKernelID *kid, Allocation *) {
}
-void rsdScriptGroupExecute(const Context *rsc, const ScriptGroup *sg) {
- RsdCpuReference::CpuScriptGroup *sgi = (RsdCpuReference::CpuScriptGroup *)sg->mHal.drv;
+void rsdScriptGroupExecute(const Context *rsc, const ScriptGroupBase *sg) {
+ RsdCpuReference::CpuScriptGroupBase *sgi =
+ (RsdCpuReference::CpuScriptGroupBase *)sg->mHal.drv;
sgi->execute();
}
-void rsdScriptGroupDestroy(const Context *rsc, const ScriptGroup *sg) {
- RsdCpuReference::CpuScriptGroup *sgi = (RsdCpuReference::CpuScriptGroup *)sg->mHal.drv;
+void rsdScriptGroupDestroy(const Context *rsc, const ScriptGroupBase *sg) {
+ RsdCpuReference::CpuScriptGroupBase *sgi =
+ (RsdCpuReference::CpuScriptGroupBase *)sg->mHal.drv;
delete sgi;
}
@@ -68,5 +70,3 @@ void rsdScriptGroupUpdateCachedObject(const Context *rsc,
obj->v2 = nullptr;
#endif
}
-
-
diff --git a/driver/rsdScriptGroup.h b/driver/rsdScriptGroup.h
index db44e231..95e5d19b 100644
--- a/driver/rsdScriptGroup.h
+++ b/driver/rsdScriptGroup.h
@@ -20,7 +20,7 @@
#include <rs_hal.h>
bool rsdScriptGroupInit(const android::renderscript::Context *rsc,
- android::renderscript::ScriptGroup *sg);
+ android::renderscript::ScriptGroupBase *sg);
void rsdScriptGroupSetInput(const android::renderscript::Context *rsc,
const android::renderscript::ScriptGroup *sg,
const android::renderscript::ScriptKernelID *kid,
@@ -30,9 +30,9 @@ void rsdScriptGroupSetOutput(const android::renderscript::Context *rsc,
const android::renderscript::ScriptKernelID *kid,
android::renderscript::Allocation *);
void rsdScriptGroupExecute(const android::renderscript::Context *rsc,
- const android::renderscript::ScriptGroup *sg);
+ const android::renderscript::ScriptGroupBase *sg);
void rsdScriptGroupDestroy(const android::renderscript::Context *rsc,
- const android::renderscript::ScriptGroup *sg);
+ const android::renderscript::ScriptGroupBase *sg);
void rsdScriptGroupUpdateCachedObject(const android::renderscript::Context *rsc,
const android::renderscript::ScriptGroup *sg,
android::renderscript::rs_script_group *obj);
diff --git a/rs.spec b/rs.spec
index 18ece8cd..22c53b91 100644
--- a/rs.spec
+++ b/rs.spec
@@ -252,6 +252,31 @@ AllocationCopy3DRange {
param uint32_t srcMip
}
+ClosureCreate {
+ direct
+ param RsScriptKernelID kernelID
+ param RsAllocation returnValue
+ param RsScriptFieldID * fieldIDs
+ param uintptr_t * values
+ param size_t * sizes
+ param RsClosure * depClosures
+ param RsScriptFieldID * depFieldIDs
+ ret RsClosure
+ }
+
+ClosureSetArg {
+ param RsClosure closureID
+ param uint32_t index
+ param uintptr_t value
+ param size_t valueSize
+}
+
+ClosureSetGlobal {
+ param RsClosure closureID
+ param RsScriptFieldID fieldID
+ param uintptr_t value
+ param size_t valueSize
+}
SamplerCreate {
direct
@@ -410,6 +435,12 @@ ScriptGroupExecute {
param RsScriptGroup group
}
+ScriptGroup2Create{
+ direct
+ param RsClosure * closures
+ ret RsScriptGroup2
+}
+
AllocationIoSend {
param RsAllocation alloc
}
diff --git a/rsClosure.cpp b/rsClosure.cpp
new file mode 100644
index 00000000..8530fc16
--- /dev/null
+++ b/rsClosure.cpp
@@ -0,0 +1,147 @@
+#include "rsClosure.h"
+
+#include "cpu_ref/rsCpuCore.h"
+#include "rsContext.h" // XXX: necessary to avoid compiler error on rsScript.h below
+#include "rsScript.h"
+#include "rsType.h"
+
+namespace android {
+namespace renderscript {
+
+// Runtime entry point that builds a Closure from the parallel arrays passed in
+// by the client. The five array lengths are asserted to be equal; only
+// fieldIDs_length is forwarded to the Closure as the element count.
+RsClosure rsi_ClosureCreate(Context* context, RsScriptKernelID kernelID,
+                            RsAllocation returnValue,
+                            RsScriptFieldID* fieldIDs, size_t fieldIDs_length,
+                            uintptr_t* values, size_t values_length,
+                            size_t* sizes, size_t sizes_length,
+                            RsClosure* depClosures, size_t depClosures_length,
+                            RsScriptFieldID* depFieldIDs,
+                            size_t depFieldIDs_length) {
+    rsAssert(fieldIDs_length == values_length && values_length == sizes_length &&
+             sizes_length == depClosures_length &&
+             depClosures_length == depFieldIDs_length);
+
+    // Reinterpret the opaque client handles as runtime object pointers.
+    const ScriptKernelID* kernel = (const ScriptKernelID*)kernelID;
+    Allocation* result = (Allocation*)returnValue;
+    const ScriptFieldID** fields = (const ScriptFieldID**)fieldIDs;
+    const void** boundValues = (const void**)values;
+    const Closure** producers = (const Closure**)depClosures;
+    const ScriptFieldID** producerFields = (const ScriptFieldID**)depFieldIDs;
+
+    Closure* created = new Closure(context, kernel, result, fieldIDs_length,
+                                   fields, boundValues, sizes, producers,
+                                   producerFields);
+    return (RsClosure)(created);
+}
+
+// Runtime entry point: evaluates the given closure. rsc is not used.
+void rsi_ClosureEval(Context* rsc, RsClosure closure) {
+    Closure* target = (Closure*)closure;
+    target->eval();
+}
+
+// Runtime entry point: forwards an argument binding to the closure, passing
+// value on as an untyped pointer. rsc is not used.
+void rsi_ClosureSetArg(Context* rsc, RsClosure closure, uint32_t index,
+                       uintptr_t value, size_t size) {
+    Closure* target = (Closure*)closure;
+    const void* boundValue = (const void*)value;
+    target->setArg(index, boundValue, size);
+}
+
+// Runtime entry point: forwards a global binding to the closure, passing the
+// field handle and value on as pointers. rsc is not used.
+void rsi_ClosureSetGlobal(Context* rsc, RsClosure closure,
+                          RsScriptFieldID fieldID, uintptr_t value,
+                          size_t size) {
+    Closure* target = (Closure*)closure;
+    const ScriptFieldID* field = (const ScriptFieldID*)fieldID;
+    const void* boundValue = (const void*)value;
+    target->setGlobal(field, boundValue, size);
+}
+
+Closure::Closure(Context* context,  // splits the parallel input arrays into kernel arguments, globals and dependence tables
+ const ScriptKernelID* kernelID,
+ Allocation* returnValue,
+ const int numValues,
+ const ScriptFieldID** fieldIDs,
+ const void** values,
+ const size_t* sizes,
+ const Closure** depClosures,
+ const ScriptFieldID** depFieldIDs) :
+ ObjectBase(context), mContext(context), mKernelID((ScriptKernelID*)kernelID),
+ mReturnValue(returnValue) {
+ size_t i;
+
+ for (i = 0; i < (size_t)numValues && fieldIDs[i] == nullptr; i++);  // count the leading entries whose field ID is null
+
+ vector<const void*> args(values, values + i);  // those leading entries become the kernel arguments
+ mArgs.swap(args);
+
+ for (; i < (size_t)numValues; i++) {  // every remaining entry is recorded as a global, keyed by field ID
+ mGlobals[fieldIDs[i]] = std::make_pair(values[i], sizes[i]);
+ }
+
+ mDependences.insert(depClosures, depClosures + numValues);  // NOTE(review): null entries of depClosures are inserted too — confirm intended
+
+ for (i = 0; i < mArgs.size(); i++) {  // argument dependences: argument index -> producer's field ID
+ const Closure* dep = depClosures[i];
+ if (dep != nullptr) {
+ auto mapping = mArgDeps[dep];  // operator[] creates a null entry on first use of dep
+ if (mapping == nullptr) {
+ mapping = new map<int, const ObjectBaseRef<ScriptFieldID>*>();
+ mArgDeps[dep] = mapping;
+ }
+ (*mapping)[i] = new ObjectBaseRef<ScriptFieldID>(
+ const_cast<ScriptFieldID*>(depFieldIDs[i]));
+ }
+ }
+
+ for (; i < (size_t)numValues; i++) {  // i continues from mArgs.size(): global dependences, own field ID -> producer's field ID
+ const Closure* dep = depClosures[i];
+ if (dep != nullptr) {
+ auto mapping = mGlobalDeps[dep];
+ if (mapping == nullptr) {
+ mapping = new map<const ObjectBaseRef<ScriptFieldID>*,
+ const ObjectBaseRef<ScriptFieldID>*>();
+ mGlobalDeps[dep] = mapping;
+ }
+ (*mapping)[new ObjectBaseRef<ScriptFieldID>(
+ const_cast<ScriptFieldID*>(fieldIDs[i]))] =
+ new ObjectBaseRef<ScriptFieldID>(
+ const_cast<ScriptFieldID*>(depFieldIDs[i]));
+ }
+ }
+}
+
+// Frees the heap-allocated dependence tables: every ObjectBaseRef stored in
+// them, followed by the inner maps themselves.
+Closure::~Closure() {
+    for (auto argIt = mArgDeps.begin(); argIt != mArgDeps.end(); ++argIt) {
+        auto byIndex = argIt->second;
+        for (auto refIt = byIndex->begin(); refIt != byIndex->end(); ++refIt) {
+            delete refIt->second;
+        }
+        delete byIndex;
+    }
+
+    for (auto globalIt = mGlobalDeps.begin(); globalIt != mGlobalDeps.end();
+         ++globalIt) {
+        auto byField = globalIt->second;
+        for (auto refIt = byField->begin(); refIt != byField->end(); ++refIt) {
+            // Both the key and the mapped value are owned references.
+            delete refIt->first;
+            delete refIt->second;
+        }
+        delete byField;
+    }
+}
+
+// Evaluates this closure on its own: writes the recorded globals into the
+// kernel's script, then runs the kernel over mArgs with mReturnValue as output.
+void Closure::eval() {
+    Script* script = mKernelID->mScript;
+
+    for (auto it = mGlobals.begin(); it != mGlobals.end(); ++it) {
+        const void* payload = it->second.first;
+        int byteCount = it->second.second;
+        // A negative size marks the value as an ObjectBase pointer;
+        // otherwise the bytes of the stored value itself are copied.
+        if (byteCount >= 0) {
+            script->setVar(it->first->mSlot, (const void*)&payload, byteCount);
+        } else {
+            script->setVarObj(it->first->mSlot, (ObjectBase*)payload);
+        }
+    }
+
+    const Allocation** inputs = (const Allocation **)(&mArgs[0]);
+    script->runForEach(mContext, mKernelID->mSlot, inputs, mArgs.size(),
+                       mReturnValue, nullptr, 0, nullptr);
+}
+
+// Rebinds kernel argument number index to value. size is currently unused.
+// An index outside the argument list built at construction is flagged with
+// rsAssert and then ignored, rather than writing past the end of mArgs.
+void Closure::setArg(const uint32_t index, const void* value, const size_t size) {
+    const bool inRange = index < mArgs.size();
+    rsAssert(inRange);
+    if (!inRange) {
+        return;
+    }
+    mArgs[index] = value;
+}
+
+// Records (or overwrites) the value and size bound to the given script field.
+void Closure::setGlobal(const ScriptFieldID* fieldID, const void* value,
+                        const size_t size) {
+    auto& slot = mGlobals[fieldID];
+    slot.first = value;
+    slot.second = size;
+}
+
+} // namespace renderscript
+} // namespace android
diff --git a/rsClosure.h b/rsClosure.h
new file mode 100644
index 00000000..372cd327
--- /dev/null
+++ b/rsClosure.h
@@ -0,0 +1,78 @@
+#ifndef ANDROID_RENDERSCRIPT_CLOSURE_H_
+#define ANDROID_RENDERSCRIPT_CLOSURE_H_
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include "rsDefines.h"
+#include "rsObjectBase.h"
+
+namespace android {
+namespace renderscript {
+
+using std::map;
+using std::pair;
+using std::set;
+using std::vector;
+
+class Allocation;
+class Context;
+class ScriptFieldID;
+class ScriptKernelID;
+class Type;
+
+class Closure : public ObjectBase {  // a kernel invocation together with its bound arguments, globals and dependences
+ public:
+ Closure(Context* context,
+ const ScriptKernelID* kernelID,
+ Allocation* returnValue,
+ const int numValues,
+ const ScriptFieldID** fieldIDs,
+ const void** values, // Allocations or primitive (numeric) types
+ const size_t* sizes, // size for data type. -1 indicates an allocation.
+ const Closure** depClosures,
+ const ScriptFieldID** depFieldIDs);
+
+ virtual ~Closure();
+
+ virtual void serialize(Context *rsc, OStream *stream) const {}  // intentionally a no-op
+
+ virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_CLOSURE; }
+
+ void eval();
+
+ void setArg(const uint32_t index, const void* value, const size_t size);
+ void setGlobal(const ScriptFieldID* fieldID, const void* value,
+ const size_t size);
+
+ Context* mContext;
+ const ObjectBaseRef<ScriptKernelID> mKernelID;
+
+ // Values referenced in arguments and globals cannot be futures. They must be
+ // either a known value or unbound value.
+ // For now, all arguments should be Allocations.
+ vector<const void*> mArgs;
+
+ // A global could be allocation or any primitive data type.
+ map<const ScriptFieldID*, pair<const void*, int>> mGlobals;  // field -> (value, size); a negative size marks an object
+
+ Allocation* mReturnValue;
+
+ // All the other closures that this closure depends on
+ set<const Closure*> mDependences;
+
+ // All the other closures which this closure depends on for one of its
+ // arguments, and the fields which it depends on.
+ map<const Closure*, map<int, const ObjectBaseRef<ScriptFieldID>*>*> mArgDeps;  // inner maps and refs are freed in ~Closure
+
+ // All the other closures that this closure depends on for one of its fields,
+ // and the fields that it depends on.
+ map<const Closure*, map<const ObjectBaseRef<ScriptFieldID>*,
+ const ObjectBaseRef<ScriptFieldID>*>*> mGlobalDeps;  // inner maps and refs are freed in ~Closure
+};
+
+} // namespace renderscript
+} // namespace android
+
+#endif // ANDROID_RENDERSCRIPT_CLOSURE_H_
diff --git a/rsDefines.h b/rsDefines.h
index 0a91ea8d..9345eb9b 100644
--- a/rsDefines.h
+++ b/rsDefines.h
@@ -33,6 +33,7 @@ typedef void * RsAdapter1D;
typedef void * RsAdapter2D;
typedef void * RsAllocation;
typedef void * RsAnimation;
+typedef void * RsClosure;
typedef void * RsContext;
typedef void * RsDevice;
typedef void * RsElement;
@@ -44,6 +45,7 @@ typedef void * RsScriptKernelID;
typedef void * RsScriptFieldID;
typedef void * RsScriptMethodID;
typedef void * RsScriptGroup;
+typedef void * RsScriptGroup2;
typedef void * RsMesh;
typedef void * RsPath;
typedef void * RsType;
@@ -251,7 +253,3 @@ enum RsContextFlags {
#endif
#endif // RENDER_SCRIPT_DEFINES_H
-
-
-
-
diff --git a/rsFileA3D.cpp b/rsFileA3D.cpp
index 6f146370..3fe69422 100644
--- a/rsFileA3D.cpp
+++ b/rsFileA3D.cpp
@@ -294,6 +294,10 @@ ObjectBase *FileA3D::initializeFromEntry(size_t index) {
break;
case RS_A3D_CLASS_ID_SCRIPT_GROUP:
break;
+ case RS_A3D_CLASS_ID_CLOSURE:
+ break;
+ case RS_A3D_CLASS_ID_SCRIPT_GROUP2:
+ break;
}
if (entry->mRsObj) {
entry->mRsObj->incUserRef();
diff --git a/rsInternalDefines.h b/rsInternalDefines.h
index 19f21226..57cb72a9 100644
--- a/rsInternalDefines.h
+++ b/rsInternalDefines.h
@@ -163,7 +163,9 @@ enum RsA3DClassID {
RS_A3D_CLASS_ID_SCRIPT_KERNEL_ID,
RS_A3D_CLASS_ID_SCRIPT_FIELD_ID,
RS_A3D_CLASS_ID_SCRIPT_METHOD_ID,
- RS_A3D_CLASS_ID_SCRIPT_GROUP
+ RS_A3D_CLASS_ID_SCRIPT_GROUP,
+ RS_A3D_CLASS_ID_CLOSURE,
+ RS_A3D_CLASS_ID_SCRIPT_GROUP2
};
enum RsCullMode {
@@ -202,7 +204,3 @@ typedef struct {
#endif
#endif // RENDER_SCRIPT_DEFINES_H
-
-
-
-
diff --git a/rsScriptGroup.cpp b/rsScriptGroup.cpp
index 618c28c0..791ab147 100644
--- a/rsScriptGroup.cpp
+++ b/rsScriptGroup.cpp
@@ -14,15 +14,18 @@
* limitations under the License.
*/
-#include <algorithm>
+#include "rsScriptGroup.h"
#include "rsContext.h"
+#include "rsScriptGroup2.h"
+
+#include <algorithm>
#include <time.h>
using namespace android;
using namespace android::renderscript;
-ScriptGroup::ScriptGroup(Context *rsc) : ObjectBase(rsc) {
+ScriptGroup::ScriptGroup(Context *rsc) : ScriptGroupBase(rsc) {
}
ScriptGroup::~ScriptGroup() {
@@ -270,12 +273,10 @@ bool ScriptGroup::validateInputAndOutput(Context *rsc) {
}
void ScriptGroup::execute(Context *rsc) {
-
if (!validateInputAndOutput(rsc)) {
return;
}
- //ALOGE("ScriptGroup::execute");
if (rsc->mHal.funcs.scriptgroup.execute) {
rsc->mHal.funcs.scriptgroup.execute(rsc, this);
return;
@@ -324,13 +325,6 @@ void ScriptGroup::execute(Context *rsc) {
}
-void ScriptGroup::serialize(Context *rsc, OStream *stream) const {
-}
-
-RsA3DClassID ScriptGroup::getClassId() const {
- return RS_A3D_CLASS_ID_SCRIPT_GROUP;
-}
-
ScriptGroup::Link::Link() {
}
@@ -371,7 +365,7 @@ void rsi_ScriptGroupSetOutput(Context *rsc, RsScriptGroup sg, RsScriptKernelID k
}
void rsi_ScriptGroupExecute(Context *rsc, RsScriptGroup sg) {
- ScriptGroup *s = (ScriptGroup *)sg;
+ ScriptGroupBase *s = (ScriptGroupBase *)sg;
s->execute(rsc);
}
diff --git a/rsScriptGroup.h b/rsScriptGroup.h
index 974e3ba5..ff0259a4 100644
--- a/rsScriptGroup.h
+++ b/rsScriptGroup.h
@@ -17,21 +17,30 @@
#ifndef ANDROID_RS_SCRIPT_GROUP_H
#define ANDROID_RS_SCRIPT_GROUP_H
-#include "rsAllocation.h"
-#include "rsScript.h"
+#include "rsScriptGroupBase.h"
+#include <vector>
// ---------------------------------------------------------------------------
namespace android {
namespace renderscript {
+class Allocation;
+class Context;
class ProgramVertex;
class ProgramFragment;
class ProgramRaster;
class ProgramStore;
+class Script;
+class ScriptFieldID;
+class ScriptKernelID;
+class Type;
-class ScriptGroup : public ObjectBase {
+class ScriptGroup : public ScriptGroupBase {
public:
+ virtual SG_API_Version getApiVersion() const { return SG_V1; }
+ virtual void execute(Context *rsc);
+
std::vector<ObjectBaseRef<ScriptKernelID> > mKernels;
class Link {
@@ -70,15 +79,6 @@ public:
std::vector<IO *> mInputs;
std::vector<IO *> mOutputs;
- struct Hal {
- void * drv;
-
- struct DriverInfo {
- };
- DriverInfo info;
- };
- Hal mHal;
-
static ScriptGroup * create(Context *rsc,
ScriptKernelID ** kernels, size_t kernelsSize,
ScriptKernelID ** src, size_t srcSize,
@@ -86,14 +86,9 @@ public:
ScriptFieldID ** dstF, size_t dstFSize,
const Type ** type, size_t typeSize);
- virtual void serialize(Context *rsc, OStream *stream) const;
- virtual RsA3DClassID getClassId() const;
-
- void execute(Context *rsc);
void setInput(Context *rsc, ScriptKernelID *kid, Allocation *a);
void setOutput(Context *rsc, ScriptKernelID *kid, Allocation *a);
-
protected:
virtual ~ScriptGroup();
bool mInitialized;
diff --git a/rsScriptGroup2.cpp b/rsScriptGroup2.cpp
new file mode 100644
index 00000000..06a252aa
--- /dev/null
+++ b/rsScriptGroup2.cpp
@@ -0,0 +1,27 @@
+#include "rsScriptGroup2.h"
+
+#include "rsContext.h"
+
+namespace android {
+namespace renderscript {
+
+// Hands execution to the driver's script group execute hook; does nothing
+// when the driver does not provide one.
+void ScriptGroup2::execute(Context* rsc) {
+    if (!rsc->mHal.funcs.scriptgroup.execute) {
+        return;
+    }
+    rsc->mHal.funcs.scriptgroup.execute(rsc, this);
+}
+
+RsScriptGroup2 rsi_ScriptGroup2Create(Context* rsc, RsClosure* closures,  // runtime entry point: builds a ScriptGroup2 from an array of closure handles
+ size_t numClosures) {
+ ScriptGroup2* group = new ScriptGroup2(rsc, (Closure**)closures, numClosures);  // the closure pointers are stored as given, in order
+
+ // Create a device-specific implementation by calling the device driver
+ if (rsc->mHal.funcs.scriptgroup.init) {
+ rsc->mHal.funcs.scriptgroup.init(rsc, group);  // the hook's bool result is ignored
+ }
+
+ return group;  // NOTE(review): no incUserRef() before the object is handed to the client — confirm the caller takes the reference
+}
+
+} // namespace renderscript
+} // namespace android
diff --git a/rsScriptGroup2.h b/rsScriptGroup2.h
new file mode 100644
index 00000000..c759faff
--- /dev/null
+++ b/rsScriptGroup2.h
@@ -0,0 +1,36 @@
+#ifndef ANDROID_RENDERSCRIPT_SCRIPTGROUP2_H_
+#define ANDROID_RENDERSCRIPT_SCRIPTGROUP2_H_
+
+#include "rsScriptGroupBase.h"
+
+#include <list>
+
+namespace android {
+namespace renderscript {
+
+class Closure;
+class Context;
+
+// Script group built with the V2 API: an ordered list of closures that is
+// executed through the driver's script group hooks.
+class ScriptGroup2 : public ScriptGroupBase {
+ public:
+    /*
+      TODO:
+      Inputs and outputs are set and retrieved in Java runtime.
+      They are opaque in the C++ runtime.
+      For better compiler optimizations (of a script group), we need to include
+      input and output information in the C++ runtime.
+     */
+    ScriptGroup2(Context* rsc, Closure** closures, size_t numClosures) :
+        ScriptGroupBase(rsc), mClosures(closures, closures + numClosures) {}
+    // NOTE(review): unlike execute(), no driver hook is invoked on destruction;
+    // confirm the driver object stored in mHal.drv is released elsewhere.
+    virtual ~ScriptGroup2() {}
+
+    // Report the class ID dedicated to this type rather than inheriting the
+    // RS_A3D_CLASS_ID_SCRIPT_GROUP value returned by ScriptGroupBase.
+    virtual RsA3DClassID getClassId() const {
+        return RS_A3D_CLASS_ID_SCRIPT_GROUP2;
+    }
+
+    virtual SG_API_Version getApiVersion() const { return SG_V2; }
+    virtual void execute(Context* rsc);
+
+    // Closures in the order they were passed in; stored as raw pointers.
+    std::list<Closure*> mClosures;
+};
+
+} // namespace renderscript
+} // namespace android
+
+#endif // ANDROID_RENDERSCRIPT_SCRIPTGROUP2_H_
diff --git a/rsScriptGroupBase.h b/rsScriptGroupBase.h
new file mode 100644
index 00000000..00ae6c6d
--- /dev/null
+++ b/rsScriptGroupBase.h
@@ -0,0 +1,41 @@
+#ifndef ANDROID_RS_SCRIPT_GROUP_BASE_H
+#define ANDROID_RS_SCRIPT_GROUP_BASE_H
+
+#include "rsObjectBase.h"
+
+namespace android {
+namespace renderscript {
+
+// Common base of the V1 (ScriptGroup) and V2 (ScriptGroup2) script groups.
+// It carries the driver-private state (mHal) and the interface the driver
+// uses to tell the two API versions apart.
+class ScriptGroupBase : public ObjectBase {
+ public:
+    ScriptGroupBase(Context* rsc) : ObjectBase(rsc) {
+        // Start with no driver object so the pointer is well defined even
+        // when the driver's init hook is absent or has not run yet.
+        mHal.drv = nullptr;
+    }
+    virtual ~ScriptGroupBase() {}
+
+    // Script groups are not serialized; intentionally a no-op.
+    virtual void serialize(Context *rsc, OStream *stream) const {}
+
+    virtual RsA3DClassID getClassId() const {
+        return RS_A3D_CLASS_ID_SCRIPT_GROUP;
+    }
+
+    // API generation of the concrete group.
+    enum SG_API_Version {
+        SG_V1 = 10,
+        SG_V2 = 20,
+    };
+
+    virtual void execute(Context *rsc) = 0;
+    virtual SG_API_Version getApiVersion() const = 0;
+
+    struct Hal {
+        // Driver-private object for this group; set by the driver.
+        void * drv;
+
+        struct DriverInfo {
+        };
+        DriverInfo info;
+    };
+    Hal mHal;
+};
+
+} // namespace renderscript
+} // namespace android
+
+#endif // ANDROID_RS_SCRIPT_GROUP_BASE_H
diff --git a/rs_hal.h b/rs_hal.h
index 419827b2..b3c2e39e 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -35,6 +35,7 @@ class ScriptFieldID;
class ScriptMethodID;
class ScriptC;
class ScriptGroup;
+class ScriptGroupBase;
class Path;
class Program;
class ProgramStore;
@@ -300,13 +301,13 @@ typedef struct {
} framebuffer;
struct {
- bool (*init)(const Context *rsc, ScriptGroup *sg);
+ bool (*init)(const Context *rsc, ScriptGroupBase *sg);
void (*setInput)(const Context *rsc, const ScriptGroup *sg,
const ScriptKernelID *kid, Allocation *);
void (*setOutput)(const Context *rsc, const ScriptGroup *sg,
const ScriptKernelID *kid, Allocation *);
- void (*execute)(const Context *rsc, const ScriptGroup *sg);
- void (*destroy)(const Context *rsc, const ScriptGroup *sg);
+ void (*execute)(const Context *rsc, const ScriptGroupBase *sg);
+ void (*destroy)(const Context *rsc, const ScriptGroupBase *sg);
void (*updateCachedObject)(const Context *rsc, const ScriptGroup *sg, rs_script_group *obj);
} scriptgroup;