diff options
author | Yang Ni <yangni@google.com> | 2015-01-07 09:16:40 -0800 |
---|---|---|
committer | Yang Ni <yangni@google.com> | 2015-01-07 09:16:40 -0800 |
commit | 1ffd86b448d78366190c540f98f8b6d641cdb6cf (patch) | |
tree | 65dc9b2f0c6d53ce4858ae990ab82f34cc26fd06 | |
parent | 82f515b5a40d030f88bf622b8c05a03ec80083ee (diff) | |
download | rs-1ffd86b448d78366190c540f98f8b6d641cdb6cf.tar.gz |
New Script Group API: runtime and cpu driver support.
Change-Id: I9c612cf8874aabaf0ca7d1640567464c71ed3070
-rw-r--r-- | Android.mk | 4 | ||||
-rw-r--r-- | cpp/rsDispatch.h | 9 | ||||
-rw-r--r-- | cpu_ref/Android.mk | 1 | ||||
-rw-r--r-- | cpu_ref/rsCpuCore.cpp | 17 | ||||
-rw-r--r-- | cpu_ref/rsCpuCore.h | 2 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup.cpp | 6 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup.h | 2 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup2.cpp | 192 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup2.h | 60 | ||||
-rw-r--r-- | cpu_ref/rsd_cpu.h | 18 | ||||
-rw-r--r-- | driver/rsdScriptGroup.cpp | 14 | ||||
-rw-r--r-- | driver/rsdScriptGroup.h | 6 | ||||
-rw-r--r-- | rs.spec | 31 | ||||
-rw-r--r-- | rsClosure.cpp | 147 | ||||
-rw-r--r-- | rsClosure.h | 78 | ||||
-rw-r--r-- | rsDefines.h | 6 | ||||
-rw-r--r-- | rsFileA3D.cpp | 4 | ||||
-rw-r--r-- | rsInternalDefines.h | 8 | ||||
-rw-r--r-- | rsScriptGroup.cpp | 18 | ||||
-rw-r--r-- | rsScriptGroup.h | 29 | ||||
-rw-r--r-- | rsScriptGroup2.cpp | 27 | ||||
-rw-r--r-- | rsScriptGroup2.h | 36 | ||||
-rw-r--r-- | rsScriptGroupBase.h | 41 | ||||
-rw-r--r-- | rs_hal.h | 7 |
24 files changed, 698 insertions, 65 deletions
@@ -141,6 +141,7 @@ LOCAL_SRC_FILES:= \ rsAnimation.cpp \ rsComponent.cpp \ rsContext.cpp \ + rsClosure.cpp \ rsCppUtils.cpp \ rsDevice.cpp \ rsElement.cpp \ @@ -167,6 +168,7 @@ LOCAL_SRC_FILES:= \ rsScriptC_Lib.cpp \ rsScriptC_LibGL.cpp \ rsScriptGroup.cpp \ + rsScriptGroup2.cpp \ rsScriptIntrinsic.cpp \ rsSignal.cpp \ rsStream.cpp \ @@ -245,6 +247,7 @@ LOCAL_SRC_FILES:= \ rsAnimation.cpp \ rsComponent.cpp \ rsContext.cpp \ + rsClosure.cpp \ rsDevice.cpp \ rsElement.cpp \ rsFBOCache.cpp \ @@ -269,6 +272,7 @@ LOCAL_SRC_FILES:= \ rsScriptC_Lib.cpp \ rsScriptC_LibGL.cpp \ rsScriptGroup.cpp \ + rsScriptGroup2.cpp \ rsScriptIntrinsic.cpp \ rsSignal.cpp \ rsStream.cpp \ diff --git a/cpp/rsDispatch.h b/cpp/rsDispatch.h index 659591b0..53d72d4e 100644 --- a/cpp/rsDispatch.h +++ b/cpp/rsDispatch.h @@ -28,7 +28,9 @@ typedef void (*DeviceDestroyFnPtr) (RsDevice dev); typedef void (*DeviceSetConfigFnPtr) (RsDevice dev, RsDeviceParam p, int32_t value); typedef RsContext (*ContextCreateFnPtr)(RsDevice vdev, uint32_t version, uint32_t sdkVersion, RsContextType ct, uint32_t flags); typedef void (*GetNameFnPtr)(RsContext, void * obj, const char **name); - +typedef RsClosure (*ClosureCreateFnPtr)(RsContext, RsScriptKernelID, RsAllocation, RsScriptFieldID*, size_t, uintptr_t*, size_t, size_t*, size_t, RsClosure*, size_t, RsScriptFieldID*, size_t); +typedef void (*ClosureSetArgFnPtr)(RsContext, RsClosure, uint32_t, uintptr_t, size_t); +typedef void (*ClosureSetGlobalFnPtr)(RsContext, RsClosure, RsScriptFieldID, uintptr_t, size_t); typedef void (*ContextDestroyFnPtr) (RsContext); typedef RsMessageToClientType (*ContextGetMessageFnPtr) (RsContext, void*, size_t, size_t*, size_t, uint32_t*, size_t); typedef RsMessageToClientType (*ContextPeekMessageFnPtr) (RsContext, size_t*, size_t, uint32_t*, size_t); @@ -80,6 +82,7 @@ typedef RsScript (*ScriptIntrinsicCreateFnPtr) (RsContext, uint32_t id, RsElemen typedef RsScriptKernelID (*ScriptKernelIDCreateFnPtr) (RsContext, RsScript, int, int); typedef RsScriptFieldID (*ScriptFieldIDCreateFnPtr) (RsContext, RsScript, int); typedef RsScriptGroup (*ScriptGroupCreateFnPtr) (RsContext, RsScriptKernelID*, size_t, RsScriptKernelID*, size_t, RsScriptKernelID*, size_t, RsScriptFieldID*, size_t, const RsType*, size_t); +typedef RsScriptGroup2 (*ScriptGroup2CreateFnPtr)(RsContext, RsClosure*, size_t); typedef void (*ScriptGroupSetOutputFnPtr) (RsContext, RsScriptGroup, RsScriptKernelID, RsAllocation); typedef void (*ScriptGroupSetInputFnPtr) (RsContext, RsScriptGroup, RsScriptKernelID, RsAllocation); typedef void (*ScriptGroupExecuteFnPtr) (RsContext, RsScriptGroup); @@ -113,6 +116,9 @@ struct dispatchTable { AllocationCubeCreateFromBitmapFnPtr AllocationCubeCreateFromBitmap; AllocationGetSurfaceFnPtr AllocationGetSurface; AllocationSetSurfaceFnPtr AllocationSetSurface; + ClosureCreateFnPtr ClosureCreate; + ClosureSetArgFnPtr ClosureSetArg; + ClosureSetGlobalFnPtr ClosureSetGlobal; ContextFinishFnPtr ContextFinish; ContextDumpFnPtr ContextDump; ContextSetPriorityFnPtr ContextSetPriority; @@ -152,6 +158,7 @@ struct dispatchTable { ScriptKernelIDCreateFnPtr ScriptKernelIDCreate; ScriptFieldIDCreateFnPtr ScriptFieldIDCreate; ScriptGroupCreateFnPtr ScriptGroupCreate; + ScriptGroup2CreateFnPtr ScriptGroup2Create; ScriptGroupSetOutputFnPtr ScriptGroupSetOutput; ScriptGroupSetInputFnPtr ScriptGroupSetInput; ScriptGroupExecuteFnPtr ScriptGroupExecute; diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk index 5de964fc..27243f9f 100644 --- a/cpu_ref/Android.mk +++ b/cpu_ref/Android.mk @@ -29,6 +29,7 @@ LOCAL_SRC_FILES:= \ rsCpuRuntimeMath.cpp \ rsCpuRuntimeStubs.cpp \ rsCpuScriptGroup.cpp \ + rsCpuScriptGroup2.cpp \ rsCpuIntrinsic.cpp \ rsCpuIntrinsic3DLUT.cpp \ rsCpuIntrinsicBlend.cpp \ diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp index 47bc1c01..84c24169 100644 --- a/cpu_ref/rsCpuCore.cpp +++ b/cpu_ref/rsCpuCore.cpp @@ -17,6 +17,7 @@ #include "rsCpuCore.h" #include "rsCpuScript.h" #include "rsCpuScriptGroup.h" +#include "rsCpuScriptGroup2.h" #include <malloc.h> #include "rsContext.h" @@ -660,11 +661,19 @@ RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script * return i; } -RsdCpuReference::CpuScriptGroup * RsdCpuReferenceImpl::createScriptGroup(const ScriptGroup *sg) { - CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg); - if (!sgi->init()) { +void* RsdCpuReferenceImpl::createScriptGroup(const ScriptGroupBase *sg) { + switch (sg->getApiVersion()) { + case ScriptGroupBase::SG_V1: { + CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg); + if (!sgi->init()) { delete sgi; return nullptr; + } + return sgi; } - return sgi; + case ScriptGroupBase::SG_V2: { + return new CpuScriptGroup2Impl(this, sg); + } + } + return nullptr; } diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h index bfd5e512..e0696583 100644 --- a/cpu_ref/rsCpuCore.h +++ b/cpu_ref/rsCpuCore.h @@ -182,7 +182,7 @@ public: uint32_t flags); virtual CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e); - virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg); + virtual void* createScriptGroup(const ScriptGroupBase *sg); const RsdCpuReference::CpuSymbol *symLookup(const char *); diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp index 751bafb8..3d32a512 100644 --- a/cpu_ref/rsCpuScriptGroup.cpp +++ b/cpu_ref/rsCpuScriptGroup.cpp @@ -18,15 +18,13 @@ #include "rsCpuScript.h" #include "rsScriptGroup.h" #include "rsCpuScriptGroup.h" -//#include "rsdBcc.h" -//#include "rsdAllocation.h" using namespace android; using namespace android::renderscript; -CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) { +CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) { mCtx = ctx; - mSG = sg; + mSG = (ScriptGroup*)sg; } CpuScriptGroupImpl::~CpuScriptGroupImpl() { diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h index 1a4af058..50ba2acc 100644 --- a/cpu_ref/rsCpuScriptGroup.h +++ b/cpu_ref/rsCpuScriptGroup.h @@ -30,7 +30,7 @@ public: virtual void execute(); virtual ~CpuScriptGroupImpl(); - CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg); + CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg); bool init(); static void scriptGroupRoot(const RsExpandKernelParams *p, diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp new file mode 100644 index 00000000..9dc4d900 --- /dev/null +++ b/cpu_ref/rsCpuScriptGroup2.cpp @@ -0,0 +1,192 @@ +#include "rsCpuScriptGroup2.h" + +#include "cpu_ref/rsCpuCore.h" +#include "rsClosure.h" +#include "rsContext.h" +#include "rsCpuCore.h" +#include "rsCpuScript.h" +#include "rsScript.h" +#include "rsScriptGroup2.h" + +namespace android { +namespace renderscript { + +namespace { + +static const size_t DefaultKernelArgCount = 2; + +void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, + uint32_t xend, uint32_t outstep) { + const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr; + RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; + const void **oldIns = kparams->ins; + uint32_t *oldStrides = kparams->inEStrides; + + std::vector<const void*> ins(DefaultKernelArgCount); + std::vector<uint32_t> strides(DefaultKernelArgCount); + + for (CPUClosure* cpuClosure : closures) { + const Closure* closure = cpuClosure->mClosure; + + auto in_iter = ins.begin(); + auto stride_iter = strides.begin(); + + for (const auto& arg : closure->mArgs) { + const Allocation* a = (const Allocation*)arg; + const uint32_t eStride = a->mHal.state.elementSizeBytes; + const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + + eStride * xstart; + if (kparams->dimY > 1) { + ptr += a->mHal.drvState.lod[0].stride * kparams->y; + } + *in_iter++ = ptr; + *stride_iter++ = eStride; + } + + mutable_kparams->ins = &ins[0]; + mutable_kparams->inEStrides = &strides[0]; + + const Allocation* out = closure->mReturnValue; + const uint32_t ostep = out->mHal.state.elementSizeBytes; + const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + + ostep * xstart; + if (kparams->dimY > 1) { + ptr += out->mHal.drvState.lod[0].stride * kparams->y; + } + + mutable_kparams->out = (void*)ptr; + + mutable_kparams->usr = cpuClosure->mUsrPtr; + + cpuClosure->mFunc(kparams, xstart, xend, ostep); + } + + mutable_kparams->ins = oldIns; + mutable_kparams->inEStrides = oldStrides; + mutable_kparams->usr = &closures; +} + +/* + Returns true if closure depends on any closure in batch via a glboal variable + TODO: this probably should go into class Closure. + */ +bool conflict(const list<CPUClosure*> &batch, CPUClosure* closure) { + for (const auto &p : closure->mClosure->mGlobalDeps) { + const Closure* dep = p.first; + for (CPUClosure* c : batch) { + if (c->mClosure == dep) { + return true; + } + } + } + for (const auto &p : closure->mClosure->mArgDeps) { + const Closure* dep = p.first; + for (CPUClosure* c : batch) { + if (c->mClosure == dep) { + for (const auto &p1 : *p.second) { + if (p1.second != nullptr) { + return true; + } + } + } + } + } + return false; +} + +} // namespace + +CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, + const ScriptGroupBase *sg) : + mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { + list<CPUClosure*>* batch = new list<CPUClosure*>(); + for (Closure* closure: mGroup->mClosures) { + const ScriptKernelID* kernelID = closure->mKernelID.get(); + RsdCpuScriptImpl* si = + (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); + + MTLaunchStruct mtls; + si->forEachKernelSetup(kernelID->mSlot, &mtls); + // TODO: Is mtls.fep.usrLen ever used? + CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, + mtls.fep.usr, mtls.fep.usrLen); + if (conflict(*batch, cc)) { + mBatches.push_back(batch); + batch = new list<CPUClosure*>(); + } + batch->push_back(cc); + } + mBatches.push_back(batch); +} + +CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { + for (list<CPUClosure*>* batch : mBatches) { + for (CPUClosure* c : *batch) { + delete c; + } + } +} + +void CpuScriptGroup2Impl::execute() { + for (list<CPUClosure*>* batch : mBatches) { + setGlobalsForBatch(*batch); + runBatch(*batch); + } +} + +void CpuScriptGroup2Impl::setGlobalsForBatch(const list<CPUClosure*>& batch) { + for (CPUClosure* cpuClosure : batch) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + Script* s = kernelID->mScript; + for (const auto& p : closure->mGlobals) { + const void* value = p.second.first; + int size = p.second.second; + // We use -1 size to indicate an ObjectBase rather than a primitive type + if (size < 0) { + s->setVarObj(p.first->mSlot, (ObjectBase*)value); + } else { + s->setVar(p.first->mSlot, (const void*)&value, size); + } + } + } +} + +void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) { + for (CPUClosure* cpuClosure : batch) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + cpuClosure->mSi->preLaunch(kernelID->mSlot, + (const Allocation**)&closure->mArgs[0], + closure->mArgs.size(), closure->mReturnValue, + cpuClosure->mUsrPtr, cpuClosure->mUsrSize, + nullptr); + } + + const CPUClosure* cpuClosure = batch.front(); + const Closure* closure = cpuClosure->mClosure; + MTLaunchStruct mtls; + + cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0], + closure->mArgs.size(), + closure->mReturnValue, + nullptr, 0, nullptr, &mtls); + + mtls.script = nullptr; + mtls.kernel = (void (*)())&groupRoot; + mtls.fep.usr = &batch; + + mCpuRefImpl->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); + + for (CPUClosure* cpuClosure : batch) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + cpuClosure->mSi->postLaunch(kernelID->mSlot, + (const Allocation**)&closure->mArgs[0], + closure->mArgs.size(), closure->mReturnValue, + nullptr, 0, nullptr); + } +} + +} // namespace renderscript +} // namespace android diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h new file mode 100644 index 00000000..6cb72a65 --- /dev/null +++ b/cpu_ref/rsCpuScriptGroup2.h @@ -0,0 +1,60 @@ +#ifndef CPU_REF_CPUSCRIPTGROUP2IMPL_H_ +#define CPU_REF_CPUSCRIPTGROUP2IMPL_H_ + +#include <list> + +#include "rsd_cpu.h" + +using std::list; + +namespace android { +namespace renderscript { + +class Closure; +class RsdCpuScriptImpl; +class RsdCpuReferenceImpl; +class ScriptGroup2; + +struct RsExpandKernelParams; + +typedef void (*ExpandFuncTy)(const RsExpandKernelParams*, uint32_t, uint32_t, + uint32_t); + +class CPUClosure { + public: + CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func, + const void* usrPtr, const size_t usrSize) : + mClosure(closure), mSi(si), mFunc(func), mUsrPtr(usrPtr), + mUsrSize(usrSize) {} + + // It's important to do forwarding here than inheritance for unbound value + // binding to work. + const Closure* mClosure; + RsdCpuScriptImpl* mSi; + const ExpandFuncTy mFunc; + const void* mUsrPtr; + const size_t mUsrSize; +}; + +class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 { + public: + CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase* group); + virtual ~CpuScriptGroup2Impl(); + + bool init(); + virtual void execute(); + + private: + void setGlobalsForBatch(const list<CPUClosure*>& batch); + void runBatch(const list<CPUClosure*>& batch); + + RsdCpuReferenceImpl* mCpuRefImpl; + const ScriptGroup2* mGroup; + + list<list<CPUClosure*>*> mBatches; +}; + +} // namespace renderscript +} // namespace android + +#endif // CPU_REF_CPUSCRIPTGROUP2IMPL_H_ diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h index b0e924e3..d886cef0 100644 --- a/cpu_ref/rsd_cpu.h +++ b/cpu_ref/rsd_cpu.h @@ -45,7 +45,7 @@ namespace renderscript { class ScriptC; class Script; -class ScriptGroup; +class ScriptGroupBase; class ScriptKernelID; @@ -97,7 +97,13 @@ public: }; typedef CpuScript * (* script_lookup_t)(Context *, const Script *s); - class CpuScriptGroup { + class CpuScriptGroupBase { + public: + virtual void execute() = 0; + virtual ~CpuScriptGroupBase() {} + }; + + class CpuScriptGroup : public CpuScriptGroupBase { public: virtual void setInput(const ScriptKernelID *kid, Allocation *) = 0; virtual void setOutput(const ScriptKernelID *kid, Allocation *) = 0; @@ -105,6 +111,12 @@ public: virtual ~CpuScriptGroup() {}; }; + class CpuScriptGroup2 : public CpuScriptGroupBase { + public: + virtual void execute() = 0; + virtual ~CpuScriptGroup2() {} + }; + static Context * getTlsContext(); static const Script * getTlsScript(); static pthread_key_t getThreadTLSKey(); @@ -124,7 +136,7 @@ public: uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) = 0; virtual CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) = 0; - virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg) = 0; + virtual void* createScriptGroup(const ScriptGroupBase *sg) = 0; virtual bool getInForEach() = 0; #ifndef RS_COMPATIBILITY_LIB diff --git a/driver/rsdScriptGroup.cpp b/driver/rsdScriptGroup.cpp index a7b2e774..ed800a36 100644 --- a/driver/rsdScriptGroup.cpp +++ b/driver/rsdScriptGroup.cpp @@ -28,7 +28,7 @@ using namespace android; using namespace android::renderscript; -bool rsdScriptGroupInit(const Context *rsc, ScriptGroup *sg) { +bool rsdScriptGroupInit(const Context *rsc, ScriptGroupBase *sg) { RsdHal *dc = (RsdHal *)rsc->mHal.drv; sg->mHal.drv = dc->mCpuRef->createScriptGroup(sg); @@ -43,13 +43,15 @@ void rsdScriptGroupSetOutput(const Context *rsc, const ScriptGroup *sg, const ScriptKernelID *kid, Allocation *) { } -void rsdScriptGroupExecute(const Context *rsc, const ScriptGroup *sg) { - RsdCpuReference::CpuScriptGroup *sgi = (RsdCpuReference::CpuScriptGroup *)sg->mHal.drv; +void rsdScriptGroupExecute(const Context *rsc, const ScriptGroupBase *sg) { + RsdCpuReference::CpuScriptGroupBase *sgi = + (RsdCpuReference::CpuScriptGroupBase *)sg->mHal.drv; sgi->execute(); } -void rsdScriptGroupDestroy(const Context *rsc, const ScriptGroup *sg) { - RsdCpuReference::CpuScriptGroup *sgi = (RsdCpuReference::CpuScriptGroup *)sg->mHal.drv; +void rsdScriptGroupDestroy(const Context *rsc, const ScriptGroupBase *sg) { + RsdCpuReference::CpuScriptGroupBase *sgi = + (RsdCpuReference::CpuScriptGroupBase *)sg->mHal.drv; delete sgi; } @@ -68,5 +70,3 @@ void rsdScriptGroupUpdateCachedObject(const Context *rsc, obj->v2 = nullptr; #endif } - - diff --git a/driver/rsdScriptGroup.h b/driver/rsdScriptGroup.h index db44e231..95e5d19b 100644 --- a/driver/rsdScriptGroup.h +++ b/driver/rsdScriptGroup.h @@ -20,7 +20,7 @@ #include <rs_hal.h> bool rsdScriptGroupInit(const android::renderscript::Context *rsc, - android::renderscript::ScriptGroup *sg); + android::renderscript::ScriptGroupBase *sg); void rsdScriptGroupSetInput(const android::renderscript::Context *rsc, const android::renderscript::ScriptGroup *sg, const android::renderscript::ScriptKernelID *kid, @@ -30,9 +30,9 @@ void rsdScriptGroupSetOutput(const android::renderscript::Context *rsc, const android::renderscript::ScriptKernelID *kid, android::renderscript::Allocation *); void rsdScriptGroupExecute(const android::renderscript::Context *rsc, - const android::renderscript::ScriptGroup *sg); + const android::renderscript::ScriptGroupBase *sg); void rsdScriptGroupDestroy(const android::renderscript::Context *rsc, - const android::renderscript::ScriptGroup *sg); + const android::renderscript::ScriptGroupBase *sg); void rsdScriptGroupUpdateCachedObject(const android::renderscript::Context *rsc, const android::renderscript::ScriptGroup *sg, android::renderscript::rs_script_group *obj); @@ -252,6 +252,31 @@ AllocationCopy3DRange { param uint32_t srcMip } +ClosureCreate { + direct + param RsScriptKernelID kernelID + param RsAllocation returnValue + param RsScriptFieldID * fieldIDs + param uintptr_t * values + param size_t * sizes + param RsClosure * depClosures + param RsScriptFieldID * depFieldIDs + ret RsClosure + } + +ClosureSetArg { + param RsClosure closureID + param uint32_t index + param uintptr_t value + param size_t valueSize +} + +ClosureSetGlobal { + param RsClosure closureID + param RsScriptFieldID fieldID + param uintptr_t value + param size_t valueSize +} SamplerCreate { direct @@ -410,6 +435,12 @@ ScriptGroupExecute { param RsScriptGroup group } +ScriptGroup2Create{ + direct + param RsClosure * closures + ret RsScriptGroup2 +} + AllocationIoSend { param RsAllocation alloc } diff --git a/rsClosure.cpp b/rsClosure.cpp new file mode 100644 index 00000000..8530fc16 --- /dev/null +++ b/rsClosure.cpp @@ -0,0 +1,147 @@ +#include "rsClosure.h" + +#include "cpu_ref/rsCpuCore.h" +#include "rsContext.h" // XXX: necessary to avoid compiler error on rsScript.h below +#include "rsScript.h" +#include "rsType.h" + +namespace android { +namespace renderscript { + +RsClosure rsi_ClosureCreate(Context* context, RsScriptKernelID kernelID, + RsAllocation returnValue, + RsScriptFieldID* fieldIDs, size_t fieldIDs_length, + uintptr_t* values, size_t values_length, + size_t* sizes, size_t sizes_length, + RsClosure* depClosures, size_t depClosures_length, + RsScriptFieldID* depFieldIDs, + size_t depFieldIDs_length) { + rsAssert(fieldIDs_length == values_length && values_length == sizes_length && + sizes_length == depClosures_length && + depClosures_length == depFieldIDs_length); + + return (RsClosure)(new Closure( + context, (const ScriptKernelID*)kernelID, (Allocation*)returnValue, + fieldIDs_length, (const ScriptFieldID**)fieldIDs, (const void**)values, + sizes, (const Closure**)depClosures, + (const ScriptFieldID**)depFieldIDs)); +} + +void rsi_ClosureEval(Context* rsc, RsClosure closure) { + ((Closure*)closure)->eval(); +} + +void rsi_ClosureSetArg(Context* rsc, RsClosure closure, uint32_t index, + uintptr_t value, size_t size) { + ((Closure*)closure)->setArg(index, (const void*)value, size); +} + +void rsi_ClosureSetGlobal(Context* rsc, RsClosure closure, + RsScriptFieldID fieldID, uintptr_t value, + size_t size) { + ((Closure*)closure)->setGlobal((const ScriptFieldID*)fieldID, + (const void*)value, size); +} + +Closure::Closure(Context* context, + const ScriptKernelID* kernelID, + Allocation* returnValue, + const int numValues, + const ScriptFieldID** fieldIDs, + const void** values, + const size_t* sizes, + const Closure** depClosures, + const ScriptFieldID** depFieldIDs) : + ObjectBase(context), mContext(context), mKernelID((ScriptKernelID*)kernelID), + mReturnValue(returnValue) { + size_t i; + + for (i = 0; i < (size_t)numValues && fieldIDs[i] == nullptr; i++); + + vector<const void*> args(values, values + i); + mArgs.swap(args); + + for (; i < (size_t)numValues; i++) { + mGlobals[fieldIDs[i]] = std::make_pair(values[i], sizes[i]); + } + + mDependences.insert(depClosures, depClosures + numValues); + + for (i = 0; i < mArgs.size(); i++) { + const Closure* dep = depClosures[i]; + if (dep != nullptr) { + auto mapping = mArgDeps[dep]; + if (mapping == nullptr) { + mapping = new map<int, const ObjectBaseRef<ScriptFieldID>*>(); + mArgDeps[dep] = mapping; + } + (*mapping)[i] = new ObjectBaseRef<ScriptFieldID>( + const_cast<ScriptFieldID*>(depFieldIDs[i])); + } + } + + for (; i < (size_t)numValues; i++) { + const Closure* dep = depClosures[i]; + if (dep != nullptr) { + auto mapping = mGlobalDeps[dep]; + if (mapping == nullptr) { + mapping = new map<const ObjectBaseRef<ScriptFieldID>*, + const ObjectBaseRef<ScriptFieldID>*>(); + mGlobalDeps[dep] = mapping; + } + (*mapping)[new ObjectBaseRef<ScriptFieldID>( + const_cast<ScriptFieldID*>(fieldIDs[i]))] = + new ObjectBaseRef<ScriptFieldID>( + const_cast<ScriptFieldID*>(depFieldIDs[i])); + } + } +} + +Closure::~Closure() { + for (const auto& p : mArgDeps) { + auto map = p.second; + for (const auto& p1 : *map) { + delete p1.second; + } + delete p.second; + } + + for (const auto& p : mGlobalDeps) { + auto map = p.second; + for (const auto& p1 : *map) { + delete p1.first; + delete p1.second; + } + delete p.second; + } +} + +void Closure::eval() { + Script *s = mKernelID->mScript; + + for (const auto& p : mGlobals) { + const void* value = p.second.first; + int size = p.second.second; + // We use -1 size to indicate an ObjectBase rather than a primitive type + if (size < 0) { + s->setVarObj(p.first->mSlot, (ObjectBase*)value); + } else { + s->setVar(p.first->mSlot, (const void*)&value, size); + } + } + + s->runForEach(mContext, mKernelID->mSlot, (const Allocation **)(&mArgs[0]), + mArgs.size(), mReturnValue, nullptr, 0, nullptr); +} + +void Closure::setArg(const uint32_t index, const void* value, const size_t size) { + mArgs[index] = value; +} + +void Closure::setGlobal(const ScriptFieldID* fieldID, const void* value, + const size_t size) { + mGlobals[fieldID] = std::make_pair(value, size); +} + +} // namespace renderscript +} // namespace android diff --git a/rsClosure.h b/rsClosure.h new file mode 100644 index 00000000..372cd327 --- /dev/null +++ b/rsClosure.h @@ -0,0 +1,78 @@ +#ifndef ANDROID_RENDERSCRIPT_CLOSURE_H_ +#define ANDROID_RENDERSCRIPT_CLOSURE_H_ + +#include <map> +#include <set> +#include <vector> + +#include "rsDefines.h" +#include "rsObjectBase.h" + +namespace android { +namespace renderscript { + +using std::map; +using std::pair; +using std::set; +using std::vector; + +class Allocation; +class Context; +class ScriptFieldID; +class ScriptKernelID; +class Type; + +class Closure : public ObjectBase { + public: + Closure(Context* context, + const ScriptKernelID* kernelID, + Allocation* returnValue, + const int numValues, + const ScriptFieldID** fieldIDs, + const void** values, // Allocations or primitive (numeric) types + const size_t* sizes, // size for data type. -1 indicates an allocation. + const Closure** depClosures, + const ScriptFieldID** depFieldIDs); + + virtual ~Closure(); + + virtual void serialize(Context *rsc, OStream *stream) const {} + + virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_CLOSURE; } + + void eval(); + + void setArg(const uint32_t index, const void* value, const size_t size); + void setGlobal(const ScriptFieldID* fieldID, const void* value, + const size_t size); + + Context* mContext; + const ObjectBaseRef<ScriptKernelID> mKernelID; + + // Values referrenced in arguments and globals cannot be futures. They must be + // either a known value or unbound value. + // For now, all arguments should be Allocations. + vector<const void*> mArgs; + + // A global could be allocation or any primitive data type. + map<const ScriptFieldID*, pair<const void*, int>> mGlobals; + + Allocation* mReturnValue; + + // All the other closures that this closure depends on + set<const Closure*> mDependences; + + // All the other closures which this closure depends on for one of its + // arguments, and the fields which it depends on. + map<const Closure*, map<int, const ObjectBaseRef<ScriptFieldID>*>*> mArgDeps; + + // All the other closures that this closure depends on for one of its fields, + // and the fields that it depends on. + map<const Closure*, map<const ObjectBaseRef<ScriptFieldID>*, + const ObjectBaseRef<ScriptFieldID>*>*> mGlobalDeps; +}; + +} // namespace renderscript +} // namespace android + +#endif // ANDROID_RENDERSCRIPT_CLOSURE_H_ diff --git a/rsDefines.h b/rsDefines.h index 0a91ea8d..9345eb9b 100644 --- a/rsDefines.h +++ b/rsDefines.h @@ -33,6 +33,7 @@ typedef void * RsAdapter1D; typedef void * RsAdapter2D; typedef void * RsAllocation; typedef void * RsAnimation; +typedef void * RsClosure; typedef void * RsContext; typedef void * RsDevice; typedef void * RsElement; @@ -44,6 +45,7 @@ typedef void * RsScriptKernelID; typedef void * RsScriptFieldID; typedef void * RsScriptMethodID; typedef void * RsScriptGroup; +typedef void * RsScriptGroup2; typedef void * RsMesh; typedef void * RsPath; typedef void * RsType; @@ -251,7 +253,3 @@ enum RsContextFlags { #endif #endif // RENDER_SCRIPT_DEFINES_H - - - - diff --git a/rsFileA3D.cpp b/rsFileA3D.cpp index 6f146370..3fe69422 100644 --- a/rsFileA3D.cpp +++ b/rsFileA3D.cpp @@ -294,6 +294,10 @@ ObjectBase *FileA3D::initializeFromEntry(size_t index) { break; case RS_A3D_CLASS_ID_SCRIPT_GROUP: break; + case RS_A3D_CLASS_ID_CLOSURE: + break; + case RS_A3D_CLASS_ID_SCRIPT_GROUP2: + break; } if (entry->mRsObj) { entry->mRsObj->incUserRef(); diff --git a/rsInternalDefines.h b/rsInternalDefines.h index 19f21226..57cb72a9 100644 --- a/rsInternalDefines.h +++ b/rsInternalDefines.h @@ -163,7 +163,9 @@ enum RsA3DClassID { RS_A3D_CLASS_ID_SCRIPT_KERNEL_ID, RS_A3D_CLASS_ID_SCRIPT_FIELD_ID, RS_A3D_CLASS_ID_SCRIPT_METHOD_ID, - RS_A3D_CLASS_ID_SCRIPT_GROUP + RS_A3D_CLASS_ID_SCRIPT_GROUP, + RS_A3D_CLASS_ID_CLOSURE, + RS_A3D_CLASS_ID_SCRIPT_GROUP2 }; enum RsCullMode { @@ -202,7 +204,3 @@ typedef struct { #endif #endif // RENDER_SCRIPT_DEFINES_H - - - - diff --git a/rsScriptGroup.cpp b/rsScriptGroup.cpp index 618c28c0..791ab147 100644 --- a/rsScriptGroup.cpp +++ b/rsScriptGroup.cpp @@ -14,15 +14,18 @@ * limitations under the License. */ -#include <algorithm> +#include "rsScriptGroup.h" #include "rsContext.h" +#include "rsScriptGroup2.h" + +#include <algorithm> #include <time.h> using namespace android; using namespace android::renderscript; -ScriptGroup::ScriptGroup(Context *rsc) : ObjectBase(rsc) { +ScriptGroup::ScriptGroup(Context *rsc) : ScriptGroupBase(rsc) { } ScriptGroup::~ScriptGroup() { @@ -270,12 +273,10 @@ bool ScriptGroup::validateInputAndOutput(Context *rsc) { } void ScriptGroup::execute(Context *rsc) { - if (!validateInputAndOutput(rsc)) { return; } - //ALOGE("ScriptGroup::execute"); if (rsc->mHal.funcs.scriptgroup.execute) { rsc->mHal.funcs.scriptgroup.execute(rsc, this); return; @@ -324,13 +325,6 @@ void ScriptGroup::execute(Context *rsc) { } -void ScriptGroup::serialize(Context *rsc, OStream *stream) const { -} - -RsA3DClassID ScriptGroup::getClassId() const { - return RS_A3D_CLASS_ID_SCRIPT_GROUP; -} - ScriptGroup::Link::Link() { } @@ -371,7 +365,7 @@ void rsi_ScriptGroupSetOutput(Context *rsc, RsScriptGroup sg, RsScriptKernelID k } void rsi_ScriptGroupExecute(Context *rsc, RsScriptGroup sg) { - ScriptGroup *s = (ScriptGroup *)sg; + ScriptGroupBase *s = (ScriptGroupBase *)sg; s->execute(rsc); } diff --git a/rsScriptGroup.h b/rsScriptGroup.h index 974e3ba5..ff0259a4 100644 --- a/rsScriptGroup.h +++ b/rsScriptGroup.h @@ -17,21 +17,30 @@ #ifndef ANDROID_RS_SCRIPT_GROUP_H #define ANDROID_RS_SCRIPT_GROUP_H -#include "rsAllocation.h" -#include "rsScript.h" +#include "rsScriptGroupBase.h" +#include <vector> // --------------------------------------------------------------------------- namespace android { namespace renderscript { +class Allocation; +class Context; class ProgramVertex; class ProgramFragment; class ProgramRaster; class ProgramStore; +class Script; +class ScriptFieldID; +class ScriptKernelID; +class Type; -class ScriptGroup : public ObjectBase { +class ScriptGroup : public ScriptGroupBase { public: + virtual SG_API_Version getApiVersion() const { return SG_V1; } + virtual void execute(Context *rsc); + std::vector<ObjectBaseRef<ScriptKernelID> > mKernels; class Link { @@ -70,15 +79,6 @@ public: std::vector<IO *> mInputs; std::vector<IO *> mOutputs; - struct Hal { - void * drv; - - struct DriverInfo { - }; - DriverInfo info; - }; - Hal mHal; - static ScriptGroup * create(Context *rsc, ScriptKernelID ** kernels, size_t kernelsSize, ScriptKernelID ** src, size_t srcSize, @@ -86,14 +86,9 @@ public: ScriptFieldID ** dstF, size_t dstFSize, const Type ** type, size_t typeSize); - virtual void serialize(Context *rsc, OStream *stream) const; - virtual RsA3DClassID getClassId() const; - - void execute(Context *rsc); void setInput(Context *rsc, ScriptKernelID *kid, Allocation *a); void setOutput(Context *rsc, ScriptKernelID *kid, Allocation *a); - protected: virtual ~ScriptGroup(); bool mInitialized; diff --git a/rsScriptGroup2.cpp b/rsScriptGroup2.cpp new file mode 100644 index 00000000..06a252aa --- /dev/null +++ b/rsScriptGroup2.cpp @@ -0,0 +1,27 @@ +#include "rsScriptGroup2.h" + +#include "rsContext.h" + +namespace android { +namespace renderscript { + +void ScriptGroup2::execute(Context* rsc) { + if (rsc->mHal.funcs.scriptgroup.execute) { + rsc->mHal.funcs.scriptgroup.execute(rsc, this); + } +} + +RsScriptGroup2 rsi_ScriptGroup2Create(Context* rsc, RsClosure* closures, + size_t numClosures) { + ScriptGroup2* group = new ScriptGroup2(rsc, (Closure**)closures, numClosures); + + // Create a device-specific implementation by calling the device driver + if (rsc->mHal.funcs.scriptgroup.init) { + rsc->mHal.funcs.scriptgroup.init(rsc, group); + } + + return group; +} + +} // namespace renderscript +} // namespace android diff --git a/rsScriptGroup2.h b/rsScriptGroup2.h new file mode 100644 index 00000000..c759faff --- /dev/null +++ b/rsScriptGroup2.h @@ -0,0 +1,36 @@ +#ifndef ANDROID_RENDERSCRIPT_SCRIPTGROUP2_H_ +#define ANDROID_RENDERSCRIPT_SCRIPTGROUP2_H_ + +#include "rsScriptGroupBase.h" + +#include <list> + +namespace android { +namespace renderscript { + +class Closure; +class Context; + +class ScriptGroup2 : public ScriptGroupBase { + public: + /* + TODO: + Inputs and outputs are set and retrieved in Java runtime. + They are opaque in the C++ runtime. + For better compiler optimizations (of a script group), we need to include + input and output information in the C++ runtime. + */ + ScriptGroup2(Context* rsc, Closure** closures, size_t numClosures) : + ScriptGroupBase(rsc), mClosures(closures, closures + numClosures) {} + virtual ~ScriptGroup2() {} + + virtual SG_API_Version getApiVersion() const { return SG_V2; } + virtual void execute(Context* rsc); + + std::list<Closure*> mClosures; +}; + +} // namespace renderscript +} // namespace android + +#endif // ANDROID_RENDERSCRIPT_SCRIPTGROUP2_H_ diff --git a/rsScriptGroupBase.h b/rsScriptGroupBase.h new file mode 100644 index 00000000..00ae6c6d --- /dev/null +++ b/rsScriptGroupBase.h @@ -0,0 +1,41 @@ +#ifndef ANDROID_RS_SCRIPT_GROUP_BASE_H +#define ANDROID_RS_SCRIPT_GROUP_BASE_H + +#include "rsObjectBase.h" + +namespace android { +namespace renderscript { + +class ScriptGroupBase : public ObjectBase { + public: + ScriptGroupBase(Context* rsc) : ObjectBase(rsc) {} + virtual ~ScriptGroupBase() {} + + virtual void serialize(Context *rsc, OStream *stream) const {} + + virtual RsA3DClassID getClassId() const { + return RS_A3D_CLASS_ID_SCRIPT_GROUP; + } + + enum SG_API_Version { + SG_V1 = 10, + SG_V2 = 20, + }; + + virtual void execute(Context *rsc) = 0; + virtual SG_API_Version getApiVersion() const = 0; + + struct Hal { + void * drv; + + struct DriverInfo { + }; + DriverInfo info; + }; + Hal mHal; +}; + +} // namespace renderscript +} // namespace android + +#endif // ANDROID_RS_SCRIPT_GROUP_BASE_H @@ -35,6 +35,7 @@ class ScriptFieldID; class ScriptMethodID; class ScriptC; class ScriptGroup; +class ScriptGroupBase; class Path; class Program; class ProgramStore; @@ -300,13 +301,13 @@ typedef struct { } framebuffer; struct { - bool (*init)(const Context *rsc, ScriptGroup *sg); + bool (*init)(const Context *rsc, ScriptGroupBase *sg); void (*setInput)(const Context *rsc, const ScriptGroup *sg, const ScriptKernelID *kid, Allocation *); void (*setOutput)(const Context *rsc, const ScriptGroup *sg, const ScriptKernelID *kid, Allocation *); - void (*execute)(const Context *rsc, const ScriptGroup *sg); - void (*destroy)(const Context *rsc, const ScriptGroup *sg); + void (*execute)(const Context *rsc, const ScriptGroupBase *sg); + void (*destroy)(const Context *rsc, const ScriptGroupBase *sg); void (*updateCachedObject)(const Context *rsc, const ScriptGroup *sg, rs_script_group *obj); } scriptgroup; |