diff options
Diffstat (limited to 'cpu_ref')
-rw-r--r-- | cpu_ref/Android.mk | 1 | ||||
-rw-r--r-- | cpu_ref/rsCpuCore.cpp | 17 | ||||
-rw-r--r-- | cpu_ref/rsCpuCore.h | 2 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup.cpp | 6 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup.h | 2 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup2.cpp | 192 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup2.h | 60 | ||||
-rw-r--r-- | cpu_ref/rsd_cpu.h | 18 |
8 files changed, 285 insertions, 13 deletions
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk index 5de964fc..27243f9f 100644 --- a/cpu_ref/Android.mk +++ b/cpu_ref/Android.mk @@ -29,6 +29,7 @@ LOCAL_SRC_FILES:= \ rsCpuRuntimeMath.cpp \ rsCpuRuntimeStubs.cpp \ rsCpuScriptGroup.cpp \ + rsCpuScriptGroup2.cpp \ rsCpuIntrinsic.cpp \ rsCpuIntrinsic3DLUT.cpp \ rsCpuIntrinsicBlend.cpp \ diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp index 47bc1c01..84c24169 100644 --- a/cpu_ref/rsCpuCore.cpp +++ b/cpu_ref/rsCpuCore.cpp @@ -17,6 +17,7 @@ #include "rsCpuCore.h" #include "rsCpuScript.h" #include "rsCpuScriptGroup.h" +#include "rsCpuScriptGroup2.h" #include <malloc.h> #include "rsContext.h" @@ -660,11 +661,19 @@ RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script * return i; } -RsdCpuReference::CpuScriptGroup * RsdCpuReferenceImpl::createScriptGroup(const ScriptGroup *sg) { - CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg); - if (!sgi->init()) { +void* RsdCpuReferenceImpl::createScriptGroup(const ScriptGroupBase *sg) { + switch (sg->getApiVersion()) { + case ScriptGroupBase::SG_V1: { + CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg); + if (!sgi->init()) { delete sgi; return nullptr; + } + return sgi; } - return sgi; + case ScriptGroupBase::SG_V2: { + return new CpuScriptGroup2Impl(this, sg); + } + } + return nullptr; } diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h index bfd5e512..e0696583 100644 --- a/cpu_ref/rsCpuCore.h +++ b/cpu_ref/rsCpuCore.h @@ -182,7 +182,7 @@ public: uint32_t flags); virtual CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e); - virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg); + virtual void* createScriptGroup(const ScriptGroupBase *sg); const RsdCpuReference::CpuSymbol *symLookup(const char *); diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp index 751bafb8..3d32a512 100644 --- a/cpu_ref/rsCpuScriptGroup.cpp +++ b/cpu_ref/rsCpuScriptGroup.cpp @@ -18,15 
+18,13 @@ #include "rsCpuScript.h" #include "rsScriptGroup.h" #include "rsCpuScriptGroup.h" -//#include "rsdBcc.h" -//#include "rsdAllocation.h" using namespace android; using namespace android::renderscript; -CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) { +CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) { mCtx = ctx; - mSG = sg; + mSG = (ScriptGroup*)sg; } CpuScriptGroupImpl::~CpuScriptGroupImpl() { diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h index 1a4af058..50ba2acc 100644 --- a/cpu_ref/rsCpuScriptGroup.h +++ b/cpu_ref/rsCpuScriptGroup.h @@ -30,7 +30,7 @@ public: virtual void execute(); virtual ~CpuScriptGroupImpl(); - CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg); + CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg); bool init(); static void scriptGroupRoot(const RsExpandKernelParams *p, diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp new file mode 100644 index 00000000..9dc4d900 --- /dev/null +++ b/cpu_ref/rsCpuScriptGroup2.cpp @@ -0,0 +1,192 @@ +#include "rsCpuScriptGroup2.h" + +#include "cpu_ref/rsCpuCore.h" +#include "rsClosure.h" +#include "rsContext.h" +#include "rsCpuCore.h" +#include "rsCpuScript.h" +#include "rsScript.h" +#include "rsScriptGroup2.h" + +namespace android { +namespace renderscript { + +namespace { + +static const size_t DefaultKernelArgCount = 2; + +void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, + uint32_t xend, uint32_t outstep) { + const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr; + RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; + const void **oldIns = kparams->ins; + uint32_t *oldStrides = kparams->inEStrides; + + std::vector<const void*> ins(DefaultKernelArgCount); + std::vector<uint32_t> strides(DefaultKernelArgCount); + + for (CPUClosure* cpuClosure : closures) { + const Closure* 
closure = cpuClosure->mClosure; + + auto in_iter = ins.begin(); + auto stride_iter = strides.begin(); + + for (const auto& arg : closure->mArgs) { + const Allocation* a = (const Allocation*)arg; + const uint32_t eStride = a->mHal.state.elementSizeBytes; + const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + + eStride * xstart; + if (kparams->dimY > 1) { + ptr += a->mHal.drvState.lod[0].stride * kparams->y; + } + *in_iter++ = ptr; + *stride_iter++ = eStride; + } + + mutable_kparams->ins = &ins[0]; + mutable_kparams->inEStrides = &strides[0]; + + const Allocation* out = closure->mReturnValue; + const uint32_t ostep = out->mHal.state.elementSizeBytes; + const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + + ostep * xstart; + if (kparams->dimY > 1) { + ptr += out->mHal.drvState.lod[0].stride * kparams->y; + } + + mutable_kparams->out = (void*)ptr; + + mutable_kparams->usr = cpuClosure->mUsrPtr; + + cpuClosure->mFunc(kparams, xstart, xend, ostep); + } + + mutable_kparams->ins = oldIns; + mutable_kparams->inEStrides = oldStrides; + mutable_kparams->usr = &closures; +} + +/* + Returns true if closure depends on any closure in batch via a global variable + TODO: this probably should go into class Closure. 
+ */ +bool conflict(const list<CPUClosure*> &batch, CPUClosure* closure) { + for (const auto &p : closure->mClosure->mGlobalDeps) { + const Closure* dep = p.first; + for (CPUClosure* c : batch) { + if (c->mClosure == dep) { + return true; + } + } + } + for (const auto &p : closure->mClosure->mArgDeps) { + const Closure* dep = p.first; + for (CPUClosure* c : batch) { + if (c->mClosure == dep) { + for (const auto &p1 : *p.second) { + if (p1.second != nullptr) { + return true; + } + } + } + } + } + return false; +} + +} // namespace + +CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, + const ScriptGroupBase *sg) : + mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { + list<CPUClosure*>* batch = new list<CPUClosure*>(); + for (Closure* closure: mGroup->mClosures) { + const ScriptKernelID* kernelID = closure->mKernelID.get(); + RsdCpuScriptImpl* si = + (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); + + MTLaunchStruct mtls; + si->forEachKernelSetup(kernelID->mSlot, &mtls); + // TODO: Is mtls.fep.usrLen ever used? 
+ CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, + mtls.fep.usr, mtls.fep.usrLen); + if (conflict(*batch, cc)) { + mBatches.push_back(batch); + batch = new list<CPUClosure*>(); + } + batch->push_back(cc); + } + mBatches.push_back(batch); +} + +CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { + for (list<CPUClosure*>* batch : mBatches) { + for (CPUClosure* c : *batch) { + delete c; + } + } +} + +void CpuScriptGroup2Impl::execute() { + for (list<CPUClosure*>* batch : mBatches) { + setGlobalsForBatch(*batch); + runBatch(*batch); + } +} + +void CpuScriptGroup2Impl::setGlobalsForBatch(const list<CPUClosure*>& batch) { + for (CPUClosure* cpuClosure : batch) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + Script* s = kernelID->mScript; + for (const auto& p : closure->mGlobals) { + const void* value = p.second.first; + int size = p.second.second; + // We use -1 size to indicate an ObjectBase rather than a primitive type + if (size < 0) { + s->setVarObj(p.first->mSlot, (ObjectBase*)value); + } else { + s->setVar(p.first->mSlot, (const void*)&value, size); + } + } + } +} + +void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) { + for (CPUClosure* cpuClosure : batch) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + cpuClosure->mSi->preLaunch(kernelID->mSlot, + (const Allocation**)&closure->mArgs[0], + closure->mArgs.size(), closure->mReturnValue, + cpuClosure->mUsrPtr, cpuClosure->mUsrSize, + nullptr); + } + + const CPUClosure* cpuClosure = batch.front(); + const Closure* closure = cpuClosure->mClosure; + MTLaunchStruct mtls; + + cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0], + closure->mArgs.size(), + closure->mReturnValue, + nullptr, 0, nullptr, &mtls); + + mtls.script = nullptr; + mtls.kernel = (void (*)())&groupRoot; + mtls.fep.usr = &batch; + + mCpuRefImpl->launchThreads(nullptr, 
0, nullptr, nullptr, &mtls); + + for (CPUClosure* cpuClosure : batch) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + cpuClosure->mSi->postLaunch(kernelID->mSlot, + (const Allocation**)&closure->mArgs[0], + closure->mArgs.size(), closure->mReturnValue, + nullptr, 0, nullptr); + } +} + +} // namespace renderscript +} // namespace android diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h new file mode 100644 index 00000000..6cb72a65 --- /dev/null +++ b/cpu_ref/rsCpuScriptGroup2.h @@ -0,0 +1,60 @@ +#ifndef CPU_REF_CPUSCRIPTGROUP2IMPL_H_ +#define CPU_REF_CPUSCRIPTGROUP2IMPL_H_ + +#include <list> + +#include "rsd_cpu.h" + +using std::list; + +namespace android { +namespace renderscript { + +class Closure; +class RsdCpuScriptImpl; +class RsdCpuReferenceImpl; +class ScriptGroup2; + +struct RsExpandKernelParams; + +typedef void (*ExpandFuncTy)(const RsExpandKernelParams*, uint32_t, uint32_t, + uint32_t); + +class CPUClosure { + public: + CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func, + const void* usrPtr, const size_t usrSize) : + mClosure(closure), mSi(si), mFunc(func), mUsrPtr(usrPtr), + mUsrSize(usrSize) {} + + // It's important to do forwarding here rather than inheritance for unbound value + // binding to work. 
+ const Closure* mClosure; + RsdCpuScriptImpl* mSi; + const ExpandFuncTy mFunc; + const void* mUsrPtr; + const size_t mUsrSize; +}; + +class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 { + public: + CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase* group); + virtual ~CpuScriptGroup2Impl(); + + bool init(); + virtual void execute(); + + private: + void setGlobalsForBatch(const list<CPUClosure*>& batch); + void runBatch(const list<CPUClosure*>& batch); + + RsdCpuReferenceImpl* mCpuRefImpl; + const ScriptGroup2* mGroup; + + list<list<CPUClosure*>*> mBatches; +}; + +} // namespace renderscript +} // namespace android + +#endif // CPU_REF_CPUSCRIPTGROUP2IMPL_H_ diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h index b0e924e3..d886cef0 100644 --- a/cpu_ref/rsd_cpu.h +++ b/cpu_ref/rsd_cpu.h @@ -45,7 +45,7 @@ namespace renderscript { class ScriptC; class Script; -class ScriptGroup; +class ScriptGroupBase; class ScriptKernelID; @@ -97,7 +97,13 @@ public: }; typedef CpuScript * (* script_lookup_t)(Context *, const Script *s); - class CpuScriptGroup { + class CpuScriptGroupBase { + public: + virtual void execute() = 0; + virtual ~CpuScriptGroupBase() {} + }; + + class CpuScriptGroup : public CpuScriptGroupBase { public: virtual void setInput(const ScriptKernelID *kid, Allocation *) = 0; virtual void setOutput(const ScriptKernelID *kid, Allocation *) = 0; @@ -105,6 +111,12 @@ public: virtual ~CpuScriptGroup() {}; }; + class CpuScriptGroup2 : public CpuScriptGroupBase { + public: + virtual void execute() = 0; + virtual ~CpuScriptGroup2() {} + }; + static Context * getTlsContext(); static const Script * getTlsScript(); static pthread_key_t getThreadTLSKey(); @@ -124,7 +136,7 @@ public: uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) = 0; virtual CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) = 0; - virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg) = 0; + virtual void* 
createScriptGroup(const ScriptGroupBase *sg) = 0; virtual bool getInForEach() = 0; #ifndef RS_COMPATIBILITY_LIB |