diff options
author | Yang Ni <yangni@google.com> | 2015-02-20 15:20:00 -0800 |
---|---|---|
committer | Yang Ni <yangni@google.com> | 2015-03-10 15:26:25 -0700 |
commit | 062c287f573ecc06c38ee4295e5627e12c52ac3d (patch) | |
tree | fc9fc1581ac0cc8a68d107575522a388842b21c0 /cpu_ref | |
parent | 30940f637bce187126a667bb3b979978d7067a8b (diff) | |
download | rs-062c287f573ecc06c38ee4295e5627e12c52ac3d.tar.gz |
Runtime support for Script Group as single module
Also made two other cleanups:
- Changed KernelID/InvokeID into IDBase in class Closure
Rather than having two fields in class Closure, one of type ScriptKernelID and
the other InvokeID, use a single field of the common base class IDBase. Added a
boolean field to indicate whether it is kernel or invoke, since -fno-rtti is on.
- Removed user pointer from CPU closure
Change-Id: I5553f86b2e58325f85649078d48685a38f12d62f
Diffstat (limited to 'cpu_ref')
-rw-r--r-- | cpu_ref/rsCpuExecutable.cpp | 34 | ||||
-rw-r--r-- | cpu_ref/rsCpuExecutable.h | 33 | ||||
-rw-r--r-- | cpu_ref/rsCpuScript.cpp | 4 | ||||
-rw-r--r-- | cpu_ref/rsCpuScript.h | 1 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup2.cpp | 232 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup2.h | 27 |
6 files changed, 226 insertions, 105 deletions
diff --git a/cpu_ref/rsCpuExecutable.cpp b/cpu_ref/rsCpuExecutable.cpp index e5009d84..75f5e61a 100644 --- a/cpu_ref/rsCpuExecutable.cpp +++ b/cpu_ref/rsCpuExecutable.cpp @@ -329,6 +329,7 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( void** fieldAddress = nullptr; bool* fieldIsObject = nullptr; + char** fieldName = nullptr; InvokeFunc_t* invokeFunctions = nullptr; ForEachFunc_t* forEachFunctions = nullptr; uint32_t* forEachSignatures = nullptr; @@ -356,6 +357,11 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( goto error; } + fieldName = new char*[varCount]; + if (fieldName == nullptr) { + goto error; + } + for (size_t i = 0; i < varCount; ++i) { if (strgets(line, MAXLINE, &rsInfo) == nullptr) { goto error; @@ -372,6 +378,8 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( } fieldAddress[i] = addr; fieldIsObject[i] = false; + fieldName[i] = new char[strlen(line)+1]; + strcpy(fieldName[i], line); } if (strgets(line, MAXLINE, &rsInfo) == nullptr) { @@ -440,7 +448,8 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( forEachSignatures[i] = tmpSig; forEachFunctions[i] = (ForEachFunc_t) dlsym(sharedObj, tmpName); - if (i != 0 && forEachFunctions[i] == nullptr) { + if (i != 0 && forEachFunctions[i] == nullptr && + strcmp(tmpName, "root.expand")) { // Ignore missing root.expand functions. // root() is always specified at location 0. 
ALOGE("Failed to find forEach function address for %s: %s", @@ -503,7 +512,6 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( ALOGE("Unable to read pragma at index %zu!", i); goto error; } - char key[MAXLINE]; char value[MAXLINE] = ""; // initialize in case value is empty @@ -561,15 +569,15 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject( char *checksumStart = &line[strlen(CHECKSUM_STR)]; checksum = new char[strlen(checksumStart) + 1]; strcpy(checksum, checksumStart); - } - else { + } else { + ALOGE("Missing checksum in shared obj file"); goto error; } #endif // RS_COMPATIBILITY_LIB return new ScriptExecutable( - RSContext, fieldAddress, fieldIsObject, varCount, + RSContext, fieldAddress, fieldIsObject, fieldName, varCount, invokeFunctions, funcCount, forEachFunctions, forEachSignatures, forEachCount, pragmaKeys, pragmaValues, pragmaCount, @@ -591,12 +599,28 @@ error: delete[] forEachSignatures; delete[] forEachFunctions; + delete[] invokeFunctions; + + for (size_t i = 0; i < varCount; i++) { + delete[] fieldName[i]; + } + delete[] fieldName; delete[] fieldIsObject; delete[] fieldAddress; return nullptr; } +void* ScriptExecutable::getFieldAddress(const char* name) const { + // TODO: improve this by using a hash map. 
+ for (size_t i = 0; i < mExportedVarCount; i++) { + if (strcmp(name, mFieldName[i]) == 0) { + return mFieldAddress[i]; + } + } + return nullptr; +} + } // namespace renderscript } // namespace android diff --git a/cpu_ref/rsCpuExecutable.h b/cpu_ref/rsCpuExecutable.h index cdf6fd6b..ed6904d0 100644 --- a/cpu_ref/rsCpuExecutable.h +++ b/cpu_ref/rsCpuExecutable.h @@ -57,22 +57,22 @@ private: class ScriptExecutable { public: ScriptExecutable(Context* RSContext, - void** fieldAddress, bool* fieldIsObject, size_t varCount, + void** fieldAddress, bool* fieldIsObject, + const char* const * fieldName, size_t varCount, InvokeFunc_t* invokeFunctions, size_t funcCount, ForEachFunc_t* forEachFunctions, uint32_t* forEachSignatures, size_t forEachCount, - const char ** pragmaKeys, const char ** pragmaValues, + const char** pragmaKeys, const char** pragmaValues, size_t pragmaCount, bool isThreadable, const char *buildChecksum) : mFieldAddress(fieldAddress), mFieldIsObject(fieldIsObject), - mExportedVarCount(varCount), - mInvokeFunctions(invokeFunctions), mFuncCount(funcCount), - mForEachFunctions(forEachFunctions), mForEachSignatures(forEachSignatures), - mForEachCount(forEachCount), - mPragmaKeys(pragmaKeys), mPragmaValues(pragmaValues), - mPragmaCount(pragmaCount), - mIsThreadable(isThreadable), mBuildChecksum(buildChecksum), - mRS(RSContext) { + mFieldName(fieldName), mExportedVarCount(varCount), + mInvokeFunctions(invokeFunctions), mFuncCount(funcCount), + mForEachFunctions(forEachFunctions), mForEachSignatures(forEachSignatures), + mForEachCount(forEachCount), + mPragmaKeys(pragmaKeys), mPragmaValues(pragmaValues), + mPragmaCount(pragmaCount), mIsThreadable(isThreadable), + mBuildChecksum(buildChecksum), mRS(RSContext) { } ~ScriptExecutable() { @@ -91,12 +91,18 @@ public: delete [] mPragmaKeys[i]; delete [] mPragmaValues[i]; } - delete[] mPragmaValues; delete[] mPragmaKeys; + delete[] mForEachSignatures; delete[] mForEachFunctions; + delete[] mInvokeFunctions; + + for 
(size_t i = 0; i < mExportedVarCount; i++) { + delete[] mFieldName[i]; + } + delete[] mFieldName; delete[] mFieldIsObject; delete[] mFieldAddress; } @@ -110,8 +116,12 @@ public: size_t getPragmaCount() const { return mPragmaCount; } void* getFieldAddress(int slot) const { return mFieldAddress[slot]; } + void* getFieldAddress(const char* name) const; bool getFieldIsObject(int slot) const { return mFieldIsObject[slot]; } + const char* getFieldName(int slot) const { return mFieldName[slot]; } + InvokeFunc_t getInvokeFunction(int slot) const { return mInvokeFunctions[slot]; } + ForEachFunc_t getForEachFunction(int slot) const { return mForEachFunctions[slot]; } uint32_t getForEachSignature(int slot) const { return mForEachSignatures[slot]; } @@ -129,6 +139,7 @@ public: private: void** mFieldAddress; bool* mFieldIsObject; + const char* const * mFieldName; size_t mExportedVarCount; InvokeFunc_t* mInvokeFunctions; diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp index ae7e5970..481c54d9 100644 --- a/cpu_ref/rsCpuScript.cpp +++ b/cpu_ref/rsCpuScript.cpp @@ -855,6 +855,10 @@ void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) { rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data); } +const char* RsdCpuScriptImpl::getFieldName(uint32_t slot) const { + return mScriptExec->getFieldName(slot); +} + RsdCpuScriptImpl::~RsdCpuScriptImpl() { #ifndef RS_COMPATIBILITY_LIB if (mCompilerDriver) { diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h index 44df8a9c..aaaa2a29 100644 --- a/cpu_ref/rsCpuScript.h +++ b/cpu_ref/rsCpuScript.h @@ -87,6 +87,7 @@ public: virtual void setGlobalBind(uint32_t slot, Allocation *data); virtual void setGlobalObj(uint32_t slot, ObjectBase *data); + const char* getFieldName(uint32_t slot) const; virtual ~RsdCpuScriptImpl(); RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s); diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp index 7222eb9d..6bc98b41 100644 --- 
a/cpu_ref/rsCpuScriptGroup2.cpp +++ b/cpu_ref/rsCpuScriptGroup2.cpp @@ -5,6 +5,8 @@ #include <stdlib.h> #include <unistd.h> +#include <set> +#include <sstream> #include <string> #include <vector> @@ -75,25 +77,25 @@ void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, mutable_kparams->out = (void*)ptr; - mutable_kparams->usr = cpuClosure->mUsrPtr; - cpuClosure->mFunc(kparams, xstart, xend, ostep); } mutable_kparams->ins = oldIns; mutable_kparams->inEStrides = oldStrides; - mutable_kparams->usr = &closures; } } // namespace +Batch::Batch(CpuScriptGroup2Impl* group, const char* name) : + mGroup(group), mFunc(nullptr) { + mName = strndup(name, strlen(name)); +} + Batch::~Batch() { for (CPUClosure* c : mClosures) { delete c; } - if (mScriptObj) { - dlclose(mScriptObj); - } + free(mName); } bool Batch::conflict(CPUClosure* cpuClosure) const { @@ -103,8 +105,7 @@ bool Batch::conflict(CPUClosure* cpuClosure) const { const Closure* closure = cpuClosure->mClosure; - if (closure->mKernelID.get() == nullptr || - mClosures.front()->mClosure->mKernelID.get() == nullptr) { + if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { // An invoke should be in a batch by itself, so it conflicts with any other // closure. 
return true; @@ -134,30 +135,30 @@ bool Batch::conflict(CPUClosure* cpuClosure) const { CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase *sg) : - mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { + mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), + mExecutable(nullptr), mScriptObj(nullptr) { rsAssert(!mGroup->mClosures.empty()); - Batch* batch = new Batch(this); + Batch* batch = new Batch(this, "Batch0"); + int i = 0; for (Closure* closure: mGroup->mClosures) { - const ScriptKernelID* kernelID = closure->mKernelID.get(); - RsdCpuScriptImpl* si; CPUClosure* cc; - if (kernelID != nullptr) { - si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); + const IDBase* funcID = closure->mFunctionID.get(); + RsdCpuScriptImpl* si = + (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); + if (closure->mIsKernel) { MTLaunchStruct mtls; - si->forEachKernelSetup(kernelID->mSlot, &mtls); - // TODO: Is mtls.fep.usrLen ever used? 
- cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, - mtls.fep.usr, mtls.fep.usrLen); + si->forEachKernelSetup(funcID->mSlot, &mtls); + cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); } else { - si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript( - closure->mInvokeID->mScript); cc = new CPUClosure(closure, si); } if (batch->conflict(cc)) { mBatches.push_back(batch); - batch = new Batch(this); + std::stringstream ss; + ss << "Batch" << ++i; + batch = new Batch(this, ss.str().c_str()); } batch->mClosures.push_back(cc); @@ -167,16 +168,33 @@ CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, mBatches.push_back(batch); #ifndef RS_COMPATIBILITY_LIB - for (Batch* batch : mBatches) { - batch->tryToCreateFusedKernel(mGroup->mCacheDir); + compile(mGroup->mCacheDir); + if (mScriptObj != nullptr && mExecutable != nullptr) { + for (Batch* batch : mBatches) { + batch->resolveFuncPtr(mScriptObj); + } } -#endif +#endif // RS_COMPATIBILITY_LIB +} + +void Batch::resolveFuncPtr(void* sharedObj) { + std::string funcName(mName); + if (mClosures.front()->mClosure->mIsKernel) { + funcName.append(".expand"); + } + mFunc = dlsym(sharedObj, funcName.c_str()); + rsAssert (mFunc != nullptr); } CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { for (Batch* batch : mBatches) { delete batch; } + // TODO: move this dlclose into ~ScriptExecutable(). 
+ if (mScriptObj != nullptr) { + dlclose(mScriptObj); + } + delete mExecutable; } namespace { @@ -189,7 +207,8 @@ string getFileName(string path) { } void setupCompileArguments( - const vector<string>& inputs, const vector<int>& kernels, + const vector<string>& inputs, const vector<string>& kernelBatches, + const vector<string>& invokeBatches, const string& output_dir, const string& output_filename, const string& rsLib, vector<const char*>* args) { args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); @@ -202,10 +221,13 @@ void setupCompileArguments( for (const string& input : inputs) { args->push_back(input.c_str()); } - for (int kernel : kernels) { - args->push_back("-k"); - string strKernel = std::to_string(kernel); - args->push_back(strKernel.c_str()); + for (const string& batch : kernelBatches) { + args->push_back("-merge"); + args->push_back(batch.c_str()); + } + for (const string& batch : invokeBatches) { + args->push_back("-invoke"); + args->push_back(batch.c_str()); } args->push_back("-output_path"); args->push_back(output_dir.c_str()); @@ -247,13 +269,32 @@ bool fuseAndCompile(const char** arguments, return true; } -#endif + +void generateSourceSlot(const Closure& closure, + const std::vector<std::string>& inputs, + std::stringstream& ss) { + const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); + const Script* script = funcID->mScript; + + rsAssert (!script->isIntrinsic()); + + const RsdCpuScriptImpl *cpuScript = + (const RsdCpuScriptImpl*)script->mHal.drv; + const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); + + const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - + inputs.begin(); + + ss << index << "," << funcID->mSlot << "."; +} + +#endif // RS_COMPATIBILTY_LIB } // anonymous namespace -void Batch::tryToCreateFusedKernel(const char *cacheDir) { +void CpuScriptGroup2Impl::compile(const char* cacheDir) { #ifndef RS_COMPATIBILITY_LIB - if (mClosures.size() < 2) { + if (mGroup->mClosures.size() < 2) { return; } @@ 
-261,25 +302,43 @@ void Batch::tryToCreateFusedKernel(const char *cacheDir) { // Fuse the input kernels and generate native code in an object file //===--------------------------------------------------------------------===// - std::vector<string> inputFiles; - std::vector<int> slots; - - for (CPUClosure* cpuClosure : mClosures) { - const Closure* closure = cpuClosure->mClosure; - const ScriptKernelID* kernelID = closure->mKernelID.get(); - const Script* script = kernelID->mScript; + std::set<string> inputSet; + for (Closure* closure : mGroup->mClosures) { + const Script* script = closure->mFunctionID.get()->mScript; + // If any script is an intrinsic, give up trying fusing the kernels. if (script->isIntrinsic()) { return; } const RsdCpuScriptImpl *cpuScript = (const RsdCpuScriptImpl*)script->mHal.drv; - const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); + inputSet.insert(bitcodeFilename); + } + + std::vector<string> inputs(inputSet.begin(), inputSet.end()); + + std::vector<string> kernelBatches; + std::vector<string> invokeBatches; + + int i = 0; + for (const auto& batch : mBatches) { + rsAssert(batch->size() > 0); - inputFiles.push_back(bitcodeFilename); - slots.push_back(kernelID->mSlot); + std::stringstream ss; + ss << batch->mName << ":"; + + if (!batch->mClosures.front()->mClosure->mIsKernel) { + rsAssert(batch->size() == 1); + generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss); + invokeBatches.push_back(ss.str()); + } else { + for (const auto& cpuClosure : batch->mClosures) { + generateSourceSlot(*cpuClosure->mClosure, inputs, ss); + } + kernelBatches.push_back(ss.str()); + } } rsAssert(cacheDir != nullptr); @@ -295,8 +354,8 @@ void Batch::tryToCreateFusedKernel(const char *cacheDir) { string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2)); string rsLibPath(SYSLIBPATH"/libclcore.bc"); vector<const char*> arguments; - setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath, - 
&arguments); + setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, + outputFileName, rsLibPath, &arguments); std::unique_ptr<const char> joined( rsuJoinStrings(arguments.size() - 1, arguments.data())); string commandLine (joined.get()); @@ -317,15 +376,15 @@ void Batch::tryToCreateFusedKernel(const char *cacheDir) { return; } - void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); - if (mSharedObj == nullptr) { + mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); + if (mScriptObj == nullptr) { ALOGE("Unable to load '%s'", resName); return; } mExecutable = ScriptExecutable::createFromSharedObject( - nullptr, // RS context. Unused. - mSharedObj); + nullptr, // RS context. Unused. + mScriptObj); #endif // RS_COMPATIBILITY_LIB } @@ -340,13 +399,8 @@ void CpuScriptGroup2Impl::execute() { void Batch::setGlobalsForBatch() { for (CPUClosure* cpuClosure : mClosures) { const Closure* closure = cpuClosure->mClosure; - const ScriptKernelID* kernelID = closure->mKernelID.get(); - Script* s; - if (kernelID != nullptr) { - s = kernelID->mScript; - } else { - s = cpuClosure->mClosure->mInvokeID->mScript; - } + const IDBase* funcID = closure->mFunctionID.get(); + Script* s = funcID->mScript;; for (const auto& p : closure->mGlobals) { const void* value = p.second.first; int size = p.second.second; @@ -360,18 +414,54 @@ void Batch::setGlobalsForBatch() { rsAssert(p.first != nullptr); ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)", closure, p.first, p.first->mScript, p.first->mSlot); - // We use -1 size to indicate an ObjectBase rather than a primitive type - if (size < 0) { - s->setVarObj(p.first->mSlot, (ObjectBase*)value); + Script* script = p.first->mScript; + const RsdCpuScriptImpl *cpuScript = + (const RsdCpuScriptImpl*)script->mHal.drv; + int slot = p.first->mSlot; + ScriptExecutable* exec = mGroup->getExecutable(); + if (exec != nullptr) { + const char* varName = cpuScript->getFieldName(slot); 
+ void* addr = exec->getFieldAddress(varName); + if (size < 0) { + rsrSetObject(mGroup->getCpuRefImpl()->getContext(), + (rs_object_base*)addr, (ObjectBase*)value); + } else { + memcpy(addr, (const void*)&value, size); + } } else { - s->setVar(p.first->mSlot, (const void*)&value, size); + // We use -1 size to indicate an ObjectBase rather than a primitive type + if (size < 0) { + s->setVarObj(slot, (ObjectBase*)value); + } else { + s->setVar(slot, (const void*)&value, size); + } } } } } void Batch::run() { - if (mExecutable != nullptr) { + if (!mClosures.front()->mClosure->mIsKernel) { + rsAssert(mClosures.size() == 1); + + // This batch contains a single closure for an invoke function + CPUClosure* cc = mClosures.front(); + const Closure* c = cc->mClosure; + + if (mFunc != nullptr) { + // TODO: Need align pointers for x86_64. + // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp + ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); + } else { + const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); + rsAssert(invokeID != nullptr); + cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); + } + + return; + } + + if (mFunc != nullptr) { MTLaunchStruct mtls; const CPUClosure* firstCpuClosure = mClosures.front(); const CPUClosure* lastCpuClosure = mClosures.back(); @@ -384,7 +474,7 @@ void Batch::run() { mtls.script = nullptr; mtls.fep.usr = nullptr; - mtls.kernel = mExecutable->getForEachFunction(0); + mtls.kernel = (ForEachFunc_t)mFunc; mGroup->getCpuRefImpl()->launchThreads( (const Allocation**)firstCpuClosure->mClosure->mArgs, @@ -395,25 +485,14 @@ void Batch::run() { return; } - if (mClosures.size() == 1 && - mClosures.front()->mClosure->mKernelID.get() == nullptr) { - // This closure is for an invoke function - CPUClosure* cc = mClosures.front(); - const Closure* c = cc->mClosure; - const ScriptInvokeID* invokeID = c->mInvokeID; - rsAssert(invokeID != nullptr); - cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, 
c->mParamLength); - return; - } - for (CPUClosure* cpuClosure : mClosures) { const Closure* closure = cpuClosure->mClosure; - const ScriptKernelID* kernelID = closure->mKernelID.get(); + const ScriptKernelID* kernelID = + (const ScriptKernelID*)closure->mFunctionID.get(); cpuClosure->mSi->preLaunch(kernelID->mSlot, (const Allocation**)closure->mArgs, closure->mNumArg, closure->mReturnValue, - cpuClosure->mUsrPtr, cpuClosure->mUsrSize, - nullptr); + nullptr, 0, nullptr); } const CPUClosure* cpuClosure = mClosures.front(); @@ -434,7 +513,8 @@ void Batch::run() { for (CPUClosure* cpuClosure : mClosures) { const Closure* closure = cpuClosure->mClosure; - const ScriptKernelID* kernelID = closure->mKernelID.get(); + const ScriptKernelID* kernelID = + (const ScriptKernelID*)closure->mFunctionID.get(); cpuClosure->mSi->postLaunch(kernelID->mSlot, (const Allocation**)closure->mArgs, closure->mNumArg, closure->mReturnValue, diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h index 9ff16c47..f8d36fd3 100644 --- a/cpu_ref/rsCpuScriptGroup2.h +++ b/cpu_ref/rsCpuScriptGroup2.h @@ -21,44 +21,40 @@ typedef void (*InvokeFuncTy)(const void*, uint32_t); class CPUClosure { public: - CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func, - const void* usrPtr, const size_t usrSize) : - mClosure(closure), mSi(si), mFunc(func), - mUsrPtr(usrPtr), mUsrSize(usrSize) {} + CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func) : + mClosure(closure), mSi(si), mFunc(func) {} CPUClosure(const Closure* closure, RsdCpuScriptImpl* si) : - mClosure(closure), mSi(si), mFunc(nullptr), - mUsrPtr(nullptr), mUsrSize(0) {} + mClosure(closure), mSi(si), mFunc(nullptr) {} // It's important to do forwarding here than inheritance for unbound value // binding to work. 
const Closure* mClosure; RsdCpuScriptImpl* mSi; const ExpandFuncTy mFunc; - const void* mUsrPtr; - const size_t mUsrSize; }; class CpuScriptGroup2Impl; class Batch { public: - Batch(CpuScriptGroup2Impl* group) : mGroup(group), mExecutable(nullptr) {} - + Batch(CpuScriptGroup2Impl* group, const char* name); ~Batch(); // Returns true if closure depends on any closure in this batch for a global // variable bool conflict(CPUClosure* closure) const; - void tryToCreateFusedKernel(const char* cacheDir); + void resolveFuncPtr(void* sharedObj); void setGlobalsForBatch(); void run(); + size_t size() const { return mClosures.size(); } + CpuScriptGroup2Impl* mGroup; - ScriptExecutable* mExecutable; - void* mScriptObj; List<CPUClosure*> mClosures; + char* mName; + void* mFunc; }; class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 { @@ -70,11 +66,16 @@ public: virtual void execute(); RsdCpuReferenceImpl* getCpuRefImpl() const { return mCpuRefImpl; } + ScriptExecutable* getExecutable() const { return mExecutable; } + + void compile(const char* cacheDir); private: RsdCpuReferenceImpl* mCpuRefImpl; const ScriptGroup2* mGroup; List<Batch*> mBatches; + ScriptExecutable* mExecutable; + void* mScriptObj; }; } // namespace renderscript |