diff options
author | Yang Ni <yangni@google.com> | 2015-01-27 14:32:25 -0800 |
---|---|---|
committer | Yang Ni <yangni@google.com> | 2015-02-04 17:02:05 -0800 |
commit | eb9aa675754c49f613c6ad71d41472b30f38b007 (patch) | |
tree | ec08471b489ff5aed696c2217841667a9ab16124 /cpu_ref | |
parent | caa8bd8b8a43d4cecbc391651fc90b3a2147957e (diff) | |
download | rs-eb9aa675754c49f613c6ad71d41472b30f38b007.tar.gz |
Runtime support for invocables in ScriptGroups
This also includes support for InvokeID
Change-Id: I5b59df166ea30b309b8dd9623825ac0e72d03856
Diffstat (limited to 'cpu_ref')
-rw-r--r-- | cpu_ref/rsCpuScript.cpp | 1 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup2.cpp | 589 | ||||
-rw-r--r-- | cpu_ref/rsCpuScriptGroup2.h | 77 |
3 files changed, 353 insertions, 314 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp index 3cec0b96..a618a176 100644 --- a/cpu_ref/rsCpuScript.cpp +++ b/cpu_ref/rsCpuScript.cpp @@ -898,6 +898,7 @@ error: mCtx->unlockMutex(); if (mScriptSO) { dlclose(mScriptSO); + mScriptSO = nullptr; } return false; } diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp index 90907d05..18c9f73b 100644 --- a/cpu_ref/rsCpuScriptGroup2.cpp +++ b/cpu_ref/rsCpuScriptGroup2.cpp @@ -31,138 +31,150 @@ const size_t DefaultKernelArgCount = 2; void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, uint32_t xend, uint32_t outstep) { - const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr; - RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; - const void **oldIns = kparams->ins; - uint32_t *oldStrides = kparams->inEStrides; + const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr; + RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; + const void **oldIns = kparams->ins; + uint32_t *oldStrides = kparams->inEStrides; + + std::vector<const void*> ins(DefaultKernelArgCount); + std::vector<uint32_t> strides(DefaultKernelArgCount); + + for (CPUClosure* cpuClosure : closures) { + const Closure* closure = cpuClosure->mClosure; + + auto in_iter = ins.begin(); + auto stride_iter = strides.begin(); + + for (const auto& arg : closure->mArgs) { + const Allocation* a = (const Allocation*)arg; + const uint32_t eStride = a->mHal.state.elementSizeBytes; + const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + + eStride * xstart; + if (kparams->dimY > 1) { + ptr += a->mHal.drvState.lod[0].stride * kparams->y; + } + *in_iter++ = ptr; + *stride_iter++ = eStride; + } - std::vector<const void*> ins(DefaultKernelArgCount); - std::vector<uint32_t> strides(DefaultKernelArgCount); + mutable_kparams->ins = &ins[0]; + mutable_kparams->inEStrides = &strides[0]; - for (CPUClosure* cpuClosure : closures) { - const Closure* closure = cpuClosure->mClosure; + const Allocation* out = closure->mReturnValue; + const uint32_t ostep = out->mHal.state.elementSizeBytes; + const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + + ostep * xstart; + if (kparams->dimY > 1) { + ptr += out->mHal.drvState.lod[0].stride * kparams->y; + } - auto in_iter = ins.begin(); - auto stride_iter = strides.begin(); - - for (const auto& arg : closure->mArgs) { - const Allocation* a = (const Allocation*)arg; - const uint32_t eStride = a->mHal.state.elementSizeBytes; - const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + - eStride * xstart; - if (kparams->dimY > 1) { - ptr += a->mHal.drvState.lod[0].stride * kparams->y; - } - *in_iter++ = ptr; - *stride_iter++ = eStride; - } + mutable_kparams->out = (void*)ptr; - mutable_kparams->ins = &ins[0]; - mutable_kparams->inEStrides = &strides[0]; + mutable_kparams->usr = cpuClosure->mUsrPtr; - const Allocation* out = closure->mReturnValue; - const uint32_t ostep = out->mHal.state.elementSizeBytes; - const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + - ostep * xstart; - if (kparams->dimY > 1) { - ptr += out->mHal.drvState.lod[0].stride * kparams->y; + cpuClosure->mFunc(kparams, xstart, xend, ostep); } - mutable_kparams->out = (void*)ptr; - - mutable_kparams->usr = cpuClosure->mUsrPtr; - - cpuClosure->mFunc(kparams, xstart, xend, ostep); - } - - mutable_kparams->ins = oldIns; - mutable_kparams->inEStrides = oldStrides; - mutable_kparams->usr = &closures; + mutable_kparams->ins = oldIns; + mutable_kparams->inEStrides = oldStrides; + mutable_kparams->usr = &closures; } } // namespace Batch::~Batch() { - for (CPUClosure* c : mClosures) { - delete c; - } - if (mScriptObj) { - dlclose(mScriptObj); - } + for (CPUClosure* c : mClosures) { + delete c; + } + if (mScriptObj) { + dlclose(mScriptObj); + } } bool Batch::conflict(CPUClosure* closure) const { - if (mClosures.empty()) { - return false; - } - - if (closure->mClosure->mKernelID.get() == nullptr || - mClosures.front()->mClosure->mKernelID.get() == nullptr) { - // An invoke should be in a batch by itself, so it conflicts with any other - // closure. - return true; - } + if (mClosures.empty()) { + return false; + } - for (const auto &p : closure->mClosure->mGlobalDeps) { - const Closure* dep = p.first; - for (CPUClosure* c : mClosures) { - if (c->mClosure == dep) { - ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global", closure, dep); + if (closure->mClosure->mKernelID.get() == nullptr || + mClosures.front()->mClosure->mKernelID.get() == nullptr) { + // An invoke should be in a batch by itself, so it conflicts with any other + // closure. return true; - } } - } - for (const auto &p : closure->mClosure->mArgDeps) { - const Closure* dep = p.first; - for (CPUClosure* c : mClosures) { - if (c->mClosure == dep) { - for (const auto &p1 : *p.second) { - if (p1.second->get() != nullptr) { - ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg", closure, dep); - return true; - } + + for (const auto &p : closure->mClosure->mGlobalDeps) { + const Closure* dep = p.first; + for (CPUClosure* c : mClosures) { + if (c->mClosure == dep) { + ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global", + closure, dep); + return true; + } + } + } + for (const auto &p : closure->mClosure->mArgDeps) { + const Closure* dep = p.first; + for (CPUClosure* c : mClosures) { + if (c->mClosure == dep) { + for (const auto &p1 : *p.second) { + if (p1.second->get() != nullptr) { + ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg", + closure, dep); + return true; + } + } + } } - } } - } - return false; + return false; } CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase *sg) : mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { - Batch* batch = new Batch(this); - for (Closure* closure: mGroup->mClosures) { - const ScriptKernelID* kernelID = closure->mKernelID.get(); - RsdCpuScriptImpl* si = - (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); + rsAssert(!mGroup->mClosures.empty()); + + Batch* batch = new Batch(this); + for (Closure* closure: mGroup->mClosures) { + const ScriptKernelID* kernelID = closure->mKernelID.get(); + RsdCpuScriptImpl* si; + CPUClosure* cc; + if (kernelID != nullptr) { + si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); + MTLaunchStruct mtls; + si->forEachKernelSetup(kernelID->mSlot, &mtls); + // TODO: Is mtls.fep.usrLen ever used? + cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, + mtls.fep.usr, mtls.fep.usrLen); + } else { + si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript( + closure->mInvokeID->mScript); + cc = new CPUClosure(closure, si); + } - MTLaunchStruct mtls; - si->forEachKernelSetup(kernelID->mSlot, &mtls); - // TODO: Is mtls.fep.usrLen ever used? - CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, - mtls.fep.usr, mtls.fep.usrLen); - if (batch->conflict(cc)) { - mBatches.push_back(batch); - batch = new Batch(this); - } + if (batch->conflict(cc)) { + mBatches.push_back(batch); + batch = new Batch(this); + } - batch->mClosures.push_back(cc); - } + batch->mClosures.push_back(cc); + } - mBatches.push_back(batch); + rsAssert(!batch->mClosures.empty()); + mBatches.push_back(batch); #ifndef RS_COMPATIBILITY_LIB - for (Batch* batch : mBatches) { - batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str()); - } + for (Batch* batch : mBatches) { + batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str()); + } #endif } CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { - for (Batch* batch : mBatches) { - delete batch; - } + for (Batch* batch : mBatches) { + delete batch; + } } namespace { @@ -170,78 +182,78 @@ namespace { #ifndef RS_COMPATIBILITY_LIB string getFileName(string path) { - unsigned found = path.find_last_of("/\\"); - return path.substr(found + 1); + unsigned found = path.find_last_of("/\\"); + return path.substr(found + 1); } void setupCompileArguments( - const vector<string>& inputs, const vector<int>& kernels, - const string& output_dir, const string& output_filename, - const string& rsLib, vector<const char*>* args) { - args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); - args->push_back("-fPIC"); - args->push_back("-embedRSInfo"); - args->push_back("-mtriple"); - args->push_back(DEFAULT_TARGET_TRIPLE_STRING); - args->push_back("-bclib"); - args->push_back(rsLib.c_str()); - for (const string& input : inputs) { - args->push_back(input.c_str()); - } - for (int kernel : kernels) { - args->push_back("-k"); - string strKernel = std::to_string(kernel); - args->push_back(strKernel.c_str()); - } - args->push_back("-output_path"); - args->push_back(output_dir.c_str()); - args->push_back("-o"); - args->push_back(output_filename.c_str()); - args->push_back(nullptr); + const vector<string>& inputs, const vector<int>& kernels, + const string& output_dir, const string& output_filename, + const string& rsLib, vector<const char*>* args) { + args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); + args->push_back("-fPIC"); + args->push_back("-embedRSInfo"); + args->push_back("-mtriple"); + args->push_back(DEFAULT_TARGET_TRIPLE_STRING); + args->push_back("-bclib"); + args->push_back(rsLib.c_str()); + for (const string& input : inputs) { + args->push_back(input.c_str()); + } + for (int kernel : kernels) { + args->push_back("-k"); + string strKernel = std::to_string(kernel); + args->push_back(strKernel.c_str()); + } + args->push_back("-output_path"); + args->push_back(output_dir.c_str()); + args->push_back("-o"); + args->push_back(output_filename.c_str()); + args->push_back(nullptr); } string convertListToString(int n, const char* const* strs) { - string ret; - ret.append(strs[0]); - for (int i = 1; i < n; i++) { - ret.append(" "); - ret.append(strs[i]); - } - return ret; + string ret; + ret.append(strs[0]); + for (int i = 1; i < n; i++) { + ret.append(" "); + ret.append(strs[i]); + } + return ret; } bool fuseAndCompile(const char** arguments, const string& commandLine) { - const pid_t pid = fork(); + const pid_t pid = fork(); - if (pid == -1) { - ALOGE("Couldn't fork for bcc execution"); - return false; - } + if (pid == -1) { + ALOGE("Couldn't fork for bcc execution"); + return false; + } - if (pid == 0) { - // Child process - ALOGV("Invoking BCC with: %s", commandLine.c_str()); - execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); + if (pid == 0) { + // Child process + ALOGV("Invoking BCC with: %s", commandLine.c_str()); + execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); - ALOGE("execv() failed: %s", strerror(errno)); - abort(); - return false; - } + ALOGE("execv() failed: %s", strerror(errno)); + abort(); + return false; + } - // Parent process - int status = 0; - const pid_t w = waitpid(pid, &status, 0); - if (w == -1) { - return false; - } + // Parent process + int status = 0; + const pid_t w = waitpid(pid, &status, 0); + if (w == -1) { + return false; + } - if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { - ALOGE("bcc terminated unexpectedly"); - return false; - } + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { + ALOGE("bcc terminated unexpectedly"); + return false; + } - return true; + return true; } #endif @@ -249,160 +261,181 @@ bool fuseAndCompile(const char** arguments, void Batch::tryToCreateFusedKernel(const char *cacheDir) { #ifndef RS_COMPATIBILITY_LIB - if (mClosures.size() < 2) { - ALOGV("Compiler kernel fusion skipped due to only one or zero kernel in" - " a script group batch."); - return; - } - - //===--------------------------------------------------------------------===// - // Fuse the input kernels and generate native code in an object file - //===--------------------------------------------------------------------===// + if (mClosures.size() < 2) { + return; + } - std::vector<string> inputFiles; - std::vector<int> slots; + //===--------------------------------------------------------------------===// + // Fuse the input kernels and generate native code in an object file + //===--------------------------------------------------------------------===// - for (CPUClosure* cpuClosure : mClosures) { - const Closure* closure = cpuClosure->mClosure; - const ScriptKernelID* kernelID = closure->mKernelID.get(); - const Script* script = kernelID->mScript; + std::vector<string> inputFiles; + std::vector<int> slots; - if (script->isIntrinsic()) { - return; - } + for (CPUClosure* cpuClosure : mClosures) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + const Script* script = kernelID->mScript; - const RsdCpuScriptImpl *cpuScript = - (const RsdCpuScriptImpl*)script->mHal.drv; + if (script->isIntrinsic()) { + return; + } - const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); + const RsdCpuScriptImpl *cpuScript = + (const RsdCpuScriptImpl*)script->mHal.drv; - inputFiles.push_back(bitcodeFilename); - slots.push_back(kernelID->mSlot); - } + const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); - string outputPath(tempnam(cacheDir, "fused")); - string outputFileName = getFileName(outputPath); - string objFilePath(outputPath); - objFilePath.append(".o"); - string rsLibPath(SYSLIBPATH"/libclcore.bc"); - vector<const char*> arguments; - setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath, - &arguments); - string commandLine = - convertListToString(arguments.size() - 1, arguments.data()); + inputFiles.push_back(bitcodeFilename); + slots.push_back(kernelID->mSlot); + } - if (!fuseAndCompile(arguments.data(), commandLine)) { - return; - } + string outputPath(tempnam(cacheDir, "fused")); + string outputFileName = getFileName(outputPath); + string objFilePath(outputPath); + objFilePath.append(".o"); + string rsLibPath(SYSLIBPATH"/libclcore.bc"); + vector<const char*> arguments; + setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath, + &arguments); + string commandLine = + convertListToString(arguments.size() - 1, arguments.data()); + + if (!fuseAndCompile(arguments.data(), commandLine)) { + return; + } - //===--------------------------------------------------------------------===// - // Create and load the shared lib - //===--------------------------------------------------------------------===// + //===--------------------------------------------------------------------===// + // Create and load the shared lib + //===--------------------------------------------------------------------===// - const char* resName = outputFileName.c_str(); + const char* resName = outputFileName.c_str(); - if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { - ALOGE("Failed to link object file '%s'", resName); - return; - } + if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { + ALOGE("Failed to link object file '%s'", resName); + return; + } - void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); - if (mSharedObj == nullptr) { - ALOGE("Unable to load '%s'", resName); - return; - } + void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); + if (mSharedObj == nullptr) { + ALOGE("Unable to load '%s'", resName); + return; + } - mExecutable = ScriptExecutable::createFromSharedObject( - nullptr, // RS context. Unused. - mSharedObj); + mExecutable = ScriptExecutable::createFromSharedObject( + nullptr, // RS context. Unused. + mSharedObj); #endif // RS_COMPATIBILITY_LIB } void CpuScriptGroup2Impl::execute() { - for (auto batch : mBatches) { - batch->setGlobalsForBatch(); - batch->run(); - } + for (auto batch : mBatches) { + batch->setGlobalsForBatch(); + batch->run(); + } } void Batch::setGlobalsForBatch() { - for (CPUClosure* cpuClosure : mClosures) { - const Closure* closure = cpuClosure->mClosure; - const ScriptKernelID* kernelID = closure->mKernelID.get(); - Script* s = kernelID->mScript; - for (const auto& p : closure->mGlobals) { - const void* value = p.second.first; - int size = p.second.second; - // We use -1 size to indicate an ObjectBase rather than a primitive type - if (size < 0) { - s->setVarObj(p.first->mSlot, (ObjectBase*)value); - } else { - s->setVar(p.first->mSlot, (const void*)&value, size); - } + for (CPUClosure* cpuClosure : mClosures) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + Script* s; + if (kernelID != nullptr) { + s = kernelID->mScript; + } else { + s = cpuClosure->mClosure->mInvokeID->mScript; + } + for (const auto& p : closure->mGlobals) { + const void* value = p.second.first; + int size = p.second.second; + if (value == nullptr && size == 0) { + // This indicates the current closure depends on another closure for a + // global in their shared module (script). In this case we don't need to + // copy the value. For example, an invoke intializes a global variable + // which a kernel later reads. + continue; + } + // We use -1 size to indicate an ObjectBase rather than a primitive type + if (size < 0) { + s->setVarObj(p.first->mSlot, (ObjectBase*)value); + } else { + s->setVar(p.first->mSlot, (const void*)&value, size); + } + } } - } } void Batch::run() { - if (mExecutable != nullptr) { - MTLaunchStruct mtls; - const CPUClosure* firstCpuClosure = mClosures.front(); - const CPUClosure* lastCpuClosure = mClosures.back(); + if (mExecutable != nullptr) { + MTLaunchStruct mtls; + const CPUClosure* firstCpuClosure = mClosures.front(); + const CPUClosure* lastCpuClosure = mClosures.back(); + + firstCpuClosure->mSi->forEachMtlsSetup( + (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], + firstCpuClosure->mClosure->mArgs.size(), + lastCpuClosure->mClosure->mReturnValue, + nullptr, 0, nullptr, &mtls); + + mtls.script = nullptr; + mtls.fep.usr = nullptr; + mtls.kernel = mExecutable->getForEachFunction(0); + + mGroup->getCpuRefImpl()->launchThreads( + (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], + firstCpuClosure->mClosure->mArgs.size(), + lastCpuClosure->mClosure->mReturnValue, + nullptr, &mtls); + + return; + } + + if (mClosures.size() == 1 && + mClosures.front()->mClosure->mKernelID.get() == nullptr) { + // This closure is for an invoke function + CPUClosure* cc = mClosures.front(); + const Closure* c = cc->mClosure; + const ScriptInvokeID* invokeID = c->mInvokeID; + rsAssert(invokeID != nullptr); + cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); + return; + } - firstCpuClosure->mSi->forEachMtlsSetup( - (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], - firstCpuClosure->mClosure->mArgs.size(), - lastCpuClosure->mClosure->mReturnValue, - nullptr, 0, nullptr, &mtls); + for (CPUClosure* cpuClosure : mClosures) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + cpuClosure->mSi->preLaunch(kernelID->mSlot, + (const Allocation**)&closure->mArgs[0], + closure->mArgs.size(), closure->mReturnValue, + cpuClosure->mUsrPtr, cpuClosure->mUsrSize, + nullptr); + } - mtls.script = nullptr; - mtls.fep.usr = nullptr; - mtls.kernel = mExecutable->getForEachFunction(0); + const CPUClosure* cpuClosure = mClosures.front(); + const Closure* closure = cpuClosure->mClosure; + MTLaunchStruct mtls; - mGroup->getCpuRefImpl()->launchThreads( - (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], - firstCpuClosure->mClosure->mArgs.size(), - lastCpuClosure->mClosure->mReturnValue, - nullptr, &mtls); + if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0], + closure->mArgs.size(), + closure->mReturnValue, + nullptr, 0, nullptr, &mtls)) { - return; - } + mtls.script = nullptr; + mtls.kernel = (void (*)())&groupRoot; + mtls.fep.usr = &mClosures; - for (CPUClosure* cpuClosure : mClosures) { - const Closure* closure = cpuClosure->mClosure; - const ScriptKernelID* kernelID = closure->mKernelID.get(); - cpuClosure->mSi->preLaunch(kernelID->mSlot, - (const Allocation**)&closure->mArgs[0], - closure->mArgs.size(), closure->mReturnValue, - cpuClosure->mUsrPtr, cpuClosure->mUsrSize, - nullptr); - } - - const CPUClosure* cpuClosure = mClosures.front(); - const Closure* closure = cpuClosure->mClosure; - MTLaunchStruct mtls; - - if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0], - closure->mArgs.size(), - closure->mReturnValue, - nullptr, 0, nullptr, &mtls)) { - - mtls.script = nullptr; - mtls.kernel = (void (*)())&groupRoot; - mtls.fep.usr = &mClosures; - - mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); - } - - for (CPUClosure* cpuClosure : mClosures) { - const Closure* closure = cpuClosure->mClosure; - const ScriptKernelID* kernelID = closure->mKernelID.get(); - cpuClosure->mSi->postLaunch(kernelID->mSlot, - (const Allocation**)&closure->mArgs[0], - closure->mArgs.size(), closure->mReturnValue, - nullptr, 0, nullptr); - } + mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); + } + + for (CPUClosure* cpuClosure : mClosures) { + const Closure* closure = cpuClosure->mClosure; + const ScriptKernelID* kernelID = closure->mKernelID.get(); + cpuClosure->mSi->postLaunch(kernelID->mSlot, + (const Allocation**)&closure->mArgs[0], + closure->mArgs.size(), closure->mReturnValue, + nullptr, 0, nullptr); + } } } // namespace renderscript diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h index 53a0fe59..6b816dcd 100644 --- a/cpu_ref/rsCpuScriptGroup2.h +++ b/cpu_ref/rsCpuScriptGroup2.h @@ -20,59 +20,64 @@ struct RsExpandKernelParams; typedef void (*ExpandFuncTy)(const RsExpandKernelParams*, uint32_t, uint32_t, uint32_t); +typedef void (*InvokeFuncTy)(const void*, uint32_t); class CPUClosure { - public: - CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func, - const void* usrPtr, const size_t usrSize) : - mClosure(closure), mSi(si), mFunc(func), mUsrPtr(usrPtr), - mUsrSize(usrSize) {} - - // It's important to do forwarding here than inheritance for unbound value - // binding to work. - const Closure* mClosure; - RsdCpuScriptImpl* mSi; - const ExpandFuncTy mFunc; - const void* mUsrPtr; - const size_t mUsrSize; +public: + CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func, + const void* usrPtr, const size_t usrSize) : + mClosure(closure), mSi(si), mFunc(func), + mUsrPtr(usrPtr), mUsrSize(usrSize) {} + + CPUClosure(const Closure* closure, RsdCpuScriptImpl* si) : + mClosure(closure), mSi(si), mFunc(nullptr), + mUsrPtr(nullptr), mUsrSize(0) {} + + // It's important to do forwarding here than inheritance for unbound value + // binding to work. + const Closure* mClosure; + RsdCpuScriptImpl* mSi; + const ExpandFuncTy mFunc; + const void* mUsrPtr; + const size_t mUsrSize; }; class CpuScriptGroup2Impl; class Batch { - public: - Batch(CpuScriptGroup2Impl* group) : mGroup(group), mExecutable(nullptr) {} +public: + Batch(CpuScriptGroup2Impl* group) : mGroup(group), mExecutable(nullptr) {} - ~Batch(); + ~Batch(); - // Returns true if closure depends on any closure in this batch for a global - // variable - bool conflict(CPUClosure* closure) const; + // Returns true if closure depends on any closure in this batch for a global + // variable + bool conflict(CPUClosure* closure) const; - void tryToCreateFusedKernel(const char* cacheDir); - void setGlobalsForBatch(); - void run(); + void tryToCreateFusedKernel(const char* cacheDir); + void setGlobalsForBatch(); + void run(); - CpuScriptGroup2Impl* mGroup; - ScriptExecutable* mExecutable; - void* mScriptObj; - list<CPUClosure*> mClosures; + CpuScriptGroup2Impl* mGroup; + ScriptExecutable* mExecutable; + void* mScriptObj; + list<CPUClosure*> mClosures; }; class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 { - public: - CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase* group); - virtual ~CpuScriptGroup2Impl(); +public: + CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase* group); + virtual ~CpuScriptGroup2Impl(); - bool init(); - virtual void execute(); + bool init(); + virtual void execute(); - RsdCpuReferenceImpl* getCpuRefImpl() const { return mCpuRefImpl; } + RsdCpuReferenceImpl* getCpuRefImpl() const { return mCpuRefImpl; } - private: - RsdCpuReferenceImpl* mCpuRefImpl; - const ScriptGroup2* mGroup; - list<Batch*> mBatches; +private: + RsdCpuReferenceImpl* mCpuRefImpl; + const ScriptGroup2* mGroup; + list<Batch*> mBatches; }; } // namespace renderscript |