summary | refs | log | tree | commit | diff
path: root/cpu_ref
diff options
context:
space:
mode:
author Yang Ni <yangni@google.com> 2015-02-20 15:20:00 -0800
committer Yang Ni <yangni@google.com> 2015-03-10 15:26:25 -0700
commit 062c287f573ecc06c38ee4295e5627e12c52ac3d (patch)
tree fc9fc1581ac0cc8a68d107575522a388842b21c0 /cpu_ref
parent 30940f637bce187126a667bb3b979978d7067a8b (diff)
download rs-062c287f573ecc06c38ee4295e5627e12c52ac3d.tar.gz
Runtime support for Script Group as single module
Also made two other cleanups:

- Changed KernelID/InvokeID into IDBase in class Closure. Rather than having two fields in class Closure, one of type ScriptKernelID and the other InvokeID, use a single field of the common base class IDBase. Added a boolean field to indicate whether it is a kernel or an invoke, since -fno-rtti is on.
- Removed user pointer from CPU closure.

Change-Id: I5553f86b2e58325f85649078d48685a38f12d62f
Diffstat (limited to 'cpu_ref')
-rw-r--r-- cpu_ref/rsCpuExecutable.cpp 34
-rw-r--r-- cpu_ref/rsCpuExecutable.h 33
-rw-r--r-- cpu_ref/rsCpuScript.cpp 4
-rw-r--r-- cpu_ref/rsCpuScript.h 1
-rw-r--r-- cpu_ref/rsCpuScriptGroup2.cpp 232
-rw-r--r-- cpu_ref/rsCpuScriptGroup2.h 27
6 files changed, 226 insertions, 105 deletions
diff --git a/cpu_ref/rsCpuExecutable.cpp b/cpu_ref/rsCpuExecutable.cpp
index e5009d84..75f5e61a 100644
--- a/cpu_ref/rsCpuExecutable.cpp
+++ b/cpu_ref/rsCpuExecutable.cpp
@@ -329,6 +329,7 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
void** fieldAddress = nullptr;
bool* fieldIsObject = nullptr;
+ char** fieldName = nullptr;
InvokeFunc_t* invokeFunctions = nullptr;
ForEachFunc_t* forEachFunctions = nullptr;
uint32_t* forEachSignatures = nullptr;
@@ -356,6 +357,11 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
goto error;
}
+ fieldName = new char*[varCount];
+ if (fieldName == nullptr) {
+ goto error;
+ }
+
for (size_t i = 0; i < varCount; ++i) {
if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
goto error;
@@ -372,6 +378,8 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
}
fieldAddress[i] = addr;
fieldIsObject[i] = false;
+ fieldName[i] = new char[strlen(line)+1];
+ strcpy(fieldName[i], line);
}
if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
@@ -440,7 +448,8 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
forEachSignatures[i] = tmpSig;
forEachFunctions[i] =
(ForEachFunc_t) dlsym(sharedObj, tmpName);
- if (i != 0 && forEachFunctions[i] == nullptr) {
+ if (i != 0 && forEachFunctions[i] == nullptr &&
+ strcmp(tmpName, "root.expand")) {
// Ignore missing root.expand functions.
// root() is always specified at location 0.
ALOGE("Failed to find forEach function address for %s: %s",
@@ -503,7 +512,6 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
ALOGE("Unable to read pragma at index %zu!", i);
goto error;
}
-
char key[MAXLINE];
char value[MAXLINE] = ""; // initialize in case value is empty
@@ -561,15 +569,15 @@ ScriptExecutable* ScriptExecutable::createFromSharedObject(
char *checksumStart = &line[strlen(CHECKSUM_STR)];
checksum = new char[strlen(checksumStart) + 1];
strcpy(checksum, checksumStart);
- }
- else {
+ } else {
+ ALOGE("Missing checksum in shared obj file");
goto error;
}
#endif // RS_COMPATIBILITY_LIB
return new ScriptExecutable(
- RSContext, fieldAddress, fieldIsObject, varCount,
+ RSContext, fieldAddress, fieldIsObject, fieldName, varCount,
invokeFunctions, funcCount,
forEachFunctions, forEachSignatures, forEachCount,
pragmaKeys, pragmaValues, pragmaCount,
@@ -591,12 +599,28 @@ error:
delete[] forEachSignatures;
delete[] forEachFunctions;
+
delete[] invokeFunctions;
+
+ for (size_t i = 0; i < varCount; i++) {
+ delete[] fieldName[i];
+ }
+ delete[] fieldName;
delete[] fieldIsObject;
delete[] fieldAddress;
return nullptr;
}
+void* ScriptExecutable::getFieldAddress(const char* name) const {
+ // TODO: improve this by using a hash map.
+ for (size_t i = 0; i < mExportedVarCount; i++) {
+ if (strcmp(name, mFieldName[i]) == 0) {
+ return mFieldAddress[i];
+ }
+ }
+ return nullptr;
+}
+
} // namespace renderscript
} // namespace android
diff --git a/cpu_ref/rsCpuExecutable.h b/cpu_ref/rsCpuExecutable.h
index cdf6fd6b..ed6904d0 100644
--- a/cpu_ref/rsCpuExecutable.h
+++ b/cpu_ref/rsCpuExecutable.h
@@ -57,22 +57,22 @@ private:
class ScriptExecutable {
public:
ScriptExecutable(Context* RSContext,
- void** fieldAddress, bool* fieldIsObject, size_t varCount,
+ void** fieldAddress, bool* fieldIsObject,
+ const char* const * fieldName, size_t varCount,
InvokeFunc_t* invokeFunctions, size_t funcCount,
ForEachFunc_t* forEachFunctions, uint32_t* forEachSignatures,
size_t forEachCount,
- const char ** pragmaKeys, const char ** pragmaValues,
+ const char** pragmaKeys, const char** pragmaValues,
size_t pragmaCount,
bool isThreadable, const char *buildChecksum) :
mFieldAddress(fieldAddress), mFieldIsObject(fieldIsObject),
- mExportedVarCount(varCount),
- mInvokeFunctions(invokeFunctions), mFuncCount(funcCount),
- mForEachFunctions(forEachFunctions), mForEachSignatures(forEachSignatures),
- mForEachCount(forEachCount),
- mPragmaKeys(pragmaKeys), mPragmaValues(pragmaValues),
- mPragmaCount(pragmaCount),
- mIsThreadable(isThreadable), mBuildChecksum(buildChecksum),
- mRS(RSContext) {
+ mFieldName(fieldName), mExportedVarCount(varCount),
+ mInvokeFunctions(invokeFunctions), mFuncCount(funcCount),
+ mForEachFunctions(forEachFunctions), mForEachSignatures(forEachSignatures),
+ mForEachCount(forEachCount),
+ mPragmaKeys(pragmaKeys), mPragmaValues(pragmaValues),
+ mPragmaCount(pragmaCount), mIsThreadable(isThreadable),
+ mBuildChecksum(buildChecksum), mRS(RSContext) {
}
~ScriptExecutable() {
@@ -91,12 +91,18 @@ public:
delete [] mPragmaKeys[i];
delete [] mPragmaValues[i];
}
-
delete[] mPragmaValues;
delete[] mPragmaKeys;
+
delete[] mForEachSignatures;
delete[] mForEachFunctions;
+
delete[] mInvokeFunctions;
+
+ for (size_t i = 0; i < mExportedVarCount; i++) {
+ delete[] mFieldName[i];
+ }
+ delete[] mFieldName;
delete[] mFieldIsObject;
delete[] mFieldAddress;
}
@@ -110,8 +116,12 @@ public:
size_t getPragmaCount() const { return mPragmaCount; }
void* getFieldAddress(int slot) const { return mFieldAddress[slot]; }
+ void* getFieldAddress(const char* name) const;
bool getFieldIsObject(int slot) const { return mFieldIsObject[slot]; }
+ const char* getFieldName(int slot) const { return mFieldName[slot]; }
+
InvokeFunc_t getInvokeFunction(int slot) const { return mInvokeFunctions[slot]; }
+
ForEachFunc_t getForEachFunction(int slot) const { return mForEachFunctions[slot]; }
uint32_t getForEachSignature(int slot) const { return mForEachSignatures[slot]; }
@@ -129,6 +139,7 @@ public:
private:
void** mFieldAddress;
bool* mFieldIsObject;
+ const char* const * mFieldName;
size_t mExportedVarCount;
InvokeFunc_t* mInvokeFunctions;
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index ae7e5970..481c54d9 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -855,6 +855,10 @@ void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
}
+const char* RsdCpuScriptImpl::getFieldName(uint32_t slot) const {
+ return mScriptExec->getFieldName(slot);
+}
+
RsdCpuScriptImpl::~RsdCpuScriptImpl() {
#ifndef RS_COMPATIBILITY_LIB
if (mCompilerDriver) {
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index 44df8a9c..aaaa2a29 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -87,6 +87,7 @@ public:
virtual void setGlobalBind(uint32_t slot, Allocation *data);
virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
+ const char* getFieldName(uint32_t slot) const;
virtual ~RsdCpuScriptImpl();
RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s);
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index 7222eb9d..6bc98b41 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -5,6 +5,8 @@
#include <stdlib.h>
#include <unistd.h>
+#include <set>
+#include <sstream>
#include <string>
#include <vector>
@@ -75,25 +77,25 @@ void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
mutable_kparams->out = (void*)ptr;
- mutable_kparams->usr = cpuClosure->mUsrPtr;
-
cpuClosure->mFunc(kparams, xstart, xend, ostep);
}
mutable_kparams->ins = oldIns;
mutable_kparams->inEStrides = oldStrides;
- mutable_kparams->usr = &closures;
}
} // namespace
+Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
+ mGroup(group), mFunc(nullptr) {
+ mName = strndup(name, strlen(name));
+}
+
Batch::~Batch() {
for (CPUClosure* c : mClosures) {
delete c;
}
- if (mScriptObj) {
- dlclose(mScriptObj);
- }
+ free(mName);
}
bool Batch::conflict(CPUClosure* cpuClosure) const {
@@ -103,8 +105,7 @@ bool Batch::conflict(CPUClosure* cpuClosure) const {
const Closure* closure = cpuClosure->mClosure;
- if (closure->mKernelID.get() == nullptr ||
- mClosures.front()->mClosure->mKernelID.get() == nullptr) {
+ if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
// An invoke should be in a batch by itself, so it conflicts with any other
// closure.
return true;
@@ -134,30 +135,30 @@ bool Batch::conflict(CPUClosure* cpuClosure) const {
CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
const ScriptGroupBase *sg) :
- mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
+ mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
+ mExecutable(nullptr), mScriptObj(nullptr) {
rsAssert(!mGroup->mClosures.empty());
- Batch* batch = new Batch(this);
+ Batch* batch = new Batch(this, "Batch0");
+ int i = 0;
for (Closure* closure: mGroup->mClosures) {
- const ScriptKernelID* kernelID = closure->mKernelID.get();
- RsdCpuScriptImpl* si;
CPUClosure* cc;
- if (kernelID != nullptr) {
- si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
+ const IDBase* funcID = closure->mFunctionID.get();
+ RsdCpuScriptImpl* si =
+ (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
+ if (closure->mIsKernel) {
MTLaunchStruct mtls;
- si->forEachKernelSetup(kernelID->mSlot, &mtls);
- // TODO: Is mtls.fep.usrLen ever used?
- cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
- mtls.fep.usr, mtls.fep.usrLen);
+ si->forEachKernelSetup(funcID->mSlot, &mtls);
+ cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
} else {
- si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(
- closure->mInvokeID->mScript);
cc = new CPUClosure(closure, si);
}
if (batch->conflict(cc)) {
mBatches.push_back(batch);
- batch = new Batch(this);
+ std::stringstream ss;
+ ss << "Batch" << ++i;
+ batch = new Batch(this, ss.str().c_str());
}
batch->mClosures.push_back(cc);
@@ -167,16 +168,33 @@ CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
mBatches.push_back(batch);
#ifndef RS_COMPATIBILITY_LIB
- for (Batch* batch : mBatches) {
- batch->tryToCreateFusedKernel(mGroup->mCacheDir);
+ compile(mGroup->mCacheDir);
+ if (mScriptObj != nullptr && mExecutable != nullptr) {
+ for (Batch* batch : mBatches) {
+ batch->resolveFuncPtr(mScriptObj);
+ }
}
-#endif
+#endif // RS_COMPATIBILITY_LIB
+}
+
+void Batch::resolveFuncPtr(void* sharedObj) {
+ std::string funcName(mName);
+ if (mClosures.front()->mClosure->mIsKernel) {
+ funcName.append(".expand");
+ }
+ mFunc = dlsym(sharedObj, funcName.c_str());
+ rsAssert (mFunc != nullptr);
}
CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
for (Batch* batch : mBatches) {
delete batch;
}
+ // TODO: move this dlclose into ~ScriptExecutable().
+ if (mScriptObj != nullptr) {
+ dlclose(mScriptObj);
+ }
+ delete mExecutable;
}
namespace {
@@ -189,7 +207,8 @@ string getFileName(string path) {
}
void setupCompileArguments(
- const vector<string>& inputs, const vector<int>& kernels,
+ const vector<string>& inputs, const vector<string>& kernelBatches,
+ const vector<string>& invokeBatches,
const string& output_dir, const string& output_filename,
const string& rsLib, vector<const char*>* args) {
args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
@@ -202,10 +221,13 @@ void setupCompileArguments(
for (const string& input : inputs) {
args->push_back(input.c_str());
}
- for (int kernel : kernels) {
- args->push_back("-k");
- string strKernel = std::to_string(kernel);
- args->push_back(strKernel.c_str());
+ for (const string& batch : kernelBatches) {
+ args->push_back("-merge");
+ args->push_back(batch.c_str());
+ }
+ for (const string& batch : invokeBatches) {
+ args->push_back("-invoke");
+ args->push_back(batch.c_str());
}
args->push_back("-output_path");
args->push_back(output_dir.c_str());
@@ -247,13 +269,32 @@ bool fuseAndCompile(const char** arguments,
return true;
}
-#endif
+
+void generateSourceSlot(const Closure& closure,
+ const std::vector<std::string>& inputs,
+ std::stringstream& ss) {
+ const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
+ const Script* script = funcID->mScript;
+
+ rsAssert (!script->isIntrinsic());
+
+ const RsdCpuScriptImpl *cpuScript =
+ (const RsdCpuScriptImpl*)script->mHal.drv;
+ const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
+
+ const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
+ inputs.begin();
+
+ ss << index << "," << funcID->mSlot << ".";
+}
+
+#endif // RS_COMPATIBILITY_LIB
} // anonymous namespace
-void Batch::tryToCreateFusedKernel(const char *cacheDir) {
+void CpuScriptGroup2Impl::compile(const char* cacheDir) {
#ifndef RS_COMPATIBILITY_LIB
- if (mClosures.size() < 2) {
+ if (mGroup->mClosures.size() < 2) {
return;
}
@@ -261,25 +302,43 @@ void Batch::tryToCreateFusedKernel(const char *cacheDir) {
// Fuse the input kernels and generate native code in an object file
//===--------------------------------------------------------------------===//
- std::vector<string> inputFiles;
- std::vector<int> slots;
-
- for (CPUClosure* cpuClosure : mClosures) {
- const Closure* closure = cpuClosure->mClosure;
- const ScriptKernelID* kernelID = closure->mKernelID.get();
- const Script* script = kernelID->mScript;
+ std::set<string> inputSet;
+ for (Closure* closure : mGroup->mClosures) {
+ const Script* script = closure->mFunctionID.get()->mScript;
+ // If any script is an intrinsic, give up trying to fuse the kernels.
if (script->isIntrinsic()) {
return;
}
const RsdCpuScriptImpl *cpuScript =
(const RsdCpuScriptImpl*)script->mHal.drv;
-
const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
+ inputSet.insert(bitcodeFilename);
+ }
+
+ std::vector<string> inputs(inputSet.begin(), inputSet.end());
+
+ std::vector<string> kernelBatches;
+ std::vector<string> invokeBatches;
+
+ int i = 0;
+ for (const auto& batch : mBatches) {
+ rsAssert(batch->size() > 0);
- inputFiles.push_back(bitcodeFilename);
- slots.push_back(kernelID->mSlot);
+ std::stringstream ss;
+ ss << batch->mName << ":";
+
+ if (!batch->mClosures.front()->mClosure->mIsKernel) {
+ rsAssert(batch->size() == 1);
+ generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss);
+ invokeBatches.push_back(ss.str());
+ } else {
+ for (const auto& cpuClosure : batch->mClosures) {
+ generateSourceSlot(*cpuClosure->mClosure, inputs, ss);
+ }
+ kernelBatches.push_back(ss.str());
+ }
}
rsAssert(cacheDir != nullptr);
@@ -295,8 +354,8 @@ void Batch::tryToCreateFusedKernel(const char *cacheDir) {
string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2));
string rsLibPath(SYSLIBPATH"/libclcore.bc");
vector<const char*> arguments;
- setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath,
- &arguments);
+ setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
+ outputFileName, rsLibPath, &arguments);
std::unique_ptr<const char> joined(
rsuJoinStrings(arguments.size() - 1, arguments.data()));
string commandLine (joined.get());
@@ -317,15 +376,15 @@ void Batch::tryToCreateFusedKernel(const char *cacheDir) {
return;
}
- void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
- if (mSharedObj == nullptr) {
+ mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
+ if (mScriptObj == nullptr) {
ALOGE("Unable to load '%s'", resName);
return;
}
mExecutable = ScriptExecutable::createFromSharedObject(
- nullptr, // RS context. Unused.
- mSharedObj);
+ nullptr, // RS context. Unused.
+ mScriptObj);
#endif // RS_COMPATIBILITY_LIB
}
@@ -340,13 +399,8 @@ void CpuScriptGroup2Impl::execute() {
void Batch::setGlobalsForBatch() {
for (CPUClosure* cpuClosure : mClosures) {
const Closure* closure = cpuClosure->mClosure;
- const ScriptKernelID* kernelID = closure->mKernelID.get();
- Script* s;
- if (kernelID != nullptr) {
- s = kernelID->mScript;
- } else {
- s = cpuClosure->mClosure->mInvokeID->mScript;
- }
+ const IDBase* funcID = closure->mFunctionID.get();
+ Script* s = funcID->mScript;;
for (const auto& p : closure->mGlobals) {
const void* value = p.second.first;
int size = p.second.second;
@@ -360,18 +414,54 @@ void Batch::setGlobalsForBatch() {
rsAssert(p.first != nullptr);
ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)",
closure, p.first, p.first->mScript, p.first->mSlot);
- // We use -1 size to indicate an ObjectBase rather than a primitive type
- if (size < 0) {
- s->setVarObj(p.first->mSlot, (ObjectBase*)value);
+ Script* script = p.first->mScript;
+ const RsdCpuScriptImpl *cpuScript =
+ (const RsdCpuScriptImpl*)script->mHal.drv;
+ int slot = p.first->mSlot;
+ ScriptExecutable* exec = mGroup->getExecutable();
+ if (exec != nullptr) {
+ const char* varName = cpuScript->getFieldName(slot);
+ void* addr = exec->getFieldAddress(varName);
+ if (size < 0) {
+ rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
+ (rs_object_base*)addr, (ObjectBase*)value);
+ } else {
+ memcpy(addr, (const void*)&value, size);
+ }
} else {
- s->setVar(p.first->mSlot, (const void*)&value, size);
+ // We use -1 size to indicate an ObjectBase rather than a primitive type
+ if (size < 0) {
+ s->setVarObj(slot, (ObjectBase*)value);
+ } else {
+ s->setVar(slot, (const void*)&value, size);
+ }
}
}
}
}
void Batch::run() {
- if (mExecutable != nullptr) {
+ if (!mClosures.front()->mClosure->mIsKernel) {
+ rsAssert(mClosures.size() == 1);
+
+ // This batch contains a single closure for an invoke function
+ CPUClosure* cc = mClosures.front();
+ const Closure* c = cc->mClosure;
+
+ if (mFunc != nullptr) {
+ // TODO: Need to align pointers for x86_64.
+ // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
+ ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
+ } else {
+ const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
+ rsAssert(invokeID != nullptr);
+ cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
+ }
+
+ return;
+ }
+
+ if (mFunc != nullptr) {
MTLaunchStruct mtls;
const CPUClosure* firstCpuClosure = mClosures.front();
const CPUClosure* lastCpuClosure = mClosures.back();
@@ -384,7 +474,7 @@ void Batch::run() {
mtls.script = nullptr;
mtls.fep.usr = nullptr;
- mtls.kernel = mExecutable->getForEachFunction(0);
+ mtls.kernel = (ForEachFunc_t)mFunc;
mGroup->getCpuRefImpl()->launchThreads(
(const Allocation**)firstCpuClosure->mClosure->mArgs,
@@ -395,25 +485,14 @@ void Batch::run() {
return;
}
- if (mClosures.size() == 1 &&
- mClosures.front()->mClosure->mKernelID.get() == nullptr) {
- // This closure is for an invoke function
- CPUClosure* cc = mClosures.front();
- const Closure* c = cc->mClosure;
- const ScriptInvokeID* invokeID = c->mInvokeID;
- rsAssert(invokeID != nullptr);
- cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
- return;
- }
-
for (CPUClosure* cpuClosure : mClosures) {
const Closure* closure = cpuClosure->mClosure;
- const ScriptKernelID* kernelID = closure->mKernelID.get();
+ const ScriptKernelID* kernelID =
+ (const ScriptKernelID*)closure->mFunctionID.get();
cpuClosure->mSi->preLaunch(kernelID->mSlot,
(const Allocation**)closure->mArgs,
closure->mNumArg, closure->mReturnValue,
- cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
- nullptr);
+ nullptr, 0, nullptr);
}
const CPUClosure* cpuClosure = mClosures.front();
@@ -434,7 +513,8 @@ void Batch::run() {
for (CPUClosure* cpuClosure : mClosures) {
const Closure* closure = cpuClosure->mClosure;
- const ScriptKernelID* kernelID = closure->mKernelID.get();
+ const ScriptKernelID* kernelID =
+ (const ScriptKernelID*)closure->mFunctionID.get();
cpuClosure->mSi->postLaunch(kernelID->mSlot,
(const Allocation**)closure->mArgs,
closure->mNumArg, closure->mReturnValue,
diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h
index 9ff16c47..f8d36fd3 100644
--- a/cpu_ref/rsCpuScriptGroup2.h
+++ b/cpu_ref/rsCpuScriptGroup2.h
@@ -21,44 +21,40 @@ typedef void (*InvokeFuncTy)(const void*, uint32_t);
class CPUClosure {
public:
- CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func,
- const void* usrPtr, const size_t usrSize) :
- mClosure(closure), mSi(si), mFunc(func),
- mUsrPtr(usrPtr), mUsrSize(usrSize) {}
+ CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func) :
+ mClosure(closure), mSi(si), mFunc(func) {}
CPUClosure(const Closure* closure, RsdCpuScriptImpl* si) :
- mClosure(closure), mSi(si), mFunc(nullptr),
- mUsrPtr(nullptr), mUsrSize(0) {}
+ mClosure(closure), mSi(si), mFunc(nullptr) {}
// It's important to do forwarding here rather than inheritance for unbound value
// binding to work.
const Closure* mClosure;
RsdCpuScriptImpl* mSi;
const ExpandFuncTy mFunc;
- const void* mUsrPtr;
- const size_t mUsrSize;
};
class CpuScriptGroup2Impl;
class Batch {
public:
- Batch(CpuScriptGroup2Impl* group) : mGroup(group), mExecutable(nullptr) {}
-
+ Batch(CpuScriptGroup2Impl* group, const char* name);
~Batch();
// Returns true if closure depends on any closure in this batch for a global
// variable
bool conflict(CPUClosure* closure) const;
- void tryToCreateFusedKernel(const char* cacheDir);
+ void resolveFuncPtr(void* sharedObj);
void setGlobalsForBatch();
void run();
+ size_t size() const { return mClosures.size(); }
+
CpuScriptGroup2Impl* mGroup;
- ScriptExecutable* mExecutable;
- void* mScriptObj;
List<CPUClosure*> mClosures;
+ char* mName;
+ void* mFunc;
};
class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 {
@@ -70,11 +66,16 @@ public:
virtual void execute();
RsdCpuReferenceImpl* getCpuRefImpl() const { return mCpuRefImpl; }
+ ScriptExecutable* getExecutable() const { return mExecutable; }
+
+ void compile(const char* cacheDir);
private:
RsdCpuReferenceImpl* mCpuRefImpl;
const ScriptGroup2* mGroup;
List<Batch*> mBatches;
+ ScriptExecutable* mExecutable;
+ void* mScriptObj;
};
} // namespace renderscript