summaryrefslogtreecommitdiff
path: root/cpu_ref
diff options
context:
space:
mode:
author: Yang Ni <yangni@google.com> 2015-01-12 13:03:40 -0800
committer: Yang Ni <yangni@google.com> 2015-02-02 17:37:03 -0800
commit: da0f069871343119251d6b0586be356dc2146a62 (patch)
tree: 3b3727ac5ff30a598b33a926e2536cc7c6190b05 /cpu_ref
parent: f5e39bba1d0e7be4adfa7364d92ba3f541420fdd (diff)
download: rs-da0f069871343119251d6b0586be356dc2146a62.tar.gz
Runtime support for compiler kernel fusion.
The runtime will start a separate process to call the new bcc to fuse kernels. Change-Id: Ia73ea917a126a5055ec97f13d90a5feaafd6a2f5
Diffstat (limited to 'cpu_ref')
-rw-r--r-- cpu_ref/rsCpuScript.cpp 19
-rw-r--r-- cpu_ref/rsCpuScript.h 15
-rw-r--r-- cpu_ref/rsCpuScriptGroup2.cpp 274
-rw-r--r-- cpu_ref/rsCpuScriptGroup2.h 31
4 files changed, 293 insertions, 46 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index f975094f..01fbcafd 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -161,15 +161,13 @@ static bool is_force_recompile() {
#endif // RS_SERVER
}
-const static char *BCC_EXE_PATH = "/system/bin/bcc";
-
static void setCompileArguments(std::vector<const char*>* args,
const std::string& bcFileName,
const char* cacheDir, const char* resName,
const char* core_lib, bool useRSDebugContext,
const char* bccPluginName) {
rsAssert(cacheDir && resName && core_lib);
- args->push_back(BCC_EXE_PATH);
+ args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
args->push_back("-unroll-runtime");
args->push_back("-scalarize-load-store");
args->push_back("-o");
@@ -234,7 +232,8 @@ static bool compileBitcode(const std::string &bcFileName,
}
case 0: { // Child process
ALOGV("Invoking BCC with: %s", compileCommandLine.c_str());
- execv(BCC_EXE_PATH, (char* const*)compileArguments);
+ execv(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH,
+ (char* const*)compileArguments);
ALOGE("execv() failed: %s", strerror(errno));
abort();
@@ -428,6 +427,8 @@ void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDi
return loaded;
}
+const char* RsdCpuScriptImpl::BCC_EXE_PATH = "/system/bin/bcc";
+
#define MAXLINE 500
#define MAKE_STR_HELPER(S) #S
#define MAKE_STR(S) MAKE_STR_HELPER(S)
@@ -764,12 +765,8 @@ error:
#ifndef RS_COMPATIBILITY_LIB
for (size_t idx = 0; idx < pragmaCount; ++idx) {
- if (pragmaKeys[idx] != nullptr) {
- delete [] pragmaKeys[idx];
- }
- if (pragmaValues[idx] != nullptr) {
- delete [] pragmaValues[idx];
- }
+ delete [] pragmaKeys[idx];
+ delete [] pragmaValues[idx];
}
delete[] pragmaValues;
@@ -867,6 +864,8 @@ bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
}
}
+ mBitcodeFilePath = bcFileName;
+
// Read RS symbol information from the .so.
if ( !mScriptSO) {
goto error;
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index e4ae4d36..39bf53e8 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -237,6 +237,13 @@ protected:
Allocation **mBoundAllocs;
void * mIntrinsicData;
bool mIsThreadable;
+
+ public:
+ static const char* BCC_EXE_PATH;
+ const std::string& getBitcodeFilePath() const { return mBitcodeFilePath; }
+
+ private:
+ std::string mBitcodeFilePath;
};
Allocation * rsdScriptGetAllocationForPointer(
@@ -244,10 +251,14 @@ Allocation * rsdScriptGetAllocationForPointer(
const Script *script,
const void *);
-
-
}
+#ifdef __LP64__
+#define SYSLIBPATH "/system/lib64"
+#else
+#define SYSLIBPATH "/system/lib"
+#endif
+
}
#endif
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index 52cd8a02..90907d05 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -1,5 +1,15 @@
#include "rsCpuScriptGroup2.h"
+#include <dlfcn.h>
+
+#include <string>
+#include <vector>
+
+#ifndef RS_COMPATIBILITY_LIB
+#include "bcc/Config/Config.h"
+#include <sys/wait.h>
+#endif
+
#include "cpu_ref/rsCpuCore.h"
#include "rsClosure.h"
#include "rsContext.h"
@@ -7,13 +17,17 @@
#include "rsCpuScript.h"
#include "rsScript.h"
#include "rsScriptGroup2.h"
+#include "rsScriptIntrinsic.h"
+
+using std::string;
+using std::vector;
namespace android {
namespace renderscript {
namespace {
-static const size_t DefaultKernelArgCount = 2;
+const size_t DefaultKernelArgCount = 2;
void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
uint32_t xend, uint32_t outstep) {
@@ -66,25 +80,45 @@ void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
mutable_kparams->usr = &closures;
}
-/*
- Returns true if closure depends on any closure in batch via a glboal variable
- TODO: this probably should go into class Closure.
- */
-bool conflict(const list<CPUClosure*> &batch, CPUClosure* closure) {
+} // namespace
+
+Batch::~Batch() {
+ for (CPUClosure* c : mClosures) {
+ delete c;
+ }
+ if (mScriptObj) {
+ dlclose(mScriptObj);
+ }
+}
+
+bool Batch::conflict(CPUClosure* closure) const {
+ if (mClosures.empty()) {
+ return false;
+ }
+
+ if (closure->mClosure->mKernelID.get() == nullptr ||
+ mClosures.front()->mClosure->mKernelID.get() == nullptr) {
+ // An invoke should be in a batch by itself, so it conflicts with any other
+ // closure.
+ return true;
+ }
+
for (const auto &p : closure->mClosure->mGlobalDeps) {
const Closure* dep = p.first;
- for (CPUClosure* c : batch) {
+ for (CPUClosure* c : mClosures) {
if (c->mClosure == dep) {
+ ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global", closure, dep);
return true;
}
}
}
for (const auto &p : closure->mClosure->mArgDeps) {
const Closure* dep = p.first;
- for (CPUClosure* c : batch) {
+ for (CPUClosure* c : mClosures) {
if (c->mClosure == dep) {
for (const auto &p1 : *p.second) {
- if (p1.second != nullptr) {
+ if (p1.second->get() != nullptr) {
+ ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg", closure, dep);
return true;
}
}
@@ -94,12 +128,10 @@ bool conflict(const list<CPUClosure*> &batch, CPUClosure* closure) {
return false;
}
-} // namespace
-
CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
const ScriptGroupBase *sg) :
mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
- list<CPUClosure*>* batch = new list<CPUClosure*>();
+ Batch* batch = new Batch(this);
for (Closure* closure: mGroup->mClosures) {
const ScriptKernelID* kernelID = closure->mKernelID.get();
RsdCpuScriptImpl* si =
@@ -110,32 +142,192 @@ CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
// TODO: Is mtls.fep.usrLen ever used?
CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
mtls.fep.usr, mtls.fep.usrLen);
- if (conflict(*batch, cc)) {
+ if (batch->conflict(cc)) {
mBatches.push_back(batch);
- batch = new list<CPUClosure*>();
+ batch = new Batch(this);
}
- batch->push_back(cc);
+
+ batch->mClosures.push_back(cc);
}
+
mBatches.push_back(batch);
+
+#ifndef RS_COMPATIBILITY_LIB
+ for (Batch* batch : mBatches) {
+ batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str());
+ }
+#endif
}
CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
- for (list<CPUClosure*>* batch : mBatches) {
- for (CPUClosure* c : *batch) {
- delete c;
+ for (Batch* batch : mBatches) {
+ delete batch;
+ }
+}
+
+namespace {
+
+#ifndef RS_COMPATIBILITY_LIB
+
+string getFileName(string path) {
+ unsigned found = path.find_last_of("/\\");
+ return path.substr(found + 1);
+}
+
+void setupCompileArguments(
+ const vector<string>& inputs, const vector<int>& kernels,
+ const string& output_dir, const string& output_filename,
+ const string& rsLib, vector<const char*>* args) {
+ args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
+ args->push_back("-fPIC");
+ args->push_back("-embedRSInfo");
+ args->push_back("-mtriple");
+ args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
+ args->push_back("-bclib");
+ args->push_back(rsLib.c_str());
+ for (const string& input : inputs) {
+ args->push_back(input.c_str());
+ }
+ for (int kernel : kernels) {
+ args->push_back("-k");
+ string strKernel = std::to_string(kernel);
+ args->push_back(strKernel.c_str());
+ }
+ args->push_back("-output_path");
+ args->push_back(output_dir.c_str());
+ args->push_back("-o");
+ args->push_back(output_filename.c_str());
+ args->push_back(nullptr);
+}
+
+string convertListToString(int n, const char* const* strs) {
+ string ret;
+ ret.append(strs[0]);
+ for (int i = 1; i < n; i++) {
+ ret.append(" ");
+ ret.append(strs[i]);
+ }
+ return ret;
+}
+
+bool fuseAndCompile(const char** arguments,
+ const string& commandLine) {
+ const pid_t pid = fork();
+
+ if (pid == -1) {
+ ALOGE("Couldn't fork for bcc execution");
+ return false;
+ }
+
+ if (pid == 0) {
+ // Child process
+ ALOGV("Invoking BCC with: %s", commandLine.c_str());
+ execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
+
+ ALOGE("execv() failed: %s", strerror(errno));
+ abort();
+ return false;
+ }
+
+ // Parent process
+ int status = 0;
+ const pid_t w = waitpid(pid, &status, 0);
+ if (w == -1) {
+ return false;
+ }
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
+ ALOGE("bcc terminated unexpectedly");
+ return false;
+ }
+
+ return true;
+}
+#endif
+
+} // anonymous namespace
+
+void Batch::tryToCreateFusedKernel(const char *cacheDir) {
+#ifndef RS_COMPATIBILITY_LIB
+ if (mClosures.size() < 2) {
+ ALOGV("Compiler kernel fusion skipped due to only one or zero kernel in"
+ " a script group batch.");
+ return;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Fuse the input kernels and generate native code in an object file
+ //===--------------------------------------------------------------------===//
+
+ std::vector<string> inputFiles;
+ std::vector<int> slots;
+
+ for (CPUClosure* cpuClosure : mClosures) {
+ const Closure* closure = cpuClosure->mClosure;
+ const ScriptKernelID* kernelID = closure->mKernelID.get();
+ const Script* script = kernelID->mScript;
+
+ if (script->isIntrinsic()) {
+ return;
}
+
+ const RsdCpuScriptImpl *cpuScript =
+ (const RsdCpuScriptImpl*)script->mHal.drv;
+
+ const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
+
+ inputFiles.push_back(bitcodeFilename);
+ slots.push_back(kernelID->mSlot);
+ }
+
+ string outputPath(tempnam(cacheDir, "fused"));
+ string outputFileName = getFileName(outputPath);
+ string objFilePath(outputPath);
+ objFilePath.append(".o");
+ string rsLibPath(SYSLIBPATH"/libclcore.bc");
+ vector<const char*> arguments;
+ setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath,
+ &arguments);
+ string commandLine =
+ convertListToString(arguments.size() - 1, arguments.data());
+
+ if (!fuseAndCompile(arguments.data(), commandLine)) {
+ return;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Create and load the shared lib
+ //===--------------------------------------------------------------------===//
+
+ const char* resName = outputFileName.c_str();
+
+ if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
+ ALOGE("Failed to link object file '%s'", resName);
+ return;
+ }
+
+ void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
+ if (mSharedObj == nullptr) {
+ ALOGE("Unable to load '%s'", resName);
+ return;
}
+
+ mExecutable = ScriptExecutable::createFromSharedObject(
+ nullptr, // RS context. Unused.
+ mSharedObj);
+
+#endif // RS_COMPATIBILITY_LIB
}
void CpuScriptGroup2Impl::execute() {
- for (list<CPUClosure*>* batch : mBatches) {
- setGlobalsForBatch(*batch);
- runBatch(*batch);
+ for (auto batch : mBatches) {
+ batch->setGlobalsForBatch();
+ batch->run();
}
}
-void CpuScriptGroup2Impl::setGlobalsForBatch(const list<CPUClosure*>& batch) {
- for (CPUClosure* cpuClosure : batch) {
+void Batch::setGlobalsForBatch() {
+ for (CPUClosure* cpuClosure : mClosures) {
const Closure* closure = cpuClosure->mClosure;
const ScriptKernelID* kernelID = closure->mKernelID.get();
Script* s = kernelID->mScript;
@@ -152,8 +344,32 @@ void CpuScriptGroup2Impl::setGlobalsForBatch(const list<CPUClosure*>& batch) {
}
}
-void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) {
- for (CPUClosure* cpuClosure : batch) {
+void Batch::run() {
+ if (mExecutable != nullptr) {
+ MTLaunchStruct mtls;
+ const CPUClosure* firstCpuClosure = mClosures.front();
+ const CPUClosure* lastCpuClosure = mClosures.back();
+
+ firstCpuClosure->mSi->forEachMtlsSetup(
+ (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
+ firstCpuClosure->mClosure->mArgs.size(),
+ lastCpuClosure->mClosure->mReturnValue,
+ nullptr, 0, nullptr, &mtls);
+
+ mtls.script = nullptr;
+ mtls.fep.usr = nullptr;
+ mtls.kernel = mExecutable->getForEachFunction(0);
+
+ mGroup->getCpuRefImpl()->launchThreads(
+ (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
+ firstCpuClosure->mClosure->mArgs.size(),
+ lastCpuClosure->mClosure->mReturnValue,
+ nullptr, &mtls);
+
+ return;
+ }
+
+ for (CPUClosure* cpuClosure : mClosures) {
const Closure* closure = cpuClosure->mClosure;
const ScriptKernelID* kernelID = closure->mKernelID.get();
cpuClosure->mSi->preLaunch(kernelID->mSlot,
@@ -163,7 +379,7 @@ void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) {
nullptr);
}
- const CPUClosure* cpuClosure = batch.front();
+ const CPUClosure* cpuClosure = mClosures.front();
const Closure* closure = cpuClosure->mClosure;
MTLaunchStruct mtls;
@@ -174,12 +390,12 @@ void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) {
mtls.script = nullptr;
mtls.kernel = (void (*)())&groupRoot;
- mtls.fep.usr = &batch;
+ mtls.fep.usr = &mClosures;
- mCpuRefImpl->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
+ mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
}
- for (CPUClosure* cpuClosure : batch) {
+ for (CPUClosure* cpuClosure : mClosures) {
const Closure* closure = cpuClosure->mClosure;
const ScriptKernelID* kernelID = closure->mKernelID.get();
cpuClosure->mSi->postLaunch(kernelID->mSlot,
diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h
index 6cb72a65..53a0fe59 100644
--- a/cpu_ref/rsCpuScriptGroup2.h
+++ b/cpu_ref/rsCpuScriptGroup2.h
@@ -13,6 +13,7 @@ namespace renderscript {
class Closure;
class RsdCpuScriptImpl;
class RsdCpuReferenceImpl;
+class ScriptExecutable;
class ScriptGroup2;
struct RsExpandKernelParams;
@@ -36,6 +37,28 @@ class CPUClosure {
const size_t mUsrSize;
};
+class CpuScriptGroup2Impl;
+
+class Batch {
+ public:
+ Batch(CpuScriptGroup2Impl* group) : mGroup(group), mExecutable(nullptr) {}
+
+ ~Batch();
+
+ // Returns true if closure depends on any closure in this batch for a global
+ // variable
+ bool conflict(CPUClosure* closure) const;
+
+ void tryToCreateFusedKernel(const char* cacheDir);
+ void setGlobalsForBatch();
+ void run();
+
+ CpuScriptGroup2Impl* mGroup;
+ ScriptExecutable* mExecutable;
+ void* mScriptObj;
+ list<CPUClosure*> mClosures;
+};
+
class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 {
public:
CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase* group);
@@ -44,14 +67,12 @@ class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 {
bool init();
virtual void execute();
- private:
- void setGlobalsForBatch(const list<CPUClosure*>& batch);
- void runBatch(const list<CPUClosure*>& batch);
+ RsdCpuReferenceImpl* getCpuRefImpl() const { return mCpuRefImpl; }
+ private:
RsdCpuReferenceImpl* mCpuRefImpl;
const ScriptGroup2* mGroup;
-
- list<list<CPUClosure*>*> mBatches;
+ list<Batch*> mBatches;
};
} // namespace renderscript