summaryrefslogtreecommitdiff
path: root/cpu_ref/rsCpuScriptGroup2.cpp
diff options
context:
space:
mode:
authorYang Ni <yangni@google.com>2015-03-11 09:07:15 -0700
committerYang Ni <yangni@google.com>2015-03-11 15:55:08 -0700
commitedf4ea312cc3f7dd4373f8db5aaf9325ff054c8e (patch)
treef7059daf55b04e1ef3122bc7cf117c75fe48ff53 /cpu_ref/rsCpuScriptGroup2.cpp
parentd38f6e8c15dbb1f5a507955ab3fd0a1e15b35fca (diff)
downloadrs-edf4ea312cc3f7dd4373f8db5aaf9325ff054c8e.tar.gz
Handle FP precision in kernel fusion
b/19098612 Find the proper core lib for full-precision and relaxed-precision FP. Pass both to bcc while compiling a script group, and let bcc pick the right one based on the precision of the merged module. Change-Id: I2a641387f0990463887594729a935a5c3f0f856f
Diffstat (limited to 'cpu_ref/rsCpuScriptGroup2.cpp')
-rw-r--r--cpu_ref/rsCpuScriptGroup2.cpp39
1 files changed, 35 insertions, 4 deletions
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index 6bc98b41..27a27dd0 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -201,6 +201,32 @@ namespace {
#ifndef RS_COMPATIBILITY_LIB
+string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
+ *coreLibRelaxedPath = "";
+ // Returns the core library path; *coreLibRelaxedPath stays "" unless the NEON case below sets it.
+ // If we're debugging, use the debug library (returns early, so the relaxed path remains empty).
+ if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
+ return SYSLIBPATH"/libclcore_debug.bc";
+ }
+
+ // Check for a platform specific library
+ // The out-parameter is filled in first; the returned path is then chosen by target architecture.
+#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
+ // NEON-capable ARMv7a devices can use an accelerated math library
+ // for all reduced precision scripts.
+ // ARMv8 does not use NEON, as ASIMD can be used with all precision
+ // levels.
+ *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
+#endif
+
+#if defined(__i386__) || defined(__x86_64__)
+ // x86 devices will use an optimized library.
+ return SYSLIBPATH"/libclcore_x86.bc";
+#else
+ return SYSLIBPATH"/libclcore.bc";
+#endif
+}
+
string getFileName(string path) {
unsigned found = path.find_last_of("/\\");
return path.substr(found + 1);
@@ -210,14 +236,17 @@ void setupCompileArguments(
const vector<string>& inputs, const vector<string>& kernelBatches,
const vector<string>& invokeBatches,
const string& output_dir, const string& output_filename,
- const string& rsLib, vector<const char*>* args) {
+ const string& coreLibPath, const string& coreLibRelaxedPath,
+ vector<const char*>* args) {
args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
args->push_back("-fPIC");
args->push_back("-embedRSInfo");
args->push_back("-mtriple");
args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
args->push_back("-bclib");
- args->push_back(rsLib.c_str());
+ args->push_back(coreLibPath.c_str());
+ args->push_back("-bclib_relaxed");
+ args->push_back(coreLibRelaxedPath.c_str());
for (const string& input : inputs) {
args->push_back(input.c_str());
}
@@ -352,10 +381,12 @@ void CpuScriptGroup2Impl::compile(const char* cacheDir) {
TEMP_FAILURE_RETRY(close(tempfd));
string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2));
- string rsLibPath(SYSLIBPATH"/libclcore.bc");
+ string coreLibRelaxedPath;
+ const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
+ &coreLibRelaxedPath);
vector<const char*> arguments;
setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
- outputFileName, rsLibPath, &arguments);
+ outputFileName, coreLibPath, coreLibRelaxedPath, &arguments);
std::unique_ptr<const char> joined(
rsuJoinStrings(arguments.size() - 1, arguments.data()));
string commandLine (joined.get());