diff options
author | Yang Ni <yangni@google.com> | 2015-03-11 09:07:15 -0700 |
---|---|---|
committer | Yang Ni <yangni@google.com> | 2015-03-11 15:55:08 -0700 |
commit | edf4ea312cc3f7dd4373f8db5aaf9325ff054c8e (patch) | |
tree | f7059daf55b04e1ef3122bc7cf117c75fe48ff53 /cpu_ref/rsCpuScriptGroup2.cpp | |
parent | d38f6e8c15dbb1f5a507955ab3fd0a1e15b35fca (diff) | |
download | rs-edf4ea312cc3f7dd4373f8db5aaf9325ff054c8e.tar.gz |
Handle FP precision in kernel fusion
b/19098612
Find the proper core lib for full-precision and relaxed-precision
FP. Pass both to bcc while compiling a script group, and let bcc
pick the right one based on the precision of the merged module.
Change-Id: I2a641387f0990463887594729a935a5c3f0f856f
Diffstat (limited to 'cpu_ref/rsCpuScriptGroup2.cpp')
-rw-r--r-- | cpu_ref/rsCpuScriptGroup2.cpp | 39 |
1 files changed, 35 insertions, 4 deletions
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp index 6bc98b41..27a27dd0 100644 --- a/cpu_ref/rsCpuScriptGroup2.cpp +++ b/cpu_ref/rsCpuScriptGroup2.cpp @@ -201,6 +201,32 @@ namespace { #ifndef RS_COMPATIBILITY_LIB +string getCoreLibPath(Context* context, string* coreLibRelaxedPath) { + *coreLibRelaxedPath = ""; + + // If we're debugging, use the debug library. + if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { + return SYSLIBPATH"/libclcore_debug.bc"; + } + + // Check for a platform specific library + +#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) + // NEON-capable ARMv7a devices can use an accelerated math library + // for all reduced precision scripts. + // ARMv8 does not use NEON, as ASIMD can be used with all precision + // levels. + *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; +#endif + +#if defined(__i386__) || defined(__x86_64__) + // x86 devices will use an optimized library. + return SYSLIBPATH"/libclcore_x86.bc"; +#else + return SYSLIBPATH"/libclcore.bc"; +#endif +} + string getFileName(string path) { unsigned found = path.find_last_of("/\\"); return path.substr(found + 1); @@ -210,14 +236,17 @@ void setupCompileArguments( const vector<string>& inputs, const vector<string>& kernelBatches, const vector<string>& invokeBatches, const string& output_dir, const string& output_filename, - const string& rsLib, vector<const char*>* args) { + const string& coreLibPath, const string& coreLibRelaxedPath, + vector<const char*>* args) { args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); args->push_back("-fPIC"); args->push_back("-embedRSInfo"); args->push_back("-mtriple"); args->push_back(DEFAULT_TARGET_TRIPLE_STRING); args->push_back("-bclib"); - args->push_back(rsLib.c_str()); + args->push_back(coreLibPath.c_str()); + args->push_back("-bclib_relaxed"); + args->push_back(coreLibRelaxedPath.c_str()); for (const string& input : inputs) { args->push_back(input.c_str()); } @@ -352,10 +381,12 @@ void 
CpuScriptGroup2Impl::compile(const char* cacheDir) { TEMP_FAILURE_RETRY(close(tempfd)); string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2)); - string rsLibPath(SYSLIBPATH"/libclcore.bc"); + string coreLibRelaxedPath; + const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), + &coreLibRelaxedPath); vector<const char*> arguments; setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, - outputFileName, rsLibPath, &arguments); + outputFileName, coreLibPath, coreLibRelaxedPath, &arguments); std::unique_ptr<const char> joined( rsuJoinStrings(arguments.size() - 1, arguments.data())); string commandLine (joined.get()); |