diff options
author | Jean-Luc Brouillet <jeanluc@google.com> | 2017-02-21 23:03:29 -0800 |
---|---|---|
committer | Jean-Luc Brouillet <jeanluc@google.com> | 2017-02-24 22:48:52 +0000 |
commit | 5379131149cfd74e2b65b7b4f31c0b8e8b2f8fa0 (patch) | |
tree | 2b0dee1302c6162b532edaa0c62e818b0b3d8c23 | |
parent | cfb3604f234ce55c4bc2575cfdcf3f8c041fb869 (diff) | |
download | libbcc-5379131149cfd74e2b65b7b4f31c0b8e8b2f8fa0.tar.gz |
Don't rely on CPU name when configuring libbcc compilation flags.
Low-end Sandybridge, Ivybridge, Haswell, and Broadwell CPUs lack some features
indicated by the architecture names. Hence, setCPU(getHostCPUName()) can lead
to illegal instructions. Instead, this CL sets up the feature flags from
the actual info obtained from cpuid.
Manual cherry pick of cl/1911608, I2156abd5edcebe74dec768de05c63b5b056b3818
Test: For the cherry pick, ran on Fugu the perf tests:
Test: adb shell setprop debug.rs.default-CPU-driver 1
Test: timing -c 13 15 33 5
Test: Verified that performance did not change
Test: Ran CTS; all tests passed except small_struct
Test: Ran on x86-64 emulator, no performance change
BUG: 33062150
Change-Id: I9f921c6057a47930c36f77a176b76613ad7f8267
-rw-r--r-- | lib/CompilerConfig.cpp | 59 |
1 files changed, 14 insertions, 45 deletions
diff --git a/lib/CompilerConfig.cpp b/lib/CompilerConfig.cpp index 6f6e78c..3ba8498 100644 --- a/lib/CompilerConfig.cpp +++ b/lib/CompilerConfig.cpp @@ -32,17 +32,18 @@ using namespace bcc; namespace { -// Utility function to test for f16c feature. This function is only needed for -// on-device bcc for x86 -bool HasF16C() { +// Utility function to add feature flags supported by the running CPU. +// This function is only needed for on-device bcc for x86. +void AddX86NativeCPUFeatures(std::vector<std::string>* attributes) { llvm::StringMap<bool> features; - if (!llvm::sys::getHostCPUFeatures(features)) - return false; + if (llvm::sys::getHostCPUFeatures(features)) { + for (const auto& f : features) + attributes->push_back((f.second ? '+' : '-') + f.first().str()); + } - if (features.count("f16c") && features["f16c"]) - return true; - else - return false; + // LLVM generates AVX code that treats a long3 as 256 bits, while + // RenderScript considers a long3 192 bits (http://b/28879581) + attributes->push_back("-avx"); } } @@ -194,34 +195,13 @@ bool CompilerConfig::initializeArch() { #if defined (PROVIDE_X86_CODEGEN) case llvm::Triple::x86: getTargetOptions().UseInitArray = true; -#if defined (DEFAULT_X86_CODEGEN) && !defined (DEFAULT_X86_64_CODEGEN) +#if defined (DEFAULT_X86_CODEGEN) && !defined (__HOST__) setCPU(llvm::sys::getHostCPUName()); + AddX86NativeCPUFeatures(&attributes); #else // generic fallback for 32bit x86 targets setCPU("atom"); -#endif // DEFAULT_X86_CODEGEN && !DEFAULT_X86_64_CODEGEN - -#ifndef __HOST__ - // If not running on the host, and f16c is available, set it in the feature - // string - if (HasF16C()) - attributes.push_back("+f16c"); -#if defined(__SSE3__) - attributes.push_back("+sse3"); - attributes.push_back("+ssse3"); -#endif -#if defined(__SSE4_1__) - attributes.push_back("+sse4.1"); -#endif -#if defined(__SSE4_2__) - attributes.push_back("+sse4.2"); -#endif -#endif // __HOST__ - - // LLVM generates AVX code that treats a long3 as 256 
bits, while - // RenderScript considers a long3 192 bits (http://b/28879581) - attributes.push_back("-avx"); - +#endif // DEFAULT_X86_CODEGEN && !__HOST__ break; #endif // PROVIDE_X86_CODEGEN @@ -230,6 +210,7 @@ bool CompilerConfig::initializeArch() { case llvm::Triple::x86_64: #if defined(DEFAULT_X86_64_CODEGEN) && !defined(__HOST__) setCPU(llvm::sys::getHostCPUName()); + AddX86NativeCPUFeatures(&attributes); #else // generic fallback for 64bit x86 targets setCPU("core2"); @@ -241,18 +222,6 @@ bool CompilerConfig::initializeArch() { setCodeModel(llvm::CodeModel::Medium); } getTargetOptions().UseInitArray = true; - -#ifndef __HOST__ - // If not running on the host, and f16c is available, set it in the feature - // string - if (HasF16C()) - attributes.push_back("+f16c"); -#endif // __HOST__ - - // LLVM generates AVX code that treats a long3 as 256 bits, while - // RenderScript considers a long3 192 bits (http://b/28879581) - attributes.push_back("-avx"); - break; #endif // PROVIDE_X86_CODEGEN |