aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jean-Luc Brouillet <jeanluc@google.com> 2017-02-21 23:03:29 -0800
committer Jean-Luc Brouillet <jeanluc@google.com> 2017-02-24 22:48:52 +0000
commit 5379131149cfd74e2b65b7b4f31c0b8e8b2f8fa0 (patch)
tree 2b0dee1302c6162b532edaa0c62e818b0b3d8c23
parent cfb3604f234ce55c4bc2575cfdcf3f8c041fb869 (diff)
download libbcc-5379131149cfd74e2b65b7b4f31c0b8e8b2f8fa0.tar.gz
Don't rely on the CPU name when configuring libbcc compilation flags.
Low-end Sandybridge, Ivybridge, Haswell, and Broadwell CPUs lack some features indicated by the architecture names. Hence, setCPU(getHostCPUName()) can lead to illegal instructions. Instead, this CL sets up the feature flags from the actual info obtained from cpuid. Manual cherry pick of cl/1911608, I2156abd5edcebe74dec768de05c63b5b056b3818 Test: For the cherry pick, ran on Fugu the perf tests: Test: adb shell setprop debug.rs.default-CPU-driver 1 Test: timing -c 13 15 33 5 Test: Verified that performance did not change Test: Ran CTS, succeeded all but small_struct Test: Ran on x86-64 emulator, no performance change BUG: 33062150 Change-Id: I9f921c6057a47930c36f77a176b76613ad7f8267
-rw-r--r-- lib/CompilerConfig.cpp 59
1 files changed, 14 insertions, 45 deletions
diff --git a/lib/CompilerConfig.cpp b/lib/CompilerConfig.cpp
index 6f6e78c..3ba8498 100644
--- a/lib/CompilerConfig.cpp
+++ b/lib/CompilerConfig.cpp
@@ -32,17 +32,18 @@ using namespace bcc;
namespace {
-// Utility function to test for f16c feature. This function is only needed for
-// on-device bcc for x86
-bool HasF16C() {
+// Utility function to add feature flags supported by the running CPU.
+// This function is only needed for on-device bcc for x86.
+void AddX86NativeCPUFeatures(std::vector<std::string>* attributes) {
llvm::StringMap<bool> features;
- if (!llvm::sys::getHostCPUFeatures(features))
- return false;
+ if (llvm::sys::getHostCPUFeatures(features)) {
+ for (const auto& f : features)
+ attributes->push_back((f.second ? '+' : '-') + f.first().str());
+ }
- if (features.count("f16c") && features["f16c"])
- return true;
- else
- return false;
+ // LLVM generates AVX code that treats a long3 as 256 bits, while
+ // RenderScript considers a long3 192 bits (http://b/28879581)
+ attributes->push_back("-avx");
}
}
@@ -194,34 +195,13 @@ bool CompilerConfig::initializeArch() {
#if defined (PROVIDE_X86_CODEGEN)
case llvm::Triple::x86:
getTargetOptions().UseInitArray = true;
-#if defined (DEFAULT_X86_CODEGEN) && !defined (DEFAULT_X86_64_CODEGEN)
+#if defined (DEFAULT_X86_CODEGEN) && !defined (__HOST__)
setCPU(llvm::sys::getHostCPUName());
+ AddX86NativeCPUFeatures(&attributes);
#else
// generic fallback for 32bit x86 targets
setCPU("atom");
-#endif // DEFAULT_X86_CODEGEN && !DEFAULT_X86_64_CODEGEN
-
-#ifndef __HOST__
- // If not running on the host, and f16c is available, set it in the feature
- // string
- if (HasF16C())
- attributes.push_back("+f16c");
-#if defined(__SSE3__)
- attributes.push_back("+sse3");
- attributes.push_back("+ssse3");
-#endif
-#if defined(__SSE4_1__)
- attributes.push_back("+sse4.1");
-#endif
-#if defined(__SSE4_2__)
- attributes.push_back("+sse4.2");
-#endif
-#endif // __HOST__
-
- // LLVM generates AVX code that treats a long3 as 256 bits, while
- // RenderScript considers a long3 192 bits (http://b/28879581)
- attributes.push_back("-avx");
-
+#endif // DEFAULT_X86_CODEGEN && !__HOST__
break;
#endif // PROVIDE_X86_CODEGEN
@@ -230,6 +210,7 @@ bool CompilerConfig::initializeArch() {
case llvm::Triple::x86_64:
#if defined(DEFAULT_X86_64_CODEGEN) && !defined(__HOST__)
setCPU(llvm::sys::getHostCPUName());
+ AddX86NativeCPUFeatures(&attributes);
#else
// generic fallback for 64bit x86 targets
setCPU("core2");
@@ -241,18 +222,6 @@ bool CompilerConfig::initializeArch() {
setCodeModel(llvm::CodeModel::Medium);
}
getTargetOptions().UseInitArray = true;
-
-#ifndef __HOST__
- // If not running on the host, and f16c is available, set it in the feature
- // string
- if (HasF16C())
- attributes.push_back("+f16c");
-#endif // __HOST__
-
- // LLVM generates AVX code that treats a long3 as 256 bits, while
- // RenderScript considers a long3 192 bits (http://b/28879581)
- attributes.push_back("-avx");
-
break;
#endif // PROVIDE_X86_CODEGEN