author     Ashkan Aliabadi <ashkanaliabadi@fb.com>   2020-03-19 19:34:26 -0700
committer  Ashkan Aliabadi <ashkanaliabadi@fb.com>   2020-03-19 19:34:26 -0700
commit     dcf8e1896b8b0df6ad9a02e2a8765b6630557e0c (patch)
tree       8ed9c44333fc3994ae11ea3426e68784417338a0 /src
parent     d6c0f915ee737f961915c9d17f1679b6777af207 (diff)
download   cpuinfo-dcf8e1896b8b0df6ad9a02e2a8765b6630557e0c.tar.gz
Upstream cpuinfo updates in XNNPACK as of XNNPACK:c58bd3486d52db9c6b9934912790db741bc366f6.
Diffstat (limited to 'src')
-rw-r--r--  src/api.c                   | 229
-rw-r--r--  src/arm/cache.c             |   1
-rw-r--r--  src/arm/linux/api.h         |   4
-rw-r--r--  src/arm/linux/init.c        |  77
-rw-r--r--  src/arm/mach/init.c         | 102
-rw-r--r--  src/arm/uarch.c             |   8
-rw-r--r--  src/cpuinfo/common.h        |  36
-rw-r--r--  src/cpuinfo/internal-api.h  |  15
-rw-r--r--  src/linux/current.c         |  41
-rw-r--r--  src/x86/api.h               |   1
-rw-r--r--  src/x86/cache/init.c        |   2
-rw-r--r--  src/x86/cpuid.h             |  25
-rw-r--r--  src/x86/init.c              |   8
-rw-r--r--  src/x86/isa.c               |   2
-rw-r--r--  src/x86/linux/init.c        |  23
-rw-r--r--  src/x86/mach/init.c         |  26
-rw-r--r--  src/x86/nacl/isa.c          | 306
-rw-r--r--  src/x86/name.c              |   1
-rw-r--r--  src/x86/uarch.c             |  17
-rw-r--r--  src/x86/vendor.c            |  11
-rw-r--r--  src/x86/windows/init.c      |  29
21 files changed, 429 insertions, 535 deletions
diff --git a/src/api.c b/src/api.c
index b180d80..0cc5d4e 100644
--- a/src/api.c
+++ b/src/api.c
@@ -1,9 +1,16 @@
+#include <stdbool.h>
#include <stddef.h>
#include <cpuinfo.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
+#ifdef __linux__
+ #include <linux/api.h>
+
+ #include <unistd.h>
+ #include <sys/syscall.h>
+#endif
bool cpuinfo_is_initialized = false;
@@ -20,235 +27,347 @@ uint32_t cpuinfo_packages_count = 0;
uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 };
uint32_t cpuinfo_max_cache_size = 0;
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL;
+ uint32_t cpuinfo_uarchs_count = 0;
+#else
+ struct cpuinfo_uarch_info cpuinfo_global_uarch = { cpuinfo_uarch_unknown };
+#endif
+
+#ifdef __linux__
+ uint32_t cpuinfo_linux_cpu_max = 0;
+ const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL;
+ const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL;
+ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL;
+ #endif
+#endif
+
const struct cpuinfo_processor* cpuinfo_get_processors(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors");
}
return cpuinfo_processors;
}
const struct cpuinfo_core* cpuinfo_get_cores(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core");
}
return cpuinfo_cores;
}
const struct cpuinfo_cluster* cpuinfo_get_clusters(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters");
}
return cpuinfo_clusters;
}
const struct cpuinfo_package* cpuinfo_get_packages(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages");
}
return cpuinfo_packages;
}
-const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) {
+const struct cpuinfo_uarch_info* cpuinfo_get_uarchs() {
if (!cpuinfo_is_initialized) {
+ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs");
+ }
+ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ return cpuinfo_uarchs;
+ #else
+ return &cpuinfo_global_uarch;
+ #endif
+}
+
+const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processor");
}
- if (index < cpuinfo_processors_count) {
- return cpuinfo_processors + index;
- } else {
+ if CPUINFO_UNLIKELY(index >= cpuinfo_processors_count) {
return NULL;
}
+ return &cpuinfo_processors[index];
}
const struct cpuinfo_core* cpuinfo_get_core(uint32_t index) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core");
}
- if (index < cpuinfo_cores_count) {
- return cpuinfo_cores + index;
- } else {
+ if CPUINFO_UNLIKELY(index >= cpuinfo_cores_count) {
return NULL;
}
+ return &cpuinfo_cores[index];
}
const struct cpuinfo_cluster* cpuinfo_get_cluster(uint32_t index) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cluster");
}
- if (index < cpuinfo_clusters_count) {
- return cpuinfo_clusters + index;
- } else {
+ if CPUINFO_UNLIKELY(index >= cpuinfo_clusters_count) {
return NULL;
}
+ return &cpuinfo_clusters[index];
}
const struct cpuinfo_package* cpuinfo_get_package(uint32_t index) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "package");
}
- if (index < cpuinfo_packages_count) {
- return cpuinfo_packages + index;
- } else {
+ if CPUINFO_UNLIKELY(index >= cpuinfo_packages_count) {
return NULL;
}
+ return &cpuinfo_packages[index];
}
-uint32_t cpuinfo_get_processors_count(void) {
+const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) {
if (!cpuinfo_is_initialized) {
+ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch");
+ }
+ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ if CPUINFO_UNLIKELY(index >= cpuinfo_uarchs_count) {
+ return NULL;
+ }
+ return &cpuinfo_uarchs[index];
+ #else
+ if CPUINFO_UNLIKELY(index != 0) {
+ return NULL;
+ }
+ return &cpuinfo_global_uarch;
+ #endif
+}
+
+uint32_t cpuinfo_get_processors_count(void) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors_count");
}
return cpuinfo_processors_count;
}
uint32_t cpuinfo_get_cores_count(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores_count");
}
return cpuinfo_cores_count;
}
uint32_t cpuinfo_get_clusters_count(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters_count");
}
return cpuinfo_clusters_count;
}
uint32_t cpuinfo_get_packages_count(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages_count");
}
return cpuinfo_packages_count;
}
-const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) {
+uint32_t cpuinfo_get_uarchs_count(void) {
if (!cpuinfo_is_initialized) {
+ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count");
+ }
+ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ return cpuinfo_uarchs_count;
+ #else
+ return 1;
+ #endif
+}
+
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_1i];
}
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_1d];
}
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_2];
}
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_caches(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_3];
}
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_4];
}
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_cache");
}
- if (index < cpuinfo_cache_count[cpuinfo_cache_level_1i]) {
- return cpuinfo_cache[cpuinfo_cache_level_1i] + index;
- } else {
+ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1i]) {
return NULL;
}
+ return &cpuinfo_cache[cpuinfo_cache_level_1i][index];
}
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_cache");
}
- if (index < cpuinfo_cache_count[cpuinfo_cache_level_1d]) {
- return cpuinfo_cache[cpuinfo_cache_level_1d] + index;
- } else {
+ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1d]) {
return NULL;
}
+ return &cpuinfo_cache[cpuinfo_cache_level_1d][index];
}
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_cache");
}
- if (index < cpuinfo_cache_count[cpuinfo_cache_level_2]) {
- return cpuinfo_cache[cpuinfo_cache_level_2] + index;
- } else {
+ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_2]) {
return NULL;
}
+ return &cpuinfo_cache[cpuinfo_cache_level_2][index];
}
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_cache");
}
- if (index < cpuinfo_cache_count[cpuinfo_cache_level_3]) {
- return cpuinfo_cache[cpuinfo_cache_level_3] + index;
- } else {
+ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_3]) {
return NULL;
}
+ return &cpuinfo_cache[cpuinfo_cache_level_3][index];
}
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_cache");
}
- if (index < cpuinfo_cache_count[cpuinfo_cache_level_4]) {
- return cpuinfo_cache[cpuinfo_cache_level_4] + index;
- } else {
+ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_4]) {
return NULL;
}
+ return &cpuinfo_cache[cpuinfo_cache_level_4][index];
}
uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_1i];
}
uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_1d];
}
uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_2];
}
uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_3];
}
uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_4];
}
uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void) {
- if (!cpuinfo_is_initialized) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "max_cache_size");
}
return cpuinfo_max_cache_size;
}
+
+const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor");
+ }
+ #ifdef __linux__
+ unsigned cpu;
+ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) {
+ return 0;
+ }
+ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) {
+ return 0;
+ }
+ return cpuinfo_linux_cpu_to_processor_map[cpu];
+ #else
+ return NULL;
+ #endif
+}
+
+const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core");
+ }
+ #ifdef __linux__
+ unsigned cpu;
+ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) {
+ return 0;
+ }
+ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) {
+ return 0;
+ }
+ return cpuinfo_linux_cpu_to_core_map[cpu];
+ #else
+ return NULL;
+ #endif
+}
+
+uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index");
+ }
+ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ #ifdef __linux__
+ if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) {
+ /* Special case: avoid syscall on systems with only a single type of cores */
+ return 0;
+ }
+
+ /* General case */
+ unsigned cpu;
+ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) {
+ return 0;
+ }
+ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) {
+ return 0;
+ }
+ return cpuinfo_linux_cpu_to_uarch_index_map[cpu];
+ #else
+ /* Fallback: pretend to be on the big core. */
+ return 0;
+ #endif
+ #else
+ /* Only ARM/ARM64 processors may include cores of different types in the same package. */
+ return 0;
+ #endif
+}
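
The hunk above introduces the microarchitecture-enumeration accessors (cpuinfo_get_uarchs, cpuinfo_get_uarch, cpuinfo_get_uarchs_count) and the getcpu-based current-processor/core/uarch lookups. A minimal caller-side sketch of the new API, assuming the pre-existing public entry points cpuinfo_initialize()/cpuinfo_deinitialize() from <cpuinfo.h>:

#include <inttypes.h>
#include <stdio.h>
#include <cpuinfo.h>

int main(void) {
	if (!cpuinfo_initialize()) {
		fprintf(stderr, "failed to initialize cpuinfo\n");
		return 1;
	}
	/* Enumerate the distinct microarchitectures detected on this system. */
	const uint32_t uarch_count = cpuinfo_get_uarchs_count();
	for (uint32_t i = 0; i < uarch_count; i++) {
		const struct cpuinfo_uarch_info* info = cpuinfo_get_uarch(i);
		printf("uarch #%" PRIu32 ": %" PRIu32 " cores, %" PRIu32 " logical processors\n",
			i, info->core_count, info->processor_count);
	}
	/* Index of the uarch the calling thread is running on; 0 on homogeneous
	   systems and on the non-Linux fallback paths shown above. */
	printf("current uarch index: %" PRIu32 "\n", cpuinfo_get_current_uarch_index());
	cpuinfo_deinitialize();
	return 0;
}
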
diff --git a/src/arm/cache.c b/src/arm/cache.c
index ccadeb4..c2bc7d2 100644
--- a/src/arm/cache.c
+++ b/src/arm/cache.c
@@ -659,6 +659,7 @@ void cpuinfo_arm_decode_cache(
};
}
break;
+ case cpuinfo_uarch_cortex_a55r0:
case cpuinfo_uarch_cortex_a55:
/*
* ARM Cortex-A55 Core Technical Reference Manual
diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h
index 275d072..f99da66 100644
--- a/src/arm/linux/api.h
+++ b/src/arm/linux/api.h
@@ -153,6 +153,7 @@ struct cpuinfo_arm_linux_processor {
uint32_t midr;
enum cpuinfo_vendor vendor;
enum cpuinfo_uarch uarch;
+ uint32_t uarch_index;
/**
* ID of the physical package which includes this logical processor.
* The value is parsed from /sys/devices/system/cpu/cpu<N>/topology/physical_package_id
@@ -346,3 +347,6 @@ CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr(
uint32_t max_processors,
uint32_t usable_processors,
struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map;
+extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries;
diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c
index f0c432c..6272abf 100644
--- a/src/arm/linux/init.c
+++ b/src/arm/linux/init.c
@@ -106,12 +106,14 @@ void cpuinfo_arm_linux_init(void) {
struct cpuinfo_processor* processors = NULL;
struct cpuinfo_core* cores = NULL;
struct cpuinfo_cluster* clusters = NULL;
- const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
- const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
+ struct cpuinfo_uarch_info* uarchs = NULL;
struct cpuinfo_cache* l1i = NULL;
struct cpuinfo_cache* l1d = NULL;
struct cpuinfo_cache* l2 = NULL;
struct cpuinfo_cache* l3 = NULL;
+ const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
+ const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
+ uint32_t* linux_cpu_to_uarch_index_map = NULL;
const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count();
cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count);
@@ -400,6 +402,18 @@ void cpuinfo_arm_linux_init(void) {
}
}
+ uint32_t uarchs_count = 0;
+ enum cpuinfo_uarch last_uarch;
+ for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+ if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+ if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) {
+ last_uarch = arm_linux_processors[i].uarch;
+ uarchs_count += 1;
+ }
+ arm_linux_processors[i].uarch_index = uarchs_count - 1;
+ }
+ }
+
/*
* Assumptions:
* - No SMP (i.e. each core supports only one hardware thread).
@@ -432,6 +446,13 @@ void cpuinfo_arm_linux_init(void) {
goto cleanup;
}
+ uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info));
+ if (uarchs == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures",
+ uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count);
+ goto cleanup;
+ }
+
linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*));
if (linux_cpu_to_processor_map == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries",
@@ -446,6 +467,15 @@ void cpuinfo_arm_linux_init(void) {
goto cleanup;
}
+ if (uarchs_count > 1) {
+ linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t));
+ if (linux_cpu_to_uarch_index_map == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries",
+ arm_linux_processors_count * sizeof(uint32_t), arm_linux_processors_count);
+ goto cleanup;
+ }
+ }
+
l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache));
if (l1i == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
@@ -460,6 +490,22 @@ void cpuinfo_arm_linux_init(void) {
goto cleanup;
}
+ uint32_t uarchs_index = 0;
+ for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+ if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+ if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) {
+ last_uarch = arm_linux_processors[i].uarch;
+ uarchs[uarchs_index] = (struct cpuinfo_uarch_info) {
+ .uarch = arm_linux_processors[i].uarch,
+ .midr = arm_linux_processors[i].midr,
+ };
+ uarchs_index += 1;
+ }
+ uarchs[uarchs_index - 1].processor_count += 1;
+ uarchs[uarchs_index - 1].core_count += 1;
+ }
+ }
+
uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX;
/* Indication whether L3 (if it exists) is shared between all cores */
bool shared_l3 = true;
@@ -499,6 +545,11 @@ void cpuinfo_arm_linux_init(void) {
cores[i].midr = arm_linux_processors[i].midr;
linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i];
+ if (linux_cpu_to_uarch_index_map != NULL) {
+ linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] =
+ arm_linux_processors[i].uarch_index;
+ }
+
struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 };
cpuinfo_arm_decode_cache(
arm_linux_processors[i].uarch,
@@ -658,12 +709,11 @@ void cpuinfo_arm_linux_init(void) {
}
/* Commit */
- cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
- cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
cpuinfo_processors = processors;
cpuinfo_cores = cores;
cpuinfo_clusters = clusters;
cpuinfo_packages = &package;
+ cpuinfo_uarchs = uarchs;
cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
cpuinfo_cache[cpuinfo_cache_level_2] = l2;
@@ -673,33 +723,42 @@ void cpuinfo_arm_linux_init(void) {
cpuinfo_cores_count = valid_processors;
cpuinfo_clusters_count = cluster_count;
cpuinfo_packages_count = 1;
+ cpuinfo_uarchs_count = uarchs_count;
cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors;
cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors;
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
-
cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]);
+ cpuinfo_linux_cpu_max = arm_linux_processors_count;
+ cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
+ cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
+ cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map;
+
__sync_synchronize();
cpuinfo_is_initialized = true;
- linux_cpu_to_processor_map = NULL;
- linux_cpu_to_core_map = NULL;
processors = NULL;
cores = NULL;
clusters = NULL;
+ uarchs = NULL;
l1i = l1d = l2 = l3 = NULL;
+ linux_cpu_to_processor_map = NULL;
+ linux_cpu_to_core_map = NULL;
+ linux_cpu_to_uarch_index_map = NULL;
cleanup:
free(arm_linux_processors);
- free(linux_cpu_to_processor_map);
- free(linux_cpu_to_core_map);
free(processors);
free(cores);
free(clusters);
+ free(uarchs);
free(l1i);
free(l1d);
free(l2);
free(l3);
+ free(linux_cpu_to_processor_map);
+ free(linux_cpu_to_core_map);
+ free(linux_cpu_to_uarch_index_map);
}
diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c
index e64cc18..bd27259 100644
--- a/src/arm/mach/init.c
+++ b/src/arm/mach/init.c
@@ -14,6 +14,16 @@
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
+/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */
+#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL
+ #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6
+#endif
+#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST
+ #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F
+#endif
+#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER
+ #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2
+#endif
struct cpuinfo_arm_isa cpuinfo_isa = {
#if CPUINFO_ARCH_ARM
@@ -82,37 +92,34 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype
return cpuinfo_uarch_twister;
case CPUFAMILY_ARM_HURRICANE:
return cpuinfo_uarch_hurricane;
-#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL
case CPUFAMILY_ARM_MONSOON_MISTRAL:
-#else
- case 0xe81e7ef6:
- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */
-#endif
/* 2x Monsoon + 4x Mistral cores */
return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral;
-#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST
case CPUFAMILY_ARM_VORTEX_TEMPEST:
-#else
- case 0x07d34b9f:
- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */
-#endif
/* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Vortex + 4x Tempest */
return core_index + 4 < core_count ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest;
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ /* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */
+ return core_index + 4 < core_count ? cpuinfo_uarch_lightning : cpuinfo_uarch_thunder;
default:
/* Use hw.cpusubtype for detection */
break;
}
- switch (cpu_subtype) {
- case CPU_SUBTYPE_ARM_V7:
- return cpuinfo_uarch_cortex_a8;
- case CPU_SUBTYPE_ARM_V7F:
- return cpuinfo_uarch_cortex_a9;
- case CPU_SUBTYPE_ARM_V7K:
- return cpuinfo_uarch_cortex_a7;
- default:
- return cpuinfo_uarch_unknown;
- }
+ #if CPUINFO_ARCH_ARM
+ switch (cpu_subtype) {
+ case CPU_SUBTYPE_ARM_V7:
+ return cpuinfo_uarch_cortex_a8;
+ case CPU_SUBTYPE_ARM_V7F:
+ return cpuinfo_uarch_cortex_a9;
+ case CPU_SUBTYPE_ARM_V7K:
+ return cpuinfo_uarch_cortex_a7;
+ default:
+ return cpuinfo_uarch_unknown;
+ }
+ #else
+ return cpuinfo_uarch_unknown;
+ #endif
}
static void decode_package_name(char* package_name) {
@@ -244,6 +251,7 @@ void cpuinfo_arm_mach_init(void) {
struct cpuinfo_core* cores = NULL;
struct cpuinfo_cluster* clusters = NULL;
struct cpuinfo_package* packages = NULL;
+ struct cpuinfo_uarch_info* uarchs = NULL;
struct cpuinfo_cache* l1i = NULL;
struct cpuinfo_cache* l1d = NULL;
struct cpuinfo_cache* l2 = NULL;
@@ -330,21 +338,12 @@ void cpuinfo_arm_mach_init(void) {
* Thus, we whitelist CPUs known to support these instructions.
*/
switch (cpu_family) {
-#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL
case CPUFAMILY_ARM_MONSOON_MISTRAL:
-#else
- case 0xe81e7ef6:
- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */
-#endif
-#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST
case CPUFAMILY_ARM_VORTEX_TEMPEST:
-#else
- case 0x07d34b9f:
- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */
-#endif
-#if CPUINFO_ARCH_ARM64
- cpuinfo_isa.atomics = true;
-#endif
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ #if CPUINFO_ARCH_ARM64
+ cpuinfo_isa.atomics = true;
+ #endif
cpuinfo_isa.fp16arith = true;
}
@@ -379,10 +378,22 @@ void cpuinfo_arm_mach_init(void) {
num_clusters * sizeof(struct cpuinfo_cluster), num_clusters);
goto cleanup;
}
+ uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info));
+ if (uarchs == NULL) {
+ cpuinfo_log_error(
+ "failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs",
+ num_clusters * sizeof(enum cpuinfo_uarch), num_clusters);
+ goto cleanup;
+ }
uint32_t cluster_idx = UINT32_MAX;
for (uint32_t i = 0; i < mach_topology.cores; i++) {
if (i == 0 || cores[i].uarch != cores[i - 1].uarch) {
cluster_idx++;
+ uarchs[cluster_idx] = (struct cpuinfo_uarch_info) {
+ .uarch = cores[i].uarch,
+ .processor_count = 1,
+ .core_count = 1,
+ };
clusters[cluster_idx] = (struct cpuinfo_cluster) {
.processor_start = i * threads_per_core,
.processor_count = 1,
@@ -394,6 +405,8 @@ void cpuinfo_arm_mach_init(void) {
.uarch = cores[i].uarch,
};
} else {
+ uarchs[cluster_idx].processor_count++;
+ uarchs[cluster_idx].core_count++;
clusters[cluster_idx].processor_count++;
clusters[cluster_idx].core_count++;
}
@@ -542,26 +555,25 @@ void cpuinfo_arm_mach_init(void) {
}
/* Commit changes */
- cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
- cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
- cpuinfo_cache[cpuinfo_cache_level_2] = l2;
- cpuinfo_cache[cpuinfo_cache_level_3] = l3;
-
cpuinfo_processors = processors;
cpuinfo_cores = cores;
cpuinfo_clusters = clusters;
cpuinfo_packages = packages;
-
- cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
- cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
- cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
- cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+ cpuinfo_uarchs = uarchs;
+ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+ cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+ cpuinfo_cache[cpuinfo_cache_level_3] = l3;
cpuinfo_processors_count = mach_topology.threads;
cpuinfo_cores_count = mach_topology.cores;
cpuinfo_clusters_count = num_clusters;
cpuinfo_packages_count = mach_topology.packages;
-
+ cpuinfo_uarchs_count = num_clusters;
+ cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
+ cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
+ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
__sync_synchronize();
@@ -572,6 +584,7 @@ void cpuinfo_arm_mach_init(void) {
cores = NULL;
clusters = NULL;
packages = NULL;
+ uarchs = NULL;
l1i = l1d = l2 = l3 = NULL;
cleanup:
@@ -579,6 +592,7 @@ cleanup:
free(cores);
free(clusters);
free(packages);
+ free(uarchs);
free(l1i);
free(l1d);
free(l2);
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index a38250a..2aef9e7 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -58,7 +58,9 @@ void cpuinfo_arm_decode_vendor_uarch(
*uarch = cpuinfo_uarch_cortex_a35;
break;
case 0xD05:
- *uarch = cpuinfo_uarch_cortex_a55;
+ // Note: use Variant, not Revision, field
+ *uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0 ?
+ cpuinfo_uarch_cortex_a55r0 : cpuinfo_uarch_cortex_a55;
break;
case 0xD06:
*uarch = cpuinfo_uarch_cortex_a65;
@@ -257,9 +259,9 @@ void cpuinfo_arm_decode_vendor_uarch(
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a75;
break;
- case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55 */
+ case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55r0 */
*vendor = cpuinfo_vendor_arm;
- *uarch = cpuinfo_uarch_cortex_a55;
+ *uarch = cpuinfo_uarch_cortex_a55r0;
break;
case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */
*vendor = cpuinfo_vendor_arm;
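
The 0xD05 change above distinguishes Cortex-A55 r0 from later revisions via the MIDR Variant field (bits 23:20) rather than the Revision field (bits 3:0). A small standalone sketch of that decoding, using a hypothetical MIDR value; CPUINFO_ARM_MIDR_VARIANT_MASK in the tree plays the role of the 0xF00000 mask assumed here:

#include <stdint.h>
#include <stdio.h>

int main(void) {
	/* Hypothetical MIDR_EL1 value: implementer 0x41 (ARM), variant 0, part 0xD05 (Cortex-A55). */
	const uint32_t midr = UINT32_C(0x410FD050);
	const uint32_t part = (midr >> 4) & UINT32_C(0xFFF);    /* bits 15:4  */
	const uint32_t variant = (midr >> 20) & UINT32_C(0xF);  /* bits 23:20 */
	if (part == UINT32_C(0xD05)) {
		/* Same split as above: r0 maps to cpuinfo_uarch_cortex_a55r0, r1+ to cpuinfo_uarch_cortex_a55. */
		printf("Cortex-A55 %s\n", variant == 0 ? "r0" : "r1 or later");
	}
	return 0;
}
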
diff --git a/src/cpuinfo/common.h b/src/cpuinfo/common.h
index 6ba746e..b2b404d 100644
--- a/src/cpuinfo/common.h
+++ b/src/cpuinfo/common.h
@@ -12,29 +12,29 @@
#define CPUINFO_COUNT_OF(array) (sizeof(array) / sizeof(0[array]))
#if defined(__GNUC__)
- #define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1))
- #define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0))
+ #define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1))
+ #define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0))
#else
- #define CPUINFO_LIKELY(condition) (!!(condition))
- #define CPUINFO_UNLIKELY(condition) (!!(condition))
+ #define CPUINFO_LIKELY(condition) (!!(condition))
+ #define CPUINFO_UNLIKELY(condition) (!!(condition))
#endif
#ifndef CPUINFO_INTERNAL
- #if defined(__ELF__)
- #define CPUINFO_INTERNAL __attribute__((__visibility__("internal")))
- #elif defined(__MACH__)
- #define CPUINFO_INTERNAL __attribute__((__visibility__("hidden")))
- #else
- #define CPUINFO_INTERNAL
- #endif
+ #if defined(__ELF__)
+ #define CPUINFO_INTERNAL __attribute__((__visibility__("internal")))
+ #elif defined(__MACH__)
+ #define CPUINFO_INTERNAL __attribute__((__visibility__("hidden")))
+ #else
+ #define CPUINFO_INTERNAL
+ #endif
#endif
#ifndef CPUINFO_PRIVATE
- #if defined(__ELF__)
- #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden")))
- #elif defined(__MACH__)
- #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden")))
- #else
- #define CPUINFO_PRIVATE
- #endif
+ #if defined(__ELF__)
+ #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden")))
+ #elif defined(__MACH__)
+ #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden")))
+ #else
+ #define CPUINFO_PRIVATE
+ #endif
#endif
diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h
index f12c48d..c6eed0b 100644
--- a/src/cpuinfo/internal-api.h
+++ b/src/cpuinfo/internal-api.h
@@ -21,11 +21,13 @@ enum cpuinfo_cache_level {
};
extern CPUINFO_INTERNAL bool cpuinfo_is_initialized;
+
extern CPUINFO_INTERNAL struct cpuinfo_processor* cpuinfo_processors;
extern CPUINFO_INTERNAL struct cpuinfo_core* cpuinfo_cores;
extern CPUINFO_INTERNAL struct cpuinfo_cluster* cpuinfo_clusters;
extern CPUINFO_INTERNAL struct cpuinfo_package* cpuinfo_packages;
extern CPUINFO_INTERNAL struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max];
+
extern CPUINFO_INTERNAL uint32_t cpuinfo_processors_count;
extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count;
extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count;
@@ -33,6 +35,19 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count;
extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max];
extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ extern CPUINFO_INTERNAL struct cpuinfo_uarch_info* cpuinfo_uarchs;
+ extern CPUINFO_INTERNAL uint32_t cpuinfo_uarchs_count;
+#else
+ extern CPUINFO_INTERNAL struct cpuinfo_uarch_info cpuinfo_global_uarch;
+#endif
+
+#ifdef __linux__
+ extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_max;
+ extern CPUINFO_INTERNAL const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map;
+ extern CPUINFO_INTERNAL const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map;
+#endif
+
CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
#ifdef _WIN32
diff --git a/src/linux/current.c b/src/linux/current.c
deleted file mode 100644
index 472a4c9..0000000
--- a/src/linux/current.c
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <sched.h>
-
-#include <cpuinfo.h>
-#include <cpuinfo/internal-api.h>
-#include <cpuinfo/log.h>
-#include <linux/api.h>
-
-
-const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL;
-const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL;
-
-
-const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) {
- if (!cpuinfo_is_initialized) {
- cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor");
- }
- const int cpu = sched_getcpu();
- if (cpu >= 0) {
- return cpuinfo_linux_cpu_to_processor_map[cpu];
- } else {
- return &cpuinfo_processors[0];
- }
-}
-
-const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) {
- if (!cpuinfo_is_initialized) {
- cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core");
- }
- const int cpu = sched_getcpu();
- if (cpu >= 0) {
- return cpuinfo_linux_cpu_to_core_map[cpu];
- } else {
- return &cpuinfo_cores[0];
- }
-}
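
The deleted file resolved the current CPU with sched_getcpu() and silently fell back to processor/core 0 on failure. Its replacement in src/api.c (above) issues the getcpu system call directly and bounds-checks the result against cpuinfo_linux_cpu_max. A standalone sketch of that syscall pattern, assuming a Linux target:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Returns the CPU number the calling thread currently runs on, or -1 on failure. */
static int current_cpu(void) {
	unsigned cpu = 0;
	/* getcpu(cpu, node, tcache): the third argument is ignored since Linux 2.6.24. */
	if (syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) {
		return -1;
	}
	return (int) cpu;
}

int main(void) {
	printf("running on cpu %d\n", current_cpu());
	return 0;
}
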
diff --git a/src/x86/api.h b/src/x86/api.h
index 5f5e76d..213c2d8 100644
--- a/src/x86/api.h
+++ b/src/x86/api.h
@@ -93,7 +93,6 @@ CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
const struct cpuid_regs basic_info, const struct cpuid_regs extended_info,
uint32_t max_base_index, uint32_t max_extended_index,
enum cpuinfo_vendor vendor, enum cpuinfo_uarch uarch);
-CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void);
CPUINFO_INTERNAL void cpuinfo_x86_detect_topology(
uint32_t max_base_index,
diff --git a/src/x86/cache/init.c b/src/x86/cache/init.c
index d581016..dd1f1ea 100644
--- a/src/x86/cache/init.c
+++ b/src/x86/cache/init.c
@@ -65,7 +65,7 @@ iterate_descriptors:
}
}
- if (vendor != cpuinfo_vendor_amd && max_base_index >= 4) {
+ if (vendor != cpuinfo_vendor_amd && vendor != cpuinfo_vendor_hygon && max_base_index >= 4) {
struct cpuid_regs leaf4;
uint32_t input_ecx = 0;
uint32_t package_cores_max = 0;
diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h
index 829ec21..9e9e013 100644
--- a/src/x86/cpuid.h
+++ b/src/x86/cpuid.h
@@ -67,18 +67,13 @@
}
#endif
-/*
- * This instruction may be not supported by Native Client validator,
- * make sure it doesn't appear in the binary
- */
-#ifndef __native_client__
- static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) {
- #ifdef _MSC_VER
- return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg);
- #else
- uint32_t lo, hi;
- __asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg));
- return ((uint64_t) hi << 32) | (uint64_t) lo;
- #endif
- }
-#endif
+static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) {
+ #ifdef _MSC_VER
+ return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg);
+ #else
+ uint32_t lo, hi;
+ __asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg));
+ return ((uint64_t) hi << 32) | (uint64_t) lo;
+ #endif
+}
+
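
With the Native Client guard removed, xgetbv() is compiled unconditionally. It is only meaningful after CPUID reports OSXSAVE; a hedged sketch of the usual AVX gate (OSXSAVE is bit 27 and AVX is bit 28 of CPUID leaf 1 ECX, and XCR0 bits 1-2 must both be set for XMM/YMM state), written against the GCC/Clang __get_cpuid helper rather than this project's wrappers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <cpuid.h>  /* GCC/Clang __get_cpuid() */

static bool os_supports_avx(void) {
	unsigned int eax, ebx, ecx, edx;
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		return false;
	}
	const bool osxsave = !!(ecx & (1u << 27));  /* OS uses XSAVE/XRSTOR */
	const bool avx = !!(ecx & (1u << 28));      /* CPU implements AVX */
	if (!osxsave || !avx) {
		return false;
	}
	/* XGETBV with ECX=0 reads XCR0; bit 1 (SSE) and bit 2 (AVX) state must both be enabled. */
	uint32_t lo, hi;
	__asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (0));
	return (lo & UINT32_C(0x6)) == UINT32_C(0x6);
}

int main(void) {
	printf("AVX usable: %s\n", os_supports_avx() ? "yes" : "no");
	return 0;
}
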
diff --git a/src/x86/init.c b/src/x86/init.c
index d736578..244359c 100644
--- a/src/x86/init.c
+++ b/src/x86/init.c
@@ -61,12 +61,8 @@ void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor) {
cpuinfo_x86_detect_topology(max_base_index, max_extended_index, leaf1, &processor->topology);
- #ifdef __native_client__
- cpuinfo_isa = cpuinfo_x86_nacl_detect_isa();
- #else
- cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001,
- max_base_index, max_extended_index, vendor, uarch);
- #endif
+ cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001,
+ max_base_index, max_extended_index, vendor, uarch);
}
if (max_extended_index >= UINT32_C(0x80000004)) {
struct cpuid_regs brand_string[3];
diff --git a/src/x86/isa.c b/src/x86/isa.c
index d27dbca..f2e5a28 100644
--- a/src/x86/isa.c
+++ b/src/x86/isa.c
@@ -244,6 +244,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
*/
break;
case cpuinfo_vendor_amd:
+ case cpuinfo_vendor_hygon:
isa.prefetch = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000)));
break;
default:
@@ -265,6 +266,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
*/
switch (vendor) {
case cpuinfo_vendor_amd:
+ case cpuinfo_vendor_hygon:
isa.prefetchw = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000)));
break;
default:
diff --git a/src/x86/linux/init.c b/src/x86/linux/init.c
index c096336..f565789 100644
--- a/src/x86/linux/init.c
+++ b/src/x86/linux/init.c
@@ -569,9 +569,6 @@ void cpuinfo_x86_linux_init(void) {
}
/* Commit changes */
- cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
- cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
-
cpuinfo_processors = processors;
cpuinfo_cores = cores;
cpuinfo_clusters = clusters;
@@ -591,24 +588,32 @@ void cpuinfo_x86_linux_init(void) {
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
-
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
+ .uarch = x86_processor.uarch,
+ .cpuid = x86_processor.cpuid,
+ .processor_count = processors_count,
+ .core_count = cores_count,
+ };
+
+ cpuinfo_linux_cpu_max = x86_linux_processors_count;
+ cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
+ cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
+
__sync_synchronize();
cpuinfo_is_initialized = true;
- linux_cpu_to_processor_map = NULL;
- linux_cpu_to_core_map = NULL;
processors = NULL;
cores = NULL;
clusters = NULL;
packages = NULL;
l1i = l1d = l2 = l3 = l4 = NULL;
+ linux_cpu_to_processor_map = NULL;
+ linux_cpu_to_core_map = NULL;
cleanup:
- free(linux_cpu_to_processor_map);
- free(linux_cpu_to_core_map);
free(x86_linux_processors);
free(processors);
free(cores);
@@ -619,4 +624,6 @@ cleanup:
free(l2);
free(l3);
free(l4);
+ free(linux_cpu_to_processor_map);
+ free(linux_cpu_to_core_map);
}
diff --git a/src/x86/mach/init.c b/src/x86/mach/init.c
index ae2be33..b44d3ad 100644
--- a/src/x86/mach/init.c
+++ b/src/x86/mach/init.c
@@ -305,30 +305,34 @@ void cpuinfo_x86_mach_init(void) {
}
/* Commit changes */
+ cpuinfo_processors = processors;
+ cpuinfo_cores = cores;
+ cpuinfo_clusters = clusters;
+ cpuinfo_packages = packages;
cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
cpuinfo_cache[cpuinfo_cache_level_2] = l2;
cpuinfo_cache[cpuinfo_cache_level_3] = l3;
cpuinfo_cache[cpuinfo_cache_level_4] = l4;
- cpuinfo_processors = processors;
- cpuinfo_cores = cores;
- cpuinfo_clusters = clusters;
- cpuinfo_packages = packages;
-
+ cpuinfo_processors_count = mach_topology.threads;
+ cpuinfo_cores_count = mach_topology.cores;
+ cpuinfo_clusters_count = mach_topology.packages;
+ cpuinfo_packages_count = mach_topology.packages;
cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
-
- cpuinfo_processors_count = mach_topology.threads;
- cpuinfo_cores_count = mach_topology.cores;
- cpuinfo_clusters_count = mach_topology.packages;
- cpuinfo_packages_count = mach_topology.packages;
-
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
+ .uarch = x86_processor.uarch,
+ .cpuid = x86_processor.cpuid,
+ .processor_count = mach_topology.threads,
+ .core_count = mach_topology.cores,
+ };
+
__sync_synchronize();
cpuinfo_is_initialized = true;
diff --git a/src/x86/nacl/isa.c b/src/x86/nacl/isa.c
deleted file mode 100644
index 662be33..0000000
--- a/src/x86/nacl/isa.c
+++ /dev/null
@@ -1,306 +0,0 @@
-#include <stdbool.h>
-#include <stdint.h>
-#include <stddef.h>
-
-#include <irt.h>
-
-#define NACL_CODE_BUNDLE_SIZE 32
-#include <cpuinfo.h>
-#include <x86/api.h>
-
-static const uint8_t cmpxchg16b_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* MOV edi, edi */
- 0x89, 0xFF,
- /* CMPXCHG16B [r15 + rdi * 1] */
- 0x49, 0x0F, 0xC7, 0x0C, 0x3F,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t lzcnt_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* LZCNT eax, ecx */
- 0xF3, 0x0F, 0xBD, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t popcnt_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* POPCNT eax, ecx */
- 0xF3, 0x0F, 0xB8, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t movbe_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* MOV ecx, ecx */
- 0x89, 0xC9,
- /* MOVBE eax, [r15 + rcx * 1] */
- 0x41, 0x0F, 0x38, 0xF0, 0x04, 0x0F,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t bmi_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* ANDN eax, ecx, edx */
- 0xC4, 0xE2, 0x70, 0xF2, 0xC2,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t tbm_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* BLCS eax, ecx */
- 0x8F, 0xE9, 0x78, 0x01, 0xD9,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t three_d_now_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* PFADD mm0, mm1 */
- 0x0F, 0x0F, 0xC1, 0x9E,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t three_d_now_plus_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* PFNACC mm0, mm1 */
- 0x0F, 0x0F, 0xC1, 0x8A,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t sse3_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* HADDPS xmm0, xmm1 */
- 0xF2, 0x0F, 0x7C, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t ssse3_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* PSHUFB xmm0, xmm1 */
- 0x66, 0x0F, 0x38, 0x00, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t sse4_1_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* PMULLD xmm0, xmm1 */
- 0x66, 0x0F, 0x38, 0x40, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t sse4_2_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* PCMPGTQ xmm0, xmm1 */
- 0x66, 0x0F, 0x38, 0x37, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t sse4a_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* EXTRQ xmm0, xmm1 */
- 0x66, 0x0F, 0x79, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t aes_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* AESENC xmm0, xmm1 */
- 0x66, 0x0F, 0x38, 0xDC, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t pclmulqdq_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* PCLMULQDQ xmm0, xmm1, 0 */
- 0x66, 0x0F, 0x3A, 0x44, 0xC1, 0x00,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t avx_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* VPERMILPS ymm0, ymm1, 0xAA */
- 0xC4, 0xE3, 0x7D, 0x04, 0xC1, 0xAA,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t fma3_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* VFMADDSUB213PS ymm0, ymm1, ymm2 */
- 0xC4, 0xE2, 0x75, 0xA6, 0xC2,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t fma4_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* VFMADDPS ymm0, ymm1, ymm2, ymm3 */
- 0xC4, 0xE3, 0xF5, 0x68, 0xC3, 0x20,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t xop_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* VPHADDBQ xmm0, xmm1 */
- 0x8F, 0xE9, 0x78, 0xC3, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t f16c_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* VCVTPH2PS ymm0, xmm1 */
- 0xC4, 0xE2, 0x7D, 0x13, 0xC1,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-static const uint8_t avx2_bundle[NACL_CODE_BUNDLE_SIZE] = {
- /* VPERMPS ymm0, ymm1, ymm2 */
- 0xC4, 0xE2, 0x75, 0x16, 0xC2,
- /* Fill remainder with HLTs */
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
-};
-
-
-struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void) {
- /*
- * Under Native Client sandbox we can't just ask the CPU:
- * - First, some instructions (XGETBV) necessary to query AVX support are not white-listed in the validator.
- * - Secondly, even if CPU supports some instruction, but validator doesn't know about it (e.g. due a bug in the
- * ISA detection in the validator), all instructions from the "unsupported" ISA extensions will be replaced by
- * HLTs when the module is loaded.
- * Thus, instead of quering the CPU about supported ISA extensions, we query the validator: we pass bundles with
- * instructions from ISA extensions to dynamic code generation APIs, and test if they are accepted.
- */
-
- struct cpuinfo_x86_isa isa = { 0 };
-
- struct nacl_irt_code_data_alloc nacl_irt_code_data_alloc = { 0 };
- struct nacl_irt_dyncode nacl_irt_dyncode = { 0 };
- if (sizeof(nacl_irt_code_data_alloc) != nacl_interface_query(NACL_IRT_CODE_DATA_ALLOC_v0_1,
- &nacl_irt_code_data_alloc,
- sizeof(nacl_irt_code_data_alloc)))
- {
- goto finish;
- }
-
- if (sizeof(nacl_irt_dyncode) != nacl_interface_query(NACL_IRT_DYNCODE_v0_1,
- &nacl_irt_dyncode,
- sizeof(nacl_irt_dyncode)))
- {
- goto finish;
- }
-
- const size_t allocation_size = 65536;
- uintptr_t code_segment = 0;
- if (0 != nacl_irt_code_data_alloc.allocate_code_data(0, allocation_size, 0, 0, &code_segment))
- {
- goto finish;
- }
-
- isa.cmpxchg16b = !nacl_irt_dyncode.dyncode_create((void*) code_segment, cmpxchg16b_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.lzcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, lzcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.popcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, popcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.movbe = !nacl_irt_dyncode.dyncode_create((void*) code_segment, movbe_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.bmi = !nacl_irt_dyncode.dyncode_create((void*) code_segment, bmi_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.tbm = !nacl_irt_dyncode.dyncode_create((void*) code_segment, tbm_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.three_d_now = !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.three_d_now_plus =
- !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_plus_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.sse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.ssse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, ssse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.sse4_1 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_1_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.sse4_2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_2_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.sse4a = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4a_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.aes = !nacl_irt_dyncode.dyncode_create((void*) code_segment, aes_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.pclmulqdq = !nacl_irt_dyncode.dyncode_create((void*) code_segment, pclmulqdq_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.avx = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.fma3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma3_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.fma4 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma4_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.xop = !nacl_irt_dyncode.dyncode_create((void*) code_segment, xop_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.f16c = !nacl_irt_dyncode.dyncode_create((void*) code_segment, f16c_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
- code_segment += NACL_CODE_BUNDLE_SIZE;
-
- isa.avx2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx2_bundle, NACL_CODE_BUNDLE_SIZE) &&
- (*((const uint8_t*) code_segment) != 0xF4);
-
-finish:
- return isa;
-}
diff --git a/src/x86/name.c b/src/x86/name.c
index 708be1d..e0d5a5b 100644
--- a/src/x86/name.c
+++ b/src/x86/name.c
@@ -671,6 +671,7 @@ static const char* vendor_string_map[] = {
[cpuinfo_vendor_intel] = "Intel",
[cpuinfo_vendor_amd] = "AMD",
[cpuinfo_vendor_via] = "VIA",
+ [cpuinfo_vendor_hygon] = "Hygon",
[cpuinfo_vendor_rdc] = "RDC",
[cpuinfo_vendor_dmp] = "DM&P",
[cpuinfo_vendor_transmeta] = "Transmeta",
diff --git a/src/x86/uarch.c b/src/x86/uarch.c
index ba72d8a..ecaa762 100644
--- a/src/x86/uarch.c
+++ b/src/x86/uarch.c
@@ -79,6 +79,8 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
case 0x5E: // Sky Lake Client DT/H/S
case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U
case 0x9E: // Kaby/Coffee Lake DT/H/S
+ case 0xA5: // Comet Lake H/S
+ case 0xA6: // Comet Lake U/Y
return cpuinfo_uarch_sky_lake;
case 0x66: // Cannon Lake (Core i3-8121U)
return cpuinfo_uarch_palm_cove;
@@ -94,7 +96,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
return cpuinfo_uarch_bonnell;
case 0x27: // Medfield
case 0x35: // Cloverview
- case 0x36: // Cedarview, Centerton
+ case 0x36: // Cedarview, Centerton
return cpuinfo_uarch_saltwell;
case 0x37: // Bay Trail
case 0x4A: // Merrifield
@@ -110,6 +112,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
return cpuinfo_uarch_goldmont;
case 0x7A: // Gemini Lake
return cpuinfo_uarch_goldmont_plus;
+
/* Knights-series cores */
case 0x57:
return cpuinfo_uarch_knights_landing;
@@ -173,7 +176,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
case 0x38: // Godavari
case 0x30: // Kaveri
return cpuinfo_uarch_steamroller;
- case 0x60: // Carrizo
+ case 0x60: // Carrizo
case 0x65: // Bristol Ridge
case 0x70: // Stoney Ridge
return cpuinfo_uarch_excavator;
@@ -201,14 +204,22 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
switch (model_info->model) {
case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl
case 0x08: // 12 nm Pinnacle Ridge
- case 0x11: // 14 nm Raven Ridge
+ case 0x11: // 14 nm Raven Ridge, Great Horned Owl
case 0x18: // 12 nm Picasso
return cpuinfo_uarch_zen;
+ case 0x31: // Rome, Castle Peak
+ case 0x60: // Renoir
case 0x71: // Matisse
return cpuinfo_uarch_zen2;
}
}
break;
+ case cpuinfo_vendor_hygon:
+ switch (model_info->family) {
+ case 0x00:
+ return cpuinfo_uarch_dhyana;
+ }
+ break;
default:
break;
}
diff --git a/src/x86/vendor.c b/src/x86/vendor.c
index 3f3c753..2bba90d 100644
--- a/src/x86/vendor.c
+++ b/src/x86/vendor.c
@@ -26,6 +26,11 @@
#define auls UINT32_C(0x736C7561)
#define VIA UINT32_C(0x20414956)
+/* Hygon vendor string: "HygonGenuine" */
+#define Hygo UINT32_C(0x6F677948)
+#define nGen UINT32_C(0x6E65476E)
+#define uine UINT32_C(0x656E6975)
+
/* Transmeta vendor strings: "GenuineTMx86", "TransmetaCPU" */
#define ineT UINT32_C(0x54656E69)
#define Mx86 UINT32_C(0x3638784D)
@@ -105,6 +110,12 @@ enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32
return cpuinfo_vendor_via;
}
break;
+ case Hygo:
+ if (edx == nGen && ecx == uine) {
+ /* "HygonGenuine" */
+ return cpuinfo_vendor_hygon;
+ }
+ break;
#if CPUINFO_ARCH_X86
case AMDi:
if (edx == sbet && ecx == ter) {
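
The Hygon case added above follows the file's existing convention: CPUID leaf 0 reports the 12-byte vendor string in EBX, EDX, ECX (in that order), and each register is compared against a little-endian packed 4-character constant. A small sketch showing how "HygonGenuine" maps onto the three constants; the pack4() helper is illustrative, not part of the tree:

#include <inttypes.h>
#include <stdio.h>
#include <string.h>

/* Pack four ASCII characters the way they appear in a CPUID register on a little-endian x86 host. */
static uint32_t pack4(const char* s) {
	uint32_t value;
	memcpy(&value, s, sizeof(value));
	return value;
}

int main(void) {
	printf("Hygo = 0x%08" PRIX32 "\n", pack4("Hygo"));  /* 0x6F677948 -> EBX */
	printf("nGen = 0x%08" PRIX32 "\n", pack4("nGen"));  /* 0x6E65476E -> EDX */
	printf("uine = 0x%08" PRIX32 "\n", pack4("uine"));  /* 0x656E6975 -> ECX */
	return 0;
}
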
diff --git a/src/x86/windows/init.c b/src/x86/windows/init.c
index 7a2090e..2c7e3cd 100644
--- a/src/x86/windows/init.c
+++ b/src/x86/windows/init.c
@@ -417,9 +417,6 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
for (uint32_t i = 0; i < processors_count; i++) {
const uint32_t apic_id = processors[i].apic_id;
- //linux_cpu_to_processor_map[x86_linux_processors[i].linux_id] = processors + processor_index;
- //linux_cpu_to_core_map[x86_linux_processors[i].linux_id] = cores + core_index;
-
if (x86_processor.cache.l1i.size != 0) {
const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits);
processors[i].cache.l1i = &l1i[l1i_index];
@@ -549,30 +546,34 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
/* Commit changes */
+ cpuinfo_processors = processors;
+ cpuinfo_cores = cores;
+ cpuinfo_clusters = clusters;
+ cpuinfo_packages = packages;
cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
cpuinfo_cache[cpuinfo_cache_level_2] = l2;
cpuinfo_cache[cpuinfo_cache_level_3] = l3;
cpuinfo_cache[cpuinfo_cache_level_4] = l4;
- cpuinfo_processors = processors;
- cpuinfo_cores = cores;
- cpuinfo_clusters = clusters;
- cpuinfo_packages = packages;
-
+ cpuinfo_processors_count = processors_count;
+ cpuinfo_cores_count = cores_count;
+ cpuinfo_clusters_count = packages_count;
+ cpuinfo_packages_count = packages_count;
cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
-
- cpuinfo_processors_count = processors_count;
- cpuinfo_cores_count = cores_count;
- cpuinfo_clusters_count = packages_count;
- cpuinfo_packages_count = packages_count;
-
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
+ .uarch = x86_processor.uarch,
+ .cpuid = x86_processor.cpuid,
+ .processor_count = processors_count,
+ .core_count = cores_count,
+ };
+
MemoryBarrier();
cpuinfo_is_initialized = true;