diff options
author | Ashkan Aliabadi <ashkanaliabadi@fb.com> | 2020-03-19 19:34:26 -0700 |
---|---|---|
committer | Ashkan Aliabadi <ashkanaliabadi@fb.com> | 2020-03-19 19:34:26 -0700 |
commit | dcf8e1896b8b0df6ad9a02e2a8765b6630557e0c (patch) | |
tree | 8ed9c44333fc3994ae11ea3426e68784417338a0 /src | |
parent | d6c0f915ee737f961915c9d17f1679b6777af207 (diff) | |
download | cpuinfo-dcf8e1896b8b0df6ad9a02e2a8765b6630557e0c.tar.gz |
Upstream cpuinfo updates in XNNPACK as of XNNPACK:c58bd3486d52db9c6b9934912790db741bc366f6.
Diffstat (limited to 'src')
-rw-r--r-- | src/api.c | 229 | ||||
-rw-r--r-- | src/arm/cache.c | 1 | ||||
-rw-r--r-- | src/arm/linux/api.h | 4 | ||||
-rw-r--r-- | src/arm/linux/init.c | 77 | ||||
-rw-r--r-- | src/arm/mach/init.c | 102 | ||||
-rw-r--r-- | src/arm/uarch.c | 8 | ||||
-rw-r--r-- | src/cpuinfo/common.h | 36 | ||||
-rw-r--r-- | src/cpuinfo/internal-api.h | 15 | ||||
-rw-r--r-- | src/linux/current.c | 41 | ||||
-rw-r--r-- | src/x86/api.h | 1 | ||||
-rw-r--r-- | src/x86/cache/init.c | 2 | ||||
-rw-r--r-- | src/x86/cpuid.h | 25 | ||||
-rw-r--r-- | src/x86/init.c | 8 | ||||
-rw-r--r-- | src/x86/isa.c | 2 | ||||
-rw-r--r-- | src/x86/linux/init.c | 23 | ||||
-rw-r--r-- | src/x86/mach/init.c | 26 | ||||
-rw-r--r-- | src/x86/nacl/isa.c | 306 | ||||
-rw-r--r-- | src/x86/name.c | 1 | ||||
-rw-r--r-- | src/x86/uarch.c | 17 | ||||
-rw-r--r-- | src/x86/vendor.c | 11 | ||||
-rw-r--r-- | src/x86/windows/init.c | 29 |
21 files changed, 429 insertions, 535 deletions
@@ -1,9 +1,16 @@ +#include <stdbool.h> #include <stddef.h> #include <cpuinfo.h> #include <cpuinfo/internal-api.h> #include <cpuinfo/log.h> +#ifdef __linux__ + #include <linux/api.h> + + #include <unistd.h> + #include <sys/syscall.h> +#endif bool cpuinfo_is_initialized = false; @@ -20,235 +27,347 @@ uint32_t cpuinfo_packages_count = 0; uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 }; uint32_t cpuinfo_max_cache_size = 0; +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL; + uint32_t cpuinfo_uarchs_count = 0; +#else + struct cpuinfo_uarch_info cpuinfo_global_uarch = { cpuinfo_uarch_unknown }; +#endif + +#ifdef __linux__ + uint32_t cpuinfo_linux_cpu_max = 0; + const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL; + const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL; + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL; + #endif +#endif + const struct cpuinfo_processor* cpuinfo_get_processors(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors"); } return cpuinfo_processors; } const struct cpuinfo_core* cpuinfo_get_cores(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core"); } return cpuinfo_cores; } const struct cpuinfo_cluster* cpuinfo_get_clusters(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters"); } return cpuinfo_clusters; } const struct cpuinfo_package* cpuinfo_get_packages(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages"); } return cpuinfo_packages; } -const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) { +const struct cpuinfo_uarch_info* cpuinfo_get_uarchs() { if (!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_uarchs; + #else + return &cpuinfo_global_uarch; + #endif +} + +const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processor"); } - if (index < cpuinfo_processors_count) { - return cpuinfo_processors + index; - } else { + if CPUINFO_UNLIKELY(index >= cpuinfo_processors_count) { return NULL; } + return &cpuinfo_processors[index]; } const struct cpuinfo_core* cpuinfo_get_core(uint32_t index) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core"); } - if (index < cpuinfo_cores_count) { - return cpuinfo_cores + index; - } else { + if CPUINFO_UNLIKELY(index >= cpuinfo_cores_count) { return NULL; } + return &cpuinfo_cores[index]; } const struct cpuinfo_cluster* cpuinfo_get_cluster(uint32_t index) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cluster"); } - if (index < cpuinfo_clusters_count) { - return cpuinfo_clusters + index; - } else { + if CPUINFO_UNLIKELY(index >= cpuinfo_clusters_count) { return NULL; } + return &cpuinfo_clusters[index]; } const struct cpuinfo_package* cpuinfo_get_package(uint32_t index) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "package"); } - if (index < cpuinfo_packages_count) { - return cpuinfo_packages + index; - } else { + if CPUINFO_UNLIKELY(index >= cpuinfo_packages_count) { return NULL; } + return &cpuinfo_packages[index]; } -uint32_t cpuinfo_get_processors_count(void) { +const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) { if (!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + if CPUINFO_UNLIKELY(index >= cpuinfo_uarchs_count) { + return NULL; + } + return &cpuinfo_uarchs[index]; + #else + if CPUINFO_UNLIKELY(index != 0) { + return NULL; + } + return &cpuinfo_global_uarch; + #endif +} + +uint32_t cpuinfo_get_processors_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors_count"); } return cpuinfo_processors_count; } uint32_t cpuinfo_get_cores_count(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores_count"); } return cpuinfo_cores_count; } uint32_t cpuinfo_get_clusters_count(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters_count"); } return cpuinfo_clusters_count; } uint32_t cpuinfo_get_packages_count(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages_count"); } return cpuinfo_packages_count; } -const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) { +uint32_t cpuinfo_get_uarchs_count(void) { if (!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_uarchs_count; + #else + return 1; + #endif +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches"); } return cpuinfo_cache[cpuinfo_cache_level_1i]; } const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches"); } return cpuinfo_cache[cpuinfo_cache_level_1d]; } const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches"); } return cpuinfo_cache[cpuinfo_cache_level_2]; } const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_caches(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches"); } return cpuinfo_cache[cpuinfo_cache_level_3]; } const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches"); } return cpuinfo_cache[cpuinfo_cache_level_4]; } const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_cache"); } - if (index < cpuinfo_cache_count[cpuinfo_cache_level_1i]) { - return cpuinfo_cache[cpuinfo_cache_level_1i] + index; - } else { + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1i]) { return NULL; } + return &cpuinfo_cache[cpuinfo_cache_level_1i][index]; } const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_cache"); } - if (index < cpuinfo_cache_count[cpuinfo_cache_level_1d]) { - return cpuinfo_cache[cpuinfo_cache_level_1d] + index; - } else { + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1d]) { return NULL; } + return &cpuinfo_cache[cpuinfo_cache_level_1d][index]; } const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_cache"); } - if (index < cpuinfo_cache_count[cpuinfo_cache_level_2]) { - return cpuinfo_cache[cpuinfo_cache_level_2] + index; - } else { + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_2]) { return NULL; } + return &cpuinfo_cache[cpuinfo_cache_level_2][index]; } const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_cache"); } - if (index < cpuinfo_cache_count[cpuinfo_cache_level_3]) { - return cpuinfo_cache[cpuinfo_cache_level_3] + index; - } else { + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_3]) { return NULL; } + return &cpuinfo_cache[cpuinfo_cache_level_3][index]; } const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_cache"); } - if (index < cpuinfo_cache_count[cpuinfo_cache_level_4]) { - return cpuinfo_cache[cpuinfo_cache_level_4] + index; - } else { + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_4]) { return NULL; } + return &cpuinfo_cache[cpuinfo_cache_level_4][index]; } uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches_count"); } return cpuinfo_cache_count[cpuinfo_cache_level_1i]; } uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches_count"); } return cpuinfo_cache_count[cpuinfo_cache_level_1d]; } uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches_count"); } return cpuinfo_cache_count[cpuinfo_cache_level_2]; } uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches_count"); } return cpuinfo_cache_count[cpuinfo_cache_level_3]; } uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches_count"); } return cpuinfo_cache_count[cpuinfo_cache_level_4]; } uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void) { - if (!cpuinfo_is_initialized) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "max_cache_size"); } return cpuinfo_max_cache_size; } + +const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor"); + } + #ifdef __linux__ + unsigned cpu; + if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { + return 0; + } + if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { + return 0; + } + return cpuinfo_linux_cpu_to_processor_map[cpu]; + #else + return NULL; + #endif +} + +const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core"); + } + #ifdef __linux__ + unsigned cpu; + if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { + return 0; + } + if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { + return 0; + } + return cpuinfo_linux_cpu_to_core_map[cpu]; + #else + return NULL; + #endif +} + +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #ifdef __linux__ + if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) { + /* Special case: avoid syscall on systems with only a single type of cores */ + return 0; + } + + /* General case */ + unsigned cpu; + if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { + return 0; + } + if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { + return 0; + } + return cpuinfo_linux_cpu_to_uarch_index_map[cpu]; + #else + /* Fallback: pretend to be on the big core. */ + return 0; + #endif + #else + /* Only ARM/ARM64 processors may include cores of different types in the same package. */ + return 0; + #endif +} diff --git a/src/arm/cache.c b/src/arm/cache.c index ccadeb4..c2bc7d2 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -659,6 +659,7 @@ void cpuinfo_arm_decode_cache( }; } break; + case cpuinfo_uarch_cortex_a55r0: case cpuinfo_uarch_cortex_a55: /* * ARM Cortex-A55 Core Technical Reference Manual diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h index 275d072..f99da66 100644 --- a/src/arm/linux/api.h +++ b/src/arm/linux/api.h @@ -153,6 +153,7 @@ struct cpuinfo_arm_linux_processor { uint32_t midr; enum cpuinfo_vendor vendor; enum cpuinfo_uarch uarch; + uint32_t uarch_index; /** * ID of the physical package which includes this logical processor. * The value is parsed from /sys/devices/system/cpu/cpu<N>/topology/physical_package_id @@ -346,3 +347,6 @@ CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr( uint32_t max_processors, uint32_t usable_processors, struct cpuinfo_arm_linux_processor processors[restrict static max_processors]); + +extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map; +extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries; diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c index f0c432c..6272abf 100644 --- a/src/arm/linux/init.c +++ b/src/arm/linux/init.c @@ -106,12 +106,14 @@ void cpuinfo_arm_linux_init(void) { struct cpuinfo_processor* processors = NULL; struct cpuinfo_core* cores = NULL; struct cpuinfo_cluster* clusters = NULL; - const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; - const struct cpuinfo_core** linux_cpu_to_core_map = NULL; + struct cpuinfo_uarch_info* uarchs = NULL; struct cpuinfo_cache* l1i = NULL; struct cpuinfo_cache* l1d = NULL; struct cpuinfo_cache* l2 = NULL; struct cpuinfo_cache* l3 = NULL; + const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; + const struct cpuinfo_core** linux_cpu_to_core_map = NULL; + uint32_t* linux_cpu_to_uarch_index_map = NULL; const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count(); cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count); @@ -400,6 +402,18 @@ void cpuinfo_arm_linux_init(void) { } } + uint32_t uarchs_count = 0; + enum cpuinfo_uarch last_uarch; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) { + last_uarch = arm_linux_processors[i].uarch; + uarchs_count += 1; + } + arm_linux_processors[i].uarch_index = uarchs_count - 1; + } + } + /* * Assumptions: * - No SMP (i.e. each core supports only one hardware thread). @@ -432,6 +446,13 @@ void cpuinfo_arm_linux_init(void) { goto cleanup; } + uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info)); + if (uarchs == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures", + uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count); + goto cleanup; + } + linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*)); if (linux_cpu_to_processor_map == NULL) { cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries", @@ -446,6 +467,15 @@ void cpuinfo_arm_linux_init(void) { goto cleanup; } + if (uarchs_count > 1) { + linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t)); + if (linux_cpu_to_uarch_index_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries", + arm_linux_processors_count * sizeof(uint32_t), arm_linux_processors_count); + goto cleanup; + } + } + l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache)); if (l1i == NULL) { cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", @@ -460,6 +490,22 @@ void cpuinfo_arm_linux_init(void) { goto cleanup; } + uint32_t uarchs_index = 0; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) { + last_uarch = arm_linux_processors[i].uarch; + uarchs[uarchs_index] = (struct cpuinfo_uarch_info) { + .uarch = arm_linux_processors[i].uarch, + .midr = arm_linux_processors[i].midr, + }; + uarchs_index += 1; + } + uarchs[uarchs_index - 1].processor_count += 1; + uarchs[uarchs_index - 1].core_count += 1; + } + } + uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; /* Indication whether L3 (if it exists) is shared between all cores */ bool shared_l3 = true; @@ -499,6 +545,11 @@ void cpuinfo_arm_linux_init(void) { cores[i].midr = arm_linux_processors[i].midr; linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i]; + if (linux_cpu_to_uarch_index_map != NULL) { + linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] = + arm_linux_processors[i].uarch_index; + } + struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 }; cpuinfo_arm_decode_cache( arm_linux_processors[i].uarch, @@ -658,12 +709,11 @@ void cpuinfo_arm_linux_init(void) { } /* Commit */ - cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; - cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; cpuinfo_processors = processors; cpuinfo_cores = cores; cpuinfo_clusters = clusters; cpuinfo_packages = &package; + cpuinfo_uarchs = uarchs; cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; cpuinfo_cache[cpuinfo_cache_level_2] = l2; @@ -673,33 +723,42 @@ void cpuinfo_arm_linux_init(void) { cpuinfo_cores_count = valid_processors; cpuinfo_clusters_count = cluster_count; cpuinfo_packages_count = 1; + cpuinfo_uarchs_count = uarchs_count; cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors; cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors; cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; - cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]); + cpuinfo_linux_cpu_max = arm_linux_processors_count; + cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; + cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; + cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map; + __sync_synchronize(); cpuinfo_is_initialized = true; - linux_cpu_to_processor_map = NULL; - linux_cpu_to_core_map = NULL; processors = NULL; cores = NULL; clusters = NULL; + uarchs = NULL; l1i = l1d = l2 = l3 = NULL; + linux_cpu_to_processor_map = NULL; + linux_cpu_to_core_map = NULL; + linux_cpu_to_uarch_index_map = NULL; cleanup: free(arm_linux_processors); - free(linux_cpu_to_processor_map); - free(linux_cpu_to_core_map); free(processors); free(cores); free(clusters); + free(uarchs); free(l1i); free(l1d); free(l2); free(l3); + free(linux_cpu_to_processor_map); + free(linux_cpu_to_core_map); + free(linux_cpu_to_uarch_index_map); } diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index e64cc18..bd27259 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -14,6 +14,16 @@ #include <cpuinfo/internal-api.h> #include <cpuinfo/log.h> +/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */ +#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL + #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6 +#endif +#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST + #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F +#endif +#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER + #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2 +#endif struct cpuinfo_arm_isa cpuinfo_isa = { #if CPUINFO_ARCH_ARM @@ -82,37 +92,34 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype return cpuinfo_uarch_twister; case CPUFAMILY_ARM_HURRICANE: return cpuinfo_uarch_hurricane; -#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL case CPUFAMILY_ARM_MONSOON_MISTRAL: -#else - case 0xe81e7ef6: - /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */ -#endif /* 2x Monsoon + 4x Mistral cores */ return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral; -#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST case CPUFAMILY_ARM_VORTEX_TEMPEST: -#else - case 0x07d34b9f: - /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ -#endif /* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Cortex + 4x Tempest */ return core_index + 4 < core_count ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest; + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + /* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */ + return core_index + 4 < core_count ? cpuinfo_uarch_lightning : cpuinfo_uarch_thunder; default: /* Use hw.cpusubtype for detection */ break; } - switch (cpu_subtype) { - case CPU_SUBTYPE_ARM_V7: - return cpuinfo_uarch_cortex_a8; - case CPU_SUBTYPE_ARM_V7F: - return cpuinfo_uarch_cortex_a9; - case CPU_SUBTYPE_ARM_V7K: - return cpuinfo_uarch_cortex_a7; - default: - return cpuinfo_uarch_unknown; - } + #if CPUINFO_ARCH_ARM + switch (cpu_subtype) { + case CPU_SUBTYPE_ARM_V7: + return cpuinfo_uarch_cortex_a8; + case CPU_SUBTYPE_ARM_V7F: + return cpuinfo_uarch_cortex_a9; + case CPU_SUBTYPE_ARM_V7K: + return cpuinfo_uarch_cortex_a7; + default: + return cpuinfo_uarch_unknown; + } + #else + return cpuinfo_uarch_unknown; + #endif } static void decode_package_name(char* package_name) { @@ -244,6 +251,7 @@ void cpuinfo_arm_mach_init(void) { struct cpuinfo_core* cores = NULL; struct cpuinfo_cluster* clusters = NULL; struct cpuinfo_package* packages = NULL; + struct cpuinfo_uarch_info* uarchs = NULL; struct cpuinfo_cache* l1i = NULL; struct cpuinfo_cache* l1d = NULL; struct cpuinfo_cache* l2 = NULL; @@ -330,21 +338,12 @@ void cpuinfo_arm_mach_init(void) { * Thus, we whitelist CPUs known to support these instructions. */ switch (cpu_family) { -#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL case CPUFAMILY_ARM_MONSOON_MISTRAL: -#else - case 0xe81e7ef6: - /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */ -#endif -#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST case CPUFAMILY_ARM_VORTEX_TEMPEST: -#else - case 0x07d34b9f: - /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ -#endif -#if CPUINFO_ARCH_ARM64 - cpuinfo_isa.atomics = true; -#endif + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + #if CPUINFO_ARCH_ARM64 + cpuinfo_isa.atomics = true; + #endif cpuinfo_isa.fp16arith = true; } @@ -379,10 +378,22 @@ void cpuinfo_arm_mach_init(void) { num_clusters * sizeof(struct cpuinfo_cluster), num_clusters); goto cleanup; } + uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info)); + if (uarchs == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs", + num_clusters * sizeof(enum cpuinfo_uarch), num_clusters); + goto cleanup; + } uint32_t cluster_idx = UINT32_MAX; for (uint32_t i = 0; i < mach_topology.cores; i++) { if (i == 0 || cores[i].uarch != cores[i - 1].uarch) { cluster_idx++; + uarchs[cluster_idx] = (struct cpuinfo_uarch_info) { + .uarch = cores[i].uarch, + .processor_count = 1, + .core_count = 1, + }; clusters[cluster_idx] = (struct cpuinfo_cluster) { .processor_start = i * threads_per_core, .processor_count = 1, @@ -394,6 +405,8 @@ void cpuinfo_arm_mach_init(void) { .uarch = cores[i].uarch, }; } else { + uarchs[cluster_idx].processor_count++; + uarchs[cluster_idx].core_count++; clusters[cluster_idx].processor_count++; clusters[cluster_idx].core_count++; } @@ -542,26 +555,25 @@ void cpuinfo_arm_mach_init(void) { } /* Commit changes */ - cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; - cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; - cpuinfo_cache[cpuinfo_cache_level_2] = l2; - cpuinfo_cache[cpuinfo_cache_level_3] = l3; - cpuinfo_processors = processors; cpuinfo_cores = cores; cpuinfo_clusters = clusters; cpuinfo_packages = packages; - - cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; - cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; - cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; - cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_uarchs = uarchs; + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + cpuinfo_cache[cpuinfo_cache_level_3] = l3; cpuinfo_processors_count = mach_topology.threads; cpuinfo_cores_count = mach_topology.cores; cpuinfo_clusters_count = num_clusters; cpuinfo_packages_count = mach_topology.packages; - + cpuinfo_uarchs_count = num_clusters; + cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); __sync_synchronize(); @@ -572,6 +584,7 @@ void cpuinfo_arm_mach_init(void) { cores = NULL; clusters = NULL; packages = NULL; + uarchs = NULL; l1i = l1d = l2 = l3 = NULL; cleanup: @@ -579,6 +592,7 @@ cleanup: free(cores); free(clusters); free(packages); + free(uarchs); free(l1i); free(l1d); free(l2); diff --git a/src/arm/uarch.c b/src/arm/uarch.c index a38250a..2aef9e7 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -58,7 +58,9 @@ void cpuinfo_arm_decode_vendor_uarch( *uarch = cpuinfo_uarch_cortex_a35; break; case 0xD05: - *uarch = cpuinfo_uarch_cortex_a55; + // Note: use Variant, not Revision, field + *uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0 ? + cpuinfo_uarch_cortex_a55r0 : cpuinfo_uarch_cortex_a55; break; case 0xD06: *uarch = cpuinfo_uarch_cortex_a65; @@ -257,9 +259,9 @@ void cpuinfo_arm_decode_vendor_uarch( *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a75; break; - case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55 */ + case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55r0 */ *vendor = cpuinfo_vendor_arm; - *uarch = cpuinfo_uarch_cortex_a55; + *uarch = cpuinfo_uarch_cortex_a55r0; break; case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */ *vendor = cpuinfo_vendor_arm; diff --git a/src/cpuinfo/common.h b/src/cpuinfo/common.h index 6ba746e..b2b404d 100644 --- a/src/cpuinfo/common.h +++ b/src/cpuinfo/common.h @@ -12,29 +12,29 @@ #define CPUINFO_COUNT_OF(array) (sizeof(array) / sizeof(0[array])) #if defined(__GNUC__) - #define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1)) - #define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0)) + #define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1)) + #define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0)) #else - #define CPUINFO_LIKELY(condition) (!!(condition)) - #define CPUINFO_UNLIKELY(condition) (!!(condition)) + #define CPUINFO_LIKELY(condition) (!!(condition)) + #define CPUINFO_UNLIKELY(condition) (!!(condition)) #endif #ifndef CPUINFO_INTERNAL - #if defined(__ELF__) - #define CPUINFO_INTERNAL __attribute__((__visibility__("internal"))) - #elif defined(__MACH__) - #define CPUINFO_INTERNAL __attribute__((__visibility__("hidden"))) - #else - #define CPUINFO_INTERNAL - #endif + #if defined(__ELF__) + #define CPUINFO_INTERNAL __attribute__((__visibility__("internal"))) + #elif defined(__MACH__) + #define CPUINFO_INTERNAL __attribute__((__visibility__("hidden"))) + #else + #define CPUINFO_INTERNAL + #endif #endif #ifndef CPUINFO_PRIVATE - #if defined(__ELF__) - #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) - #elif defined(__MACH__) - #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) - #else - #define CPUINFO_PRIVATE - #endif + #if defined(__ELF__) + #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) + #elif defined(__MACH__) + #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) + #else + #define CPUINFO_PRIVATE + #endif #endif diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h index f12c48d..c6eed0b 100644 --- a/src/cpuinfo/internal-api.h +++ b/src/cpuinfo/internal-api.h @@ -21,11 +21,13 @@ enum cpuinfo_cache_level { }; extern CPUINFO_INTERNAL bool cpuinfo_is_initialized; + extern CPUINFO_INTERNAL struct cpuinfo_processor* cpuinfo_processors; extern CPUINFO_INTERNAL struct cpuinfo_core* cpuinfo_cores; extern CPUINFO_INTERNAL struct cpuinfo_cluster* cpuinfo_clusters; extern CPUINFO_INTERNAL struct cpuinfo_package* cpuinfo_packages; extern CPUINFO_INTERNAL struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max]; + extern CPUINFO_INTERNAL uint32_t cpuinfo_processors_count; extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count; extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count; @@ -33,6 +35,19 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count; extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max]; extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + extern CPUINFO_INTERNAL struct cpuinfo_uarch_info* cpuinfo_uarchs; + extern CPUINFO_INTERNAL uint32_t cpuinfo_uarchs_count; +#else + extern CPUINFO_INTERNAL struct cpuinfo_uarch_info cpuinfo_global_uarch; +#endif + +#ifdef __linux__ + extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_max; + extern CPUINFO_INTERNAL const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map; + extern CPUINFO_INTERNAL const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map; +#endif + CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void); CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void); #ifdef _WIN32 diff --git a/src/linux/current.c b/src/linux/current.c deleted file mode 100644 index 472a4c9..0000000 --- a/src/linux/current.c +++ /dev/null @@ -1,41 +0,0 @@ -#include <stdbool.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> - -#include <sched.h> - -#include <cpuinfo.h> -#include <cpuinfo/internal-api.h> -#include <cpuinfo/log.h> -#include <linux/api.h> - - -const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL; -const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL; - - -const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) { - if (!cpuinfo_is_initialized) { - cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor"); - } - const int cpu = sched_getcpu(); - if (cpu >= 0) { - return cpuinfo_linux_cpu_to_processor_map[cpu]; - } else { - return &cpuinfo_processors[0]; - } -} - -const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) { - if (!cpuinfo_is_initialized) { - cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core"); - } - const int cpu = sched_getcpu(); - if (cpu >= 0) { - return cpuinfo_linux_cpu_to_core_map[cpu]; - } else { - return &cpuinfo_cores[0]; - } -} diff --git a/src/x86/api.h b/src/x86/api.h index 5f5e76d..213c2d8 100644 --- a/src/x86/api.h +++ b/src/x86/api.h @@ -93,7 +93,6 @@ CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( const struct cpuid_regs basic_info, const struct cpuid_regs extended_info, uint32_t max_base_index, uint32_t max_extended_index, enum cpuinfo_vendor vendor, enum cpuinfo_uarch uarch); -CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void); CPUINFO_INTERNAL void cpuinfo_x86_detect_topology( uint32_t max_base_index, diff --git a/src/x86/cache/init.c b/src/x86/cache/init.c index d581016..dd1f1ea 100644 --- a/src/x86/cache/init.c +++ b/src/x86/cache/init.c @@ -65,7 +65,7 @@ iterate_descriptors: } } - if (vendor != cpuinfo_vendor_amd && max_base_index >= 4) { + if (vendor != cpuinfo_vendor_amd && vendor != cpuinfo_vendor_hygon && max_base_index >= 4) { struct cpuid_regs leaf4; uint32_t input_ecx = 0; uint32_t package_cores_max = 0; diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h index 829ec21..9e9e013 100644 --- a/src/x86/cpuid.h +++ b/src/x86/cpuid.h @@ -67,18 +67,13 @@ } #endif -/* - * This instruction may be not supported by Native Client validator, - * make sure it doesn't appear in the binary - */ -#ifndef __native_client__ - static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) { - #ifdef _MSC_VER - return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg); - #else - uint32_t lo, hi; - __asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg)); - return ((uint64_t) hi << 32) | (uint64_t) lo; - #endif - } -#endif +static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) { + #ifdef _MSC_VER + return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg); + #else + uint32_t lo, hi; + __asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg)); + return ((uint64_t) hi << 32) | (uint64_t) lo; + #endif +} + diff --git a/src/x86/init.c b/src/x86/init.c index d736578..244359c 100644 --- a/src/x86/init.c +++ b/src/x86/init.c @@ -61,12 +61,8 @@ void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor) { cpuinfo_x86_detect_topology(max_base_index, max_extended_index, leaf1, &processor->topology); - #ifdef __native_client__ - cpuinfo_isa = cpuinfo_x86_nacl_detect_isa(); - #else - cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001, - max_base_index, max_extended_index, vendor, uarch); - #endif + cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001, + max_base_index, max_extended_index, vendor, uarch); } if (max_extended_index >= UINT32_C(0x80000004)) { struct cpuid_regs brand_string[3]; diff --git a/src/x86/isa.c b/src/x86/isa.c index d27dbca..f2e5a28 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -244,6 +244,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( */ break; case cpuinfo_vendor_amd: + case cpuinfo_vendor_hygon: isa.prefetch = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000))); break; default: @@ -265,6 +266,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( */ switch (vendor) { case cpuinfo_vendor_amd: + case cpuinfo_vendor_hygon: isa.prefetchw = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000))); break; default: diff --git a/src/x86/linux/init.c b/src/x86/linux/init.c index c096336..f565789 100644 --- a/src/x86/linux/init.c +++ b/src/x86/linux/init.c @@ -569,9 +569,6 @@ void cpuinfo_x86_linux_init(void) { } /* Commit changes */ - cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; - cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; - cpuinfo_processors = processors; cpuinfo_cores = cores; cpuinfo_clusters = clusters; @@ -591,24 +588,32 @@ void cpuinfo_x86_linux_init(void) { cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; - cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { + .uarch = x86_processor.uarch, + .cpuid = x86_processor.cpuid, + .processor_count = processors_count, + .core_count = cores_count, + }; + + cpuinfo_linux_cpu_max = x86_linux_processors_count; + cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; + cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; + __sync_synchronize(); cpuinfo_is_initialized = true; - linux_cpu_to_processor_map = NULL; - linux_cpu_to_core_map = NULL; processors = NULL; cores = NULL; clusters = NULL; packages = NULL; l1i = l1d = l2 = l3 = l4 = NULL; + linux_cpu_to_processor_map = NULL; + linux_cpu_to_core_map = NULL; cleanup: - free(linux_cpu_to_processor_map); - free(linux_cpu_to_core_map); free(x86_linux_processors); free(processors); free(cores); @@ -619,4 +624,6 @@ cleanup: free(l2); free(l3); free(l4); + free(linux_cpu_to_processor_map); + free(linux_cpu_to_core_map); } diff --git a/src/x86/mach/init.c b/src/x86/mach/init.c index ae2be33..b44d3ad 100644 --- a/src/x86/mach/init.c +++ b/src/x86/mach/init.c @@ -305,30 +305,34 @@ void cpuinfo_x86_mach_init(void) { } /* Commit changes */ + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = packages; cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; cpuinfo_cache[cpuinfo_cache_level_2] = l2; cpuinfo_cache[cpuinfo_cache_level_3] = l3; cpuinfo_cache[cpuinfo_cache_level_4] = l4; - cpuinfo_processors = processors; - cpuinfo_cores = cores; - cpuinfo_clusters = clusters; - cpuinfo_packages = packages; - + cpuinfo_processors_count = mach_topology.threads; + cpuinfo_cores_count = mach_topology.cores; + cpuinfo_clusters_count = mach_topology.packages; + cpuinfo_packages_count = mach_topology.packages; cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; - - cpuinfo_processors_count = mach_topology.threads; - cpuinfo_cores_count = mach_topology.cores; - cpuinfo_clusters_count = mach_topology.packages; - cpuinfo_packages_count = mach_topology.packages; - cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { + .uarch = x86_processor.uarch, + .cpuid = x86_processor.cpuid, + .processor_count = mach_topology.threads, + .core_count = mach_topology.cores, + }; + __sync_synchronize(); cpuinfo_is_initialized = true; diff --git a/src/x86/nacl/isa.c b/src/x86/nacl/isa.c deleted file mode 100644 index 662be33..0000000 --- a/src/x86/nacl/isa.c +++ /dev/null @@ -1,306 +0,0 @@ -#include <stdbool.h> -#include <stdint.h> -#include <stddef.h> - -#include <irt.h> - -#define NACL_CODE_BUNDLE_SIZE 32 -#include <cpuinfo.h> -#include <x86/api.h> - -static const uint8_t cmpxchg16b_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* MOV edi, edi */ - 0x89, 0xFF, - /* CMPXCHG16B [r15 + rdi * 1] */ - 0x49, 0x0F, 0xC7, 0x0C, 0x3F, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t lzcnt_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* LZCNT eax, ecx */ - 0xF3, 0x0F, 0xBD, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t popcnt_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* POPCNT eax, ecx */ - 0xF3, 0x0F, 0xB8, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t movbe_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* MOV ecx, ecx */ - 0x89, 0xC9, - /* MOVBE eax, [r15 + rcx * 1] */ - 0x41, 0x0F, 0x38, 0xF0, 0x04, 0x0F, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t bmi_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* ANDN eax, ecx, edx */ - 0xC4, 0xE2, 0x70, 0xF2, 0xC2, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t tbm_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* BLCS eax, ecx */ - 0x8F, 0xE9, 0x78, 0x01, 0xD9, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t three_d_now_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* PFADD mm0, mm1 */ - 0x0F, 0x0F, 0xC1, 0x9E, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t three_d_now_plus_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* PFNACC mm0, mm1 */ - 0x0F, 0x0F, 0xC1, 0x8A, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t sse3_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* HADDPS xmm0, xmm1 */ - 0xF2, 0x0F, 0x7C, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t ssse3_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* PSHUFB xmm0, xmm1 */ - 0x66, 0x0F, 0x38, 0x00, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t sse4_1_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* PMULLD xmm0, xmm1 */ - 0x66, 0x0F, 0x38, 0x40, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t sse4_2_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* PCMPGTQ xmm0, xmm1 */ - 0x66, 0x0F, 0x38, 0x37, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t sse4a_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* EXTRQ xmm0, xmm1 */ - 0x66, 0x0F, 0x79, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t aes_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* AESENC xmm0, xmm1 */ - 0x66, 0x0F, 0x38, 0xDC, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t pclmulqdq_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* PCLMULQDQ xmm0, xmm1, 0 */ - 0x66, 0x0F, 0x3A, 0x44, 0xC1, 0x00, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t avx_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* VPERMILPS ymm0, ymm1, 0xAA */ - 0xC4, 0xE3, 0x7D, 0x04, 0xC1, 0xAA, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t fma3_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* VFMADDSUB213PS ymm0, ymm1, ymm2 */ - 0xC4, 0xE2, 0x75, 0xA6, 0xC2, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t fma4_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* VFMADDPS ymm0, ymm1, ymm2, ymm3 */ - 0xC4, 0xE3, 0xF5, 0x68, 0xC3, 0x20, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t xop_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* VPHADDBQ xmm0, xmm1 */ - 0x8F, 0xE9, 0x78, 0xC3, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t f16c_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* VCVTPH2PS ymm0, xmm1 */ - 0xC4, 0xE2, 0x7D, 0x13, 0xC1, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - -static const uint8_t avx2_bundle[NACL_CODE_BUNDLE_SIZE] = { - /* VPERMPS ymm0, ymm1, ymm2 */ - 0xC4, 0xE2, 0x75, 0x16, 0xC2, - /* Fill remainder with HLTs */ - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, - 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, -}; - - -struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void) { - /* - * Under Native Client sandbox we can't just ask the CPU: - * - First, some instructions (XGETBV) necessary to query AVX support are not white-listed in the validator. - * - Secondly, even if CPU supports some instruction, but validator doesn't know about it (e.g. due a bug in the - * ISA detection in the validator), all instructions from the "unsupported" ISA extensions will be replaced by - * HLTs when the module is loaded. - * Thus, instead of quering the CPU about supported ISA extensions, we query the validator: we pass bundles with - * instructions from ISA extensions to dynamic code generation APIs, and test if they are accepted. - */ - - struct cpuinfo_x86_isa isa = { 0 }; - - struct nacl_irt_code_data_alloc nacl_irt_code_data_alloc = { 0 }; - struct nacl_irt_dyncode nacl_irt_dyncode = { 0 }; - if (sizeof(nacl_irt_code_data_alloc) != nacl_interface_query(NACL_IRT_CODE_DATA_ALLOC_v0_1, - &nacl_irt_code_data_alloc, - sizeof(nacl_irt_code_data_alloc))) - { - goto finish; - } - - if (sizeof(nacl_irt_dyncode) != nacl_interface_query(NACL_IRT_DYNCODE_v0_1, - &nacl_irt_dyncode, - sizeof(nacl_irt_dyncode))) - { - goto finish; - } - - const size_t allocation_size = 65536; - uintptr_t code_segment = 0; - if (0 != nacl_irt_code_data_alloc.allocate_code_data(0, allocation_size, 0, 0, &code_segment)) - { - goto finish; - } - - isa.cmpxchg16b = !nacl_irt_dyncode.dyncode_create((void*) code_segment, cmpxchg16b_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.lzcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, lzcnt_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.popcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, popcnt_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.movbe = !nacl_irt_dyncode.dyncode_create((void*) code_segment, movbe_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.bmi = !nacl_irt_dyncode.dyncode_create((void*) code_segment, bmi_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.tbm = !nacl_irt_dyncode.dyncode_create((void*) code_segment, tbm_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.three_d_now = !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.three_d_now_plus = - !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_plus_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.sse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse3_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.ssse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, ssse3_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.sse4_1 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_1_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.sse4_2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_2_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.sse4a = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4a_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.aes = !nacl_irt_dyncode.dyncode_create((void*) code_segment, aes_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.pclmulqdq = !nacl_irt_dyncode.dyncode_create((void*) code_segment, pclmulqdq_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.avx = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.fma3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma3_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.fma4 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma4_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.xop = !nacl_irt_dyncode.dyncode_create((void*) code_segment, xop_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.f16c = !nacl_irt_dyncode.dyncode_create((void*) code_segment, f16c_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - code_segment += NACL_CODE_BUNDLE_SIZE; - - isa.avx2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx2_bundle, NACL_CODE_BUNDLE_SIZE) && - (*((const uint8_t*) code_segment) != 0xF4); - -finish: - return isa; -} diff --git a/src/x86/name.c b/src/x86/name.c index 708be1d..e0d5a5b 100644 --- a/src/x86/name.c +++ b/src/x86/name.c @@ -671,6 +671,7 @@ static const char* vendor_string_map[] = { [cpuinfo_vendor_intel] = "Intel", [cpuinfo_vendor_amd] = "AMD", [cpuinfo_vendor_via] = "VIA", + [cpuinfo_vendor_hygon] = "Hygon", [cpuinfo_vendor_rdc] = "RDC", [cpuinfo_vendor_dmp] = "DM&P", [cpuinfo_vendor_transmeta] = "Transmeta", diff --git a/src/x86/uarch.c b/src/x86/uarch.c index ba72d8a..ecaa762 100644 --- a/src/x86/uarch.c +++ b/src/x86/uarch.c @@ -79,6 +79,8 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( case 0x5E: // Sky Lake Client DT/H/S case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U case 0x9E: // Kaby/Coffee Lake DT/H/S + case 0xA5: // Comet Lake H/S + case 0xA6: // Comet Lake U/Y return cpuinfo_uarch_sky_lake; case 0x66: // Cannon Lake (Core i3-8121U) return cpuinfo_uarch_palm_cove; @@ -94,7 +96,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( return cpuinfo_uarch_bonnell; case 0x27: // Medfield case 0x35: // Cloverview - case 0x36: // Cedarview, Centerton + case 0x36: // Cedarview, Centerton return cpuinfo_uarch_saltwell; case 0x37: // Bay Trail case 0x4A: // Merrifield @@ -110,6 +112,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( return cpuinfo_uarch_goldmont; case 0x7A: // Gemini Lake return cpuinfo_uarch_goldmont_plus; + /* Knights-series cores */ case 0x57: return cpuinfo_uarch_knights_landing; @@ -173,7 +176,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( case 0x38: // Godavari case 0x30: // Kaveri return cpuinfo_uarch_steamroller; - case 0x60: // Carrizo + case 0x60: // Carrizo case 0x65: // Bristol Ridge case 0x70: // Stoney Ridge return cpuinfo_uarch_excavator; @@ -201,14 +204,22 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( switch (model_info->model) { case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl case 0x08: // 12 nm Pinnacle Ridge - case 0x11: // 14 nm Raven Ridge + case 0x11: // 14 nm Raven Ridge, Great Horned Owl case 0x18: // 12 nm Picasso return cpuinfo_uarch_zen; + case 0x31: // Rome, Castle Peak + case 0x60: // Renoir case 0x71: // Matisse return cpuinfo_uarch_zen2; } } break; + case cpuinfo_vendor_hygon: + switch (model_info->family) { + case 0x00: + return cpuinfo_uarch_dhyana; + } + break; default: break; } diff --git a/src/x86/vendor.c b/src/x86/vendor.c index 3f3c753..2bba90d 100644 --- a/src/x86/vendor.c +++ b/src/x86/vendor.c @@ -26,6 +26,11 @@ #define auls UINT32_C(0x736C7561) #define VIA UINT32_C(0x20414956) +/* Hygon vendor string: "HygonGenuine" */ +#define Hygo UINT32_C(0x6F677948) +#define nGen UINT32_C(0x6E65476E) +#define uine UINT32_C(0x656E6975) + /* Transmeta vendor strings: "GenuineTMx86", "TransmetaCPU" */ #define ineT UINT32_C(0x54656E69) #define Mx86 UINT32_C(0x3638784D) @@ -105,6 +110,12 @@ enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32 return cpuinfo_vendor_via; } break; + case Hygo: + if (edx == nGen && ecx == uine) { + /* "HygonGenuine" */ + return cpuinfo_vendor_hygon; + } + break; #if CPUINFO_ARCH_X86 case AMDi: if (edx == sbet && ecx == ter) { diff --git a/src/x86/windows/init.c b/src/x86/windows/init.c index 7a2090e..2c7e3cd 100644 --- a/src/x86/windows/init.c +++ b/src/x86/windows/init.c @@ -417,9 +417,6 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV for (uint32_t i = 0; i < processors_count; i++) { const uint32_t apic_id = processors[i].apic_id; - //linux_cpu_to_processor_map[x86_linux_processors[i].linux_id] = processors + processor_index; - //linux_cpu_to_core_map[x86_linux_processors[i].linux_id] = cores + core_index; - if (x86_processor.cache.l1i.size != 0) { const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits); processors[i].cache.l1i = &l1i[l1i_index]; @@ -549,30 +546,34 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV /* Commit changes */ + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = packages; cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; cpuinfo_cache[cpuinfo_cache_level_2] = l2; cpuinfo_cache[cpuinfo_cache_level_3] = l3; cpuinfo_cache[cpuinfo_cache_level_4] = l4; - cpuinfo_processors = processors; - cpuinfo_cores = cores; - cpuinfo_clusters = clusters; - cpuinfo_packages = packages; - + cpuinfo_processors_count = processors_count; + cpuinfo_cores_count = cores_count; + cpuinfo_clusters_count = packages_count; + cpuinfo_packages_count = packages_count; cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count; cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count; cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; - - cpuinfo_processors_count = processors_count; - cpuinfo_cores_count = cores_count; - cpuinfo_clusters_count = packages_count; - cpuinfo_packages_count = packages_count; - cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { + .uarch = x86_processor.uarch, + .cpuid = x86_processor.cpuid, + .processor_count = processors_count, + .core_count = cores_count, + }; + MemoryBarrier(); cpuinfo_is_initialized = true; |