diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/arm/cache.c | 61 | ||||
-rw-r--r-- | src/arm/linux/aarch32-isa.c | 9 | ||||
-rw-r--r-- | src/arm/linux/aarch64-isa.c | 18 | ||||
-rw-r--r-- | src/arm/linux/chipset.c | 94 | ||||
-rw-r--r-- | src/arm/linux/clusters.c | 20 | ||||
-rw-r--r-- | src/arm/linux/cpuinfo.c | 8 | ||||
-rw-r--r-- | src/arm/linux/init.c | 2 | ||||
-rw-r--r-- | src/arm/linux/midr.c | 6 | ||||
-rw-r--r-- | src/arm/mach/init.c | 219 | ||||
-rw-r--r-- | src/arm/midr.h | 31 | ||||
-rw-r--r-- | src/arm/uarch.c | 17 | ||||
-rw-r--r-- | src/arm/windows/init-by-logical-sys-info.c | 885 | ||||
-rw-r--r-- | src/arm/windows/init.c | 253 | ||||
-rw-r--r-- | src/arm/windows/windows-arm-init.h | 32 | ||||
-rw-r--r-- | src/cpuinfo/internal-api.h | 6 | ||||
-rw-r--r-- | src/init.c | 2 | ||||
-rw-r--r-- | src/x86/name.c | 2 |
17 files changed, 1525 insertions, 140 deletions
diff --git a/src/arm/cache.c b/src/arm/cache.c index 446b02b..1a6dd38 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -535,6 +535,7 @@ void cpuinfo_arm_decode_cache( l2_size = 1024 * 1024; break; case 660: + case 662: /* Snapdragon 660: 1 MB L2 (little cores only) */ l2_size = 1024 * 1024; break; @@ -1238,6 +1239,63 @@ void cpuinfo_arm_decode_cache( }; break; } + case cpuinfo_uarch_neoverse_n1: + case cpuinfo_uarch_neoverse_v1: + case cpuinfo_uarch_neoverse_n2: + { + /* + * ARM Neoverse-n1 Core Technical Reference Manual + * A6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB. + * + * A6.1.1 L1 instruction-side memory system + * The L1 instruction memory system has the following key features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * + * A6.1.2 L1 data-side memory system + * The L1 data memory system has the following features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * - Pseudo-LRU cache replacement policy. + * + * A7.1 About the L2 memory system + * The L2 memory subsystem consist of: + * - An 8-way set associative L2 cache with a configurable size of 256KB, 512KB, or 1024KB. Cache lines + * have a fixed length of 64 bytes. + * - Strictly inclusive with L1 data cache. + * - When configured with instruction cache hardware coherency, strictly inclusive with L1 instruction cache. + * - When configured without instruction cache hardware coherency, weakly inclusive with L1 instruction cache. 
+ */ + + const uint32_t min_l2_size_KB= 256; + const uint32_t min_l3_size_KB = 0; + + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l2 = (struct cpuinfo_cache) { + .size = min_l2_size_KB * 1024, + .associativity = 8, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + *l3 = (struct cpuinfo_cache) { + .size = min_l3_size_KB * 1024, + .associativity = 16, + .line_size = 64, + }; + break; + } #if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) case cpuinfo_uarch_scorpion: /* @@ -1655,6 +1713,9 @@ uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* proc */ return 8 * 1024 * 1024; case cpuinfo_uarch_cortex_a55: + case cpuinfo_uarch_neoverse_n1: + case cpuinfo_uarch_neoverse_v1: + case cpuinfo_uarch_neoverse_n2: case cpuinfo_uarch_cortex_a75: case cpuinfo_uarch_cortex_a76: case cpuinfo_uarch_exynos_m4: diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c index df68aa1..fb95ee9 100644 --- a/src/arm/linux/aarch32-isa.c +++ b/src/arm/linux/aarch32-isa.c @@ -64,6 +64,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( * - Processors with Exynos M4 cores * - Processors with Exynos M5 cores * - Neoverse N1 cores + * - Neoverse V1 cores + * - Neoverse N2 cores */ if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) { /* Only little cores of Exynos 9810 support FP16 & RDM */ @@ -73,9 +75,11 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( case UINT32_C(0x4100D050): /* Cortex-A55 */ case UINT32_C(0x4100D060): /* Cortex-A65 */ case UINT32_C(0x4100D0B0): /* Cortex-A76 */ - case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D460): /* Cortex-A510 */ + case UINT32_C(0x4100D470): /* Cortex-A710 */ + case UINT32_C(0x4100D480): 
/* Cortex-X2 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */ case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */ @@ -98,6 +102,9 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x4100D460): /* Cortex-A510 */ + case UINT32_C(0x4100D470): /* Cortex-A710 */ + case UINT32_C(0x4100D480): /* Cortex-X2 */ case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ case UINT32_C(0x53000030): /* Exynos-M4 */ diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c index 2000e1a..44a8f4d 100644 --- a/src/arm/linux/aarch64-isa.c +++ b/src/arm/linux/aarch64-isa.c @@ -41,6 +41,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( * - Processors with Exynos M4 cores * - Processors with Exynos M5 cores * - Neoverse N1 cores + * - Neoverse V1 cores + * - Neoverse N2 cores */ if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) { /* Exynos 9810 reports that it supports FP16 compute, but in fact only little cores do */ @@ -54,6 +56,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */ case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */ @@ -78,6 +82,9 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( break; } } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_I8MM) { + isa->i8mm = true; + } /* * Many phones ship with an old kernel configuration that doesn't 
report UDOT/SDOT instructions. @@ -89,6 +96,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ case UINT32_C(0x4100D4A0): /* Neoverse E1 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ @@ -124,4 +133,13 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) { isa->sve2 = true; } + // SVEBF16 is set iff SVE and BF16 are both supported, but the SVEBF16 feature flag + // was added in Linux kernel before the BF16 feature flag, so we check for either. + if (features2 & (CPUINFO_ARM_LINUX_FEATURE2_BF16 | CPUINFO_ARM_LINUX_FEATURE2_SVEBF16)) { + isa->bf16 = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM) { + isa->fhm = true; + } } + diff --git a/src/arm/linux/chipset.c b/src/arm/linux/chipset.c index e36283c..f2a002d 100644 --- a/src/arm/linux/chipset.c +++ b/src/arm/linux/chipset.c @@ -1,3 +1,4 @@ +#include <ctype.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> @@ -281,6 +282,82 @@ static bool match_sm( return true; } + +struct special_map_entry { + const char* platform; + uint16_t model; + uint8_t series; + char suffix; +}; + +static const struct special_map_entry qualcomm_hardware_map_entries[] = { + { + /* "Kona" -> Qualcomm Kona */ + .platform = "Kona", + .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon, + .model = 865, + }, + { + /* "Bengal" -> Qualcomm Bengal */ + .platform = "Bengal", + .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon, + .model = 662, + }, + { + /* "Bengalp" -> Qualcomm Bengalp */ + .platform = "Bengalp", + .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon, + .model = 662, + }, + { + /* "Lito" -> Qualcomm Lito */ + .platform = "Lito", + 
.series = cpuinfo_arm_chipset_series_qualcomm_snapdragon, + .model = 765, + .suffix = 'G' + }, + { + /* "Lagoon" -> Qualcomm Lagoon */ + .platform = "Lagoon", + .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon, + .model = 0, + }, +}; + + +int strcicmp(char const *a, char const *b) +{ + for (;; a++, b++) { + int d = tolower((unsigned char)*a) - tolower((unsigned char)*b); + if (d != 0 || !*a) + return d; + } +} + +static bool match_qualcomm_special( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + for (size_t i = 0; i < CPUINFO_COUNT_OF(qualcomm_hardware_map_entries); i++) { + int length = end - start; + if (strcicmp(qualcomm_hardware_map_entries[i].platform, start) == 0 && + qualcomm_hardware_map_entries[i].platform[length] == 0) + { + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = chipset_series_vendor[qualcomm_hardware_map_entries[i].series], + .series = (enum cpuinfo_arm_chipset_series) qualcomm_hardware_map_entries[i].series, + .model = qualcomm_hardware_map_entries[i].model, + .suffix = { + [0] = qualcomm_hardware_map_entries[i].suffix, + }, + }; + return true; + } + } + return false; + +} + /** * Tries to match /Samsung Exynos\d{4}$/ signature (case-insensitive) for Samsung Exynos chipsets. * If match successful, extracts model information into \p chipset argument. 
@@ -1351,7 +1428,7 @@ static bool match_and_parse_sunxi( return false; } - /* Compare sunXi platform id and number of cores to tabluted values to decode chipset name */ + /* Compare sunXi platform id and number of cores to tabulated values to decode chipset name */ uint32_t model = 0; char suffix = 0; for (size_t i = 0; i < CPUINFO_COUNT_OF(sunxi_map_entries); i++) { @@ -1752,13 +1829,6 @@ static bool is_tegra(const char* start, const char* end) { return (length == 5 || start[5] == '3'); } -struct special_map_entry { - const char* platform; - uint16_t model; - uint8_t series; - char suffix; -}; - static const struct special_map_entry special_hardware_map_entries[] = { #if CPUINFO_ARCH_ARM { @@ -2317,6 +2387,14 @@ struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_ha (int) hardware_length, hardware); return chipset; } + + if (match_qualcomm_special(pos, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Qualcomm signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + } word_start = false; break; diff --git a/src/arm/linux/clusters.c b/src/arm/linux/clusters.c index c7a4045..430773d 100644 --- a/src/arm/linux/clusters.c +++ b/src/arm/linux/clusters.c @@ -48,7 +48,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { * @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags. * @param max_processors - number of elements in the @p processors array. * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum - * frequency, MIDR infromation, and core cluster (package siblings list) information. + * frequency, MIDR information, and core cluster (package siblings list) information. * * @retval true if the heuristic successfully assigned all processors into clusters of cores. 
* @retval false if known details about processors contradict the heuristic configuration of core clusters. @@ -292,9 +292,9 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic( * - Processors assigned to these clusters stay assigned to the same clusters * - No new processors are added to these clusters * - Processors without pre-assigned cluster are clustered in one sequential scan: - * - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceeding - * processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceeding processor. - * - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceeding + * - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceding + * processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceding processor. + * - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceding * processor, the processor is assigned to a newly created cluster. * * The function must be called after parsing OS-provided information on core clusters, and usually is called only @@ -309,7 +309,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic( * * @param max_processors - number of elements in the @p processors array. * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum - * frequency, MIDR infromation, and core cluster (package siblings list) information. + * frequency, MIDR information, and core cluster (package siblings list) information. * * @retval true if the heuristic successfully assigned all processors into clusters of cores. * @retval false if known details about processors contradict the heuristic configuration of core clusters. 
@@ -331,7 +331,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { if (cluster_min_frequency != processors[i].min_frequency) { cpuinfo_log_info( - "minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); " + "minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceding cluster (%"PRIu32" KHz); " "processor %"PRIu32" starts to a new cluster", i, processors[i].min_frequency, cluster_min_frequency, i); goto new_cluster; @@ -346,7 +346,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { if (cluster_max_frequency != processors[i].max_frequency) { cpuinfo_log_debug( - "maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); " + "maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceding cluster (%"PRIu32" KHz); " "processor %"PRIu32" starts a new cluster", i, processors[i].max_frequency, cluster_max_frequency, i); goto new_cluster; @@ -361,7 +361,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) { if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) { cpuinfo_log_debug( - "CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceeding cluster (0x%02"PRIx32"); " + "CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceding cluster (0x%02"PRIx32"); " "processor %"PRIu32" starts to a new cluster", i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr), i); goto new_cluster; @@ -417,11 +417,11 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( } } - /* All checks passed, attach processor to the preceeding cluster */ + /* All checks 
passed, attach processor to the preceding cluster */ cluster_processors++; processors[i].package_leader_id = cluster_start; processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; - cpuinfo_log_debug("assigned processor %"PRIu32" to preceeding cluster of processor %"PRIu32, i, cluster_start); + cpuinfo_log_debug("assigned processor %"PRIu32" to preceding cluster of processor %"PRIu32, i, cluster_start); continue; new_cluster: diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c index 90e1631..817da12 100644 --- a/src/arm/linux/cpuinfo.c +++ b/src/arm/linux/cpuinfo.c @@ -177,6 +177,10 @@ static void parse_features( #if CPUINFO_ARCH_ARM64 processor->features |= CPUINFO_ARM_LINUX_FEATURE_FCMA; #endif + } else if (memcmp(feature_start, "i8mm", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_I8MM; + #endif #if CPUINFO_ARCH_ARM } else if (memcmp(feature_start, "half", feature_length) == 0) { processor->features |= CPUINFO_ARM_LINUX_FEATURE_HALF; @@ -283,6 +287,10 @@ static void parse_features( #if CPUINFO_ARCH_ARM64 processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM; #endif + } else if (memcmp(feature_start, "asimdfhm", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM; + #endif #if CPUINFO_ARCH_ARM } else if (memcmp(feature_start, "fastmult", feature_length) == 0) { processor->features |= CPUINFO_ARM_LINUX_FEATURE_FASTMULT; diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c index 23d8439..d3da5a9 100644 --- a/src/arm/linux/init.c +++ b/src/arm/linux/init.c @@ -510,7 +510,7 @@ void cpuinfo_arm_linux_init(void) { uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; /* Indication whether L3 (if it exists) is shared between all cores */ bool shared_l3 = true; - /* Populate cache infromation structures in l1i, l1d */ + /* Populate cache information structures in l1i, l1d */ for (uint32_t i = 0; i < 
valid_processors; i++) { if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) { cluster_id += 1; diff --git a/src/arm/linux/midr.c b/src/arm/linux/midr.c index 2c3116b..0d8f03f 100644 --- a/src/arm/linux/midr.c +++ b/src/arm/linux/midr.c @@ -675,10 +675,10 @@ static bool cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic( /* * Initializes MIDR for leaders of core clusters in a single sequential scan: - * - Clusters preceeding the first reported MIDR value are assumed to have default MIDR value. + * - Clusters preceding the first reported MIDR value are assumed to have default MIDR value. * - Clusters following any reported MIDR value to have that MIDR value. * - * @param default_midr - MIDR value that will be assigned to cluster leaders preceeding any reported MIDR value. + * @param default_midr - MIDR value that will be assigned to cluster leaders preceding any reported MIDR value. * @param processors_count - number of logical processor descriptions in the @p processors array. * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency, * and decoded core cluster (package_leader_id) information. @@ -833,7 +833,7 @@ uint32_t cpuinfo_arm_linux_detect_cluster_midr( * 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration, * and estimate MIDR for the other cluster under assumption that MIDR for the big cluster is known. * 3. Initialize MIDRs for core clusters in a single sequential scan: - * - Clusters preceeding the first reported MIDR value are assumed to have the last reported MIDR value. + * - Clusters preceding the first reported MIDR value are assumed to have the last reported MIDR value. * - Clusters following any reported MIDR value to have that MIDR value. 
*/ diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index dbea578..6a28b2d 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -15,43 +15,25 @@ #include <cpuinfo/log.h> /* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */ -#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL - #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6 -#endif #ifndef CPUFAMILY_ARM_VORTEX_TEMPEST - #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F + #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F #endif #ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER - #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2 + #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2 #endif #ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3 #endif +#ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD + #define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D +#endif struct cpuinfo_arm_isa cpuinfo_isa = { -#if CPUINFO_ARCH_ARM - .thumb = true, - .thumb2 = true, - .thumbee = false, - .jazelle = false, - .armv5e = true, - .armv6 = true, - .armv6k = true, - .armv7 = true, - .vfpv2 = false, - .vfpv3 = true, - .d32 = true, - .wmmx = false, - .wmmx2 = false, - .neon = true, -#endif -#if CPUINFO_ARCH_ARM64 .aes = true, .sha1 = true, .sha2 = true, .pmull = true, .crc32 = true, -#endif }; static uint32_t get_sys_info(int type_specifier, const char* name) { @@ -83,10 +65,8 @@ static uint32_t get_sys_info_by_name(const char* type_specifier) { return result; } -static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype, uint32_t core_index, uint32_t core_count) { +static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t core_index, uint32_t core_count) { switch (cpu_family) { - case CPUFAMILY_ARM_SWIFT: - return cpuinfo_uarch_swift; case CPUFAMILY_ARM_CYCLONE: return cpuinfo_uarch_cyclone; case CPUFAMILY_ARM_TYPHOON: @@ -107,25 +87,15 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype case CPUFAMILY_ARM_FIRESTORM_ICESTORM: /* Hexa-core: 2x 
Firestorm + 4x Icestorm; Octa-core: 4x Firestorm + 4x Icestorm */ return core_index + 4 < core_count ? cpuinfo_uarch_firestorm : cpuinfo_uarch_icestorm; + case CPUFAMILY_ARM_AVALANCHE_BLIZZARD: + /* Hexa-core: 2x Avalanche + 4x Blizzard */ + return core_index + 4 < core_count ? cpuinfo_uarch_avalanche : cpuinfo_uarch_blizzard; default: /* Use hw.cpusubtype for detection */ break; } - #if CPUINFO_ARCH_ARM - switch (cpu_subtype) { - case CPU_SUBTYPE_ARM_V7: - return cpuinfo_uarch_cortex_a8; - case CPU_SUBTYPE_ARM_V7F: - return cpuinfo_uarch_cortex_a9; - case CPU_SUBTYPE_ARM_V7K: - return cpuinfo_uarch_cortex_a7; - default: - return cpuinfo_uarch_unknown; - } - #else - return cpuinfo_uarch_unknown; - #endif + return cpuinfo_uarch_unknown; } static void decode_package_name(char* package_name) { @@ -299,71 +269,118 @@ void cpuinfo_arm_mach_init(void) { const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); - const uint32_t cpu_type = get_sys_info_by_name("hw.cputype"); - const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype"); - switch (cpu_type) { - case CPU_TYPE_ARM64: - cpuinfo_isa.aes = true; - cpuinfo_isa.sha1 = true; - cpuinfo_isa.sha2 = true; - cpuinfo_isa.pmull = true; - cpuinfo_isa.crc32 = true; - break; -#if CPUINFO_ARCH_ARM - case CPU_TYPE_ARM: - switch (cpu_subtype) { - case CPU_SUBTYPE_ARM_V8: - cpuinfo_isa.armv8 = true; - cpuinfo_isa.aes = true; - cpuinfo_isa.sha1 = true; - cpuinfo_isa.sha2 = true; - cpuinfo_isa.pmull = true; - cpuinfo_isa.crc32 = true; - /* Fall-through to add ARMv7S features */ - case CPU_SUBTYPE_ARM_V7S: - case CPU_SUBTYPE_ARM_V7K: - cpuinfo_isa.fma = true; - /* Fall-through to add ARMv7F features */ - case CPU_SUBTYPE_ARM_V7F: - cpuinfo_isa.armv7mp = true; - cpuinfo_isa.fp16 = true; - /* Fall-through to add ARMv7 features */ - case CPU_SUBTYPE_ARM_V7: - break; - default: - break; - } - break; -#endif - } + /* - * Support for ARMv8.1 Atomics & FP16 arithmetic instructions is supposed to be detected via - * 
sysctlbyname calls with "hw.optional.armv8_1_atomics" and "hw.optional.neon_fp16" arguments - * (see https://devstreaming-cdn.apple.com/videos/wwdc/2018/409t8zw7rumablsh/409/409_whats_new_in_llvm.pdf), - * but on new iOS versions these calls just fail with EPERM. - * - * Thus, we whitelist CPUs known to support these instructions. + * iOS 15 and macOS 12 added sysctls for ARM features, use them where possible. + * Otherwise, fallback to hardcoded set of CPUs with known support. */ - switch (cpu_family) { - case CPUFAMILY_ARM_MONSOON_MISTRAL: - case CPUFAMILY_ARM_VORTEX_TEMPEST: - case CPUFAMILY_ARM_LIGHTNING_THUNDER: - case CPUFAMILY_ARM_FIRESTORM_ICESTORM: - #if CPUINFO_ARCH_ARM64 + const uint32_t has_feat_lse = get_sys_info_by_name("hw.optional.arm.FEAT_LSE"); + if (has_feat_lse != 0) { + cpuinfo_isa.atomics = true; + } else { + // Mandatory in ARMv8.1-A, list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_MONSOON_MISTRAL: + case CPUFAMILY_ARM_VORTEX_TEMPEST: + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: cpuinfo_isa.atomics = true; - #endif - cpuinfo_isa.fp16arith = true; + } } - /* - * There does not yet seem to exist an OS mechanism to detect support for - * ARMv8.2 optional dot-product instructions, so we currently whitelist CPUs - * known to support these instruction. 
- */ - switch (cpu_family) { - case CPUFAMILY_ARM_LIGHTNING_THUNDER: - case CPUFAMILY_ARM_FIRESTORM_ICESTORM: - cpuinfo_isa.dot = true; + const uint32_t has_feat_rdm = get_sys_info_by_name("hw.optional.arm.FEAT_RDM"); + if (has_feat_rdm != 0) { + cpuinfo_isa.rdm = true; + } else { + // Optional in ARMv8.2-A (implemented in Apple cores), + // list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_MONSOON_MISTRAL: + case CPUFAMILY_ARM_VORTEX_TEMPEST: + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.rdm = true; + } + } + + const uint32_t has_feat_fp16 = get_sys_info_by_name("hw.optional.arm.FEAT_FP16"); + if (has_feat_fp16 != 0) { + cpuinfo_isa.fp16arith = true; + } else { + // Optional in ARMv8.2-A (implemented in Apple cores), + // list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_MONSOON_MISTRAL: + case CPUFAMILY_ARM_VORTEX_TEMPEST: + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.fp16arith = true; + } + } + + const uint32_t has_feat_fhm = get_sys_info_by_name("hw.optional.arm.FEAT_FHM"); + if (has_feat_fhm != 0) { + cpuinfo_isa.fhm = true; + } else { + // Prior to iOS 15, use 'hw.optional.armv8_2_fhm' + const uint32_t has_feat_fhm_legacy = get_sys_info_by_name("hw.optional.armv8_2_fhm"); + if (has_feat_fhm_legacy != 0) { + cpuinfo_isa.fhm = true; + } else { + // Mandatory in ARMv8.4-A when FP16 arithmetics is implemented, + // list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.fhm = true; + } + } + } + + const uint32_t has_feat_bf16 = get_sys_info_by_name("hw.optional.arm.FEAT_BF16"); + if (has_feat_bf16 != 0) { + cpuinfo_isa.bf16 = true; + } + + const uint32_t has_feat_fcma = get_sys_info_by_name("hw.optional.arm.FEAT_FCMA"); + if (has_feat_fcma != 0) { + cpuinfo_isa.fcma = 
true; + } else { + // Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.fcma = true; + } + } + + const uint32_t has_feat_jscvt = get_sys_info_by_name("hw.optional.arm.FEAT_JSCVT"); + if (has_feat_jscvt != 0) { + cpuinfo_isa.jscvt = true; + } else { + // Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.jscvt = true; + } + } + + const uint32_t has_feat_dotprod = get_sys_info_by_name("hw.optional.arm.FEAT_DotProd"); + if (has_feat_dotprod != 0) { + cpuinfo_isa.dot = true; + } else { + // Mandatory in ARMv8.4-A, list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.dot = true; + } + } + + const uint32_t has_feat_i8mm = get_sys_info_by_name("hw.optional.arm.FEAT_I8MM"); + if (has_feat_i8mm != 0) { + cpuinfo_isa.i8mm = true; } uint32_t num_clusters = 1; @@ -374,7 +391,7 @@ void cpuinfo_arm_mach_init(void) { .core_id = i % cores_per_package, .package = packages + i / cores_per_package, .vendor = cpuinfo_vendor_apple, - .uarch = decode_uarch(cpu_family, cpu_subtype, i, mach_topology.cores), + .uarch = decode_uarch(cpu_family, i, mach_topology.cores), }; if (i != 0 && cores[i].uarch != cores[i - 1].uarch) { num_clusters++; diff --git a/src/arm/midr.h b/src/arm/midr.h index 739dc19..b0e244c 100644 --- a/src/arm/midr.h +++ b/src/arm/midr.h @@ -174,23 +174,25 @@ inline static uint32_t midr_score_core(uint32_t midr) { case UINT32_C(0x53000030): /* Exynos M4 */ case UINT32_C(0x53000040): /* Exynos M5 */ case UINT32_C(0x4100D440): /* Cortex-X1 */ - /* These cores are in big role w.r.t Cortex-A75/-A76/-A77/-A78 */ + case UINT32_C(0x4100D480): /* Cortex-X2 */ + /* These cores are in big 
role w.r.t Cortex-A75/-A76/-A77/-A78/-A710 */ return 6; + case UINT32_C(0x4100D080): /* Cortex-A72 */ + case UINT32_C(0x4100D090): /* Cortex-A73 */ + case UINT32_C(0x4100D0A0): /* Cortex-A75 */ + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D410): /* Cortex-A78 */ + case UINT32_C(0x4100D470): /* Cortex-A710 */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x4E000030): /* Denver 2 */ + case UINT32_C(0x51002050): /* Kryo Gold */ + case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */ + case UINT32_C(0x51008020): /* Kryo 385 Gold */ + case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */ case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */ case UINT32_C(0x53000020): /* Exynos M3 */ - case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */ - case UINT32_C(0x51008020): /* Kryo 385 Gold */ - case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */ - case UINT32_C(0x51002050): /* Kryo Gold */ - case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ - case UINT32_C(0x4100D410): /* Cortex-A78 */ - case UINT32_C(0x4100D0D0): /* Cortex-A77 */ - case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ - case UINT32_C(0x4100D0B0): /* Cortex-A76 */ - case UINT32_C(0x4100D0A0): /* Cortex-A75 */ - case UINT32_C(0x4100D090): /* Cortex-A73 */ - case UINT32_C(0x4100D080): /* Cortex-A72 */ #if CPUINFO_ARCH_ARM case UINT32_C(0x4100C0F0): /* Cortex-A15 */ case UINT32_C(0x4100C0E0): /* Cortex-A17 */ @@ -205,8 +207,9 @@ inline static uint32_t midr_score_core(uint32_t midr) { #if CPUINFO_ARCH_ARM64 case UINT32_C(0x4100D060): /* Cortex-A65 */ #endif /* CPUINFO_ARCH_ARM64 */ - case UINT32_C(0x4100D050): /* Cortex-A55 */ case UINT32_C(0x4100D030): /* Cortex-A53 */ + case UINT32_C(0x4100D050): /* Cortex-A55 */ + case UINT32_C(0x4100D460): /* Cortex-A510 */ /* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. 
Cortex-A35 */ return 2; case UINT32_C(0x4100D040): /* Cortex-A35 */ diff --git a/src/arm/uarch.c b/src/arm/uarch.c index 8b5362b..1d4c6ee 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -91,13 +91,30 @@ void cpuinfo_arm_decode_vendor_uarch( case 0xD0E: /* Cortex-A76AE */ *uarch = cpuinfo_uarch_cortex_a76; break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD40: + *uarch = cpuinfo_uarch_neoverse_v1; + break; +#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ case 0xD41: /* Cortex-A78 */ *uarch = cpuinfo_uarch_cortex_a78; break; case 0xD44: /* Cortex-X1 */ *uarch = cpuinfo_uarch_cortex_x1; break; + case 0xD46: /* Cortex-A510 */ + *uarch = cpuinfo_uarch_cortex_a510; + break; + case 0xD47: /* Cortex-A710 */ + *uarch = cpuinfo_uarch_cortex_a710; + break; + case 0xD48: /* Cortex-X2 */ + *uarch = cpuinfo_uarch_cortex_x2; + break; #if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD49: + *uarch = cpuinfo_uarch_neoverse_n2; + break; case 0xD4A: *uarch = cpuinfo_uarch_neoverse_e1; break; diff --git a/src/arm/windows/init-by-logical-sys-info.c b/src/arm/windows/init-by-logical-sys-info.c new file mode 100644 index 0000000..f088011 --- /dev/null +++ b/src/arm/windows/init-by-logical-sys-info.c @@ -0,0 +1,885 @@ +#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <errno.h>
+#include <sys/types.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include "windows-arm-init.h"
+
+/* Length of the per-level cache counter array (numbers_of_caches) and bound of the loops that walk it in this file. */
+#define MAX_NR_OF_CACHES (cpuinfo_cache_level_max - 1)
+
+/* Call chain:
+ * cpu_info_init_by_logical_sys_info
+ * read_packages_for_processors
+ * read_cores_for_processors
+ * read_caches_for_processors
+ * read_all_logical_processor_info_of_relation
+ * parse_relation_processor_info
+ * store_package_info_per_processor
+ * store_core_info_per_processor
+ * parse_relation_cache_info
+ * store_cache_info_per_processor
+ */
+
+/*
+ * Forward declarations of the file-local helpers used by
+ * cpu_info_init_by_logical_sys_info() and by each other.
+ */
+
+/* Sums the processor counts of all groups and records, per group, how many processors precede it. */
+static uint32_t count_logical_processors(
+ const uint32_t max_group_count,
+ uint32_t* global_proc_index_per_group);
+
+/* Runs the RelationProcessorPackage query; the caller treats a zero result as failure. */
+static uint32_t read_packages_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info);
+
+/* Runs the RelationProcessorCore query; called once with cores == NULL to count and once more to fill. */
+static uint32_t read_cores_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info);
+
+/* Runs the RelationCache query; called once with caches == NULL to count and once more to fill. */
+static uint32_t read_caches_for_processors(
+ struct cpuinfo_processor *processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache *caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info);
+
+/* Common back end of the three read_*_for_processors() wrappers; info_type selects the relation to query. */
+static uint32_t read_all_logical_processor_info_of_relation(
+ LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ struct cpuinfo_core* cores,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info);
+
+/* Per the call chain above, reached from read_all_logical_processor_info_of_relation(). */
+static bool parse_relation_processor_info(
+ struct cpuinfo_processor* processors,
+ uint32_t nr_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ const uint32_t info_id,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info);
+
+/* Per the call chain above, reached from read_all_logical_processor_info_of_relation(). */
+static bool parse_relation_cache_info(
+ struct cpuinfo_processor* processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info);
+
+/* Per the call chain above, reached from parse_relation_processor_info(). */
+static void store_package_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t package_id,
+ const uint32_t group_id,
+ const uint32_t processor_id_in_group);
+
+/* Per the call chain above, reached from parse_relation_processor_info(). */
+static void store_core_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t core_id,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info);
+
+/* Per the call chain above, reached from parse_relation_cache_info(). */
+static void store_cache_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ struct cpuinfo_cache* current_cache);
+
+/*
+ * Called by cpu_info_init_by_logical_sys_info() after all topology data has been read;
+ * a false result aborts the initialization.
+ */
+static bool connect_packages_cores_clusters_by_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t nr_of_processors,
+ struct cpuinfo_package* packages,
+ const uint32_t nr_of_packages,
+ struct cpuinfo_cluster* clusters,
+ struct cpuinfo_core* cores,
+ const uint32_t nr_of_cores,
+ const struct woa_chip_info* chip_info,
+ enum cpuinfo_vendor vendor);
+
+/* NOTE(review): presumably maps an affinity mask to the index of its lowest set bit - confirm against the definition. */
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity);
+
+
+/*
+ * Builds the global cpuinfo topology tables (processors, packages, clusters, cores,
+ * uarchs and caches) from the Windows logical processor information and publishes
+ * them through the cpuinfo_* globals.
+ *
+ * @param chip_info - chip description, forwarded to the topology readers and to the
+ *                    package/cluster/core linking step.
+ * @param vendor - CPU vendor, forwarded to the linking step.
+ *
+ * @retval true if every table was read, allocated and committed to the cpuinfo_* globals.
+ * @retval false if any read or allocation failed; all tables allocated up to that point
+ *               are released and the cpuinfo_* globals are not modified.
+ */
+bool cpu_info_init_by_logical_sys_info(
+	const struct woa_chip_info *chip_info,
+	const enum cpuinfo_vendor vendor)
+{
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_package* packages = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cache* caches = NULL;
+	struct cpuinfo_uarch_info* uarchs = NULL;
+
+	uint32_t nr_of_packages = 0;
+	uint32_t nr_of_cores = 0;
+	uint32_t nr_of_all_caches = 0;
+	uint32_t numbers_of_caches[MAX_NR_OF_CACHES] = {0};
+
+	uint32_t nr_of_uarchs = 0;
+	bool result = false;
+
+	HANDLE heap = GetProcessHeap();
+
+	/* 1. Count available logical processor groups and processors */
+	const uint32_t max_group_count = (uint32_t) GetMaximumProcessorGroupCount();
+	cpuinfo_log_debug("detected %"PRIu32" processor group(s)", max_group_count);
+	/* We need to store the absolute processor ID offset of every group, because
+	 *  1. We can't assume every processor group includes the same number of
+	 *     logical processors.
+	 *  2. Every processor group knows its group number and the processor IDs within
+	 *     the group, but not the global processor IDs.
+	 *  3. We need to list every logical processor by global ID.
+	 */
+	uint32_t* global_proc_index_per_group =
+		(uint32_t*) HeapAlloc(heap, 0, max_group_count * sizeof(uint32_t));
+	if (global_proc_index_per_group == NULL) {
+		/* Report the size that was actually requested: one uint32_t offset per group */
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %"PRIu32" processor groups",
+			max_group_count * sizeof(uint32_t), max_group_count);
+		goto clean_up;
+	}
+
+	uint32_t nr_of_processors =
+		count_logical_processors(max_group_count, global_proc_index_per_group);
+	processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_processors * sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
+			nr_of_processors * sizeof(struct cpuinfo_processor), nr_of_processors);
+		goto clean_up;
+	}
+
+	/* 2. Read topology information via the Windows API: packages, cores and caches */
+	nr_of_packages = read_packages_for_processors(
+		processors, nr_of_processors,
+		global_proc_index_per_group,
+		chip_info);
+	if (!nr_of_packages) {
+		cpuinfo_log_error("error in reading package information");
+		goto clean_up;
+	}
+	cpuinfo_log_debug("detected %"PRIu32" processor package(s)", nr_of_packages);
+
+	/* We need the EfficiencyClass to parse uarch from the core information,
+	 * but we have to iterate once to count the cores and allocate memory, then
+	 * iterate again to read and store the data into the cpuinfo_core structures.
+	 */
+	nr_of_cores = read_cores_for_processors(
+		processors, nr_of_processors,
+		global_proc_index_per_group, NULL,
+		chip_info);
+	if (!nr_of_cores) {
+		cpuinfo_log_error("error in reading core information");
+		goto clean_up;
+	}
+	cpuinfo_log_debug("detected %"PRIu32" processor core(s)", nr_of_cores);
+
+	/* There is no API to read the number of caches, so we need to iterate twice on caches:
+	 *  1. Count all types of caches -> allocate memory
+	 *  2. Read out cache data and store it in the allocated memory
+	 * On this first pass caches is still NULL.
+	 */
+	nr_of_all_caches = read_caches_for_processors(
+		processors, nr_of_processors,
+		caches, numbers_of_caches,
+		global_proc_index_per_group, chip_info);
+	if (!nr_of_all_caches) {
+		cpuinfo_log_error("error in reading cache information");
+		goto clean_up;
+	}
+	cpuinfo_log_debug("detected %"PRIu32" processor cache(s)", nr_of_all_caches);
+
+	/* 3. Allocate memory for package, cluster, core and cache structures */
+	packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_packages * sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
+			nr_of_packages * sizeof(struct cpuinfo_package), nr_of_packages);
+		goto clean_up;
+	}
+
+	/* We don't have cluster information so we explicitly set clusters to equal to cores. */
+	clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
+			nr_of_cores * sizeof(struct cpuinfo_cluster), nr_of_cores);
+		goto clean_up;
+	}
+
+	cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
+			nr_of_cores * sizeof(struct cpuinfo_core), nr_of_cores);
+		goto clean_up;
+	}
+
+	/* We allocate one contiguous cache array for all caches, then use offsets per cache type. */
+	caches = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_all_caches * sizeof(struct cpuinfo_cache));
+	if (caches == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" caches",
+			nr_of_all_caches * sizeof(struct cpuinfo_cache), nr_of_all_caches);
+		goto clean_up;
+	}
+
+	/* 4. Read the topology information that could not be stored in the first round,
+	 *    because the structures receiving it had not been counted and allocated yet.
+	 */
+	nr_of_all_caches = read_caches_for_processors(
+		processors, nr_of_processors,
+		caches, numbers_of_caches, global_proc_index_per_group, chip_info);
+	if (!nr_of_all_caches) {
+		cpuinfo_log_error("error in reading cache information");
+		goto clean_up;
+	}
+
+	nr_of_cores = read_cores_for_processors(
+		processors, nr_of_processors,
+		global_proc_index_per_group, cores,
+		chip_info);
+	if (!nr_of_cores) {
+		cpuinfo_log_error("error in reading core information");
+		goto clean_up;
+	}
+
+	/* 5. Now that we read out everything from the system we can, fill the package, cluster
+	 *    and core structures respectively.
+	 */
+	result = connect_packages_cores_clusters_by_processors(
+		processors, nr_of_processors,
+		packages, nr_of_packages,
+		clusters,
+		cores, nr_of_cores,
+		chip_info,
+		vendor);
+	if(!result) {
+		cpuinfo_log_error("error in connecting information");
+		goto clean_up;
+	}
+	/* Initialization is not complete until the tables are committed in step 7 */
+	result = false;
+
+	/* 6. Count and store uarchs of cores, assuming same uarchs are neighbors */
+	enum cpuinfo_uarch prev_uarch = cpuinfo_uarch_unknown;
+	for (uint32_t i = 0; i < nr_of_cores; i++) {
+		if (prev_uarch != cores[i].uarch) {
+			nr_of_uarchs++;
+			prev_uarch = cores[i].uarch;
+		}
+	}
+	uarchs = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_uarchs * sizeof(struct cpuinfo_uarch_info));
+	if (uarchs == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs",
+			nr_of_uarchs * sizeof(struct cpuinfo_uarch_info), nr_of_uarchs);
+		goto clean_up;
+	}
+	prev_uarch = cpuinfo_uarch_unknown;
+	/* Entry currently being accumulated; NULL until the first uarch run is opened */
+	struct cpuinfo_uarch_info* current_uarch = NULL;
+	for (uint32_t i = 0, uarch_counter = 0; i < nr_of_cores; i++) {
+		if (prev_uarch != cores[i].uarch) {
+			/* A new run of identical uarchs starts: open the next entry */
+			prev_uarch = cores[i].uarch;
+			current_uarch = &uarchs[uarch_counter];
+			uarch_counter++;
+			current_uarch->uarch = cores[i].uarch;
+			current_uarch->core_count = 1;
+			current_uarch->processor_count = cores[i].processor_count;
+		} else if (prev_uarch != cpuinfo_uarch_unknown) {
+			/*
+			 * Same run continues: accumulate into the entry opened for it, not into
+			 * uarchs[uarch_counter], which is the next (possibly out-of-bounds) slot.
+			 * prev_uarch only leaves "unknown" in the branch above, so current_uarch
+			 * is non-NULL here.
+			 */
+			current_uarch->core_count++;
+			current_uarch->processor_count += cores[i].processor_count;
+		}
+	}
+
+	/* 7. Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_packages = packages;
+	cpuinfo_clusters = clusters;
+	cpuinfo_cores = cores;
+	cpuinfo_uarchs = uarchs;
+
+	cpuinfo_processors_count = nr_of_processors;
+	cpuinfo_packages_count = nr_of_packages;
+	cpuinfo_clusters_count = nr_of_cores;
+	cpuinfo_cores_count = nr_of_cores;
+	cpuinfo_uarchs_count = nr_of_uarchs;
+
+	for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) {
+		cpuinfo_cache_count[i] = numbers_of_caches[i];
+	}
+	/* All cache levels live in the single caches array; each level starts where the previous one ends */
+	cpuinfo_cache[cpuinfo_cache_level_1i] = caches;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = cpuinfo_cache[cpuinfo_cache_level_1i] + cpuinfo_cache_count[cpuinfo_cache_level_1i];
+	cpuinfo_cache[cpuinfo_cache_level_2]  = cpuinfo_cache[cpuinfo_cache_level_1d] + cpuinfo_cache_count[cpuinfo_cache_level_1d];
+	cpuinfo_cache[cpuinfo_cache_level_3]  = cpuinfo_cache[cpuinfo_cache_level_2]  + cpuinfo_cache_count[cpuinfo_cache_level_2];
+	cpuinfo_cache[cpuinfo_cache_level_4]  = cpuinfo_cache[cpuinfo_cache_level_3]  + cpuinfo_cache_count[cpuinfo_cache_level_3];
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	result = true;
+	MemoryBarrier();
+
+	/* Ownership moved to the cpuinfo_* globals: make sure clean_up does not free them */
+	processors = NULL;
+	packages = NULL;
+	clusters = NULL;
+	cores = NULL;
+	caches = NULL;
+	uarchs = NULL;
+
+clean_up:
+	/* The propagated pointers must not be freed; they are only non-NULL here
+	 * in case of an error and unfinished init.
+	 */
+	if (processors != NULL) {
+		HeapFree(heap, 0, processors);
+	}
+	if (packages != NULL) {
+		HeapFree(heap, 0, packages);
+	}
+	if (clusters != NULL) {
+		HeapFree(heap, 0, clusters);
+	}
+	if (cores != NULL) {
+		HeapFree(heap, 0, cores);
+	}
+	if (caches != NULL) {
+		HeapFree(heap, 0, caches);
+	}
+	if (uarchs != NULL) {
+		HeapFree(heap, 0, uarchs);
+	}
+
+	/* Free the locally used temporary pointers */
+	HeapFree(heap, 0, global_proc_index_per_group);
+	global_proc_index_per_group = NULL;
+	return result;
+}
+
+/*
+ * Walks all processor groups and accumulates their processor counts.
+ *
+ * @param max_group_count - number of processor groups to visit.
+ * @param[out] global_proc_index_per_group - receives, for each group, the number of
+ *             processors counted in all lower-numbered groups, i.e. the global index
+ *             of the group's first processor.
+ *
+ * @return the sum of the per-group processor counts.
+ */
+static uint32_t count_logical_processors(
+	const uint32_t max_group_count,
+	uint32_t* global_proc_index_per_group)
+{
+	uint32_t running_total = 0;
+	uint32_t group = 0;
+
+	while (group < max_group_count) {
+		const uint32_t group_size = GetMaximumProcessorCount((WORD) group);
+		cpuinfo_log_debug("detected %"PRIu32" processor(s) in group %"PRIu32"",
+			group_size, group);
+
+		/* Everything counted so far precedes this group */
+		global_proc_index_per_group[group] = running_total;
+		running_total += group_size;
+		group++;
+	}
+
+	return running_total;
+}
+
+/*
+ * Queries the RelationProcessorPackage information for the given processors.
+ *
+ * Thin wrapper over read_all_logical_processor_info_of_relation() that supplies no
+ * cache table, no cache counters and no core table.
+ *
+ * @return the value reported by read_all_logical_processor_info_of_relation();
+ *         the caller treats zero as failure.
+ */
+static uint32_t read_packages_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info *chip_info)
+{
+	/* Package enumeration takes neither cache nor core output buffers */
+	struct cpuinfo_cache* const no_caches = NULL;
+	uint32_t* const no_cache_counters = NULL;
+	struct cpuinfo_core* const no_cores = NULL;
+
+	const uint32_t package_result = read_all_logical_processor_info_of_relation(
+		RelationProcessorPackage,
+		processors, number_of_processors,
+		no_caches, no_cache_counters,
+		no_cores,
+		global_proc_index_per_group,
+		chip_info);
+
+	return package_result;
+}
+
+/*
+ * Queries the RelationProcessorCore information for the given processors.
+ *
+ * Thin wrapper over read_all_logical_processor_info_of_relation() that supplies no
+ * cache table and no cache counters.
+ *
+ * @param cores - core table to fill; the caller passes NULL on its first (counting)
+ *                pass and the allocated table on its second pass.
+ *
+ * @return the value reported by read_all_logical_processor_info_of_relation();
+ *         the caller treats zero as failure.
+ *
+ * Declared static to match the forward declaration at the top of the file and the
+ * sibling read_*_for_processors() definitions.
+ */
+static uint32_t read_cores_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info *chip_info)
+{
+	return read_all_logical_processor_info_of_relation(
+		RelationProcessorCore,
+		processors,
+		number_of_processors,
+		NULL,
+		NULL,
+		cores,
+		global_proc_index_per_group,
+		chip_info);
+}
+
+ /*
+  * Enumerates the RelationCache records through the generic reader.
+  *
+  * When `caches` is non-NULL, the processor_start field of every cache slot
+  * (the slots of all MAX_NR_OF_CACHES levels lie back to back in `caches`)
+  * is first set to UINT32_MAX so that the lowest processor index can later
+  * be found with a simple "smaller than" comparison.
+  *
+  * Returns whatever the generic reader returns for this relation.
+  */
+ static uint32_t read_caches_for_processors(
+     struct cpuinfo_processor* processors,
+     const uint32_t number_of_processors,
+     struct cpuinfo_cache* caches,
+     uint32_t* numbers_of_caches,
+     const uint32_t* global_proc_index_per_group,
+     const struct woa_chip_info *chip_info)
+ {
+     if (caches != NULL) {
+         /* Total number of slots over all cache levels. */
+         uint32_t total_caches = 0;
+         for (uint32_t level = 0; level < MAX_NR_OF_CACHES; level++) {
+             total_caches += numbers_of_caches[level];
+         }
+         /* Reset processor start indexes */
+         for (uint32_t slot = 0; slot < total_caches; slot++) {
+             caches[slot].processor_start = UINT32_MAX;
+         }
+     }
+
+     const uint32_t nr_of_caches = read_all_logical_processor_info_of_relation(
+         RelationCache,
+         processors,
+         number_of_processors,
+         caches,
+         numbers_of_caches,
+         NULL, /* cores */
+         global_proc_index_per_group,
+         chip_info);
+     return nr_of_caches;
+ }
+
+ /*
+  * Queries GetLogicalProcessorInformationEx() for one relationship type and
+  * hands every returned record to the matching parser.
+  *
+  * info_type: relationship to query (package, core or cache).
+  * processors / number_of_processors: logical processor array to annotate.
+  * caches / numbers_of_caches: forwarded to the cache parser only.
+  * cores: forwarded to the package/core parser only.
+  * global_proc_index_per_group: first global processor index of each group.
+  * chip_info: forwarded to the package/core parser.
+  *
+  * Returns the number of records of the requested type that were parsed, or
+  * 0 when the query, the allocation or any parser fails.
+  */
+ static uint32_t read_all_logical_processor_info_of_relation(
+     LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+     struct cpuinfo_processor* processors,
+     const uint32_t number_of_processors,
+     struct cpuinfo_cache* caches,
+     uint32_t* numbers_of_caches,
+     struct cpuinfo_core* cores,
+     const uint32_t* global_proc_index_per_group,
+     const struct woa_chip_info* chip_info)
+ {
+     PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX infos = NULL;
+     uint32_t nr_of_structs = 0;
+     DWORD info_size = 0;
+     bool result = false;
+     HANDLE heap = GetProcessHeap();
+
+     /* 1. Query the size of the information structure first */
+     if (GetLogicalProcessorInformationEx(info_type, NULL, &info_size) == FALSE) {
+         const DWORD last_error = GetLastError();
+         /* ERROR_INSUFFICIENT_BUFFER is the expected answer to a size query. */
+         if (last_error != ERROR_INSUFFICIENT_BUFFER) {
+             cpuinfo_log_error(
+                 "failed to query size of processor %"PRIu32" information: error %"PRIu32"",
+                 (uint32_t)info_type, (uint32_t) last_error);
+             goto clean_up;
+         }
+     }
+     /* 2. Allocate memory for the information structure */
+     infos = HeapAlloc(heap, 0, info_size);
+     if (infos == NULL) {
+         cpuinfo_log_error("failed to allocate %"PRIu32" bytes for logical processor information",
+             (uint32_t) info_size);
+         goto clean_up;
+     }
+     /* 3. Read the information structure */
+     if (GetLogicalProcessorInformationEx(info_type, infos, &info_size) == FALSE) {
+         cpuinfo_log_error("failed to query processor %"PRIu32" information: error %"PRIu32"",
+             (uint32_t)info_type, (uint32_t) GetLastError());
+         goto clean_up;
+     }
+
+     /* 4. Parse the structure and store relevant data */
+     PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info_end =
+         (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) infos + info_size);
+     for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = infos;
+         info < info_end;
+         info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) info + info->Size))
+     {
+         /* The records are variable-sized and chained through Size; a zero
+          * Size would never advance the cursor, so treat it as a failure.
+          */
+         if (info->Size == 0) {
+             cpuinfo_log_error("unexpected zero-sized processor information record");
+             nr_of_structs = 0;
+             goto clean_up;
+         }
+         if (info->Relationship != info_type) {
+             cpuinfo_log_warning(
+                 "unexpected processor info type (%"PRIu32") for processor information",
+                 (uint32_t) info->Relationship);
+             continue;
+         }
+
+         /* Records of the requested type are numbered in enumeration order. */
+         const uint32_t info_id = nr_of_structs++;
+
+         switch(info_type) {
+             case RelationProcessorPackage:
+             case RelationProcessorCore:
+                 /* Both relations share one parser, which tells them
+                  * apart by info->Relationship.
+                  */
+                 result = parse_relation_processor_info(
+                     processors,
+                     number_of_processors,
+                     global_proc_index_per_group,
+                     info,
+                     info_id,
+                     cores,
+                     chip_info);
+                 break;
+             case RelationCache:
+                 result = parse_relation_cache_info(
+                     processors,
+                     caches,
+                     numbers_of_caches,
+                     global_proc_index_per_group,
+                     info);
+                 break;
+             default:
+                 cpuinfo_log_error(
+                     "unexpected processor info type (%"PRIu32") for processor information",
+                     (uint32_t) info->Relationship);
+                 result = false;
+                 break;
+         }
+         if (!result) {
+             /* A parser failure invalidates the whole enumeration. */
+             nr_of_structs = 0;
+             goto clean_up;
+         }
+     }
+ clean_up:
+     /* 5. Release dynamically allocated info structure. */
+     HeapFree(heap, 0, infos);
+     infos = NULL;
+     return nr_of_structs;
+ }
+
+ /*
+  * Walks every processor bit of every group mask in one package or core
+  * record and stores the record's data on the matching logical processor.
+  *
+  * info_id is the running number of this record within its relation.
+  *
+  * Returns false when a mask bit maps to a processor index that is not below
+  * nr_of_processors, true otherwise. A record of any other relationship is
+  * logged as an error but does not change the return value.
+  */
+ static bool parse_relation_processor_info(
+     struct cpuinfo_processor* processors,
+     uint32_t nr_of_processors,
+     const uint32_t* global_proc_index_per_group,
+     PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+     const uint32_t info_id,
+     struct cpuinfo_core* cores,
+     const struct woa_chip_info *chip_info)
+ {
+     for (uint32_t mask_index = 0; mask_index < info->Processor.GroupCount; mask_index++) {
+         const uint32_t group_id = info->Processor.GroupMask[mask_index].Group;
+
+         /* Each set bit is one processor of this group; the loop step drops
+          * the lowest set bit until none is left.
+          */
+         for (KAFFINITY remaining = info->Processor.GroupMask[mask_index].Mask;
+             remaining != 0;
+             remaining &= (remaining - 1))
+         {
+             const uint32_t processor_id_in_group =
+                 low_index_from_kaffinity(remaining);
+             const uint32_t processor_global_index =
+                 global_proc_index_per_group[group_id] + processor_id_in_group;
+
+             if (processor_global_index >= nr_of_processors) {
+                 cpuinfo_log_error("unexpected processor index %"PRIu32"",
+                     processor_global_index);
+                 return false;
+             }
+
+             if (info->Relationship == RelationProcessorPackage) {
+                 store_package_info_per_processor(
+                     processors, processor_global_index, info_id,
+                     group_id, processor_id_in_group);
+             } else if (info->Relationship == RelationProcessorCore) {
+                 store_core_info_per_processor(
+                     processors, processor_global_index,
+                     info_id, info,
+                     cores, chip_info);
+             } else {
+                 cpuinfo_log_error(
+                     "unexpected processor info type (%"PRIu32") for processor information",
+                     (uint32_t) info->Relationship);
+             }
+         }
+     }
+     return true;
+ }
+
+ /*
+  * Handles one RelationCache record. The function works in two passes,
+  * selected by `caches`:
+  *
+  *  - caches == NULL (counting pass): only increments the per-level entry of
+  *    numbers_of_caches so the caller can size the cache array.
+  *  - caches != NULL (fill pass): writes the record into the next free slot
+  *    of its level and links the cache to every processor in its masks.
+  *
+  * Only L1 instruction, L1 data, L2 and L3 caches are tracked; every other
+  * record is skipped in both passes.
+  *
+  * Returns false only when the fill pass meets more caches of a level than
+  * numbers_of_caches has room for; true otherwise.
+  */
+ static bool parse_relation_cache_info(
+     struct cpuinfo_processor* processors,
+     struct cpuinfo_cache* caches,
+     uint32_t* numbers_of_caches,
+     const uint32_t* global_proc_index_per_group,
+     PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info)
+ {
+     /* Next free slot per level; they persist across the calls of one fill pass. */
+     static uint32_t l1i_counter = 0;
+     static uint32_t l1d_counter = 0;
+     static uint32_t l2_counter = 0;
+     static uint32_t l3_counter = 0;
+
+     /* Count cache types for allocation at first. */
+     if (caches == NULL) {
+         /* Rewind the fill cursors here: without this, a repeated
+          * initialization attempt would continue from the stale positions
+          * of the previous run and write past the newly allocated array.
+          */
+         l1i_counter = 0;
+         l1d_counter = 0;
+         l2_counter = 0;
+         l3_counter = 0;
+
+         switch(info->Cache.Level) {
+             case 1:
+                 switch (info->Cache.Type) {
+                     case CacheInstruction:
+                         numbers_of_caches[cpuinfo_cache_level_1i]++;
+                         break;
+                     case CacheData:
+                         numbers_of_caches[cpuinfo_cache_level_1d]++;
+                         break;
+                     case CacheUnified:
+                         break;
+                     case CacheTrace:
+                         break;
+                     default:
+                         break;
+                 }
+                 break;
+             case 2:
+                 numbers_of_caches[cpuinfo_cache_level_2]++;
+                 break;
+             case 3:
+                 numbers_of_caches[cpuinfo_cache_level_3]++;
+                 break;
+             default:
+                 break;
+         }
+         return true;
+     }
+
+     /* The levels are stored back to back: L1I, L1D, L2, L3. */
+     struct cpuinfo_cache* l1i_base = caches;
+     struct cpuinfo_cache* l1d_base = l1i_base + numbers_of_caches[cpuinfo_cache_level_1i];
+     struct cpuinfo_cache* l2_base = l1d_base + numbers_of_caches[cpuinfo_cache_level_1d];
+     struct cpuinfo_cache* l3_base = l2_base + numbers_of_caches[cpuinfo_cache_level_2];
+
+     cpuinfo_log_debug(
+         "info->Cache.GroupCount:%"PRIu32", info->Cache.GroupMask:%"PRIu32","
+         "info->Cache.Level:%"PRIu32", info->Cache.Associativity:%"PRIu32","
+         "info->Cache.LineSize:%"PRIu32","
+         "info->Cache.CacheSize:%"PRIu32", info->Cache.Type:%"PRIu32"",
+         info->Cache.GroupCount, (unsigned int)info->Cache.GroupMask.Mask,
+         info->Cache.Level, info->Cache.Associativity, info->Cache.LineSize,
+         info->Cache.CacheSize, info->Cache.Type);
+
+     /* Pick the slot array, cursor and capacity that belong to this record. */
+     struct cpuinfo_cache* level_base = NULL;
+     uint32_t* level_counter = NULL;
+     uint32_t level_capacity = 0;
+     switch (info->Cache.Level) {
+         case 1:
+             if (info->Cache.Type == CacheInstruction) {
+                 level_base = l1i_base;
+                 level_counter = &l1i_counter;
+                 level_capacity = numbers_of_caches[cpuinfo_cache_level_1i];
+             } else if (info->Cache.Type == CacheData) {
+                 level_base = l1d_base;
+                 level_counter = &l1d_counter;
+                 level_capacity = numbers_of_caches[cpuinfo_cache_level_1d];
+             }
+             break;
+         case 2:
+             level_base = l2_base;
+             level_counter = &l2_counter;
+             level_capacity = numbers_of_caches[cpuinfo_cache_level_2];
+             break;
+         case 3:
+             level_base = l3_base;
+             level_counter = &l3_counter;
+             level_capacity = numbers_of_caches[cpuinfo_cache_level_3];
+             break;
+         default:
+             break;
+     }
+     if (level_base == NULL) {
+         /* The counting pass reserved no slot for this kind of cache
+          * (L1 unified/trace or a level other than 1-3), so skip it
+          * instead of dereferencing a NULL slot.
+          */
+         return true;
+     }
+     if (*level_counter >= level_capacity) {
+         /* More caches than were counted: writing would overflow the array. */
+         cpuinfo_log_error("unexpected number of level %"PRIu32" caches",
+             (uint32_t) info->Cache.Level);
+         return false;
+     }
+     struct cpuinfo_cache* current_cache = level_base + *level_counter;
+     (*level_counter)++;
+
+     current_cache->size = info->Cache.CacheSize;
+     current_cache->line_size = info->Cache.LineSize;
+     current_cache->associativity = info->Cache.Associativity;
+     /* We don't have partition and set information of caches on Windows,
+      * so we set partitions to 1 and calculate the expected sets.
+      */
+     current_cache->partitions = 1;
+     if (current_cache->line_size != 0 && current_cache->associativity != 0) {
+         current_cache->sets =
+             current_cache->size / current_cache->line_size / current_cache->associativity;
+     } else {
+         /* Avoid dividing by zero when the record carries no geometry. */
+         current_cache->sets = 0;
+     }
+     if (info->Cache.Type == CacheUnified) {
+         current_cache->flags = CPUINFO_CACHE_UNIFIED;
+     }
+
+     /* Zero GroupCount is valid, GroupMask still can store bits set: in that
+      * case exactly one mask is present. Otherwise GroupCount is the number
+      * of valid GroupMasks entries, so never read entry [GroupCount].
+      */
+     const uint32_t group_mask_count =
+         (info->Cache.GroupCount == 0) ? 1 : (uint32_t) info->Cache.GroupCount;
+     for (uint32_t i = 0; i < group_mask_count; i++) {
+         const uint32_t group_id = info->Cache.GroupMasks[i].Group;
+         /* Bitmask representing processors in this group sharing this cache */
+         KAFFINITY group_processors_mask = info->Cache.GroupMasks[i].Mask;
+         while (group_processors_mask != 0) {
+             const uint32_t processor_id_in_group =
+                 low_index_from_kaffinity(group_processors_mask);
+             const uint32_t processor_global_index =
+                 global_proc_index_per_group[group_id] + processor_id_in_group;
+
+             store_cache_info_per_processor(
+                 processors, processor_global_index,
+                 info, current_cache);
+
+             /* Clear the bits in affinity mask, lower the least set bit. */
+             group_processors_mask &= (group_processors_mask - 1);
+         }
+     }
+     return true;
+ }
+
+ /*
+  * Records the Windows group id, the in-group processor id and the package
+  * of one logical processor.
+  *
+  * The package array is not allocated yet at this point, so the package
+  * field temporarily holds an offset from a NULL base instead of a real
+  * address.
+  */
+ static void store_package_info_per_processor(
+     struct cpuinfo_processor* processors,
+     const uint32_t processor_global_index,
+     const uint32_t package_id,
+     const uint32_t group_id,
+     const uint32_t processor_id_in_group)
+ {
+     struct cpuinfo_processor* const processor = &processors[processor_global_index];
+
+     processor->windows_group_id = (uint16_t) group_id;
+     processor->windows_processor_id = (uint16_t) processor_id_in_group;
+
+     /* Offset only: package_id elements past a NULL base pointer. */
+     processor->package = (const struct cpuinfo_package*) NULL + package_id;
+ }
+
+ /*
+  * Links one logical processor to its core and fills the core entry.
+  *
+  * Nothing happens while `cores` is NULL. Otherwise the processor's core
+  * pointer and the core's id are set. When chip_info is available, the
+  * core's uarch and frequency are looked up from the chip table by the
+  * record's efficiency class; with a NULL chip_info they are left untouched
+  * instead of dereferencing the NULL pointer.
+  */
+ void store_core_info_per_processor(
+     struct cpuinfo_processor* processors,
+     const uint32_t processor_global_index,
+     const uint32_t core_id,
+     PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+     struct cpuinfo_core* cores,
+     const struct woa_chip_info *chip_info)
+ {
+     if (cores == NULL) {
+         return;
+     }
+
+     processors[processor_global_index].core = cores + core_id;
+     cores[core_id].core_id = core_id;
+     if (chip_info != NULL) {
+         get_core_uarch_for_efficiency(
+             chip_info->chip_name, core_info->Processor.EfficiencyClass,
+             &(cores[core_id].uarch), &(cores[core_id].frequency));
+     }
+
+     /* We don't have cluster information, so we handle it as
+      * fixed 1 to (cluster / cores).
+      * Set the cluster offset ID now, as soon as we have the
+      * cluster base address, we'll set the absolute address.
+      */
+     processors[processor_global_index].cluster =
+         (const struct cpuinfo_cluster*) NULL + core_id;
+ }
+
+ /*
+  * Attaches `current_cache` to one logical processor.
+  *
+  * The cache's processor_start is lowered to this processor's index when it
+  * is smaller, and its processor_count grows by one. The processor's l1i,
+  * l1d, l2 or l3 pointer is then set according to the record's level and
+  * type; other level/type combinations leave the processor untouched.
+  */
+ static void store_cache_info_per_processor(
+     struct cpuinfo_processor* processors,
+     const uint32_t processor_global_index,
+     PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+     struct cpuinfo_cache* current_cache)
+ {
+     struct cpuinfo_processor* const processor = &processors[processor_global_index];
+
+     if (processor_global_index < current_cache->processor_start) {
+         current_cache->processor_start = processor_global_index;
+     }
+     current_cache->processor_count++;
+
+     if (info->Cache.Level == 1) {
+         if (info->Cache.Type == CacheInstruction) {
+             processor->cache.l1i = current_cache;
+         } else if (info->Cache.Type == CacheData) {
+             processor->cache.l1d = current_cache;
+         }
+         /* Unified, trace and unknown L1 types are not linked. */
+     } else if (info->Cache.Level == 2) {
+         processor->cache.l2 = current_cache;
+     } else if (info->Cache.Level == 3) {
+         processor->cache.l3 = current_cache;
+     }
+ }
+
+ /*
+ * Turns the package/cluster offsets stored on each processor into real
+ * pointers and fills the start index and count fields of packages, clusters
+ * and cores.
+ *
+ * Both loops run from the highest index down to 0, so the *_start fields end
+ * up holding the lowest index that touched the entry.
+ *
+ * The cluster array is bounds-checked against nr_of_cores, i.e. one cluster
+ * per core is assumed.
+ *
+ * Returns false when a stored package or cluster offset points outside its
+ * array, true otherwise.
+ *
+ * NOTE(review): processor->core is dereferenced without a NULL check;
+ * presumably every processor has been linked to a core before this call -
+ * confirm against the caller.
+ */
+ static bool connect_packages_cores_clusters_by_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t nr_of_processors,
+ struct cpuinfo_package* packages,
+ const uint32_t nr_of_packages,
+ struct cpuinfo_cluster* clusters,
+ struct cpuinfo_core* cores,
+ const uint32_t nr_of_cores,
+ const struct woa_chip_info* chip_info,
+ enum cpuinfo_vendor vendor)
+ {
+ /* Adjust core and package pointers for all logical processors. */
+ for (uint32_t i = nr_of_processors; i != 0; i--) {
+ const uint32_t processor_id = i - 1;
+ struct cpuinfo_processor* processor = processors + processor_id;
+
+ struct cpuinfo_core* core = (struct cpuinfo_core*)processor->core;
+
+ /* We stored the offset of pointers when we haven't allocated memory
+ * for packages and clusters, so now add offsets to base addresses.
+ */
+ struct cpuinfo_package* package =
+ (struct cpuinfo_package*) ((uintptr_t) packages + (uintptr_t) processor->package);
+ if (package < packages ||
+ package >= (packages + nr_of_packages)) {
+ cpuinfo_log_error("invalid package indexing");
+ return false;
+ }
+ processor->package = package;
+
+ struct cpuinfo_cluster* cluster =
+ (struct cpuinfo_cluster*) ((uintptr_t) clusters + (uintptr_t) processor->cluster);
+ if (cluster < clusters ||
+ cluster >= (clusters + nr_of_cores)) {
+ cpuinfo_log_error("invalid cluster indexing");
+ return false;
+ }
+ processor->cluster = cluster;
+
+ /* The package name is copied from the chip's name string; this is
+ * repeated for every processor of the package.
+ */
+ if (chip_info) {
+ strncpy_s(package->name, CPUINFO_PACKAGE_NAME_MAX, chip_info->chip_name_string,
+ strnlen(chip_info->chip_name_string, CPUINFO_PACKAGE_NAME_MAX));
+ }
+
+ /* Set start indexes and counts per packages / clusters / cores - going backwards */
+
+ /* This can be overwritten by lower-index processors on the same package. */
+ package->processor_start = processor_id;
+ package->processor_count++;
+
+ /* This can be overwritten by lower-index processors on the same cluster. */
+ cluster->processor_start = processor_id;
+ cluster->processor_count++;
+
+ /* This can be overwritten by lower-index processors on the same core. */
+ core->processor_start = processor_id;
+ core->processor_count++;
+ }
+ /* Fill cores */
+ for (uint32_t i = nr_of_cores; i != 0; i--) {
+ const uint32_t global_core_id = i - 1;
+ struct cpuinfo_core* core = cores + global_core_id;
+ /* The core's first processor supplies its package and cluster. */
+ const struct cpuinfo_processor* processor = processors + core->processor_start;
+ struct cpuinfo_package* package = (struct cpuinfo_package*) processor->package;
+ struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*) processor->cluster;
+
+ core->package = package;
+ core->cluster = cluster;
+ core->vendor = vendor;
+
+ /* This can be overwritten by lower-index cores on the same cluster/package. */
+ cluster->core_start = global_core_id;
+ cluster->core_count++;
+ package->core_start = global_core_id;
+ package->core_count++;
+ /* Clusters mirror cores, so the package's cluster range equals its core range. */
+ package->cluster_start = global_core_id;
+ package->cluster_count = package->core_count;
+
+ /* The cluster takes vendor, uarch and frequency from its first core. */
+ cluster->package = package;
+ cluster->vendor = cores[cluster->core_start].vendor;
+ cluster->uarch = cores[cluster->core_start].uarch;
+ cluster->frequency = cores[cluster->core_start].frequency;
+ }
+ return true;
+ }
+
+ /*
+  * Returns the bit position of the lowest set bit of `kaffinity`.
+  *
+  * For a zero mask _BitScanForward64() finds no bit and does not write its
+  * output, so the index starts at 0 to keep the result well-defined.
+  */
+ static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) {
+     unsigned long index = 0;
+     _BitScanForward64(&index, (unsigned __int64) kaffinity);
+     return (uint32_t) index;
+ }
diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c new file mode 100644 index 0000000..8effc15 --- /dev/null +++ b/src/arm/windows/init.c @@ -0,0 +1,253 @@ +#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include "windows-arm-init.h"
+
+ /* Efficiency class = 0 means little core, while 1 means big core for now */
+ #define MAX_WOA_VALID_EFFICIENCY_CLASSES 2
+ /* Size of the vendor name buffer in struct vendor_info. */
+ #define VENDOR_NAME_MAX CPUINFO_PACKAGE_NAME_MAX
+
+ /* ISA feature flags; filled in by set_cpuinfo_isa_fields(). */
+ struct cpuinfo_arm_isa cpuinfo_isa;
+
+ /* Forward declarations of the static helpers defined further below. */
+ static void set_cpuinfo_isa_fields(void);
+ static bool get_system_info_from_registry(
+ struct woa_chip_info** chip_info,
+ enum cpuinfo_vendor* vendor);
+
+ /* Pairs a vendor name string with its cpuinfo vendor enum value. */
+ struct vendor_info {
+ char vendor_name[VENDOR_NAME_MAX];
+ enum cpuinfo_vendor vendor;
+ };
+
+ /* Please add new vendor here! */
+ /* The vendor string read from the registry is matched against vendor_name
+ * as a prefix.
+ */
+ static struct vendor_info vendors[] = {
+ {
+ "Qualcomm",
+ cpuinfo_vendor_qualcomm
+ }
+ };
+
+ /* Please add new SoC/chip info here! */
+ /* The table is indexed by enum woa_chip_name, and each entry's core list is
+ * indexed by efficiency class (0 = little core, 1 = big core). Frequencies
+ * are given in Hz.
+ */
+ static struct woa_chip_info woa_chips[] = {
+ /* Microsoft SQ1 Kryo 495 4 + 4 cores (3 GHz + 1.80 GHz) */
+ {
+ "Microsoft SQ1",
+ woa_chip_name_microsoft_sq_1,
+ {
+ {
+ cpuinfo_uarch_cortex_a55,
+ 1800000000,
+ },
+ {
+ cpuinfo_uarch_cortex_a76,
+ 3000000000,
+ }
+ }
+ },
+ /* Microsoft SQ2 Kryo 495 4 + 4 cores (3.15 GHz + 2.42 GHz) */
+ {
+ "Microsoft SQ2",
+ woa_chip_name_microsoft_sq_2,
+ {
+ {
+ cpuinfo_uarch_cortex_a55,
+ 2420000000,
+ },
+ {
+ cpuinfo_uarch_cortex_a76,
+ 3150000000
+ }
+ }
+ }
+ };
+
+ /*
+  * One-time initialization callback for Windows on Arm.
+  *
+  * Fills the ISA flags, reads chip and vendor from the registry and then
+  * runs the logical-processor based initialization. The second step runs
+  * even when the registry lookup failed. cpuinfo_is_initialized is set to
+  * true only when both steps succeeded.
+  *
+  * The three callback parameters are not used.
+  *
+  * Returns TRUE when both steps succeeded, FALSE otherwise.
+  */
+ BOOL CALLBACK cpuinfo_arm_windows_init(
+     PINIT_ONCE init_once, PVOID parameter, PVOID* context)
+ {
+     struct woa_chip_info *chip_info = NULL;
+     enum cpuinfo_vendor vendor = cpuinfo_vendor_unknown;
+
+     set_cpuinfo_isa_fields();
+
+     /* Evaluate both steps unconditionally, then combine the outcomes. */
+     const bool registry_ok = get_system_info_from_registry(&chip_info, &vendor);
+     const bool topology_ok = cpu_info_init_by_logical_sys_info(chip_info, vendor);
+     const bool result = registry_ok && topology_ok;
+
+     cpuinfo_is_initialized = result;
+     if (result) {
+         return TRUE;
+     }
+     return FALSE;
+ }
+
+ /*
+  * Looks up microarchitecture and frequency of a core from the hard-coded
+  * chip table.
+  *
+  * chip: index into the chip table; must be below woa_chip_name_last.
+  * EfficiencyClass: selects the pre-defined little or big core; must be
+  *   below MAX_WOA_VALID_EFFICIENCY_CLASSES.
+  * uarch, frequency: outputs; both must be non-NULL.
+  *
+  * Returns true and writes both outputs on success; returns false and
+  * writes nothing when any argument is out of range or NULL.
+  * Any further supported SoC's logic should be implemented here.
+  */
+ bool get_core_uarch_for_efficiency(
+     enum woa_chip_name chip, BYTE EfficiencyClass,
+     enum cpuinfo_uarch* uarch, uint64_t* frequency)
+ {
+     if (!uarch || !frequency) {
+         return false;
+     }
+     if (!(chip < woa_chip_name_last) ||
+         !(EfficiencyClass < MAX_WOA_VALID_EFFICIENCY_CLASSES)) {
+         return false;
+     }
+
+     const struct core_info_by_chip_name* core_info =
+         &woa_chips[chip].uarchs[EfficiencyClass];
+     *uarch = core_info->uarch;
+     *frequency = core_info->frequency;
+     return true;
+ }
+
+/* Static helper functions */
+
+ /*
+  * Reads a REG_SZ value below HKEY_LOCAL_MACHINE into a freshly allocated,
+  * zero-initialized buffer on the process heap.
+  *
+  * subkey, value: registry path and value name. Every caller in this file
+  *   passes narrow (char) string literals cast to LPCTSTR and consumes the
+  *   result as char text, so the ANSI entry point RegGetValueA is called
+  *   explicitly; the generic RegGetValue macro would select the wide-character
+  *   variant in a UNICODE build and misread both the names and the result.
+  * textBuffer: in/out. A non-NULL *textBuffer is released with HeapFree()
+  *   before the new buffer is stored. The caller releases the buffer with
+  *   HeapFree() on the process heap, also after a failed read.
+  *
+  * Returns true when the string was read, false on any failure.
+  */
+ static bool read_registry(
+     LPCTSTR subkey,
+     LPCTSTR value,
+     char** textBuffer)
+ {
+     DWORD keyType = 0;
+     DWORD dataSize = 0;
+     const DWORD flags = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */
+     LSTATUS result = 0;
+     HANDLE heap = GetProcessHeap();
+
+     /* First call: ask for the required buffer size only. */
+     result = RegGetValueA(
+         HKEY_LOCAL_MACHINE,
+         (LPCSTR) subkey,
+         (LPCSTR) value,
+         flags,
+         &keyType,
+         NULL, /* Request buffer size */
+         &dataSize);
+     if (result != 0 || dataSize == 0) {
+         cpuinfo_log_error("Registry entry size read error");
+         return false;
+     }
+
+     /* Replace a buffer left over from an earlier read. */
+     if (*textBuffer) {
+         HeapFree(heap, 0, *textBuffer);
+     }
+     *textBuffer = HeapAlloc(heap, HEAP_ZERO_MEMORY, dataSize);
+     if (*textBuffer == NULL) {
+         cpuinfo_log_error("Registry textbuffer allocation error");
+         return false;
+     }
+
+     /* Second call: fetch the string itself. */
+     result = RegGetValueA(
+         HKEY_LOCAL_MACHINE,
+         (LPCSTR) subkey,
+         (LPCSTR) value,
+         flags,
+         NULL,
+         *textBuffer, /* Write string in this destination buffer */
+         &dataSize);
+     if (result != 0) {
+         cpuinfo_log_error("Registry read error");
+         return false;
+     }
+     return true;
+ }
+
+ /*
+  * Identifies chip and vendor from the registry entry of processor 0.
+  *
+  * chip_info: output; set to the entry of the hard-coded chip table whose
+  *   name is a prefix of the registry's processor name, or NULL when no
+  *   entry matches.
+  * vendor: output; set to the vendor whose name is a prefix of the
+  *   registry's vendor identifier, or cpuinfo_vendor_unknown.
+  *
+  * Returns true only when both the chip and the vendor were recognized.
+  */
+ static bool get_system_info_from_registry(
+     struct woa_chip_info** chip_info,
+     enum cpuinfo_vendor* vendor)
+ {
+     LPCTSTR cpu0_subkey =
+         (LPCTSTR)"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0";
+     LPCTSTR chip_name_value = (LPCTSTR)"ProcessorNameString";
+     LPCTSTR vendor_name_value = (LPCTSTR)"VendorIdentifier";
+     const size_t nr_of_vendors = sizeof(vendors) / sizeof(vendors[0]);
+     char* registry_text = NULL;
+     bool vendor_found = false;
+
+     *chip_info = NULL;
+     *vendor = cpuinfo_vendor_unknown;
+     HANDLE heap = GetProcessHeap();
+
+     /* 1. Read processor model name from registry and find in the hard-coded list. */
+     if (!read_registry(cpu0_subkey, chip_name_value, &registry_text)) {
+         cpuinfo_log_error("Registry read error");
+         goto cleanup;
+     }
+     /* The scan stops at the first table entry that matches. */
+     for (uint32_t chip = 0;
+         chip < (uint32_t) woa_chip_name_last && *chip_info == NULL;
+         chip++)
+     {
+         const char* known_name = woa_chips[chip].chip_name_string;
+         const size_t known_length = strnlen(known_name, CPUINFO_PACKAGE_NAME_MAX);
+         if (strncmp(registry_text, known_name, known_length) == 0) {
+             *chip_info = &woa_chips[chip];
+         }
+     }
+     if (*chip_info == NULL) {
+         cpuinfo_log_error("Unknown chip model name.\n Please add new Windows on Arm SoC/chip support!");
+         goto cleanup;
+     }
+     cpuinfo_log_debug("detected chip model name: %s", (**chip_info).chip_name_string);
+
+     /* 2. Read vendor/manufacturer name from registry. */
+     if (!read_registry(cpu0_subkey, vendor_name_value, &registry_text)) {
+         cpuinfo_log_error("Registry read error");
+         goto cleanup;
+     }
+
+     for (uint32_t v = 0; v < nr_of_vendors && !vendor_found; v++) {
+         const char* known_vendor = vendors[v].vendor_name;
+         if (strncmp(registry_text, known_vendor, strlen(known_vendor)) == 0) {
+             *vendor = vendors[v].vendor;
+             vendor_found = true;
+         }
+     }
+     if (*vendor == cpuinfo_vendor_unknown) {
+         cpuinfo_log_error("Unexpected vendor: %s", registry_text);
+     }
+
+ cleanup:
+     /* The text buffer is only needed for the comparisons above. */
+     HeapFree(heap, 0, registry_text);
+     registry_text = NULL;
+     return vendor_found;
+ }
+
+ /*
+  * Fills the global cpuinfo_isa from IsProcessorFeaturePresent() queries and
+  * logs every queried feature at debug level.
+  *
+  * The crypto query result is applied to aes, sha1, sha2 and pmull alike.
+  * fp16arith is set when floating point is not emulated and the FMAC feature
+  * is present.
+  */
+ static void set_cpuinfo_isa_fields(void)
+ {
+     const bool has_armv8 = IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE);
+     const bool has_crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);
+     const bool has_atomics = IsProcessorFeaturePresent(PF_ARM_64BIT_LOADSTORE_ATOMIC);
+     const bool has_fmac = IsProcessorFeaturePresent(PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE);
+     const bool has_crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
+     const bool fp_is_emulated = IsProcessorFeaturePresent(PF_FLOATING_POINT_EMULATED);
+
+     /* Read all Arm related Windows features for debug purposes, even if we can't
+      * pair Arm ISA feature to that now.
+      */
+ #if CPUINFO_LOG_DEBUG_PARSERS
+     const bool has_divide = IsProcessorFeaturePresent(PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE);
+     const bool has_ext_cache = IsProcessorFeaturePresent(PF_ARM_EXTERNAL_CACHE_AVAILABLE);
+     const bool has_vfp_registers = IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE);
+     const bool has_arm_v81 = IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE);
+
+     cpuinfo_log_debug("divide present: %d", has_divide);
+     cpuinfo_log_debug("ext_cache present: %d", has_ext_cache);
+     cpuinfo_log_debug("vfp_registers present: %d", has_vfp_registers);
+     cpuinfo_log_debug("arm_v81 present: %d", has_arm_v81);
+ #endif
+
+     cpuinfo_log_debug("armv8 present: %d", has_armv8);
+     cpuinfo_log_debug("crypto present: %d", has_crypto);
+     cpuinfo_log_debug("load_store_atomic present: %d", has_atomics);
+     cpuinfo_log_debug("float_multiply_accumulate present: %d", has_fmac);
+     cpuinfo_log_debug("crc32 present: %d", has_crc32);
+     cpuinfo_log_debug("float_emulated: %d", fp_is_emulated);
+
+ #if CPUINFO_ARCH_ARM
+     cpuinfo_isa.armv8 = has_armv8;
+ #endif
+ #if CPUINFO_ARCH_ARM64
+     cpuinfo_isa.atomics = has_atomics;
+ #endif
+     cpuinfo_isa.crc32 = has_crc32;
+     /* Windows API reports all or nothing for cryptographic instructions. */
+     cpuinfo_isa.aes = has_crypto;
+     cpuinfo_isa.sha1 = has_crypto;
+     cpuinfo_isa.sha2 = has_crypto;
+     cpuinfo_isa.pmull = has_crypto;
+     cpuinfo_isa.fp16arith = has_fmac && !fp_is_emulated;
+ }
diff --git a/src/arm/windows/windows-arm-init.h b/src/arm/windows/windows-arm-init.h new file mode 100644 index 0000000..76cc51e --- /dev/null +++ b/src/arm/windows/windows-arm-init.h @@ -0,0 +1,32 @@ +#pragma once
+
+ /* List of known and supported Windows on Arm SoCs/chips. */
+ /* woa_chip_name_last aliases woa_chip_name_unknown, so it equals the number
+ * of known chips and serves as the exclusive upper bound for valid values.
+ */
+ enum woa_chip_name {
+ woa_chip_name_microsoft_sq_1 = 0,
+ woa_chip_name_microsoft_sq_2 = 1,
+ woa_chip_name_unknown = 2,
+ woa_chip_name_last = woa_chip_name_unknown
+ };
+
+ /* Topology information hard-coded by SoC/chip name */
+ /* Describes one kind of core of a chip: its microarchitecture and frequency. */
+ struct core_info_by_chip_name {
+ enum cpuinfo_uarch uarch;
+ uint64_t frequency; /* Hz */
+ };
+
+ /* SoC/chip info that's currently not readable by logical system information,
+ * but can be read from registry.
+ */
+ /* NOTE(review): uarchs is dimensioned by woa_chip_name_last (the number of
+ * known chips), while the name suggests one entry per kind of core. Both
+ * happen to be 2 at the moment - confirm the intended dimension before
+ * adding a chip.
+ */
+ struct woa_chip_info {
+ char* chip_name_string;
+ enum woa_chip_name chip_name;
+ struct core_info_by_chip_name uarchs[woa_chip_name_last];
+ };
+
+ /* Writes microarchitecture and frequency of the core kind selected by chip
+ * and efficiency class to *uarch and *frequency; the bool result reports
+ * whether the outputs were written.
+ */
+ bool get_core_uarch_for_efficiency(
+ enum woa_chip_name chip, BYTE EfficiencyClass,
+ enum cpuinfo_uarch* uarch, uint64_t* frequency);
+
+ /* Runs the logical-processor based initialization for the given chip and
+ * vendor; the bool result reports success.
+ */
+ bool cpu_info_init_by_logical_sys_info(
+ const struct woa_chip_info *chip_info,
+ enum cpuinfo_vendor vendor);
diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h index 9c23d7c..c04620e 100644 --- a/src/cpuinfo/internal-api.h +++ b/src/cpuinfo/internal-api.h @@ -51,7 +51,11 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void); CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void); #if defined(_WIN32) || defined(__CYGWIN__) - CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context); + #if CPUINFO_ARCH_ARM64 + CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context); + #else + CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context); + #endif #endif CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void); CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void); @@ -37,6 +37,8 @@ bool CPUINFO_ABI cpuinfo_initialize(void) { pthread_once(&init_guard, &cpuinfo_arm_linux_init); #elif defined(__MACH__) && defined(__APPLE__) pthread_once(&init_guard, &cpuinfo_arm_mach_init); + #elif defined(_WIN32) + InitOnceExecuteOnce(&init_guard, &cpuinfo_arm_windows_init, NULL, NULL); #else cpuinfo_log_error("operating system is not supported in cpuinfo"); #endif diff --git a/src/x86/name.c b/src/x86/name.c index a7cc7c6..957a0d8 100644 --- a/src/x86/name.c +++ b/src/x86/name.c @@ -234,7 +234,7 @@ static bool transform_token(char* token_start, char* token_end, struct parser_st return true; } /* - * Erase everywhing after "SOC" on AMD System-on-Chips, e.g. + * Erase everything after "SOC" on AMD System-on-Chips, e.g. * "AMD GX-212JC SOC with Radeon(TM) R2E Graphics \0" */ if (erase_matching(token_start, token_length, "SOC")) { |