Diffstat (limited to 'src')
-rw-r--r--src/arm/cache.c61
-rw-r--r--src/arm/linux/aarch32-isa.c9
-rw-r--r--src/arm/linux/aarch64-isa.c18
-rw-r--r--src/arm/linux/chipset.c94
-rw-r--r--src/arm/linux/clusters.c20
-rw-r--r--src/arm/linux/cpuinfo.c8
-rw-r--r--src/arm/linux/init.c2
-rw-r--r--src/arm/linux/midr.c6
-rw-r--r--src/arm/mach/init.c219
-rw-r--r--src/arm/midr.h31
-rw-r--r--src/arm/uarch.c17
-rw-r--r--src/arm/windows/init-by-logical-sys-info.c885
-rw-r--r--src/arm/windows/init.c253
-rw-r--r--src/arm/windows/windows-arm-init.h32
-rw-r--r--src/cpuinfo/internal-api.h6
-rw-r--r--src/init.c2
-rw-r--r--src/x86/name.c2
17 files changed, 1525 insertions, 140 deletions
diff --git a/src/arm/cache.c b/src/arm/cache.c
index 446b02b..1a6dd38 100644
--- a/src/arm/cache.c
+++ b/src/arm/cache.c
@@ -535,6 +535,7 @@ void cpuinfo_arm_decode_cache(
l2_size = 1024 * 1024;
break;
case 660:
+ case 662:
/* Snapdragon 660: 1 MB L2 (little cores only) */
l2_size = 1024 * 1024;
break;
@@ -1238,6 +1239,63 @@ void cpuinfo_arm_decode_cache(
};
break;
}
+ case cpuinfo_uarch_neoverse_n1:
+ case cpuinfo_uarch_neoverse_v1:
+ case cpuinfo_uarch_neoverse_n2:
+ {
+ /*
+ * ARM Neoverse-n1 Core Technical Reference Manual
+ * A6.1. About the L1 memory system
+ * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
+ *
+ * A6.1.1 L1 instruction-side memory system
+ * The L1 instruction memory system has the following key features:
+	 *  - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+	 *    Physically Tagged (PIPT) 4-way set-associative L1 instruction cache.
+ * - Fixed cache line length of 64 bytes.
+ *
+ * A6.1.2 L1 data-side memory system
+ * The L1 data memory system has the following features:
+ * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+ * Physically Tagged (PIPT) 4-way set-associative L1 data cache.
+ * - Fixed cache line length of 64 bytes.
+ * - Pseudo-LRU cache replacement policy.
+ *
+ * A7.1 About the L2 memory system
+	 * The L2 memory subsystem consists of:
+ * - An 8-way set associative L2 cache with a configurable size of 256KB, 512KB, or 1024KB. Cache lines
+ * have a fixed length of 64 bytes.
+ * - Strictly inclusive with L1 data cache.
+ * - When configured with instruction cache hardware coherency, strictly inclusive with L1 instruction cache.
+ * - When configured without instruction cache hardware coherency, weakly inclusive with L1 instruction cache.
+ */
+
+	const uint32_t min_l2_size_KB = 256;
+ const uint32_t min_l3_size_KB = 0;
+
+ *l1i = (struct cpuinfo_cache) {
+ .size = 64 * 1024,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l1d = (struct cpuinfo_cache) {
+ .size = 64 * 1024,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l2 = (struct cpuinfo_cache) {
+ .size = min_l2_size_KB * 1024,
+ .associativity = 8,
+ .line_size = 64,
+ .flags = CPUINFO_CACHE_INCLUSIVE,
+ };
+ *l3 = (struct cpuinfo_cache) {
+ .size = min_l3_size_KB * 1024,
+ .associativity = 16,
+ .line_size = 64,
+ };
+ break;
+ }
#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__)
case cpuinfo_uarch_scorpion:
/*
@@ -1655,6 +1713,9 @@ uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* proc
*/
return 8 * 1024 * 1024;
case cpuinfo_uarch_cortex_a55:
+ case cpuinfo_uarch_neoverse_n1:
+ case cpuinfo_uarch_neoverse_v1:
+ case cpuinfo_uarch_neoverse_n2:
case cpuinfo_uarch_cortex_a75:
case cpuinfo_uarch_cortex_a76:
case cpuinfo_uarch_exynos_m4:
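
The Neoverse branch added above fills the cpuinfo_cache structures with the minimum configurable sizes (256 KB L2, no L3). A minimal consumer sketch of the decoded hierarchy, assuming only the public API from <cpuinfo.h> (cpuinfo_initialize, cpuinfo_get_l2_caches, cpuinfo_get_l2_caches_count, cpuinfo_deinitialize):

    #include <inttypes.h>
    #include <stdio.h>
    #include <cpuinfo.h>

    int main(void) {
    	if (!cpuinfo_initialize()) {
    		return 1;
    	}
    	/* Walk the decoded L2 caches; for Neoverse cores the size reported
    	 * here is the minimum configurable size from the branch above. */
    	const struct cpuinfo_cache* l2 = cpuinfo_get_l2_caches();
    	for (uint32_t i = 0; i < cpuinfo_get_l2_caches_count(); i++) {
    		printf("L2 #%"PRIu32": %"PRIu32" KB, %"PRIu32"-way, %"PRIu32" B lines\n",
    			i, l2[i].size / 1024, l2[i].associativity, l2[i].line_size);
    	}
    	cpuinfo_deinitialize();
    	return 0;
    }
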
diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c
index df68aa1..fb95ee9 100644
--- a/src/arm/linux/aarch32-isa.c
+++ b/src/arm/linux/aarch32-isa.c
@@ -64,6 +64,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
* - Processors with Exynos M4 cores
* - Processors with Exynos M5 cores
* - Neoverse N1 cores
+ * - Neoverse V1 cores
+ * - Neoverse N2 cores
*/
if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
/* Only little cores of Exynos 9810 support FP16 & RDM */
@@ -73,9 +75,11 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
case UINT32_C(0x4100D050): /* Cortex-A55 */
case UINT32_C(0x4100D060): /* Cortex-A65 */
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
- case UINT32_C(0x4100D0C0): /* Neoverse N1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ case UINT32_C(0x4100D460): /* Cortex-A510 */
+ case UINT32_C(0x4100D470): /* Cortex-A710 */
+ case UINT32_C(0x4100D480): /* Cortex-X2 */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
@@ -98,6 +102,9 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+ case UINT32_C(0x4100D460): /* Cortex-A510 */
+ case UINT32_C(0x4100D470): /* Cortex-A710 */
+ case UINT32_C(0x4100D480): /* Cortex-X2 */
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
case UINT32_C(0x53000030): /* Exynos-M4 */
diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c
index 2000e1a..44a8f4d 100644
--- a/src/arm/linux/aarch64-isa.c
+++ b/src/arm/linux/aarch64-isa.c
@@ -41,6 +41,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
* - Processors with Exynos M4 cores
* - Processors with Exynos M5 cores
* - Neoverse N1 cores
+ * - Neoverse V1 cores
+ * - Neoverse N2 cores
*/
if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
/* Exynos 9810 reports that it supports FP16 compute, but in fact only little cores do */
@@ -54,6 +56,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
case UINT32_C(0x4100D0C0): /* Neoverse N1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ case UINT32_C(0x4100D400): /* Neoverse V1 */
+ case UINT32_C(0x4100D490): /* Neoverse N2 */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
@@ -78,6 +82,9 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
break;
}
}
+ if (features2 & CPUINFO_ARM_LINUX_FEATURE2_I8MM) {
+ isa->i8mm = true;
+ }
/*
* Many phones ship with an old kernel configuration that doesn't report UDOT/SDOT instructions.
@@ -89,6 +96,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
case UINT32_C(0x4100D0C0): /* Neoverse N1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ case UINT32_C(0x4100D400): /* Neoverse V1 */
+ case UINT32_C(0x4100D490): /* Neoverse N2 */
case UINT32_C(0x4100D4A0): /* Neoverse E1 */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
@@ -124,4 +133,13 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) {
isa->sve2 = true;
}
+	// SVEBF16 is set iff SVE and BF16 are both supported, but the SVEBF16 feature flag
+	// was added to the Linux kernel before the BF16 feature flag, so we check for either.
+ if (features2 & (CPUINFO_ARM_LINUX_FEATURE2_BF16 | CPUINFO_ARM_LINUX_FEATURE2_SVEBF16)) {
+ isa->bf16 = true;
+ }
+ if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM) {
+ isa->fhm = true;
+ }
}
+
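
The BF16 check above tests two flag bits with one combined mask. A standalone sketch of the same idea; the flag values here are placeholders, not the kernel's real HWCAP2 bits:

    #include <stdbool.h>
    #include <stdint.h>

    #define FEATURE2_BF16    UINT32_C(0x1) /* placeholder bit */
    #define FEATURE2_SVEBF16 UINT32_C(0x2) /* placeholder bit */

    static bool has_bf16(uint32_t features2) {
    	/* Either bit implies BF16 support: the SVEBF16 flag appeared in
    	 * the kernel before the plain BF16 flag did. */
    	return (features2 & (FEATURE2_BF16 | FEATURE2_SVEBF16)) != 0;
    }
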
diff --git a/src/arm/linux/chipset.c b/src/arm/linux/chipset.c
index e36283c..f2a002d 100644
--- a/src/arm/linux/chipset.c
+++ b/src/arm/linux/chipset.c
@@ -1,3 +1,4 @@
+#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -281,6 +282,82 @@ static bool match_sm(
return true;
}
+
+struct special_map_entry {
+ const char* platform;
+ uint16_t model;
+ uint8_t series;
+ char suffix;
+};
+
+static const struct special_map_entry qualcomm_hardware_map_entries[] = {
+ {
+ /* "Kona" -> Qualcomm Kona */
+ .platform = "Kona",
+ .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+ .model = 865,
+ },
+ {
+ /* "Bengal" -> Qualcomm Bengal */
+ .platform = "Bengal",
+ .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+ .model = 662,
+ },
+ {
+ /* "Bengalp" -> Qualcomm Bengalp */
+ .platform = "Bengalp",
+ .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+ .model = 662,
+ },
+ {
+ /* "Lito" -> Qualcomm Lito */
+ .platform = "Lito",
+ .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+ .model = 765,
+ .suffix = 'G'
+ },
+ {
+ /* "Lagoon" -> Qualcomm Lagoon */
+ .platform = "Lagoon",
+ .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+ .model = 0,
+ },
+};
+
+static int strcicmpn(char const *a, char const *b, size_t n)
+{
+	/* Case-insensitively compare a against the first n characters of b.
+	 * Returns 0 only if a is exactly n characters long and matches. */
+	for (size_t i = 0; i < n; i++, a++, b++) {
+		int d = tolower((unsigned char)*a) - tolower((unsigned char)*b);
+		if (d != 0 || !*a)
+			return d;
+	}
+	return *a ? 1 : 0;
+}
+
+static bool match_qualcomm_special(
+ const char* start, const char* end,
+ struct cpuinfo_arm_chipset chipset[restrict static 1])
+{
+	const size_t length = (size_t) (end - start);
+	for (size_t i = 0; i < CPUINFO_COUNT_OF(qualcomm_hardware_map_entries); i++) {
+		/* The token at [start, end) is not necessarily NUL-terminated,
+		 * so use a length-bounded, case-insensitive comparison. */
+		if (strcicmpn(qualcomm_hardware_map_entries[i].platform, start, length) == 0)
+		{
+ *chipset = (struct cpuinfo_arm_chipset) {
+ .vendor = chipset_series_vendor[qualcomm_hardware_map_entries[i].series],
+ .series = (enum cpuinfo_arm_chipset_series) qualcomm_hardware_map_entries[i].series,
+ .model = qualcomm_hardware_map_entries[i].model,
+ .suffix = {
+ [0] = qualcomm_hardware_map_entries[i].suffix,
+ },
+ };
+ return true;
+ }
+ }
+	return false;
+}
+
/**
* Tries to match /Samsung Exynos\d{4}$/ signature (case-insensitive) for Samsung Exynos chipsets.
* If match successful, extracts model information into \p chipset argument.
@@ -1351,7 +1428,7 @@ static bool match_and_parse_sunxi(
return false;
}
- /* Compare sunXi platform id and number of cores to tabluted values to decode chipset name */
+ /* Compare sunXi platform id and number of cores to tabulated values to decode chipset name */
uint32_t model = 0;
char suffix = 0;
for (size_t i = 0; i < CPUINFO_COUNT_OF(sunxi_map_entries); i++) {
@@ -1752,13 +1829,6 @@ static bool is_tegra(const char* start, const char* end) {
return (length == 5 || start[5] == '3');
}
-struct special_map_entry {
- const char* platform;
- uint16_t model;
- uint8_t series;
- char suffix;
-};
-
static const struct special_map_entry special_hardware_map_entries[] = {
#if CPUINFO_ARCH_ARM
{
@@ -2317,6 +2387,14 @@ struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_ha
(int) hardware_length, hardware);
return chipset;
}
+
+ if (match_qualcomm_special(pos, hardware_end, &chipset)) {
+ cpuinfo_log_debug(
+ "matched Qualcomm signature in /proc/cpuinfo Hardware string \"%.*s\"",
+ (int) hardware_length, hardware);
+ return chipset;
+ }
+
}
word_start = false;
break;
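
The matcher above receives a [start, end) token from the Hardware string that is not necessarily NUL-terminated, which is why the comparison must be length-bounded. An equivalent standalone sketch, with a hypothetical helper name:

    #include <ctype.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    /* Case-insensitively match the NUL-terminated pattern against exactly
     * the [start, end) range; prefix matches are rejected. */
    static bool token_equals_ci(const char* start, const char* end, const char* pattern) {
    	const size_t length = (size_t) (end - start);
    	if (strlen(pattern) != length) {
    		return false;
    	}
    	for (size_t i = 0; i < length; i++) {
    		if (tolower((unsigned char) start[i]) != tolower((unsigned char) pattern[i])) {
    			return false;
    		}
    	}
    	return true;
    }

    /* token_equals_ci(hw, hw + 4, "Kona") matches "kona" or "KONA", but not "Konap". */
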
diff --git a/src/arm/linux/clusters.c b/src/arm/linux/clusters.c
index c7a4045..430773d 100644
--- a/src/arm/linux/clusters.c
+++ b/src/arm/linux/clusters.c
@@ -48,7 +48,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
* @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags.
* @param max_processors - number of elements in the @p processors array.
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
- * frequency, MIDR infromation, and core cluster (package siblings list) information.
+ * frequency, MIDR information, and core cluster (package siblings list) information.
*
* @retval true if the heuristic successfully assigned all processors into clusters of cores.
* @retval false if known details about processors contradict the heuristic configuration of core clusters.
@@ -292,9 +292,9 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
* - Processors assigned to these clusters stay assigned to the same clusters
* - No new processors are added to these clusters
* - Processors without pre-assigned cluster are clustered in one sequential scan:
- * - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceeding
- * processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceeding processor.
- * - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceeding
+ * - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceding
+ * processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceding processor.
+ * - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceding
* processor, the processor is assigned to a newly created cluster.
*
* The function must be called after parsing OS-provided information on core clusters, and usually is called only
@@ -309,7 +309,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
*
* @param max_processors - number of elements in the @p processors array.
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
- * frequency, MIDR infromation, and core cluster (package siblings list) information.
+ * frequency, MIDR information, and core cluster (package siblings list) information.
*
* @retval true if the heuristic successfully assigned all processors into clusters of cores.
* @retval false if known details about processors contradict the heuristic configuration of core clusters.
@@ -331,7 +331,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
if (cluster_min_frequency != processors[i].min_frequency) {
cpuinfo_log_info(
- "minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); "
+ "minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceding cluster (%"PRIu32" KHz); "
"processor %"PRIu32" starts to a new cluster",
i, processors[i].min_frequency, cluster_min_frequency, i);
goto new_cluster;
@@ -346,7 +346,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
if (cluster_max_frequency != processors[i].max_frequency) {
cpuinfo_log_debug(
- "maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); "
+ "maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceding cluster (%"PRIu32" KHz); "
"processor %"PRIu32" starts a new cluster",
i, processors[i].max_frequency, cluster_max_frequency, i);
goto new_cluster;
@@ -361,7 +361,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) {
cpuinfo_log_debug(
- "CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceeding cluster (0x%02"PRIx32"); "
+ "CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceding cluster (0x%02"PRIx32"); "
"processor %"PRIu32" starts to a new cluster",
i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr), i);
goto new_cluster;
@@ -417,11 +417,11 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
}
}
- /* All checks passed, attach processor to the preceeding cluster */
+ /* All checks passed, attach processor to the preceding cluster */
cluster_processors++;
processors[i].package_leader_id = cluster_start;
processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
- cpuinfo_log_debug("assigned processor %"PRIu32" to preceeding cluster of processor %"PRIu32, i, cluster_start);
+ cpuinfo_log_debug("assigned processor %"PRIu32" to preceding cluster of processor %"PRIu32, i, cluster_start);
continue;
new_cluster:
diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c
index 90e1631..817da12 100644
--- a/src/arm/linux/cpuinfo.c
+++ b/src/arm/linux/cpuinfo.c
@@ -177,6 +177,10 @@ static void parse_features(
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_FCMA;
#endif
+ } else if (memcmp(feature_start, "i8mm", feature_length) == 0) {
+ #if CPUINFO_ARCH_ARM64
+ processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_I8MM;
+ #endif
#if CPUINFO_ARCH_ARM
} else if (memcmp(feature_start, "half", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_HALF;
@@ -283,6 +287,10 @@ static void parse_features(
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM;
#endif
+ } else if (memcmp(feature_start, "asimdfhm", feature_length) == 0) {
+ #if CPUINFO_ARCH_ARM64
+ processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM;
+ #endif
#if CPUINFO_ARCH_ARM
} else if (memcmp(feature_start, "fastmult", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_FASTMULT;
diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c
index 23d8439..d3da5a9 100644
--- a/src/arm/linux/init.c
+++ b/src/arm/linux/init.c
@@ -510,7 +510,7 @@ void cpuinfo_arm_linux_init(void) {
uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX;
/* Indication whether L3 (if it exists) is shared between all cores */
bool shared_l3 = true;
- /* Populate cache infromation structures in l1i, l1d */
+ /* Populate cache information structures in l1i, l1d */
for (uint32_t i = 0; i < valid_processors; i++) {
if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
cluster_id += 1;
diff --git a/src/arm/linux/midr.c b/src/arm/linux/midr.c
index 2c3116b..0d8f03f 100644
--- a/src/arm/linux/midr.c
+++ b/src/arm/linux/midr.c
@@ -675,10 +675,10 @@ static bool cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
/*
* Initializes MIDR for leaders of core clusters in a single sequential scan:
- * - Clusters preceeding the first reported MIDR value are assumed to have default MIDR value.
+ * - Clusters preceding the first reported MIDR value are assumed to have default MIDR value.
* - Clusters following any reported MIDR value to have that MIDR value.
*
- * @param default_midr - MIDR value that will be assigned to cluster leaders preceeding any reported MIDR value.
+ * @param default_midr - MIDR value that will be assigned to cluster leaders preceding any reported MIDR value.
* @param processors_count - number of logical processor descriptions in the @p processors array.
* @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
* and decoded core cluster (package_leader_id) information.
@@ -833,7 +833,7 @@ uint32_t cpuinfo_arm_linux_detect_cluster_midr(
* 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration,
* and estimate MIDR for the other cluster under assumption that MIDR for the big cluster is known.
* 3. Initialize MIDRs for core clusters in a single sequential scan:
- * - Clusters preceeding the first reported MIDR value are assumed to have the last reported MIDR value.
+ * - Clusters preceding the first reported MIDR value are assumed to have the last reported MIDR value.
* - Clusters following any reported MIDR value to have that MIDR value.
*/
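
The cluster heuristics above compare MIDR fields through masks. For reference, the architectural MIDR_EL1 layout those masks extract; the helper names here are illustrative, the repository's own accessors live in src/arm/midr.h:

    #include <stdint.h>

    /* MIDR_EL1 layout: [31:24] implementer, [23:20] variant,
     * [19:16] architecture, [15:4] part number, [3:0] revision. */
    static inline uint32_t midr_implementer(uint32_t midr) { return (midr >> 24) & UINT32_C(0xFF); }
    static inline uint32_t midr_part(uint32_t midr)        { return (midr >>  4) & UINT32_C(0xFFF); }

    /* Example: 0x4100D0C0 -> implementer 0x41 ('A', Arm), part 0xD0C (Neoverse N1). */
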
diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c
index dbea578..6a28b2d 100644
--- a/src/arm/mach/init.c
+++ b/src/arm/mach/init.c
@@ -15,43 +15,25 @@
#include <cpuinfo/log.h>
/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */
-#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL
- #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6
-#endif
#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST
- #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F
+ #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F
#endif
#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER
- #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2
+ #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2
#endif
#ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
#endif
+#ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD
+ #define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D
+#endif
struct cpuinfo_arm_isa cpuinfo_isa = {
-#if CPUINFO_ARCH_ARM
- .thumb = true,
- .thumb2 = true,
- .thumbee = false,
- .jazelle = false,
- .armv5e = true,
- .armv6 = true,
- .armv6k = true,
- .armv7 = true,
- .vfpv2 = false,
- .vfpv3 = true,
- .d32 = true,
- .wmmx = false,
- .wmmx2 = false,
- .neon = true,
-#endif
-#if CPUINFO_ARCH_ARM64
.aes = true,
.sha1 = true,
.sha2 = true,
.pmull = true,
.crc32 = true,
-#endif
};
static uint32_t get_sys_info(int type_specifier, const char* name) {
@@ -83,10 +65,8 @@ static uint32_t get_sys_info_by_name(const char* type_specifier) {
return result;
}
-static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype, uint32_t core_index, uint32_t core_count) {
+static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t core_index, uint32_t core_count) {
switch (cpu_family) {
- case CPUFAMILY_ARM_SWIFT:
- return cpuinfo_uarch_swift;
case CPUFAMILY_ARM_CYCLONE:
return cpuinfo_uarch_cyclone;
case CPUFAMILY_ARM_TYPHOON:
@@ -107,25 +87,15 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype
case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
/* Hexa-core: 2x Firestorm + 4x Icestorm; Octa-core: 4x Firestorm + 4x Icestorm */
return core_index + 4 < core_count ? cpuinfo_uarch_firestorm : cpuinfo_uarch_icestorm;
+ case CPUFAMILY_ARM_AVALANCHE_BLIZZARD:
+ /* Hexa-core: 2x Avalanche + 4x Blizzard */
+ return core_index + 4 < core_count ? cpuinfo_uarch_avalanche : cpuinfo_uarch_blizzard;
default:
/* Use hw.cpusubtype for detection */
break;
}
- #if CPUINFO_ARCH_ARM
- switch (cpu_subtype) {
- case CPU_SUBTYPE_ARM_V7:
- return cpuinfo_uarch_cortex_a8;
- case CPU_SUBTYPE_ARM_V7F:
- return cpuinfo_uarch_cortex_a9;
- case CPU_SUBTYPE_ARM_V7K:
- return cpuinfo_uarch_cortex_a7;
- default:
- return cpuinfo_uarch_unknown;
- }
- #else
- return cpuinfo_uarch_unknown;
- #endif
+ return cpuinfo_uarch_unknown;
}
static void decode_package_name(char* package_name) {
@@ -299,71 +269,118 @@ void cpuinfo_arm_mach_init(void) {
const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
- const uint32_t cpu_type = get_sys_info_by_name("hw.cputype");
- const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype");
- switch (cpu_type) {
- case CPU_TYPE_ARM64:
- cpuinfo_isa.aes = true;
- cpuinfo_isa.sha1 = true;
- cpuinfo_isa.sha2 = true;
- cpuinfo_isa.pmull = true;
- cpuinfo_isa.crc32 = true;
- break;
-#if CPUINFO_ARCH_ARM
- case CPU_TYPE_ARM:
- switch (cpu_subtype) {
- case CPU_SUBTYPE_ARM_V8:
- cpuinfo_isa.armv8 = true;
- cpuinfo_isa.aes = true;
- cpuinfo_isa.sha1 = true;
- cpuinfo_isa.sha2 = true;
- cpuinfo_isa.pmull = true;
- cpuinfo_isa.crc32 = true;
- /* Fall-through to add ARMv7S features */
- case CPU_SUBTYPE_ARM_V7S:
- case CPU_SUBTYPE_ARM_V7K:
- cpuinfo_isa.fma = true;
- /* Fall-through to add ARMv7F features */
- case CPU_SUBTYPE_ARM_V7F:
- cpuinfo_isa.armv7mp = true;
- cpuinfo_isa.fp16 = true;
- /* Fall-through to add ARMv7 features */
- case CPU_SUBTYPE_ARM_V7:
- break;
- default:
- break;
- }
- break;
-#endif
- }
+
/*
- * Support for ARMv8.1 Atomics & FP16 arithmetic instructions is supposed to be detected via
- * sysctlbyname calls with "hw.optional.armv8_1_atomics" and "hw.optional.neon_fp16" arguments
- * (see https://devstreaming-cdn.apple.com/videos/wwdc/2018/409t8zw7rumablsh/409/409_whats_new_in_llvm.pdf),
- * but on new iOS versions these calls just fail with EPERM.
- *
- * Thus, we whitelist CPUs known to support these instructions.
+	 * iOS 15 and macOS 12 added sysctls for ARM features; use them where possible.
+	 * Otherwise, fall back to a hardcoded set of CPUs with known support.
*/
- switch (cpu_family) {
- case CPUFAMILY_ARM_MONSOON_MISTRAL:
- case CPUFAMILY_ARM_VORTEX_TEMPEST:
- case CPUFAMILY_ARM_LIGHTNING_THUNDER:
- case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
- #if CPUINFO_ARCH_ARM64
+ const uint32_t has_feat_lse = get_sys_info_by_name("hw.optional.arm.FEAT_LSE");
+ if (has_feat_lse != 0) {
+ cpuinfo_isa.atomics = true;
+ } else {
+ // Mandatory in ARMv8.1-A, list only cores released before iOS 15 / macOS 12
+ switch (cpu_family) {
+ case CPUFAMILY_ARM_MONSOON_MISTRAL:
+ case CPUFAMILY_ARM_VORTEX_TEMPEST:
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
cpuinfo_isa.atomics = true;
- #endif
- cpuinfo_isa.fp16arith = true;
+ }
}
- /*
- * There does not yet seem to exist an OS mechanism to detect support for
- * ARMv8.2 optional dot-product instructions, so we currently whitelist CPUs
- * known to support these instruction.
- */
- switch (cpu_family) {
- case CPUFAMILY_ARM_LIGHTNING_THUNDER:
- case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
- cpuinfo_isa.dot = true;
+ const uint32_t has_feat_rdm = get_sys_info_by_name("hw.optional.arm.FEAT_RDM");
+ if (has_feat_rdm != 0) {
+ cpuinfo_isa.rdm = true;
+ } else {
+ // Optional in ARMv8.2-A (implemented in Apple cores),
+ // list only cores released before iOS 15 / macOS 12
+ switch (cpu_family) {
+ case CPUFAMILY_ARM_MONSOON_MISTRAL:
+ case CPUFAMILY_ARM_VORTEX_TEMPEST:
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+ cpuinfo_isa.rdm = true;
+ }
+ }
+
+ const uint32_t has_feat_fp16 = get_sys_info_by_name("hw.optional.arm.FEAT_FP16");
+ if (has_feat_fp16 != 0) {
+ cpuinfo_isa.fp16arith = true;
+ } else {
+ // Optional in ARMv8.2-A (implemented in Apple cores),
+ // list only cores released before iOS 15 / macOS 12
+ switch (cpu_family) {
+ case CPUFAMILY_ARM_MONSOON_MISTRAL:
+ case CPUFAMILY_ARM_VORTEX_TEMPEST:
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+ cpuinfo_isa.fp16arith = true;
+ }
+ }
+
+ const uint32_t has_feat_fhm = get_sys_info_by_name("hw.optional.arm.FEAT_FHM");
+ if (has_feat_fhm != 0) {
+ cpuinfo_isa.fhm = true;
+ } else {
+ // Prior to iOS 15, use 'hw.optional.armv8_2_fhm'
+ const uint32_t has_feat_fhm_legacy = get_sys_info_by_name("hw.optional.armv8_2_fhm");
+ if (has_feat_fhm_legacy != 0) {
+ cpuinfo_isa.fhm = true;
+ } else {
+			// Mandatory in ARMv8.4-A when FP16 arithmetic is implemented,
+ // list only cores released before iOS 15 / macOS 12
+ switch (cpu_family) {
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+ cpuinfo_isa.fhm = true;
+ }
+ }
+ }
+
+ const uint32_t has_feat_bf16 = get_sys_info_by_name("hw.optional.arm.FEAT_BF16");
+ if (has_feat_bf16 != 0) {
+ cpuinfo_isa.bf16 = true;
+ }
+
+ const uint32_t has_feat_fcma = get_sys_info_by_name("hw.optional.arm.FEAT_FCMA");
+ if (has_feat_fcma != 0) {
+ cpuinfo_isa.fcma = true;
+ } else {
+ // Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12
+ switch (cpu_family) {
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+ cpuinfo_isa.fcma = true;
+ }
+ }
+
+ const uint32_t has_feat_jscvt = get_sys_info_by_name("hw.optional.arm.FEAT_JSCVT");
+ if (has_feat_jscvt != 0) {
+ cpuinfo_isa.jscvt = true;
+ } else {
+ // Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12
+ switch (cpu_family) {
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+ cpuinfo_isa.jscvt = true;
+ }
+ }
+
+ const uint32_t has_feat_dotprod = get_sys_info_by_name("hw.optional.arm.FEAT_DotProd");
+ if (has_feat_dotprod != 0) {
+ cpuinfo_isa.dot = true;
+ } else {
+ // Mandatory in ARMv8.4-A, list only cores released before iOS 15 / macOS 12
+ switch (cpu_family) {
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+ cpuinfo_isa.dot = true;
+ }
+ }
+
+ const uint32_t has_feat_i8mm = get_sys_info_by_name("hw.optional.arm.FEAT_I8MM");
+ if (has_feat_i8mm != 0) {
+ cpuinfo_isa.i8mm = true;
}
uint32_t num_clusters = 1;
@@ -374,7 +391,7 @@ void cpuinfo_arm_mach_init(void) {
.core_id = i % cores_per_package,
.package = packages + i / cores_per_package,
.vendor = cpuinfo_vendor_apple,
- .uarch = decode_uarch(cpu_family, cpu_subtype, i, mach_topology.cores),
+ .uarch = decode_uarch(cpu_family, i, mach_topology.cores),
};
if (i != 0 && cores[i].uarch != cores[i - 1].uarch) {
num_clusters++;
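
The pattern used throughout the rewritten block above is: probe an hw.optional.arm.FEAT_* sysctl, and fall back to the CPU-family whitelist only when the sysctl is absent. A standalone sketch of the probe, assuming only the standard sysctlbyname(3) interface:

    #include <stddef.h>
    #include <stdint.h>
    #include <sys/sysctl.h>

    /* Returns the boolean sysctl value, or 0 when the OS doesn't expose the
     * name (older iOS/macOS versions fail with ENOENT or EPERM). */
    static uint32_t probe_optional_feature(const char* name) {
    	uint32_t value = 0;
    	size_t size = sizeof(value);
    	if (sysctlbyname(name, &value, &size, NULL, 0) != 0) {
    		return 0;
    	}
    	return value;
    }

    /* e.g. if (probe_optional_feature("hw.optional.arm.FEAT_LSE")) { ... } */
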
diff --git a/src/arm/midr.h b/src/arm/midr.h
index 739dc19..b0e244c 100644
--- a/src/arm/midr.h
+++ b/src/arm/midr.h
@@ -174,23 +174,25 @@ inline static uint32_t midr_score_core(uint32_t midr) {
case UINT32_C(0x53000030): /* Exynos M4 */
case UINT32_C(0x53000040): /* Exynos M5 */
case UINT32_C(0x4100D440): /* Cortex-X1 */
- /* These cores are in big role w.r.t Cortex-A75/-A76/-A77/-A78 */
+ case UINT32_C(0x4100D480): /* Cortex-X2 */
+		/* These cores are in big role w.r.t. Cortex-A75/-A76/-A77/-A78/-A710 */
return 6;
+ case UINT32_C(0x4100D080): /* Cortex-A72 */
+ case UINT32_C(0x4100D090): /* Cortex-A73 */
+ case UINT32_C(0x4100D0A0): /* Cortex-A75 */
+ case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+ case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ case UINT32_C(0x4100D410): /* Cortex-A78 */
+ case UINT32_C(0x4100D470): /* Cortex-A710 */
+ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x4E000030): /* Denver 2 */
+ case UINT32_C(0x51002050): /* Kryo Gold */
+ case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
+ case UINT32_C(0x51008020): /* Kryo 385 Gold */
+ case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */
case UINT32_C(0x53000020): /* Exynos M3 */
- case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
- case UINT32_C(0x51008020): /* Kryo 385 Gold */
- case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
- case UINT32_C(0x51002050): /* Kryo Gold */
- case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
- case UINT32_C(0x4100D410): /* Cortex-A78 */
- case UINT32_C(0x4100D0D0): /* Cortex-A77 */
- case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
- case UINT32_C(0x4100D0B0): /* Cortex-A76 */
- case UINT32_C(0x4100D0A0): /* Cortex-A75 */
- case UINT32_C(0x4100D090): /* Cortex-A73 */
- case UINT32_C(0x4100D080): /* Cortex-A72 */
#if CPUINFO_ARCH_ARM
case UINT32_C(0x4100C0F0): /* Cortex-A15 */
case UINT32_C(0x4100C0E0): /* Cortex-A17 */
@@ -205,8 +207,9 @@ inline static uint32_t midr_score_core(uint32_t midr) {
#if CPUINFO_ARCH_ARM64
case UINT32_C(0x4100D060): /* Cortex-A65 */
#endif /* CPUINFO_ARCH_ARM64 */
- case UINT32_C(0x4100D050): /* Cortex-A55 */
case UINT32_C(0x4100D030): /* Cortex-A53 */
+ case UINT32_C(0x4100D050): /* Cortex-A55 */
+ case UINT32_C(0x4100D460): /* Cortex-A510 */
/* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */
return 2;
case UINT32_C(0x4100D040): /* Cortex-A35 */
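
midr_score_core ranks cores so that callers can pick the biggest core's MIDR out of a mixed set. A hypothetical caller-side sketch of that selection:

    #include <stdint.h>

    /* Keep the MIDR whose score is highest; `score` would be midr_score_core. */
    static uint32_t pick_best_midr(const uint32_t* midrs, uint32_t count,
    	uint32_t (*score)(uint32_t))
    {
    	uint32_t best = midrs[0];
    	for (uint32_t i = 1; i < count; i++) {
    		if (score(midrs[i]) > score(best)) {
    			best = midrs[i];
    		}
    	}
    	return best;
    }
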
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index 8b5362b..1d4c6ee 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -91,13 +91,30 @@ void cpuinfo_arm_decode_vendor_uarch(
case 0xD0E: /* Cortex-A76AE */
*uarch = cpuinfo_uarch_cortex_a76;
break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ case 0xD40:
+ *uarch = cpuinfo_uarch_neoverse_v1;
+ break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
case 0xD41: /* Cortex-A78 */
*uarch = cpuinfo_uarch_cortex_a78;
break;
case 0xD44: /* Cortex-X1 */
*uarch = cpuinfo_uarch_cortex_x1;
break;
+ case 0xD46: /* Cortex-A510 */
+ *uarch = cpuinfo_uarch_cortex_a510;
+ break;
+ case 0xD47: /* Cortex-A710 */
+ *uarch = cpuinfo_uarch_cortex_a710;
+ break;
+ case 0xD48: /* Cortex-X2 */
+ *uarch = cpuinfo_uarch_cortex_x2;
+ break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ case 0xD49:
+ *uarch = cpuinfo_uarch_neoverse_n2;
+ break;
case 0xD4A:
*uarch = cpuinfo_uarch_neoverse_e1;
break;
diff --git a/src/arm/windows/init-by-logical-sys-info.c b/src/arm/windows/init-by-logical-sys-info.c
new file mode 100644
index 0000000..f088011
--- /dev/null
+++ b/src/arm/windows/init-by-logical-sys-info.c
@@ -0,0 +1,885 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <errno.h>
+#include <sys/types.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include "windows-arm-init.h"
+
+#define MAX_NR_OF_CACHES (cpuinfo_cache_level_max - 1)
+
+/* Call chain:
+ * cpu_info_init_by_logical_sys_info
+ * read_packages_for_processors
+ * read_cores_for_processors
+ * read_caches_for_processors
+ * read_all_logical_processor_info_of_relation
+ * parse_relation_processor_info
+ * store_package_info_per_processor
+ * store_core_info_per_processor
+ * parse_relation_cache_info
+ * store_cache_info_per_processor
+ */
+
+static uint32_t count_logical_processors(
+ const uint32_t max_group_count,
+ uint32_t* global_proc_index_per_group);
+
+static uint32_t read_packages_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info);
+
+static uint32_t read_cores_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info);
+
+static uint32_t read_caches_for_processors(
+ struct cpuinfo_processor *processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache *caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info);
+
+static uint32_t read_all_logical_processor_info_of_relation(
+ LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ struct cpuinfo_core* cores,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info);
+
+static bool parse_relation_processor_info(
+ struct cpuinfo_processor* processors,
+ uint32_t nr_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ const uint32_t info_id,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info);
+
+static bool parse_relation_cache_info(
+ struct cpuinfo_processor* processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info);
+
+static void store_package_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t package_id,
+ const uint32_t group_id,
+ const uint32_t processor_id_in_group);
+
+static void store_core_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t core_id,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info);
+
+static void store_cache_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ struct cpuinfo_cache* current_cache);
+
+static bool connect_packages_cores_clusters_by_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t nr_of_processors,
+ struct cpuinfo_package* packages,
+ const uint32_t nr_of_packages,
+ struct cpuinfo_cluster* clusters,
+ struct cpuinfo_core* cores,
+ const uint32_t nr_of_cores,
+ const struct woa_chip_info* chip_info,
+ enum cpuinfo_vendor vendor);
+
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity);
+
+
+bool cpu_info_init_by_logical_sys_info(
+ const struct woa_chip_info *chip_info,
+ const enum cpuinfo_vendor vendor)
+{
+ struct cpuinfo_processor* processors = NULL;
+ struct cpuinfo_package* packages = NULL;
+ struct cpuinfo_cluster* clusters = NULL;
+ struct cpuinfo_core* cores = NULL;
+ struct cpuinfo_cache* caches = NULL;
+ struct cpuinfo_uarch_info* uarchs = NULL;
+
+ uint32_t nr_of_packages = 0;
+ uint32_t nr_of_cores = 0;
+ uint32_t nr_of_all_caches = 0;
+ uint32_t numbers_of_caches[MAX_NR_OF_CACHES] = {0};
+
+ uint32_t nr_of_uarchs = 0;
+ bool result = false;
+
+ HANDLE heap = GetProcessHeap();
+
+ /* 1. Count available logical processor groups and processors */
+ const uint32_t max_group_count = (uint32_t) GetMaximumProcessorGroupCount();
+ cpuinfo_log_debug("detected %"PRIu32" processor group(s)", max_group_count);
+	/* We need to store the absolute processor ID offsets for every group, because
+	 * 1. We can't assume every processor group includes the same number of
+	 *    logical processors.
+	 * 2. Every processor group knows its group number and the processor IDs within
+	 *    the group, but not the global processor IDs.
+	 * 3. We need to list every logical processor by its global ID.
+	 */
+ uint32_t* global_proc_index_per_group =
+ (uint32_t*) HeapAlloc(heap, 0, max_group_count * sizeof(uint32_t));
+ if (global_proc_index_per_group == NULL) {
+ cpuinfo_log_error(
+ "failed to allocate %zu bytes for descriptions of %"PRIu32" processor groups",
+ max_group_count * sizeof(struct cpuinfo_processor), max_group_count);
+ goto clean_up;
+ }
+
+ uint32_t nr_of_processors =
+ count_logical_processors(max_group_count, global_proc_index_per_group);
+ processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_processors * sizeof(struct cpuinfo_processor));
+ if (processors == NULL) {
+ cpuinfo_log_error(
+ "failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
+ nr_of_processors * sizeof(struct cpuinfo_processor), nr_of_processors);
+ goto clean_up;
+ }
+
+	/* 2. Read topology information via the Windows API: packages, cores and caches */
+ nr_of_packages = read_packages_for_processors(
+ processors, nr_of_processors,
+ global_proc_index_per_group,
+ chip_info);
+ if (!nr_of_packages) {
+ cpuinfo_log_error("error in reading package information");
+ goto clean_up;
+ }
+ cpuinfo_log_debug("detected %"PRIu32" processor package(s)", nr_of_packages);
+
+	/* We need the EfficiencyClass to parse the uarch from the core information,
+	 * but we must iterate once to count the cores and allocate memory, then
+	 * iterate again to read the data and store it in the cpuinfo_core structures.
+	 */
+ nr_of_cores = read_cores_for_processors(
+ processors, nr_of_processors,
+ global_proc_index_per_group, NULL,
+ chip_info);
+ if (!nr_of_cores) {
+ cpuinfo_log_error("error in reading core information");
+ goto clean_up;
+ }
+ cpuinfo_log_debug("detected %"PRIu32" processor core(s)", nr_of_cores);
+
+	/* There is no API to read the number of caches, so we need to iterate twice over the caches:
+	   1. Count the caches of each type -> allocate memory
+	   2. Read out the cache data and store it in the allocated memory
+	 */
+ nr_of_all_caches = read_caches_for_processors(
+ processors, nr_of_processors,
+ caches, numbers_of_caches,
+ global_proc_index_per_group, chip_info);
+ if (!nr_of_all_caches) {
+ cpuinfo_log_error("error in reading cache information");
+ goto clean_up;
+ }
+ cpuinfo_log_debug("detected %"PRIu32" processor cache(s)", nr_of_all_caches);
+
+ /* 3. Allocate memory for package, cluster, core and cache structures */
+ packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_packages * sizeof(struct cpuinfo_package));
+ if (packages == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
+ nr_of_packages * sizeof(struct cpuinfo_package), nr_of_packages);
+ goto clean_up;
+ }
+
+	/* We don't have cluster information, so we treat each core as its own cluster. */
+ clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_cluster));
+ if (clusters == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
+ nr_of_cores * sizeof(struct cpuinfo_cluster), nr_of_cores);
+ goto clean_up;
+ }
+
+ cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_core));
+ if (cores == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
+ nr_of_cores * sizeof(struct cpuinfo_core), nr_of_cores);
+ goto clean_up;
+ }
+
+ /* We allocate one contiguous cache array for all caches, then use offsets per cache type. */
+ caches = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_all_caches * sizeof(struct cpuinfo_cache));
+ if (caches == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" caches",
+ nr_of_all_caches * sizeof(struct cpuinfo_cache), nr_of_all_caches);
+ goto clean_up;
+ }
+
+	/* 4. Read the remaining topology information that couldn't be stored during
+	 * the first, counting pass, now that the structures are allocated.
+	 */
+ nr_of_all_caches = read_caches_for_processors(
+ processors, nr_of_processors,
+ caches, numbers_of_caches, global_proc_index_per_group, chip_info);
+ if (!nr_of_all_caches) {
+ cpuinfo_log_error("error in reading cache information");
+ goto clean_up;
+ }
+
+ nr_of_cores = read_cores_for_processors(
+ processors, nr_of_processors,
+ global_proc_index_per_group, cores,
+ chip_info);
+ if (!nr_of_cores) {
+ cpuinfo_log_error("error in reading core information");
+ goto clean_up;
+ }
+
+	/* 5. Now that we have read everything we can from the system, fill in the
+	 * package, cluster and core structures.
+	 */
+ result = connect_packages_cores_clusters_by_processors(
+ processors, nr_of_processors,
+ packages, nr_of_packages,
+ clusters,
+ cores, nr_of_cores,
+ chip_info,
+ vendor);
+	if (!result) {
+ cpuinfo_log_error("error in connecting information");
+ goto clean_up;
+ }
+
+	/* 6. Count and store the uarchs of cores, assuming cores with the same uarch are adjacent */
+ enum cpuinfo_uarch prev_uarch = cpuinfo_uarch_unknown;
+ for (uint32_t i = 0; i < nr_of_cores; i++) {
+ if (prev_uarch != cores[i].uarch) {
+ nr_of_uarchs++;
+ prev_uarch = cores[i].uarch;
+ }
+ }
+ uarchs = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_uarchs * sizeof(struct cpuinfo_uarch_info));
+ if (uarchs == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs",
+ nr_of_uarchs * sizeof(struct cpuinfo_uarch_info), nr_of_uarchs);
+ goto clean_up;
+ }
+ prev_uarch = cpuinfo_uarch_unknown;
+ for (uint32_t i = 0, uarch_counter = 0; i < nr_of_cores; i++) {
+ if (prev_uarch != cores[i].uarch) {
+ prev_uarch = cores[i].uarch;
+ uarchs[uarch_counter].uarch = cores[i].uarch;
+ uarchs[uarch_counter].core_count = 1;
+ uarchs[uarch_counter].processor_count = cores[i].processor_count;
+ uarch_counter++;
+		} else if (prev_uarch != cpuinfo_uarch_unknown) {
+			uarchs[uarch_counter - 1].core_count++;
+			uarchs[uarch_counter - 1].processor_count += cores[i].processor_count;
+ }
+ }
+
+ /* 7. Commit changes */
+ cpuinfo_processors = processors;
+ cpuinfo_packages = packages;
+ cpuinfo_clusters = clusters;
+ cpuinfo_cores = cores;
+ cpuinfo_uarchs = uarchs;
+
+ cpuinfo_processors_count = nr_of_processors;
+ cpuinfo_packages_count = nr_of_packages;
+ cpuinfo_clusters_count = nr_of_cores;
+ cpuinfo_cores_count = nr_of_cores;
+ cpuinfo_uarchs_count = nr_of_uarchs;
+
+ for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) {
+ cpuinfo_cache_count[i] = numbers_of_caches[i];
+ }
+ cpuinfo_cache[cpuinfo_cache_level_1i] = caches;
+ cpuinfo_cache[cpuinfo_cache_level_1d] = cpuinfo_cache[cpuinfo_cache_level_1i] + cpuinfo_cache_count[cpuinfo_cache_level_1i];
+ cpuinfo_cache[cpuinfo_cache_level_2] = cpuinfo_cache[cpuinfo_cache_level_1d] + cpuinfo_cache_count[cpuinfo_cache_level_1d];
+ cpuinfo_cache[cpuinfo_cache_level_3] = cpuinfo_cache[cpuinfo_cache_level_2] + cpuinfo_cache_count[cpuinfo_cache_level_2];
+ cpuinfo_cache[cpuinfo_cache_level_4] = cpuinfo_cache[cpuinfo_cache_level_3] + cpuinfo_cache_count[cpuinfo_cache_level_3];
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+ result = true;
+ MemoryBarrier();
+
+ processors = NULL;
+ packages = NULL;
+ clusters = NULL;
+ cores = NULL;
+ caches = NULL;
+ uarchs = NULL;
+
+clean_up:
+	/* The pointers committed above must not be freed; they are freed only on
+	 * error, when initialization did not finish.
+	 */
+ if (processors != NULL) {
+ HeapFree(heap, 0, processors);
+ }
+ if (packages != NULL) {
+ HeapFree(heap, 0, packages);
+ }
+ if (clusters != NULL) {
+ HeapFree(heap, 0, clusters);
+ }
+ if (cores != NULL) {
+ HeapFree(heap, 0, cores);
+ }
+ if (caches != NULL) {
+ HeapFree(heap, 0, caches);
+ }
+ if (uarchs != NULL) {
+ HeapFree(heap, 0, uarchs);
+ }
+
+ /* Free the locally used temporary pointers */
+ HeapFree(heap, 0, global_proc_index_per_group);
+ global_proc_index_per_group = NULL;
+ return result;
+}
+
+static uint32_t count_logical_processors(
+ const uint32_t max_group_count,
+ uint32_t* global_proc_index_per_group)
+{
+ uint32_t nr_of_processors = 0;
+
+ for (uint32_t i = 0; i < max_group_count; i++) {
+ uint32_t nr_of_processors_per_group = GetMaximumProcessorCount((WORD) i);
+ cpuinfo_log_debug("detected %"PRIu32" processor(s) in group %"PRIu32"",
+ nr_of_processors_per_group, i);
+ global_proc_index_per_group[i] = nr_of_processors;
+ nr_of_processors += nr_of_processors_per_group;
+ }
+ return nr_of_processors;
+}
+
+static uint32_t read_packages_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info)
+{
+ return read_all_logical_processor_info_of_relation(
+ RelationProcessorPackage,
+ processors,
+ number_of_processors,
+ NULL,
+ NULL,
+ NULL,
+ global_proc_index_per_group,
+ chip_info);
+}
+
+static uint32_t read_cores_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info)
+{
+ return read_all_logical_processor_info_of_relation(
+ RelationProcessorCore,
+ processors,
+ number_of_processors,
+ NULL,
+ NULL,
+ cores,
+ global_proc_index_per_group,
+ chip_info);
+}
+
+static uint32_t read_caches_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info)
+{
+ /* Reset processor start indexes */
+ if (caches) {
+ uint32_t cache_offset = 0;
+ for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) {
+ for (uint32_t j = 0; j < numbers_of_caches[i]; j++) {
+ caches[cache_offset + j].processor_start = UINT32_MAX;
+ }
+ cache_offset += numbers_of_caches[i];
+ }
+ }
+
+ return read_all_logical_processor_info_of_relation(
+ RelationCache,
+ processors,
+ number_of_processors,
+ caches,
+ numbers_of_caches,
+ NULL,
+ global_proc_index_per_group,
+ chip_info);
+}
+
+static uint32_t read_all_logical_processor_info_of_relation(
+ LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ struct cpuinfo_core* cores,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info* chip_info)
+{
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX infos = NULL;
+ uint32_t nr_of_structs = 0;
+ DWORD info_size = 0;
+ bool result = false;
+ HANDLE heap = GetProcessHeap();
+
+ /* 1. Query the size of the information structure first */
+ if (GetLogicalProcessorInformationEx(info_type, NULL, &info_size) == FALSE) {
+ const DWORD last_error = GetLastError();
+ if (last_error != ERROR_INSUFFICIENT_BUFFER) {
+ cpuinfo_log_error(
+ "failed to query size of processor %"PRIu32" information information: error %"PRIu32"",
+ (uint32_t)info_type, (uint32_t) last_error);
+ goto clean_up;
+ }
+ }
+ /* 2. Allocate memory for the information structure */
+ infos = HeapAlloc(heap, 0, info_size);
+ if (infos == NULL) {
+ cpuinfo_log_error("failed to allocate %"PRIu32" bytes for logical processor information",
+ (uint32_t) info_size);
+ goto clean_up;
+ }
+ /* 3. Read the information structure */
+ if (GetLogicalProcessorInformationEx(info_type, infos, &info_size) == FALSE) {
+ cpuinfo_log_error("failed to query processor %"PRIu32" information: error %"PRIu32"",
+ (uint32_t)info_type, (uint32_t) GetLastError());
+ goto clean_up;
+ }
+
+ /* 4. Parse the structure and store relevant data */
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info_end =
+ (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) infos + info_size);
+ for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = infos;
+ info < info_end;
+ info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) info + info->Size))
+ {
+ if (info->Relationship != info_type) {
+ cpuinfo_log_warning(
+ "unexpected processor info type (%"PRIu32") for processor information",
+ (uint32_t) info->Relationship);
+ continue;
+ }
+
+ const uint32_t info_id = nr_of_structs++;
+
+		switch (info_type) {
+			case RelationProcessorPackage:
+			case RelationProcessorCore:
+				/* Both relations carry GROUP_AFFINITY data and are parsed identically. */
+				result = parse_relation_processor_info(
+					processors,
+					number_of_processors,
+					global_proc_index_per_group,
+					info,
+					info_id,
+					cores,
+					chip_info);
+				break;
+ case RelationCache:
+ result = parse_relation_cache_info(
+ processors,
+ caches,
+ numbers_of_caches,
+ global_proc_index_per_group,
+ info);
+ break;
+ default:
+ cpuinfo_log_error(
+ "unexpected processor info type (%"PRIu32") for processor information",
+ (uint32_t) info->Relationship);
+ result = false;
+ break;
+ }
+ if (!result) {
+ nr_of_structs = 0;
+ goto clean_up;
+ }
+ }
+clean_up:
+ /* 5. Release dynamically allocated info structure. */
+ HeapFree(heap, 0, infos);
+ infos = NULL;
+ return nr_of_structs;
+}
+
+static bool parse_relation_processor_info(
+ struct cpuinfo_processor* processors,
+ uint32_t nr_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ const uint32_t info_id,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info)
+{
+ for (uint32_t i = 0; i < info->Processor.GroupCount; i++) {
+ const uint32_t group_id = info->Processor.GroupMask[i].Group;
+ /* Bitmask representing processors in this group belonging to this package */
+ KAFFINITY group_processors_mask = info->Processor.GroupMask[i].Mask;
+ while (group_processors_mask != 0) {
+ const uint32_t processor_id_in_group =
+ low_index_from_kaffinity(group_processors_mask);
+ const uint32_t processor_global_index =
+ global_proc_index_per_group[group_id] + processor_id_in_group;
+
+			if (processor_global_index >= nr_of_processors) {
+ cpuinfo_log_error("unexpected processor index %"PRIu32"",
+ processor_global_index);
+ return false;
+ }
+
+			switch (info->Relationship) {
+ case RelationProcessorPackage:
+ store_package_info_per_processor(
+ processors, processor_global_index, info_id,
+ group_id, processor_id_in_group);
+ break;
+ case RelationProcessorCore:
+ store_core_info_per_processor(
+ processors, processor_global_index,
+ info_id, info,
+ cores, chip_info);
+ break;
+ default:
+ cpuinfo_log_error(
+ "unexpected processor info type (%"PRIu32") for processor information",
+ (uint32_t) info->Relationship);
+ break;
+ }
+			/* Clear the lowest set bit in the affinity mask. */
+ group_processors_mask &= (group_processors_mask - 1);
+ }
+ }
+ return true;
+}
+
+static bool parse_relation_cache_info(
+ struct cpuinfo_processor* processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info)
+{
+ static uint32_t l1i_counter = 0;
+ static uint32_t l1d_counter = 0;
+ static uint32_t l2_counter = 0;
+ static uint32_t l3_counter = 0;
+
+ /* Count cache types for allocation at first. */
+ if (caches == NULL) {
+ switch(info->Cache.Level) {
+ case 1:
+ switch (info->Cache.Type) {
+ case CacheInstruction:
+ numbers_of_caches[cpuinfo_cache_level_1i]++;
+ break;
+ case CacheData:
+ numbers_of_caches[cpuinfo_cache_level_1d]++;
+ break;
+ case CacheUnified:
+ break;
+ case CacheTrace:
+ break;
+ default:
+ break;
+ }
+ break;
+ case 2:
+ numbers_of_caches[cpuinfo_cache_level_2]++;
+ break;
+ case 3:
+ numbers_of_caches[cpuinfo_cache_level_3]++;
+ break;
+ }
+ return true;
+ }
+ struct cpuinfo_cache* l1i_base = caches;
+ struct cpuinfo_cache* l1d_base = l1i_base + numbers_of_caches[cpuinfo_cache_level_1i];
+ struct cpuinfo_cache* l2_base = l1d_base + numbers_of_caches[cpuinfo_cache_level_1d];
+ struct cpuinfo_cache* l3_base = l2_base + numbers_of_caches[cpuinfo_cache_level_2];
+
+ cpuinfo_log_debug(
+ "info->Cache.GroupCount:%"PRIu32", info->Cache.GroupMask:%"PRIu32","
+ "info->Cache.Level:%"PRIu32", info->Cache.Associativity:%"PRIu32","
+ "info->Cache.LineSize:%"PRIu32","
+ "info->Cache.CacheSize:%"PRIu32", info->Cache.Type:%"PRIu32"",
+ info->Cache.GroupCount, (unsigned int)info->Cache.GroupMask.Mask,
+ info->Cache.Level, info->Cache.Associativity, info->Cache.LineSize,
+ info->Cache.CacheSize, info->Cache.Type);
+
+ struct cpuinfo_cache* current_cache = NULL;
+ switch (info->Cache.Level) {
+ case 1:
+ switch (info->Cache.Type) {
+ case CacheInstruction:
+ current_cache = l1i_base + l1i_counter;
+ l1i_counter++;
+ break;
+ case CacheData:
+ current_cache = l1d_base + l1d_counter;
+ l1d_counter++;
+ break;
+ case CacheUnified:
+ break;
+ case CacheTrace:
+ break;
+ default:
+ break;
+ }
+ break;
+ case 2:
+ current_cache = l2_base + l2_counter;
+ l2_counter++;
+ break;
+ case 3:
+ current_cache = l3_base + l3_counter;
+ l3_counter++;
+ break;
+ }
+	if (current_cache == NULL) {
+		/* Unified L1 and trace caches are not counted above; skip them here too. */
+		return true;
+	}
+	current_cache->size = info->Cache.CacheSize;
+ current_cache->line_size = info->Cache.LineSize;
+ current_cache->associativity = info->Cache.Associativity;
+ /* We don't have partition and set information of caches on Windows,
+ * so we set partitions to 1 and calculate the expected sets.
+ */
+ current_cache->partitions = 1;
+ current_cache->sets =
+ current_cache->size / current_cache->line_size / current_cache->associativity;
+ if (info->Cache.Type == CacheUnified) {
+ current_cache->flags = CPUINFO_CACHE_UNIFIED;
+ }
+
+	/* Zero GroupCount is valid: the single GroupMask field still holds set bits. */
+	const uint32_t group_count = info->Cache.GroupCount != 0 ? info->Cache.GroupCount : 1;
+	for (uint32_t i = 0; i < group_count; i++) {
+ const uint32_t group_id = info->Cache.GroupMasks[i].Group;
+ /* Bitmask representing processors in this group belonging to this package */
+ KAFFINITY group_processors_mask = info->Cache.GroupMasks[i].Mask;
+ while (group_processors_mask != 0) {
+ const uint32_t processor_id_in_group =
+ low_index_from_kaffinity(group_processors_mask);
+ const uint32_t processor_global_index =
+ global_proc_index_per_group[group_id] + processor_id_in_group;
+
+ store_cache_info_per_processor(
+ processors, processor_global_index,
+ info, current_cache);
+
+			/* Clear the lowest set bit in the affinity mask. */
+ group_processors_mask &= (group_processors_mask - 1);
+ }
+ }
+ return true;
+}
+
+static void store_package_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t package_id,
+ const uint32_t group_id,
+ const uint32_t processor_id_in_group)
+{
+ processors[processor_global_index].windows_group_id =
+ (uint16_t) group_id;
+ processors[processor_global_index].windows_processor_id =
+ (uint16_t) processor_id_in_group;
+
+	/* While we are still counting the packages, memory for cpuinfo_packages
+	 * has not been allocated yet, so we only store the package pointer's offset for now.
+	 */
+ processors[processor_global_index].package =
+ (const struct cpuinfo_package*) NULL + package_id;
+}
+
+static void store_core_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t core_id,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info)
+{
+ if (cores) {
+ processors[processor_global_index].core = cores + core_id;
+ cores[core_id].core_id = core_id;
+ get_core_uarch_for_efficiency(
+ chip_info->chip_name, core_info->Processor.EfficiencyClass,
+ &(cores[core_id].uarch), &(cores[core_id].frequency));
+
+		/* We don't have cluster information, so we assume a fixed 1:1
+		 * mapping between clusters and cores.
+		 * Store the cluster ID as an offset for now; once the cluster
+		 * base address is known, it is converted to an absolute pointer.
+		 */
+ processors[processor_global_index].cluster =
+ (const struct cpuinfo_cluster*) NULL + core_id;
+ }
+}
+
+static void store_cache_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ struct cpuinfo_cache* current_cache)
+{
+ if (current_cache->processor_start > processor_global_index) {
+ current_cache->processor_start = processor_global_index;
+ }
+ current_cache->processor_count++;
+
+	switch (info->Cache.Level) {
+ case 1:
+ switch (info->Cache.Type) {
+ case CacheInstruction:
+ processors[processor_global_index].cache.l1i = current_cache;
+ break;
+ case CacheData:
+ processors[processor_global_index].cache.l1d = current_cache;
+ break;
+ case CacheUnified:
+ break;
+ case CacheTrace:
+ break;
+ default:
+ break;
+ }
+ break;
+ case 2:
+ processors[processor_global_index].cache.l2 = current_cache;
+ break;
+ case 3:
+ processors[processor_global_index].cache.l3 = current_cache;
+ break;
+ }
+}
+
+static bool connect_packages_cores_clusters_by_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t nr_of_processors,
+ struct cpuinfo_package* packages,
+ const uint32_t nr_of_packages,
+ struct cpuinfo_cluster* clusters,
+ struct cpuinfo_core* cores,
+ const uint32_t nr_of_cores,
+ const struct woa_chip_info* chip_info,
+ enum cpuinfo_vendor vendor)
+{
+	/* Adjust core and package pointers for all logical processors.
+	 * Iterate backwards so that every *_start field ends up holding the
+	 * lowest matching index.
+	 */
+ for (uint32_t i = nr_of_processors; i != 0; i--) {
+ const uint32_t processor_id = i - 1;
+ struct cpuinfo_processor* processor = processors + processor_id;
+
+ struct cpuinfo_core* core = (struct cpuinfo_core*)processor->core;
+
+		/* The pointer offsets were stored before the package and cluster
+		 * arrays were allocated, so convert them to absolute addresses now.
+		 */
+ struct cpuinfo_package* package =
+ (struct cpuinfo_package*) ((uintptr_t) packages + (uintptr_t) processor->package);
+ if (package < packages ||
+ package >= (packages + nr_of_packages)) {
+ cpuinfo_log_error("invalid package indexing");
+ return false;
+ }
+ processor->package = package;
+
+ struct cpuinfo_cluster* cluster =
+ (struct cpuinfo_cluster*) ((uintptr_t) clusters + (uintptr_t) processor->cluster);
+ if (cluster < clusters ||
+ cluster >= (clusters + nr_of_cores)) {
+ cpuinfo_log_error("invalid cluster indexing");
+ return false;
+ }
+ processor->cluster = cluster;
+
+ if (chip_info) {
+ strncpy_s(package->name, CPUINFO_PACKAGE_NAME_MAX, chip_info->chip_name_string,
+ strnlen(chip_info->chip_name_string, CPUINFO_PACKAGE_NAME_MAX));
+ }
+
+ /* Set start indexes and counts per packages / clusters / cores - going backwards */
+
+ /* This can be overwritten by lower-index processors on the same package. */
+ package->processor_start = processor_id;
+ package->processor_count++;
+
+ /* This can be overwritten by lower-index processors on the same cluster. */
+ cluster->processor_start = processor_id;
+ cluster->processor_count++;
+
+ /* This can be overwritten by lower-index processors on the same core. */
+ core->processor_start = processor_id;
+ core->processor_count++;
+ }
+ /* Fill cores */
+ for (uint32_t i = nr_of_cores; i != 0; i--) {
+ const uint32_t global_core_id = i - 1;
+ struct cpuinfo_core* core = cores + global_core_id;
+ const struct cpuinfo_processor* processor = processors + core->processor_start;
+ struct cpuinfo_package* package = (struct cpuinfo_package*) processor->package;
+ struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*) processor->cluster;
+
+ core->package = package;
+ core->cluster = cluster;
+ core->vendor = vendor;
+
+ /* This can be overwritten by lower-index cores on the same cluster/package. */
+ cluster->core_start = global_core_id;
+ cluster->core_count++;
+ package->core_start = global_core_id;
+ package->core_count++;
+ package->cluster_start = global_core_id;
+ package->cluster_count = package->core_count;
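+		/* The package's cluster fields mirror its core fields because
+		 * of the assumed 1:1 cluster-to-core mapping.
+		 */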
+
+ cluster->package = package;
+ cluster->vendor = cores[cluster->core_start].vendor;
+ cluster->uarch = cores[cluster->core_start].uarch;
+ cluster->frequency = cores[cluster->core_start].frequency;
+ }
+ return true;
+}
+
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity)
+{
+ unsigned long index;
+ _BitScanForward64(&index, (unsigned __int64) kaffinity);
+ return (uint32_t) index;
+}
diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c
new file mode 100644
index 0000000..8effc15
--- /dev/null
+++ b/src/arm/windows/init.c
@@ -0,0 +1,253 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include "windows-arm-init.h"
+
+/* Efficiency class 0 means little core and 1 means big core for now; see
+ * MAX_WOA_VALID_EFFICIENCY_CLASSES in windows-arm-init.h.
+ */
+#define VENDOR_NAME_MAX CPUINFO_PACKAGE_NAME_MAX
+
+struct cpuinfo_arm_isa cpuinfo_isa;
+
+static void set_cpuinfo_isa_fields(void);
+static bool get_system_info_from_registry(
+ struct woa_chip_info** chip_info,
+ enum cpuinfo_vendor* vendor);
+
+struct vendor_info {
+ char vendor_name[VENDOR_NAME_MAX];
+ enum cpuinfo_vendor vendor;
+};
+
+/* Please add new vendor here! */
+static struct vendor_info vendors[] = {
+ {
+ "Qualcomm",
+ cpuinfo_vendor_qualcomm
+ }
+};
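+/* A hypothetical additional entry would follow the same pattern, e.g.
+ *   { "Samsung", cpuinfo_vendor_samsung },
+ * appended inside the array above.
+ */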
+
+/* Please add new SoC/chip info here! */
+static struct woa_chip_info woa_chips[] = {
+ /* Microsoft SQ1 Kryo 495 4 + 4 cores (3 GHz + 1.80 GHz) */
+ {
+ "Microsoft SQ1",
+ woa_chip_name_microsoft_sq_1,
+ {
+ {
+ cpuinfo_uarch_cortex_a55,
+ 1800000000,
+ },
+ {
+ cpuinfo_uarch_cortex_a76,
+ 3000000000,
+ }
+ }
+ },
+ /* Microsoft SQ2 Kryo 495 4 + 4 cores (3.15 GHz + 2.42 GHz) */
+ {
+ "Microsoft SQ2",
+ woa_chip_name_microsoft_sq_2,
+ {
+ {
+ cpuinfo_uarch_cortex_a55,
+ 2420000000,
+ },
+ {
+ cpuinfo_uarch_cortex_a76,
+ 3150000000
+ }
+ }
+ }
+};
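+/* A new chip would add an entry above and a matching enumerator in
+ * windows-arm-init.h; a hypothetical example:
+ *   { "Vendor ChipX", woa_chip_name_vendor_chip_x,
+ *     { { cpuinfo_uarch_cortex_a55, 2000000000 },
+ *       { cpuinfo_uarch_cortex_a78, 3000000000 } } },
+ */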
+
+BOOL CALLBACK cpuinfo_arm_windows_init(
+ PINIT_ONCE init_once, PVOID parameter, PVOID* context)
+{
+ struct woa_chip_info *chip_info = NULL;
+ enum cpuinfo_vendor vendor = cpuinfo_vendor_unknown;
+ bool result = false;
+
+ set_cpuinfo_isa_fields();
+	result = get_system_info_from_registry(&chip_info, &vendor);
+	/* Skip topology detection when the chip could not be identified:
+	 * cpu_info_init_by_logical_sys_info dereferences chip_info.
+	 */
+	result = result && cpu_info_init_by_logical_sys_info(chip_info, vendor);
+	cpuinfo_is_initialized = result;
+	return result ? TRUE : FALSE;
+}
+
+bool get_core_uarch_for_efficiency(
+ enum woa_chip_name chip, BYTE EfficiencyClass,
+ enum cpuinfo_uarch* uarch, uint64_t* frequency)
+{
+	/* For the currently supported WoA chips, the efficiency class selects
+	 * between the pre-defined little and big cores.
+	 * Logic for any additionally supported SoCs should be implemented here.
+	 */
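+	/* For example, on a Microsoft SQ1, an EfficiencyClass of 1 resolves to
+	 * cpuinfo_uarch_cortex_a76 at 3.0 GHz in the table above.
+	 */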
+ if (uarch && frequency && chip < woa_chip_name_last &&
+ EfficiencyClass < MAX_WOA_VALID_EFFICIENCY_CLASSES) {
+ *uarch = woa_chips[chip].uarchs[EfficiencyClass].uarch;
+ *frequency = woa_chips[chip].uarchs[EfficiencyClass].frequency;
+ return true;
+ }
+ return false;
+}
+
+/* Static helper functions */
+
+static bool read_registry(
+	LPCSTR subkey,
+	LPCSTR value,
+	char** textBuffer)
+{
+ DWORD keyType = 0;
+ DWORD dataSize = 0;
+ const DWORD flags = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */
+ LSTATUS result = 0;
+ HANDLE heap = GetProcessHeap();
+
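+	/* The registry value is read in two steps: the first call queries the
+	 * required size, the second copies the data into a buffer allocated
+	 * for that size.
+	 */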
+	result = RegGetValueA(
+ HKEY_LOCAL_MACHINE,
+ subkey,
+ value,
+ flags,
+ &keyType,
+ NULL, /* Request buffer size */
+ &dataSize);
+ if (result != 0 || dataSize == 0) {
+ cpuinfo_log_error("Registry entry size read error");
+ return false;
+ }
+
+ if (*textBuffer) {
+ HeapFree(heap, 0, *textBuffer);
+ }
+ *textBuffer = HeapAlloc(heap, HEAP_ZERO_MEMORY, dataSize);
+ if (*textBuffer == NULL) {
+ cpuinfo_log_error("Registry textbuffer allocation error");
+ return false;
+ }
+
+	result = RegGetValueA(
+ HKEY_LOCAL_MACHINE,
+ subkey,
+ value,
+ flags,
+ NULL,
+ *textBuffer, /* Write string in this destination buffer */
+ &dataSize);
+ if (result != 0) {
+ cpuinfo_log_error("Registry read error");
+ return false;
+ }
+ return true;
+}
+
+static bool get_system_info_from_registry(
+ struct woa_chip_info** chip_info,
+ enum cpuinfo_vendor* vendor)
+{
+ bool result = false;
+ char* textBuffer = NULL;
+	LPCSTR cpu0_subkey =
+		"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0";
+	LPCSTR chip_name_value = "ProcessorNameString";
+	LPCSTR vendor_name_value = "VendorIdentifier";
+
+ *chip_info = NULL;
+ *vendor = cpuinfo_vendor_unknown;
+ HANDLE heap = GetProcessHeap();
+
+ /* 1. Read processor model name from registry and find in the hard-coded list. */
+ if (!read_registry(cpu0_subkey, chip_name_value, &textBuffer)) {
+ cpuinfo_log_error("Registry read error");
+ goto cleanup;
+ }
+ for (uint32_t i = 0; i < (uint32_t) woa_chip_name_last; i++) {
+ size_t compare_length = strnlen(woa_chips[i].chip_name_string, CPUINFO_PACKAGE_NAME_MAX);
+ int compare_result = strncmp(textBuffer, woa_chips[i].chip_name_string, compare_length);
+ if (compare_result == 0) {
+			*chip_info = woa_chips + i;
+ break;
+ }
+ }
+ if (*chip_info == NULL) {
+		cpuinfo_log_error("Unknown chip model name. Please add new Windows on Arm SoC/chip support!");
+ goto cleanup;
+ }
+ cpuinfo_log_debug("detected chip model name: %s", (**chip_info).chip_name_string);
+
+ /* 2. Read vendor/manufacturer name from registry. */
+ if (!read_registry(cpu0_subkey, vendor_name_value, &textBuffer)) {
+ cpuinfo_log_error("Registry read error");
+ goto cleanup;
+ }
+
+ for (uint32_t i = 0; i < (sizeof(vendors) / sizeof(struct vendor_info)); i++) {
+ if (strncmp(textBuffer, vendors[i].vendor_name,
+ strlen(vendors[i].vendor_name)) == 0) {
+ *vendor = vendors[i].vendor;
+ result = true;
+ break;
+ }
+ }
+ if (*vendor == cpuinfo_vendor_unknown) {
+ cpuinfo_log_error("Unexpected vendor: %s", textBuffer);
+ }
+
+cleanup:
+ HeapFree(heap, 0, textBuffer);
+ textBuffer = NULL;
+ return result;
+}
+
+static void set_cpuinfo_isa_fields(void)
+{
+ bool armv8 = IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE);
+ bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);
+ bool load_store_atomic = IsProcessorFeaturePresent(PF_ARM_64BIT_LOADSTORE_ATOMIC);
+ bool float_multiply_accumulate = IsProcessorFeaturePresent(PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE);
+ bool crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
+ bool float_emulated = IsProcessorFeaturePresent(PF_FLOATING_POINT_EMULATED);
+
+	/* Read all Arm-related Windows features for debugging purposes, even
+	 * those that cannot be mapped to a cpuinfo ISA feature yet.
+	 */
+#if CPUINFO_LOG_DEBUG_PARSERS
+ bool divide = IsProcessorFeaturePresent(PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE);
+ bool ext_cache = IsProcessorFeaturePresent(PF_ARM_EXTERNAL_CACHE_AVAILABLE);
+ bool vfp_registers = IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE);
+ bool arm_v81 = IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE);
+
+ cpuinfo_log_debug("divide present: %d", divide);
+ cpuinfo_log_debug("ext_cache present: %d", ext_cache);
+ cpuinfo_log_debug("vfp_registers present: %d", vfp_registers);
+ cpuinfo_log_debug("arm_v81 present: %d", arm_v81);
+#endif
+
+ cpuinfo_log_debug("armv8 present: %d", armv8);
+ cpuinfo_log_debug("crypto present: %d", crypto);
+ cpuinfo_log_debug("load_store_atomic present: %d", load_store_atomic);
+ cpuinfo_log_debug("float_multiply_accumulate present: %d", float_multiply_accumulate);
+ cpuinfo_log_debug("crc32 present: %d", crc32);
+ cpuinfo_log_debug("float_emulated: %d", float_emulated);
+
+#if CPUINFO_ARCH_ARM
+ cpuinfo_isa.armv8 = armv8;
+#endif
+#if CPUINFO_ARCH_ARM64
+ cpuinfo_isa.atomics = load_store_atomic;
+#endif
+ cpuinfo_isa.crc32 = crc32;
+ /* Windows API reports all or nothing for cryptographic instructions. */
+ cpuinfo_isa.aes = crypto;
+ cpuinfo_isa.sha1 = crypto;
+ cpuinfo_isa.sha2 = crypto;
+ cpuinfo_isa.pmull = crypto;
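+	/* Approximation: the feature checks above include no dedicated
+	 * FP16-arithmetic flag, so it is inferred from non-emulated floating
+	 * point with multiply-accumulate support.
+	 */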
+ cpuinfo_isa.fp16arith = !float_emulated && float_multiply_accumulate;
+}
diff --git a/src/arm/windows/windows-arm-init.h b/src/arm/windows/windows-arm-init.h
new file mode 100644
index 0000000..76cc51e
--- /dev/null
+++ b/src/arm/windows/windows-arm-init.h
@@ -0,0 +1,32 @@
+#pragma once
+
+/* List of known and supported Windows on Arm SoCs/chips. */
+enum woa_chip_name {
+ woa_chip_name_microsoft_sq_1 = 0,
+ woa_chip_name_microsoft_sq_2 = 1,
+ woa_chip_name_unknown = 2,
+ woa_chip_name_last = woa_chip_name_unknown
+};
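+/* New chips should be added before woa_chip_name_unknown so that
+ * woa_chip_name_last keeps matching the number of known chips.
+ */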
+
+/* Number of valid efficiency classes: class 0 denotes the little cores and
+ * class 1 the big cores on currently supported chips.
+ */
+#define MAX_WOA_VALID_EFFICIENCY_CLASSES 2
+
+/* Topology information hard-coded by SoC/chip name */
+struct core_info_by_chip_name {
+	enum cpuinfo_uarch uarch;
+	uint64_t frequency; /* Hz */
+};
+
+/* SoC/chip info that is currently not exposed through the logical system
+ * information API, but can be read from the registry.
+ */
+struct woa_chip_info {
+	char* chip_name_string;
+	enum woa_chip_name chip_name;
+	/* Indexed by efficiency class, not by chip name. */
+	struct core_info_by_chip_name uarchs[MAX_WOA_VALID_EFFICIENCY_CLASSES];
+};
+
+bool get_core_uarch_for_efficiency(
+ enum woa_chip_name chip, BYTE EfficiencyClass,
+ enum cpuinfo_uarch* uarch, uint64_t* frequency);
+
+bool cpu_info_init_by_logical_sys_info(
+ const struct woa_chip_info *chip_info,
+ enum cpuinfo_vendor vendor);
diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h
index 9c23d7c..c04620e 100644
--- a/src/cpuinfo/internal-api.h
+++ b/src/cpuinfo/internal-api.h
@@ -51,7 +51,11 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
#if defined(_WIN32) || defined(__CYGWIN__)
- CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+ #if CPUINFO_ARCH_ARM64
+ CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+ #else
+ CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+ #endif
#endif
CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void);
diff --git a/src/init.c b/src/init.c
index d61e7be..ed37c07 100644
--- a/src/init.c
+++ b/src/init.c
@@ -37,6 +37,8 @@ bool CPUINFO_ABI cpuinfo_initialize(void) {
pthread_once(&init_guard, &cpuinfo_arm_linux_init);
#elif defined(__MACH__) && defined(__APPLE__)
pthread_once(&init_guard, &cpuinfo_arm_mach_init);
+ #elif defined(_WIN32)
+ InitOnceExecuteOnce(&init_guard, &cpuinfo_arm_windows_init, NULL, NULL);
#else
cpuinfo_log_error("operating system is not supported in cpuinfo");
#endif
diff --git a/src/x86/name.c b/src/x86/name.c
index a7cc7c6..957a0d8 100644
--- a/src/x86/name.c
+++ b/src/x86/name.c
@@ -234,7 +234,7 @@ static bool transform_token(char* token_start, char* token_end, struct parser_st
return true;
}
/*
- * Erase everywhing after "SOC" on AMD System-on-Chips, e.g.
+ * Erase everything after "SOC" on AMD System-on-Chips, e.g.
* "AMD GX-212JC SOC with Radeon(TM) R2E Graphics \0"
*/
if (erase_matching(token_start, token_length, "SOC")) {