17 files changed, 1525 insertions, 140 deletions
diff --git a/src/arm/cache.c b/src/arm/cache.c
index 446b02b..1a6dd38 100644
--- a/src/arm/cache.c
+++ b/src/arm/cache.c
@@ -535,6 +535,7 @@ void cpuinfo_arm_decode_cache(
 								l2_size = 1024 * 1024;
 								break;
 							case 660:
+							case 662:
 								/* Snapdragon 660: 1 MB L2 (little cores only) */
 								l2_size = 1024 * 1024;
 								break;
@@ -1238,6 +1239,63 @@ void cpuinfo_arm_decode_cache(
 			};
 			break;
 		}
+		case cpuinfo_uarch_neoverse_n1:
+		case cpuinfo_uarch_neoverse_v1:
+		case cpuinfo_uarch_neoverse_n2:
+		{
+                        /*
+                         * ARM Neoverse N1 Core Technical Reference Manual. NOTE(review): this case also handles Neoverse V1 and N2 -- confirm their TRMs match.
+                         * A6.1. About the L1 memory system
+			 *   The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
+                         *
+                         * A6.1.1 L1 instruction-side memory system
+                         *   The L1 instruction memory system has the following key features:
+                         *    - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+                         *      Physically Tagged (PIPT) 4-way set-associative L1 data cache.
+                         *    - Fixed cache line length of 64 bytes.
+                         *
+                         * A6.1.2 L1 data-side memory system
+                         *   The L1 data memory system has the following features:
+                         *    - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+                         *      Physically Tagged (PIPT) 4-way set-associative L1 data cache.
+                         *    - Fixed cache line length of 64 bytes.
+                         *    - Pseudo-LRU cache replacement policy.
+                         *
+                         * A7.1 About the L2 memory system
+                         *   The L2 memory subsystem consist of:
+			 *    - An 8-way set associative L2 cache with a configurable size of 256KB, 512KB, or 1024KB. Cache lines
+			 *      have a fixed length of 64 bytes.
+                         *    - Strictly inclusive with L1 data cache.
+			 *    - When configured with instruction cache hardware coherency, strictly inclusive with L1 instruction cache.
+			 *    - When configured without instruction cache hardware coherency, weakly inclusive with L1 instruction cache.
+                         */
+
+			const uint32_t min_l2_size_KB= 256;
+			const uint32_t min_l3_size_KB = 0;
+
+			*l1i = (struct cpuinfo_cache) {
+				.size = 64 * 1024,
+				.associativity = 4,
+				.line_size = 64,
+			};
+			*l1d = (struct cpuinfo_cache) {
+				.size = 64 * 1024,
+				.associativity = 4,
+				.line_size = 64,
+			};
+			*l2 = (struct cpuinfo_cache) {
+				.size = min_l2_size_KB * 1024,
+				.associativity = 8,
+				.line_size = 64,
+				.flags = CPUINFO_CACHE_INCLUSIVE,
+			};
+			*l3 = (struct cpuinfo_cache) {
+				.size = min_l3_size_KB * 1024,
+				.associativity = 16,
+				.line_size = 64,
+			};
+			break;
+		}
 #if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__)
 		case cpuinfo_uarch_scorpion:
 			/*
@@ -1655,6 +1713,9 @@ uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* proc
 			 */
 			return 8 * 1024 * 1024;
 		case cpuinfo_uarch_cortex_a55:
+		case cpuinfo_uarch_neoverse_n1:
+		case cpuinfo_uarch_neoverse_v1:
+		case cpuinfo_uarch_neoverse_n2:
 		case cpuinfo_uarch_cortex_a75:
 		case cpuinfo_uarch_cortex_a76:
 		case cpuinfo_uarch_exynos_m4:
diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c
index df68aa1..fb95ee9 100644
--- a/src/arm/linux/aarch32-isa.c
+++ b/src/arm/linux/aarch32-isa.c
@@ -64,6 +64,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
 		 * - Processors with Exynos M4 cores
 		 * - Processors with Exynos M5 cores
 		 * - Neoverse N1 cores
+		 * - Neoverse V1 cores
+		 * - Neoverse N2 cores
 		 */
 		if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
 			/* Only little cores of Exynos 9810 support FP16 & RDM */
@@ -73,9 +75,11 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
 				case UINT32_C(0x4100D050): /* Cortex-A55 */
 				case UINT32_C(0x4100D060): /* Cortex-A65 */
 				case UINT32_C(0x4100D0B0): /* Cortex-A76 */
-				case UINT32_C(0x4100D0C0): /* Neoverse N1 */
 				case UINT32_C(0x4100D0D0): /* Cortex-A77 */
 				case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+				case UINT32_C(0x4100D460): /* Cortex-A510 */
+				case UINT32_C(0x4100D470): /* Cortex-A710 */
+				case UINT32_C(0x4100D480): /* Cortex-X2 */
 				case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
 				case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
 				case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
@@ -98,6 +102,9 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
 			case UINT32_C(0x4100D0D0): /* Cortex-A77 */
 			case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
 			case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+			case UINT32_C(0x4100D460): /* Cortex-A510 */
+			case UINT32_C(0x4100D470): /* Cortex-A710 */
+			case UINT32_C(0x4100D480): /* Cortex-X2 */
 			case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
 			case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
 			case UINT32_C(0x53000030): /* Exynos-M4 */
diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c
index 2000e1a..44a8f4d 100644
--- a/src/arm/linux/aarch64-isa.c
+++ b/src/arm/linux/aarch64-isa.c
@@ -41,6 +41,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
 	 * - Processors with Exynos M4 cores
 	 * - Processors with Exynos M5 cores
 	 * - Neoverse N1 cores
+	 * - Neoverse V1 cores
+	 * - Neoverse N2 cores
 	 */
 	if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
 		/* Exynos 9810 reports that it supports FP16 compute, but in fact only little cores do */
@@ -54,6 +56,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
 			case UINT32_C(0x4100D0C0): /* Neoverse N1 */
 			case UINT32_C(0x4100D0D0): /* Cortex-A77 */
 			case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+			case UINT32_C(0x4100D400): /* Neoverse V1 */
+			case UINT32_C(0x4100D490): /* Neoverse N2 */
 			case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
 			case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
 			case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
@@ -78,6 +82,9 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
 				break;
 		}
 	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_I8MM) {
+		isa->i8mm = true;
+	}
 
 	/*
 	 * Many phones ship with an old kernel configuration that doesn't report UDOT/SDOT instructions.
@@ -89,6 +96,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
 		case UINT32_C(0x4100D0C0): /* Neoverse N1 */
 		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
 		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+		case UINT32_C(0x4100D400): /* Neoverse V1 */
+		case UINT32_C(0x4100D490): /* Neoverse N2 */
 		case UINT32_C(0x4100D4A0): /* Neoverse E1 */
 		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
 		case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
@@ -124,4 +133,13 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
 	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) {
 		isa->sve2 = true;
 	}
+	// SVEBF16 is set iff SVE and BF16 are both supported, but the SVEBF16 feature flag
+	// was added in Linux kernel before the BF16 feature flag, so we check for either.
+	if (features2 & (CPUINFO_ARM_LINUX_FEATURE2_BF16 | CPUINFO_ARM_LINUX_FEATURE2_SVEBF16)) {
+		isa->bf16 = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM) {
+		isa->fhm = true;
+	}
 }
+
diff --git a/src/arm/linux/chipset.c b/src/arm/linux/chipset.c
index e36283c..f2a002d 100644
--- a/src/arm/linux/chipset.c
+++ b/src/arm/linux/chipset.c
@@ -1,3 +1,4 @@
+#include <ctype.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -281,6 +282,82 @@ static bool match_sm(
 	return true;
 }
 
+
+struct special_map_entry { /* Maps a platform name to the chipset series/model/suffix reported for it */
+	const char* platform; /* name that match_qualcomm_special compares, ignoring case, with a Hardware-string word */
+	uint16_t model;       /* copied into cpuinfo_arm_chipset.model */
+	uint8_t series;       /* cast to enum cpuinfo_arm_chipset_series; also indexes chipset_series_vendor */
+	char suffix;          /* becomes suffix[0] of the chipset; 0 when an entry does not set it */
+};
+
+static const struct special_map_entry qualcomm_hardware_map_entries[] = {
+	{
+		/* "Kona" -> Qualcomm Snapdragon 865 */
+		.platform = "Kona",
+		.model = 865,
+		.series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+	},
+	{
+		/* "Bengal" -> Qualcomm Snapdragon 662 */
+		.platform = "Bengal",
+		.model = 662,
+		.series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+	},
+	{
+		/* "Bengalp" -> Qualcomm Snapdragon 662 */
+		.platform = "Bengalp",
+		.model = 662,
+		.series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+	},
+	{
+		/* "Lito" -> Qualcomm Snapdragon 765G */
+		.platform = "Lito",
+		.model = 765,
+		.series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+		.suffix = 'G',
+	},
+	{
+		/* "Lagoon" -> Qualcomm Snapdragon, model number left as 0 */
+		.platform = "Lagoon",
+		.model = 0,
+		.series = cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+	},
+};
+
+
+/* Case-insensitive strcmp() for NUL-terminated strings: 0 if equal after tolower(), else the difference of the
+ * first mismatching lowercased characters. NOTE(review): has external linkage -- make static if file-local. */
+int strcicmp(char const *a, char const *b) {
+	while (*a != '\0' && tolower((unsigned char)*a) == tolower((unsigned char)*b)) {
+		a++, b++;
+	}
+	return tolower((unsigned char)*a) - tolower((unsigned char)*b);
+}
+
+/* Case-insensitively matches the whole [start, end) range against the Qualcomm platform names in
+ * qualcomm_hardware_map_entries; on a match fills *chipset and returns true. Never reads at or past end. */
+static bool match_qualcomm_special(const char* start, const char* end,
+		struct cpuinfo_arm_chipset chipset[restrict static 1]) {
+	const size_t length = (size_t) (end - start);
+	for (size_t i = 0; i < CPUINFO_COUNT_OF(qualcomm_hardware_map_entries); i++) {
+		const struct special_map_entry* entry = &qualcomm_hardware_map_entries[i];
+		size_t n = 0;
+		while (n < length && entry->platform[n] != '\0' && tolower((unsigned char) entry->platform[n]) == tolower((unsigned char) start[n])) {
+			n++;
+		}
+		if (n == length && entry->platform[n] == '\0') {
+			*chipset = (struct cpuinfo_arm_chipset) {
+				.vendor = chipset_series_vendor[entry->series],
+				.series = (enum cpuinfo_arm_chipset_series) entry->series,
+				.model = entry->model,
+				.suffix = { [0] = entry->suffix },
+			};
+			return true;
+		}
+	}
+	return false;
+}
+
 /**
  * Tries to match /Samsung Exynos\d{4}$/ signature (case-insensitive) for Samsung Exynos chipsets.
  * If match successful, extracts model information into \p chipset argument.
@@ -1351,7 +1428,7 @@ static bool match_and_parse_sunxi(
 		return false;
 	}
 
-	/* Compare sunXi platform id and number of cores to tabluted values to decode chipset name */
+	/* Compare sunXi platform id and number of cores to tabulated values to decode chipset name */
 	uint32_t model = 0;
 	char suffix = 0;
 	for (size_t i = 0; i < CPUINFO_COUNT_OF(sunxi_map_entries); i++) {
@@ -1752,13 +1829,6 @@ static bool is_tegra(const char* start, const char* end) {
 	return (length == 5 || start[5] == '3');
 }
 
-struct special_map_entry {
-	const char* platform;
-	uint16_t model;
-	uint8_t series;
-	char suffix;
-};
-
 static const struct special_map_entry special_hardware_map_entries[] = {
 #if CPUINFO_ARCH_ARM
 	{
@@ -2317,6 +2387,14 @@ struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_ha
 								(int) hardware_length, hardware);
 							return chipset;
 						}
+
+						if (match_qualcomm_special(pos, hardware_end, &chipset)) {
+							cpuinfo_log_debug(
+									"matched Qualcomm signature in /proc/cpuinfo Hardware string \"%.*s\"",
+									(int) hardware_length, hardware);
+							return chipset;
+						}
+
 					}
 					word_start = false;
 					break;
diff --git a/src/arm/linux/clusters.c b/src/arm/linux/clusters.c
index c7a4045..430773d 100644
--- a/src/arm/linux/clusters.c
+++ b/src/arm/linux/clusters.c
@@ -48,7 +48,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
  * @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags.
  * @param max_processors - number of elements in the @p processors array.
  * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
- *                             frequency, MIDR infromation, and core cluster (package siblings list) information.
+ *                             frequency, MIDR information, and core cluster (package siblings list) information.
  *
  * @retval true if the heuristic successfully assigned all processors into clusters of cores.
  * @retval false if known details about processors contradict the heuristic configuration of core clusters.
@@ -292,9 +292,9 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
  *   - Processors assigned to these clusters stay assigned to the same clusters
  *   - No new processors are added to these clusters
  * - Processors without pre-assigned cluster are clustered in one sequential scan:
- *   - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceeding
- *     processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceeding processor.
- *   - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceeding
+ *   - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceding
+ *     processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceding processor.
+ *   - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceding
  *     processor, the processor is assigned to a newly created cluster.
  *
  * The function must be called after parsing OS-provided information on core clusters, and usually is called only
@@ -309,7 +309,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
  *
  * @param max_processors - number of elements in the @p processors array.
  * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
- *                             frequency, MIDR infromation, and core cluster (package siblings list) information.
+ *                             frequency, MIDR information, and core cluster (package siblings list) information.
  *
  * @retval true if the heuristic successfully assigned all processors into clusters of cores.
  * @retval false if known details about processors contradict the heuristic configuration of core clusters.
@@ -331,7 +331,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
 				if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
 					if (cluster_min_frequency != processors[i].min_frequency) {
 						cpuinfo_log_info(
-							"minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); "
+							"minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceding cluster (%"PRIu32" KHz); "
 							"processor %"PRIu32" starts to a new cluster",
 							i, processors[i].min_frequency, cluster_min_frequency, i);
 						goto new_cluster;
@@ -346,7 +346,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
 				if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
 					if (cluster_max_frequency != processors[i].max_frequency) {
 						cpuinfo_log_debug(
-							"maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); "
+							"maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceding cluster (%"PRIu32" KHz); "
 							"processor %"PRIu32" starts a new cluster",
 							i, processors[i].max_frequency, cluster_max_frequency, i);
 						goto new_cluster;
@@ -361,7 +361,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
 				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
 					if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) {
 						cpuinfo_log_debug(
-							"CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceeding cluster (0x%02"PRIx32"); "
+							"CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceding cluster (0x%02"PRIx32"); "
 							"processor %"PRIu32" starts to a new cluster",
 							i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr), i);
 						goto new_cluster;
@@ -417,11 +417,11 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
 				}
 			}
 
-			/* All checks passed, attach processor to the preceeding cluster */
+			/* All checks passed, attach processor to the preceding cluster */
 			cluster_processors++;
 			processors[i].package_leader_id = cluster_start;
 			processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
-			cpuinfo_log_debug("assigned processor %"PRIu32" to preceeding cluster of processor %"PRIu32, i, cluster_start);
+			cpuinfo_log_debug("assigned processor %"PRIu32" to preceding cluster of processor %"PRIu32, i, cluster_start);
 			continue;
 
 new_cluster:
diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c
index 90e1631..817da12 100644
--- a/src/arm/linux/cpuinfo.c
+++ b/src/arm/linux/cpuinfo.c
@@ -177,6 +177,10 @@ static void parse_features(
 					#if CPUINFO_ARCH_ARM64
 						processor->features |= CPUINFO_ARM_LINUX_FEATURE_FCMA;
 					#endif
+				} else if (memcmp(feature_start, "i8mm", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_I8MM;
+					#endif
 #if CPUINFO_ARCH_ARM
 				} else if (memcmp(feature_start, "half", feature_length) == 0) {
 					processor->features |= CPUINFO_ARM_LINUX_FEATURE_HALF;
@@ -283,6 +287,10 @@ static void parse_features(
 					#if CPUINFO_ARCH_ARM64
 						processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM;
 					#endif
+				} else if (memcmp(feature_start, "asimdfhm", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM;
+					#endif
 #if CPUINFO_ARCH_ARM
 				} else if (memcmp(feature_start, "fastmult", feature_length) == 0) {
 					processor->features |= CPUINFO_ARM_LINUX_FEATURE_FASTMULT;
diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c
index 23d8439..d3da5a9 100644
--- a/src/arm/linux/init.c
+++ b/src/arm/linux/init.c
@@ -510,7 +510,7 @@ void cpuinfo_arm_linux_init(void) {
 	uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX;
 	/* Indication whether L3 (if it exists) is shared between all cores */
 	bool shared_l3 = true;
-	/* Populate cache infromation structures in l1i, l1d */
+	/* Populate cache information structures in l1i, l1d */
 	for (uint32_t i = 0; i < valid_processors; i++) {
 		if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
 			cluster_id += 1;
diff --git a/src/arm/linux/midr.c b/src/arm/linux/midr.c
index 2c3116b..0d8f03f 100644
--- a/src/arm/linux/midr.c
+++ b/src/arm/linux/midr.c
@@ -675,10 +675,10 @@ static bool cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
 
 /*
  * Initializes MIDR for leaders of core clusters in a single sequential scan:
- *  - Clusters preceeding the first reported MIDR value are assumed to have default MIDR value.
+ *  - Clusters preceding the first reported MIDR value are assumed to have default MIDR value.
  *  - Clusters following any reported MIDR value to have that MIDR value.
  *
- * @param default_midr - MIDR value that will be assigned to cluster leaders preceeding any reported MIDR value.
+ * @param default_midr - MIDR value that will be assigned to cluster leaders preceding any reported MIDR value.
  * @param processors_count - number of logical processor descriptions in the @p processors array.
  * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
  *                             and decoded core cluster (package_leader_id) information.
@@ -833,7 +833,7 @@ uint32_t cpuinfo_arm_linux_detect_cluster_midr(
 			 * 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration,
 			 *    and estimate MIDR for the other cluster under assumption that MIDR for the big cluster is known.
 			 * 3. Initialize MIDRs for core clusters in a single sequential scan:
-			 *    - Clusters preceeding the first reported MIDR value are assumed to have the last reported MIDR value.
+			 *    - Clusters preceding the first reported MIDR value are assumed to have the last reported MIDR value.
 			 *    - Clusters following any reported MIDR value to have that MIDR value.
 			 */
 
diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c
index dbea578..6a28b2d 100644
--- a/src/arm/mach/init.c
+++ b/src/arm/mach/init.c
@@ -15,43 +15,25 @@
 #include <cpuinfo/log.h>
 
 /* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */
-#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL
-	#define CPUFAMILY_ARM_MONSOON_MISTRAL   0xE81E7EF6
-#endif
 #ifndef CPUFAMILY_ARM_VORTEX_TEMPEST
-	#define CPUFAMILY_ARM_VORTEX_TEMPEST    0x07D34B9F
+	#define CPUFAMILY_ARM_VORTEX_TEMPEST     0x07D34B9F
 #endif
 #ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER
-	#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2
+	#define CPUFAMILY_ARM_LIGHTNING_THUNDER  0x462504D2
 #endif
 #ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
 	#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
 #endif
+#ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD
+	#define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D
+#endif
 
 struct cpuinfo_arm_isa cpuinfo_isa = {
-#if CPUINFO_ARCH_ARM
-	.thumb = true,
-	.thumb2 = true,
-	.thumbee = false,
-	.jazelle = false,
-	.armv5e = true,
-	.armv6 = true,
-	.armv6k = true,
-	.armv7 = true,
-	.vfpv2 = false,
-	.vfpv3 = true,
-	.d32 = true,
-	.wmmx = false,
-	.wmmx2 = false,
-	.neon = true,
-#endif
-#if CPUINFO_ARCH_ARM64
 	.aes = true,
 	.sha1 = true,
 	.sha2 = true,
 	.pmull = true,
 	.crc32 = true,
-#endif
 };
 
 static uint32_t get_sys_info(int type_specifier, const char* name) {
@@ -83,10 +65,8 @@ static uint32_t get_sys_info_by_name(const char* type_specifier) {
 	return result;
 }
 
-static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype, uint32_t core_index, uint32_t core_count) {
+static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t core_index, uint32_t core_count) {
 	switch (cpu_family) {
-		case CPUFAMILY_ARM_SWIFT:
-			return cpuinfo_uarch_swift;
 		case CPUFAMILY_ARM_CYCLONE:
 			return cpuinfo_uarch_cyclone;
 		case CPUFAMILY_ARM_TYPHOON:
@@ -107,25 +87,15 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype
 		case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
 			/* Hexa-core: 2x Firestorm + 4x Icestorm; Octa-core: 4x Firestorm + 4x Icestorm */
 			return core_index + 4 < core_count ? cpuinfo_uarch_firestorm : cpuinfo_uarch_icestorm;
+		case CPUFAMILY_ARM_AVALANCHE_BLIZZARD:
+			/* Hexa-core: 2x Avalanche + 4x Blizzard */
+			return core_index + 4 < core_count ? cpuinfo_uarch_avalanche : cpuinfo_uarch_blizzard;
 		default:
 			/* Use hw.cpusubtype for detection */
 			break;
 	}
 
-	#if CPUINFO_ARCH_ARM
-		switch (cpu_subtype) {
-			case CPU_SUBTYPE_ARM_V7:
-				return cpuinfo_uarch_cortex_a8;
-			case CPU_SUBTYPE_ARM_V7F:
-				return cpuinfo_uarch_cortex_a9;
-			case CPU_SUBTYPE_ARM_V7K:
-				return cpuinfo_uarch_cortex_a7;
-			default:
-				return cpuinfo_uarch_unknown;
-		}
-	#else
-		return cpuinfo_uarch_unknown;
-	#endif
+	return cpuinfo_uarch_unknown;
 }
 
 static void decode_package_name(char* package_name) {
@@ -299,71 +269,118 @@ void cpuinfo_arm_mach_init(void) {
 
 
 	const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
-	const uint32_t cpu_type = get_sys_info_by_name("hw.cputype");
-	const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype");
-	switch (cpu_type) {
-		case CPU_TYPE_ARM64:
-			cpuinfo_isa.aes = true;
-			cpuinfo_isa.sha1 = true;
-			cpuinfo_isa.sha2 = true;
-			cpuinfo_isa.pmull = true;
-			cpuinfo_isa.crc32 = true;
-			break;
-#if CPUINFO_ARCH_ARM
-		case CPU_TYPE_ARM:
-			switch (cpu_subtype) {
-				case CPU_SUBTYPE_ARM_V8:
-					cpuinfo_isa.armv8 = true;
-					cpuinfo_isa.aes = true;
-					cpuinfo_isa.sha1 = true;
-					cpuinfo_isa.sha2 = true;
-					cpuinfo_isa.pmull = true;
-					cpuinfo_isa.crc32 = true;
-					/* Fall-through to add ARMv7S features */
-				case CPU_SUBTYPE_ARM_V7S:
-				case CPU_SUBTYPE_ARM_V7K:
-					cpuinfo_isa.fma = true;
-					/* Fall-through to add ARMv7F features */
-				case CPU_SUBTYPE_ARM_V7F:
-					cpuinfo_isa.armv7mp = true;
-					cpuinfo_isa.fp16 = true;
-					/* Fall-through to add ARMv7 features */
-				case CPU_SUBTYPE_ARM_V7:
-					break;
-				default:
-					break;
-			}
-			break;
-#endif
-	}
+
 	/*
-	 * Support for ARMv8.1 Atomics & FP16 arithmetic instructions is supposed to be detected via
-	 * sysctlbyname calls with "hw.optional.armv8_1_atomics" and "hw.optional.neon_fp16" arguments
-	 * (see https://devstreaming-cdn.apple.com/videos/wwdc/2018/409t8zw7rumablsh/409/409_whats_new_in_llvm.pdf),
-	 * but on new iOS versions these calls just fail with EPERM.
-	 *
-	 * Thus, we whitelist CPUs known to support these instructions.
+	 * iOS 15 and macOS 12 added sysctls for ARM features; use them where possible.
+	 * Otherwise, fall back to a hardcoded set of CPUs with known support.
 	 */
-	switch (cpu_family) {
-		case CPUFAMILY_ARM_MONSOON_MISTRAL:
-		case CPUFAMILY_ARM_VORTEX_TEMPEST:
-		case CPUFAMILY_ARM_LIGHTNING_THUNDER:
-		case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
-			#if CPUINFO_ARCH_ARM64
+	const uint32_t has_feat_lse = get_sys_info_by_name("hw.optional.arm.FEAT_LSE");
+	if (has_feat_lse != 0) {
+		cpuinfo_isa.atomics = true;
+	} else {
+		// Mandatory in ARMv8.1-A, list only cores released before iOS 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_MONSOON_MISTRAL:
+			case CPUFAMILY_ARM_VORTEX_TEMPEST:
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
 				cpuinfo_isa.atomics = true;
-			#endif
-			cpuinfo_isa.fp16arith = true;
+		}
 	}
 
-	/*
-	 * There does not yet seem to exist an OS mechanism to detect support for
-	 * ARMv8.2 optional dot-product instructions, so we currently whitelist CPUs
-	 * known to support these instruction.
-	 */
-	switch (cpu_family) {
-		case CPUFAMILY_ARM_LIGHTNING_THUNDER:
-		case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
-			cpuinfo_isa.dot = true;
+	const uint32_t has_feat_rdm = get_sys_info_by_name("hw.optional.arm.FEAT_RDM");
+	if (has_feat_rdm != 0) {
+		cpuinfo_isa.rdm = true;
+	} else {
+		// Optional in ARMv8.2-A (implemented in Apple cores),
+		// list only cores released before iOS 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_MONSOON_MISTRAL:
+			case CPUFAMILY_ARM_VORTEX_TEMPEST:
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.rdm = true;
+		}
+	}
+
+	const uint32_t has_feat_fp16 = get_sys_info_by_name("hw.optional.arm.FEAT_FP16");
+	if (has_feat_fp16 != 0) {
+		cpuinfo_isa.fp16arith = true;
+	} else {
+		// Optional in ARMv8.2-A (implemented in Apple cores),
+		// list only cores released before iOS 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_MONSOON_MISTRAL:
+			case CPUFAMILY_ARM_VORTEX_TEMPEST:
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.fp16arith = true;
+		}
+	}
+
+	const uint32_t has_feat_fhm = get_sys_info_by_name("hw.optional.arm.FEAT_FHM");
+	if (has_feat_fhm != 0) {
+		cpuinfo_isa.fhm = true;
+	} else {
+		// Prior to iOS 15, use 'hw.optional.armv8_2_fhm'
+		const uint32_t has_feat_fhm_legacy = get_sys_info_by_name("hw.optional.armv8_2_fhm");
+		if (has_feat_fhm_legacy != 0) {
+			cpuinfo_isa.fhm = true;
+		} else {
+			// Mandatory in ARMv8.4-A when FP16 arithmetic is implemented,
+			// list only cores released before iOS 15 / macOS 12
+			switch (cpu_family) {
+				case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+				case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+					cpuinfo_isa.fhm = true;
+			}
+		}
+	}
+
+	const uint32_t has_feat_bf16 = get_sys_info_by_name("hw.optional.arm.FEAT_BF16");
+	if (has_feat_bf16 != 0) {
+		cpuinfo_isa.bf16 = true;
+	}
+
+	const uint32_t has_feat_fcma = get_sys_info_by_name("hw.optional.arm.FEAT_FCMA");
+	if (has_feat_fcma != 0) {
+		cpuinfo_isa.fcma = true;
+	} else {
+		// Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.fcma = true;
+		}
+	}
+
+	const uint32_t has_feat_jscvt = get_sys_info_by_name("hw.optional.arm.FEAT_JSCVT");
+	if (has_feat_jscvt != 0) {
+		cpuinfo_isa.jscvt = true;
+	} else {
+		// Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.jscvt = true;
+		}
+	}
+
+	const uint32_t has_feat_dotprod = get_sys_info_by_name("hw.optional.arm.FEAT_DotProd");
+	if (has_feat_dotprod != 0) {
+		cpuinfo_isa.dot = true;
+	} else {
+		// Mandatory in ARMv8.4-A, list only cores released before iOS 15 / macOS 12
+		switch (cpu_family) {
+			case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+				cpuinfo_isa.dot = true;
+		}
+	}
+
+	const uint32_t has_feat_i8mm = get_sys_info_by_name("hw.optional.arm.FEAT_I8MM");
+	if (has_feat_i8mm != 0) {
+		cpuinfo_isa.i8mm = true;
 	}
 
 	uint32_t num_clusters = 1;
@@ -374,7 +391,7 @@ void cpuinfo_arm_mach_init(void) {
 			.core_id = i % cores_per_package,
 			.package = packages + i / cores_per_package,
 			.vendor = cpuinfo_vendor_apple,
-			.uarch = decode_uarch(cpu_family, cpu_subtype, i, mach_topology.cores),
+			.uarch = decode_uarch(cpu_family, i, mach_topology.cores),
 		};
 		if (i != 0 && cores[i].uarch != cores[i - 1].uarch) {
 			num_clusters++;
diff --git a/src/arm/midr.h b/src/arm/midr.h
index 739dc19..b0e244c 100644
--- a/src/arm/midr.h
+++ b/src/arm/midr.h
@@ -174,23 +174,25 @@ inline static uint32_t midr_score_core(uint32_t midr) {
 		case UINT32_C(0x53000030): /* Exynos M4 */
 		case UINT32_C(0x53000040): /* Exynos M5 */
 		case UINT32_C(0x4100D440): /* Cortex-X1 */
-			/* These cores are in big role w.r.t Cortex-A75/-A76/-A77/-A78 */
+		case UINT32_C(0x4100D480): /* Cortex-X2 */
+			/* These cores are in big role w.r.t Cortex-A75/-A76/-A77/-A78/-A710 */
 			return 6;
+		case UINT32_C(0x4100D080): /* Cortex-A72 */
+		case UINT32_C(0x4100D090): /* Cortex-A73 */
+		case UINT32_C(0x4100D0A0): /* Cortex-A75 */
+		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+		case UINT32_C(0x4100D410): /* Cortex-A78 */
+		case UINT32_C(0x4100D470): /* Cortex-A710 */
+		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
 		case UINT32_C(0x4E000030): /* Denver 2 */
+		case UINT32_C(0x51002050): /* Kryo Gold */
+		case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
+		case UINT32_C(0x51008020): /* Kryo 385 Gold */
+		case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
 		case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */
 		case UINT32_C(0x53000020): /* Exynos M3 */
-		case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
-		case UINT32_C(0x51008020): /* Kryo 385 Gold */
-		case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
-		case UINT32_C(0x51002050): /* Kryo Gold */
-		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
-		case UINT32_C(0x4100D410): /* Cortex-A78 */
-		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
-		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
-		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
-		case UINT32_C(0x4100D0A0): /* Cortex-A75 */
-		case UINT32_C(0x4100D090): /* Cortex-A73 */
-		case UINT32_C(0x4100D080): /* Cortex-A72 */
 #if CPUINFO_ARCH_ARM
 		case UINT32_C(0x4100C0F0): /* Cortex-A15 */
 		case UINT32_C(0x4100C0E0): /* Cortex-A17 */
@@ -205,8 +207,9 @@ inline static uint32_t midr_score_core(uint32_t midr) {
 #if CPUINFO_ARCH_ARM64
 		case UINT32_C(0x4100D060): /* Cortex-A65 */
 #endif /* CPUINFO_ARCH_ARM64 */
-		case UINT32_C(0x4100D050): /* Cortex-A55 */
 		case UINT32_C(0x4100D030): /* Cortex-A53 */
+		case UINT32_C(0x4100D050): /* Cortex-A55 */
+		case UINT32_C(0x4100D460): /* Cortex-A510 */
 			/* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */
 			return 2;
 		case UINT32_C(0x4100D040): /* Cortex-A35 */
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index 8b5362b..1d4c6ee 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -91,13 +91,30 @@ void cpuinfo_arm_decode_vendor_uarch(
 				case 0xD0E: /* Cortex-A76AE */
 					*uarch = cpuinfo_uarch_cortex_a76;
 					break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+				case 0xD40:
+					*uarch = cpuinfo_uarch_neoverse_v1;
+					break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
 				case 0xD41: /* Cortex-A78 */
 					*uarch = cpuinfo_uarch_cortex_a78;
 					break;
 				case 0xD44: /* Cortex-X1 */
 					*uarch = cpuinfo_uarch_cortex_x1;
 					break;
+				case 0xD46: /* Cortex-A510 */
+					*uarch = cpuinfo_uarch_cortex_a510;
+					break;
+				case 0xD47: /* Cortex-A710 */
+					*uarch = cpuinfo_uarch_cortex_a710;
+					break;
+				case 0xD48: /* Cortex-X2 */
+					*uarch = cpuinfo_uarch_cortex_x2;
+					break;
 #if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+				case 0xD49:
+					*uarch = cpuinfo_uarch_neoverse_n2;
+					break;
 				case 0xD4A:
 					*uarch = cpuinfo_uarch_neoverse_e1;
 					break;
diff --git a/src/arm/windows/init-by-logical-sys-info.c b/src/arm/windows/init-by-logical-sys-info.c
new file mode 100644
index 0000000..f088011
--- /dev/null
+++ b/src/arm/windows/init-by-logical-sys-info.c
@@ -0,0 +1,885 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <errno.h>
+#include <sys/types.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include "windows-arm-init.h"
+
+#define MAX_NR_OF_CACHES	(cpuinfo_cache_level_max - 1)
+
+/* Call chain:
+ * cpu_info_init_by_logical_sys_info
+ * 		read_packages_for_processors
+ * 		read_cores_for_processors
+ * 		read_caches_for_processors
+ * 			read_all_logical_processor_info_of_relation
+ * 				parse_relation_processor_info
+ * 					store_package_info_per_processor
+ * 					store_core_info_per_processor
+ * 				parse_relation_cache_info
+ * 					store_cache_info_per_processor
+ */
+
+static uint32_t count_logical_processors(
+	const uint32_t max_group_count,
+	uint32_t* global_proc_index_per_group);
+
+static uint32_t read_packages_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info *chip_info);
+
+static uint32_t read_cores_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info *chip_info);
+
+static uint32_t read_caches_for_processors(
+	struct cpuinfo_processor *processors,
+	const uint32_t number_of_processors,
+	struct cpuinfo_cache *caches,
+	uint32_t* numbers_of_caches,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info *chip_info);
+
+static uint32_t read_all_logical_processor_info_of_relation(
+	LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	struct cpuinfo_core* cores,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info *chip_info);
+
+static bool parse_relation_processor_info(
+	struct cpuinfo_processor* processors,
+	uint32_t nr_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+	const uint32_t info_id,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info *chip_info);
+
+static bool parse_relation_cache_info(
+	struct cpuinfo_processor* processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	const uint32_t* global_proc_index_per_group,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info);
+
+static void store_package_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	const uint32_t package_id,
+	const uint32_t group_id,
+	const uint32_t processor_id_in_group);
+
+static void store_core_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	const uint32_t core_id,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info *chip_info);
+
+static void store_cache_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+	struct cpuinfo_cache* current_cache);
+
+static bool connect_packages_cores_clusters_by_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t nr_of_processors,
+	struct cpuinfo_package* packages,
+	const uint32_t nr_of_packages,
+	struct cpuinfo_cluster* clusters,
+	struct cpuinfo_core* cores,
+	const uint32_t nr_of_cores,
+	const struct woa_chip_info* chip_info,
+	enum cpuinfo_vendor vendor);
+
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity);
+
+
+/*
+ * Builds the global cpuinfo topology tables (processors, packages, clusters,
+ * cores, caches and uarchs) from GetLogicalProcessorInformationEx data.
+ *
+ * chip_info: SoC description used to map core efficiency classes to
+ *            uarch/frequency and to name packages.
+ * vendor:    vendor value stored into every core.
+ *
+ * Returns true when all tables were built and published to the cpuinfo_*
+ * globals; on any failure nothing is published, the partially built tables
+ * are freed and false is returned.
+ */
+bool cpu_info_init_by_logical_sys_info(
+	const struct woa_chip_info *chip_info,
+	const enum cpuinfo_vendor vendor)
+{
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_package* packages = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cache* caches = NULL;
+	struct cpuinfo_uarch_info* uarchs = NULL;
+
+	uint32_t nr_of_packages = 0;
+	uint32_t nr_of_cores = 0;
+	uint32_t nr_of_all_caches = 0;
+	uint32_t numbers_of_caches[MAX_NR_OF_CACHES] = {0};
+
+	uint32_t nr_of_uarchs = 0;
+	bool result = false;
+
+	HANDLE heap = GetProcessHeap();
+
+	/* 1. Count available logical processor groups and processors */
+	const uint32_t max_group_count = (uint32_t) GetMaximumProcessorGroupCount();
+	cpuinfo_log_debug("detected %"PRIu32" processor group(s)", max_group_count);
+	/* We need to store the absolute processor ID offsets for every group, because
+	 *  1. We can't assume every processor group includes the same number of
+	 *     logical processors.
+	 *  2. Every processor group knows its group number and processor IDs within
+	 *     the group, but not the global processor IDs.
+	 *  3. We need to list every logical processor by global ID.
+	*/
+	uint32_t* global_proc_index_per_group =
+		(uint32_t*) HeapAlloc(heap, 0, max_group_count * sizeof(uint32_t));
+	if (global_proc_index_per_group == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %"PRIu32" processor groups",
+			max_group_count * sizeof(uint32_t), max_group_count);
+		goto clean_up;
+	}
+
+	uint32_t nr_of_processors =
+		count_logical_processors(max_group_count, global_proc_index_per_group);
+	processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_processors * sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
+			nr_of_processors * sizeof(struct cpuinfo_processor), nr_of_processors);
+		goto clean_up;
+	}
+
+	/* 2. Read topology information via MSDN API: packages, cores and caches */
+	nr_of_packages = read_packages_for_processors(
+						processors, nr_of_processors,
+						global_proc_index_per_group,
+						chip_info);
+	if (!nr_of_packages) {
+		cpuinfo_log_error("error in reading package information");
+		goto clean_up;
+	}
+	cpuinfo_log_debug("detected %"PRIu32" processor package(s)", nr_of_packages);
+
+	/* We need the EfficiencyClass to parse uarch from the core information,
+	 * but we need to iterate first to count cores and allocate memory then
+	 * we will iterate again to read and store data to cpuinfo_core structures.
+	 */
+	nr_of_cores = read_cores_for_processors(
+					processors, nr_of_processors,
+					global_proc_index_per_group, NULL,
+					chip_info);
+	if (!nr_of_cores) {
+		cpuinfo_log_error("error in reading core information");
+		goto clean_up;
+	}
+	cpuinfo_log_debug("detected %"PRIu32" processor core(s)", nr_of_cores);
+
+	/* There is no API to read number of caches, so we need to iterate twice on caches:
+		1. Count all type of caches -> allocate memory
+		2. Read out cache data and store to allocated memory
+	 */
+	nr_of_all_caches = read_caches_for_processors(
+						processors, nr_of_processors,
+						caches, numbers_of_caches,
+						global_proc_index_per_group, chip_info);
+	if (!nr_of_all_caches) {
+		cpuinfo_log_error("error in reading cache information");
+		goto clean_up;
+	}
+	cpuinfo_log_debug("detected %"PRIu32" processor cache(s)", nr_of_all_caches);
+
+	/* 3. Allocate memory for package, cluster, core and cache structures */
+	packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_packages * sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
+			nr_of_packages * sizeof(struct cpuinfo_package), nr_of_packages);
+		goto clean_up;
+	}
+
+	/* We don't have cluster information so we explicitly set clusters to equal to cores. */
+	clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
+			nr_of_cores * sizeof(struct cpuinfo_cluster), nr_of_cores);
+		goto clean_up;
+	}
+
+	cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
+			nr_of_cores * sizeof(struct cpuinfo_core), nr_of_cores);
+		goto clean_up;
+	}
+
+	/* We allocate one contiguous cache array for all caches, then use offsets per cache type. */
+	caches = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_all_caches * sizeof(struct cpuinfo_cache));
+	if (caches == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" caches",
+			nr_of_all_caches * sizeof(struct cpuinfo_cache), nr_of_all_caches);
+		goto clean_up;
+	}
+
+	/* 4. Read the topology information that could not be stored in the first
+	 *    (counting) round because the structures were not allocated yet.
+	 */
+	nr_of_all_caches = read_caches_for_processors(
+						processors, nr_of_processors,
+						caches, numbers_of_caches, global_proc_index_per_group, chip_info);
+	if (!nr_of_all_caches) {
+		cpuinfo_log_error("error in reading cache information");
+		goto clean_up;
+	}
+
+	nr_of_cores = read_cores_for_processors(
+		processors, nr_of_processors,
+		global_proc_index_per_group, cores,
+		chip_info);
+	if (!nr_of_cores) {
+		cpuinfo_log_error("error in reading core information");
+		goto clean_up;
+	}
+
+	/* 5. Now that we read out everything from the system we can, fill the package, cluster
+	 *    and core structures respectively.
+	 */
+	result = connect_packages_cores_clusters_by_processors(
+				processors, nr_of_processors,
+				packages, nr_of_packages,
+				clusters,
+				cores, nr_of_cores,
+				chip_info,
+				vendor);
+	if(!result) {
+		cpuinfo_log_error("error in connecting information");
+		goto clean_up;
+	}
+
+	/* 6. Count and store uarchs of cores, assuming same uarchs are neighbors */
+	enum cpuinfo_uarch prev_uarch = cpuinfo_uarch_unknown;
+	for (uint32_t i = 0; i < nr_of_cores; i++) {
+		if (prev_uarch != cores[i].uarch) {
+			nr_of_uarchs++;
+			prev_uarch = cores[i].uarch;
+		}
+	}
+	uarchs = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_uarchs * sizeof(struct cpuinfo_uarch_info));
+	if (uarchs == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs",
+			nr_of_uarchs * sizeof(struct cpuinfo_uarch_info), nr_of_uarchs);
+		goto clean_up;
+	}
+	prev_uarch = cpuinfo_uarch_unknown;
+	for (uint32_t i = 0, uarch_counter = 0; i < nr_of_cores; i++) {
+		if (prev_uarch != cores[i].uarch) {
+			prev_uarch = cores[i].uarch;
+			uarchs[uarch_counter].uarch = cores[i].uarch;
+			uarchs[uarch_counter].core_count = 1;
+			uarchs[uarch_counter].processor_count = cores[i].processor_count;
+			uarch_counter++;
+		} else if (prev_uarch != cpuinfo_uarch_unknown) {
+			/* uarch_counter already points past the entry being extended. */
+			uarchs[uarch_counter - 1].core_count++;
+			uarchs[uarch_counter - 1].processor_count += cores[i].processor_count;
+		}
+	}
+
+	/* 7. Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_packages = packages;
+	cpuinfo_clusters = clusters;
+	cpuinfo_cores = cores;
+	cpuinfo_uarchs = uarchs;
+
+	cpuinfo_processors_count = nr_of_processors;
+	cpuinfo_packages_count = nr_of_packages;
+	cpuinfo_clusters_count = nr_of_cores;
+	cpuinfo_cores_count = nr_of_cores;
+	cpuinfo_uarchs_count = nr_of_uarchs;
+
+	for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) {
+		cpuinfo_cache_count[i] = numbers_of_caches[i];
+	}
+	cpuinfo_cache[cpuinfo_cache_level_1i] = caches;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = cpuinfo_cache[cpuinfo_cache_level_1i] + cpuinfo_cache_count[cpuinfo_cache_level_1i];
+	cpuinfo_cache[cpuinfo_cache_level_2]  = cpuinfo_cache[cpuinfo_cache_level_1d] + cpuinfo_cache_count[cpuinfo_cache_level_1d];
+	cpuinfo_cache[cpuinfo_cache_level_3]  = cpuinfo_cache[cpuinfo_cache_level_2]  + cpuinfo_cache_count[cpuinfo_cache_level_2];
+	cpuinfo_cache[cpuinfo_cache_level_4]  = cpuinfo_cache[cpuinfo_cache_level_3]  + cpuinfo_cache_count[cpuinfo_cache_level_3];
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	result = true;
+	MemoryBarrier();
+
+	processors = NULL;
+	packages = NULL;
+	clusters = NULL;
+	cores = NULL;
+	caches = NULL;
+	uarchs = NULL;
+
+clean_up:
+	/* Only non-NULL on failure: published tables had their local pointers
+	 * reset above. HeapFree accepts a NULL pointer. */
+	HeapFree(heap, 0, processors);
+	HeapFree(heap, 0, packages);
+	HeapFree(heap, 0, clusters);
+	HeapFree(heap, 0, cores);
+	HeapFree(heap, 0, caches);
+	HeapFree(heap, 0, uarchs);
+
+	/* Free the locally used temporary pointers */
+	HeapFree(heap, 0, global_proc_index_per_group);
+	global_proc_index_per_group = NULL;
+	return result;
+}
+
+/* Returns the total processor count over all groups and records each group's first global index. */
+static uint32_t count_logical_processors(
+	const uint32_t max_group_count,
+	uint32_t* global_proc_index_per_group)
+{
+	uint32_t total = 0;
+	for (uint32_t group = 0; group < max_group_count; group++) {
+		const uint32_t in_group = GetMaximumProcessorCount((WORD) group);
+		cpuinfo_log_debug("detected %"PRIu32" processor(s) in group %"PRIu32"", in_group, group);
+		/* This group's processors follow those of all earlier groups. */
+		global_proc_index_per_group[group] = total;
+		total += in_group;
+	}
+	return total;
+}
+
+/* Reads the RelationProcessorPackage records; returns how many were parsed (the caller treats 0 as failure). */
+static uint32_t read_packages_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info *chip_info)
+{
+	/* Package records need neither the cache tables nor the core table. */
+	struct cpuinfo_cache* const no_caches = NULL;
+	uint32_t* const no_cache_counts = NULL;
+	struct cpuinfo_core* const no_cores = NULL;
+	return read_all_logical_processor_info_of_relation(
+		RelationProcessorPackage, processors, number_of_processors,
+		no_caches, no_cache_counts, no_cores,
+		global_proc_index_per_group, chip_info);
+}
+
+/* Reads the RelationProcessorCore records; with cores == NULL nothing is stored and the records are only counted. */
+static uint32_t read_cores_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info *chip_info)
+{
+	return read_all_logical_processor_info_of_relation(
+		RelationProcessorCore,
+		processors, number_of_processors,
+		NULL, /* caches: not used for core records */
+		NULL, /* numbers_of_caches: not used for core records */
+		cores,
+		global_proc_index_per_group,
+		chip_info);
+}
+
+/*
+ * Reads RelationCache records. With caches == NULL the records are only counted
+ * per cache type in numbers_of_caches; otherwise the cache table is filled.
+ */
+static uint32_t read_caches_for_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info *chip_info)
+{
+	if (caches != NULL) {
+		/* Mark every counted cache as "no processor seen yet" so that the
+		 * smallest processor index can be tracked while filling. */
+		uint32_t total_caches = 0;
+		for (uint32_t level = 0; level < MAX_NR_OF_CACHES; level++) {
+			total_caches += numbers_of_caches[level];
+		}
+		for (uint32_t c = 0; c < total_caches; c++) {
+			caches[c].processor_start = UINT32_MAX;
+		}
+	}
+
+	return read_all_logical_processor_info_of_relation(
+		RelationCache, processors, number_of_processors,
+		caches, numbers_of_caches, NULL,
+		global_proc_index_per_group, chip_info);
+}
+
+/*
+ * Queries every record of relationship info_type from
+ * GetLogicalProcessorInformationEx and hands each one to the matching parser.
+ * caches/numbers_of_caches are only used for RelationCache and cores only for
+ * processor relations; the per-record parsers define what NULL means for them.
+ *
+ * Returns the number of records parsed, or 0 if the query, the allocation or
+ * any record parser failed.
+ */
+static uint32_t read_all_logical_processor_info_of_relation(
+	LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+	struct cpuinfo_processor* processors,
+	const uint32_t number_of_processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	struct cpuinfo_core* cores,
+	const uint32_t* global_proc_index_per_group,
+	const struct woa_chip_info* chip_info)
+{
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX records = NULL;
+	uint32_t record_count = 0;
+	DWORD buffer_size = 0;
+	HANDLE heap = GetProcessHeap();
+
+	/* 1. Size query: expected to fail with ERROR_INSUFFICIENT_BUFFER and report the size. */
+	if (!GetLogicalProcessorInformationEx(info_type, NULL, &buffer_size)) {
+		const DWORD last_error = GetLastError();
+		if (last_error != ERROR_INSUFFICIENT_BUFFER) {
+			cpuinfo_log_error(
+				"failed to query size of processor %"PRIu32" information information: error %"PRIu32"",
+				(uint32_t)info_type, (uint32_t) last_error);
+			goto clean_up;
+		}
+	}
+	/* 2. Buffer for the variable-sized records. */
+	records = HeapAlloc(heap, 0, buffer_size);
+	if (records == NULL) {
+		cpuinfo_log_error("failed to allocate %"PRIu32" bytes for logical processor information",
+			(uint32_t) buffer_size);
+		goto clean_up;
+	}
+	/* 3. Actual query. */
+	if (!GetLogicalProcessorInformationEx(info_type, records, &buffer_size)) {
+		cpuinfo_log_error("failed to query processor %"PRIu32" information: error %"PRIu32"",
+			(uint32_t)info_type, (uint32_t) GetLastError());
+		goto clean_up;
+	}
+
+	/* 4. Walk the records; each one carries its own byte size. */
+	const uintptr_t records_end = (uintptr_t) records + buffer_size;
+	for (uintptr_t cursor = (uintptr_t) records; cursor < records_end; ) {
+		PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX record =
+			(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) cursor;
+		cursor += record->Size;
+
+		if (record->Relationship != info_type) {
+			cpuinfo_log_warning(
+				"unexpected processor info type (%"PRIu32") for processor information",
+				(uint32_t) record->Relationship);
+			continue;
+		}
+
+		/* Records of the requested type are numbered in the order they appear. */
+		const uint32_t record_id = record_count++;
+		bool parsed = false;
+		switch(info_type) {
+			case RelationProcessorPackage:
+			case RelationProcessorCore:
+				/* Packages and cores share one parser; it dispatches on the record type. */
+				parsed = parse_relation_processor_info(
+							processors,
+							number_of_processors,
+							global_proc_index_per_group,
+							record,
+							record_id,
+							cores,
+							chip_info);
+			break;
+			case RelationCache:
+				parsed = parse_relation_cache_info(
+							processors,
+							caches,
+							numbers_of_caches,
+							global_proc_index_per_group,
+							record);
+			break;
+			default:
+				cpuinfo_log_error(
+					"unexpected processor info type (%"PRIu32") for processor information",
+					(uint32_t) record->Relationship);
+			break;
+		}
+		if (!parsed) {
+			/* A failed record invalidates the whole read. */
+			record_count = 0;
+			break;
+		}
+	}
+clean_up:
+	/* 5. Release the record buffer (NULL if we never got that far). */
+	HeapFree(heap, 0, records);
+	return record_count;
+}
+
+/*
+ * Stores info_id for every logical processor named by the group masks of one
+ * package or core record; returns false if a mask bit maps outside processors.
+ */
+static bool parse_relation_processor_info(
+	struct cpuinfo_processor* processors,
+	uint32_t nr_of_processors,
+	const uint32_t* global_proc_index_per_group,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+	const uint32_t info_id,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info *chip_info)
+{
+	const bool is_package = (info->Relationship == RelationProcessorPackage);
+	const bool is_core = (info->Relationship == RelationProcessorCore);
+	for (uint32_t g = 0; g < info->Processor.GroupCount; g++) {
+		const uint32_t group_id = info->Processor.GroupMask[g].Group;
+		/* Each set bit is one processor of this group; the lowest bit is consumed per iteration. */
+		for (KAFFINITY pending = info->Processor.GroupMask[g].Mask;
+			pending != 0;
+			pending &= (pending - 1))
+		{
+			const uint32_t processor_id_in_group = low_index_from_kaffinity(pending);
+			const uint32_t processor_global_index =
+				global_proc_index_per_group[group_id] + processor_id_in_group;
+			if (processor_global_index >= nr_of_processors) {
+				cpuinfo_log_error("unexpected processor index %"PRIu32"",
+					processor_global_index);
+				return false;
+			}
+			if (is_package) {
+				store_package_info_per_processor(
+					processors, processor_global_index, info_id,
+					group_id, processor_id_in_group);
+			} else if (is_core) {
+				store_core_info_per_processor(
+					processors, processor_global_index,
+					info_id, info,
+					cores, chip_info);
+			} else {
+				/* Any other relationship is only reported; the walk continues. */
+				cpuinfo_log_error(
+					"unexpected processor info type (%"PRIu32") for processor information",
+					(uint32_t) info->Relationship);
+			}
+		}
+	}
+	return true;
+}
+
+/*
+ * Handles one RelationCache record. With caches == NULL it only counts the
+ * L1I/L1D/L2/L3 caches in numbers_of_caches; otherwise it fills the next free
+ * slot of that cache type and links it to every processor in its group masks.
+ */
+static bool parse_relation_cache_info(
+	struct cpuinfo_processor* processors,
+	struct cpuinfo_cache* caches,
+	uint32_t* numbers_of_caches,
+	const uint32_t* global_proc_index_per_group,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info)
+{
+	static uint32_t l1i_counter = 0; /* next free slot per type; kept across calls, never reset */
+	static uint32_t l1d_counter = 0;
+	static uint32_t l2_counter = 0;
+	static uint32_t l3_counter = 0;
+
+	/* Count cache types for allocation at first. */
+	if (caches == NULL) {
+		switch(info->Cache.Level) {
+			case 1:
+				switch (info->Cache.Type) {
+					case CacheInstruction:
+						numbers_of_caches[cpuinfo_cache_level_1i]++;
+					break;
+					case CacheData:
+						numbers_of_caches[cpuinfo_cache_level_1d]++;
+					break;
+					default: /* unified and trace L1 caches are not tracked */
+					break;
+				}
+			break;
+			case 2:
+				numbers_of_caches[cpuinfo_cache_level_2]++;
+			break;
+			case 3:
+				numbers_of_caches[cpuinfo_cache_level_3]++;
+			break;
+		}
+		return true;
+	}
+	struct cpuinfo_cache* l1i_base = caches;
+	struct cpuinfo_cache* l1d_base = l1i_base + numbers_of_caches[cpuinfo_cache_level_1i];
+	struct cpuinfo_cache* l2_base  = l1d_base + numbers_of_caches[cpuinfo_cache_level_1d];
+	struct cpuinfo_cache* l3_base  = l2_base  + numbers_of_caches[cpuinfo_cache_level_2];
+
+	cpuinfo_log_debug(
+		"info->Cache.GroupCount:%"PRIu32", info->Cache.GroupMask:%"PRIu32","
+		"info->Cache.Level:%"PRIu32", info->Cache.Associativity:%"PRIu32","
+		"info->Cache.LineSize:%"PRIu32","
+		"info->Cache.CacheSize:%"PRIu32", info->Cache.Type:%"PRIu32"",
+		(uint32_t) info->Cache.GroupCount, (uint32_t) info->Cache.GroupMask.Mask,
+		(uint32_t) info->Cache.Level, (uint32_t) info->Cache.Associativity, (uint32_t) info->Cache.LineSize,
+		(uint32_t) info->Cache.CacheSize, (uint32_t) info->Cache.Type);
+
+	struct cpuinfo_cache* current_cache = NULL;
+	switch (info->Cache.Level) {
+		case 1:
+			switch (info->Cache.Type) {
+				case CacheInstruction:
+					current_cache = l1i_base + l1i_counter;
+					l1i_counter++;
+				break;
+				case CacheData:
+					current_cache = l1d_base + l1d_counter;
+					l1d_counter++;
+				break;
+				default:
+				break;
+			}
+		break;
+		case 2:
+			current_cache = l2_base + l2_counter;
+			l2_counter++;
+		break;
+		case 3:
+			current_cache = l3_base + l3_counter;
+			l3_counter++;
+		break;
+	}
+	if (current_cache == NULL) {
+		/* Cache kinds the counting pass ignores have no slot in the table: skip them. */
+		return true;
+	}
+	current_cache->size = info->Cache.CacheSize;
+	current_cache->line_size = info->Cache.LineSize;
+	current_cache->associativity = info->Cache.Associativity;
+	/* We don't have partition and set information of caches on Windows,
+	 * so we set partitions to 1 and calculate the expected sets.
+	 */
+	current_cache->partitions = 1;
+	current_cache->sets =
+		current_cache->size / current_cache->line_size / current_cache->associativity;
+	if (info->Cache.Type == CacheUnified) {
+		current_cache->flags = CPUINFO_CACHE_UNIFIED;
+	}
+
+	/* Zero GroupCount is valid: the single GroupMask (same storage as GroupMasks[0]) still holds the bits. */
+	const uint32_t group_count = (info->Cache.GroupCount != 0) ? info->Cache.GroupCount : 1;
+	for (uint32_t i = 0; i < group_count; i++) {
+		const uint32_t group_id = info->Cache.GroupMasks[i].Group;
+		/* Bitmask representing processors in this group sharing this cache */
+		KAFFINITY group_processors_mask = info->Cache.GroupMasks[i].Mask;
+		while (group_processors_mask != 0) {
+			const uint32_t processor_id_in_group =
+				low_index_from_kaffinity(group_processors_mask);
+			const uint32_t processor_global_index =
+				global_proc_index_per_group[group_id] + processor_id_in_group;
+			store_cache_info_per_processor(
+				processors, processor_global_index,
+				info, current_cache);
+			/* Clear the bits in affinity mask, lower the least set bit. */
+			group_processors_mask &= (group_processors_mask - 1);
+		}
+	}
+	return true;
+}
+
+/* Remembers the Windows group/processor ids and the package index of one logical processor. */
+static void store_package_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	const uint32_t package_id,
+	const uint32_t group_id,
+	const uint32_t processor_id_in_group)
+{
+	struct cpuinfo_processor* const processor = processors + processor_global_index;
+
+	processor->windows_group_id = (uint16_t) group_id;
+	processor->windows_processor_id = (uint16_t) processor_id_in_group;
+
+	/* The packages array is not allocated yet (packages are still being counted),
+	 * so the package is stored as an offset from NULL and rebased later.
+	 */
+	processor->package = (const struct cpuinfo_package*) NULL + package_id;
+}
+
+/* Links one logical processor to its core and, when chip info is known, sets the core's uarch/frequency. */
+static void store_core_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	const uint32_t core_id,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+	struct cpuinfo_core* cores,
+	const struct woa_chip_info *chip_info)
+{
+	/* Counting pass: there is no cores table yet, so nothing can be stored. */
+	if (cores == NULL) {
+		return;
+	}
+	processors[processor_global_index].core = cores + core_id;
+	cores[core_id].core_id = core_id;
+	/* chip_info may be NULL (other code in this file checks for it); uarch/frequency then stay as initialized. */
+	if (chip_info != NULL) {
+		get_core_uarch_for_efficiency(
+			chip_info->chip_name, core_info->Processor.EfficiencyClass,
+			&(cores[core_id].uarch), &(cores[core_id].frequency));
+	}
+	/* No cluster information: one cluster per core, stored as an offset from NULL and rebased later. */
+	processors[processor_global_index].cluster = (const struct cpuinfo_cluster*) NULL + core_id;
+}
+
+/*
+ * Links one logical processor to a cache.
+ *
+ * processors/processor_global_index: table and index of the processor to link.
+ * info:          the RelationCache record describing the cache (level, type).
+ * current_cache: the cache entry filled from that record.
+ *
+ * Widens the cache's processor range and points the processor's slot for the
+ * cache's level/type at it.
+ */
+static void store_cache_info_per_processor(
+	struct cpuinfo_processor* processors,
+	const uint32_t processor_global_index,
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+	struct cpuinfo_cache* current_cache)
+{
+	struct cpuinfo_processor* const processor = processors + processor_global_index;
+	const bool is_level_1 = (info->Cache.Level == 1);
+
+	/* processor_start ends up as the lowest processor index sharing the cache. */
+	if (processor_global_index < current_cache->processor_start) {
+		current_cache->processor_start = processor_global_index;
+	}
+	current_cache->processor_count++;
+
+	if (is_level_1 && info->Cache.Type == CacheInstruction) {
+		processor->cache.l1i = current_cache;
+	} else if (is_level_1 && info->Cache.Type == CacheData) {
+		processor->cache.l1d = current_cache;
+	} else if (info->Cache.Level == 2) {
+		processor->cache.l2 = current_cache;
+	} else if (info->Cache.Level == 3) {
+		processor->cache.l3 = current_cache;
+	}
+	/* Other levels and L1 unified/trace caches are counted above but not linked. */
+}
+
+/*
+ * Turns the stored package/cluster offsets into pointers and fills the start
+ * indexes, counts and back references; returns false on any out-of-table reference.
+ */
+static bool connect_packages_cores_clusters_by_processors(
+	struct cpuinfo_processor* processors,
+	const uint32_t nr_of_processors,
+	struct cpuinfo_package* packages,
+	const uint32_t nr_of_packages,
+	struct cpuinfo_cluster* clusters,
+	struct cpuinfo_core* cores,
+	const uint32_t nr_of_cores,
+	const struct woa_chip_info* chip_info,
+	enum cpuinfo_vendor vendor)
+{
+	/* Adjust core and package pointers for all logical processors. */
+	for (uint32_t i = nr_of_processors; i != 0; i--) {
+		const uint32_t processor_id = i - 1;
+		struct cpuinfo_processor* processor = processors + processor_id;
+		/* We stored the offset of pointers when we haven't allocated memory
+		 * for packages and clusters, so now add offsets to base addresses.
+		 */
+		struct cpuinfo_package* package =
+			(struct cpuinfo_package*) ((uintptr_t) packages + (uintptr_t) processor->package);
+		if (package < packages ||
+			package >= (packages + nr_of_packages)) {
+			cpuinfo_log_error("invalid package indexing");
+			return false;
+		}
+		processor->package = package;
+
+		struct cpuinfo_cluster* cluster =
+			(struct cpuinfo_cluster*) ((uintptr_t) clusters + (uintptr_t) processor->cluster);
+		if (cluster < clusters ||
+			cluster >= (clusters + nr_of_cores)) {
+			cpuinfo_log_error("invalid cluster indexing");
+			return false;
+		}
+		processor->cluster = cluster;
+
+		/* The core pointer is already absolute; it is still NULL if no core record named this processor. */
+		struct cpuinfo_core* core = (struct cpuinfo_core*)processor->core;
+		if (core == NULL) {
+			cpuinfo_log_error("invalid core indexing");
+			return false;
+		}
+
+		if (chip_info) {
+			strncpy_s(package->name, CPUINFO_PACKAGE_NAME_MAX, chip_info->chip_name_string,
+				strnlen(chip_info->chip_name_string, CPUINFO_PACKAGE_NAME_MAX));
+		}
+
+		/* Walking backwards, so each *_start below ends up as the lowest processor index of its owner. */
+		package->processor_start = processor_id;
+		package->processor_count++;
+		cluster->processor_start = processor_id;
+		cluster->processor_count++;
+		core->processor_start = processor_id;
+		core->processor_count++;
+	}
+	/* Fill cores */
+	for (uint32_t i = nr_of_cores; i != 0; i--) {
+		const uint32_t global_core_id = i - 1;
+		struct cpuinfo_core* core = cores + global_core_id;
+		const struct cpuinfo_processor* processor = processors + core->processor_start;
+		struct cpuinfo_package* package = (struct cpuinfo_package*) processor->package;
+		struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*) processor->cluster;
+		core->package = package;
+		core->cluster = cluster;
+		core->vendor = vendor;
+
+		/* This can be overwritten by lower-index cores on the same cluster/package. */
+		cluster->core_start = global_core_id;
+		cluster->core_count++;
+		package->core_start = global_core_id;
+		package->core_count++;
+		package->cluster_start = global_core_id;
+		package->cluster_count = package->core_count;
+		cluster->package = package;
+		cluster->vendor = cores[cluster->core_start].vendor;
+		cluster->uarch = cores[cluster->core_start].uarch;
+		cluster->frequency = cores[cluster->core_start].frequency;
+	}
+	return true;
+}
+
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) {
+	/* Index of the lowest set bit. For an empty mask the intrinsic returns 0 and the index is not meaningful, so 0 is returned. */
+	unsigned long index = 0;
+	return _BitScanForward64(&index, (unsigned __int64) kaffinity) ? (uint32_t) index : 0;
+}
diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c
new file mode 100644
index 0000000..8effc15
--- /dev/null
+++ b/src/arm/windows/init.c
@@ -0,0 +1,253 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include "windows-arm-init.h"
+
+/* Efficiency class 0 means little core and 1 means big core for now, hence two valid classes. */
+#define MAX_WOA_VALID_EFFICIENCY_CLASSES		2
+#define VENDOR_NAME_MAX		CPUINFO_PACKAGE_NAME_MAX /* capacity of vendor_info.vendor_name */
+
+struct cpuinfo_arm_isa cpuinfo_isa; /* ISA flags; written by set_cpuinfo_isa_fields() */
+
+static void set_cpuinfo_isa_fields(void); /* defined at the end of this file */
+static bool get_system_info_from_registry( /* defined below, after read_registry() */
+	struct woa_chip_info** chip_info,
+	enum cpuinfo_vendor* vendor);
+
+struct vendor_info {
+	char vendor_name[VENDOR_NAME_MAX]; /* prefix expected in the registry vendor string */
+	enum cpuinfo_vendor vendor; /* vendor reported when the prefix matches */
+};
+
+/* Please add new vendor here! The table is only ever read, so it is const. */
+static const struct vendor_info vendors[] = {
+	{
+		.vendor_name = "Qualcomm",
+		.vendor = cpuinfo_vendor_qualcomm
+	}
+};
+
+/* Please add new SoC/chip info here! Entries are indexed by enum woa_chip_name. */
+static struct woa_chip_info woa_chips[] = {
+	/* Microsoft SQ1 Kryo 495 4 + 4 cores (3 GHz + 1.80 GHz) */
+	[woa_chip_name_microsoft_sq_1] = {
+		.chip_name_string = "Microsoft SQ1",
+		.chip_name = woa_chip_name_microsoft_sq_1,
+		.uarchs = {
+			[0] = { /* efficiency class 0: little core */
+				.uarch = cpuinfo_uarch_cortex_a55,
+				.frequency = 1800000000,
+			},
+			[1] = { /* efficiency class 1: big core */
+				.uarch = cpuinfo_uarch_cortex_a76,
+				.frequency = 3000000000,
+			}
+		}
+	},
+	/* Microsoft SQ2 Kryo 495 4 + 4 cores (3.15 GHz + 2.42 GHz) */
+	[woa_chip_name_microsoft_sq_2] = {
+		.chip_name_string = "Microsoft SQ2",
+		.chip_name = woa_chip_name_microsoft_sq_2,
+		.uarchs = {
+			[0] = { /* efficiency class 0: little core */
+				.uarch = cpuinfo_uarch_cortex_a55,
+				.frequency = 2420000000,
+			},
+			[1] = { /* efficiency class 1: big core */
+				.uarch = cpuinfo_uarch_cortex_a76,
+				.frequency = 3150000000
+			}
+		}
+	}
+};
+
+/* One-time init callback: collects ISA flags, then chip/vendor and logical system information. */
+BOOL CALLBACK cpuinfo_arm_windows_init(
+	PINIT_ONCE init_once, PVOID parameter, PVOID* context)
+{
+	struct woa_chip_info *chip_info = NULL;
+	enum cpuinfo_vendor vendor = cpuinfo_vendor_unknown;
+	set_cpuinfo_isa_fields();
+	/* Short-circuit on purpose: after a failed registry lookup chip_info may still be NULL. */
+	const bool result = get_system_info_from_registry(&chip_info, &vendor) &&
+		cpu_info_init_by_logical_sys_info(chip_info, vendor);
+	cpuinfo_is_initialized = result;
+	return result ? TRUE : FALSE;
+}
+
+bool get_core_uarch_for_efficiency(
+	enum woa_chip_name chip, BYTE EfficiencyClass,
+	enum cpuinfo_uarch* uarch, uint64_t* frequency)
+{
+	/* Reject missing output pointers and out-of-range table indices up front. */
+	if (!uarch || !frequency ||
+		chip >= woa_chip_name_last || EfficiencyClass >= MAX_WOA_VALID_EFFICIENCY_CLASSES) {
+		return false;
+	}
+	/* On the currently supported WoA chips the efficiency class picks the
+	 * pre-defined little or big core; logic for further SoCs belongs here. */
+	const struct core_info_by_chip_name* selected = &woa_chips[chip].uarchs[EfficiencyClass];
+	*uarch = selected->uarch;
+	*frequency = selected->frequency;
+	return true;
+}
+
+/* Static helper functions */
+
+/* Reads a REG_SZ value below HKEY_LOCAL_MACHINE into a zero-initialized
+ * process-heap buffer. A buffer already held in *textBuffer is freed first.
+ * Whatever *textBuffer holds on return (also on failure) is left for the
+ * caller to release with HeapFree(). Returns true when the string was read.
+ */
+static bool read_registry(
+	LPCTSTR subkey,
+	LPCTSTR value,
+	char** textBuffer)
+{
+	const DWORD flags = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */
+	HANDLE heap = GetProcessHeap();
+	DWORD dataSize = 0;
+	LSTATUS result;
+
+	/* The caller passes narrow string literals and compares the result with
+	 * strncmp(), so use the ANSI entry point even in UNICODE builds. */
+	result = RegGetValueA(
+		HKEY_LOCAL_MACHINE,
+		(LPCSTR) subkey,
+		(LPCSTR) value,
+		flags,
+		NULL, /* Value type is not needed */
+		NULL, /* Request buffer size */
+		&dataSize);
+	if (result != ERROR_SUCCESS || dataSize == 0) {
+		cpuinfo_log_error("Registry entry size read error");
+		return false;
+	}
+
+	if (*textBuffer) {
+		HeapFree(heap, 0, *textBuffer);
+	}
+	*textBuffer = HeapAlloc(heap, HEAP_ZERO_MEMORY, dataSize);
+	if (*textBuffer == NULL) {
+		cpuinfo_log_error("Registry textbuffer allocation error");
+		return false;
+	}
+
+	result = RegGetValueA(HKEY_LOCAL_MACHINE, (LPCSTR) subkey, (LPCSTR) value,
+		flags, NULL, *textBuffer /* Destination buffer */, &dataSize);
+	if (result != ERROR_SUCCESS) {
+		cpuinfo_log_error("Registry read error");
+		return false;
+	}
+	return true;
+}
+
+static bool get_system_info_from_registry(
+	struct woa_chip_info** chip_info,
+	enum cpuinfo_vendor* vendor)
+{
+	/* Registry key and value names that describe logical processor 0. */
+	LPCTSTR key_path =
+		(LPCTSTR)"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0";
+	LPCTSTR model_entry = (LPCTSTR)"ProcessorNameString";
+	LPCTSTR vendor_entry = (LPCTSTR)"VendorIdentifier";
+	const uint32_t vendor_count = sizeof(vendors) / sizeof(struct vendor_info);
+	HANDLE process_heap = GetProcessHeap();
+	char* registry_text = NULL;
+	bool found_vendor = false;
+
+	/* The outputs keep these "not found" values unless a lookup below succeeds. */
+	*chip_info = NULL;
+	*vendor = cpuinfo_vendor_unknown;
+
+	/* Step 1: match the processor model name against the woa_chips table. */
+	if (!read_registry(key_path, model_entry, &registry_text)) {
+		cpuinfo_log_error("Registry read error");
+		goto cleanup;
+	}
+	for (uint32_t chip = 0; chip < (uint32_t) woa_chip_name_last && *chip_info == NULL; chip++) {
+		const char* known_name = woa_chips[chip].chip_name_string;
+		/* Prefix comparison: only the length of the table entry is compared. */
+		if (strncmp(registry_text, known_name, strnlen(known_name, CPUINFO_PACKAGE_NAME_MAX)) == 0) {
+			*chip_info = woa_chips + chip;
+		}
+	}
+	if (*chip_info == NULL) {
+		cpuinfo_log_error("Unknown chip model name.\n Please add new Windows on Arm SoC/chip support!");
+		goto cleanup;
+	}
+	cpuinfo_log_debug("detected chip model name: %s", (*chip_info)->chip_name_string);
+
+	/* Step 2: match the vendor identifier against the vendors table. */
+	if (!read_registry(key_path, vendor_entry, &registry_text)) {
+		cpuinfo_log_error("Registry read error");
+		goto cleanup;
+	}
+	for (uint32_t v = 0; v < vendor_count && !found_vendor; v++) {
+		const char* known_vendor = vendors[v].vendor_name;
+		if (strncmp(registry_text, known_vendor, strlen(known_vendor)) == 0) {
+			*vendor = vendors[v].vendor;
+			found_vendor = true;
+		}
+	}
+	if (*vendor == cpuinfo_vendor_unknown) {
+		cpuinfo_log_error("Unexpected vendor: %s", registry_text);
+	}
+
+	/* Every path ends here so the registry text buffer is always released. */
+cleanup:
+	HeapFree(process_heap, 0, registry_text);
+	return found_vendor;
+}
+
+static void set_cpuinfo_isa_fields(void)
+{
+	/* Fills cpuinfo_isa from the processor features reported by the Windows API. */
+	bool armv8 = IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE);
+	bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);
+	bool load_store_atomic = IsProcessorFeaturePresent(PF_ARM_64BIT_LOADSTORE_ATOMIC);
+	bool arm_v81 = IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE);
+	bool float_multiply_accumulate = IsProcessorFeaturePresent(PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE);
+	bool crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
+	bool float_emulated = IsProcessorFeaturePresent(PF_FLOATING_POINT_EMULATED);
+
+	/* The features below are only read for debug logging; no Arm ISA field is paired with them yet. */
+#if CPUINFO_LOG_DEBUG_PARSERS
+	bool divide = IsProcessorFeaturePresent(PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE);
+	bool ext_cache = IsProcessorFeaturePresent(PF_ARM_EXTERNAL_CACHE_AVAILABLE);
+	bool vfp_registers = IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE);
+	cpuinfo_log_debug("divide present: %d", divide);
+	cpuinfo_log_debug("ext_cache present: %d", ext_cache);
+	cpuinfo_log_debug("vfp_registers present: %d", vfp_registers);
+#endif
+
+	cpuinfo_log_debug("armv8 present: %d", armv8);
+	cpuinfo_log_debug("crypto present: %d", crypto);
+	cpuinfo_log_debug("load_store_atomic present: %d", load_store_atomic);
+	cpuinfo_log_debug("arm_v81 present: %d", arm_v81);
+	cpuinfo_log_debug("float_multiply_accumulate present: %d", float_multiply_accumulate);
+	cpuinfo_log_debug("crc32 present: %d", crc32);
+	cpuinfo_log_debug("float_emulated: %d", float_emulated);
+
+#if CPUINFO_ARCH_ARM
+	cpuinfo_isa.armv8 = armv8;
+#endif
+#if CPUINFO_ARCH_ARM64
+	/* The v8.1 flag reports the atomic instructions (CAS, SWP, ...); the 64-bit load/store flag does not. */
+	cpuinfo_isa.atomics = arm_v81;
+#endif
+	cpuinfo_isa.crc32 = crc32;
+	/* Windows API reports all or nothing for cryptographic instructions. */
+	cpuinfo_isa.aes = crypto;
+	cpuinfo_isa.sha1 = crypto;
+	cpuinfo_isa.sha2 = crypto;
+	cpuinfo_isa.pmull = crypto;
+	/* NOTE(review): FMA support stands in for FP16 arithmetic here -- confirm this is intended. */
+	cpuinfo_isa.fp16arith = !float_emulated && float_multiply_accumulate;
+}
diff --git a/src/arm/windows/windows-arm-init.h b/src/arm/windows/windows-arm-init.h
new file mode 100644
index 0000000..76cc51e
--- /dev/null
+++ b/src/arm/windows/windows-arm-init.h
@@ -0,0 +1,32 @@
+#pragma once
+
+/* List of known and supported Windows on Arm SoCs/chips; the values index the woa_chips table. */
+enum woa_chip_name {
+	woa_chip_name_microsoft_sq_1 = 0,
+	woa_chip_name_microsoft_sq_2 = 1,
+	woa_chip_name_unknown = 2,
+	woa_chip_name_last = woa_chip_name_unknown /* exclusive upper bound for valid chips */
+};
+
+/* Topology information hard-coded by SoC/chip name: one entry per efficiency class. */
+struct core_info_by_chip_name {
+	enum cpuinfo_uarch uarch; /* microarchitecture of the cores in this class */
+	uint64_t frequency; /* Hz */
+};
+
+/* SoC/chip info that's currently not readable by logical system information,
+ * but can be read from registry. uarchs is indexed by efficiency class.
+ */
+struct woa_chip_info {
+	const char* chip_name_string; /* points at string literals, hence const */
+	enum woa_chip_name chip_name;
+	struct core_info_by_chip_name uarchs[woa_chip_name_last]; /* NOTE(review): sized by chip count, which only happens to equal the two efficiency classes */
+};
+
+bool get_core_uarch_for_efficiency( /* false when an output is NULL or an index is out of range */
+	enum woa_chip_name chip, BYTE EfficiencyClass,
+	enum cpuinfo_uarch* uarch, uint64_t* frequency);
+/* Called by cpuinfo_arm_windows_init() with the chip and vendor found in the registry. */
+bool cpu_info_init_by_logical_sys_info(
+	const struct woa_chip_info *chip_info,
+	enum cpuinfo_vendor vendor);
diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h
index 9c23d7c..c04620e 100644
--- a/src/cpuinfo/internal-api.h
+++ b/src/cpuinfo/internal-api.h
@@ -51,7 +51,11 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
 CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
 CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
 #if defined(_WIN32) || defined(__CYGWIN__)
-	CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+	#if CPUINFO_ARCH_ARM64
+		CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+	#else
+		CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+	#endif
 #endif
 CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
 CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void);
diff --git a/src/init.c b/src/init.c
index d61e7be..ed37c07 100644
--- a/src/init.c
+++ b/src/init.c
@@ -37,6 +37,8 @@ bool CPUINFO_ABI cpuinfo_initialize(void) {
 		pthread_once(&init_guard, &cpuinfo_arm_linux_init);
 	#elif defined(__MACH__) && defined(__APPLE__)
 		pthread_once(&init_guard, &cpuinfo_arm_mach_init);
+	#elif defined(_WIN32)
+		InitOnceExecuteOnce(&init_guard, &cpuinfo_arm_windows_init, NULL, NULL);
 	#else
 		cpuinfo_log_error("operating system is not supported in cpuinfo");
 	#endif
diff --git a/src/x86/name.c b/src/x86/name.c
index a7cc7c6..957a0d8 100644
--- a/src/x86/name.c
+++ b/src/x86/name.c
@@ -234,7 +234,7 @@ static bool transform_token(char* token_start, char* token_end, struct parser_st
 				return true;
 			}
 			/*
-			 * Erase everywhing after "SOC" on AMD System-on-Chips, e.g.
+			 * Erase everything after "SOC" on AMD System-on-Chips, e.g.
 			 *  "AMD GX-212JC SOC with Radeon(TM) R2E Graphics  \0"
 			 */
 			if (erase_matching(token_start, token_length, "SOC")) {