From c2092219e7c874783a00a62edb94ddc672f57ab3 Mon Sep 17 00:00:00 2001
From: Ashkan Aliabadi <ashkan.aliabadi@gmail.com>
Date: Fri, 8 May 2020 20:40:33 -0700
Subject: Upstream cpuinfo updates in XNNPACK as of
 XNNPACK:d793f6c2ec145be3ddbffea951e6e5480f4646b8.

---
 src/arm/cache.c             | 43 ++++++++++++++++++++++---------------------
 src/arm/linux/aarch32-isa.c |  2 +-
 src/arm/linux/clusters.c    | 10 +++++-----
 src/arm/linux/cpuinfo.c     |  6 +++---
 src/arm/tlb.c               |  2 +-
 src/arm/uarch.c             |  6 ++++--
 6 files changed, 36 insertions(+), 33 deletions(-)

(limited to 'src/arm')

diff --git a/src/arm/cache.c b/src/arm/cache.c
index 70f11fd..1a8bf91 100644
--- a/src/arm/cache.c
+++ b/src/arm/cache.c
@@ -1448,23 +1448,24 @@ void cpuinfo_arm_decode_cache(
 				.line_size = 64 /* assumption */
 			};
 			break;
-		case cpuinfo_uarch_taishanv110:
+		case cpuinfo_uarch_taishan_v110:
 			/*
-			 *  Kunpeng920 series CPU designed by Huawei hisilicon for server, 
-			 *  L1 and L2 cache is private to each core, L3 is shared with all cores.
-			 *  +--------------------+-------+-----------+-----------+-----------+----------+------------+
-			 *  | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | L3 cache | Reference  |
-			 *  +--------------------+-------+-----------+-----------+-----------+----------+------------+
-			 *  | Kunpeng920-3226    |  32   |    64K    |     64K   |    512K   |    32M   |     [1]    |
-			 *  +--------------------+-------+-----------+-----------+-----------+----------+------------+
-			 *  | Kunpeng920-4826    |  48   |    64K    |     64K   |    512K   |    48M   |     [2]    |
-			 *  +--------------------+-------+-----------+-----------+-----------+----------+------------+
-			 *  | Kunpeng920-6426    |  64   |    64K    |     64K   |    512K   |    64M   |     [3]    |
-			 *  +--------------------+-------+-----------+-----------+-----------+----------+------------+
-			 *
-			 * [1] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226
-			 * [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826
-			 * [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
+			 * Each TaiShan v110 core features private 64 KiB L1 instruction and data caches as well as 512 KiB of private L2. [1]
+			 *
+			 *  +------------------+-------+-----------+-----------+-----------+----------+-----------+
+			 *  | Processor model  | Cores | L1D cache | L1I cache | L2 cache  | L3 cache | Reference |
+			 *  +------------------+-------+-----------+-----------+-----------+----------+-----------+
+			 *  | Kunpeng 920-3226 |  32   |    64K    |    64K    |    512K   |    32M   |     [2]   |
+			 *  +------------------+-------+-----------+-----------+-----------+----------+-----------+
+			 *  | Kunpeng 920-4826 |  48   |    64K    |    64K    |    512K   |    48M   |     [3]   |
+			 *  +------------------+-------+-----------+-----------+-----------+----------+-----------+
+			 *  | Kunpeng 920-6426 |  64   |    64K    |    64K    |    512K   |    64M   |     [4]   |
+			 *  +------------------+-------+-----------+-----------+-----------+----------+-----------+
+			 *
+			 * [1] https://en.wikichip.org/wiki/hisilicon/microarchitectures/taishan_v110
+			 * [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226
+			 * [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826
+			 * [4] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
 			 */
 			*l1i = (struct cpuinfo_cache) {
 				.size = 64 * 1024,
@@ -1482,11 +1483,11 @@ void cpuinfo_arm_decode_cache(
 				.line_size = 128 /* assumption */,
 				.flags = CPUINFO_CACHE_INCLUSIVE /* assumption */,
 			};
-		        *l3 = (struct cpuinfo_cache) {
-			        .size = cluster_cores * 1024 * 1024,
-			        .associativity = 16 /* assumption */,
-			        .line_size = 128 /* assumption */,
-		        };
+			*l3 = (struct cpuinfo_cache) {
+				.size = cluster_cores * 1024 * 1024,
+				.associativity = 16 /* assumption */,
+				.line_size = 128 /* assumption */,
+			};
 			break;
 #endif
 		case cpuinfo_uarch_cortex_a12:
diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c
index 92095e1..6aedda3 100644
--- a/src/arm/linux/aarch32-isa.c
+++ b/src/arm/linux/aarch32-isa.c
@@ -193,7 +193,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
 				CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
 			if ((architecture_version >= 7) || (features & vfpv3_mask)) {
 				isa->vfpv3 = true;
-			
+
 				const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON;
 				if (features & d32_mask) {
 					isa->d32 = true;
diff --git a/src/arm/linux/clusters.c b/src/arm/linux/clusters.c
index 8daeae5..c7a4045 100644
--- a/src/arm/linux/clusters.c
+++ b/src/arm/linux/clusters.c
@@ -47,7 +47,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
  *
  * @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags.
  * @param max_processors - number of elements in the @p processors array.
- * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum 
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
  *                             frequency, MIDR infromation, and core cluster (package siblings list) information.
  *
  * @retval true if the heuristic successfully assigned all processors into clusters of cores.
@@ -308,7 +308,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
  * @p processors array have cluster information.
  *
  * @param max_processors - number of elements in the @p processors array.
- * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum 
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
  *                             frequency, MIDR infromation, and core cluster (package siblings list) information.
  *
  * @retval true if the heuristic successfully assigned all processors into clusters of cores.
@@ -466,7 +466,7 @@ new_cluster:
  * This function should be called after all processors are assigned to core clusters.
  *
  * @param max_processors - number of elements in the @p processors array.
- * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, 
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
  *                             and decoded core cluster (package_leader_id) information.
  *                             The function expects the value of processors[i].package_processor_count to be zero.
  *                             Upon return, processors[i].package_processor_count will contain the number of logical
@@ -482,12 +482,12 @@ void cpuinfo_arm_linux_count_cluster_processors(
 			const uint32_t package_leader_id = processors[i].package_leader_id;
 			processors[package_leader_id].package_processor_count += 1;
 		}
-	}	
+	}
 	/* Second pass: copy the package_processor_count from the group leader processor */
 	for (uint32_t i = 0; i < max_processors; i++) {
 		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
 			const uint32_t package_leader_id = processors[i].package_leader_id;
 			processors[i].package_processor_count = processors[package_leader_id].package_processor_count;
 		}
-	}	
+	}
 }
diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c
index 2df0c6e..c70055f 100644
--- a/src/arm/linux/cpuinfo.c
+++ b/src/arm/linux/cpuinfo.c
@@ -44,7 +44,7 @@ static uint32_t parse_processor_number(
 
 /*
  *	Full list of ARM features reported in /proc/cpuinfo:
- *	
+ *
  *	* swp - support for SWP instruction (deprecated in ARMv7, can be removed in future)
  *	* half - support for half-word loads and stores. These instruction are part of ARMv4,
  *	         so no need to check it on supported CPUs.
@@ -620,7 +620,7 @@ static void parse_cache_number(
 				break;
 			default:
 				cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected",
-					number_name, (int) (number_end - number_start), number_start);			
+					number_name, (int) (number_end - number_start), number_start);
 		}
 	}
 
@@ -670,7 +670,7 @@ static bool parse_line(
 	if (line_start == line_end) {
 		return true;
 	}
-	
+
 	/* Search for ':' on the line. */
 	const char* separator = line_start;
 	for (; separator != line_end; separator++) {
diff --git a/src/arm/tlb.c b/src/arm/tlb.c
index ba42a3e..9beb832 100644
--- a/src/arm/tlb.c
+++ b/src/arm/tlb.c
@@ -6,7 +6,7 @@ switch (uarch) {
 		 * Cortex-A5 Technical Reference Manual:
 		 * 6.3.1. Micro TLB
 		 *   The first level of caching for the page table information is a micro TLB of
-		 *   10 entries that is implemented on each of the instruction and data sides. 
+		 *   10 entries that is implemented on each of the instruction and data sides.
 		 * 6.3.2. Main TLB
 		 *   Misses from the instruction and data micro TLBs are handled by a unified main TLB.
 		 *   The main TLB is 128-entry two-way set-associative.
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index e5e3cbc..63b1a55 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -155,9 +155,11 @@ void cpuinfo_arm_decode_vendor_uarch(
 		case 'H':
 			*vendor = cpuinfo_vendor_huawei;
 			switch (midr_get_part(midr)) {
-				case 0xD01: /* Kunpeng920 ARM-base CPU*/
-					*uarch = cpuinfo_uarch_taishanv110;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+				case 0xD01: /* Kunpeng 920 series */
+					*uarch = cpuinfo_uarch_taishan_v110;
 					break;
+#endif
 				case 0xD40: /* Kirin 980 Big/Medium cores -> Cortex-A76 */
 					*vendor = cpuinfo_vendor_arm;
 					*uarch = cpuinfo_uarch_cortex_a76;
-- 
cgit v1.2.3