From c2092219e7c874783a00a62edb94ddc672f57ab3 Mon Sep 17 00:00:00 2001 From: Ashkan Aliabadi Date: Fri, 8 May 2020 20:40:33 -0700 Subject: Upstream cpuinfo updates in XNNPACK as of XNNPACK:d793f6c2ec145be3ddbffea951e6e5480f4646b8. --- src/arm/cache.c | 43 ++++++++++++++++++++++--------------------- src/arm/linux/aarch32-isa.c | 2 +- src/arm/linux/clusters.c | 10 +++++----- src/arm/linux/cpuinfo.c | 6 +++--- src/arm/tlb.c | 2 +- src/arm/uarch.c | 6 ++++-- 6 files changed, 36 insertions(+), 33 deletions(-) (limited to 'src/arm') diff --git a/src/arm/cache.c b/src/arm/cache.c index 70f11fd..1a8bf91 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -1448,23 +1448,24 @@ void cpuinfo_arm_decode_cache( .line_size = 64 /* assumption */ }; break; - case cpuinfo_uarch_taishanv110: + case cpuinfo_uarch_taishan_v110: /* - * Kunpeng920 series CPU designed by Huawei hisilicon for server, - * L1 and L2 cache is private to each core, L3 is shared with all cores. - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * | Kunpeng920-3226 | 32 | 64K | 64K | 512K | 32M | [1] | - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * | Kunpeng920-4826 | 48 | 64K | 64K | 512K | 48M | [2] | - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * | Kunpeng920-6426 | 64 | 64K | 64K | 512K | 64M | [3] | - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * - * [1] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226 - * [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826 - * [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426 + * It features private 64 KiB L1 instruction and data caches as well as 512 KiB of private L2. [1] + * + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Kunpeng 920-3226 | 32 | 64K | 64K | 512K | 32M | [2] | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Kunpeng 920-4826 | 48 | 64K | 64K | 512K | 48M | [3] | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Kunpeng 920-6426 | 64 | 64K | 64K | 512K | 64M | [4] | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * + * [1] https://en.wikichip.org/wiki/hisilicon/microarchitectures/taishan_v110 + * [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226 + * [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826 + * [4] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426 */ *l1i = (struct cpuinfo_cache) { .size = 64 * 1024, @@ -1482,11 +1483,11 @@ void cpuinfo_arm_decode_cache( .line_size = 128 /* assumption */, .flags = CPUINFO_CACHE_INCLUSIVE /* assumption */, }; - *l3 = (struct cpuinfo_cache) { - .size = cluster_cores * 1024 * 1024, - .associativity = 16 /* assumption */, - .line_size = 128 /* assumption */, - }; + *l3 = (struct cpuinfo_cache) { + .size = cluster_cores * 1024 * 1024, + .associativity = 16 /* assumption */, + .line_size = 128 /* assumption */, + }; break; #endif case cpuinfo_uarch_cortex_a12: diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c index 92095e1..6aedda3 100644 --- a/src/arm/linux/aarch32-isa.c +++ b/src/arm/linux/aarch32-isa.c @@ -193,7 +193,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON; if ((architecture_version >= 7) || (features & vfpv3_mask)) { isa->vfpv3 = true; - + const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON; if (features & d32_mask) { isa->d32 = true; diff --git a/src/arm/linux/clusters.c b/src/arm/linux/clusters.c index 8daeae5..c7a4045 100644 --- a/src/arm/linux/clusters.c +++ b/src/arm/linux/clusters.c @@ -47,7 +47,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { * * @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags. * @param max_processors - number of elements in the @p processors array. - * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum + * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum * frequency, MIDR infromation, and core cluster (package siblings list) information. * * @retval true if the heuristic successfully assigned all processors into clusters of cores. @@ -308,7 +308,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic( * @p processors array have cluster information. * * @param max_processors - number of elements in the @p processors array. - * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum + * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum * frequency, MIDR infromation, and core cluster (package siblings list) information. * * @retval true if the heuristic successfully assigned all processors into clusters of cores. @@ -466,7 +466,7 @@ new_cluster: * This function should be called after all processors are assigned to core clusters. * * @param max_processors - number of elements in the @p processors array. - * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, + * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, * and decoded core cluster (package_leader_id) information. * The function expects the value of processors[i].package_processor_count to be zero. * Upon return, processors[i].package_processor_count will contain the number of logical @@ -482,12 +482,12 @@ void cpuinfo_arm_linux_count_cluster_processors( const uint32_t package_leader_id = processors[i].package_leader_id; processors[package_leader_id].package_processor_count += 1; } - } + } /* Second pass: copy the package_processor_count from the group leader processor */ for (uint32_t i = 0; i < max_processors; i++) { if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { const uint32_t package_leader_id = processors[i].package_leader_id; processors[i].package_processor_count = processors[package_leader_id].package_processor_count; } - } + } } diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c index 2df0c6e..c70055f 100644 --- a/src/arm/linux/cpuinfo.c +++ b/src/arm/linux/cpuinfo.c @@ -44,7 +44,7 @@ static uint32_t parse_processor_number( /* * Full list of ARM features reported in /proc/cpuinfo: - * + * * * swp - support for SWP instruction (deprecated in ARMv7, can be removed in future) * * half - support for half-word loads and stores. These instruction are part of ARMv4, * so no need to check it on supported CPUs. @@ -620,7 +620,7 @@ static void parse_cache_number( break; default: cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected", - number_name, (int) (number_end - number_start), number_start); + number_name, (int) (number_end - number_start), number_start); } } @@ -670,7 +670,7 @@ static bool parse_line( if (line_start == line_end) { return true; } - + /* Search for ':' on the line. */ const char* separator = line_start; for (; separator != line_end; separator++) { diff --git a/src/arm/tlb.c b/src/arm/tlb.c index ba42a3e..9beb832 100644 --- a/src/arm/tlb.c +++ b/src/arm/tlb.c @@ -6,7 +6,7 @@ switch (uarch) { * Cortex-A5 Technical Reference Manual: * 6.3.1. Micro TLB * The first level of caching for the page table information is a micro TLB of - * 10 entries that is implemented on each of the instruction and data sides. + * 10 entries that is implemented on each of the instruction and data sides. * 6.3.2. Main TLB * Misses from the instruction and data micro TLBs are handled by a unified main TLB. * The main TLB is 128-entry two-way set-associative. diff --git a/src/arm/uarch.c b/src/arm/uarch.c index e5e3cbc..63b1a55 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -155,9 +155,11 @@ void cpuinfo_arm_decode_vendor_uarch( case 'H': *vendor = cpuinfo_vendor_huawei; switch (midr_get_part(midr)) { - case 0xD01: /* Kunpeng920 ARM-base CPU*/ - *uarch = cpuinfo_uarch_taishanv110; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD01: /* Kunpeng 920 series */ + *uarch = cpuinfo_uarch_taishan_v110; break; +#endif case 0xD40: /* Kirin 980 Big/Medium cores -> Cortex-A76 */ *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a76; -- cgit v1.2.3