From 6288930068efc8dff4f3c0b95f062fc5ddceba04 Mon Sep 17 00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Mon, 28 Feb 2022 08:42:57 -0600 Subject: cpuinfo: aarch64: add cache configuration details for neoverse-n1/v1/n2 (#75) * cpuinfo: aarch64: add cache configuration details for neoverse-n1 * cpuinfo: aarch64: add support for neoverse-v1 and n2 architectures --- README.md | 2 +- include/cpuinfo.h | 22 +++++++++++++++++ src/arm/cache.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ src/arm/linux/aarch32-isa.c | 4 +++ src/arm/linux/aarch64-isa.c | 12 +++++++++ src/arm/midr.h | 3 +++ src/arm/uarch.c | 8 ++++++ tools/cpu-info.c | 6 +++++ tools/isa-info.c | 2 ++ 9 files changed, 118 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0eb71a5..7866fd6 100644 --- a/README.md +++ b/README.md @@ -239,7 +239,7 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo) - [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2) - [ ] VIA-designed x86/x86-64 cores - [ ] Other x86 cores (DM&P, RDC, Transmeta, Cyrix, Rise) - - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1) + - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1/V1/N2) - [x] Qualcomm-designed ARM cores (Scorpion, Krait, and Kryo) - [x] Nvidia-designed ARM cores (Denver and Carmel) - [x] Samsung-designed ARM cores (Exynos) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index cffa299..258abd0 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -426,6 +426,10 @@ enum cpuinfo_uarch { cpuinfo_uarch_neoverse_n1 = 0x00300400, /** ARM Neoverse E1. */ cpuinfo_uarch_neoverse_e1 = 0x00300401, + /** ARM Neoverse V1. */ + cpuinfo_uarch_neoverse_v1 = 0x00300402, + /** ARM Neoverse N2. */ + cpuinfo_uarch_neoverse_n2 = 0x00300403, /** ARM Cortex-X1. 
*/ cpuinfo_uarch_cortex_x1 = 0x00300500, @@ -1460,7 +1464,9 @@ static inline bool cpuinfo_has_x86_sha(void) { #endif #if CPUINFO_ARCH_ARM64 bool atomics; + bool bf16; bool sve; + bool svebf16; bool sve2; #endif bool rdm; @@ -1793,6 +1799,22 @@ static inline bool cpuinfo_has_arm_sve2(void) { #endif } +static inline bool cpuinfo_has_arm_bf16(void) { + #if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.bf16; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_svebf16(void) { + #if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.svebf16; + #else + return false; + #endif +} + const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processors(void); const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_cores(void); const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_clusters(void); diff --git a/src/arm/cache.c b/src/arm/cache.c index 6ec2d5b..1a6dd38 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -1239,6 +1239,63 @@ void cpuinfo_arm_decode_cache( }; break; } + case cpuinfo_uarch_neoverse_n1: + case cpuinfo_uarch_neoverse_v1: + case cpuinfo_uarch_neoverse_n2: + { + /* + * ARM Neoverse-n1 Core Technical Reference Manual + * A6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB. + * + * A6.1.1 L1 instruction-side memory system + * The L1 instruction memory system has the following key features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 instruction cache. + * - Fixed cache line length of 64 bytes. + * + * A6.1.2 L1 data-side memory system + * The L1 data memory system has the following features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * - Pseudo-LRU cache replacement policy.
+ * + * A7.1 About the L2 memory system + * The L2 memory subsystem consists of: + * - An 8-way set associative L2 cache with a configurable size of 256KB, 512KB, or 1024KB. Cache lines + * have a fixed length of 64 bytes. + * - Strictly inclusive with L1 data cache. + * - When configured with instruction cache hardware coherency, strictly inclusive with L1 instruction cache. + * - When configured without instruction cache hardware coherency, weakly inclusive with L1 instruction cache. + */ + + const uint32_t min_l2_size_KB= 256; + const uint32_t min_l3_size_KB = 0; + + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l2 = (struct cpuinfo_cache) { + .size = min_l2_size_KB * 1024, + .associativity = 8, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + *l3 = (struct cpuinfo_cache) { + .size = min_l3_size_KB * 1024, + .associativity = 16, + .line_size = 64, + }; + break; + } #if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) case cpuinfo_uarch_scorpion: /* @@ -1656,6 +1713,9 @@ uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* proc */ return 8 * 1024 * 1024; case cpuinfo_uarch_cortex_a55: + case cpuinfo_uarch_neoverse_n1: + case cpuinfo_uarch_neoverse_v1: + case cpuinfo_uarch_neoverse_n2: case cpuinfo_uarch_cortex_a75: case cpuinfo_uarch_cortex_a76: case cpuinfo_uarch_exynos_m4: diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c index df68aa1..d6f6a21 100644 --- a/src/arm/linux/aarch32-isa.c +++ b/src/arm/linux/aarch32-isa.c @@ -64,6 +64,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( * - Processors with Exynos M4 cores * - Processors with Exynos M5 cores * - Neoverse N1 cores + * - Neoverse V1 cores + * - Neoverse N2 cores */ if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) { /* Only little cores of Exynos
9810 support FP16 & RDM */ @@ -76,6 +78,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */ case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */ diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c index 2000e1a..7b18095 100644 --- a/src/arm/linux/aarch64-isa.c +++ b/src/arm/linux/aarch64-isa.c @@ -41,6 +41,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( * - Processors with Exynos M4 cores * - Processors with Exynos M5 cores * - Neoverse N1 cores + * - Neoverse V1 cores + * - Neoverse N2 cores */ if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) { /* Exynos 9810 reports that it supports FP16 compute, but in fact only little cores do */ @@ -54,6 +56,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */ case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */ @@ -89,6 +93,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ case UINT32_C(0x4100D4A0): /* Neoverse E1 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008040): /* Kryo 485 
Gold (Cortex-A76) */ @@ -124,4 +130,10 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) { isa->sve2 = true; } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_BF16) { + isa->bf16 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVEBF16) { + isa->svebf16 = true; + } } diff --git a/src/arm/midr.h b/src/arm/midr.h index 739dc19..6329783 100644 --- a/src/arm/midr.h +++ b/src/arm/midr.h @@ -184,9 +184,12 @@ inline static uint32_t midr_score_core(uint32_t midr) { case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */ case UINT32_C(0x51002050): /* Kryo Gold */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ case UINT32_C(0x4100D410): /* Cortex-A78 */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D0C0): /* Neoverse-N1 */ case UINT32_C(0x4100D0B0): /* Cortex-A76 */ case UINT32_C(0x4100D0A0): /* Cortex-A75 */ case UINT32_C(0x4100D090): /* Cortex-A73 */ diff --git a/src/arm/uarch.c b/src/arm/uarch.c index 8b5362b..346e1c1 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -91,6 +91,11 @@ void cpuinfo_arm_decode_vendor_uarch( case 0xD0E: /* Cortex-A76AE */ *uarch = cpuinfo_uarch_cortex_a76; break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD40: + *uarch = cpuinfo_uarch_neoverse_v1; + break; +#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ case 0xD41: /* Cortex-A78 */ *uarch = cpuinfo_uarch_cortex_a78; break; @@ -98,6 +103,9 @@ void cpuinfo_arm_decode_vendor_uarch( *uarch = cpuinfo_uarch_cortex_x1; break; #if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD49: + *uarch = cpuinfo_uarch_neoverse_n2; + break; case 0xD4A: *uarch = cpuinfo_uarch_neoverse_e1; break; diff --git a/tools/cpu-info.c b/tools/cpu-info.c index 30ec633..ff80405 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -187,6 +187,12 @@ static const char* 
uarch_to_string(enum cpuinfo_uarch uarch) { return "Cortex-A77"; case cpuinfo_uarch_cortex_a78: return "Cortex-A78"; + case cpuinfo_uarch_neoverse_n1: + return "Neoverse-N1"; + case cpuinfo_uarch_neoverse_v1: + return "Neoverse-V1"; + case cpuinfo_uarch_neoverse_n2: + return "Neoverse-N2"; case cpuinfo_uarch_cortex_x1: return "Cortex-X1"; case cpuinfo_uarch_scorpion: diff --git a/tools/isa-info.c b/tools/isa-info.c index 92abb57..7320b74 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -157,12 +157,14 @@ int main(int argc, char** argv) { printf("\tARM v8.1 atomics: %s\n", cpuinfo_has_arm_atomics() ? "yes" : "no"); printf("\tARM v8.1 SQRDMLxH: %s\n", cpuinfo_has_arm_neon_rdm() ? "yes" : "no"); printf("\tARM v8.2 FP16 arithmetics: %s\n", cpuinfo_has_arm_fp16_arith() ? "yes" : "no"); + printf("\tARM v8.2 BF16: %s\n", cpuinfo_has_arm_bf16() ? "yes" : "no"); printf("\tARM v8.3 dot product: %s\n", cpuinfo_has_arm_neon_dot() ? "yes" : "no"); printf("\tARM v8.3 JS conversion: %s\n", cpuinfo_has_arm_jscvt() ? "yes" : "no"); printf("\tARM v8.3 complex: %s\n", cpuinfo_has_arm_fcma() ? "yes" : "no"); printf("SIMD extensions:\n"); printf("\tARM SVE: %s\n", cpuinfo_has_arm_sve() ? "yes" : "no"); + printf("\tARM SVE BF16: %s\n", cpuinfo_has_arm_svebf16() ? "yes" : "no"); printf("\tARM SVE 2: %s\n", cpuinfo_has_arm_sve2() ? "yes" : "no"); printf("Cryptography extensions:\n"); -- cgit v1.2.3