diff options
author | Ashkan Aliabadi <ashkanaliabadi@fb.com> | 2020-03-19 19:34:26 -0700 |
---|---|---|
committer | Ashkan Aliabadi <ashkanaliabadi@fb.com> | 2020-03-19 19:34:26 -0700 |
commit | dcf8e1896b8b0df6ad9a02e2a8765b6630557e0c (patch) | |
tree | 8ed9c44333fc3994ae11ea3426e68784417338a0 /src/arm | |
parent | d6c0f915ee737f961915c9d17f1679b6777af207 (diff) | |
download | cpuinfo-dcf8e1896b8b0df6ad9a02e2a8765b6630557e0c.tar.gz |
Upstream cpuinfo updates in XNNPACK as of XNNPACK:c58bd3486d52db9c6b9934912790db741bc366f6.
Diffstat (limited to 'src/arm')
-rw-r--r-- | src/arm/cache.c | 1 | ||||
-rw-r--r-- | src/arm/linux/api.h | 4 | ||||
-rw-r--r-- | src/arm/linux/init.c | 77 | ||||
-rw-r--r-- | src/arm/mach/init.c | 102 | ||||
-rw-r--r-- | src/arm/uarch.c | 8 |
5 files changed, 136 insertions, 56 deletions
diff --git a/src/arm/cache.c b/src/arm/cache.c index ccadeb4..c2bc7d2 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -659,6 +659,7 @@ void cpuinfo_arm_decode_cache( }; } break; + case cpuinfo_uarch_cortex_a55r0: case cpuinfo_uarch_cortex_a55: /* * ARM Cortex-A55 Core Technical Reference Manual diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h index 275d072..f99da66 100644 --- a/src/arm/linux/api.h +++ b/src/arm/linux/api.h @@ -153,6 +153,7 @@ struct cpuinfo_arm_linux_processor { uint32_t midr; enum cpuinfo_vendor vendor; enum cpuinfo_uarch uarch; + uint32_t uarch_index; /** * ID of the physical package which includes this logical processor. * The value is parsed from /sys/devices/system/cpu/cpu<N>/topology/physical_package_id @@ -346,3 +347,6 @@ CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr( uint32_t max_processors, uint32_t usable_processors, struct cpuinfo_arm_linux_processor processors[restrict static max_processors]); + +extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map; +extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries; diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c index f0c432c..6272abf 100644 --- a/src/arm/linux/init.c +++ b/src/arm/linux/init.c @@ -106,12 +106,14 @@ void cpuinfo_arm_linux_init(void) { struct cpuinfo_processor* processors = NULL; struct cpuinfo_core* cores = NULL; struct cpuinfo_cluster* clusters = NULL; - const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; - const struct cpuinfo_core** linux_cpu_to_core_map = NULL; + struct cpuinfo_uarch_info* uarchs = NULL; struct cpuinfo_cache* l1i = NULL; struct cpuinfo_cache* l1d = NULL; struct cpuinfo_cache* l2 = NULL; struct cpuinfo_cache* l3 = NULL; + const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; + const struct cpuinfo_core** linux_cpu_to_core_map = NULL; + uint32_t* linux_cpu_to_uarch_index_map = NULL; const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count(); cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count); @@ -400,6 +402,18 @@ void cpuinfo_arm_linux_init(void) { } } + uint32_t uarchs_count = 0; + enum cpuinfo_uarch last_uarch; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) { + last_uarch = arm_linux_processors[i].uarch; + uarchs_count += 1; + } + arm_linux_processors[i].uarch_index = uarchs_count - 1; + } + } + /* * Assumptions: * - No SMP (i.e. each core supports only one hardware thread). @@ -432,6 +446,13 @@ void cpuinfo_arm_linux_init(void) { goto cleanup; } + uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info)); + if (uarchs == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures", + uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count); + goto cleanup; + } + linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*)); if (linux_cpu_to_processor_map == NULL) { cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries", @@ -446,6 +467,15 @@ void cpuinfo_arm_linux_init(void) { goto cleanup; } + if (uarchs_count > 1) { + linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t)); + if (linux_cpu_to_uarch_index_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries", + arm_linux_processors_count * sizeof(uint32_t), arm_linux_processors_count); + goto cleanup; + } + } + l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache)); if (l1i == NULL) { cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", @@ -460,6 +490,22 @@ void cpuinfo_arm_linux_init(void) { goto cleanup; } + uint32_t uarchs_index = 0; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) { + last_uarch = arm_linux_processors[i].uarch; + uarchs[uarchs_index] = (struct cpuinfo_uarch_info) { + .uarch = arm_linux_processors[i].uarch, + .midr = arm_linux_processors[i].midr, + }; + uarchs_index += 1; + } + uarchs[uarchs_index - 1].processor_count += 1; + uarchs[uarchs_index - 1].core_count += 1; + } + } + uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; /* Indication whether L3 (if it exists) is shared between all cores */ bool shared_l3 = true; @@ -499,6 +545,11 @@ void cpuinfo_arm_linux_init(void) { cores[i].midr = arm_linux_processors[i].midr; linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i]; + if (linux_cpu_to_uarch_index_map != NULL) { + linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] = + arm_linux_processors[i].uarch_index; + } + struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 }; cpuinfo_arm_decode_cache( arm_linux_processors[i].uarch, @@ -658,12 +709,11 @@ void cpuinfo_arm_linux_init(void) { } /* Commit */ - cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; - cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; cpuinfo_processors = processors; cpuinfo_cores = cores; cpuinfo_clusters = clusters; cpuinfo_packages = &package; + cpuinfo_uarchs = uarchs; cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; cpuinfo_cache[cpuinfo_cache_level_2] = l2; @@ -673,33 +723,42 @@ void cpuinfo_arm_linux_init(void) { cpuinfo_cores_count = valid_processors; cpuinfo_clusters_count = cluster_count; cpuinfo_packages_count = 1; + cpuinfo_uarchs_count = uarchs_count; cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors; cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors; cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; - cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]); + cpuinfo_linux_cpu_max = arm_linux_processors_count; + cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; + cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; + cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map; + __sync_synchronize(); cpuinfo_is_initialized = true; - linux_cpu_to_processor_map = NULL; - linux_cpu_to_core_map = NULL; processors = NULL; cores = NULL; clusters = NULL; + uarchs = NULL; l1i = l1d = l2 = l3 = NULL; + linux_cpu_to_processor_map = NULL; + linux_cpu_to_core_map = NULL; + linux_cpu_to_uarch_index_map = NULL; cleanup: free(arm_linux_processors); - free(linux_cpu_to_processor_map); - free(linux_cpu_to_core_map); free(processors); free(cores); free(clusters); + free(uarchs); free(l1i); free(l1d); free(l2); free(l3); + free(linux_cpu_to_processor_map); + free(linux_cpu_to_core_map); + free(linux_cpu_to_uarch_index_map); } diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index e64cc18..bd27259 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -14,6 +14,16 @@ #include <cpuinfo/internal-api.h> #include <cpuinfo/log.h> +/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */ +#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL + #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6 +#endif +#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST + #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F +#endif +#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER + #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2 +#endif struct cpuinfo_arm_isa cpuinfo_isa = { #if CPUINFO_ARCH_ARM @@ -82,37 +92,34 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype return cpuinfo_uarch_twister; case CPUFAMILY_ARM_HURRICANE: return cpuinfo_uarch_hurricane; -#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL case CPUFAMILY_ARM_MONSOON_MISTRAL: -#else - case 0xe81e7ef6: - /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */ -#endif /* 2x Monsoon + 4x Mistral cores */ return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral; -#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST case CPUFAMILY_ARM_VORTEX_TEMPEST: -#else - case 0x07d34b9f: - /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ -#endif /* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Cortex + 4x Tempest */ return core_index + 4 < core_count ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest; + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + /* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */ + return core_index + 4 < core_count ? cpuinfo_uarch_lightning : cpuinfo_uarch_thunder; default: /* Use hw.cpusubtype for detection */ break; } - switch (cpu_subtype) { - case CPU_SUBTYPE_ARM_V7: - return cpuinfo_uarch_cortex_a8; - case CPU_SUBTYPE_ARM_V7F: - return cpuinfo_uarch_cortex_a9; - case CPU_SUBTYPE_ARM_V7K: - return cpuinfo_uarch_cortex_a7; - default: - return cpuinfo_uarch_unknown; - } + #if CPUINFO_ARCH_ARM + switch (cpu_subtype) { + case CPU_SUBTYPE_ARM_V7: + return cpuinfo_uarch_cortex_a8; + case CPU_SUBTYPE_ARM_V7F: + return cpuinfo_uarch_cortex_a9; + case CPU_SUBTYPE_ARM_V7K: + return cpuinfo_uarch_cortex_a7; + default: + return cpuinfo_uarch_unknown; + } + #else + return cpuinfo_uarch_unknown; + #endif } static void decode_package_name(char* package_name) { @@ -244,6 +251,7 @@ void cpuinfo_arm_mach_init(void) { struct cpuinfo_core* cores = NULL; struct cpuinfo_cluster* clusters = NULL; struct cpuinfo_package* packages = NULL; + struct cpuinfo_uarch_info* uarchs = NULL; struct cpuinfo_cache* l1i = NULL; struct cpuinfo_cache* l1d = NULL; struct cpuinfo_cache* l2 = NULL; @@ -330,21 +338,12 @@ void cpuinfo_arm_mach_init(void) { * Thus, we whitelist CPUs known to support these instructions. */ switch (cpu_family) { -#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL case CPUFAMILY_ARM_MONSOON_MISTRAL: -#else - case 0xe81e7ef6: - /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */ -#endif -#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST case CPUFAMILY_ARM_VORTEX_TEMPEST: -#else - case 0x07d34b9f: - /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ -#endif -#if CPUINFO_ARCH_ARM64 - cpuinfo_isa.atomics = true; -#endif + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + #if CPUINFO_ARCH_ARM64 + cpuinfo_isa.atomics = true; + #endif cpuinfo_isa.fp16arith = true; } @@ -379,10 +378,22 @@ void cpuinfo_arm_mach_init(void) { num_clusters * sizeof(struct cpuinfo_cluster), num_clusters); goto cleanup; } + uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info)); + if (uarchs == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs", + num_clusters * sizeof(enum cpuinfo_uarch), num_clusters); + goto cleanup; + } uint32_t cluster_idx = UINT32_MAX; for (uint32_t i = 0; i < mach_topology.cores; i++) { if (i == 0 || cores[i].uarch != cores[i - 1].uarch) { cluster_idx++; + uarchs[cluster_idx] = (struct cpuinfo_uarch_info) { + .uarch = cores[i].uarch, + .processor_count = 1, + .core_count = 1, + }; clusters[cluster_idx] = (struct cpuinfo_cluster) { .processor_start = i * threads_per_core, .processor_count = 1, @@ -394,6 +405,8 @@ void cpuinfo_arm_mach_init(void) { .uarch = cores[i].uarch, }; } else { + uarchs[cluster_idx].processor_count++; + uarchs[cluster_idx].core_count++; clusters[cluster_idx].processor_count++; clusters[cluster_idx].core_count++; } @@ -542,26 +555,25 @@ void cpuinfo_arm_mach_init(void) { } /* Commit changes */ - cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; - cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; - cpuinfo_cache[cpuinfo_cache_level_2] = l2; - cpuinfo_cache[cpuinfo_cache_level_3] = l3; - cpuinfo_processors = processors; cpuinfo_cores = cores; cpuinfo_clusters = clusters; cpuinfo_packages = packages; - - cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; - cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; - cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; - cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_uarchs = uarchs; + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + cpuinfo_cache[cpuinfo_cache_level_3] = l3; cpuinfo_processors_count = mach_topology.threads; cpuinfo_cores_count = mach_topology.cores; cpuinfo_clusters_count = num_clusters; cpuinfo_packages_count = mach_topology.packages; - + cpuinfo_uarchs_count = num_clusters; + cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); __sync_synchronize(); @@ -572,6 +584,7 @@ void cpuinfo_arm_mach_init(void) { cores = NULL; clusters = NULL; packages = NULL; + uarchs = NULL; l1i = l1d = l2 = l3 = NULL; cleanup: @@ -579,6 +592,7 @@ cleanup: free(cores); free(clusters); free(packages); + free(uarchs); free(l1i); free(l1d); free(l2); diff --git a/src/arm/uarch.c b/src/arm/uarch.c index a38250a..2aef9e7 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -58,7 +58,9 @@ void cpuinfo_arm_decode_vendor_uarch( *uarch = cpuinfo_uarch_cortex_a35; break; case 0xD05: - *uarch = cpuinfo_uarch_cortex_a55; + // Note: use Variant, not Revision, field + *uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0 ? + cpuinfo_uarch_cortex_a55r0 : cpuinfo_uarch_cortex_a55; break; case 0xD06: *uarch = cpuinfo_uarch_cortex_a65; @@ -257,9 +259,9 @@ void cpuinfo_arm_decode_vendor_uarch( *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a75; break; - case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55 */ + case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55r0 */ *vendor = cpuinfo_vendor_arm; - *uarch = cpuinfo_uarch_cortex_a55; + *uarch = cpuinfo_uarch_cortex_a55r0; break; case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */ *vendor = cpuinfo_vendor_arm; |