aboutsummaryrefslogtreecommitdiff
path: root/src/arm
diff options
context:
space:
mode:
authorAshkan Aliabadi <ashkanaliabadi@fb.com>2020-03-19 19:34:26 -0700
committerAshkan Aliabadi <ashkanaliabadi@fb.com>2020-03-19 19:34:26 -0700
commitdcf8e1896b8b0df6ad9a02e2a8765b6630557e0c (patch)
tree8ed9c44333fc3994ae11ea3426e68784417338a0 /src/arm
parentd6c0f915ee737f961915c9d17f1679b6777af207 (diff)
downloadcpuinfo-dcf8e1896b8b0df6ad9a02e2a8765b6630557e0c.tar.gz
Upstream cpuinfo updates in XNNPACK as of XNNPACK:c58bd3486d52db9c6b9934912790db741bc366f6.
Diffstat (limited to 'src/arm')
-rw-r--r--src/arm/cache.c1
-rw-r--r--src/arm/linux/api.h4
-rw-r--r--src/arm/linux/init.c77
-rw-r--r--src/arm/mach/init.c102
-rw-r--r--src/arm/uarch.c8
5 files changed, 136 insertions, 56 deletions
diff --git a/src/arm/cache.c b/src/arm/cache.c
index ccadeb4..c2bc7d2 100644
--- a/src/arm/cache.c
+++ b/src/arm/cache.c
@@ -659,6 +659,7 @@ void cpuinfo_arm_decode_cache(
};
}
break;
+ case cpuinfo_uarch_cortex_a55r0:
case cpuinfo_uarch_cortex_a55:
/*
* ARM Cortex-A55 Core Technical Reference Manual
diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h
index 275d072..f99da66 100644
--- a/src/arm/linux/api.h
+++ b/src/arm/linux/api.h
@@ -153,6 +153,7 @@ struct cpuinfo_arm_linux_processor {
uint32_t midr;
enum cpuinfo_vendor vendor;
enum cpuinfo_uarch uarch;
+ uint32_t uarch_index;
/**
* ID of the physical package which includes this logical processor.
* The value is parsed from /sys/devices/system/cpu/cpu<N>/topology/physical_package_id
@@ -346,3 +347,6 @@ CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr(
uint32_t max_processors,
uint32_t usable_processors,
struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map;
+extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries;
diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c
index f0c432c..6272abf 100644
--- a/src/arm/linux/init.c
+++ b/src/arm/linux/init.c
@@ -106,12 +106,14 @@ void cpuinfo_arm_linux_init(void) {
struct cpuinfo_processor* processors = NULL;
struct cpuinfo_core* cores = NULL;
struct cpuinfo_cluster* clusters = NULL;
- const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
- const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
+ struct cpuinfo_uarch_info* uarchs = NULL;
struct cpuinfo_cache* l1i = NULL;
struct cpuinfo_cache* l1d = NULL;
struct cpuinfo_cache* l2 = NULL;
struct cpuinfo_cache* l3 = NULL;
+ const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
+ const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
+ uint32_t* linux_cpu_to_uarch_index_map = NULL;
const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count();
cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count);
@@ -400,6 +402,18 @@ void cpuinfo_arm_linux_init(void) {
}
}
+ uint32_t uarchs_count = 0;
+ enum cpuinfo_uarch last_uarch;
+ for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+ if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+ if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) {
+ last_uarch = arm_linux_processors[i].uarch;
+ uarchs_count += 1;
+ }
+ arm_linux_processors[i].uarch_index = uarchs_count - 1;
+ }
+ }
+
/*
* Assumptions:
* - No SMT (i.e. each core supports only one hardware thread).
@@ -432,6 +446,13 @@ void cpuinfo_arm_linux_init(void) {
goto cleanup;
}
+ uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info));
+ if (uarchs == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures",
+ uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count);
+ goto cleanup;
+ }
+
linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*));
if (linux_cpu_to_processor_map == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries",
@@ -446,6 +467,15 @@ void cpuinfo_arm_linux_init(void) {
goto cleanup;
}
+ if (uarchs_count > 1) {
+ linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t));
+ if (linux_cpu_to_uarch_index_map == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries",
+ arm_linux_processors_count * sizeof(uint32_t), arm_linux_processors_count);
+ goto cleanup;
+ }
+ }
+
l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache));
if (l1i == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
@@ -460,6 +490,22 @@ void cpuinfo_arm_linux_init(void) {
goto cleanup;
}
+ uint32_t uarchs_index = 0;
+ for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+ if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+ if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) {
+ last_uarch = arm_linux_processors[i].uarch;
+ uarchs[uarchs_index] = (struct cpuinfo_uarch_info) {
+ .uarch = arm_linux_processors[i].uarch,
+ .midr = arm_linux_processors[i].midr,
+ };
+ uarchs_index += 1;
+ }
+ uarchs[uarchs_index - 1].processor_count += 1;
+ uarchs[uarchs_index - 1].core_count += 1;
+ }
+ }
+
uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX;
/* Indication whether L3 (if it exists) is shared between all cores */
bool shared_l3 = true;
@@ -499,6 +545,11 @@ void cpuinfo_arm_linux_init(void) {
cores[i].midr = arm_linux_processors[i].midr;
linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i];
+ if (linux_cpu_to_uarch_index_map != NULL) {
+ linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] =
+ arm_linux_processors[i].uarch_index;
+ }
+
struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 };
cpuinfo_arm_decode_cache(
arm_linux_processors[i].uarch,
@@ -658,12 +709,11 @@ void cpuinfo_arm_linux_init(void) {
}
/* Commit */
- cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
- cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
cpuinfo_processors = processors;
cpuinfo_cores = cores;
cpuinfo_clusters = clusters;
cpuinfo_packages = &package;
+ cpuinfo_uarchs = uarchs;
cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
cpuinfo_cache[cpuinfo_cache_level_2] = l2;
@@ -673,33 +723,42 @@ void cpuinfo_arm_linux_init(void) {
cpuinfo_cores_count = valid_processors;
cpuinfo_clusters_count = cluster_count;
cpuinfo_packages_count = 1;
+ cpuinfo_uarchs_count = uarchs_count;
cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors;
cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors;
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
-
cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]);
+ cpuinfo_linux_cpu_max = arm_linux_processors_count;
+ cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
+ cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
+ cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map;
+
__sync_synchronize();
cpuinfo_is_initialized = true;
- linux_cpu_to_processor_map = NULL;
- linux_cpu_to_core_map = NULL;
processors = NULL;
cores = NULL;
clusters = NULL;
+ uarchs = NULL;
l1i = l1d = l2 = l3 = NULL;
+ linux_cpu_to_processor_map = NULL;
+ linux_cpu_to_core_map = NULL;
+ linux_cpu_to_uarch_index_map = NULL;
cleanup:
free(arm_linux_processors);
- free(linux_cpu_to_processor_map);
- free(linux_cpu_to_core_map);
free(processors);
free(cores);
free(clusters);
+ free(uarchs);
free(l1i);
free(l1d);
free(l2);
free(l3);
+ free(linux_cpu_to_processor_map);
+ free(linux_cpu_to_core_map);
+ free(linux_cpu_to_uarch_index_map);
}
diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c
index e64cc18..bd27259 100644
--- a/src/arm/mach/init.c
+++ b/src/arm/mach/init.c
@@ -14,6 +14,16 @@
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
+/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */
+#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL
+ #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6
+#endif
+#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST
+ #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F
+#endif
+#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER
+ #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2
+#endif
struct cpuinfo_arm_isa cpuinfo_isa = {
#if CPUINFO_ARCH_ARM
@@ -82,37 +92,34 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype
return cpuinfo_uarch_twister;
case CPUFAMILY_ARM_HURRICANE:
return cpuinfo_uarch_hurricane;
-#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL
case CPUFAMILY_ARM_MONSOON_MISTRAL:
-#else
- case 0xe81e7ef6:
- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */
-#endif
/* 2x Monsoon + 4x Mistral cores */
return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral;
-#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST
case CPUFAMILY_ARM_VORTEX_TEMPEST:
-#else
- case 0x07d34b9f:
- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */
-#endif
/* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Vortex + 4x Tempest */
return core_index + 4 < core_count ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest;
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ /* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */
+ return core_index + 4 < core_count ? cpuinfo_uarch_lightning : cpuinfo_uarch_thunder;
default:
/* Use hw.cpusubtype for detection */
break;
}
- switch (cpu_subtype) {
- case CPU_SUBTYPE_ARM_V7:
- return cpuinfo_uarch_cortex_a8;
- case CPU_SUBTYPE_ARM_V7F:
- return cpuinfo_uarch_cortex_a9;
- case CPU_SUBTYPE_ARM_V7K:
- return cpuinfo_uarch_cortex_a7;
- default:
- return cpuinfo_uarch_unknown;
- }
+ #if CPUINFO_ARCH_ARM
+ switch (cpu_subtype) {
+ case CPU_SUBTYPE_ARM_V7:
+ return cpuinfo_uarch_cortex_a8;
+ case CPU_SUBTYPE_ARM_V7F:
+ return cpuinfo_uarch_cortex_a9;
+ case CPU_SUBTYPE_ARM_V7K:
+ return cpuinfo_uarch_cortex_a7;
+ default:
+ return cpuinfo_uarch_unknown;
+ }
+ #else
+ return cpuinfo_uarch_unknown;
+ #endif
}
static void decode_package_name(char* package_name) {
@@ -244,6 +251,7 @@ void cpuinfo_arm_mach_init(void) {
struct cpuinfo_core* cores = NULL;
struct cpuinfo_cluster* clusters = NULL;
struct cpuinfo_package* packages = NULL;
+ struct cpuinfo_uarch_info* uarchs = NULL;
struct cpuinfo_cache* l1i = NULL;
struct cpuinfo_cache* l1d = NULL;
struct cpuinfo_cache* l2 = NULL;
@@ -330,21 +338,12 @@ void cpuinfo_arm_mach_init(void) {
* Thus, we whitelist CPUs known to support these instructions.
*/
switch (cpu_family) {
-#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL
case CPUFAMILY_ARM_MONSOON_MISTRAL:
-#else
- case 0xe81e7ef6:
- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */
-#endif
-#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST
case CPUFAMILY_ARM_VORTEX_TEMPEST:
-#else
- case 0x07d34b9f:
- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */
-#endif
-#if CPUINFO_ARCH_ARM64
- cpuinfo_isa.atomics = true;
-#endif
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ #if CPUINFO_ARCH_ARM64
+ cpuinfo_isa.atomics = true;
+ #endif
cpuinfo_isa.fp16arith = true;
}
@@ -379,10 +378,22 @@ void cpuinfo_arm_mach_init(void) {
num_clusters * sizeof(struct cpuinfo_cluster), num_clusters);
goto cleanup;
}
+ uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info)); /* zero-initialized array, num_clusters entries */
+ if (uarchs == NULL) {
+ cpuinfo_log_error(
+ "failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs",
+ num_clusters * sizeof(struct cpuinfo_uarch_info), num_clusters); /* byte count must use the calloc element type */
+ goto cleanup;
+ }
uint32_t cluster_idx = UINT32_MAX;
for (uint32_t i = 0; i < mach_topology.cores; i++) {
if (i == 0 || cores[i].uarch != cores[i - 1].uarch) {
cluster_idx++;
+ uarchs[cluster_idx] = (struct cpuinfo_uarch_info) {
+ .uarch = cores[i].uarch,
+ .processor_count = 1,
+ .core_count = 1,
+ };
clusters[cluster_idx] = (struct cpuinfo_cluster) {
.processor_start = i * threads_per_core,
.processor_count = 1,
@@ -394,6 +405,8 @@ void cpuinfo_arm_mach_init(void) {
.uarch = cores[i].uarch,
};
} else {
+ uarchs[cluster_idx].processor_count++;
+ uarchs[cluster_idx].core_count++;
clusters[cluster_idx].processor_count++;
clusters[cluster_idx].core_count++;
}
@@ -542,26 +555,25 @@ void cpuinfo_arm_mach_init(void) {
}
/* Commit changes */
- cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
- cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
- cpuinfo_cache[cpuinfo_cache_level_2] = l2;
- cpuinfo_cache[cpuinfo_cache_level_3] = l3;
-
cpuinfo_processors = processors;
cpuinfo_cores = cores;
cpuinfo_clusters = clusters;
cpuinfo_packages = packages;
-
- cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
- cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
- cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
- cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+ cpuinfo_uarchs = uarchs;
+ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+ cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+ cpuinfo_cache[cpuinfo_cache_level_3] = l3;
cpuinfo_processors_count = mach_topology.threads;
cpuinfo_cores_count = mach_topology.cores;
cpuinfo_clusters_count = num_clusters;
cpuinfo_packages_count = mach_topology.packages;
-
+ cpuinfo_uarchs_count = num_clusters;
+ cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
+ cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
+ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
__sync_synchronize();
@@ -572,6 +584,7 @@ void cpuinfo_arm_mach_init(void) {
cores = NULL;
clusters = NULL;
packages = NULL;
+ uarchs = NULL;
l1i = l1d = l2 = l3 = NULL;
cleanup:
@@ -579,6 +592,7 @@ cleanup:
free(cores);
free(clusters);
free(packages);
+ free(uarchs);
free(l1i);
free(l1d);
free(l2);
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index a38250a..2aef9e7 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -58,7 +58,9 @@ void cpuinfo_arm_decode_vendor_uarch(
*uarch = cpuinfo_uarch_cortex_a35;
break;
case 0xD05:
- *uarch = cpuinfo_uarch_cortex_a55;
+ // Note: use Variant, not Revision, field
+ *uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0 ?
+ cpuinfo_uarch_cortex_a55r0 : cpuinfo_uarch_cortex_a55;
break;
case 0xD06:
*uarch = cpuinfo_uarch_cortex_a65;
@@ -257,9 +259,9 @@ void cpuinfo_arm_decode_vendor_uarch(
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a75;
break;
- case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55 */
+ case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55r0 */
*vendor = cpuinfo_vendor_arm;
- *uarch = cpuinfo_uarch_cortex_a55;
+ *uarch = cpuinfo_uarch_cortex_a55r0;
break;
case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */
*vendor = cpuinfo_vendor_arm;