diff options
author | Marat Dukhan <marat@fb.com> | 2017-08-09 13:49:39 -0700 |
---|---|---|
committer | Marat Dukhan <marat@fb.com> | 2017-08-09 13:49:39 -0700 |
commit | a8fb3dd0aa41013e8ec5c93900a1c81e26ef6552 (patch) | |
tree | 79de3423eb2c5c76bae4ba04c8f3f7e4973dd8ea | |
parent | 43576d6ca7c58e9931d068c2e5f878611f55eb2b (diff) | |
download | cpuinfo-a8fb3dd0aa41013e8ec5c93900a1c81e26ef6552.tar.gz |
Detect big.LITTLE ARM systems
-rw-r--r-- | include/cpuinfo.h | 4 | ||||
-rw-r--r-- | jni/Android.mk | 83 | ||||
-rw-r--r-- | src/arm/api.h | 7 | ||||
-rw-r--r-- | src/arm/cache.c | 35 | ||||
-rw-r--r-- | src/arm/linux/api.h | 303 | ||||
-rw-r--r-- | src/arm/linux/arm32-isa.c | 108 | ||||
-rw-r--r-- | src/arm/linux/arm64-isa.c | 32 | ||||
-rw-r--r-- | src/arm/linux/cpuinfo.c | 380 | ||||
-rw-r--r-- | src/arm/linux/init.c | 672 | ||||
-rw-r--r-- | src/arm/midr.h | 209 | ||||
-rw-r--r-- | src/arm/uarch.c | 53 | ||||
-rw-r--r-- | src/linux/api.h | 50 | ||||
-rw-r--r-- | src/linux/cpulist.c (renamed from src/linux/cpuset.c) | 33 | ||||
-rw-r--r-- | src/linux/processors.c | 484 | ||||
-rw-r--r-- | src/linux/smallfile.c | 69 | ||||
-rw-r--r-- | src/log.c | 2 | ||||
-rw-r--r-- | src/log.h | 2 | ||||
-rw-r--r-- | src/x86/linux/init.c | 14 |
18 files changed, 1995 insertions, 545 deletions
diff --git a/include/cpuinfo.h b/include/cpuinfo.h index c8e339c..3169144 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -531,12 +531,16 @@ enum cpuinfo_uarch { cpuinfo_uarch_cortex_a35 = 0x00300335, /** ARM Cortex-A53. */ cpuinfo_uarch_cortex_a53 = 0x00300353, + /** ARM Cortex-A55. */ + cpuinfo_uarch_cortex_a55 = 0x00300355, /** ARM Cortex-A57. */ cpuinfo_uarch_cortex_a57 = 0x00300357, /** ARM Cortex-A72. */ cpuinfo_uarch_cortex_a72 = 0x00300372, /** ARM Cortex-A73. */ cpuinfo_uarch_cortex_a73 = 0x00300373, + /** ARM Cortex-A75. */ + cpuinfo_uarch_cortex_a75 = 0x00300375, /** Qualcomm Scorpion. */ cpuinfo_uarch_scorpion = 0x00400100, diff --git a/jni/Android.mk b/jni/Android.mk index 539b761..6217019 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -5,7 +5,9 @@ LOCAL_MODULE := cpuinfo LOCAL_SRC_FILES := $(LOCAL_PATH)/src/init.c \ $(LOCAL_PATH)/src/cache.c \ $(LOCAL_PATH)/src/log.c \ - $(LOCAL_PATH)/src/linux/cpuset.c + $(LOCAL_PATH)/src/linux/processors.c \ + $(LOCAL_PATH)/src/linux/smallfile.c \ + $(LOCAL_PATH)/src/linux/cpulist.c ifeq ($(TARGET_ARCH_ABI),$(filter $(TARGET_ARCH_ABI),armeabi armeabi-v7a arm64-v8a)) LOCAL_SRC_FILES += \ $(LOCAL_PATH)/src/arm/uarch.c \ @@ -41,6 +43,49 @@ LOCAL_CFLAGS := -std=gnu99 -D_GNU_SOURCE=1 include $(BUILD_STATIC_LIBRARY) include $(CLEAR_VARS) +LOCAL_MODULE := cpuinfo_mock +LOCAL_SRC_FILES := $(LOCAL_PATH)/src/init.c \ + $(LOCAL_PATH)/src/cache.c \ + $(LOCAL_PATH)/src/log.c \ + $(LOCAL_PATH)/src/linux/processors.c \ + $(LOCAL_PATH)/src/linux/smallfile.c \ + $(LOCAL_PATH)/src/linux/cpulist.c +ifeq ($(TARGET_ARCH_ABI),$(filter $(TARGET_ARCH_ABI),armeabi armeabi-v7a arm64-v8a)) +LOCAL_SRC_FILES += \ + $(LOCAL_PATH)/src/arm/uarch.c \ + $(LOCAL_PATH)/src/arm/cache.c \ + $(LOCAL_PATH)/src/arm/linux/init.c \ + $(LOCAL_PATH)/src/arm/linux/cpuinfo.c +ifeq ($(TARGET_ARCH_ABI),armeabi) +LOCAL_SRC_FILES += $(LOCAL_PATH)/src/arm/linux/arm32-isa.c.arm +endif # armeabi +ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) +LOCAL_SRC_FILES += 
$(LOCAL_PATH)/src/arm/linux/arm32-isa.c +endif # armeabi-v7a +ifeq ($(TARGET_ARCH_ABI),arm64-v8a) +LOCAL_SRC_FILES += $(LOCAL_PATH)/src/arm/linux/arm64-isa.c +endif # arm64-v8a +endif # armeabi, armeabi-v7a, or arm64-v8a +ifeq ($(TARGET_ARCH_ABI),$(filter $(TARGET_ARCH_ABI),x86 x86_64)) +LOCAL_SRC_FILES += \ + $(LOCAL_PATH)/src/x86/init.c \ + $(LOCAL_PATH)/src/x86/info.c \ + $(LOCAL_PATH)/src/x86/isa.c \ + $(LOCAL_PATH)/src/x86/vendor.c \ + $(LOCAL_PATH)/src/x86/uarch.c \ + $(LOCAL_PATH)/src/x86/topology.c \ + $(LOCAL_PATH)/src/x86/cache/init.c \ + $(LOCAL_PATH)/src/x86/cache/descriptor.c \ + $(LOCAL_PATH)/src/x86/cache/deterministic.c \ + $(LOCAL_PATH)/src/x86/linux/init.c +endif # x86 or x86_64 +LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include +LOCAL_C_INCLUDES := $(LOCAL_EXPORT_C_INCLUDES) $(LOCAL_PATH)/src +LOCAL_CFLAGS := -std=gnu99 -D_GNU_SOURCE=1 -DCPUINFO_LOG_LEVEL=4 -DCPUINFO_MOCK=1 +LOCAL_EXPORT_CFLAGS := -DCPUINFO_MOCK=1 +include $(BUILD_STATIC_LIBRARY) + +include $(CLEAR_VARS) LOCAL_MODULE := cpu-info LOCAL_SRC_FILES := $(LOCAL_PATH)/tools/cpu-info.c LOCAL_STATIC_LIBRARIES := cpuinfo @@ -57,3 +102,39 @@ LOCAL_MODULE := cache-info LOCAL_SRC_FILES := $(LOCAL_PATH)/tools/cache-info.c LOCAL_STATIC_LIBRARIES := cpuinfo include $(BUILD_EXECUTABLE) + +include $(CLEAR_VARS) +LOCAL_MODULE := gtest +LOCAL_SRC_FILES := $(LOCAL_PATH)/deps/googletest/googletest/src/gtest-all.cc +LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/deps/googletest/googletest/include +LOCAL_C_INCLUDES := $(LOCAL_EXPORT_C_INCLUDES) $(LOCAL_PATH)/deps/googletest/googletest +ifeq ($(TARGET_ARCH_ABI),armeabi) +LOCAL_EXPORT_LDLIBS := -latomic +endif # armeabi +include $(BUILD_STATIC_LIBRARY) + +include $(CLEAR_VARS) +LOCAL_MODULE := init-test +LOCAL_SRC_FILES := $(LOCAL_PATH)/test/init.cc +LOCAL_C_INCLUDES := $(LOCAL_PATH)/test +LOCAL_STATIC_LIBRARIES := cpuinfo gtest +include $(BUILD_EXECUTABLE) + + +ifeq ($(TARGET_ARCH_ABI),$(filter $(TARGET_ARCH_ABI),armeabi armeabi-v7a)) + +include $(CLEAR_VARS) 
+LOCAL_MODULE := nexus4-test +LOCAL_SRC_FILES := $(LOCAL_PATH)/test/nexus4.cc +LOCAL_C_INCLUDES := $(LOCAL_PATH)/test +LOCAL_STATIC_LIBRARIES := cpuinfo_mock gtest +include $(BUILD_EXECUTABLE) + +include $(CLEAR_VARS) +LOCAL_MODULE := nexus5-test +LOCAL_SRC_FILES := $(LOCAL_PATH)/test/nexus5.cc +LOCAL_C_INCLUDES := $(LOCAL_PATH)/test +LOCAL_STATIC_LIBRARIES := cpuinfo_mock gtest +include $(BUILD_EXECUTABLE) + +endif # armeabi, armeabi-v7a diff --git a/src/arm/api.h b/src/arm/api.h index e68c2ef..8ed3587 100644 --- a/src/arm/api.h +++ b/src/arm/api.h @@ -7,8 +7,7 @@ void cpuinfo_arm_decode_vendor_uarch( - uint32_t cpu_implementer, - uint32_t cpu_part, + uint32_t midr, #if CPUINFO_ARCH_ARM bool has_vfpv4, #endif @@ -17,8 +16,8 @@ void cpuinfo_arm_decode_vendor_uarch( void cpuinfo_arm_decode_cache( enum cpuinfo_uarch uarch, - uint32_t uarch_cores, - uint32_t cpu_part, + uint32_t cluster_cores, + uint32_t midr, uint32_t arch_version, struct cpuinfo_cache l1i[restrict static 1], struct cpuinfo_cache l1d[restrict static 1], diff --git a/src/arm/cache.c b/src/arm/cache.c index 62c8ab0..b9c7778 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -3,12 +3,13 @@ #include <cpuinfo.h> #include <log.h> #include <arm/api.h> +#include <arm/midr.h> void cpuinfo_arm_decode_cache( enum cpuinfo_uarch uarch, - uint32_t uarch_cores, - uint32_t cpu_part, + uint32_t cluster_cores, + uint32_t midr, uint32_t arch_version, struct cpuinfo_cache l1i[restrict static 1], struct cpuinfo_cache l1d[restrict static 1], @@ -16,7 +17,7 @@ void cpuinfo_arm_decode_cache( { switch (uarch) { case cpuinfo_uarch_xscale: - switch (cpu_part >> 8) { + switch (midr_get_part(midr) >> 8) { case 2: /* * PXA 210/25X/26X @@ -200,7 +201,7 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; *l2 = (struct cpuinfo_cache) { - .size = 128 * 1024 * uarch_cores, + .size = 128 * 1024 * cluster_cores, .associativity = 8, .line_size = 64 }; @@ -337,7 +338,7 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; *l2 = (struct 
cpuinfo_cache) { - .size = uarch_cores * 512 * 1024, + .size = cluster_cores * 512 * 1024, .associativity = 16, .line_size = 64 }; @@ -373,7 +374,7 @@ void cpuinfo_arm_decode_cache( * * [1] https://www.raspberrypi.org/forums/viewtopic.php?f=91&t=145766 */ - if (cpu_part == 0x800) { + if (midr_is_kryo280_silver(midr)) { /* Little cores of Snapdragon 835 */ *l1i = (struct cpuinfo_cache) { .size = 32 * 1024, @@ -386,7 +387,7 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; *l2 = (struct cpuinfo_cache) { - .size = uarch_cores * 256 * 1024, + .size = cluster_cores * 256 * 1024, .associativity = 16, .line_size = 64 }; @@ -403,7 +404,7 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; *l2 = (struct cpuinfo_cache) { - .size = uarch_cores * 128 * 1024, + .size = cluster_cores * 128 * 1024, .associativity = 16, .line_size = 64 }; @@ -449,7 +450,7 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; *l2 = (struct cpuinfo_cache) { - .size = uarch_cores * 512 * 1024, + .size = cluster_cores * 512 * 1024, .associativity = 16, .line_size = 64, .flags = CPUINFO_CACHE_INCLUSIVE @@ -566,7 +567,7 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; *l2 = (struct cpuinfo_cache) { - .size = uarch_cores * 512 * 1024, + .size = cluster_cores * 512 * 1024, .associativity = 16, .line_size = 64, .flags = CPUINFO_CACHE_INCLUSIVE @@ -601,7 +602,7 @@ void cpuinfo_arm_decode_cache( .line_size = 32 }; *l2 = (struct cpuinfo_cache) { - .size = uarch_cores * 256 * 1024, + .size = cluster_cores * 256 * 1024, .associativity = 4, .line_size = 128 }; @@ -629,7 +630,7 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; *l2 = (struct cpuinfo_cache) { - .size = uarch_cores * 512 * 1024, + .size = cluster_cores * 512 * 1024, .associativity = 8, .line_size = 128 }; @@ -655,17 +656,17 @@ void cpuinfo_arm_decode_cache( .associativity = 4 /* assume same as Krait */, .line_size = 64 /* assume same as Krait */ }; - if (cpu_part == 0x205) { + if (midr_is_kryo_silver(midr)) { /* Kryo "Silver" */ *l2 = 
(struct cpuinfo_cache) { - .size = uarch_cores * 256 * 1024, + .size = cluster_cores * 256 * 1024, .associativity = 8, /* assume same as Krait */ .line_size = 64 /* assume same as Krait */ }; } else { /* Kryo "Gold" */ *l2 = (struct cpuinfo_cache) { - .size = uarch_cores * 512 * 1024, + .size = cluster_cores * 512 * 1024, .associativity = 8, /* assume same as Krait */ .line_size = 64 /* assume same as Krait */ }; @@ -772,7 +773,7 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; *l2 = (struct cpuinfo_cache) { - .size = uarch_cores * 256 * 1024, + .size = cluster_cores * 256 * 1024, .associativity = 8, .line_size = 64 }; @@ -789,7 +790,7 @@ void cpuinfo_arm_decode_cache( }; if (arch_version >= 7) { *l2 = (struct cpuinfo_cache) { - .size = uarch_cores * 128 * 1024, + .size = cluster_cores * 128 * 1024, .associativity = 8, .line_size = 32 }; diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h index cd62700..6ccadb2 100644 --- a/src/arm/linux/api.h +++ b/src/arm/linux/api.h @@ -4,18 +4,18 @@ #include <stdint.h> #include <cpuinfo.h> +#include <arm/midr.h> +#include <linux/api.h> -#define PROC_CPUINFO_ARCH_T UINT32_C(0x00000001) -#define PROC_CPUINFO_ARCH_E UINT32_C(0x00000002) -#define PROC_CPUINFO_ARCH_J UINT32_C(0x00000004) +#define CPUINFO_ARM_LINUX_ARCH_T UINT32_C(0x00000001) +#define CPUINFO_ARM_LINUX_ARCH_E UINT32_C(0x00000002) +#define CPUINFO_ARM_LINUX_ARCH_J UINT32_C(0x00000004) -struct proc_cpuinfo_arch { - uint32_t version; - uint32_t flags; -}; +#define CPUINFO_ARM_LINUX_ARCH_TE UINT32_C(0x00000003) +#define CPUINFO_ARM_LINUX_ARCH_TEJ UINT32_C(0x00000007) -struct proc_cpuinfo_cache { +struct cpuinfo_arm_linux_proc_cpuinfo_cache { uint32_t i_size; uint32_t i_assoc; uint32_t i_line_length; @@ -29,107 +29,238 @@ struct proc_cpuinfo_cache { #if CPUINFO_ARCH_ARM /* arch/arm/include/uapi/asm/hwcap.h */ - #define PROC_CPUINFO_FEATURE_SWP UINT32_C(0x00000001) - #define PROC_CPUINFO_FEATURE_HALF UINT32_C(0x00000002) - #define PROC_CPUINFO_FEATURE_THUMB 
UINT32_C(0x00000004) - #define PROC_CPUINFO_FEATURE_26BIT UINT32_C(0x00000008) - #define PROC_CPUINFO_FEATURE_FASTMULT UINT32_C(0x00000010) - #define PROC_CPUINFO_FEATURE_FPA UINT32_C(0x00000020) - #define PROC_CPUINFO_FEATURE_VFP UINT32_C(0x00000040) - #define PROC_CPUINFO_FEATURE_EDSP UINT32_C(0x00000080) - #define PROC_CPUINFO_FEATURE_JAVA UINT32_C(0x00000100) - #define PROC_CPUINFO_FEATURE_IWMMXT UINT32_C(0x00000200) - #define PROC_CPUINFO_FEATURE_CRUNCH UINT32_C(0x00000400) - #define PROC_CPUINFO_FEATURE_THUMBEE UINT32_C(0x00000800) - #define PROC_CPUINFO_FEATURE_NEON UINT32_C(0x00001000) - #define PROC_CPUINFO_FEATURE_VFPV3 UINT32_C(0x00002000) - #define PROC_CPUINFO_FEATURE_VFPV3D16 UINT32_C(0x00004000) /* Also set for VFPv4 with 16 double-precision registers */ - #define PROC_CPUINFO_FEATURE_TLS UINT32_C(0x00008000) - #define PROC_CPUINFO_FEATURE_VFPV4 UINT32_C(0x00010000) - #define PROC_CPUINFO_FEATURE_IDIVA UINT32_C(0x00020000) - #define PROC_CPUINFO_FEATURE_IDIVT UINT32_C(0x00040000) - #define PROC_CPUINFO_FEATURE_IDIV UINT32_C(0x00060000) - #define PROC_CPUINFO_FEATURE_VFPD32 UINT32_C(0x00080000) - #define PROC_CPUINFO_FEATURE_LPAE UINT32_C(0x00100000) - #define PROC_CPUINFO_FEATURE_EVTSTRM UINT32_C(0x00200000) - - #define PROC_CPUINFO_FEATURE2_AES UINT32_C(0x00000001) - #define PROC_CPUINFO_FEATURE2_PMULL UINT32_C(0x00000002) - #define PROC_CPUINFO_FEATURE2_SHA1 UINT32_C(0x00000004) - #define PROC_CPUINFO_FEATURE2_SHA2 UINT32_C(0x00000008) - #define PROC_CPUINFO_FEATURE2_CRC32 UINT32_C(0x00000010) + #define CPUINFO_ARM_LINUX_FEATURE_SWP UINT32_C(0x00000001) + #define CPUINFO_ARM_LINUX_FEATURE_HALF UINT32_C(0x00000002) + #define CPUINFO_ARM_LINUX_FEATURE_THUMB UINT32_C(0x00000004) + #define CPUINFO_ARM_LINUX_FEATURE_26BIT UINT32_C(0x00000008) + #define CPUINFO_ARM_LINUX_FEATURE_FASTMULT UINT32_C(0x00000010) + #define CPUINFO_ARM_LINUX_FEATURE_FPA UINT32_C(0x00000020) + #define CPUINFO_ARM_LINUX_FEATURE_VFP UINT32_C(0x00000040) + #define 
CPUINFO_ARM_LINUX_FEATURE_EDSP UINT32_C(0x00000080) + #define CPUINFO_ARM_LINUX_FEATURE_JAVA UINT32_C(0x00000100) + #define CPUINFO_ARM_LINUX_FEATURE_IWMMXT UINT32_C(0x00000200) + #define CPUINFO_ARM_LINUX_FEATURE_CRUNCH UINT32_C(0x00000400) + #define CPUINFO_ARM_LINUX_FEATURE_THUMBEE UINT32_C(0x00000800) + #define CPUINFO_ARM_LINUX_FEATURE_NEON UINT32_C(0x00001000) + #define CPUINFO_ARM_LINUX_FEATURE_VFPV3 UINT32_C(0x00002000) + #define CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 UINT32_C(0x00004000) /* Also set for VFPv4 with 16 double-precision registers */ + #define CPUINFO_ARM_LINUX_FEATURE_TLS UINT32_C(0x00008000) + #define CPUINFO_ARM_LINUX_FEATURE_VFPV4 UINT32_C(0x00010000) + #define CPUINFO_ARM_LINUX_FEATURE_IDIVA UINT32_C(0x00020000) + #define CPUINFO_ARM_LINUX_FEATURE_IDIVT UINT32_C(0x00040000) + #define CPUINFO_ARM_LINUX_FEATURE_IDIV UINT32_C(0x00060000) + #define CPUINFO_ARM_LINUX_FEATURE_VFPD32 UINT32_C(0x00080000) + #define CPUINFO_ARM_LINUX_FEATURE_LPAE UINT32_C(0x00100000) + #define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM UINT32_C(0x00200000) + + #define CPUINFO_ARM_LINUX_FEATURE2_AES UINT32_C(0x00000001) + #define CPUINFO_ARM_LINUX_FEATURE2_PMULL UINT32_C(0x00000002) + #define CPUINFO_ARM_LINUX_FEATURE2_SHA1 UINT32_C(0x00000004) + #define CPUINFO_ARM_LINUX_FEATURE2_SHA2 UINT32_C(0x00000008) + #define CPUINFO_ARM_LINUX_FEATURE2_CRC32 UINT32_C(0x00000010) #elif CPUINFO_ARCH_ARM64 /* arch/arm64/include/uapi/asm/hwcap.h */ - #define PROC_CPUINFO_FEATURE_FP UINT32_C(0x00000001) - #define PROC_CPUINFO_FEATURE_ASIMD UINT32_C(0x00000002) - #define PROC_CPUINFO_FEATURE_EVTSTRM UINT32_C(0x00000004) - #define PROC_CPUINFO_FEATURE_AES UINT32_C(0x00000008) - #define PROC_CPUINFO_FEATURE_PMULL UINT32_C(0x00000010) - #define PROC_CPUINFO_FEATURE_SHA1 UINT32_C(0x00000020) - #define PROC_CPUINFO_FEATURE_SHA2 UINT32_C(0x00000040) - #define PROC_CPUINFO_FEATURE_CRC32 UINT32_C(0x00000080) - #define PROC_CPUINFO_FEATURE_ATOMICS UINT32_C(0x00000100) - #define 
PROC_CPUINFO_FEATURE_FPHP UINT32_C(0x00000200) - #define PROC_CPUINFO_FEATURE_ASIMDHP UINT32_C(0x00000400) - #define PROC_CPUINFO_FEATURE_CPUID UINT32_C(0x00000800) - #define PROC_CPUINFO_FEATURE_ASIMDRDM UINT32_C(0x00001000) - #define PROC_CPUINFO_FEATURE_JSCVT UINT32_C(0x00002000) - #define PROC_CPUINFO_FEATURE_FCMA UINT32_C(0x00004000) - #define PROC_CPUINFO_FEATURE_LRCPC UINT32_C(0x00008000) + #define CPUINFO_ARM_LINUX_FEATURE_FP UINT32_C(0x00000001) + #define CPUINFO_ARM_LINUX_FEATURE_ASIMD UINT32_C(0x00000002) + #define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM UINT32_C(0x00000004) + #define CPUINFO_ARM_LINUX_FEATURE_AES UINT32_C(0x00000008) + #define CPUINFO_ARM_LINUX_FEATURE_PMULL UINT32_C(0x00000010) + #define CPUINFO_ARM_LINUX_FEATURE_SHA1 UINT32_C(0x00000020) + #define CPUINFO_ARM_LINUX_FEATURE_SHA2 UINT32_C(0x00000040) + #define CPUINFO_ARM_LINUX_FEATURE_CRC32 UINT32_C(0x00000080) + #define CPUINFO_ARM_LINUX_FEATURE_ATOMICS UINT32_C(0x00000100) + #define CPUINFO_ARM_LINUX_FEATURE_FPHP UINT32_C(0x00000200) + #define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP UINT32_C(0x00000400) + #define CPUINFO_ARM_LINUX_FEATURE_CPUID UINT32_C(0x00000800) + #define CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM UINT32_C(0x00001000) + #define CPUINFO_ARM_LINUX_FEATURE_JSCVT UINT32_C(0x00002000) + #define CPUINFO_ARM_LINUX_FEATURE_FCMA UINT32_C(0x00004000) + #define CPUINFO_ARM_LINUX_FEATURE_LRCPC UINT32_C(0x00008000) #endif - -#define PROC_CPUINFO_VALID_ARCHITECTURE UINT32_C(0x00000001) -#define PROC_CPUINFO_VALID_IMPLEMENTER UINT32_C(0x00000002) -#define PROC_CPUINFO_VALID_VARIANT UINT32_C(0x00000004) -#define PROC_CPUINFO_VALID_PART UINT32_C(0x00000008) -#define PROC_CPUINFO_VALID_REVISION UINT32_C(0x00000010) -#define PROC_CPUINFO_VALID_FEATURES UINT32_C(0x00000020) +#define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000) +#define CPUINFO_ARM_LINUX_VALID_IMPLEMENTER UINT32_C(0x00020000) +#define CPUINFO_ARM_LINUX_VALID_VARIANT UINT32_C(0x00040000) +#define CPUINFO_ARM_LINUX_VALID_PART 
UINT32_C(0x00080000) +#define CPUINFO_ARM_LINUX_VALID_REVISION UINT32_C(0x00100000) +#define CPUINFO_ARM_LINUX_VALID_PROCESSOR UINT32_C(0x00200000) +#define CPUINFO_ARM_LINUX_VALID_FEATURES UINT32_C(0x00400000) #if CPUINFO_ARCH_ARM - #define PROC_CPUINFO_VALID_ICACHE_SIZE UINT32_C(0x00000100) - #define PROC_CPUINFO_VALID_ICACHE_SETS UINT32_C(0x00000200) - #define PROC_CPUINFO_VALID_ICACHE_WAYS UINT32_C(0x00000400) - #define PROC_CPUINFO_VALID_ICACHE_LINE UINT32_C(0x00000800) - #define PROC_CPUINFO_VALID_DCACHE_SIZE UINT32_C(0x00001000) - #define PROC_CPUINFO_VALID_DCACHE_SETS UINT32_C(0x00002000) - #define PROC_CPUINFO_VALID_DCACHE_WAYS UINT32_C(0x00004000) - #define PROC_CPUINFO_VALID_DCACHE_LINE UINT32_C(0x00008000) + #define CPUINFO_ARM_LINUX_VALID_ICACHE_SIZE UINT32_C(0x01000000) + #define CPUINFO_ARM_LINUX_VALID_ICACHE_SETS UINT32_C(0x02000000) + #define CPUINFO_ARM_LINUX_VALID_ICACHE_WAYS UINT32_C(0x04000000) + #define CPUINFO_ARM_LINUX_VALID_ICACHE_LINE UINT32_C(0x08000000) + #define CPUINFO_ARM_LINUX_VALID_DCACHE_SIZE UINT32_C(0x10000000) + #define CPUINFO_ARM_LINUX_VALID_DCACHE_SETS UINT32_C(0x20000000) + #define CPUINFO_ARM_LINUX_VALID_DCACHE_WAYS UINT32_C(0x40000000) + #define CPUINFO_ARM_LINUX_VALID_DCACHE_LINE UINT32_C(0x80000000) #endif -#define PROC_CPUINFO_VALID_INFO UINT32_C(0x0000003F) +#define CPUINFO_ARM_LINUX_VALID_INFO UINT32_C(0x007F0000) +#define CPUINFO_ARM_LINUX_VALID_MIDR UINT32_C(0x003F0000) #if CPUINFO_ARCH_ARM - #define PROC_CPUINFO_VALID_ICACHE UINT32_C(0x00000F00) - #define PROC_CPUINFO_VALID_DCACHE UINT32_C(0x0000F000) - #define PROC_CPUINFO_VALID_CACHE_LINE UINT32_C(0x00008800) + #define CPUINFO_ARM_LINUX_VALID_ICACHE UINT32_C(0x0F000000) + #define CPUINFO_ARM_LINUX_VALID_DCACHE UINT32_C(0xF0000000) + #define CPUINFO_ARM_LINUX_VALID_CACHE_LINE UINT32_C(0x88000000) #endif -struct proc_cpuinfo { - struct proc_cpuinfo_arch architecture; +struct cpuinfo_arm_linux_processor { + uint32_t architecture_version; #if CPUINFO_ARCH_ARM - 
struct proc_cpuinfo_cache cache; + uint32_t architecture_flags; + struct cpuinfo_arm_linux_proc_cpuinfo_cache proc_cpuinfo_cache; #endif uint32_t features; #if CPUINFO_ARCH_ARM uint32_t features2; #endif - uint32_t cpuid; - uint32_t implementer; - uint32_t variant; - uint32_t part; - uint32_t revision; - uint32_t valid_mask; + /** + * Main ID Register value. + */ + uint32_t midr; + enum cpuinfo_vendor vendor; + enum cpuinfo_uarch uarch; + /** + * ID of the core which includes this logical processor. + * The value is parsed from /sys/devices/system/cpu/cpu<N>/topology/core_id + */ + uint32_t core_id; + /** + * Maximum processor ID on the core which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same core. + */ + uint32_t core_group_max; + /** + * Minimum processor ID on the core which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same core. + */ + uint32_t core_group_min; + /** + * ID of the physical package which includes this logical processor. + * The value is parsed from /sys/devices/system/cpu/cpu<N>/topology/physical_package_id + */ + uint32_t package_id; + /** + * Maximum processor ID on the package which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same package. + */ + uint32_t package_group_max; + /** + * Minimum processor ID on the package which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same package. + */ + uint32_t package_group_min; + /** + * Number of logical processors in the package. + */ + uint32_t package_processor_count; + /** + * Maximum frequency, in kHZ. 
+ * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq + * If failed to read or parse the file, the value is 0. + */ + uint32_t max_frequency; + /** + * Minimum frequency, in kHZ. + * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_min_freq + * If failed to read or parse the file, the value is 0. + */ + uint32_t min_frequency; + /** + * Normalized processor number. + */ + uint32_t processor_id; + /** + * Linux processor ID + */ + uint32_t system_processor_id; + uint32_t flags; }; -struct proc_cpuinfo* cpuinfo_arm_linux_parse_proc_cpuinfo( - uint32_t processors_count[restrict static 1]); +struct cpuinfo_arm_linux_cluster { + uint32_t processor_id_min; + uint32_t processor_id_max; +}; + +/* Returns true if the two processors do belong to the same cluster: frequencies known for both must match, and an equal MIDR is conclusive unless the core is Cortex-A53, which additionally requires both min and max frequencies to be known and equal */ +static bool cpuinfo_arm_linux_processor_equals( + struct cpuinfo_arm_linux_processor processor_i[restrict static 1], + struct cpuinfo_arm_linux_processor processor_j[restrict static 1]) +{ + const uint32_t joint_flags = processor_i->flags & processor_j->flags; + + bool same_max_frequency = false; + if (joint_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { + if (processor_i->max_frequency != processor_j->max_frequency) { + return false; + } else { + same_max_frequency = true; + } + } + + bool same_min_frequency = false; + if (joint_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { + if (processor_i->min_frequency != processor_j->min_frequency) { + return false; + } else { + same_min_frequency = true; + } + } + + if ((joint_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR) { + if (processor_i->midr == processor_j->midr) { + if (midr_is_cortex_a53(processor_i->midr)) { + return same_min_frequency && same_max_frequency; + } else { + return true; + } + } + } + + return same_max_frequency && same_min_frequency; +} + +/* Returns true if the two processors certainly don't belong to the same cluster: a max frequency, min frequency, or MIDR known for both of them differs */ +static bool cpuinfo_arm_linux_processor_not_equals( + 
struct cpuinfo_arm_linux_processor processor_i[restrict static 1], + struct cpuinfo_arm_linux_processor processor_j[restrict static 1]) +{ + const uint32_t joint_flags = processor_i->flags & processor_j->flags; + + if (joint_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { + if (processor_i->max_frequency != processor_j->max_frequency) { + return true; + } + } + + if (joint_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { + if (processor_i->min_frequency != processor_j->min_frequency) { + return true; + } + } + + if ((joint_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR) { + if (processor_i->midr != processor_j->midr) { + return true; + } + } + + return false; +} + +bool cpuinfo_arm_linux_parse_proc_cpuinfo(uint32_t max_processors_count, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]); #if CPUINFO_ARCH_ARM void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( - const struct proc_cpuinfo proc_cpuinfo[restrict static 1], - uint32_t proc_cpuinfo_count, + const struct cpuinfo_arm_linux_processor processors[restrict static 1], struct cpuinfo_arm_isa isa[restrict static 1]); #elif CPUINFO_ARCH_ARM64 void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( - const struct proc_cpuinfo proc_cpuinfo[restrict static 1], + const struct cpuinfo_arm_linux_processor processors[restrict static 1], struct cpuinfo_arm_isa isa[restrict static 1]); #endif diff --git a/src/arm/linux/arm32-isa.c b/src/arm/linux/arm32-isa.c index 217879e..092d50f 100644 --- a/src/arm/linux/arm32-isa.c +++ b/src/arm/linux/arm32-isa.c @@ -5,6 +5,7 @@ #endif #include <arm/linux/api.h> #include <arm/linux/cp.h> +#include <arm/midr.h> #include <log.h> @@ -24,17 +25,15 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( - const struct proc_cpuinfo proc_cpuinfo[restrict static 1], - uint32_t proc_cpuinfo_count, + const struct cpuinfo_arm_linux_processor processor[restrict static 1], struct cpuinfo_arm_isa isa[restrict static 1]) { - const uint32_t cpu_part = 
proc_cpuinfo->part; - const uint32_t cpu_implementer = proc_cpuinfo->implementer; - uint32_t architecture = proc_cpuinfo->architecture.version; - if (architecture >= 8) { + const uint32_t midr = processor->midr; + uint32_t architecture_version = processor->architecture_version; + if (architecture_version >= 8) { /* * ARMv7 code running on ARMv8: IDIV, VFP, NEON are always supported, - * but only ARMv8 optional features are reported. + * but may be not reported in /proc/cpuinfo features. */ isa->armv5e = true; isa->armv6 = true; @@ -55,30 +54,49 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( /* * ARM11 (ARM 1136/1156/1176/11 MPCore) processors can report v7 architecture * even though they support only ARMv6 instruction set. - * Detecting this situation by CPU implementer == 'A' (ARM) and CPU part 0xBXX. */ - if (architecture == 7 && cpu_implementer == 'A' && (cpu_part & 0xF00) == 0xB00) { - cpuinfo_log_warning("Kernel-reported architecture ARMv7 ignored due to mismatch with processor microarchitecture (ARM11)"); - architecture = 6; + if (architecture_version == 7 && midr_is_arm11(midr)) { + cpuinfo_log_warning("kernel-reported architecture ARMv7 ignored due to mismatch with processor microarchitecture (ARM11)"); + architecture_version = 6; } - const uint32_t features = proc_cpuinfo->features; - if ((architecture >= 6) || (features & PROC_CPUINFO_FEATURE_EDSP) || (proc_cpuinfo->architecture.flags & PROC_CPUINFO_ARCH_E)) { + const uint32_t features = processor->features; + const uint32_t architecture_flags = processor->architecture_flags; + if ((architecture_version >= 6) || (features & CPUINFO_ARM_LINUX_FEATURE_EDSP) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_E)) { isa->armv5e = true; } - if (architecture >= 6) { + if (architecture_version >= 6) { isa->armv6 = true; } - if (architecture >= 7) { + if (architecture_version >= 7) { isa->armv6k = true; isa->armv7 = true; - if (proc_cpuinfo_count > 1) { - isa->armv7mp = true; + /* + * ARMv7 MP extension 
(PLDW instruction) is not indicated in /proc/cpuinfo. + * Use heuristic list of supporting processors: + * - Processors supporting UDIV/SDIV instructions ("idiva" + "idivt" features in /proc/cpuinfo) + * - Cortex-A5 + * - Cortex-A9 + * - Krait (supports UDIV/SDIV, but kernels may not report it in /proc/cpuinfo) + * + * TODO: check Qualcomm Scorpion. + */ + switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x4100C050): /* Cortex-A5 */ + case UINT32_C(0x4100C0A0): /* Cortex-A9 */ + case UINT32_C(0x510004D0): /* Krait (dual-core) */ + case UINT32_C(0x510006F0): /* Krait (quad-core) */ + isa->armv7mp = true; + break; + default: + /* In practice IDIV instruction implies ARMv7+MP ISA */ + isa->armv7mp = (features & CPUINFO_ARM_LINUX_FEATURE_IDIV) == CPUINFO_ARM_LINUX_FEATURE_IDIV; + break; } } - if (features & PROC_CPUINFO_FEATURE_IWMMXT) { + if (features & CPUINFO_ARM_LINUX_FEATURE_IWMMXT) { const uint32_t wcid = read_wcid(); cpuinfo_log_debug("WCID = 0x%08"PRIx32, wcid); const uint32_t coprocessor_type = (wcid >> 8) & UINT32_C(0xFF); @@ -94,47 +112,39 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( } } - if ((features & PROC_CPUINFO_FEATURE_THUMB) || (proc_cpuinfo->architecture.flags & PROC_CPUINFO_ARCH_T)) { + if ((features & CPUINFO_ARM_LINUX_FEATURE_THUMB) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_T)) { isa->thumb = true; /* * There is no separate feature flag for Thumb 2. - * All ARMv7 processors and ARM 1156 (CPU part 0xB56) support Thumb 2. + * All ARMv7 processors and ARM 1156 support Thumb 2. 
*/ - if (architecture >= 7 || (cpu_implementer == 'A' && cpu_part == 0xB56)) { + if (architecture_version >= 7 || midr_is_arm1156(midr)) { isa->thumb2 = true; } } - if (features & PROC_CPUINFO_FEATURE_THUMBEE) { + if (features & CPUINFO_ARM_LINUX_FEATURE_THUMBEE) { isa->thumbee = true; } - if ((features & PROC_CPUINFO_FEATURE_JAVA) || (proc_cpuinfo->architecture.flags & PROC_CPUINFO_ARCH_J)) { + if ((features & CPUINFO_ARM_LINUX_FEATURE_JAVA) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_J)) { isa->jazelle = true; } - if ((features & PROC_CPUINFO_FEATURE_IDIV) == PROC_CPUINFO_FEATURE_IDIV) { + /* Qualcomm Krait may have buggy kernel configuration that doesn't report IDIV */ + if ((features & CPUINFO_ARM_LINUX_FEATURE_IDIV) == CPUINFO_ARM_LINUX_FEATURE_IDIV || midr_is_krait(midr)) { isa->idiv = true; - } else { - /* Qualcomm Krait may have buggy kernel configuration that doesn't report IDIV */ - if (cpu_implementer == 'Q') { - switch (cpu_part) { - case 0x04D: /* Dual-core Krait */ - case 0x06F: /* Quad-core Krait */ - isa->idiv = true; - } - } } const uint32_t vfp_mask = \ - PROC_CPUINFO_FEATURE_VFP | PROC_CPUINFO_FEATURE_VFPV3 | PROC_CPUINFO_FEATURE_VFPV3D16 | \ - PROC_CPUINFO_FEATURE_VFPD32 | PROC_CPUINFO_FEATURE_VFPV4 | PROC_CPUINFO_FEATURE_NEON; + CPUINFO_ARM_LINUX_FEATURE_VFP | CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | \ + CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON; if (features & vfp_mask) { - const uint32_t vfpv3_mask = PROC_CPUINFO_FEATURE_VFPV3 | PROC_CPUINFO_FEATURE_VFPV3D16 | \ - PROC_CPUINFO_FEATURE_VFPD32 | PROC_CPUINFO_FEATURE_VFPV4 | PROC_CPUINFO_FEATURE_NEON; - if ((architecture >= 7) | (features & vfpv3_mask)) { + const uint32_t vfpv3_mask = CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | \ + CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON; + if ((architecture_version >= 7) | (features & 
vfpv3_mask)) { isa->vfpv3 = true; - const uint32_t d32_mask = PROC_CPUINFO_FEATURE_VFPD32 | PROC_CPUINFO_FEATURE_NEON; + const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON; if (features & d32_mask) { isa->d32 = true; } @@ -147,38 +157,38 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( } } } - if (features & PROC_CPUINFO_FEATURE_NEON) { + if (features & CPUINFO_ARM_LINUX_FEATURE_NEON) { isa->neon = true; } /* * There is no separate feature flag for FP16 support. * VFPv4 implies VFPv3-FP16 support (and in practice, NEON-HP as well). - * Additionally, ARM Cortex-A9 (CPU part 0xC09) supports FP16. + * Additionally, ARM Cortex-A9 supports FP16. */ - if ((features & PROC_CPUINFO_FEATURE_VFPV4) || (cpu_implementer == 'A' && cpu_part == 0xC09)) { + if ((features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) || midr_is_cortex_a9(midr)) { isa->fp16 = true; } - if (features & PROC_CPUINFO_FEATURE_VFPV4) { + if (features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) { isa->fma = true; } } - const uint32_t features2 = proc_cpuinfo->features2; - if (features2 & PROC_CPUINFO_FEATURE2_AES) { + const uint32_t features2 = processor->features2; + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_AES) { isa->aes = true; } - if (features2 & PROC_CPUINFO_FEATURE2_PMULL) { + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_PMULL) { isa->pmull = true; } - if (features2 & PROC_CPUINFO_FEATURE2_SHA1) { + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA1) { isa->sha1 = true; } - if (features2 & PROC_CPUINFO_FEATURE2_SHA2) { + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA2) { isa->sha2 = true; } - if (features2 & PROC_CPUINFO_FEATURE2_CRC32) { + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_CRC32) { isa->crc32 = true; } } diff --git a/src/arm/linux/arm64-isa.c b/src/arm/linux/arm64-isa.c index d074684..ae92d48 100644 --- a/src/arm/linux/arm64-isa.c +++ b/src/arm/linux/arm64-isa.c @@ -5,49 +5,49 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( - const struct proc_cpuinfo 
proc_cpuinfo[restrict static 1], + const struct cpuinfo_arm_linux_processor processor[restrict static 1], struct cpuinfo_arm_isa isa[restrict static 1]) { - const uint32_t features = proc_cpuinfo->features; - if (features & PROC_CPUINFO_FEATURE_AES) { + const uint32_t features = processor->features; + if (features & CPUINFO_ARM_LINUX_FEATURE_AES) { isa->aes = true; } - if (features & PROC_CPUINFO_FEATURE_PMULL) { + if (features & CPUINFO_ARM_LINUX_FEATURE_PMULL) { isa->pmull = true; } - if (features & PROC_CPUINFO_FEATURE_SHA1) { + if (features & CPUINFO_ARM_LINUX_FEATURE_SHA1) { isa->sha1 = true; } - if (features & PROC_CPUINFO_FEATURE_SHA2) { + if (features & CPUINFO_ARM_LINUX_FEATURE_SHA2) { isa->sha2 = true; } - if (features & PROC_CPUINFO_FEATURE_CRC32) { + if (features & CPUINFO_ARM_LINUX_FEATURE_CRC32) { isa->crc32 = true; } - if (features & PROC_CPUINFO_FEATURE_ATOMICS) { + if (features & CPUINFO_ARM_LINUX_FEATURE_ATOMICS) { isa->atomics = true; } - const uint32_t fp16arith_mask = PROC_CPUINFO_FEATURE_FPHP | PROC_CPUINFO_FEATURE_ASIMDHP; + const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP; if ((features & fp16arith_mask) == fp16arith_mask) { isa->fp16arith = true; - } else if (features & PROC_CPUINFO_FEATURE_FPHP) { + } else if (features & CPUINFO_ARM_LINUX_FEATURE_FPHP) { cpuinfo_log_warning("FP16 arithmetics disabled: detected support only for scalar operations"); - } else if (features & PROC_CPUINFO_FEATURE_ASIMDHP) { + } else if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDHP) { cpuinfo_log_warning("FP16 arithmetics disabled: detected support only for SIMD operations"); } - if (features & PROC_CPUINFO_FEATURE_ASIMDRDM) { + if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM) { isa->rdm = true; } - if (features & PROC_CPUINFO_FEATURE_JSCVT) { + if (features & CPUINFO_ARM_LINUX_FEATURE_JSCVT) { isa->jscvt = true; } - if (features & PROC_CPUINFO_FEATURE_ASIMDRDM) { + if (features & 
CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM) { isa->rdm = true; } - if (features & PROC_CPUINFO_FEATURE_JSCVT) { + if (features & CPUINFO_ARM_LINUX_FEATURE_JSCVT) { isa->jscvt = true; } - if (features & PROC_CPUINFO_FEATURE_FCMA) { + if (features & CPUINFO_ARM_LINUX_FEATURE_FCMA) { isa->fcma = true; } } diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c index f1ac00b..c4c7b9a 100644 --- a/src/arm/linux/cpuinfo.c +++ b/src/arm/linux/cpuinfo.c @@ -15,6 +15,7 @@ #include <cpuinfo-mock.h> #endif #include <arm/linux/api.h> +#include <arm/midr.h> #include <log.h> @@ -29,18 +30,16 @@ static const char* proc_cpuinfo_path = "/proc/cpuinfo"; /* - * Size, in chars, of the on-stack buffer used for parsing cpu lists. - * This is also the limit on the length of a single entry - * (<cpu-number> or <cpu-number-start>-<cpu-number-end>) - * in the cpu list. + * Size, in chars, of the on-stack buffer used for parsing lines of /proc/cpuinfo. + * This is also the limit on the length of a single line. */ -#define BUFFER_SIZE 256 +#define BUFFER_SIZE 1024 static uint32_t parse_processor_number( const char* processor_start, const char* processor_end, - struct proc_cpuinfo proc_cpuinfo[restrict static 1]) + struct cpuinfo_arm_linux_processor processor[restrict static 1]) { const size_t processor_length = (size_t) (processor_end - processor_start); @@ -122,13 +121,13 @@ static uint32_t parse_processor_number( static void parse_features( const char* features_start, const char* features_end, - struct proc_cpuinfo proc_cpuinfo[restrict static 1]) + struct cpuinfo_arm_linux_processor processor[restrict static 1]) { const char* feature_start = features_start; const char* feature_end; /* Mark the features as valid */ - proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_FEATURES; + processor->flags |= CPUINFO_ARM_LINUX_VALID_FEATURES; do { feature_end = feature_start + 1; @@ -140,22 +139,39 @@ static void parse_features( const size_t feature_length = (size_t) (feature_end - feature_start); switch 
(feature_length) { + case 2: + if (memcmp(feature_start, "fp", feature_length) == 0) { +#if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FP; +#endif +#if CPUINFO_ARCH_ARM + } else if (memcmp(feature_start, "wp", feature_length) == 0) { + /* + * Some AArch64 kernels, including the one on Nexus 5X, + * erroneously report "swp" as "wp" to AArch32 programs + */ + processor->features |= CPUINFO_ARM_LINUX_FEATURE_SWP; +#endif + } else { + goto unexpected; + } + break; case 3: if (memcmp(feature_start, "aes", feature_length) == 0) { #if CPUINFO_ARCH_ARM - proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_AES; + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_AES; #elif CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_AES; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_AES; #endif #if CPUINFO_ARCH_ARM } else if (memcmp(feature_start, "swp", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_SWP; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_SWP; } else if (memcmp(feature_start, "fpa", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_FPA; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FPA; } else if (memcmp(feature_start, "vfp", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_VFP; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFP; } else if (memcmp(feature_start, "tls", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_TLS; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_TLS; #endif /* CPUINFO_ARCH_ARM */ } else { goto unexpected; @@ -164,35 +180,41 @@ static void parse_features( case 4: if (memcmp(feature_start, "sha1", feature_length) == 0) { #if CPUINFO_ARCH_ARM - proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_SHA1; + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_SHA1; #elif CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_SHA1; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_SHA1; 
#endif } else if (memcmp(feature_start, "sha2", feature_length) == 0) { #if CPUINFO_ARCH_ARM - proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_SHA2; + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_SHA2; #elif CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_SHA2; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_SHA2; #endif } else if (memcmp(feature_start, "fphp", feature_length) == 0) { #if CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_FPHP; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FPHP; #endif } else if (memcmp(feature_start, "fcma", feature_length) == 0) { #if CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_FCMA; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FCMA; #endif #if CPUINFO_ARCH_ARM } else if (memcmp(feature_start, "half", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_HALF; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_HALF; } else if (memcmp(feature_start, "edsp", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_EDSP; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_EDSP; } else if (memcmp(feature_start, "java", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_JAVA; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_JAVA; } else if (memcmp(feature_start, "neon", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_NEON; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_NEON; } else if (memcmp(feature_start, "lpae", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_LPAE; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_LPAE; + } else if (memcmp(feature_start, "tlsi", feature_length) == 0) { + /* + * Some AArch64 kernels, including the one on Nexus 5X, + * erroneously report "tls" as "tlsi" to AArch32 programs + */ + processor->features |= CPUINFO_ARM_LINUX_FEATURE_TLS; #endif /* CPUINFO_ARCH_ARM */ } else { goto unexpected; @@ -201,41 +223,45 @@ 
static void parse_features( case 5: if (memcmp(feature_start, "pmull", feature_length) == 0) { #if CPUINFO_ARCH_ARM - proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_PMULL; + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_PMULL; #elif CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_PMULL; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_PMULL; #endif } else if (memcmp(feature_start, "crc32", feature_length) == 0) { #if CPUINFO_ARCH_ARM - proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_CRC32; + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_CRC32; #elif CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_CRC32; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_CRC32; + #endif + } else if (memcmp(feature_start, "asimd", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMD; #endif } else if (memcmp(feature_start, "cpuid", feature_length) == 0) { #if CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_CPUID; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_CPUID; #endif } else if (memcmp(feature_start, "jscvt", feature_length) == 0) { #if CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_JSCVT; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_JSCVT; #endif } else if (memcmp(feature_start, "lrcpc", feature_length) == 0) { #if CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_LRCPC; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_LRCPC; #endif #if CPUINFO_ARCH_ARM } else if (memcmp(feature_start, "thumb", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_THUMB; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_THUMB; } else if (memcmp(feature_start, "26bit", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_26BIT; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_26BIT; } else if (memcmp(feature_start, "vfpv3", feature_length) == 0) { - proc_cpuinfo->features |= 
PROC_CPUINFO_FEATURE_VFPV3; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV3; } else if (memcmp(feature_start, "vfpv4", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_VFPV4; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV4; } else if (memcmp(feature_start, "idiva", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_IDIVA; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_IDIVA; } else if (memcmp(feature_start, "idivt", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_IDIVT; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_IDIVT; #endif /* CPUINFO_ARCH_ARM */ } else { goto unexpected; @@ -244,11 +270,11 @@ static void parse_features( #if CPUINFO_ARCH_ARM case 6: if (memcmp(feature_start, "iwmmxt", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_IWMMXT; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_IWMMXT; } else if (memcmp(feature_start, "crunch", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_CRUNCH; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_CRUNCH; } else if (memcmp(feature_start, "vfpd32", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_VFPD32; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPD32; } else { goto unexpected; } @@ -256,18 +282,18 @@ static void parse_features( #endif /* CPUINFO_ARCH_ARM */ case 7: if (memcmp(feature_start, "evtstrm", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_EVTSTRM; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_EVTSTRM; } else if (memcmp(feature_start, "atomics", feature_length) == 0) { #if CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_ATOMICS; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_ATOMICS; #endif } else if (memcmp(feature_start, "asimdhp", feature_length) == 0) { #if CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_ASIMDHP; + processor->features |= 
CPUINFO_ARM_LINUX_FEATURE_ASIMDHP; #endif #if CPUINFO_ARCH_ARM } else if (memcmp(feature_start, "thumbee", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_THUMBEE; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_THUMBEE; #endif /* CPUINFO_ARCH_ARM */ } else { goto unexpected; @@ -276,13 +302,13 @@ static void parse_features( case 8: if (memcmp(feature_start, "asimdrdm", feature_length) == 0) { #if CPUINFO_ARCH_ARM64 - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_ASIMDRDM; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM; #endif #if CPUINFO_ARCH_ARM } else if (memcmp(feature_start, "fastmult", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_FASTMULT; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FASTMULT; } else if (memcmp(feature_start, "vfpv3d16", feature_length) == 0) { - proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_VFPV3D16; + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV3D16; #endif /* CPUINFO_ARCH_ARM */ } else { goto unexpected; @@ -290,7 +316,7 @@ static void parse_features( break; default: unexpected: - cpuinfo_log_warning("unexpected /proc/cpuinfo features %.*s is ignored", + cpuinfo_log_warning("unexpected /proc/cpuinfo feature \"%.*s\" is ignored", (int) feature_length, feature_start); break; } @@ -306,14 +332,15 @@ static void parse_features( static void parse_cpu_architecture( const char* cpu_architecture_start, const char* cpu_architecture_end, - struct proc_cpuinfo proc_cpuinfo[restrict static 1]) + struct cpuinfo_arm_linux_processor processor[restrict static 1]) { const size_t cpu_architecture_length = (size_t) (cpu_architecture_end - cpu_architecture_start); /* Early AArch64 kernels report "CPU architecture: AArch64" instead of a numeric value 8 */ if (cpu_architecture_length == 7) { if (memcmp(cpu_architecture_start, "AArch64", cpu_architecture_length) == 0) { - proc_cpuinfo->architecture.version = 8; - proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_ARCHITECTURE; + 
processor->midr = midr_set_architecture(processor->midr, UINT32_C(0xF)); + processor->architecture_version = 8; + processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE; return; } } @@ -337,21 +364,23 @@ static void parse_cpu_architecture( (int) cpu_architecture_length, cpu_architecture_start); } else { if (architecture != 0) { - proc_cpuinfo->architecture.version = architecture; - proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_ARCHITECTURE; + processor->architecture_version = architecture; + processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE; for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) { const char feature = *cpu_architecture_ptr; switch (feature) { +#if CPUINFO_ARCH_ARM case 'T': - proc_cpuinfo->architecture.flags |= PROC_CPUINFO_ARCH_T; + processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_T; break; case 'E': - proc_cpuinfo->architecture.flags |= PROC_CPUINFO_ARCH_E; + processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_E; break; case 'J': - proc_cpuinfo->architecture.flags |= PROC_CPUINFO_ARCH_J; + processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_J; break; +#endif /* CPUINFO_ARCH_ARM */ case ' ': case '\t': /* Ignore whitespace at the end */ @@ -367,12 +396,31 @@ static void parse_cpu_architecture( (int) cpu_architecture_length, cpu_architecture_start); } } + + uint32_t midr_architecture = UINT32_C(0xF); +#if CPUINFO_ARCH_ARM + switch (processor->architecture_version) { + case 6: + midr_architecture = UINT32_C(0x7); /* ARMv6 */ + break; + case 5: + if ((processor->architecture_flags & CPUINFO_ARM_LINUX_ARCH_TEJ) == CPUINFO_ARM_LINUX_ARCH_TEJ) { + midr_architecture = UINT32_C(0x6); /* ARMv5TEJ */ + } else if ((processor->architecture_flags & CPUINFO_ARM_LINUX_ARCH_TE) == CPUINFO_ARM_LINUX_ARCH_TE) { + midr_architecture = UINT32_C(0x5); /* ARMv5TE */ + } else { + midr_architecture = UINT32_C(0x4); /* ARMv5T */ + } + break; + } +#endif + processor->midr = midr_set_architecture(processor->midr, 
midr_architecture); } static void parse_cpu_part( const char* cpu_part_start, const char* cpu_part_end, - struct proc_cpuinfo proc_cpuinfo[restrict static 1]) + struct cpuinfo_arm_linux_processor processor[restrict static 1]) { const size_t cpu_part_length = (size_t) (cpu_part_end - cpu_part_start); @@ -415,14 +463,14 @@ static void parse_cpu_part( cpu_part = cpu_part * 16 + digit; } - proc_cpuinfo->part = cpu_part; - proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_PART; + processor->midr = midr_set_part(processor->midr, cpu_part); + processor->flags |= CPUINFO_ARM_LINUX_VALID_PART; } static void parse_cpu_implementer( const char* cpu_implementer_start, const char* cpu_implementer_end, - struct proc_cpuinfo proc_cpuinfo[restrict static 1]) + struct cpuinfo_arm_linux_processor processor[restrict static 1]) { const size_t cpu_implementer_length = cpu_implementer_end - cpu_implementer_start; @@ -469,14 +517,14 @@ static void parse_cpu_implementer( cpu_implementer = cpu_implementer * 16 + digit; } - proc_cpuinfo->implementer = cpu_implementer; - proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_IMPLEMENTER; + processor->midr = midr_set_implementer(processor->midr, cpu_implementer); + processor->flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER; } static void parse_cpu_variant( const char* cpu_variant_start, const char* cpu_variant_end, - struct proc_cpuinfo proc_cpuinfo[restrict static 1]) + struct cpuinfo_arm_linux_processor processor[restrict static 1]) { const size_t cpu_variant_length = cpu_variant_end - cpu_variant_start; @@ -500,25 +548,27 @@ static void parse_cpu_variant( /* Check if the value after hex prefix is indeed a hex digit and decode it. 
*/ const char digit_char = cpu_variant_start[2]; + uint32_t cpu_variant; if ((uint32_t) (digit_char - '0') < 10) { - proc_cpuinfo->variant = (uint32_t) (digit_char - '0'); + cpu_variant = (uint32_t) (digit_char - '0'); } else if ((uint32_t) (digit_char - 'A') < 6) { - proc_cpuinfo->variant = 10 + (uint32_t) (digit_char - 'A'); + cpu_variant = 10 + (uint32_t) (digit_char - 'A'); } else if ((uint32_t) (digit_char - 'a') < 6) { - proc_cpuinfo->variant = 10 + (uint32_t) (digit_char - 'a'); + cpu_variant = 10 + (uint32_t) (digit_char - 'a'); } else { cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c'", (int) cpu_variant_length, cpu_variant_start, digit_char); return; } - proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_VARIANT; + processor->midr = midr_set_variant(processor->midr, cpu_variant); + processor->flags |= CPUINFO_ARM_LINUX_VALID_VARIANT; } static void parse_cpu_revision( const char* cpu_revision_start, const char* cpu_revision_end, - struct proc_cpuinfo proc_cpuinfo[restrict static 1]) + struct cpuinfo_arm_linux_processor processor[restrict static 1]) { uint32_t cpu_revision = 0; for (const char* digit_ptr = cpu_revision_start; digit_ptr != cpu_revision_end; digit_ptr++) { @@ -535,8 +585,8 @@ static void parse_cpu_revision( cpu_revision = cpu_revision * 10 + digit; } - proc_cpuinfo->revision = cpu_revision; - proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_REVISION; + processor->midr = midr_set_revision(processor->midr, cpu_revision); + processor->flags |= CPUINFO_ARM_LINUX_VALID_REVISION; } #if CPUINFO_ARCH_ARM @@ -560,7 +610,7 @@ static void parse_cache_number( const char* number_end, const char* number_name, uint32_t number_ptr[restrict static 1], - uint32_t valid_mask[restrict static 1], + uint32_t flags[restrict static 1], uint32_t number_mask) { uint32_t number = 0; @@ -582,7 +632,7 @@ static void parse_cache_number( } /* If the number specifies a cache line size, verify that is a reasonable power of 
2 */ - if (number_mask & PROC_CPUINFO_VALID_CACHE_LINE) { + if (number_mask & CPUINFO_ARM_LINUX_VALID_CACHE_LINE) { switch (number) { case 16: case 32: @@ -596,7 +646,7 @@ static void parse_cache_number( } *number_ptr = number; - *valid_mask |= number_mask; + *flags |= number_mask; } #endif /* CPUINFO_ARCH_ARM */ @@ -626,12 +676,12 @@ static void parse_cache_number( static uint32_t parse_line( const char* line_start, const char* line_end, - uint32_t processor_count, - struct proc_cpuinfo* proc_cpuinfo) + uint32_t processor_index, + struct cpuinfo_arm_linux_processor* processor) { /* Empty line. Skip. */ if (line_start == line_end) { - return processor_count; + return processor_index; } /* Search for ':' on the line. */ @@ -645,7 +695,7 @@ static uint32_t parse_line( if (separator == line_end) { cpuinfo_log_warning("Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found", (int) (line_end - line_start), line_start); - return processor_count; + return processor_index; } /* Skip trailing spaces in key part. */ @@ -659,7 +709,7 @@ static uint32_t parse_line( if (key_end == line_start) { cpuinfo_log_warning("Line %.*s in /proc/cpuinfo is ignored: key contains only spaces", (int) (line_end - line_start), line_start); - return processor_count; + return processor_index; } /* Skip leading spaces in value part. 
*/ @@ -673,7 +723,7 @@ static uint32_t parse_line( if (value_start == line_end) { cpuinfo_log_warning("Line %.*s in /proc/cpuinfo is ignored: value contains only spaces", (int) (line_end - line_start), line_start); - return processor_count; + return processor_index; } /* Skip trailing spaces in value part (if any) */ @@ -692,20 +742,20 @@ static uint32_t parse_line( #if CPUINFO_ARCH_ARM } else if (memcmp(line_start, "I size", key_length) == 0) { parse_cache_number(value_start, value_end, - "instruction cache size", &proc_cpuinfo->cache.i_size, - &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_ICACHE_SIZE); + "instruction cache size", &processor->proc_cpuinfo_cache.i_size, + &processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_SIZE); } else if (memcmp(line_start, "I sets", key_length) == 0) { parse_cache_number(value_start, value_end, - "instruction cache sets", &proc_cpuinfo->cache.i_sets, - &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_ICACHE_SETS); + "instruction cache sets", &processor->proc_cpuinfo_cache.i_sets, + &processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_SETS); } else if (memcmp(line_start, "D size", key_length) == 0) { parse_cache_number(value_start, value_end, - "data cache size", &proc_cpuinfo->cache.d_size, - &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_DCACHE_SIZE); + "data cache size", &processor->proc_cpuinfo_cache.d_size, + &processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_SIZE); } else if (memcmp(line_start, "D sets", key_length) == 0) { parse_cache_number(value_start, value_end, - "data cache sets", &proc_cpuinfo->cache.d_sets, - &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_DCACHE_SETS); + "data cache sets", &processor->proc_cpuinfo_cache.d_sets, + &processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_SETS); #endif /* CPUINFO_ARCH_ARM */ } else { goto unknown; @@ -715,12 +765,12 @@ static uint32_t parse_line( case 7: if (memcmp(line_start, "I assoc", key_length) == 0) { parse_cache_number(value_start, value_end, - "instruction cache associativity", 
&proc_cpuinfo->cache.i_assoc, - &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_ICACHE_WAYS); + "instruction cache associativity", &processor->proc_cpuinfo_cache.i_assoc, + &processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_WAYS); } else if (memcmp(line_start, "D assoc", key_length) == 0) { parse_cache_number(value_start, value_end, - "data cache associativity", &proc_cpuinfo->cache.d_assoc, - &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_DCACHE_WAYS); + "data cache associativity", &processor->proc_cpuinfo_cache.d_assoc, + &processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_WAYS); } else { goto unknown; } @@ -728,9 +778,9 @@ static uint32_t parse_line( #endif /* CPUINFO_ARCH_ARM */ case 8: if (memcmp(line_start, "CPU part", key_length) == 0) { - parse_cpu_part(value_start, value_end, proc_cpuinfo); + parse_cpu_part(value_start, value_end, processor); } else if (memcmp(line_start, "Features", key_length) == 0) { - parse_features(value_start, value_end, proc_cpuinfo); + parse_features(value_start, value_end, processor); } else if (memcmp(line_start, "BogoMIPS", key_length) == 0) { /* BogoMIPS is useless, don't parse */ } else if (memcmp(line_start, "Hardware", key_length) == 0) { @@ -743,20 +793,19 @@ static uint32_t parse_line( break; case 9: if (memcmp(line_start, "processor", key_length) == 0) { - const uint32_t new_processor_number = - parse_processor_number(value_start, value_end, proc_cpuinfo); - const uint32_t new_processors_count = new_processor_number + 1; - if (new_processor_number < processor_count && new_processor_number != 0) { - cpuinfo_log_warning("ignored unexpectedly low processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", - new_processor_number, processor_count - 1); - } else { - if (new_processor_number > processor_count) { - cpuinfo_log_info("unexpectedly high processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", - new_processor_number, processor_count - 1); - return new_processors_count; - } - return 
new_processors_count; + const uint32_t new_processor_index = parse_processor_number(value_start, value_end, processor); + if (new_processor_index < processor_index) { + /* Strange: decreasing processor number */ + cpuinfo_log_warning( + "unexpectedly low processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_index, processor_index); + } else if (new_processor_index > processor_index + 1) { + /* Strange: skipped processor $(processor_index + 1) */ + cpuinfo_log_warning( + "unexpectedly high processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_index, processor_index); } + return new_processor_index; } else if (memcmp(line_start, "Processor", key_length) == 0) { /* TODO: parse to fix misreported architecture, similar to Android's cpufeatures */ } else { @@ -765,14 +814,14 @@ static uint32_t parse_line( break; case 11: if (memcmp(line_start, "CPU variant", key_length) == 0) { - parse_cpu_variant(value_start, value_end, proc_cpuinfo); + parse_cpu_variant(value_start, value_end, processor); } else { goto unknown; } break; case 12: if (memcmp(line_start, "CPU revision", key_length) == 0) { - parse_cpu_revision(value_start, value_end, proc_cpuinfo); + parse_cpu_revision(value_start, value_end, processor); } else { goto unknown; } @@ -781,12 +830,12 @@ static uint32_t parse_line( case 13: if (memcmp(line_start, "I line length", key_length) == 0) { parse_cache_number(value_start, value_end, - "instruction cache line size", &proc_cpuinfo->cache.i_line_length, - &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_ICACHE_LINE); + "instruction cache line size", &processor->proc_cpuinfo_cache.i_line_length, + &processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_LINE); } else if (memcmp(line_start, "D line length", key_length) == 0) { parse_cache_number(value_start, value_end, - "data cache line size", &proc_cpuinfo->cache.d_line_length, - &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_DCACHE_LINE); + "data cache 
line size", &processor->proc_cpuinfo_cache.d_line_length, + &processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_LINE); } else { goto unknown; } @@ -794,16 +843,16 @@ static uint32_t parse_line( #endif /* CPUINFO_ARCH_ARM */ case 15: if (memcmp(line_start, "CPU implementer", key_length) == 0) { - parse_cpu_implementer(value_start, value_end, proc_cpuinfo); + parse_cpu_implementer(value_start, value_end, processor); } else if (memcmp(line_start, "CPU implementor", key_length) == 0) { - parse_cpu_implementer(value_start, value_end, proc_cpuinfo); + parse_cpu_implementer(value_start, value_end, processor); } else { goto unknown; } break; case 16: if (memcmp(line_start, "CPU architecture", key_length) == 0) { - parse_cpu_architecture(value_start, value_end, proc_cpuinfo); + parse_cpu_architecture(value_start, value_end, processor); } else { goto unknown; } @@ -813,23 +862,18 @@ static uint32_t parse_line( cpuinfo_log_debug("unknown /proc/cpuinfo key: %.*s", (int) key_length, line_start); } - return processor_count; + return processor_index; } -struct proc_cpuinfo* cpuinfo_arm_linux_parse_proc_cpuinfo(uint32_t processors_count_ptr[restrict static 1]) { +bool cpuinfo_arm_linux_parse_proc_cpuinfo( + uint32_t max_processors_count, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]) +{ int file = -1; - struct proc_cpuinfo* processors = NULL; - struct proc_cpuinfo* result = NULL; - uint32_t processors_capacity = 8; - uint32_t processors_count = 1; + bool status = false; + uint32_t processor_index = 0; char buffer[BUFFER_SIZE]; - - processors = calloc(processors_capacity, sizeof(struct proc_cpuinfo)); - if (processors == NULL) { - cpuinfo_log_error("failed to allocate %zu bytes for /proc/cpuinfo data", - processors_capacity * sizeof(struct proc_cpuinfo)); - goto cleanup; - } + struct cpuinfo_arm_linux_processor dummy_processor; cpuinfo_log_debug("parsing cpu info from file %s", proc_cpuinfo_path); file = open(proc_cpuinfo_path, O_RDONLY); @@ 
-838,6 +882,7 @@ struct proc_cpuinfo* cpuinfo_arm_linux_parse_proc_cpuinfo(uint32_t processors_co goto cleanup; } + /* Only used for error reporting */ size_t position = 0; const char* buffer_end = &buffer[BUFFER_SIZE]; char* data_start = buffer; @@ -856,13 +901,20 @@ struct proc_cpuinfo* cpuinfo_arm_linux_parse_proc_cpuinfo(uint32_t processors_co if (bytes_read == 0) { /* No more data in the file: process the remaining text in the buffer as a single entry */ const char* line_end = data_end; - const uint32_t new_processors_count = - parse_line(line_start, line_end, processors_count, &processors[processors_count - 1]); - if (new_processors_count > processors_capacity) { - processors = realloc(processors, new_processors_count * sizeof(struct proc_cpuinfo)); + const uint32_t new_processor_index = parse_line(line_start, line_end, processor_index, + processor_index < max_processors_count ? &processors[processor_index] : &dummy_processor); + processors[processor_index].flags |= CPUINFO_ARM_LINUX_VALID_PROCESSOR; + if (new_processor_index < max_processors_count) { + processors[processor_index].flags |= CPUINFO_ARM_LINUX_VALID_PROCESSOR; + } else { + /* Log and ignore processor */ + if (new_processor_index != processor_index) { + /* Log only once */ + cpuinfo_log_warning("processor %"PRIu32" in /proc/cpuinfo is ignored: index exceeds system limit %"PRIu32, + new_processor_index, max_processors_count - 1); + } } - memset(&processors[processors_count], 0, (new_processors_count - processors_count) * sizeof(struct proc_cpuinfo)); - processors_count = processors_capacity = new_processors_count; + processor_index = new_processor_index; } else { const char* line_end; do { @@ -878,13 +930,19 @@ struct proc_cpuinfo* cpuinfo_arm_linux_parse_proc_cpuinfo(uint32_t processors_co * Otherwise, there may be more data at the end; read the file once again. 
*/ if (line_end != data_end) { - const uint32_t new_processors_count = - parse_line(line_start, line_end, processors_count, &processors[processors_count - 1]); - if (new_processors_count > processors_capacity) { - processors = realloc(processors, new_processors_count * sizeof(struct proc_cpuinfo)); + const uint32_t new_processor_index = parse_line(line_start, line_end, processor_index, + processor_index < max_processors_count ? &processors[processor_index] : &dummy_processor); + if (new_processor_index < max_processors_count) { + processors[processor_index].flags |= CPUINFO_ARM_LINUX_VALID_PROCESSOR; + } else { + /* Log and ignore processor */ + if (new_processor_index != processor_index) { + /* Log only once */ + cpuinfo_log_warning("processor %"PRIu32" in /proc/cpuinfo is ignored: index exceeds system limit %"PRIu32, + new_processor_index, max_processors_count - 1); + } } - memset(&processors[processors_count], 0, (new_processors_count - processors_count) * sizeof(struct proc_cpuinfo)); - processors_count = processors_capacity = new_processors_count; + processor_index = new_processor_index; line_start = line_end + 1; } } while (line_end != data_end); @@ -894,51 +952,15 @@ struct proc_cpuinfo* cpuinfo_arm_linux_parse_proc_cpuinfo(uint32_t processors_co memmove(buffer, line_start, line_length); data_start = &buffer[line_length]; } - - if (processors == NULL) { - cpuinfo_log_error("failed to allocate %zu bytes for /proc/cpuinfo data", - processors_capacity * sizeof(struct proc_cpuinfo)); - goto cleanup; - } } while (bytes_read != 0); - - uint32_t last_i = 0; - for (uint32_t i = processors_count; i != 0; i--) { - if ((processors[i - 1].valid_mask & PROC_CPUINFO_VALID_INFO) == PROC_CPUINFO_VALID_INFO) { - last_i = i; - break; - } - } - - if (last_i == 0) { - cpuinfo_log_error("none of the %"PRIu32" processors reported in /proc/cpuinfo were successfully parsed", - processors_count); - goto cleanup; - } - - for (uint32_t i = last_i - 1; i < processors_count; i++) { - 
processors[i] = processors[last_i - 1]; - } - for (uint32_t i = last_i; i != 0; i--) { - if ((processors[i - 1].valid_mask & PROC_CPUINFO_VALID_INFO) == PROC_CPUINFO_VALID_INFO) { - last_i = i; - } else { - processors[i - 1] = processors[last_i - 1]; - } - } - /* Commit */ - result = processors; - processors = NULL; - *processors_count_ptr = processors_count; + status = true; cleanup: - free(processors); - processors = NULL; if (file != -1) { close(file); file = -1; } - return result; + return status; } diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c index ba0f930..fa43f0c 100644 --- a/src/arm/linux/init.c +++ b/src/arm/linux/init.c @@ -6,6 +6,7 @@ #include <cpuinfo.h> #include <arm/linux/api.h> #include <arm/api.h> +#include <arm/midr.h> #include <linux/api.h> #include <api.h> #include <log.h> @@ -14,189 +15,552 @@ struct cpuinfo_arm_isa cpuinfo_isa = { 0 }; +static inline uint32_t min(uint32_t a, uint32_t b) { + return a < b ? a : b; +} + +static inline uint32_t max(uint32_t a, uint32_t b) { + return a > b ? a : b; +} + +static inline int cmp(uint32_t a, uint32_t b) { + return (a > b) - (a < b); +} + +static int cmp_x86_processor_by_apic_id(const void* ptr_a, const void* ptr_b) { + const struct cpuinfo_arm_linux_processor* processor_a = (const struct cpuinfo_arm_linux_processor*) ptr_a; + const struct cpuinfo_arm_linux_processor* processor_b = (const struct cpuinfo_arm_linux_processor*) ptr_b; + + /* Move usable processors towards the start of the array */ + const bool usable_a = (processor_a->flags & CPUINFO_LINUX_MASK_USABLE) == CPUINFO_LINUX_MASK_USABLE; + const bool usable_b = (processor_b->flags & CPUINFO_LINUX_MASK_USABLE) == CPUINFO_LINUX_MASK_USABLE; + if (usable_a != usable_b) { + return (int) usable_b - (int) usable_a; + } + + /* Compare based on core type (e.g. 
Cortex-A57 < Cortex-A53) */ + const uint32_t midr_a = processor_a->midr; + const uint32_t midr_b = processor_b->midr; + if (midr_a != midr_b) { + if (midr_is_big_core(midr_a) || midr_is_little_core(midr_b)) { + return -1; + } else if (midr_is_big_core(midr_b) || midr_is_little_core(midr_a)) { + return 1; + } + } + + /* Compare based on core frequency (e.g. 2.0 GHz < 1.2 GHz) */ + const uint32_t frequency_a = processor_a->max_frequency; + const uint32_t frequency_b = processor_b->max_frequency; + if (frequency_a != frequency_b) { + return frequency_a > frequency_b ? -1 : 1; + } + + /* Compare based on system processor id (i.e. processor 0 < processor 1) */ + const uint32_t id_a = processor_a->system_processor_id; + const uint32_t id_b = processor_b->system_processor_id; + return cmp(id_a, id_b); +} + void cpuinfo_arm_linux_init(void) { - uint32_t proc_cpuinfo_count = 0; + struct cpuinfo_arm_linux_processor* arm_linux_processors = NULL; struct cpuinfo_processor* processors = NULL; struct cpuinfo_cache* l1i = NULL; struct cpuinfo_cache* l1d = NULL; struct cpuinfo_cache* l2 = NULL; - uint32_t processors_count = 0; - uint32_t l1i_count = 0; - uint32_t l1d_count = 0; - uint32_t l2_count = 0; - struct proc_cpuinfo* proc_cpuinfo_entries = cpuinfo_arm_linux_parse_proc_cpuinfo(&proc_cpuinfo_count); + const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count(); + cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count); - if (proc_cpuinfo_count != 0) { - #if CPUINFO_ARCH_ARM - cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( - proc_cpuinfo_entries, proc_cpuinfo_count, &cpuinfo_isa); - #elif CPUINFO_ARCH_ARM64 - cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( - proc_cpuinfo_entries, &cpuinfo_isa); - #endif - processors_count = proc_cpuinfo_count; - - processors = calloc(processors_count, sizeof(struct cpuinfo_processor)); - if (processors == NULL) { - cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" 
logical processors", - proc_cpuinfo_count * sizeof(struct cpuinfo_processor), proc_cpuinfo_count); - goto cleanup; - } - for (uint32_t i = 0; i < proc_cpuinfo_count; i++) { - cpuinfo_arm_decode_vendor_uarch( - proc_cpuinfo_entries[i].implementer, - proc_cpuinfo_entries[i].part, -#if CPUINFO_ARCH_ARM - !!(proc_cpuinfo_entries[i].features & PROC_CPUINFO_FEATURE_VFPV4), -#endif - &processors[i].vendor, &processors[i].uarch); - processors[i].topology = (struct cpuinfo_topology) { - .thread_id = 0, - .core_id = i, - .package_id = 0, - .linux_id = (int) i - }; + const uint32_t max_possible_processors_count = 1 + + cpuinfo_linux_get_max_possible_processor(max_processors_count); + cpuinfo_log_debug("maximum possible processors count: %"PRIu32, max_possible_processors_count); + const uint32_t max_present_processors_count = 1 + + cpuinfo_linux_get_max_possible_processor(max_processors_count); + cpuinfo_log_debug("maximum present processors count: %"PRIu32, max_present_processors_count); + + const uint32_t arm_linux_processors_count = min(max_possible_processors_count, max_present_processors_count); + arm_linux_processors = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_arm_linux_processor)); + if (arm_linux_processors == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" ARM logical processors", + arm_linux_processors_count * sizeof(struct cpuinfo_arm_linux_processor), + arm_linux_processors_count); + return; + } + + cpuinfo_linux_detect_possible_processors( + arm_linux_processors_count, &arm_linux_processors->flags, + sizeof(struct cpuinfo_arm_linux_processor), + CPUINFO_LINUX_FLAG_POSSIBLE); + + cpuinfo_linux_detect_present_processors( + arm_linux_processors_count, &arm_linux_processors->flags, + sizeof(struct cpuinfo_arm_linux_processor), + CPUINFO_LINUX_FLAG_PRESENT); + + if (!cpuinfo_arm_linux_parse_proc_cpuinfo(arm_linux_processors_count, arm_linux_processors)) { + cpuinfo_log_error("failed to parse processor 
information from /proc/cpuinfo"); + return; + } + + uint32_t usable_processors = 0; + uint32_t known_processors = 0; + uint32_t last_reported_processor = 0; + uint32_t last_reported_midr = 0; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + arm_linux_processors[i].system_processor_id = i; + if ((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) == CPUINFO_LINUX_MASK_USABLE) { + arm_linux_processors[i].processor_id = usable_processors; + usable_processors += 1; + + if (arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR) { + last_reported_processor = i; + } else { + /* + * Processor is in possible and present lists, but not reported in /proc/cpuinfo. + * This is fairly common: high-index processors can be not reported if they are offline. + */ + cpuinfo_log_info("processor %"PRIu32" is not listed in /proc/cpuinfo", i); + } + + if ((arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_INFO) == CPUINFO_ARM_LINUX_VALID_INFO) { + last_reported_midr = i; + if (known_processors == 0) { + /* + * This is the first processor for which we have complete information. + * Use it to detect instruction set architecture. + * If there are several cores with different microarchitecture, we expect + * Linux kernel to report the same ISA extensions for each of them. 
+ */ + #if CPUINFO_ARCH_ARM + cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( + &arm_linux_processors[i], &cpuinfo_isa); + #elif CPUINFO_ARCH_ARM64 + cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( + &arm_linux_processors[i], &cpuinfo_isa); + #endif + } + + known_processors += 1; + } + } else { + /* Processor reported in /proc/cpuinfo, but not in possible and/or present lists: log and ignore */ + if (!(arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR)) { + cpuinfo_log_warning("invalid processor %"PRIu32" reported in /proc/cpuinfo", i); + } + } + } + + /* Detect min/max frequency, core ID, and package ID */ + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if ((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) == CPUINFO_LINUX_MASK_USABLE) { + const uint32_t max_frequency = cpuinfo_linux_get_processor_max_frequency(i); + if (max_frequency != 0) { + arm_linux_processors[i].max_frequency = max_frequency; + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY; + } + + const uint32_t min_frequency = cpuinfo_linux_get_processor_min_frequency(i); + if (min_frequency != 0) { + arm_linux_processors[i].min_frequency = min_frequency; + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY; + } + + if (cpuinfo_linux_get_processor_core_id(i, &arm_linux_processors[i].core_id)) { + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_CORE_ID; + } + + if (cpuinfo_linux_get_processor_package_id(i, &arm_linux_processors[i].package_id)) { + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_ID; + } } + } + + /* Initialize topology group IDs */ + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + arm_linux_processors[i].core_group_min = arm_linux_processors[i].core_group_max = i; + arm_linux_processors[i].package_group_min = arm_linux_processors[i].package_group_max = i; + } + /* Propagate topology group IDs among siblings */ + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if 
((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) != CPUINFO_LINUX_MASK_USABLE) { + continue; + } + + cpuinfo_linux_detect_core_siblings( + arm_linux_processors_count, + i, + &arm_linux_processors->flags, + &arm_linux_processors->package_id, + &arm_linux_processors->package_group_min, + &arm_linux_processors->package_group_max, + sizeof(struct cpuinfo_arm_linux_processor)); + cpuinfo_linux_detect_thread_siblings( + arm_linux_processors_count, + i, + &arm_linux_processors->flags, + &arm_linux_processors->core_id, + &arm_linux_processors->core_group_min, + &arm_linux_processors->core_group_max, + sizeof(struct cpuinfo_arm_linux_processor)); + } + + /* + * Topology information about some or all logical processors may be unavailable, for the following reasons: + * - Linux kernel is too old, or configured without support for topology information in sysfs. + * - Core is offline, and Linux kernel is configured to not report topology for offline cores. + * + * In these cases, we use a fall-back mechanism for topology detection, based on the assumption that equivalent + * cores belong to the same cluster: + * - Cores with the same min/max frequency and microarchitecture are assumed to belong to the same cluster. + * - If min or max frequency is not known for any of the cores, but microarchitecture for both cores is the same, + * and different from Cortex-A53, both cores are assumed to belong to the same cluster. Cortex-A53 is the only + * microarchitecture, which is simultaneously used in multiple clusters in the same SoCs, e.g. Qualcomm + * Snapdragon 615 combines 4 "big" Cortex-A53 cores + 4 "LITTLE" Cortex-A53 cores, and MediaTek Helio X20 + * combines 2 "max" Cortex-A72 cores + 4 "med" Cortex-A53 cores + 4 "min" Cortex-A53 cores. + * - If microarchitecture is not known, but min/max frequency are the same for two cores, assume both cores + * belong to the same cluster. 
+ */ + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if ((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) != CPUINFO_LINUX_MASK_USABLE) { + continue; + } + + if (arm_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) { + continue; + } + + for (uint32_t j = 0; j < arm_linux_processors_count; j++) { + if (i == j) { + continue; + } + + if ((arm_linux_processors[j].flags & CPUINFO_LINUX_MASK_USABLE) != CPUINFO_LINUX_MASK_USABLE) { + /* Logical processor is not possible or not present */ + continue; + } + + if (arm_linux_processors[j].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) { + /* Cluster for this processor was already parsed from sysfs */ + continue; + } + + if (cpuinfo_arm_linux_processor_equals(&arm_linux_processors[i], &arm_linux_processors[j])) { + cpuinfo_log_info( + "processors %"PRIu32" and %"PRIu32" are assigned to the same cluster based on similarity", i, j); + + arm_linux_processors[i].package_group_min = arm_linux_processors[j].package_group_min = + min(arm_linux_processors[i].package_group_min, arm_linux_processors[j].package_group_min); + arm_linux_processors[i].package_group_max = arm_linux_processors[j].package_group_max = + max(arm_linux_processors[i].package_group_max, arm_linux_processors[j].package_group_max); + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; + arm_linux_processors[j].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; + } + } + } + + /* + * It may happen that neither of sysfs topology information, min/max frequencies, or microarchitecture + * is known for some or all cores. This can happen for the following reasons: + * - Kernel is configured without support for sysfs cpufreq and topology information, and reports + * detailed information only for one of the cores listed in /proc/cpuinfo + * - Some of the cores are offline, and Linux kernel is configured to report information only about + * online cores. 
+ * + * In this case, it is generally impossible to reconstruct topology information, and we use a heuristic: + * each core which wasn't assigned to any cluster yet, is assumed to belong to the same cluster as + * the preceeding core for which no sysfs information is available. + */ + uint32_t cluster_processor_id = 0; + bool last_processor_has_sysfs_topology = false; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if ((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) != CPUINFO_LINUX_MASK_USABLE) { + continue; + } + + if (arm_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) { + /* sysfs topology information is available for this processor */ + last_processor_has_sysfs_topology = true; + } else { + if (last_processor_has_sysfs_topology) { + /* + * Subsequent processors unassigned to any cluster will be added to the cluster of this + * processor. Note that if this processor itself is not assigned to any cluster, + * it will start a new cluster of processors. 
+ */ + cluster_processor_id = i; + } + last_processor_has_sysfs_topology = false; + } + + if (!(arm_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) { + // TODO: check that processors are not the same + if (cluster_processor_id == i) { + cpuinfo_log_info("processor %"PRIu32" is assumed to belong to a new cluster", i); + } else { + cpuinfo_log_info("processor %"PRIu32" is assumed to belong to the cluster of processor %"PRIu32, + i, cluster_processor_id); + arm_linux_processors[i].package_group_min = arm_linux_processors[cluster_processor_id].package_group_min; + arm_linux_processors[cluster_processor_id].package_group_max = + arm_linux_processors[i].package_group_max = + max(i, arm_linux_processors[cluster_processor_id].package_group_max); + } + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; + } + } + + /* + * Run Shiloach-Vishkin (well, almost) connected components algorithm + */ + uint32_t update; + do { + update = 0; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if ((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) != CPUINFO_LINUX_MASK_USABLE) { + continue; + } + + const uint32_t group_max_processor_id = arm_linux_processors[i].package_group_max; + const uint32_t group_min_processor_id = arm_linux_processors[i].package_group_min; + + const uint32_t group_max_processor_group_max = arm_linux_processors[group_max_processor_id].package_group_max; + const uint32_t group_max_processor_group_min = arm_linux_processors[group_max_processor_id].package_group_min; + const uint32_t group_min_processor_group_max = arm_linux_processors[group_min_processor_id].package_group_max; + const uint32_t group_min_processor_group_min = arm_linux_processors[group_min_processor_id].package_group_min; + + const uint32_t new_group_max_processor_id = max(group_max_processor_group_max, group_min_processor_group_max); + const uint32_t new_group_min_processor_id = min(group_min_processor_group_min, 
group_max_processor_group_min); + + arm_linux_processors[i].package_group_max = + arm_linux_processors[group_max_processor_id].package_group_max = + arm_linux_processors[group_min_processor_id].package_group_max = + new_group_max_processor_id; + arm_linux_processors[i].package_group_min = + arm_linux_processors[group_max_processor_id].package_group_min = + arm_linux_processors[group_min_processor_id].package_group_min = + new_group_min_processor_id; + + update |= (group_max_processor_id ^ new_group_max_processor_id) | (group_min_processor_id ^ new_group_min_processor_id) | + (group_max_processor_group_max ^ new_group_max_processor_id) | (group_max_processor_group_min ^ new_group_min_processor_id) | + (group_min_processor_group_max ^ new_group_max_processor_id) | (group_min_processor_group_min ^ new_group_min_processor_id); + } + } while (update != 0); + + uint32_t cluster_count = 0; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if ((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) != CPUINFO_LINUX_MASK_USABLE) { + continue; + } + + if (arm_linux_processors[i].package_group_min == i) { + cluster_count += 1; + } + } + cpuinfo_log_debug("detected %"PRIu32" core clusters", cluster_count); + + /* + * Two relations between reported /proc/cpuinfo information, and cores is possible: + * - /proc/cpuinfo reports information for all or some of the cores below the corresponding + * "processor : <number>" lines. Information on offline cores may be missing. + * - /proc/cpuinfo reports information only once, after all "processor : <number>" lines. + * The reported information may relate to processor #0 or to the processor which + * executed the system calls to read /proc/cpuinfo. It is also indistinguishable + * from /proc/cpuinfo reporting information only for the last core (e.g. if all other + * cores are offline). + * + * We detect the second case by checking if /proc/cpuinfo contains valid MIDR only for one, + * last reported, processor. 
Note, that the last reported core may be not the last + * present+possible processor, as /proc/cpuinfo may not report high-index offline cores. + */ + if (usable_processors != 1 && known_processors == 1 && last_reported_processor == last_reported_midr && cluster_count > 1) { + cpuinfo_log_error("not sufficient per-cluster information"); + } else { /* - * Assumptions: - * - At most 2 cache levels - * - Either all or no cores have L1I/L1D/L2 cache. - * - If present, L1 cache is private to the core. - * - If present, L2 cache is shared between all cores. + * Propagate MIDR, vendor, and microarchitecture values along clusters in two passes: + * - Copy MIDR to min processor of a cluster, if it doesn't have this information + * - Copy max frequency to min processor of a clsuter, if it doesn't have this information + * - Detect vendor and microarchitecture + * - Copy MIDR, vendor, and microarchitecture to all processors of a cluster, overwriting + * current values for the processors in the group. 
*/ - struct cpuinfo_cache private_l1i = { 0 }; - struct cpuinfo_cache private_l1d = { 0 }; - struct cpuinfo_cache shared_l2 = { 0 }; + + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if ((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) != CPUINFO_LINUX_MASK_USABLE) { + continue; + } + + const uint32_t group_min_processor_id = arm_linux_processors[i].package_group_min; + if (i != group_min_processor_id) { + if ((arm_linux_processors[group_min_processor_id].flags & CPUINFO_ARM_LINUX_VALID_MIDR) != CPUINFO_ARM_LINUX_VALID_MIDR) { + arm_linux_processors[group_min_processor_id].midr = arm_linux_processors[i].midr; + arm_linux_processors[group_min_processor_id].flags |= CPUINFO_ARM_LINUX_VALID_MIDR; + } + } + } + + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + const uint32_t group_min_processor_id = arm_linux_processors[i].package_group_min; + if (i == group_min_processor_id) { + /* Decode vendor and uarch only once per cluster */ + cpuinfo_arm_decode_vendor_uarch( + arm_linux_processors[i].midr, +#if CPUINFO_ARCH_ARM + !!(arm_linux_processors[i].features & CPUINFO_ARM_LINUX_FEATURE_VFPV4), +#endif + &arm_linux_processors[i].vendor, + &arm_linux_processors[i].uarch); + } else { + arm_linux_processors[i].midr = arm_linux_processors[group_min_processor_id].midr; + arm_linux_processors[i].vendor = arm_linux_processors[group_min_processor_id].vendor; + arm_linux_processors[i].uarch = arm_linux_processors[group_min_processor_id].uarch; + } + } + + } + + /* + * At this point, we figured out the core clusters. Count the number of cores in each clusters: + * - In the first pass, for each logical processor increment the count in group-minimum processor. + * - In the second pass, copy the count from group-minimum processor. 
+ */ + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if ((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) != CPUINFO_LINUX_MASK_USABLE) { + continue; + } + + arm_linux_processors[arm_linux_processors[i].package_group_min].package_processor_count += 1; + } + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if ((arm_linux_processors[i].flags & CPUINFO_LINUX_MASK_USABLE) != CPUINFO_LINUX_MASK_USABLE) { + continue; + } + + arm_linux_processors[i].package_processor_count = + arm_linux_processors[arm_linux_processors[i].package_group_min].package_processor_count; + } + + qsort(arm_linux_processors, arm_linux_processors_count, + sizeof(struct cpuinfo_arm_linux_processor), cmp_x86_processor_by_apic_id); + + uint32_t system_to_sorted[arm_linux_processors_count]; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + system_to_sorted[arm_linux_processors[i].system_processor_id] = i; + } + + processors = calloc(usable_processors, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + usable_processors * sizeof(struct cpuinfo_processor), usable_processors); + goto cleanup; + } + + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + processors[i].vendor = arm_linux_processors[i].vendor; + processors[i].uarch = arm_linux_processors[i].uarch; + processors[i].topology = (struct cpuinfo_topology) { + .thread_id = 0, + .core_id = arm_linux_processors[i].system_processor_id, + .package_id = 0, + .linux_id = (int) arm_linux_processors[i].system_processor_id, + }; + } + + /* + * Assumptions: + * - No SMP (i.e. each core supports only one hardware thread). + * - Level 1 instruction and data caches are private to the core clusters. + * - Level 2 cache is shared between cores in the same cluster. 
+ */ + l1i = calloc(usable_processors, sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + usable_processors * sizeof(struct cpuinfo_cache), usable_processors); + goto cleanup; + } + + l1d = calloc(usable_processors, sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + usable_processors * sizeof(struct cpuinfo_cache), usable_processors); + goto cleanup; + } + + uint32_t l2_count = cluster_count; + l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + + /* Populate cache infromation structures in l1i, l1d, and l2 */ + struct cpuinfo_cache shared_l2; + uint32_t l2_index = 0; + for (uint32_t i = 0; i < usable_processors; i++) { cpuinfo_arm_decode_cache( - processors[0].uarch, - proc_cpuinfo_count, - proc_cpuinfo_entries[0].part, - proc_cpuinfo_entries[0].architecture.version, - &private_l1i, &private_l1d, &shared_l2); - if (private_l1i.size != 0) { - l1i_count = proc_cpuinfo_count; - } - if (private_l1d.size != 0) { - l1d_count = proc_cpuinfo_count; - if (shared_l2.size != 0) { - l2_count = 1; - } - } - - cpuinfo_log_info("detected %"PRIu32" L1I caches", l1i_count); - cpuinfo_log_info("detected %"PRIu32" L1D caches", l1d_count); - cpuinfo_log_info("detected %"PRIu32" L2 caches", l2_count); - - if (l1i_count != 0) { - l1i = malloc(l1i_count * sizeof(struct cpuinfo_cache)); - if (l1i == NULL) { - cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", - l1i_count * sizeof(struct cpuinfo_cache), l1i_count); - goto cleanup; - } - for (uint32_t i = 0; i < l1i_count; i++) { - /* L1I reported in /proc/cpuinfo overrides defaults */ - #if CPUINFO_ARCH_ARM - if 
((proc_cpuinfo_entries[i].valid_mask & PROC_CPUINFO_VALID_ICACHE) == PROC_CPUINFO_VALID_ICACHE) { - l1i[i] = (struct cpuinfo_cache) { - .size = proc_cpuinfo_entries[i].cache.i_size, - .associativity = proc_cpuinfo_entries[i].cache.i_assoc, - .sets = proc_cpuinfo_entries[i].cache.i_sets, - .partitions = 1, - .line_size = proc_cpuinfo_entries[i].cache.i_line_length - }; - } else { - cpuinfo_arm_decode_cache( - processors[i].uarch, - proc_cpuinfo_count, - proc_cpuinfo_entries[i].part, - proc_cpuinfo_entries[i].architecture.version, - &l1i[i], &private_l1d, &shared_l2); - } - #elif CPUINFO_ARCH_ARM64 - cpuinfo_arm_decode_cache( - processors[i].uarch, - proc_cpuinfo_count, - proc_cpuinfo_entries[i].part, - proc_cpuinfo_entries[i].architecture.version, - &l1i[i], &private_l1d, &shared_l2); - #endif - l1i[i].thread_start = i; - l1i[i].thread_count = 1; - } - } - if (l1d_count != 0) { - l1d = malloc(l1d_count * sizeof(struct cpuinfo_cache)); - if (l1d == NULL) { - cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", - l1d_count * sizeof(struct cpuinfo_cache), l1d_count); - goto cleanup; - } - for (uint32_t i = 0; i < l1d_count; i++) { - #if CPUINFO_ARCH_ARM - /* L1D reported in /proc/cpuinfo overrides defaults */ - if ((proc_cpuinfo_entries[i].valid_mask & PROC_CPUINFO_VALID_DCACHE) == PROC_CPUINFO_VALID_DCACHE) { - l1d[i] = (struct cpuinfo_cache) { - .size = proc_cpuinfo_entries[i].cache.d_size, - .associativity = proc_cpuinfo_entries[i].cache.d_assoc, - .sets = proc_cpuinfo_entries[i].cache.d_sets, - .partitions = 1, - .line_size = proc_cpuinfo_entries[i].cache.d_line_length - }; - } else { - cpuinfo_arm_decode_cache( - processors[i].uarch, - proc_cpuinfo_count, - proc_cpuinfo_entries[i].part, - proc_cpuinfo_entries[i].architecture.version, - &private_l1i, &l1d[i], &shared_l2); - } - #elif CPUINFO_ARCH_ARM64 - cpuinfo_arm_decode_cache( - processors[i].uarch, - proc_cpuinfo_count, - proc_cpuinfo_entries[i].part, - 
proc_cpuinfo_entries[i].architecture.version, - &private_l1i, &l1d[i], &shared_l2); - #endif - l1d[i].thread_start = i; - l1d[i].thread_count = 1; - } - } - if (l2_count != 0) { - l2 = malloc(l2_count * sizeof(struct cpuinfo_cache)); - if (l2 == NULL) { - cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", - l2_count * sizeof(struct cpuinfo_cache), l2_count); - goto cleanup; - } - /* L2 cache is never reported in /proc/cpuinfo; use defaults */ - *l2 = shared_l2; - l2->thread_start = 0; - l2->thread_count = proc_cpuinfo_count; + processors[i].uarch, + arm_linux_processors[i].package_processor_count, + arm_linux_processors[i].midr, + arm_linux_processors[i].architecture_version, + &l1i[i], &l1d[i], &shared_l2); + l1i[i].thread_start = l1d[i].thread_start = i; + l1i[i].thread_count = l1d[i].thread_count = 1; + #if CPUINFO_ARCH_ARM + /* L1I reported in /proc/cpuinfo overrides defaults */ + if ((arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_ICACHE) == CPUINFO_ARM_LINUX_VALID_ICACHE) { + l1i[i] = (struct cpuinfo_cache) { + .size = arm_linux_processors[i].proc_cpuinfo_cache.i_size, + .associativity = arm_linux_processors[i].proc_cpuinfo_cache.i_assoc, + .sets = arm_linux_processors[i].proc_cpuinfo_cache.i_sets, + .partitions = 1, + .line_size = arm_linux_processors[i].proc_cpuinfo_cache.i_line_length + }; + } + /* L1D reported in /proc/cpuinfo overrides defaults */ + if ((arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_ICACHE) == CPUINFO_ARM_LINUX_VALID_ICACHE) { + l1d[i] = (struct cpuinfo_cache) { + .size = arm_linux_processors[i].proc_cpuinfo_cache.d_size, + .associativity = arm_linux_processors[i].proc_cpuinfo_cache.d_assoc, + .sets = arm_linux_processors[i].proc_cpuinfo_cache.d_sets, + .partitions = 1, + .line_size = arm_linux_processors[i].proc_cpuinfo_cache.d_line_length + }; + } + #endif + if (arm_linux_processors[i].package_group_min == arm_linux_processors[i].system_processor_id) { + shared_l2.thread_start = 
#pragma once
/* <stdbool.h> is required: the predicates below return bool. */
#include <stdbool.h>
#include <stdint.h>


/* Bit masks selecting each field of a 32-bit MIDR value. */
#define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK  UINT32_C(0xFF000000)
#define CPUINFO_ARM_MIDR_VARIANT_MASK      UINT32_C(0x00F00000)
#define CPUINFO_ARM_MIDR_ARCHITECTURE_MASK UINT32_C(0x000F0000)
#define CPUINFO_ARM_MIDR_PART_MASK         UINT32_C(0x0000FFF0)
#define CPUINFO_ARM_MIDR_REVISION_MASK     UINT32_C(0x0000000F)

/* Position of the lowest bit of each field. */
#define CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET  24
#define CPUINFO_ARM_MIDR_VARIANT_OFFSET      20
#define CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET 16
#define CPUINFO_ARM_MIDR_PART_OFFSET          4
#define CPUINFO_ARM_MIDR_REVISION_OFFSET      0

/* Reference MIDR values of known cores; variant and revision bits are zero in all of them. */
#define CPUINFO_ARM_MIDR_ARM1156         UINT32_C(0x410FB560)
#define CPUINFO_ARM_MIDR_CORTEX_A7       UINT32_C(0x410FC070)
#define CPUINFO_ARM_MIDR_CORTEX_A9       UINT32_C(0x410FC090)
#define CPUINFO_ARM_MIDR_CORTEX_A15      UINT32_C(0x410FC0F0)
#define CPUINFO_ARM_MIDR_CORTEX_A17      UINT32_C(0x410FC0E0)
#define CPUINFO_ARM_MIDR_CORTEX_A35      UINT32_C(0x410FD040)
#define CPUINFO_ARM_MIDR_CORTEX_A53      UINT32_C(0x410FD030)
#define CPUINFO_ARM_MIDR_CORTEX_A55      UINT32_C(0x410FD050)
#define CPUINFO_ARM_MIDR_CORTEX_A57      UINT32_C(0x410FD070)
#define CPUINFO_ARM_MIDR_CORTEX_A72      UINT32_C(0x410FD080)
#define CPUINFO_ARM_MIDR_CORTEX_A73      UINT32_C(0x410FD090)
#define CPUINFO_ARM_MIDR_CORTEX_A75      UINT32_C(0x410FD0A0)
#define CPUINFO_ARM_MIDR_KRYO280_GOLD    UINT32_C(0x510F8010)
#define CPUINFO_ARM_MIDR_KRYO280_SILVER  UINT32_C(0x510F8000)
#define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010)
#define CPUINFO_ARM_MIDR_KRYO_GOLD       UINT32_C(0x510F2050)
#define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110)
#define CPUINFO_ARM_MIDR_MONGOOSE        UINT32_C(0x530F0010)
#define CPUINFO_ARM_MIDR_DENVER2         UINT32_C(0x4E0F0030)

/*
 * Field setters: return a copy of midr with one field replaced.
 * Bits of the new value that do not fit into the field are discarded.
 */

inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) {
	return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
		((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK);
}

inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) {
	return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) |
		((variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK);
}

inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) {
	return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) |
		((architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK);
}

inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) {
	return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) |
		((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK);
}

inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) {
	return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) |
		((revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK);
}

/* Field getters: extract one field, shifted down to bit 0. */

inline static uint32_t midr_get_implementer(uint32_t midr) {
	return (midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) >> CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET;
}

inline static uint32_t midr_get_part(uint32_t midr) {
	return (midr & CPUINFO_ARM_MIDR_PART_MASK) >> CPUINFO_ARM_MIDR_PART_OFFSET;
}

/*
 * Core predicates. Unless noted otherwise they compare only the implementer
 * and part fields, so any variant/architecture/revision matches.
 */

inline static bool midr_is_arm1156(uint32_t midr) {
	const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_ARM1156 & uarch_mask);
}

/* Matches on the implementer and only the top nibble (0xB) of the part field. */
inline static bool midr_is_arm11(uint32_t midr) {
	return (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | 0x0000F000)) == UINT32_C(0x4100B000);
}

inline static bool midr_is_cortex_a9(uint32_t midr) {
	const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_CORTEX_A9 & uarch_mask);
}

/* Implementer 0x51 with part 0x04D or 0x06F. */
inline static bool midr_is_krait(uint32_t midr) {
	switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
		case UINT32_C(0x510004D0):
		case UINT32_C(0x510006F0):
			return true;
		default:
			return false;
	}
}

inline static bool midr_is_cortex_a53(uint32_t midr) {
	const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_CORTEX_A53 & uarch_mask);
}

inline static bool midr_is_kryo280_silver(uint32_t midr) {
	const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_KRYO280_SILVER & uarch_mask);
}

inline static bool midr_is_kryo280_gold(uint32_t midr) {
	const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_KRYO280_GOLD & uarch_mask);
}

/*
 * The switch-based helpers below also compare the architecture field, because
 * case labels must be the unmodified reference constants (architecture = 0xF).
 */
inline static bool midr_is_kryo_silver(uint32_t midr) {
	const uint32_t uarch_mask =
		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	switch (midr & uarch_mask) {
		case CPUINFO_ARM_MIDR_KRYO_SILVER_820:
		case CPUINFO_ARM_MIDR_KRYO_SILVER_821:
			return true;
		default:
			return false;
	}
}

inline static bool midr_is_kryo_gold(uint32_t midr) {
	const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_KRYO_GOLD & uarch_mask);
}

/* True if midr (ignoring variant and revision) is one of the cores listed as "big". */
inline static bool midr_is_big_core(uint32_t midr) {
	const uint32_t core_mask =
		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	switch (midr & core_mask) {
		case CPUINFO_ARM_MIDR_CORTEX_A75:
		case CPUINFO_ARM_MIDR_CORTEX_A73:
		case CPUINFO_ARM_MIDR_CORTEX_A72:
		case CPUINFO_ARM_MIDR_CORTEX_A57:
		case CPUINFO_ARM_MIDR_CORTEX_A17:
		case CPUINFO_ARM_MIDR_CORTEX_A15:
		case CPUINFO_ARM_MIDR_KRYO280_GOLD:
		case CPUINFO_ARM_MIDR_KRYO_GOLD:
		case CPUINFO_ARM_MIDR_MONGOOSE:
		case CPUINFO_ARM_MIDR_DENVER2:
			return true;
		default:
			return false;
	}
}

/* True if midr (ignoring variant and revision) is one of the cores listed as "LITTLE". */
inline static bool midr_is_little_core(uint32_t midr) {
	const uint32_t core_mask =
		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	switch (midr & core_mask) {
		case CPUINFO_ARM_MIDR_CORTEX_A55:
		case CPUINFO_ARM_MIDR_CORTEX_A53:
		case CPUINFO_ARM_MIDR_CORTEX_A35:
		case CPUINFO_ARM_MIDR_CORTEX_A7:
		case CPUINFO_ARM_MIDR_KRYO280_SILVER:
		case CPUINFO_ARM_MIDR_KRYO_SILVER_820:
		case CPUINFO_ARM_MIDR_KRYO_SILVER_821:
			return true;
		default:
			return false;
	}
}

/*
 * Maps the MIDR of a known big core to the reference MIDR of the LITTLE core
 * it is paired with in this table. Unknown values are returned unchanged.
 */
inline static uint32_t midr_little_core_for_big(uint32_t midr) {
	const uint32_t core_mask =
		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	switch (midr & core_mask) {
		case CPUINFO_ARM_MIDR_CORTEX_A75:
			return CPUINFO_ARM_MIDR_CORTEX_A55;
		case CPUINFO_ARM_MIDR_CORTEX_A73:
		case CPUINFO_ARM_MIDR_CORTEX_A72:
		case CPUINFO_ARM_MIDR_CORTEX_A57:
		case CPUINFO_ARM_MIDR_MONGOOSE:
			return CPUINFO_ARM_MIDR_CORTEX_A53;
		case CPUINFO_ARM_MIDR_CORTEX_A17:
		case CPUINFO_ARM_MIDR_CORTEX_A15:
			return CPUINFO_ARM_MIDR_CORTEX_A7;
		case CPUINFO_ARM_MIDR_KRYO280_GOLD:
			return CPUINFO_ARM_MIDR_KRYO280_SILVER;
		case CPUINFO_ARM_MIDR_KRYO_GOLD:
			return CPUINFO_ARM_MIDR_KRYO_SILVER_820;
		case CPUINFO_ARM_MIDR_DENVER2:
			return CPUINFO_ARM_MIDR_CORTEX_A57;
		default:
			return midr;
	}
}

/*
 * Maps the MIDR of a known LITTLE core (or Cortex-A57, which is paired with
 * Denver2 here) to the reference MIDR of its big counterpart in this table.
 * Unknown values are returned unchanged.
 */
inline static uint32_t midr_big_core_for_little(uint32_t midr) {
	const uint32_t core_mask =
		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	switch (midr & core_mask) {
		case CPUINFO_ARM_MIDR_CORTEX_A57:
			return CPUINFO_ARM_MIDR_DENVER2;
		case CPUINFO_ARM_MIDR_CORTEX_A55:
			return CPUINFO_ARM_MIDR_CORTEX_A75;
		case CPUINFO_ARM_MIDR_CORTEX_A53:
		case CPUINFO_ARM_MIDR_CORTEX_A35:
			return CPUINFO_ARM_MIDR_CORTEX_A57;
		case CPUINFO_ARM_MIDR_CORTEX_A7:
			return CPUINFO_ARM_MIDR_CORTEX_A15;
		case CPUINFO_ARM_MIDR_KRYO280_SILVER:
			return CPUINFO_ARM_MIDR_KRYO280_GOLD;
		case CPUINFO_ARM_MIDR_KRYO_SILVER_820:
		case CPUINFO_ARM_MIDR_KRYO_SILVER_821:
			return CPUINFO_ARM_MIDR_KRYO_GOLD;
		default:
			return midr;
	}
}
#if CPUINFO_ARCH_ARM
		case 'i':
			/*
			 * Intel (XScale) parts are classified by the top nibble of the
			 * 12-bit part number. The part must be extracted from MIDR exactly
			 * once: applying midr_get_part() to an already-extracted part
			 * shifts it right by another 4 bits, so the subsequent ">> 8"
			 * would always produce 0 and no XScale core could ever match.
			 */
			*vendor = cpuinfo_vendor_intel;
			switch (midr_get_part(midr) >> 8) {
				case 2: /* PXA 210/25X/26X */
				case 4: /* PXA 27X */
				case 6: /* PXA 3XX */
					*uarch = cpuinfo_uarch_xscale;
					break;
				default:
					cpuinfo_log_warning("unknown Intel CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
			}
			break;
#endif /* CPUINFO_ARCH_ARM */
ignored", midr_get_part(midr)); } break; case 'Q': *vendor = cpuinfo_vendor_qualcomm; - switch (cpu_part) { + switch (midr_get_part(midr)) { #if CPUINFO_ARCH_ARM case 0x00F: /* Mostly Scorpions, but some Cortex A5 may report this value as well */ @@ -138,40 +146,41 @@ void cpuinfo_arm_decode_vendor_uarch( *uarch = cpuinfo_uarch_krait; break; #endif /* CPUINFO_ARCH_ARM */ - case 0x205: /* Low-power Kryo "Silver" */ - case 0x211: /* High-performance Kryo "Gold" */ + case 0x201: /* Qualcomm Snapdragon 821: Low-power Kryo "Silver" */ + case 0x205: /* Qualcomm Snapdragon 820 & 821: High-performance Kryo "Gold" */ + case 0x211: /* Qualcomm Snapdragon 820: Low-power Kryo "Silver" */ *uarch = cpuinfo_uarch_kryo; break; - case 0x800: /* Low-power Kryo 280 -> Cortex-A53 */ + case 0x800: /* Low-power Kryo 280 "Silver" -> Cortex-A53 */ *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a53; break; - case 0x801: /* High-performance Kryo 280 -> Cortex-A73 */ + case 0x801: /* High-performance Kryo 280 "Gold" -> Cortex-A73 */ *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a73; break; default: - cpuinfo_log_warning("unknown Qualcomm CPU part 0x%03"PRIx32" ignored", cpu_part); + cpuinfo_log_warning("unknown Qualcomm CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); } break; case 'S': *vendor = cpuinfo_vendor_samsung; - switch (cpu_part) { + switch (midr_get_part(midr)) { case 0x001: *uarch = cpuinfo_uarch_mongoose; break; default: - cpuinfo_log_warning("unknown Samsung CPU part 0x%03"PRIx32" ignored", cpu_part); + cpuinfo_log_warning("unknown Samsung CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); } break; #if CPUINFO_ARCH_ARM case 'V': *vendor = cpuinfo_vendor_marvell; - cpuinfo_log_warning("unknown Marvell CPU part 0x%03"PRIx32" ignored", cpu_part); + cpuinfo_log_warning("unknown Marvell CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); break; #endif /* CPUINFO_ARCH_ARM */ default: cpuinfo_log_warning("unknown CPU implementer '%c' 
#pragma once

/*
 * This header uses bool, size_t, uint32_t and UINT32_C, so it includes the
 * headers that define them instead of relying on the includer.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>


/* Per-processor flag bits stored in the "flags" word of a processor record. */
#define CPUINFO_LINUX_FLAG_PRESENT         UINT32_C(0x00000001)
#define CPUINFO_LINUX_FLAG_POSSIBLE        UINT32_C(0x00000002)
/* PRESENT | POSSIBLE */
#define CPUINFO_LINUX_MASK_USABLE          UINT32_C(0x00000003)
#define CPUINFO_LINUX_FLAG_ONLINE          UINT32_C(0x00000004)
#define CPUINFO_LINUX_FLAG_MAX_FREQUENCY   UINT32_C(0x00000010)
#define CPUINFO_LINUX_FLAG_MIN_FREQUENCY   UINT32_C(0x00000020)
#define CPUINFO_LINUX_FLAG_CORE_ID         UINT32_C(0x00000100)
#define CPUINFO_LINUX_FLAG_CORE_CLUSTER    UINT32_C(0x00000200)
#define CPUINFO_LINUX_FLAG_PACKAGE_ID      UINT32_C(0x00000400)
#define CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER UINT32_C(0x00000800)


/*
 * Callback invoked once per entry of a cpu list. The first two arguments are
 * the first processor of the entry and one past the last processor of the
 * entry; the third is the user-supplied context.
 */
typedef bool (*cpuinfo_cpulist_callback)(uint32_t, uint32_t, void*);
bool cpuinfo_linux_parse_cpulist(const char* filename, cpuinfo_cpulist_callback callback, void* context);

/* Callback receiving the start and end of the file text plus the user-supplied context. */
typedef bool (*cpuinfo_smallfile_callback)(const char*, const char*, void*);
bool cpuinfo_linux_parse_small_file(const char* filename, size_t buffer_size, cpuinfo_smallfile_callback, void* context);

uint32_t cpuinfo_linux_get_max_processors_count(void);
uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count);
uint32_t cpuinfo_linux_get_max_present_processor(uint32_t max_processors_count);
uint32_t cpuinfo_linux_get_processor_min_frequency(uint32_t processor);
uint32_t cpuinfo_linux_get_processor_max_frequency(uint32_t processor);
bool cpuinfo_linux_get_processor_package_id(uint32_t processor, uint32_t package_id[restrict static 1]);
bool cpuinfo_linux_get_processor_core_id(uint32_t processor, uint32_t core_id[restrict static 1]);

/*
 * The detect_* functions operate on an array of caller-defined processor
 * records: processor0_* arguments point at the relevant field of record 0,
 * and processor_struct_size is the byte stride between consecutive records.
 */
bool cpuinfo_linux_detect_possible_processors(uint32_t max_processors_count,
	uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t possible_flag);
bool cpuinfo_linux_detect_present_processors(uint32_t max_processors_count,
	uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t present_flag);

bool cpuinfo_linux_detect_core_siblings(
	uint32_t max_processors_count,
	uint32_t processor,
	uint32_t* processor0_flags,
	uint32_t* processor0_package_id,
	uint32_t* processor0_package_group_min,
	uint32_t* processor0_package_group_max,
	uint32_t processor_struct_size);
bool cpuinfo_linux_detect_thread_siblings(
	uint32_t max_processors_count,
	uint32_t processor,
	uint32_t* processor0_flags,
	uint32_t* processor0_core_id,
	uint32_t* processor0_core_group_min,
	uint32_t* processor0_core_group_max,
	uint32_t processor_struct_size);
entry_end, &first_cpu); @@ -81,8 +83,11 @@ inline static bool parse_entry(const char* entry_start, const char* entry_end, c return false; } else if (number_end == entry_end) { /* Completely parsed the entry */ - CPU_SET((int) first_cpu, cpuset); - return true; + #if CPUINFO_LOG_DEBUG_PARSERS + cpuinfo_log_debug("cpulist: call callback with list_start = %"PRIu32", list_end = %"PRIu32, + first_cpu, first_cpu + 1); + #endif + return callback(first_cpu, first_cpu + 1, context); } /* Parse the second part of the entry */ @@ -114,18 +119,20 @@ inline static bool parse_entry(const char* entry_start, const char* entry_end, c } /* Parsed both parts of the entry; update CPU set */ - for (uint32_t i = first_cpu; i <= last_cpu; i++) { - CPU_SET((int) i, cpuset); - } - return true; + #if CPUINFO_LOG_DEBUG_PARSERS + cpuinfo_log_debug("cpulist: call callback with list_start = %"PRIu32", list_end = %"PRIu32, + first_cpu, last_cpu + 1); + #endif + return callback(first_cpu, last_cpu + 1, context); } -bool cpuinfo_linux_parse_cpuset(const char* filename, cpu_set_t* cpuset) { +bool cpuinfo_linux_parse_cpulist(const char* filename, cpuinfo_cpulist_callback callback, void* context) { bool status = true; int file = -1; char buffer[BUFFER_SIZE]; - CPU_ZERO(cpuset); - cpuinfo_log_debug("parsing cpu list from file %s", filename); + #if CPUINFO_LOG_DEBUG_PARSERS + cpuinfo_log_debug("parsing cpu list from file %s", filename); + #endif file = open(filename, O_RDONLY); if (file == -1) { @@ -153,7 +160,7 @@ bool cpuinfo_linux_parse_cpuset(const char* filename, cpu_set_t* cpuset) { if (bytes_read == 0) { /* No more data in the file: process the remaining text in the buffer as a single entry */ const char* entry_end = data_end; - const bool entry_status = parse_entry(entry_start, entry_end, cpuset); + const bool entry_status = parse_entry(entry_start, entry_end, callback, context); status &= entry_status; } else { const char* entry_end; @@ -170,7 +177,7 @@ bool cpuinfo_linux_parse_cpuset(const 
char* filename, cpu_set_t* cpuset) { * Otherwise, there may be more data at the end; read the file once again. */ if (entry_end != data_end) { - const bool entry_status = parse_entry(entry_start, entry_end, cpuset); + const bool entry_status = parse_entry(entry_start, entry_end, callback, context); status &= entry_status; entry_start = entry_end + 1; } diff --git a/src/linux/processors.c b/src/linux/processors.c new file mode 100644 index 0000000..1234657 --- /dev/null +++ b/src/linux/processors.c @@ -0,0 +1,484 @@ +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#if !defined(__ANDROID__) + /* + * sched.h is only used for CPU_SETSIZE constant. + * Android NDK headers before platform 21 do have this constant in sched.h + */ + #include <sched.h> +#endif + +#include <linux/api.h> +#include <log.h> + + +#define STRINGIFY(token) #token + +#define KERNEL_MAX_FILENAME "/sys/devices/system/cpu/kernel_max" +#define KERNEL_MAX_FILESIZE 32 +#define FREQUENCY_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/cpufreq/cpuinfo_max_freq")) +#define MAX_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_max_freq" +#define MIN_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_min_freq" +#define FREQUENCY_FILESIZE 32 +#define PACKAGE_ID_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/physical_package_id")) +#define PACKAGE_ID_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/physical_package_id" +#define PACKAGE_ID_FILESIZE 32 +#define CORE_ID_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/core_id")) +#define CORE_ID_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_id" +#define CORE_ID_FILESIZE 32 + +#define CORE_SIBLINGS_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/core_siblings_list")) +#define 
/*
 * Parses an unsigned decimal number from the character range [string, end).
 *
 * Consumes leading decimal digits and stops at the first non-digit character
 * or at end, whichever comes first. The parsed value (0 if no digits were
 * consumed) is stored in *number_ptr. Overflow is not detected: the value
 * wraps modulo 2**32.
 *
 * Returns a pointer to the first character that was NOT consumed. Callers
 * compare it with the start of the range to detect "no digits at all" and
 * scan from it to end to detect trailing garbage, so it must be the stop
 * position rather than the end of the buffer.
 */
inline static const char* parse_number(const char* string, const char* end, uint32_t number_ptr[restrict static 1]) {
	uint32_t number = 0;
	while (string != end) {
		/* Characters below '0' wrap to large unsigned values, so one comparison rejects all non-digits */
		const uint32_t digit = (uint32_t) (*string) - (uint32_t) '0';
		if (digit >= 10) {
			break;
		}
		number = number * UINT32_C(10) + digit;
		string += 1;
	}
	*number_ptr = number;
	return string;
}
text_start); + return false; + } else { + for (const char* char_ptr = parsed_end; char_ptr != text_end; char_ptr++) { + if (!is_whitespace(*char_ptr)) { + cpuinfo_log_warning("non-whitespace characters \"%*.s\" following number in file %s are ignored", + (int) (text_end - char_ptr), char_ptr, KERNEL_MAX_FILENAME); + break; + } + } + } + + uint32_t* kernel_max_ptr = (uint32_t*) context; + *kernel_max_ptr = kernel_max; + return true; +} + +uint32_t cpuinfo_linux_get_max_processors_count(void) { + uint32_t kernel_max; + if (cpuinfo_linux_parse_small_file(KERNEL_MAX_FILENAME, KERNEL_MAX_FILESIZE, uint32_parser, &kernel_max)) { + cpuinfo_log_debug("parsed kernel_max value of %"PRIu32" from %s", kernel_max, KERNEL_MAX_FILENAME); + + if (kernel_max >= default_max_processors_count) { + cpuinfo_log_warning("kernel_max value of %"PRIu32" parsed from %s exceeds platform-default limit %"PRIu32, + kernel_max, KERNEL_MAX_FILENAME, default_max_processors_count - 1); + } + + return kernel_max + 1; + } else { + cpuinfo_log_warning("using platform-default max processors count = %"PRIu32, default_max_processors_count); + return default_max_processors_count; + } +} + +uint32_t cpuinfo_linux_get_processor_max_frequency(uint32_t processor) { + char max_frequency_filename[FREQUENCY_FILENAME_SIZE]; + const int chars_formatted = snprintf( + max_frequency_filename, FREQUENCY_FILENAME_SIZE, MAX_FREQUENCY_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= FREQUENCY_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for max frequency of processor %"PRIu32, processor); + return 0; + } + + uint32_t max_frequency; + if (cpuinfo_linux_parse_small_file(max_frequency_filename, FREQUENCY_FILESIZE, uint32_parser, &max_frequency)) { + cpuinfo_log_debug("parsed max frequency value of %"PRIu32" KHz for logical processor %"PRIu32" from %s", + max_frequency, processor, max_frequency_filename); + return max_frequency; + } else { + cpuinfo_log_warning("failed to parse max 
frequency for processor %"PRIu32" from %s", + processor, max_frequency_filename); + return 0; + } +} + +uint32_t cpuinfo_linux_get_processor_min_frequency(uint32_t processor) { + char min_frequency_filename[FREQUENCY_FILENAME_SIZE]; + const int chars_formatted = snprintf( + min_frequency_filename, FREQUENCY_FILENAME_SIZE, MIN_FREQUENCY_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= FREQUENCY_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for min frequency of processor %"PRIu32, processor); + return 0; + } + + uint32_t min_frequency; + if (cpuinfo_linux_parse_small_file(min_frequency_filename, FREQUENCY_FILESIZE, uint32_parser, &min_frequency)) { + cpuinfo_log_debug("parsed min frequency value of %"PRIu32" KHz for logical processor %"PRIu32" from %s", + min_frequency, processor, min_frequency_filename); + return min_frequency; + } else { + /* + * This error is less severe than parsing max frequency, because min frequency is only useful for clustering, + * while max frequency is also needed for peak FLOPS calculation. 
+ */ + cpuinfo_log_info("failed to parse min frequency for processor %"PRIu32" from %s", + processor, min_frequency_filename); + return 0; + } +} + +bool cpuinfo_linux_get_processor_core_id(uint32_t processor, uint32_t core_id_ptr[restrict static 1]) { + char core_id_filename[PACKAGE_ID_FILENAME_SIZE]; + const int chars_formatted = snprintf( + core_id_filename, CORE_ID_FILENAME_SIZE, CORE_ID_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= CORE_ID_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for core id of processor %"PRIu32, processor); + return 0; + } + + uint32_t core_id; + if (cpuinfo_linux_parse_small_file(core_id_filename, CORE_ID_FILESIZE, uint32_parser, &core_id)) { + cpuinfo_log_debug("parsed core id value of %"PRIu32" for logical processor %"PRIu32" from %s", + core_id, processor, core_id_filename); + *core_id_ptr = core_id; + return true; + } else { + cpuinfo_log_info("failed to parse core id for processor %"PRIu32" from %s", + processor, core_id_filename); + return false; + } +} + +bool cpuinfo_linux_get_processor_package_id(uint32_t processor, uint32_t package_id_ptr[restrict static 1]) { + char package_id_filename[PACKAGE_ID_FILENAME_SIZE]; + const int chars_formatted = snprintf( + package_id_filename, PACKAGE_ID_FILENAME_SIZE, PACKAGE_ID_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= PACKAGE_ID_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for package id of processor %"PRIu32, processor); + return 0; + } + + uint32_t package_id; + if (cpuinfo_linux_parse_small_file(package_id_filename, PACKAGE_ID_FILESIZE, uint32_parser, &package_id)) { + cpuinfo_log_debug("parsed package id value of %"PRIu32" for logical processor %"PRIu32" from %s", + package_id, processor, package_id_filename); + *package_id_ptr = package_id; + return true; + } else { + cpuinfo_log_info("failed to parse package id for processor %"PRIu32" from %s", + processor, package_id_filename); + return false; + 
/* Context passed to detect_processor_parser through the cpulist callback. */
struct detect_processors_context {
	/* Processors with index >= this value are ignored */
	uint32_t max_processors_count;
	/* Address of the flags word inside processor record 0 */
	uint32_t* processor0_flags;
	/* Byte stride between consecutive processor records */
	uint32_t processor_struct_size;
	/* Bit(s) to OR into the flags word of every listed processor */
	uint32_t detected_flag;
};

/*
 * cpulist callback: sets detected_flag in the flags word of every processor in
 * [processor_list_start, processor_list_end) that is below
 * max_processors_count. Always returns true.
 */
static bool detect_processor_parser(uint32_t processor_list_start, uint32_t processor_list_end, void* context) {
	const struct detect_processors_context* detect_context = (const struct detect_processors_context*) context;
	const uint32_t max_processors_count = detect_context->max_processors_count;
	/*
	 * Byte-wise addressing is done through char*: arithmetic on void* is a
	 * GNU extension, and the flags are written, so the pointer is not const.
	 */
	char* const processor0_flags = (char*) detect_context->processor0_flags;
	const size_t processor_struct_size = detect_context->processor_struct_size;
	const uint32_t detected_flag = detect_context->detected_flag;

	for (uint32_t processor = processor_list_start; processor < processor_list_end; processor++) {
		if (processor >= max_processors_count) {
			break;
		}
		uint32_t* processor_flags = (uint32_t*) (processor0_flags + processor_struct_size * processor);
		*processor_flags |= detected_flag;
	}
	return true;
}
/*
 * Context passed to siblings_parser through the cpulist callback.
 *
 * processor_* pointers address fields of the record of the processor whose
 * siblings list is being parsed; processor0_* pointers address the same
 * fields of record 0, and processor_struct_size is the byte stride between
 * consecutive records.
 */
struct siblings_context {
	/* Names used only in log messages, e.g. "package"/"core" */
	const char* group_name;
	const char* item_name;
	uint32_t max_processors_count;
	uint32_t processor;
	uint32_t* processor_flags;
	uint32_t* processor_group_id;
	uint32_t* processor_group_min;
	uint32_t* processor_group_max;
	uint32_t* processor0_flags;
	uint32_t* processor0_group_id;
	uint32_t* processor0_group_min;
	uint32_t* processor0_group_max;
	uint32_t processor_struct_size;
	/* Flag marking a processor as a member of a detected group */
	uint32_t group_cluster_flag;
	/* Flag marking the group ID of a processor as valid */
	uint32_t group_id_flag;
};

static inline uint32_t min(uint32_t a, uint32_t b) {
	return a < b ? a : b;
}

static inline uint32_t max(uint32_t a, uint32_t b) {
	return a > b ? a : b;
}

/*
 * cpulist callback: merges group information between context->processor and
 * each of its siblings in [sibling_list_start, sibling_list_end).
 *
 * For every sibling below max_processors_count:
 *  - group min/max of both processors are unified (min of mins, max of maxes);
 *  - the group ID is propagated from whichever of the two has it, or a warning
 *    is logged if both have IDs and they differ;
 *  - the sibling is marked with group_cluster_flag.
 * Siblings at or above max_processors_count are ignored with a warning.
 * Always returns true.
 */
static bool siblings_parser(uint32_t sibling_list_start, uint32_t sibling_list_end, struct siblings_context* context) {
	const char* group_name = context->group_name;
	const char* item_name = context->item_name;
	const uint32_t max_processors_count = context->max_processors_count;
	const uint32_t processor = context->processor;
	uint32_t* processor_flags_ptr = context->processor_flags;
	uint32_t* processor_group_id_ptr = context->processor_group_id;
	uint32_t* processor_group_min_ptr = context->processor_group_min;
	uint32_t* processor_group_max_ptr = context->processor_group_max;
	/* Byte-wise addressing goes through char*: arithmetic on void* is a GNU extension */
	char* const processor0_flags_ptr = (char*) context->processor0_flags;
	char* const processor0_group_id_ptr = (char*) context->processor0_group_id;
	char* const processor0_group_min_ptr = (char*) context->processor0_group_min;
	char* const processor0_group_max_ptr = (char*) context->processor0_group_max;
	const size_t processor_struct_size = context->processor_struct_size;
	const uint32_t group_cluster_flag = context->group_cluster_flag;
	const uint32_t group_id_flag = context->group_id_flag;

	*processor_flags_ptr |= group_cluster_flag;

	for (uint32_t sibling = sibling_list_start; sibling < sibling_list_end; sibling++) {
		if (sibling >= max_processors_count) {
			cpuinfo_log_warning("ignore siblings %"PRIu32"-%"PRIu32" of processor %"PRIu32,
				sibling, sibling_list_end - 1, processor);
			break;
		}

		const size_t sibling_offset = processor_struct_size * sibling;
		uint32_t* sibling_flags_ptr = (uint32_t*) (processor0_flags_ptr + sibling_offset);
		uint32_t* sibling_group_id_ptr = (uint32_t*) (processor0_group_id_ptr + sibling_offset);
		uint32_t* sibling_group_min_ptr = (uint32_t*) (processor0_group_min_ptr + sibling_offset);
		uint32_t* sibling_group_max_ptr = (uint32_t*) (processor0_group_max_ptr + sibling_offset);

		*sibling_group_min_ptr = *processor_group_min_ptr = min(*sibling_group_min_ptr, *processor_group_min_ptr);
		*sibling_group_max_ptr = *processor_group_max_ptr = max(*sibling_group_max_ptr, *processor_group_max_ptr);

		if (*sibling_flags_ptr & group_id_flag) {
			if (*processor_flags_ptr & group_id_flag) {
				if (*sibling_group_id_ptr != *processor_group_id_ptr) {
					cpuinfo_log_warning("%s sibling processors %"PRIu32" and %"PRIu32" have different %s IDs %"PRIu32" and %"PRIu32,
						item_name, processor, sibling,
						group_name, *processor_group_id_ptr, *sibling_group_id_ptr);
				}
			} else {
				cpuinfo_log_debug("propagate %s ID %"PRIu32" from processor %"PRIu32" to processor %"PRIu32,
					group_name, *sibling_group_id_ptr, sibling, processor);
				/*
				 * Copy the ID itself, not just the "ID valid" flag; otherwise
				 * the processor would be marked as having an ID while still
				 * holding whatever value was in the field before.
				 */
				*processor_group_id_ptr = *sibling_group_id_ptr;
				*processor_flags_ptr |= group_id_flag;
			}
			*sibling_flags_ptr |= group_cluster_flag;
		} else {
			cpuinfo_log_debug("propagate %s ID %"PRIu32" from processor %"PRIu32" to processor %"PRIu32,
				group_name, *processor_group_id_ptr, processor, sibling);
			*sibling_group_id_ptr = *processor_group_id_ptr;
			*sibling_flags_ptr |= group_cluster_flag | group_id_flag;
		}
	}
	return true;
}
const int chars_formatted = snprintf( + core_siblings_filename, CORE_SIBLINGS_FILENAME_SIZE, CORE_SIBLINGS_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= CORE_SIBLINGS_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for core siblings of processor %"PRIu32, processor); + return false; + } + + struct siblings_context context = { + .group_name = "package", + .item_name = "core", + .max_processors_count = max_processors_count, + .processor = processor, + .processor_flags = (uint32_t*) ((void*) processor0_flags + processor * processor_struct_size), + .processor_group_id = (uint32_t*) ((void*) processor0_package_id + processor * processor_struct_size), + .processor_group_min = (uint32_t*) ((void*) processor0_package_group_min + processor * processor_struct_size), + .processor_group_max = (uint32_t*) ((void*) processor0_package_group_max + processor * processor_struct_size), + .processor0_flags = processor0_flags, + .processor0_group_id = processor0_package_id, + .processor0_group_min = processor0_package_group_min, + .processor0_group_max = processor0_package_group_max, + .processor_struct_size = processor_struct_size, + .group_cluster_flag = CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER, + .group_id_flag = CPUINFO_LINUX_FLAG_PACKAGE_ID, + }; + if (cpuinfo_linux_parse_cpulist(core_siblings_filename, + (cpuinfo_cpulist_callback) siblings_parser, &context)) + { + return true; + } else { + cpuinfo_log_info("failed to parse the list of core siblings for processor %"PRIu32" from %s", + processor, core_siblings_filename); + return false; + } +} + +bool cpuinfo_linux_detect_thread_siblings( + uint32_t max_processors_count, + uint32_t processor, + uint32_t* processor0_flags, + uint32_t* processor0_core_id, + uint32_t* processor0_core_group_min, + uint32_t* processor0_core_group_max, + uint32_t processor_struct_size) +{ + char thread_siblings_filename[THREAD_SIBLINGS_FILENAME_SIZE]; + const int chars_formatted = snprintf( + thread_siblings_filename, 
THREAD_SIBLINGS_FILENAME_SIZE, THREAD_SIBLINGS_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= THREAD_SIBLINGS_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for thread siblings of processor %"PRIu32, processor); + return false; + } + + struct siblings_context context = { + .group_name = "core", + .item_name = "thread", + .max_processors_count = max_processors_count, + .processor = processor, + .processor_flags = (uint32_t*) ((void*) processor0_flags + processor * processor_struct_size), + .processor_group_id = (uint32_t*) ((void*) processor0_core_id + processor * processor_struct_size), + .processor_group_min = (uint32_t*) ((void*) processor0_core_group_min + processor * processor_struct_size), + .processor_group_max = (uint32_t*) ((void*) processor0_core_group_max + processor * processor_struct_size), + .processor0_flags = processor0_flags, + .processor0_group_id = processor0_core_id, + .processor0_group_min = processor0_core_group_min, + .processor0_group_max = processor0_core_group_max, + .processor_struct_size = processor_struct_size, + .group_cluster_flag = CPUINFO_LINUX_FLAG_CORE_CLUSTER, + .group_id_flag = CPUINFO_LINUX_FLAG_CORE_ID, + }; + if (cpuinfo_linux_parse_cpulist(thread_siblings_filename, + (cpuinfo_cpulist_callback) siblings_parser, &context)) + { + return true; + } else { + cpuinfo_log_info("failed to parse the list of thread siblings for processor %"PRIu32" from %s", + processor, thread_siblings_filename); + return false; + } +} + diff --git a/src/linux/smallfile.c b/src/linux/smallfile.c new file mode 100644 index 0000000..0056024 --- /dev/null +++ b/src/linux/smallfile.c @@ -0,0 +1,69 @@ +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> + +#include <linux/api.h> +#include <log.h> + + +inline static const char* parse_number(const char* string, const char* end, 
uint32_t number_ptr[restrict static 1]) { + uint32_t number = 0; + while (string != end) { + const uint32_t digit = (uint32_t) (*string) - (uint32_t) '0'; + if (digit >= 10) { + return string; + } + number = number * UINT32_C(10) + digit; + string += 1; + } + *number_ptr = number; + return end; +} + +bool cpuinfo_linux_parse_small_file(const char* filename, size_t buffer_size, cpuinfo_smallfile_callback callback, void* context) { + int file = -1; + bool status = false; + char* buffer = (char*) alloca(buffer_size); + + #if CPUINFO_LOG_DEBUG_PARSERS + cpuinfo_log_debug("parsing small file %s", filename); + #endif + + file = open(filename, O_RDONLY); + if (file == -1) { + cpuinfo_log_error("failed to open %s: %s", filename, strerror(errno)); + goto cleanup; + } + + char* buffer_end = &buffer[buffer_size]; + size_t buffer_position = 0; + ssize_t bytes_read; + do { + bytes_read = read(file, &buffer[buffer_position], buffer_size - buffer_position); + if (bytes_read < 0) { + cpuinfo_log_error("failed to read file %s at position %zu: %s", filename, buffer_position, strerror(errno)); + goto cleanup; + } + buffer_position += (size_t) bytes_read; + if (buffer_position >= buffer_size) { + cpuinfo_log_error("failed to read file %s: insufficient buffer of size %zu", filename, buffer_size); + goto cleanup; + } + } while (bytes_read != 0); + + status = callback(buffer, &buffer[buffer_position], context); + +cleanup: + if (file != -1) { + close(file); + file = -1; + } + return status; +} @@ -50,6 +50,7 @@ va_start(args, format); #ifdef __ANDROID__ + printf("Note: "); vprintf(format, args); printf("\n"); fflush(stdout); @@ -68,6 +69,7 @@ va_start(args, format); #ifdef __ANDROID__ + printf("Debug: "); vprintf(format, args); printf("\n"); fflush(stdout); @@ -7,6 +7,8 @@ #define CPUINFO_LOG_INFO 3 #define CPUINFO_LOG_DEBUG 4 +#define CPUINFO_LOG_DEBUG_PARSERS 0 + #ifndef CPUINFO_LOG_LEVEL #define CPUINFO_LOG_LEVEL CPUINFO_LOG_ERROR #endif diff --git a/src/x86/linux/init.c 
b/src/x86/linux/init.c index 60eedc4..6d27805 100644 --- a/src/x86/linux/init.c +++ b/src/x86/linux/init.c @@ -89,6 +89,14 @@ static void cpuinfo_x86_count_caches( *l4_count_ptr = l4_count; } +static bool cpuinfo_x86_linux_cpulist_callback(uint32_t cpulist_start, uint32_t cpulist_end, void* context) { + cpu_set_t* cpuset = (cpu_set_t*) context; + for (uint32_t cpu = cpulist_start; cpu < cpulist_end; cpu++) { + CPU_SET((int) cpu, cpuset); + } + return true; +} + void cpuinfo_x86_linux_init(void) { struct cpuinfo_x86_processor* x86_processors = NULL; struct cpuinfo_processor* processors = NULL; @@ -105,10 +113,12 @@ void cpuinfo_x86_linux_init(void) { } cpu_set_t present_set; - cpuinfo_linux_parse_cpuset("/sys/devices/system/cpu/present", &present_set); + CPU_ZERO(&present_set); + cpuinfo_linux_parse_cpulist("/sys/devices/system/cpu/present", cpuinfo_x86_linux_cpulist_callback, &present_set); cpu_set_t possible_set; - cpuinfo_linux_parse_cpuset("/sys/devices/system/cpu/possible", &possible_set); + CPU_ZERO(&possible_set); + cpuinfo_linux_parse_cpulist("/sys/devices/system/cpu/possible", cpuinfo_x86_linux_cpulist_callback, &possible_set); cpu_set_t processors_set; CPU_AND(&processors_set, &present_set, &possible_set); |