diff options
author | Marat Dukhan <maratek@gmail.com> | 2017-05-08 06:16:45 +0000 |
---|---|---|
committer | Marat Dukhan <maratek@gmail.com> | 2017-05-08 06:16:45 +0000 |
commit | 3c98276d6008439d8faae3350e8bfa0f144c5bef (patch) | |
tree | 4bb002f558b52849e232d3190415970a56c106f1 | |
parent | d226627d39599f37aadaccee1755aa2a8bb8694c (diff) | |
download | cpuinfo-3c98276d6008439d8faae3350e8bfa0f144c5bef.tar.gz |
Working ARM + Linux version
-rwxr-xr-x | configure.py | 8 | ||||
-rw-r--r-- | include/cpuinfo.h | 61 | ||||
-rw-r--r-- | src/api.h | 2 | ||||
-rw-r--r-- | src/arm/api.h | 23 | ||||
-rw-r--r-- | src/arm/cache.c | 513 | ||||
-rw-r--r-- | src/arm/linux/api.h | 103 | ||||
-rw-r--r-- | src/arm/linux/cp.h | 20 | ||||
-rw-r--r-- | src/arm/linux/cpuinfo.c | 842 | ||||
-rw-r--r-- | src/arm/linux/init.c | 177 | ||||
-rw-r--r-- | src/arm/linux/isa.c | 153 | ||||
-rw-r--r-- | src/arm/uarch.c | 143 | ||||
-rw-r--r-- | src/init.c | 6 | ||||
-rw-r--r-- | src/linux/cpuset.c | 4 | ||||
-rw-r--r-- | tools/cpu-info.c | 6 | ||||
-rw-r--r-- | tools/isa-info.c | 33 |
15 files changed, 2084 insertions, 10 deletions
diff --git a/configure.py b/configure.py index 717daac..c357b46 100755 --- a/configure.py +++ b/configure.py @@ -31,6 +31,14 @@ def main(args): elif build.target.is_linux: sources += ["x86/linux/init.c"] sources.append("x86/isa.c" if not build.target.is_nacl else "x86/nacl/isa.c") + if build.target.is_arm: + sources += ["arm/uarch.c", "arm/cache.c"] + if build.target.is_linux: + sources += [ + "arm/linux/init.c", + "arm/linux/isa.c", + "arm/linux/cpuinfo.c" + ] if build.target.is_macos: sources += ["mach/topology.c"] if build.target.is_linux: diff --git a/include/cpuinfo.h b/include/cpuinfo.h index f2e129a..ee0354e 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -187,6 +187,44 @@ }; #endif +#if CPUINFO_ARCH_ARM + struct cpuinfo_arm_isa { + bool thumb; + bool thumb2; + bool thumbee; + bool jazelle; + bool armv5e; + bool armv6; + bool armv6k; + bool armv7; + bool armv7mp; + bool idiv; + + bool vfpv2; + bool vfpv3; + bool d32; + bool fp16; + bool fma; + + bool wmmx; + bool wmmx2; + bool neon; + + bool aes; + bool sha1; + bool sha2; + bool pmull; + bool crc32; + }; + + struct cpuinfo_arm_model_info { + uint16_t implementer; + uint16_t variant; + uint16_t part; + uint16_t revision; + }; +#endif + #define CPUINFO_CACHE_UNIFIED 0x00000001 #define CPUINFO_CACHE_INCLUSIVE 0x00000002 #define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004 @@ -417,6 +455,9 @@ enum cpuinfo_uarch { /** Intel Knights Mill Xeon Phi. */ cpuinfo_uarch_knights_mill = 0x00100504, + /** Intel/Marvell XScale series. */ + cpuinfo_uarch_xscale = 0x00100600, + /** AMD K5. */ cpuinfo_uarch_k5 = 0x00200100, /** AMD K6 and alike. */ @@ -493,19 +534,22 @@ enum cpuinfo_uarch { /** Qualcomm Kryo. */ cpuinfo_uarch_kryo = 0x00400102, + /** nVidia Denver. */ + cpuinfo_uarch_denver = 0x00500100, + /** Samsung Mongoose. */ - cpuinfo_uarch_mongoose = 0x00500100, + cpuinfo_uarch_mongoose = 0x00600100, /** Apple A6 and A6X processors. 
*/ - cpuinfo_uarch_swift = 0x00600100, + cpuinfo_uarch_swift = 0x00700100, /** Apple A7 processor. */ - cpuinfo_uarch_cyclone = 0x00600101, + cpuinfo_uarch_cyclone = 0x00700101, /** Apple A8 processor. */ - cpuinfo_uarch_typhoon = 0x00600102, + cpuinfo_uarch_typhoon = 0x00700102, /** Apple A9 processor. */ - cpuinfo_uarch_twister = 0x00600103, + cpuinfo_uarch_twister = 0x00700103, /** Apple A10 processor. */ - cpuinfo_uarch_hurricane = 0x00600104, + cpuinfo_uarch_hurricane = 0x00700104, }; struct cpuinfo_topology { @@ -516,7 +560,6 @@ struct cpuinfo_topology { /* Package (socket) ID */ uint32_t package_id; #if defined(__linux__) - int linux_id; #endif #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 @@ -564,6 +607,10 @@ void CPUINFO_ABI cpuinfo_deinitialize(void); extern struct cpuinfo_x86_isa cpuinfo_isa; #endif +#if CPUINFO_ARCH_ARM + extern struct cpuinfo_arm_isa cpuinfo_isa; +#endif + struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l1i_cache(void); struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l1d_cache(void); struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l2_cache(void); @@ -6,6 +6,6 @@ extern uint32_t cpuinfo_processors_count; void cpuinfo_x86_mach_init(void); void cpuinfo_x86_linux_init(void); -void cpuinfo_arm_init(void); +void cpuinfo_arm_linux_init(void); typedef void (*cpuinfo_processor_callback)(uint32_t); diff --git a/src/arm/api.h b/src/arm/api.h new file mode 100644 index 0000000..3da0cf3 --- /dev/null +++ b/src/arm/api.h @@ -0,0 +1,23 @@ +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include <cpuinfo.h> + + +void cpuinfo_arm_decode_vendor_uarch( + uint32_t cpu_implementer, + uint32_t cpu_part, + bool has_vfpv4, + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]); + +void cpuinfo_arm_decode_cache( + enum cpuinfo_uarch uarch, + uint32_t uarch_cores, + uint32_t cpu_part, + uint32_t arch_version, + struct cpuinfo_cache l1i[restrict static 1], + struct cpuinfo_cache l1d[restrict static 1], + struct 
cpuinfo_cache l2[restrict static 1]); diff --git a/src/arm/cache.c b/src/arm/cache.c new file mode 100644 index 0000000..440ec0b --- /dev/null +++ b/src/arm/cache.c @@ -0,0 +1,513 @@ +#include <stdint.h> + +#include <cpuinfo.h> +#include <log.h> +#include <arm/api.h> + + +void cpuinfo_arm_decode_cache( + enum cpuinfo_uarch uarch, + uint32_t uarch_cores, + uint32_t cpu_part, + uint32_t arch_version, + struct cpuinfo_cache l1i[restrict static 1], + struct cpuinfo_cache l1d[restrict static 1], + struct cpuinfo_cache l2[restrict static 1]) +{ + switch (uarch) { + case cpuinfo_uarch_xscale: + switch (cpu_part >> 8) { + case 2: + /* + * PXA 210/25X/26X + * + * See "Computer Organization and Design, Revised Printing: The Hardware/Software Interface" + * by David A. Patterson, John L. Hennessy + */ + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 32, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 64 + }; + break; + case 4: + /* PXA 27X */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 32, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 32, + .line_size = 32 + }; + break; + case 6: + /* + * PXA 3XX + * + * See http://download.intel.com/design/intelxscale/31628302.pdf + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l2 = (struct cpuinfo_cache) { + .size = 256 * 1024, + .associativity = 8, + .line_size = 32 + }; + break; + } + break; + case cpuinfo_uarch_arm11: + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 32 + }; + break; + case cpuinfo_uarch_cortex_a5: + /* + * Cortex-A5 Technical Reference Manual: + * 7.1.1. 
Memory system + * The Cortex-A5 processor has separate instruction and data caches. + * The caches have the following features: + * - Data cache is 4-way set-associative. + * - Instruction cache is 2-way set-associative. + * - The cache line length is eight words. + * - You can configure the instruction and data caches independently during implementation + * to sizes of 4KB, 8KB, 16KB, 32KB, or 64KB. + * 1.1.3. System design components + * PrimeCell Level 2 Cache Controller (PL310) + * The addition of an on-chip secondary cache, also referred to as a Level 2 or L2 cache, is a + * recognized method of improving the performance of ARM-based systems when significant memory traffic + * is generated by the processor. The PrimeCell Level 2 Cache Controller reduces the number of external + * memory accesses and has been optimized for use with the Cortex-A5 processor. + * 8.1.7. Exclusive L2 cache + * The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode. + * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller. + * + * +--------------------+-----------+-----------+----------+-----------+ + * | Processor model | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-----------+-----------+----------+-----------+ + * | Qualcomm MSM7225A | | | | | + * | Qualcomm MSM7625A | | | | | + * | Qualcomm MSM7227A | | | | | + * | Qualcomm MSM7627A | 32K | 32K | 256K | Wiki [1] | + * | Qualcomm MSM7225AB | | | | | + * | Qualcomm MSM7225AB | | | | | + * | Qualcomm QSD8250 | | | | | + * | Qualcomm QSD8650 | | | | | + * +--------------------+-----------+-----------+----------+-----------+ + * | Spreadtrum SC6821 | 32K | 32K | ? | | + * | Spreadtrum SC6825 | 32K | 32K | 256K | Wiki [2] | + * | Spreadtrum SC8810 | ? | ? | ? | | + * | Spreadtrum SC8825 | 32K | 32K | ? 
| | + * +--------------------+-----------+-----------+----------+-----------+ + * + * [1] https://en.wikipedia.org/wiki/List_of_Qualcomm_Snapdragon_systems-on-chip#Snapdragon_S1 + * [2] https://en.wikipedia.org/wiki/Spreadtrum + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l2 = (struct cpuinfo_cache) { + .size = 256 * 1024, + /* + * Follow NXP specification: "Eight-way set-associative 512 kB L2 cache with 32B line size" + * Reference: http://www.nxp.com/assets/documents/data/en/application-notes/AN4947.pdf + */ + .associativity = 8, + .line_size = 32 + }; + break; + case cpuinfo_uarch_cortex_a7: + /* + * Cortex-A7 MPCore Technical Reference Manual: + * 6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. You can configure the + * instruction and data caches independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB. + * + * The L1 instruction memory system has the following features: + * - Instruction side cache line length of 32-bytes. + * - 2-way set-associative instruction cache. + * + * The L1 data memory system has the following features: + * - Data side cache line length of 64-bytes. + * - 4-way set-associative data cache. + * + * 7.1. About the L2 Memory system + * The L2 memory system consists of an: + * - Optional tightly-coupled L2 cache that includes: + * - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB. 
+ * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Allwinner A20 | 2 | 32K | 32K | 256K | [1] | + * | Allwinner A23 | 2 | 32K | 32K | 256K | [2] | + * | Allwinner A31 | 4 | 32K | 32K | 1M | [3] | + * | Allwinner A31s | 4 | 32K | 32K | 1M | [4] | + * | Allwinner A33 | 4 | 32K | 32K | 512K | [5] | + * | Allwinner A80 Octa | 4(+4) | 32K | 32K | 512K(+2M) | [6] | + * | Allwinner A81T | 8 | 32K | 32K | 1M | [7] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Broadcom BCM2836 | 4 | 32K | 32K | 512K | [8] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] https://linux-sunxi.org/A20 + * [2] https://linux-sunxi.org/A23 + * [3] http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf + * [4] https://github.com/allwinner-zh/documents/blob/master/A31s/A31s_Datasheet_v1.5_20150510.pdf + * [5] http://dl.linux-sunxi.org/A33/A33_Datasheet_release1.0.pdf + * [6] https://linux-sunxi.org/images/1/10/A80_Datasheet_Revision_1.0_0404.pdf + * [7] http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf + * [8] https://www.raspberrypi.org/forums/viewtopic.php?t=98428 + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = 128 * 1024 * uarch_cores, + .associativity = 8, + .line_size = 64 + }; + break; + case cpuinfo_uarch_cortex_a8: + /* + * Cortex-A8 Technical Reference Manual: + * 7.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches in a Harvard arrangement. 
+ * The L1 memory system provides the core with: + * - fixed line length of 64 bytes + * - support for 16KB or 32KB caches + * - 4-way set associative cache structure + * 8.1. About the L2 memory system + * The L2 memory system is tightly coupled to the L1 data cache and L1 instruction cache. + * The key features of the L2 memory system include: + * - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB + * - fixed line length of 64 bytes + * - 8-way set associative cache structure + */ + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = 128 * 1024, + .associativity = 8, + .line_size = 64 + }; + break; + case cpuinfo_uarch_cortex_a9: + /* + * ARM Cortex‑A9 Technical Reference Manual: + * 7.1.1 Memory system + * The Cortex‑A9 processor has separate instruction and data caches. + * The caches have the following features: + * - Both caches are 4-way set-associative. + * - The cache line length is eight words. + * - You can configure the instruction and data caches independently during implementation + * to sizes of 16KB, 32KB, or 64KB. + * 8.1.5 Exclusive L2 cache + * The Cortex‑A9 processor can be connected to an L2 cache that supports an exclusive cache mode. + * This mode must be activated both in the Cortex‑A9 processor and in the L2 cache controller. 
+ * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Exynos 4 Dual 4210 | 2 | 32K | 32K | 1M | [1] | + * | Exynos 4 Dual 4212 | 2 | 32K | 32K | 1M | [2] | + * | Exynos 4 Quad 4412 | 4 | 32K | 32K | 1M | [3] | + * | Exynos 4 Quad 4415 | 4 | 32K | 32K | 1M | | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_45nm_User_Manaul_Public_REV1.00-0.pdf + * [2] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_32nm_User_Manaul_Public_REV100-0.pdf + * [3] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Quad_User_Manaul_Public_REV1.00-0.pdf + */ + + /* Use Exynos 4 specs */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l2 = (struct cpuinfo_cache) { + .size = 1024 * 1024, + .associativity = 8, + .line_size = 32 + }; + break; + case cpuinfo_uarch_cortex_a15: + /* + * 6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. + * The L1 instruction memory system has the following features: + * - 32KB 2-way set-associative instruction cache. + * - Fixed line length of 64 bytes. + * The L1 data memory system has the following features: + * - 32KB 2-way set-associative data cache. + * - Fixed line length of 64 bytes. + * 7.1. About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB. + * - Fixed line length of 64 bytes. + * - 16-way set-associative cache structure. 
+ * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Exynos 5 Dual 5250 | 2 | 32K | 32K | 1M | [1] | + * | Exynos 5 Hexa 5260 | 2(+4) | 32K | 32K | 1M(+512K) | [2] | + * | Exynos 5 Octa 5410 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * | Exynos 5 Octa 5420 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * | Exynos 5 Octa 5422 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * | Exynos 5 Octa 5430 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * | Exynos 5 Octa 5800 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.arndaleboard.org/wiki/downloads/supports/Exynos_5_Dual_User_Manaul_Public_REV1.00.pdf + * [2] http://www.yicsystem.com/wp-content/uploads/2014/08/Espresso5260P-Guide-Book.pdf + * [3] http://www.anandtech.com/show/6768/samsung-details-exynos-5-octa-architecture-power-at-isscc-13 + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = uarch_cores * 512 * 1024, + .associativity = 16, + .line_size = 64 + }; + break; + case cpuinfo_uarch_scorpion: + /* + * - "The CPU includes 32KB instruction and data caches as + * well as a complete memory-management unit (MMU) suitable + * for high-level operating systems. The CPU also has + * 256KB of SRAM that can be allocated in 64KB increments + * to level-two (L2) cache or tightly coupled memory (TCM)." [1] + * We interpret it as L2 cache being 4-way set-associative on single-core Scorpion. + * - L1 Data Cache = 32 KB. 32 B/line. [2] + * - L2 Cache = 256 KB. 128 B/line. 
[2] + * - 256 KB (single-core) or 512 KB (dual-core) L2 cache [3] + * - Single or dual-core configuration [3] + * - For L1 cache assume the same associativity as Krait + * + * [1] https://www.qualcomm.com/media/documents/files/linley-report-on-dual-core-snapdragon.pdf + * [2] http://www.7-cpu.com/cpu/Snapdragon.html + * [3] https://en.wikipedia.org/wiki/Scorpion_(CPU) + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l2 = (struct cpuinfo_cache) { + .size = uarch_cores * 256 * 1024, + .associativity = 4, + .line_size = 128 + }; + break; + case cpuinfo_uarch_krait: + /* + * - L0 Data cache = 4 KB. 64 B/line, direct mapped [1] + * - L0 Instruction cache = 4 KB. [1] + * - L1 Data cache = 16 KB. 64 B/line, 4-way [1] + * - L1 Instruction cache = 16 KB, 4-way [1] + * - L2 Cache = 1 MB, 128 B/line, 8-way. Each core has fast access only to 512 KB of L2 cache. 
[1] + * - L2 = 1MB (dual core) or 2MB (quad core), 8-way set associative [2] + * + * [1] http://www.7-cpu.com/cpu/Krait.html + * [2] http://www.anandtech.com/show/4940/qualcomm-new-snapdragon-s4-msm8960-krait-architecture/2 + */ + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 64 /* assume same as L1D */ + }; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = uarch_cores * 512 * 1024, + .associativity = 8, + .line_size = 128 + }; + break; + case cpuinfo_uarch_kryo: + /* + * +-----------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +-----------------+-------+-----------+-----------+-----------+-----------+ + * | Snapdragon 820 | 2+2 | 32K | 32K | 1M+512K | [1] | + * | Snapdragon 821 | 2+2 | 32K | 32K | 1M+512K | [1] | + * | Snapdragon 835 | 4(+4) | 64K+32K | 64K+32K | 2M(+1M) | sysfs | + * +-----------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview/2 + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4 /* assume same as Krait */, + .line_size = 64 /* assume same as Krait */ + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4 /* assume same as Krait */, + .line_size = 64 /* assume same as Krait */ + }; + *l2 = (struct cpuinfo_cache) { + .size = uarch_cores * 512 * 1024, + .associativity = 16 /* sysfs-reported on Snapdragon 835 */, + .line_size = 64 /* assume same as Krait */ + }; + break; + case cpuinfo_uarch_mongoose: + /* + * - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$, + * namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1] + * - "For loads and stores, a 32 KB, 8-way set associative cache with 64 
byte line size is used" [1] + * - "The L2 cache here is 2MB shared across all cores split into 16 sets. This memory is also split + * into 4 banks and has a 22 cycle latency" [1] + * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Exynos 8 Octa 8890 | 4(+4) | 32K | 64K | 2M | [1] | + * | Exynos 8 Octa 8895 | 4(+4) | 32K | 64K | 2M | [2] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.anandtech.com/show/10590/hot-chips-2016-exynos-m1-architecture-disclosed + * [2] https://www.extremetech.com/mobile/244949-samsungs-exynos-8895-features-custom-cpu-cores-first-10nm-chip-market + */ + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 128 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 8, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = 2 * 1024 * 1024, + .associativity = 16, + .line_size = 64 + }; + break; + case cpuinfo_uarch_cortex_a12: + case cpuinfo_uarch_cortex_a17: + case cpuinfo_uarch_cortex_a32: + case cpuinfo_uarch_cortex_a35: + case cpuinfo_uarch_cortex_a53: + case cpuinfo_uarch_cortex_a57: + case cpuinfo_uarch_cortex_a72: + case cpuinfo_uarch_cortex_a73: + default: + cpuinfo_log_warning("target uarch not recognized; using generic cache parameters"); + /* Follow OpenBLAS */ + if (arch_version >= 8) { + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = uarch_cores * 256 * 1024, + .associativity = 8, + .line_size = 64 + }; + } else { + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 32 + 
}; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 32 + }; + if (arch_version >= 7) { + *l2 = (struct cpuinfo_cache) { + .size = uarch_cores * 128 * 1024, + .associativity = 8, + .line_size = 32 + }; + } + } + break; + } + l1i->sets = l1i->size / (l1i->associativity * l1i->line_size); + l1i->partitions = 1; + l1d->sets = l1d->size / (l1d->associativity * l1d->line_size); + l1d->partitions = 1; + if (l2->size != 0) { /* L2 is optional: only derive its geometry when a size was set */ + l2->sets = l2->size / (l2->associativity * l2->line_size); /* sets come from L2's own size, ways and line size */ + l2->partitions = 1; + } +} diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h new file mode 100644 index 0000000..4e13e00 --- /dev/null +++ b/src/arm/linux/api.h @@ -0,0 +1,103 @@ +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include <cpuinfo.h> + + +#define PROC_CPUINFO_ARCH_T UINT32_C(0x00000001) +#define PROC_CPUINFO_ARCH_E UINT32_C(0x00000002) +#define PROC_CPUINFO_ARCH_J UINT32_C(0x00000004) + +struct proc_cpuinfo_arch { + uint32_t version; + uint32_t flags; +}; + +struct proc_cpuinfo_cache { + uint32_t i_size; + uint32_t i_assoc; + uint32_t i_line_length; + uint32_t i_sets; + uint32_t d_size; + uint32_t d_assoc; + uint32_t d_line_length; + uint32_t d_sets; +}; + +/* arch/arm/include/uapi/asm/hwcap.h */ + +#define PROC_CPUINFO_FEATURE_SWP UINT32_C(0x00000001) +#define PROC_CPUINFO_FEATURE_HALF UINT32_C(0x00000002) +#define PROC_CPUINFO_FEATURE_THUMB UINT32_C(0x00000004) +#define PROC_CPUINFO_FEATURE_26BIT UINT32_C(0x00000008) +#define PROC_CPUINFO_FEATURE_FASTMULT UINT32_C(0x00000010) +#define PROC_CPUINFO_FEATURE_FPA UINT32_C(0x00000020) +#define PROC_CPUINFO_FEATURE_VFP UINT32_C(0x00000040) +#define PROC_CPUINFO_FEATURE_EDSP UINT32_C(0x00000080) +#define PROC_CPUINFO_FEATURE_JAVA UINT32_C(0x00000100) +#define PROC_CPUINFO_FEATURE_IWMMXT UINT32_C(0x00000200) +#define PROC_CPUINFO_FEATURE_CRUNCH UINT32_C(0x00000400) +#define PROC_CPUINFO_FEATURE_THUMBEE UINT32_C(0x00000800) +#define PROC_CPUINFO_FEATURE_NEON 
UINT32_C(0x00001000) +#define PROC_CPUINFO_FEATURE_VFPV3 UINT32_C(0x00002000) +#define PROC_CPUINFO_FEATURE_VFPV3D16 UINT32_C(0x00004000) /* Also set for VFPv4 with 16 double-precision registers */ +#define PROC_CPUINFO_FEATURE_TLS UINT32_C(0x00008000) +#define PROC_CPUINFO_FEATURE_VFPV4 UINT32_C(0x00010000) +#define PROC_CPUINFO_FEATURE_IDIVA UINT32_C(0x00020000) +#define PROC_CPUINFO_FEATURE_IDIVT UINT32_C(0x00040000) +#define PROC_CPUINFO_FEATURE_IDIV UINT32_C(0x00060000) +#define PROC_CPUINFO_FEATURE_VFPD32 UINT32_C(0x00080000) +#define PROC_CPUINFO_FEATURE_LPAE UINT32_C(0x00100000) +#define PROC_CPUINFO_FEATURE_EVTSTRM UINT32_C(0x00200000) + +#define PROC_CPUINFO_FEATURE2_AES UINT32_C(0x00000001) +#define PROC_CPUINFO_FEATURE2_PMULL UINT32_C(0x00000002) +#define PROC_CPUINFO_FEATURE2_SHA1 UINT32_C(0x00000004) +#define PROC_CPUINFO_FEATURE2_SHA2 UINT32_C(0x00000008) +#define PROC_CPUINFO_FEATURE2_CRC32 UINT32_C(0x00000010) + + +#define PROC_CPUINFO_VALID_ARCHITECTURE UINT32_C(0x00000001) +#define PROC_CPUINFO_VALID_IMPLEMENTER UINT32_C(0x00000002) +#define PROC_CPUINFO_VALID_VARIANT UINT32_C(0x00000004) +#define PROC_CPUINFO_VALID_PART UINT32_C(0x00000008) +#define PROC_CPUINFO_VALID_REVISION UINT32_C(0x00000010) +#define PROC_CPUINFO_VALID_FEATURES UINT32_C(0x00000020) +#define PROC_CPUINFO_VALID_ICACHE_SIZE UINT32_C(0x00000100) +#define PROC_CPUINFO_VALID_ICACHE_SETS UINT32_C(0x00000200) +#define PROC_CPUINFO_VALID_ICACHE_WAYS UINT32_C(0x00000400) +#define PROC_CPUINFO_VALID_ICACHE_LINE UINT32_C(0x00000800) +#define PROC_CPUINFO_VALID_DCACHE_SIZE UINT32_C(0x00001000) +#define PROC_CPUINFO_VALID_DCACHE_SETS UINT32_C(0x00002000) +#define PROC_CPUINFO_VALID_DCACHE_WAYS UINT32_C(0x00004000) +#define PROC_CPUINFO_VALID_DCACHE_LINE UINT32_C(0x00008000) + +#define PROC_CPUINFO_VALID_INFO UINT32_C(0x0000003F) +#define PROC_CPUINFO_VALID_ICACHE UINT32_C(0x00000F00) +#define PROC_CPUINFO_VALID_DCACHE UINT32_C(0x0000F000) +#define PROC_CPUINFO_VALID_CACHE_LINE 
UINT32_C(0x00008800) + +struct proc_cpuinfo { + struct proc_cpuinfo_arch architecture; + struct proc_cpuinfo_cache cache; + uint32_t features; + uint32_t features2; + uint32_t processor_number; + uint32_t cpuid; + uint32_t implementer; + uint32_t variant; + uint32_t part; + uint32_t revision; + uint32_t valid_mask; +}; + +struct proc_cpuinfo* cpuinfo_arm_linux_parse_proc_cpuinfo( + const char* filename, + uint32_t processors_count[restrict static 1]); + +void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( + const struct proc_cpuinfo proc_cpuinfo[restrict static 1], + uint32_t proc_cpuinfo_count, + struct cpuinfo_arm_isa isa[restrict static 1]); diff --git a/src/arm/linux/cp.h b/src/arm/linux/cp.h new file mode 100644 index 0000000..2914a95 --- /dev/null +++ b/src/arm/linux/cp.h @@ -0,0 +1,20 @@ +#include <stdint.h> + + +static inline uint32_t read_fpsid(void) { + uint32_t fpsid; + __asm__ __volatile__("MRC p10, 0x7, %[fpsid], cr0, cr0, 0" : [fpsid] "=r" (fpsid)); + return fpsid; +} + +static inline uint32_t read_mvfr0(void) { + uint32_t mvfr0; + __asm__ __volatile__("MRC p10, 0x7, %[mvfr0], cr7, cr0, 0" : [mvfr0] "=r" (mvfr0)); + return mvfr0; +} + +static inline uint32_t read_wcid(void) { + uint32_t wcid; + __asm__ __volatile__("MRC p1, 0, %[wcid], c0, c0" : [wcid] "=r" (wcid)); + return wcid; +} diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c new file mode 100644 index 0000000..76e1a1f --- /dev/null +++ b/src/arm/linux/cpuinfo.c @@ -0,0 +1,842 @@ +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <errno.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <sched.h> + +#include <arm/linux/api.h> +#include <log.h> + + +/* + * Size, in chars, of the on-stack buffer used for parsing cpu lists. + * This is also the limit on the length of a single entry + * (<cpu-number> or <cpu-number-start>-<cpu-number-end>) + * in the cpu list. 
+ */ +#define BUFFER_SIZE 256 + + +static uint32_t parse_processor_number( + const char* processor_start, + const char* processor_end, + struct proc_cpuinfo proc_cpuinfo[restrict static 1]) +{ + const size_t processor_length = (size_t) (processor_end - processor_start); + + if (processor_length == 0) { + cpuinfo_log_warning("Processor number in /proc/cpuinfo is ignored: string is empty"); + return 0; + } + + uint32_t processor_number = 0; /* decimal value accumulated from [processor_start, processor_end) */ + for (const char* digit_ptr = processor_start; digit_ptr != processor_end; digit_ptr++) { + const uint32_t digit = (uint32_t) (*digit_ptr - '0'); /* bytes below '0' wrap to large unsigned values */ + if (digit >= 10) { /* a single unsigned comparison rejects every byte outside '0'..'9' */ + cpuinfo_log_warning("non-decimal suffix %.*s in /proc/cpuinfo processor number is ignored", + (int) (processor_end - digit_ptr), digit_ptr); + break; + } + + processor_number = processor_number * 10 + digit; + } + + return processor_number; +} + +/* + * Full list of ARM features reported in /proc/cpuinfo: + * + * * swp - support for SWP instruction (deprecated in ARMv7, can be removed in future) + * * half - support for half-word loads and stores. These instructions are part of ARMv4, + * so no need to check it on supported CPUs. + * * thumb - support for 16-bit Thumb instruction set. Note that BX instruction is detected + * by ARMv4T architecture, not by this flag. + * * 26bit - old CPUs merged 26-bit PC and program status register (flags) into 32-bit PC + * and had special instructions for working with packed PC. Now it is all deprecated. + * * fastmult - most old ARM CPUs could only compute 2 bits of multiplication result per clock + * cycle, but CPUs with M suffix (e.g. ARM7TDMI) could compute 4 bits per cycle. + * Of course, now it makes no sense. + * * fpa - floating point accelerator available. On original ARM ABI all floating-point operations + * generated FPA instructions. If FPA was not available, these instructions generated + * "illegal operation" interrupts, and the OS processed them by emulating the FPA instructions. 
+ * Debian used this ABI before it switched to EABI. Now FPA is deprecated. + * * vfp - vector floating point instructions. Available on most modern CPUs (as part of VFPv3). + * Required by Android ARMv7A ABI and by Ubuntu on ARM. + * Note: there is no flag for VFPv2. + * * edsp - V5E instructions: saturating add/sub and 16-bit x 16-bit -> 32/64-bit multiplications. + * Required on Android, supported by all CPUs in production. + * * java - Jazelle extension. Supported on most CPUs. + * * iwmmxt - Intel/Marvell Wireless MMX instructions. 64-bit integer SIMD. + * Supported on XScale (Since PXA270) and Sheeva (PJ1, PJ4) architectures. + * Note that there is no flag for WMMX2 instructions. + * * crunch - Maverick Crunch instructions. Junk. + * * thumbee - ThumbEE instructions. Almost no documentation is available. + * * neon - NEON instructions (aka Advanced SIMD). MVFR1 register gives more + * fine-grained information on particular supported features, but + * the Linux kernel exports only a single flag for all of them. + * According to ARMv7A docs it also implies the availability of VFPv3 + * (with 32 double-precision registers d0-d31). + * * vfpv3 - VFPv3 instructions. Available on most modern CPUs. Augment VFPv2 by + * conversion to/from integers and load constant instructions. + * Required by Android ARMv7A ABI and by Ubuntu on ARM. + * * vfpv3d16 - VFPv3 instructions with only 16 double-precision registers (d0-d15). + * * tls - software thread ID registers. + * Used by kernel (and likely libc) for efficient implementation of TLS. + * * vfpv4 - fused multiply-add instructions. + * * idiva - DIV instructions available in ARM mode. + * * idivt - DIV instructions available in Thumb mode. + * * vfpd32 - VFP (of any version) with 32 double-precision registers d0-d31. + * * lpae - Large Physical Address Extension (physical address up to 40 bits). + * * evtstrm - generation of Event Stream by timer. + * * aes - AES instructions. 
+ * * pmull - Polinomial Multiplication instructions. + * * sha1 - SHA1 instructions. + * * sha2 - SHA2 instructions. + * * crc32 - CRC32 instructions. + * + * /proc/cpuinfo on ARM is populated in file arch/arm/kernel/setup.c in Linux kernel + * Note that some devices may use patched Linux kernels with different feature names. + * However, the names above were checked on a large number of /proc/cpuinfo listings. + */ +static void parse_features( + const char* features_start, + const char* features_end, + struct proc_cpuinfo proc_cpuinfo[restrict static 1]) +{ + const char* feature_start = features_start; + const char* feature_end; + + /* Mark the features as valid */ + proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_FEATURES; + + do { + feature_end = feature_start + 1; + for (; feature_end != features_end; feature_end++) { + if (*feature_end == ' ') { + break; + } + } + const size_t feature_length = (size_t) (feature_end - feature_start); + + switch (feature_length) { + case 3: + if (memcmp(feature_start, "swp", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_SWP; + } else if (memcmp(feature_start, "fpa", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_FPA; + } else if (memcmp(feature_start, "vfp", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_VFP; + } else if (memcmp(feature_start, "tls", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_TLS; + } else if (memcmp(feature_start, "aes", feature_length) == 0) { + proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_AES; + } else { + goto unexpected; + } + break; + case 4: + if (memcmp(feature_start, "half", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_HALF; + } else if (memcmp(feature_start, "edsp", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_EDSP; + } else if (memcmp(feature_start, "java", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_JAVA; 
+ } else if (memcmp(feature_start, "neon", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_NEON; + } else if (memcmp(feature_start, "lpae", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_LPAE; + } else if (memcmp(feature_start, "sha1", feature_length) == 0) { + proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_SHA1; + } else if (memcmp(feature_start, "sha2", feature_length) == 0) { + proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_SHA2; + } else { + goto unexpected; + } + break; + case 5: + if (memcmp(feature_start, "thumb", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_THUMB; + } else if (memcmp(feature_start, "26bit", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_26BIT; + } else if (memcmp(feature_start, "vfpv3", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_VFPV3; + } else if (memcmp(feature_start, "vfpv4", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_VFPV4; + } else if (memcmp(feature_start, "idiva", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_IDIVA; + } else if (memcmp(feature_start, "idivt", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_IDIVT; + } else if (memcmp(feature_start, "pmull", feature_length) == 0) { + proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_PMULL; + } else if (memcmp(feature_start, "idivt", feature_length) == 0) { + proc_cpuinfo->features2 |= PROC_CPUINFO_FEATURE2_CRC32; + } else { + goto unexpected; + } + break; + case 6: + if (memcmp(feature_start, "iwmmxt", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_IWMMXT; + } else if (memcmp(feature_start, "crunch", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_CRUNCH; + } else if (memcmp(feature_start, "vfpd32", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_VFPD32; + } else { + goto unexpected; + } + 
break; + case 7: + if (memcmp(feature_start, "thumbee", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_THUMBEE; + } else if (memcmp(feature_start, "evtstrm", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_EVTSTRM; + } else { + goto unexpected; + } + break; + case 8: + if (memcmp(feature_start, "fastmult", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_FASTMULT; + } else if (memcmp(feature_start, "vfpv3d16", feature_length) == 0) { + proc_cpuinfo->features |= PROC_CPUINFO_FEATURE_VFPV3D16; + } else { + goto unexpected; + } + break; + default: + unexpected: + cpuinfo_log_warning("unexpected /proc/cpuinfo features %.*s is ignored", + (int) feature_length, feature_start); + break; + } + feature_start = feature_end; + for (; feature_start != features_end; feature_start++) { + if (*feature_start != ' ') { + break; + } + } + } while (feature_start != feature_end); +} + +static void parse_cpu_architecture( + const char* cpu_architecture_start, + const char* cpu_architecture_end, + struct proc_cpuinfo proc_cpuinfo[restrict static 1]) +{ + const size_t cpu_architecture_length = (size_t) (cpu_architecture_end - cpu_architecture_start); + + uint32_t architecture = 0; + const char* cpu_architecture_ptr = cpu_architecture_start; + for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) { + const uint32_t digit = (*cpu_architecture_ptr) - '0'; + + /* Verify that CPU architecture is a decimal number */ + if (digit >= 10) { + break; + } + + architecture = architecture * 10 + digit; + } + + if (architecture != 0) { + proc_cpuinfo->architecture.version = architecture; + proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_ARCHITECTURE; + + for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) { + const char feature = *cpu_architecture_ptr; + switch (feature) { + case 'T': + proc_cpuinfo->architecture.flags |= PROC_CPUINFO_ARCH_T; + break; + case 'E': + 
proc_cpuinfo->architecture.flags |= PROC_CPUINFO_ARCH_E; + break; + case 'J': + proc_cpuinfo->architecture.flags |= PROC_CPUINFO_ARCH_J; + break; + case ' ': + case '\t': + /* Ignore whitespace at the end */ + break; + default: + cpuinfo_log_warning("skipped unknown architectural feature '%c' for ARMv%"PRIu32, + feature, architecture); + break; + } + } + } +} + +static void parse_cpu_part( + const char* cpu_part_start, + const char* cpu_part_end, + struct proc_cpuinfo proc_cpuinfo[restrict static 1]) +{ + const size_t cpu_part_length = (size_t) (cpu_part_end - cpu_part_start); + + /* + * CPU part should contain hex prefix (0x) and one to three hex digits. + * I have never seen less than three digits as a value of this field, + * but I don't think it is impossible to see such values in future. + * Value can not contain more than three hex digits since + * Main ID Register (MIDR) assigns only a 12-bit value for CPU part. + */ + if (cpu_part_length < 3 || cpu_part_length > 5) { + cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)", + (int) cpu_part_length, cpu_part_start, cpu_part_length); + return; + } + + /* Verify the presence of hex prefix */ + if (cpu_part_start[0] != '0' || cpu_part_start[1] != 'x') { + cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix", + (int) cpu_part_length, cpu_part_start); + return; + } + + /* Verify that characters after hex prefix are hexadecimal digits and decode them */ + uint32_t cpu_part = 0; + for (const char* digit_ptr = cpu_part_start + 2; digit_ptr != cpu_part_end; digit_ptr++) { + const char digit_char = *digit_ptr; + uint32_t digit; + if (digit_char >= '0' && digit_char <= '9') { + digit = digit_char - '0'; + } else if ((uint32_t) (digit_char - 'A') < 6) { + digit = 10 + (digit_char - 'A'); + } else if ((uint32_t) (digit_char - 'a') < 6) { + digit = 10 + (digit_char - 'a'); + } else { + cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored 
due to unexpected non-hex character %c at offset %zu", + (int) cpu_part_length, cpu_part_start, digit_char, (size_t) (digit_ptr - cpu_part_start)); + return; + } + cpu_part = cpu_part * 16 + digit; + } + + proc_cpuinfo->part = cpu_part; + proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_PART; +} + +static void parse_cpu_implementer( + const char* cpu_implementer_start, + const char* cpu_implementer_end, + struct proc_cpuinfo proc_cpuinfo[restrict static 1]) +{ + const size_t cpu_implementer_length = cpu_implementer_end - cpu_implementer_start; + + /* + * Value should contain hex prefix (0x) and one or two hex digits. + * I have never seen single hex digit as a value of this field, + * but I don't think it is impossible in future. + * Value can not contain more than two hex digits since + * Main ID Register (MIDR) assigns only an 8-bit value for CPU implementer. + */ + switch (cpu_implementer_length) { + case 3: + case 4: + break; + default: + cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)", + (int) cpu_implementer_length, cpu_implementer_start, cpu_implementer_length); + return; + } + + /* Verify the presence of hex prefix */ + if (cpu_implementer_start[0] != '0' || cpu_implementer_start[1] != 'x') { + cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix", + (int) cpu_implementer_length, cpu_implementer_start); + return; + } + + /* Verify that characters after hex prefix are hexadecimal digits and decode them */ + uint32_t cpu_implementer = 0; + for (const char* digit_ptr = cpu_implementer_start + 2; digit_ptr != cpu_implementer_end; digit_ptr++) { + const char digit_char = *digit_ptr; + uint32_t digit; + if (digit_char >= '0' && digit_char <= '9') { + digit = digit_char - '0'; + } else if ((uint32_t) (digit_char - 'A') < 6) { + digit = 10 + (digit_char - 'A'); + } else if ((uint32_t) (digit_char - 'a') < 6) { + digit = 10 + (digit_char - 'a'); + } else { + 
cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c' at offset %zu", + (int) cpu_implementer_length, cpu_implementer_start, digit_char, (size_t) (digit_ptr - cpu_implementer_start)); + return; + } + cpu_implementer = cpu_implementer * 16 + digit; + } + + proc_cpuinfo->implementer = cpu_implementer; + proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_IMPLEMENTER; +} + +static void parse_cpu_variant( + const char* cpu_variant_start, + const char* cpu_variant_end, + struct proc_cpuinfo proc_cpuinfo[restrict static 1]) +{ + const size_t cpu_variant_length = cpu_variant_end - cpu_variant_start; + + /* + * Value should contain hex prefix (0x) and one hex digit. + * Value can not contain more than one hex digits since + * Main ID Register (MIDR) assigns only a 4-bit value for CPU variant. + */ + if (cpu_variant_length != 3) { + cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)", + (int) cpu_variant_length, cpu_variant_start, cpu_variant_length); + return; + } + + /* Skip if there is no hex prefix (0x) */ + if (cpu_variant_start[0] != '0' || cpu_variant_start[1] != 'x') { + cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix", + (int) cpu_variant_length, cpu_variant_start); + return; + } + + /* Check if the value after hex prefix is indeed a hex digit and decode it. 
*/ + const char digit_char = cpu_variant_start[2]; + if ((uint32_t) (digit_char - '0') < 10) { + proc_cpuinfo->variant = (uint32_t) (digit_char - '0'); + } else if ((uint32_t) (digit_char - 'A') < 6) { + proc_cpuinfo->variant = 10 + (uint32_t) (digit_char - 'A'); + } else if ((uint32_t) (digit_char - 'a') < 6) { + proc_cpuinfo->variant = 10 + (uint32_t) (digit_char - 'a'); + } else { + cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c'", + (int) cpu_variant_length, cpu_variant_start, digit_char); + return; + } + + proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_VARIANT; +} + +static void parse_cpu_revision( + const char* cpu_revision_start, + const char* cpu_revision_end, + struct proc_cpuinfo proc_cpuinfo[restrict static 1]) +{ + uint32_t cpu_revision = 0; + for (const char* digit_ptr = cpu_revision_start; digit_ptr != cpu_revision_end; digit_ptr++) { + const uint32_t digit = (uint32_t) (*digit_ptr - '0'); + + /* Verify that the character in CPU revision is a decimal digit */ + if (digit >= 10) { + cpuinfo_log_warning("CPU revision %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu", + (int) (cpu_revision_end - cpu_revision_start), cpu_revision_start, + *digit_ptr, (size_t) (digit_ptr - cpu_revision_start)); + return; + } + + cpu_revision = cpu_revision * 10 + digit; + } + + proc_cpuinfo->revision = cpu_revision; + proc_cpuinfo->valid_mask |= PROC_CPUINFO_VALID_REVISION; +} + +/* + * Decode one of the cache-related numbers reported by Linux kernel + * for pre-ARMv7 architecture. 
+ * An example cache-related information in /proc/cpuinfo: + * + * I size : 32768 + * I assoc : 4 + * I line length : 32 + * I sets : 256 + * D size : 16384 + * D assoc : 4 + * D line length : 32 + * D sets : 128 + * + */ +static void parse_cache_number( + const char* number_start, + const char* number_end, + const char* number_name, + uint32_t number_ptr[restrict static 1], + uint32_t valid_mask[restrict static 1], + uint32_t number_mask) +{ + uint32_t number = 0; + for (const char* digit_ptr = number_start; digit_ptr != number_end; digit_ptr++) { + const uint32_t digit = *digit_ptr - '0'; + if (digit >= 10) { + cpuinfo_log_warning("%s %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu", + number_name, (int) (number_end - number_start), number_start, + *digit_ptr, (size_t) (digit_ptr - number_start)); + return; + } + + number = number * 10 + digit; + } + + if (number == 0) { + cpuinfo_log_warning("%s %.*s in /proc/cpuinfo is ignored due to invalid value of zero reported by the kernel", + number_name, (int) (number_end - number_start), number_start); + } + + /* If the number specifies a cache line size, verify that is a reasonable power of 2 */ + if (number_mask & PROC_CPUINFO_VALID_CACHE_LINE) { + switch (number) { + case 16: + case 32: + case 64: + case 128: + break; + default: + cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected", + number_name, (int) (number_end - number_start), number_start); + } + } + + *number_ptr = number; + *valid_mask |= number_mask; +} + +/* + * Decode a single line of /proc/cpuinfo information. 
+ * Lines have format <words-with-spaces>[ ]*:[ ]<space-separated words> + * An example of /proc/cpuinfo (from Pandaboard-ES): + * + * Processor : ARMv7 Processor rev 10 (v7l) + * processor : 0 + * BogoMIPS : 1392.74 + * + * processor : 1 + * BogoMIPS : 1363.33 + * + * Features : swp half thumb fastmult vfp edsp thumbee neon vfpv3 + * CPU implementer : 0x41 + * CPU architecture: 7 + * CPU variant : 0x2 + * CPU part : 0xc09 + * CPU revision : 10 + * + * Hardware : OMAP4 Panda board + * Revision : 0020 + * Serial : 0000000000000000 + */ +static uint32_t parse_line( + const char* line_start, + const char* line_end, + uint32_t processor_number, + struct proc_cpuinfo* proc_cpuinfo) +{ + /* Empty line. Skip. */ + if (line_start == line_end) { + return processor_number; + } + + /* Search for ':' on the line. */ + const char* separator = line_start; + for (; separator != line_end; separator++) { + if (*separator == ':') { + break; + } + } + /* Skip line if no ':' separator was found. */ + if (separator == line_end) { + cpuinfo_log_warning("Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found", + (int) (line_end - line_start), line_start); + return processor_number; + } + + /* Skip trailing spaces in key part. */ + const char* key_end = separator; + for (; key_end != line_start; key_end--) { + if (key_end[-1] != ' ' && key_end[-1] != '\t') { + break; + } + } + /* Skip line if key contains nothing but spaces. */ + if (key_end == line_start) { + cpuinfo_log_warning("Line %.*s in /proc/cpuinfo is ignored: key contains only spaces", + (int) (line_end - line_start), line_start); + return processor_number; + } + + /* Skip leading spaces in value part. */ + const char* value_start = separator + 1; + for (; value_start != line_end; value_start++) { + if (*value_start != ' ') { + break; + } + } + /* Value part contains nothing but spaces. Skip line. 
*/ + if (value_start == line_end) { + cpuinfo_log_warning("Line %.*s in /proc/cpuinfo is ignored: value contains only spaces", + (int) (line_end - line_start), line_start); + return processor_number; + } + + /* Skip trailing spaces in value part (if any) */ + const char* value_end = line_end; + for (; value_end != separator; value_end--) { + if (value_end[-1] != ' ') { + break; + } + } + + const size_t key_length = key_end - line_start; + switch (key_length) { + case 6: + if (memcmp(line_start, "I size", key_length) == 0) { + parse_cache_number(value_start, value_end, + "instruction cache size", &proc_cpuinfo->cache.i_size, + &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_ICACHE_SIZE); + } else if (memcmp(line_start, "I sets", key_length) == 0) { + parse_cache_number(value_start, value_end, + "instruction cache sets", &proc_cpuinfo->cache.i_sets, + &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_ICACHE_SETS); + } else if (memcmp(line_start, "D size", key_length) == 0) { + parse_cache_number(value_start, value_end, + "data cache size", &proc_cpuinfo->cache.d_size, + &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_DCACHE_SIZE); + } else if (memcmp(line_start, "D sets", key_length) == 0) { + parse_cache_number(value_start, value_end, + "data cache sets", &proc_cpuinfo->cache.d_sets, + &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_DCACHE_SETS); + } else if (memcmp(line_start, "Serial", key_length) == 0) { + /* Usually contains just zeros, useless */ + } else { + goto unknown; + } + break; + case 7: + if (memcmp(line_start, "I assoc", key_length) == 0) { + parse_cache_number(value_start, value_end, + "instruction cache associativity", &proc_cpuinfo->cache.i_assoc, + &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_ICACHE_WAYS); + } else if (memcmp(line_start, "D assoc", key_length) == 0) { + parse_cache_number(value_start, value_end, + "data cache associativity", &proc_cpuinfo->cache.d_assoc, + &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_DCACHE_WAYS); + } else { + goto 
unknown; + } + break; + case 8: + if (memcmp(line_start, "CPU part", key_length) == 0) { + parse_cpu_part(value_start, value_end, proc_cpuinfo); + } else if (memcmp(line_start, "Features", key_length) == 0) { + parse_features(value_start, value_end, proc_cpuinfo); + } else if (memcmp(line_start, "BogoMIPS", key_length) == 0) { + /* BogoMIPS is useless, don't parse */ + } else if (memcmp(line_start, "Hardware", key_length) == 0) { + /* TODO: parse to extract SoC name */ + } else if (memcmp(line_start, "Revision", key_length) == 0) { + /* Board revision, no use for now */ + } else { + goto unknown; + } + break; + case 9: + if (memcmp(line_start, "processor", key_length) == 0) { + const uint32_t new_processor_number = + parse_processor_number(value_start, value_end, proc_cpuinfo); + const uint32_t new_processors_count = new_processor_number + 1; + if (new_processors_count <= processor_number && processor_number != 0) { + cpuinfo_log_warning("ignored unexpectedly low processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_number, processor_number); + } else { + if (new_processors_count > processor_number + 1) { + cpuinfo_log_info("unexpectedly high processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_number, processor_number); + return new_processors_count; + } + return new_processors_count; + } + } else if (memcmp(line_start, "Processor", key_length) == 0) { + /* TODO: parse to fix misreported architecture, similar to Android's cpufeatures */ + } else { + goto unknown; + } + break; + case 11: + if (memcmp(line_start, "CPU variant", key_length) == 0) { + parse_cpu_variant(value_start, value_end, proc_cpuinfo); + } else { + goto unknown; + } + break; + case 12: + if (memcmp(line_start, "CPU revision", key_length) == 0) { + parse_cpu_revision(value_start, value_end, proc_cpuinfo); + } else { + goto unknown; + } + break; + case 13: + if (memcmp(line_start, "I line length", key_length) == 0) { + 
parse_cache_number(value_start, value_end, + "instruction cache line size", &proc_cpuinfo->cache.i_line_length, + &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_ICACHE_LINE); + } else if (memcmp(line_start, "D line length", key_length) == 0) { + parse_cache_number(value_start, value_end, + "data cache line size", &proc_cpuinfo->cache.d_line_length, + &proc_cpuinfo->valid_mask, PROC_CPUINFO_VALID_DCACHE_LINE); + } else { + goto unknown; + } + break; + case 15: + if (memcmp(line_start, "CPU implementer", key_length) == 0) { + parse_cpu_implementer(value_start, value_end, proc_cpuinfo); + } else if (memcmp(line_start, "CPU implementor", key_length) == 0) { + parse_cpu_implementer(value_start, value_end, proc_cpuinfo); + } else { + goto unknown; + } + break; + case 16: + if (memcmp(line_start, "CPU architecture", key_length) == 0) { + parse_cpu_architecture(value_start, value_end, proc_cpuinfo); + } else { + goto unknown; + } + break; + default: + unknown: + cpuinfo_log_debug("unknown /proc/cpuinfo key: %.*s", (int) key_length, line_start); + + } + return processor_number; +} + +struct proc_cpuinfo* cpuinfo_arm_linux_parse_proc_cpuinfo(const char* filename, uint32_t processors_count_ptr[restrict static 1]) { + int file = -1; + struct proc_cpuinfo* processors = NULL; + struct proc_cpuinfo* result = NULL; + uint32_t processors_capacity = 8; + uint32_t processors_count = 0; + char buffer[BUFFER_SIZE]; + + processors = malloc(processors_capacity * sizeof(struct proc_cpuinfo)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for /proc/cpuinfo data", + processors_capacity * sizeof(struct proc_cpuinfo)); + goto cleanup; + } + + cpuinfo_log_debug("parsing cpu info from file %s", filename); + file = open(filename, O_RDONLY); + if (file == -1) { + cpuinfo_log_error("failed to open %s: %s", filename, strerror(errno)); + goto cleanup; + } + + size_t position = 0; + const char* buffer_end = &buffer[BUFFER_SIZE]; + char* data_start = buffer; + ssize_t 
bytes_read; + do { + bytes_read = read(file, data_start, (size_t) (buffer_end - data_start)); + if (bytes_read < 0) { + cpuinfo_log_error("failed to read file %s at position %zu: %s", filename, position, strerror(errno)); + goto cleanup; + } + + position += (size_t) bytes_read; + const char* data_end = data_start + (size_t) bytes_read; + const char* line_start = buffer; + + if (bytes_read == 0) { + /* No more data in the file: process the remaining text in the buffer as a single entry */ + const char* line_end = data_end; + const uint32_t new_processors_count = + parse_line(line_start, line_end, processors_count, &processors[processors_count - 1]); + if (new_processors_count > processors_capacity) { + processors = realloc(processors, new_processors_count * sizeof(struct proc_cpuinfo)); + } + memset(&processors[processors_count], 0, (new_processors_count - processors_count) * sizeof(struct proc_cpuinfo)); + processors_count = processors_capacity = new_processors_count; + } else { + const char* line_end; + do { + /* Find the end of the entry, as indicated by a comma (',') */ + for (line_end = line_start; line_end != data_end; line_end++) { + if (*line_end == '\n') { + break; + } + } + + /* + * If we located separator at the end of the entry, parse it. + * Otherwise, there may be more data at the end; read the file once again. 
+ */ + if (line_end != data_end) { + const uint32_t new_processors_count = + parse_line(line_start, line_end, processors_count, &processors[processors_count - 1]); + if (new_processors_count > processors_capacity) { + processors = realloc(processors, new_processors_count * sizeof(struct proc_cpuinfo)); + } + memset(&processors[processors_count], 0, (new_processors_count - processors_count) * sizeof(struct proc_cpuinfo)); + processors_count = processors_capacity = new_processors_count; + line_start = line_end + 1; + } + } while (line_end != data_end); + + /* Move remaining partial line data at the end to the beginning of the buffer */ + const size_t line_length = (size_t) (line_end - line_start); + memmove(buffer, line_start, line_length); + data_start = &buffer[line_length]; + } + + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for /proc/cpuinfo data", + processors_capacity * sizeof(struct proc_cpuinfo)); + goto cleanup; + } + } while (bytes_read != 0); + + + uint32_t last_i = 0; + for (uint32_t i = processors_count; i != 0; i--) { + if ((processors[i - 1].valid_mask & PROC_CPUINFO_VALID_INFO) == PROC_CPUINFO_VALID_INFO) { + last_i = i; + break; + } + } + + if (last_i == 0) { + cpuinfo_log_error("none of the %"PRIu32" processors reported in /proc/cpuinfo were successfully parsed", + processors_count); + goto cleanup; + } + + for (uint32_t i = last_i - 1; i < processors_count; i++) { + processors[i] = processors[last_i - 1]; + } + for (uint32_t i = last_i; i != 0; i--) { + if ((processors[i - 1].valid_mask & PROC_CPUINFO_VALID_INFO) == PROC_CPUINFO_VALID_INFO) { + last_i = i; + } else { + processors[i - 1] = processors[last_i - 1]; + } + } + + /* Commit */ + result = processors; + processors = NULL; + *processors_count_ptr = processors_count; + +cleanup: + free(processors); + processors = NULL; + if (file != -1) { + close(file); + file = -1; + } + return result; +} diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c new file mode 
100644 index 0000000..53f3e1e --- /dev/null +++ b/src/arm/linux/init.c @@ -0,0 +1,177 @@ +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include <cpuinfo.h> +#include <arm/linux/api.h> +#include <arm/api.h> +#include <linux/api.h> +#include <api.h> +#include <log.h> + + +struct cpuinfo_arm_isa cpuinfo_isa = { 0 }; + + +void cpuinfo_arm_linux_init(void) { + uint32_t proc_cpuinfo_count = 0; + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + uint32_t processors_count = 0; + uint32_t l1i_count = 0; + uint32_t l1d_count = 0; + uint32_t l2_count = 0; + + struct proc_cpuinfo* proc_cpuinfo_entries = cpuinfo_arm_linux_parse_proc_cpuinfo("/proc/cpuinfo", &proc_cpuinfo_count); + + if (proc_cpuinfo_count != 0) { + cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( + proc_cpuinfo_entries, proc_cpuinfo_count, &cpuinfo_isa); + processors_count = proc_cpuinfo_count; + + processors = calloc(processors_count, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + proc_cpuinfo_count * sizeof(struct cpuinfo_processor), proc_cpuinfo_count); + goto cleanup; + } + for (uint32_t i = 0; i < proc_cpuinfo_count; i++) { + cpuinfo_arm_decode_vendor_uarch( + proc_cpuinfo_entries[i].implementer, + proc_cpuinfo_entries[i].part, + !!(proc_cpuinfo_entries[i].features & PROC_CPUINFO_FEATURE_VFPV4), + &processors[i].vendor, &processors[i].uarch); + processors[i].topology = (struct cpuinfo_topology) { + .thread_id = 0, + .core_id = i, + .package_id = 0, + .linux_id = (int) i + }; + } + + /* + * Assumptions: + * - At most 2 cache levels + * - Either all or no cores have L1I/L1D/L2 cache. + * - If present, L1 cache is private to the core. + * - If present, L2 cache is shared between all cores. 
+ */ + struct cpuinfo_cache private_l1i = { 0 }; + struct cpuinfo_cache private_l1d = { 0 }; + struct cpuinfo_cache shared_l2 = { 0 }; + cpuinfo_arm_decode_cache( + processors[0].uarch, + proc_cpuinfo_count, + proc_cpuinfo_entries[0].part, + proc_cpuinfo_entries[0].architecture.version, + &private_l1i, &private_l1d, &shared_l2); + if (private_l1i.size != 0) { + l1i_count = proc_cpuinfo_count; + } + if (private_l1d.size != 0) { + l1d_count = proc_cpuinfo_count; + if (shared_l2.size != 0) { + l2_count = 1; + } + } + + cpuinfo_log_info("detected %"PRIu32" L1I caches", l1i_count); + cpuinfo_log_info("detected %"PRIu32" L1D caches", l1d_count); + cpuinfo_log_info("detected %"PRIu32" L2 caches", l2_count); + + if (l1i_count != 0) { + l1i = malloc(l1i_count * sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + l1i_count * sizeof(struct cpuinfo_cache), l1i_count); + goto cleanup; + } + for (uint32_t i = 0; i < l1i_count; i++) { + /* L1I reported in /proc/cpuinfo overrides defaults */ + if ((proc_cpuinfo_entries[i].valid_mask & PROC_CPUINFO_VALID_ICACHE) == PROC_CPUINFO_VALID_ICACHE) { + l1i[i] = (struct cpuinfo_cache) { + .size = proc_cpuinfo_entries[i].cache.i_size, + .associativity = proc_cpuinfo_entries[i].cache.i_assoc, + .sets = proc_cpuinfo_entries[i].cache.i_sets, + .partitions = 1, + .line_size = proc_cpuinfo_entries[i].cache.i_line_length + }; + } else { + cpuinfo_arm_decode_cache( + processors[i].uarch, + proc_cpuinfo_count, + proc_cpuinfo_entries[i].part, + proc_cpuinfo_entries[i].architecture.version, + &l1i[i], &private_l1d, &shared_l2); + } + l1i[i].thread_start = i; + l1i[i].thread_count = 1; + } + } + if (l1d_count != 0) { + l1d = malloc(l1d_count * sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + l1d_count * sizeof(struct cpuinfo_cache), l1d_count); + goto cleanup; + 
} + for (uint32_t i = 0; i < l1d_count; i++) { + /* L1D reported in /proc/cpuinfo overrides defaults */ + if ((proc_cpuinfo_entries[i].valid_mask & PROC_CPUINFO_VALID_DCACHE) == PROC_CPUINFO_VALID_DCACHE) { + l1d[i] = (struct cpuinfo_cache) { + .size = proc_cpuinfo_entries[i].cache.d_size, + .associativity = proc_cpuinfo_entries[i].cache.d_assoc, + .sets = proc_cpuinfo_entries[i].cache.d_sets, + .partitions = 1, + .line_size = proc_cpuinfo_entries[i].cache.d_line_length + }; + } else { + cpuinfo_arm_decode_cache( + processors[i].uarch, + proc_cpuinfo_count, + proc_cpuinfo_entries[i].part, + proc_cpuinfo_entries[i].architecture.version, + &private_l1i, &l1d[i], &shared_l2); + } + l1d[i].thread_start = i; + l1d[i].thread_count = 1; + } + } + if (l2_count != 0) { + l2 = malloc(l2_count * sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + /* L2 cache is never reported in /proc/cpuinfo; use defaults */ + *l2 = shared_l2; + l2->thread_start = 0; + l2->thread_count = proc_cpuinfo_count; + } + } + + /* Commit */ + cpuinfo_processors = processors; + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + + cpuinfo_processors_count = processors_count; + cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + + processors = NULL; + l1i = l1d = l2 = NULL; + +cleanup: + free(processors); + free(l1i); + free(l1d); + free(l2); + free(proc_cpuinfo_entries); +} diff --git a/src/arm/linux/isa.c b/src/arm/linux/isa.c new file mode 100644 index 0000000..2a2af3b --- /dev/null +++ b/src/arm/linux/isa.c @@ -0,0 +1,153 @@ +#include <stdint.h> + +#include <arm/linux/api.h> +#include <arm/linux/cp.h> +#include <log.h> + + +void 
cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( + const struct proc_cpuinfo proc_cpuinfo[restrict static 1], + uint32_t proc_cpuinfo_count, + struct cpuinfo_arm_isa isa[restrict static 1]) +{ + const uint32_t cpu_part = proc_cpuinfo->part; + const uint32_t cpu_implementer = proc_cpuinfo->implementer; + uint32_t architecture = proc_cpuinfo->architecture.version; + if (architecture >= 8) { + /* + * ARMv7 code running on ARMv8: IDIV, VFP, NEON are always supported, + * but only ARMv8 optional features are reported. + */ + isa->armv5e = true; + isa->armv6 = true; + isa->armv6k = true; + isa->armv7 = true; + isa->armv7mp = true; + isa->thumb = true; + isa->thumb2 = true; + isa->vfpv3 = true; + isa->d32 = true; + isa->fp16 = true; + isa->fma = true; + isa->neon = true; + } else { + /* ARMv7 or lower: use feature flags to detect optional features */ + + /* + * ARM11 (ARM 1136/1156/1176/11 MPCore) processors can report v7 architecture + * even though they support only ARMv6 instruction set. + * Detecting this situation by CPU implementer == 'A' (ARM) and CPU part 0xBXX. 
+ */ + if (architecture == 7 && cpu_implementer == 'A' && (cpu_part & 0xF00) == 0xB00) { + cpuinfo_log_warning("Kernel-reported architecture ARMv7 ignored due to mismatch with processor microarchitecture (ARM11)"); + architecture = 6; + } + + const uint32_t features = proc_cpuinfo->features; + if (architecture >= 6 || (features & PROC_CPUINFO_FEATURE_EDSP) || (proc_cpuinfo->architecture.flags & PROC_CPUINFO_ARCH_E)) { + cpuinfo_isa.armv5e = true; + } + if (architecture >= 6) { + cpuinfo_isa.armv6 = true; + } + if (architecture >= 7) { + cpuinfo_isa.armv6k = true; + cpuinfo_isa.armv7 = true; + + if (proc_cpuinfo_count > 1) { + cpuinfo_isa.armv7mp = true; + } + } + + if (features & PROC_CPUINFO_FEATURE_IWMMXT) { + const uint32_t wcid = read_wcid(); + const uint32_t coprocessor_type = (wcid >> 8) & UINT32_C(0xFF); + if (coprocessor_type >= 0x10) { + cpuinfo_isa.wmmx = true; + if (coprocessor_type >= 0x20) { + cpuinfo_isa.wmmx2 = true; + } + } else { + cpuinfo_log_warning("WMMX ISA disabled: OS reported iwmmxt feature, " + "but WCID coprocessor type 0x%"PRIx32" indicates no WMMX support", + coprocessor_type); + } + } + + if ((features & PROC_CPUINFO_FEATURE_THUMB) || (proc_cpuinfo->architecture.flags & PROC_CPUINFO_ARCH_T)) { + cpuinfo_isa.thumb = true; + + /* + * There is no separate feature flag for Thumb 2. + * All ARMv7 processors and ARM 1156 (CPU part 0xB56) support Thumb 2. 
+ */ + if (architecture >= 7 || (cpu_implementer == 'A' && cpu_part == 0xB56)) { + cpuinfo_isa.thumb2 = true; + } + } + if (features & PROC_CPUINFO_FEATURE_THUMBEE) { + cpuinfo_isa.thumbee = true; + } + if ((features & PROC_CPUINFO_FEATURE_JAVA) || (proc_cpuinfo->architecture.flags & PROC_CPUINFO_ARCH_J)) { + cpuinfo_isa.jazelle = true; + } + + if ((features & PROC_CPUINFO_FEATURE_IDIV) == PROC_CPUINFO_FEATURE_IDIV) { + cpuinfo_isa.idiv = true; + } + const uint32_t vfp_mask = \ + PROC_CPUINFO_FEATURE_VFP | PROC_CPUINFO_FEATURE_VFPV3 | PROC_CPUINFO_FEATURE_VFPV3D16 | \ + PROC_CPUINFO_FEATURE_VFPD32 | PROC_CPUINFO_FEATURE_VFPV4 | PROC_CPUINFO_FEATURE_NEON; + if (features & vfp_mask) { + const uint32_t vfpv3_mask = PROC_CPUINFO_FEATURE_VFPV3 | PROC_CPUINFO_FEATURE_VFPV3D16 | \ + PROC_CPUINFO_FEATURE_VFPD32 | PROC_CPUINFO_FEATURE_VFPV4 | PROC_CPUINFO_FEATURE_NEON; + if (architecture >= 7 | (features & vfpv3_mask)) { + cpuinfo_isa.vfpv3 = true; + + const uint32_t d32_mask = PROC_CPUINFO_FEATURE_VFPD32 | PROC_CPUINFO_FEATURE_NEON; + if (features & d32_mask) { + cpuinfo_isa.d32 = true; + } + } else { + const uint32_t fpsid = read_fpsid(); + const uint32_t subarchitecture = (fpsid >> 16) & UINT32_C(0x7F); + if (subarchitecture >= 0x01) { + cpuinfo_isa.vfpv2 = true; + } + } + } + if (features & PROC_CPUINFO_FEATURE_NEON) { + cpuinfo_isa.neon = true; + } + + /* + * There is no separate feature flag for FP16 support. + * VFPv4 implies VFPv3-FP16 support (and in practice, NEON-HP as well). + * Additionally, ARM Cortex-A9 (CPU part 0xC09) supports FP16. 
+ */ + if ((features & PROC_CPUINFO_FEATURE_VFPV4) || (cpu_implementer == 'A' && cpu_part == 0xC09)) { + cpuinfo_isa.fp16 = true; + } + + if (features & PROC_CPUINFO_FEATURE_VFPV4) { + cpuinfo_isa.fma = true; + } + } + + const uint32_t features2 = proc_cpuinfo->features2; + if (features2 & PROC_CPUINFO_FEATURE2_AES) { + cpuinfo_isa.aes = true; + } + if (features2 & PROC_CPUINFO_FEATURE2_PMULL) { + cpuinfo_isa.pmull = true; + } + if (features2 & PROC_CPUINFO_FEATURE2_SHA1) { + cpuinfo_isa.sha1 = true; + } + if (features2 & PROC_CPUINFO_FEATURE2_SHA2) { + cpuinfo_isa.sha2 = true; + } + if (features2 & PROC_CPUINFO_FEATURE2_CRC32) { + cpuinfo_isa.crc32 = true; + } +} diff --git a/src/arm/uarch.c b/src/arm/uarch.c new file mode 100644 index 0000000..5dbd4ec --- /dev/null +++ b/src/arm/uarch.c @@ -0,0 +1,143 @@ +#include <stdint.h> + +#include <arm/api.h> +#include <log.h> + + +void cpuinfo_arm_decode_vendor_uarch( + uint32_t cpu_implementer, + uint32_t cpu_part, + bool has_vfpv4, + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]) +{ + switch (cpu_implementer) { + case 'A': + *vendor = cpuinfo_vendor_arm; + switch (cpu_part) { + case 0xC05: + *uarch = cpuinfo_uarch_cortex_a5; + break; + case 0xC07: + *uarch = cpuinfo_uarch_cortex_a7; + break; + case 0xC08: + *uarch = cpuinfo_uarch_cortex_a8; + break; + case 0xC09: + *uarch = cpuinfo_uarch_cortex_a9; + break; + case 0xC0C: + *uarch = cpuinfo_uarch_cortex_a12; + break; + case 0xC0E: + *uarch = cpuinfo_uarch_cortex_a17; + break; + case 0xC0F: + *uarch = cpuinfo_uarch_cortex_a15; + break; + case 0xD01: + *uarch = cpuinfo_uarch_cortex_a32; + break; + case 0xD03: + *uarch = cpuinfo_uarch_cortex_a53; + break; + case 0xD04: + *uarch = cpuinfo_uarch_cortex_a35; + break; + case 0xD07: + *uarch = cpuinfo_uarch_cortex_a57; + break; + case 0xD08: + *uarch = cpuinfo_uarch_cortex_a72; + break; + case 0xD09: + *uarch = cpuinfo_uarch_cortex_a73; + break; + default: + switch (cpu_part >> 8) { 
+ case 7: + *uarch = cpuinfo_uarch_arm7; + break; + case 9: + *uarch = cpuinfo_uarch_arm9; + break; + case 11: + *uarch = cpuinfo_uarch_arm11; + break; + default: + cpuinfo_log_warning("unknown ARM CPU part 0x%03"PRIx32" ignored", cpu_part); + } + } + break; + case 'i': + *vendor = cpuinfo_vendor_intel; + switch (cpu_part >> 8) { + case 2: /* PXA 210/25X/26X */ + case 4: /* PXA 27X */ + case 6: /* PXA 3XX */ + *uarch = cpuinfo_uarch_xscale; + break; + default: + cpuinfo_log_warning("unknown Intel CPU part 0x%03"PRIx32" ignored", cpu_part); + } + break; + case 'N': + *vendor = cpuinfo_vendor_nvidia; + switch (cpu_part) { + case 0x000: + *uarch = cpuinfo_uarch_denver; + break; + default: + cpuinfo_log_warning("unknown nVidia CPU part 0x%03"PRIx32" ignored", cpu_part); + } + break; + case 'Q': + *vendor = cpuinfo_vendor_qualcomm; + switch (cpu_part) { + case 0x00F: + /* Mostly Scorpions, but some Cortex A5 may report this value as well */ + if (has_vfpv4) { + /* Unlike Scorpion, Cortex-A5 comes with VFPv4 */ + *vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a5; + } else { + *uarch = cpuinfo_uarch_scorpion; + } + break; + case 0x02D: /* Dual-core Scorpions */ + *uarch = cpuinfo_uarch_scorpion; + break; + case 0x04D: /* Dual-core Krait */ + case 0x06F: /* Quad-core Krait */ + *uarch = cpuinfo_uarch_krait; + break; + case 0x205: /* Low-power Kryo "Silver" */ + case 0x211: /* High-performance Kryo "Gold" */ + case 0x800: /* Low-power Kryo 800 */ + case 0x801: /* High-performance Kryo 800 */ + *uarch = cpuinfo_uarch_kryo; + break; + default: + cpuinfo_log_warning("unknown Qualcomm CPU part 0x%03"PRIx32" ignored", cpu_part); + } + break; + case 'S': + *vendor = cpuinfo_vendor_samsung; + switch (cpu_part) { + case 0x001: + *uarch = cpuinfo_uarch_mongoose; + break; + default: + cpuinfo_log_warning("unknown Samsung CPU part 0x%03"PRIx32" ignored", cpu_part); + } + break; + case 'V': + *vendor = cpuinfo_vendor_marvell; + cpuinfo_log_warning("unknown Marvell CPU 
part 0x%03"PRIx32" ignored", cpu_part); + break; + default: + cpuinfo_log_warning("unknown CPU implementer '%c' (0x%02"PRIx32") with CPU part 0x%03"PRIx32" ignored", + (char) cpu_implementer, cpu_implementer, cpu_part); + } +} @@ -22,6 +22,12 @@ void CPUINFO_ABI cpuinfo_initialize(void) { #else #error Unsupported target OS #endif +#elif CPUINFO_ARCH_ARM + #if defined(__linux__) + pthread_once(&init_guard, &cpuinfo_arm_linux_init); + #else + #error Unsupported target OS + #endif #else #error Unsupported target architecture #endif diff --git a/src/linux/cpuset.c b/src/linux/cpuset.c index 9c01a8e..130dc59 100644 --- a/src/linux/cpuset.c +++ b/src/linux/cpuset.c @@ -175,9 +175,9 @@ bool cpuinfo_linux_parse_cpuset(const char* filename, cpu_set_t* cpuset) { } } while (entry_end != data_end); - /* Copy remaining partial entry data at the end to the beginning of the buffer */ + /* Move remaining partial entry data at the end to the beginning of the buffer */ const size_t entry_length = (size_t) (entry_end - entry_start); - memcpy(buffer, entry_start, entry_length); + memmove(buffer, entry_start, entry_length); data_start = &buffer[entry_length]; } } while (bytes_read != 0); diff --git a/tools/cpu-info.c b/tools/cpu-info.c index 5d54f6c..a464b1d 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -162,6 +162,9 @@ int main(int argc, char** argv) { case cpuinfo_uarch_jaguar: printf("uArch: Jaguar/Puma\n"); break; + case cpuinfo_uarch_xscale: + printf("uArch: XScale\n"); + break; case cpuinfo_uarch_arm7: printf("uArch: ARM7\n"); break; @@ -219,6 +222,9 @@ int main(int argc, char** argv) { case cpuinfo_uarch_kryo: printf("uArch: Kryo\n"); break; + case cpuinfo_uarch_denver: + printf("uArch: Denver\n"); + break; case cpuinfo_uarch_mongoose: printf("uArch: Mongoose\n"); break; diff --git a/tools/isa-info.c b/tools/isa-info.c index 60adb5a..76236db 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -121,4 +121,37 @@ int main(int argc, char** argv) { #endif /* 
CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */ +#if CPUINFO_ARCH_ARM + printf("Instruction sets:\n"); + printf("\tThumb: %s\n", cpuinfo_isa.thumb ? "yes" : "no"); + printf("\tThumb 2: %s\n", cpuinfo_isa.thumb2 ? "yes" : "no"); + printf("\tThumb EE: %s\n", cpuinfo_isa.thumbee ? "yes" : "no"); + printf("\tJazelle: %s\n", cpuinfo_isa.jazelle ? "yes" : "no"); + printf("\tARMv5E: %s\n", cpuinfo_isa.armv5e ? "yes" : "no"); + printf("\tARMv6: %s\n", cpuinfo_isa.armv6 ? "yes" : "no"); + printf("\tARMv6-K: %s\n", cpuinfo_isa.armv6k ? "yes" : "no"); + printf("\tARMv7: %s\n", cpuinfo_isa.armv7 ? "yes" : "no"); + printf("\tARMv7 MP: %s\n", cpuinfo_isa.armv7mp ? "yes" : "no"); + printf("\tIDIV: %s\n", cpuinfo_isa.idiv ? "yes" : "no"); + + printf("Floating-Point support:\n"); + printf("\tVFPv2: %s\n", cpuinfo_isa.vfpv2 ? "yes" : "no"); + printf("\tVFPv3: %s\n", cpuinfo_isa.vfpv3 ? "yes" : "no"); + printf("\tD32: %s\n", cpuinfo_isa.d32 ? "yes" : "no"); + printf("\tFP16: %s\n", cpuinfo_isa.fp16 ? "yes" : "no"); + printf("\tFMA: %s\n", cpuinfo_isa.fma ? "yes" : "no"); + + printf("SIMD extensions:\n"); + printf("\tWMMX: %s\n", cpuinfo_isa.wmmx ? "yes" : "no"); + printf("\tWMMX 2: %s\n", cpuinfo_isa.wmmx2 ? "yes" : "no"); + printf("\tNEON: %s\n", cpuinfo_isa.neon ? "yes" : "no"); + + printf("Cryptography extensions:\n"); + printf("\tAES: %s\n", cpuinfo_isa.aes ? "yes" : "no"); + printf("\tSHA1: %s\n", cpuinfo_isa.sha1 ? "yes" : "no"); + printf("\tSHA2: %s\n", cpuinfo_isa.sha2 ? "yes" : "no"); + printf("\tPMULL: %s\n", cpuinfo_isa.pmull ? "yes" : "no"); + printf("\tCRC32: %s\n", cpuinfo_isa.crc32 ? "yes" : "no"); +#endif /* CPUINFO_ARCH_ARM */ + } |