diff options
author | Marat Dukhan <maratek@gmail.com> | 2017-03-04 01:51:42 -0500 |
---|---|---|
committer | Marat Dukhan <maratek@gmail.com> | 2017-03-04 01:51:42 -0500 |
commit | 3045d4f9c3d242bfa726c1b0f6e6e58da53e7ad4 (patch) | |
tree | 293787c36cbd72fa54468c34ce4a81fb850822f7 | |
parent | 1747f8607a8c0fc408043b3046953cd439e4bcca (diff) | |
download | cpuinfo-3045d4f9c3d242bfa726c1b0f6e6e58da53e7ad4.tar.gz |
Working x86-64 + Linux version
-rwxr-xr-x | configure.py | 16 | ||||
-rw-r--r-- | include/cpuinfo.h | 307 | ||||
-rw-r--r-- | src/api.h | 5 | ||||
-rw-r--r-- | src/cache.c | 41 | ||||
-rw-r--r-- | src/init.c | 15 | ||||
-rw-r--r-- | src/linux/api.h | 5 | ||||
-rw-r--r-- | src/linux/cpuset.c | 191 | ||||
-rw-r--r-- | src/mach/api.h | 1 | ||||
-rw-r--r-- | src/x86/api.h | 7 | ||||
-rw-r--r-- | src/x86/linux/init.c | 383 | ||||
-rw-r--r-- | src/x86/mach/init.c | 10 | ||||
-rw-r--r-- | src/x86/topology.c | 18 |
12 files changed, 860 insertions, 139 deletions
diff --git a/configure.py b/configure.py index ecaf559..717daac 100755 --- a/configure.py +++ b/configure.py @@ -10,13 +10,17 @@ parser.add_argument("--log", dest="log_level", def main(args): options = parser.parse_args(args) build = confu.Build.from_options(options) - build.add_macro("CPUINFO_LOG_LEVEL", - value={"none": 0, "error": 1, "warning": 2, "info": 3, "debug": 4}[options.log_level]) + + macros = { + "CPUINFO_LOG_LEVEL": {"none": 0, "error": 1, "warning": 2, "info": 3, "debug": 4}[options.log_level] + } + if build.target.is_linux: + macros["_GNU_SOURCE"] = 1 build.export_cpath("include", ["cpuinfo.h"]) - with build.options(source_dir="src", extra_include_dirs="src"): - sources = ["init.c", "log.c"] + with build.options(source_dir="src", macros=macros, extra_include_dirs="src"): + sources = ["init.c", "cache.c", "log.c"] if build.target.is_x86_64: sources += [ "x86/init.c", "x86/info.c", "x86/vendor.c", "x86/uarch.c", "x86/topology.c", @@ -24,9 +28,13 @@ def main(args): ] if build.target.is_macos: sources += ["x86/mach/init.c"] + elif build.target.is_linux: + sources += ["x86/linux/init.c"] sources.append("x86/isa.c" if not build.target.is_nacl else "x86/nacl/isa.c") if build.target.is_macos: sources += ["mach/topology.c"] + if build.target.is_linux: + sources += ["linux/cpuset.c"] build.static_library("cpuinfo", map(build.cc, sources)) with build.options(source_dir="tools", deps=build): diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 904b681..8238f08 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -186,17 +186,43 @@ #define CPUINFO_CACHE_INCLUSIVE 0x00000002 #define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004 +enum cpuinfo_cache_level { + cpuinfo_cache_level_1i = 0, + cpuinfo_cache_level_1d = 1, + cpuinfo_cache_level_2 = 2, + cpuinfo_cache_level_3 = 3, + cpuinfo_cache_level_4 = 4, + cpuinfo_cache_level_max = 5, +}; + struct cpuinfo_cache { + /** Cache size in bytes */ uint32_t size; + /** Number of ways of associativity */ uint32_t 
associativity; + /** Number of sets */ uint32_t sets; + /** Number of partitions */ uint32_t partitions; + /** Line size in bytes */ uint32_t line_size; + /** + * Binary characteristics of the cache (unified cache, inclusive cache, cache with complex indexing). + * + * @see CPUINFO_CACHE_UNIFIED, CPUINFO_CACHE_INCLUSIVE, CPUINFO_CACHE_COMPLEX_INDEXING + */ uint32_t flags; + /** Index of the first logical processor that shares this cache */ uint32_t thread_start; + /** Number of logical processors that share this cache */ uint32_t thread_count; }; +struct cpuinfo_caches { + uint32_t count; + const struct cpuinfo_cache* instances; +}; + struct cpuinfo_trace_cache { uint32_t uops; uint32_t associativity; @@ -215,241 +241,288 @@ struct cpuinfo_tlb { uint64_t pages; }; -/** @name Processor vendor information */ +/** Vendor of processor core design */ enum cpuinfo_vendor { - /** @brief Processor vendor is not known to the library, or the library failed to get vendor information from the OS. */ + /** Processor vendor is not known to the library, or the library failed to get vendor information from the OS. */ cpuinfo_vendor_unknown = 0, /* Active vendors of modern CPUs */ - /** @brief Intel Corporation. Vendor of x86, x86-64, IA64, and ARM processor microarchitectures. */ - /** @details Sold its ARM design subsidiary in 2006. The last ARM processor design was released in 2004. */ + /** + * Intel Corporation. Vendor of x86, x86-64, IA64, and ARM processor microarchitectures. + * + * Sold its ARM design subsidiary in 2006. The last ARM processor design was released in 2004. + */ cpuinfo_vendor_intel = 1, - /** @brief Advanced Micro Devices, Inc. Vendor of x86 and x86-64 processor microarchitectures. */ + /** Advanced Micro Devices, Inc. Vendor of x86 and x86-64 processor microarchitectures. */ cpuinfo_vendor_amd = 2, - /** @brief ARM Holdings plc. Vendor of ARM and ARM64 processor microarchitectures. */ + /** ARM Holdings plc. 
Vendor of ARM and ARM64 processor microarchitectures. */ cpuinfo_vendor_arm = 3, - /** @brief Qualcomm Incorporated. Vendor of ARM and ARM64 processor microarchitectures. */ + /** Qualcomm Incorporated. Vendor of ARM and ARM64 processor microarchitectures. */ cpuinfo_vendor_qualcomm = 4, - /** @brief Apple Inc. Vendor of ARM and ARM64 processor microarchitectures. */ + /** Apple Inc. Vendor of ARM and ARM64 processor microarchitectures. */ cpuinfo_vendor_apple = 5, - /** @brief Samsung Electronics Co., Ltd. Vendir if ARM64 processor microarchitectures. */ + /** Samsung Electronics Co., Ltd. Vendir if ARM64 processor microarchitectures. */ cpuinfo_vendor_samsung = 6, - /** @brief nVidia Corporation. Vendor of ARM64-compatible processor microarchitectures. */ + /** nVidia Corporation. Vendor of ARM64-compatible processor microarchitectures. */ cpuinfo_vendor_nvidia = 7, - /** @brief MIPS Technologies, Inc. Vendor of MIPS processor microarchitectures. */ + /** MIPS Technologies, Inc. Vendor of MIPS processor microarchitectures. */ cpuinfo_vendor_mips = 8, - /** @brief International Business Machines Corporation. Vendor of PowerPC processor microarchitectures. */ + /** International Business Machines Corporation. Vendor of PowerPC processor microarchitectures. */ cpuinfo_vendor_ibm = 9, - /** @brief Ingenic Semiconductor. Vendor of MIPS processor microarchitectures. */ + /** Ingenic Semiconductor. Vendor of MIPS processor microarchitectures. */ cpuinfo_vendor_ingenic = 10, - /** @brief VIA Technologies, Inc. Vendor of x86 and x86-64 processor microarchitectures. */ - /** @details Processors are designed by Centaur Technology, a subsidiary of VIA Technologies. */ + /** + * VIA Technologies, Inc. Vendor of x86 and x86-64 processor microarchitectures. + * + * Processors are designed by Centaur Technology, a subsidiary of VIA Technologies. + */ cpuinfo_vendor_via = 11, /* Active vendors of embedded CPUs */ - /** @brief Texas Instruments Inc. 
Vendor of ARM processor microarchitectures. */ + /** Texas Instruments Inc. Vendor of ARM processor microarchitectures. */ cpuinfo_vendor_texas_instruments = 30, - /** @brief Marvell Technology Group Ltd. Vendor of ARM processor microarchitectures. */ + /** Marvell Technology Group Ltd. Vendor of ARM processor microarchitectures. */ cpuinfo_vendor_marvell = 31, - /** @brief RDC Semiconductor Co., Ltd. Vendor of x86 processor microarchitectures. */ - /** @details Designes embedded x86 CPUs. */ + /** RDC Semiconductor Co., Ltd. Vendor of x86 processor microarchitectures. */ cpuinfo_vendor_rdc = 32, - /** @brief DM&P Electronics Inc. Vendor of x86 processor microarchitectures. */ - /** @details Mostly embedded x86 designs. */ + /** DM&P Electronics Inc. Vendor of x86 processor microarchitectures. */ cpuinfo_vendor_dmp = 33, - /** @brief Motorola, Inc. Vendor of PowerPC and ARM processor microarchitectures. */ + /** Motorola, Inc. Vendor of PowerPC and ARM processor microarchitectures. */ cpuinfo_vendor_motorola = 34, /* Defunct CPU vendors */ - /** @brief Transmeta Corporation. Vendor of x86 processor microarchitectures. */ - /** @details Now defunct. The last processor design was released in 2004. */ - /** Transmeta processors implemented VLIW ISA and used binary translation to execute x86 code. */ + /** + * Transmeta Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 2004. + * Transmeta processors implemented VLIW ISA and used binary translation to execute x86 code. + */ cpuinfo_vendor_transmeta = 50, - /** @brief Cyrix Corporation. Vendor of x86 processor microarchitectures. */ - /** @details Now defunct. The last processor design was released in 1996. */ + /** + * Cyrix Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1996. + */ cpuinfo_vendor_cyrix = 51, - /** @brief Rise Technology. 
Vendor of x86 processor microarchitectures. */ - /** @details Now defunct. The last processor design was released in 1999. */ + /** + * Rise Technology. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1999. + */ cpuinfo_vendor_rise = 52, - /** @brief National Semiconductor. Vendor of x86 processor microarchitectures. */ - /** @details Sold its x86 design subsidiary in 1999. The last processor design was released in 1998. */ + /** + * National Semiconductor. Vendor of x86 processor microarchitectures. + * + * Sold its x86 design subsidiary in 1999. The last processor design was released in 1998. + */ cpuinfo_vendor_nsc = 53, - /** @brief Silicon Integrated Systems. Vendor of x86 processor microarchitectures. */ - /** @details Sold its x86 design subsidiary in 2001. The last processor design was released in 2001. */ + /** + * Silicon Integrated Systems. Vendor of x86 processor microarchitectures. + * + * Sold its x86 design subsidiary in 2001. The last processor design was released in 2001. + */ cpuinfo_vendor_sis = 54, - /** @brief NexGen. Vendor of x86 processor microarchitectures. */ - /** @details Now defunct. The last processor design was released in 1994. */ - /** NexGen designed the first x86 microarchitecture which decomposed x86 instructions into simple microoperations. */ + /** + * NexGen. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1994. + * NexGen designed the first x86 microarchitecture which decomposed x86 instructions into simple microoperations. + */ cpuinfo_vendor_nexgen = 55, - /** @brief United Microelectronics Corporation. Vendor of x86 processor microarchitectures. */ - /** @details Ceased x86 in the early 1990s. The last processor design was released in 1991. */ - /** Designed U5C and U5D processors. Both are 486 level. */ + /** + * United Microelectronics Corporation. Vendor of x86 processor microarchitectures. 
+ * + * Ceased x86 in the early 1990s. The last processor design was released in 1991. + * Designed U5C and U5D processors. Both are 486 level. + */ cpuinfo_vendor_umc = 56, - /** @brief Digital Equipment Corporation. Vendor of ARM processor microarchitecture. */ - /** @details Sold its ARM designs in 1997. The last processor design was released in 1997. */ + /** + * Digital Equipment Corporation. Vendor of ARM processor microarchitecture. + * + * Sold its ARM designs in 1997. The last processor design was released in 1997. + */ cpuinfo_vendor_dec = 57, }; +/** + * Processor microarchitecture + * + * Processors with different microarchitectures often have different instruction performance characteristics, + * and may have dramatically different pipeline organization. + */ enum cpuinfo_uarch { - /** @brief Microarchitecture is unknown, or the library failed to get information about the microarchitecture from OS */ + /** Microarchitecture is unknown, or the library failed to get information about the microarchitecture from OS */ cpuinfo_uarch_unknown = 0, - /** @brief Pentium and Pentium MMX microarchitecture. */ + /** Pentium and Pentium MMX microarchitecture. */ cpuinfo_uarch_p5 = 0x00100100, - /** @brief Intel Quark microarchitecture. */ + /** Intel Quark microarchitecture. */ cpuinfo_uarch_quark = 0x00100101, - /** @brief Pentium Pro, Pentium II, and Pentium III. */ + /** Pentium Pro, Pentium II, and Pentium III. */ cpuinfo_uarch_p6 = 0x00100200, - /** @brief Pentium M. */ + /** Pentium M. */ cpuinfo_uarch_dothan = 0x00100201, - /** @brief Intel Core microarchitecture. */ + /** Intel Core microarchitecture. */ cpuinfo_uarch_yonah = 0x00100202, - /** @brief Intel Core 2 microarchitecture on 65 nm process. */ + /** Intel Core 2 microarchitecture on 65 nm process. */ cpuinfo_uarch_conroe = 0x00100203, - /** @brief Intel Core 2 microarchitecture on 45 nm process. */ + /** Intel Core 2 microarchitecture on 45 nm process. 
*/ cpuinfo_uarch_penryn = 0x00100204, - /** @brief Intel Nehalem and Westmere microarchitectures (Core i3/i5/i7 1st gen). */ + /** Intel Nehalem and Westmere microarchitectures (Core i3/i5/i7 1st gen). */ cpuinfo_uarch_nehalem = 0x00100205, - /** @brief Intel Sandy Bridge microarchitecture (Core i3/i5/i7 2nd gen). */ + /** Intel Sandy Bridge microarchitecture (Core i3/i5/i7 2nd gen). */ cpuinfo_uarch_sandy_bridge = 0x00100206, - /** @brief Intel Ivy Bridge microarchitecture (Core i3/i5/i7 3rd gen). */ + /** Intel Ivy Bridge microarchitecture (Core i3/i5/i7 3rd gen). */ cpuinfo_uarch_ivy_bridge = 0x00100207, - /** @brief Intel Haswell microarchitecture (Core i3/i5/i7 4th gen). */ + /** Intel Haswell microarchitecture (Core i3/i5/i7 4th gen). */ cpuinfo_uarch_haswell = 0x00100208, - /** @brief Intel Broadwell microarchitecture. */ + /** Intel Broadwell microarchitecture. */ cpuinfo_uarch_broadwell = 0x00100209, - /** @brief Intel Sky Lake microarchitecture. */ + /** Intel Sky Lake microarchitecture. */ cpuinfo_uarch_sky_lake = 0x0010020A, - /** @brief Intel Kaby Lake microarchitecture. */ + /** Intel Kaby Lake microarchitecture. */ cpuinfo_uarch_kaby_lake = 0x0010020B, - /** @brief Pentium 4 with Willamette, Northwood, or Foster cores. */ + /** Pentium 4 with Willamette, Northwood, or Foster cores. */ cpuinfo_uarch_willamette = 0x00100300, - /** @brief Pentium 4 with Prescott and later cores. */ + /** Pentium 4 with Prescott and later cores. */ cpuinfo_uarch_prescott = 0x00100301, - /** @brief Intel Atom on 45 nm process. */ + /** Intel Atom on 45 nm process. */ cpuinfo_uarch_bonnell = 0x00100400, - /** @brief Intel Atom on 32 nm process. */ + /** Intel Atom on 32 nm process. */ cpuinfo_uarch_saltwell = 0x00100401, - /** @brief Intel Silvermont microarchitecture (22 nm out-of-order Atom). */ + /** Intel Silvermont microarchitecture (22 nm out-of-order Atom). 
*/ cpuinfo_uarch_silvermont = 0x00100402, - /** @brief Intel Airmont microarchitecture (14 nm out-of-order Atom). */ + /** Intel Airmont microarchitecture (14 nm out-of-order Atom). */ cpuinfo_uarch_airmont = 0x00100403, - /** @brief Intel Knights Ferry HPC boards. */ + /** Intel Knights Ferry HPC boards. */ cpuinfo_uarch_knights_ferry = 0x00100500, - /** @brief Intel Knights Corner HPC boards (aka Xeon Phi). */ + /** Intel Knights Corner HPC boards (aka Xeon Phi). */ cpuinfo_uarch_knights_corner = 0x00100501, - /** @brief Intel Knights Landing microarchitecture (second-gen MIC). */ + /** Intel Knights Landing microarchitecture (second-gen MIC). */ cpuinfo_uarch_knights_landing = 0x00100502, - /** @brief Intel Knights Hill microarchitecture (third-gen MIC). */ + /** Intel Knights Hill microarchitecture (third-gen MIC). */ cpuinfo_uarch_knights_hill = 0x00100503, - /** @brief Intel Knights Mill Xeon Phi. */ + /** Intel Knights Mill Xeon Phi. */ cpuinfo_uarch_knights_mill = 0x00100504, - /** @brief AMD K5. */ + /** AMD K5. */ cpuinfo_uarch_k5 = 0x00200100, - /** @brief AMD K6 and alike. */ + /** AMD K6 and alike. */ cpuinfo_uarch_k6 = 0x00200101, - /** @brief AMD Athlon and Duron. */ + /** AMD Athlon and Duron. */ cpuinfo_uarch_k7 = 0x00200102, - /** @brief AMD Athlon 64, Opteron 64. */ + /** AMD Athlon 64, Opteron 64. */ cpuinfo_uarch_k8 = 0x00200103, - /** @brief AMD Family 10h (Barcelona, Istambul, Magny-Cours). */ + /** AMD Family 10h (Barcelona, Istambul, Magny-Cours). */ cpuinfo_uarch_k10 = 0x00200104, /** - * @brief AMD Bulldozer microarchitecture - * @details Zambezi FX-series CPUs, Zurich, Valencia and Interlagos Opteron CPUs. + * AMD Bulldozer microarchitecture + * Zambezi FX-series CPUs, Zurich, Valencia and Interlagos Opteron CPUs. */ cpuinfo_uarch_bulldozer = 0x00200105, /** - * @brief AMD Piledriver microarchitecture - * @details Vishera FX-series CPUs, Trinity and Richland APUs, Delhi, Seoul, Abu Dhabi Opteron CPUs. 
+ * AMD Piledriver microarchitecture + * Vishera FX-series CPUs, Trinity and Richland APUs, Delhi, Seoul, Abu Dhabi Opteron CPUs. */ cpuinfo_uarch_piledriver = 0x00200106, - /** @brief AMD Steamroller microarchitecture (Kaveri APUs). */ + /** AMD Steamroller microarchitecture (Kaveri APUs). */ cpuinfo_uarch_steamroller = 0x00200107, - /** @brief AMD Excavator microarchitecture (Carizzo APUs). */ + /** AMD Excavator microarchitecture (Carizzo APUs). */ cpuinfo_uarch_excavator = 0x00200108, - /** @brief AMD Zen microarchitecture (Ryzen CPUs). */ + /** AMD Zen microarchitecture (Ryzen CPUs). */ cpuinfo_uarch_zen = 0x00200109, - /** @brief NSC Geode and AMD Geode GX and LX. */ + /** NSC Geode and AMD Geode GX and LX. */ cpuinco_uarch_geode = 0x00200200, - /** @brief AMD Bobcat mobile microarchitecture. */ + /** AMD Bobcat mobile microarchitecture. */ cpuinfo_uarch_bobcat = 0x00200201, - /** @brief AMD Jaguar mobile microarchitecture. */ + /** AMD Jaguar mobile microarchitecture. */ cpuinfo_uarch_jaguar = 0x00200202, - /** @brief ARM7 series. */ + /** ARM7 series. */ cpuinfo_uarch_arm7 = 0x00300100, - /** @brief ARM9 series. */ + /** ARM9 series. */ cpuinfo_uarch_arm9 = 0x00300101, - /** @brief ARM 1136, ARM 1156, ARM 1176, or ARM 11MPCore. */ + /** ARM 1136, ARM 1156, ARM 1176, or ARM 11MPCore. */ cpuinfo_uarch_arm11 = 0x00300102, - /** @brief ARM Cortex-A5. */ + /** ARM Cortex-A5. */ cpuinfo_uarch_cortex_a5 = 0x00300205, - /** @brief ARM Cortex-A7. */ + /** ARM Cortex-A7. */ cpuinfo_uarch_cortex_a7 = 0x00300207, - /** @brief ARM Cortex-A8. */ + /** ARM Cortex-A8. */ cpuinfo_uarch_cortex_a8 = 0x00300208, - /** @brief ARM Cortex-A9. */ + /** ARM Cortex-A9. */ cpuinfo_uarch_cortex_a9 = 0x00300209, - /** @brief ARM Cortex-A12. */ + /** ARM Cortex-A12. */ cpuinfo_uarch_cortex_a12 = 0x00300212, - /** @brief ARM Cortex-A15. */ + /** ARM Cortex-A15. */ cpuinfo_uarch_cortex_a15 = 0x00300215, - /** @brief ARM Cortex-A15. */ + /** ARM Cortex-A17. 
*/ cpuinfo_uarch_cortex_a17 = 0x00300217, - /** @brief ARM Cortex-A32. */ + /** ARM Cortex-A32. */ cpuinfo_uarch_cortex_a32 = 0x00300332, - /** @brief ARM Cortex-A35. */ + /** ARM Cortex-A35. */ cpuinfo_uarch_cortex_a35 = 0x00300335, - /** @brief ARM Cortex-A53. */ + /** ARM Cortex-A53. */ cpuinfo_uarch_cortex_a53 = 0x00300353, - /** @brief ARM Cortex-A57. */ + /** ARM Cortex-A57. */ cpuinfo_uarch_cortex_a57 = 0x00300357, - /** @brief ARM Cortex-A72. */ + /** ARM Cortex-A72. */ cpuinfo_uarch_cortex_a72 = 0x00300372, - /** @brief ARM Cortex-A73. */ + /** ARM Cortex-A73. */ cpuinfo_uarch_cortex_a73 = 0x00300373, - /** @brief Qualcomm Scorpion. */ + /** Qualcomm Scorpion. */ cpuinfo_uarch_scorpion = 0x00400100, - /** @brief Qualcomm Krait. */ + /** Qualcomm Krait. */ cpuinfo_uarch_krait = 0x00400101, - /** @brief Qualcomm Kryo. */ + /** Qualcomm Kryo. */ cpuinfo_uarch_kryo = 0x00400102, - /** @brief Samsung Mongoose. */ + /** Samsung Mongoose. */ cpuinfo_uarch_mongoose = 0x00500100, - /** @brief Apple A6 and A6X processors. */ + /** Apple A6 and A6X processors. */ cpuinfo_uarch_swift = 0x00600100, - /** @brief Apple A7 processor. */ + /** Apple A7 processor. */ cpuinfo_uarch_cyclone = 0x00600101, - /** @brief Apple A8 processor. */ + /** Apple A8 processor. */ cpuinfo_uarch_typhoon = 0x00600102, - /** @brief Apple A9 processor. */ + /** Apple A9 processor. */ cpuinfo_uarch_twister = 0x00600103, - /** @brief Apple A10 processor. */ + /** Apple A10 processor. 
*/ cpuinfo_uarch_hurricane = 0x00600104, }; -struct cpuinfo_processor { - enum cpuinfo_vendor vendor; - enum cpuinfo_uarch uarch; - uint64_t system_id; +struct cpuinfo_topology { + /** Thread (hyperthread, or SMT) ID within a core */ uint32_t thread_id; + /** Core ID within a package */ uint32_t core_id; + /* Package (socket) ID */ uint32_t package_id; + #if defined(__linux__) + + int linux_id; + #endif + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + uint32_t apic_id; + #endif +}; + +struct cpuinfo_processor { + enum cpuinfo_vendor vendor; + enum cpuinfo_uarch uarch; + struct cpuinfo_topology topology; struct { const struct cpuinfo_trace_cache* trace; const struct cpuinfo_cache* l1i; @@ -486,14 +559,16 @@ void CPUINFO_ABI cpuinfo_deinitialize(void); extern struct cpuinfo_x86_isa cpuinfo_isa; #endif +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l1i_cache(void); +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l1d_cache(void); +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l2_cache(void); +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l3_cache(void); +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l4_cache(void); + extern struct cpuinfo_processor* cpuinfo_processors; extern struct cpuinfo_cores* cpuinfo_cores; extern struct cpuinfo_package* cpuinfo_packages; -extern struct cpuinfo_cache* cpuinfo_l1i_cache; -extern struct cpuinfo_cache* cpuinfo_l1d_cache; -extern struct cpuinfo_cache* cpuinfo_l2_cache; -extern struct cpuinfo_cache* cpuinfo_l3_cache; -extern struct cpuinfo_cache* cpuinfo_l4_cache; + #ifdef __cplusplus } /* extern "C" */ @@ -1,6 +1,11 @@ #pragma once +extern struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max]; +extern uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max]; +extern uint32_t cpuinfo_processors_count; + void cpuinfo_x86_mach_init(void); +void cpuinfo_x86_linux_init(void); void cpuinfo_arm_init(void); typedef void (*cpuinfo_processor_callback)(uint32_t); diff --git a/src/cache.c b/src/cache.c new file mode 100644 index 
0000000..dadaaf1 --- /dev/null +++ b/src/cache.c @@ -0,0 +1,41 @@ +#include <cpuinfo.h> +#include <api.h> + + +struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max]; +uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max]; + +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l1i_cache(void) { + return (struct cpuinfo_caches) { + .count = cpuinfo_cache_count[cpuinfo_cache_level_1i], + .instances = cpuinfo_cache[cpuinfo_cache_level_1i] + }; +} + +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l1d_cache(void) { + return (struct cpuinfo_caches) { + .count = cpuinfo_cache_count[cpuinfo_cache_level_1d], + .instances = cpuinfo_cache[cpuinfo_cache_level_1d] + }; +} + +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l2_cache(void) { + return (struct cpuinfo_caches) { + .count = cpuinfo_cache_count[cpuinfo_cache_level_2], + .instances = cpuinfo_cache[cpuinfo_cache_level_2] + }; +} + +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l3_cache(void) { + return (struct cpuinfo_caches) { + .count = cpuinfo_cache_count[cpuinfo_cache_level_3], + .instances = cpuinfo_cache[cpuinfo_cache_level_3] + }; +} + +struct cpuinfo_caches CPUINFO_ABI cpuinfo_get_l4_cache(void) { + return (struct cpuinfo_caches) { + .count = cpuinfo_cache_count[cpuinfo_cache_level_4], + .instances = cpuinfo_cache[cpuinfo_cache_level_4] + }; +} @@ -7,18 +7,21 @@ struct cpuinfo_processor* cpuinfo_processors; struct cpuinfo_cores* cpuinfo_cores; struct cpuinfo_package* cpuinfo_packages; -struct cpuinfo_cache* cpuinfo_l1i_cache; -struct cpuinfo_cache* cpuinfo_l1d_cache; -struct cpuinfo_cache* cpuinfo_l2_cache; -struct cpuinfo_cache* cpuinfo_l3_cache; -struct cpuinfo_cache* cpuinfo_l4_cache; + +uint32_t cpuinfo_processors_count; static pthread_once_t init_guard = PTHREAD_ONCE_INIT; void CPUINFO_ABI cpuinfo_initialize(void) { #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 - pthread_once(&init_guard, &cpuinfo_x86_mach_init); + #if defined(__MACH__) && defined(__APPLE__) + pthread_once(&init_guard, &cpuinfo_x86_mach_init); + 
#elif defined(__linux__) + pthread_once(&init_guard, &cpuinfo_x86_linux_init); + #else + #error Unsupported target OS + #endif #else #error Unsupported target architecture #endif diff --git a/src/linux/api.h b/src/linux/api.h new file mode 100644 index 0000000..b7ba198 --- /dev/null +++ b/src/linux/api.h @@ -0,0 +1,5 @@ +#pragma once + +#include <sched.h> + +bool cpuinfo_linux_parse_cpuset(const char* filename, cpu_set_t* cpuset); diff --git a/src/linux/cpuset.c b/src/linux/cpuset.c new file mode 100644 index 0000000..9c01a8e --- /dev/null +++ b/src/linux/cpuset.c @@ -0,0 +1,191 @@ +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <sched.h> + +#include <linux/api.h> +#include <log.h> + + +/* + * Size, in chars, of the on-stack buffer used for parsing cpu lists. + * This is also the limit on the length of a single entry + * (<cpu-number> or <cpu-number-start>-<cpu-number-end>) + * in the cpu list. 
+ */
+#define BUFFER_SIZE 256
+
+/* Locale-independent */
+inline static bool is_whitespace(char c) {
+	switch (c) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\r':
+			return true;
+		default:
+			return false;
+	}
+}
+
+/*
+ * Parse a decimal number from [string, end). Stops at the first non-digit
+ * character. Always stores the digits parsed so far into *number_ptr and
+ * returns a pointer to the first unparsed character, so the caller can
+ * detect "no digits" via (result == string).
+ */
+inline static const char* parse_number(const char* string, const char* end, uint32_t number_ptr[restrict static 1]) {
+	uint32_t number = 0;
+	while (string != end) {
+		const uint32_t digit = (uint32_t) (*string) - (uint32_t) '0';
+		if (digit >= 10) {
+			break;
+		}
+		number = number * UINT32_C(10) + digit;
+		string += 1;
+	}
+	*number_ptr = number; return string;
+}
+
+inline static bool parse_entry(const char* entry_start, const char* entry_end, cpu_set_t* cpuset) {
+	/* Skip whitespace at the beginning of an entry */
+	for (; entry_start != entry_end; entry_start++) {
+		if (!is_whitespace(*entry_start)) {
+			break;
+		}
+	}
+	/* Skip whitespace at the end of an entry */
+	for (; entry_end != entry_start; entry_end--) {
+		if (!is_whitespace(entry_end[-1])) {
+			break;
+		}
+	}
+
+	const size_t entry_length = (size_t) (entry_end - entry_start);
+	if (entry_length == 0) {
+		cpuinfo_log_warning("unexpected zero-length cpu list entry ignored");
+		return false;
+	}
+
+	cpuinfo_log_debug("parse cpu list entry \"%.*s\" (%zu chars)", (int) entry_length, entry_start, entry_length);
+	uint32_t first_cpu, last_cpu;
+
+	const char* number_end = parse_number(entry_start, entry_end, &first_cpu);
+	if (number_end == entry_start) {
+		/* Failed to parse the number; ignore the entry */
+		cpuinfo_log_warning("invalid character '%c' in the cpu list entry \"%.*s\": entry is ignored",
+			entry_start[0], (int) entry_length, entry_start);
+		return false;
+	} else if (number_end == entry_end) {
+		/* Completely parsed the entry */
+		CPU_SET((int) first_cpu, cpuset);
+		return true;
+	}
+
+	/* Parse the second part of the entry */
+	if (*number_end != '-') {
+		cpuinfo_log_warning("invalid character '%c' in the cpu list entry \"%.*s\": entry is ignored",
+			*number_end, (int)
entry_length, entry_start); + return false; + } + + const char* number_start = number_end + 1; + number_end = parse_number(number_start, entry_end, &last_cpu); + if (number_end == number_start) { + /* Failed to parse the second number; ignore the entry */ + cpuinfo_log_warning("invalid character '%c' in the cpu list entry \"%.*s\": entry is ignored", + *number_start, (int) entry_length, entry_start); + return false; + } + + if (number_end != entry_end) { + /* Partially parsed the entry; ignore unparsed characters and continue with the parsed part */ + cpuinfo_log_warning("ignored invalid characters \"%.*s\" at the end of cpu list entry \"%.*s\"", + (int) (entry_end - number_end), number_start, (int) entry_length, entry_start); + } + + if (last_cpu < first_cpu) { + cpuinfo_log_warning("ignored cpu list entry \"%.*s\": invalid range %"PRIu32"-%"PRIu32, + (int) entry_length, entry_start, first_cpu, last_cpu); + return false; + } + + /* Parsed both parts of the entry; update CPU set */ + for (uint32_t i = first_cpu; i <= last_cpu; i++) { + CPU_SET((int) i, cpuset); + } + return true; +} + +bool cpuinfo_linux_parse_cpuset(const char* filename, cpu_set_t* cpuset) { + bool status = true; + int file = -1; + char buffer[BUFFER_SIZE]; + CPU_ZERO(cpuset); + cpuinfo_log_debug("parsing cpu list from file %s", filename); + + file = open(filename, O_RDONLY); + if (file == -1) { + cpuinfo_log_error("failed to open %s: %s", filename, strerror(errno)); + status = false; + goto cleanup; + } + + size_t position = 0; + const char* buffer_end = &buffer[BUFFER_SIZE]; + char* data_start = buffer; + ssize_t bytes_read; + do { + bytes_read = read(file, data_start, (size_t) (buffer_end - data_start)); + if (bytes_read < 0) { + cpuinfo_log_error("failed to read file %s at position %zu: %s", filename, position, strerror(errno)); + status = false; + goto cleanup; + } + + position += (size_t) bytes_read; + const char* data_end = data_start + (size_t) bytes_read; + const char* entry_start = 
buffer; + + if (bytes_read == 0) { + /* No more data in the file: process the remaining text in the buffer as a single entry */ + const char* entry_end = data_end; + const bool entry_status = parse_entry(entry_start, entry_end, cpuset); + status &= entry_status; + } else { + const char* entry_end; + do { + /* Find the end of the entry, as indicated by a comma (',') */ + for (entry_end = entry_start; entry_end != data_end; entry_end++) { + if (*entry_end == ',') { + break; + } + } + + /* + * If we located separator at the end of the entry, parse it. + * Otherwise, there may be more data at the end; read the file once again. + */ + if (entry_end != data_end) { + const bool entry_status = parse_entry(entry_start, entry_end, cpuset); + status &= entry_status; + entry_start = entry_end + 1; + } + } while (entry_end != data_end); + + /* Copy remaining partial entry data at the end to the beginning of the buffer */ + const size_t entry_length = (size_t) (entry_end - entry_start); + memcpy(buffer, entry_start, entry_length); + data_start = &buffer[entry_length]; + } + } while (bytes_read != 0); + +cleanup: + if (file != -1) { + close(file); + file = -1; + } + return status; +} diff --git a/src/mach/api.h b/src/mach/api.h index 593030a..fdef5bd 100644 --- a/src/mach/api.h +++ b/src/mach/api.h @@ -1,6 +1,5 @@ #pragma once - #include <stdint.h> #define CPUINFO_MACH_MAX_CACHE_LEVELS 8 diff --git a/src/x86/api.h b/src/x86/api.h index 1a8da05..061e3e5 100644 --- a/src/x86/api.h +++ b/src/x86/api.h @@ -34,9 +34,12 @@ struct cpuinfo_x86_caches { }; struct cpuinfo_x86_topology { + #ifdef __linux__ + int linux_id; + #endif uint32_t apic_id; - uint32_t smt_bits_offset; - uint32_t smt_bits_length; + uint32_t thread_bits_offset; + uint32_t thread_bits_length; uint32_t core_bits_offset; uint32_t core_bits_length; }; diff --git a/src/x86/linux/init.c b/src/x86/linux/init.c new file mode 100644 index 0000000..3ccac1a --- /dev/null +++ b/src/x86/linux/init.c @@ -0,0 +1,383 @@ +#include 
<stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <sched.h>
+
+#include <cpuinfo.h>
+#include <x86/api.h>
+#include <linux/api.h>
+#include <api.h>
+#include <log.h>
+
+
+static inline uint32_t max(uint32_t a, uint32_t b) {
+	return a > b ? a : b;
+}
+
+static int cmp_x86_processor_by_apic_id(const void* processor_a, const void* processor_b) {
+	const uint32_t id_a = ((const struct cpuinfo_x86_processor*) processor_a)->topology.apic_id;
+	const uint32_t id_b = ((const struct cpuinfo_x86_processor*) processor_b)->topology.apic_id;
+
+	if (id_a < id_b) {
+		return -1;
+	} else {
+		return id_a > id_b;
+	}
+}
+
+static uint32_t bit_mask(uint32_t bits) {
+	return (UINT32_C(1) << bits) - UINT32_C(1);
+}
+
+/* Counts per-level cache instances; results delivered via out-parameters, hence void */
+static void cpuinfo_x86_count_caches(
+	const struct cpuinfo_x86_processor* processors,
+	uint32_t processors_count,
+	uint32_t l1i_count_ptr[restrict static 1],
+	uint32_t l1d_count_ptr[restrict static 1],
+	uint32_t l2_count_ptr[restrict static 1],
+	uint32_t l3_count_ptr[restrict static 1],
+	uint32_t l4_count_ptr[restrict static 1])
+{
+	uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
+	uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
+	uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
+	for (uint32_t i = 0; i < processors_count; i++) {
+		const uint32_t apic_id = processors[i].topology.apic_id;
+		if (processors[i].cache.l1i.size != 0) {
+			const uint32_t l1i_id = apic_id & ~bit_mask(processors[i].cache.l1i.apic_bits);
+			if (l1i_id != last_l1i_id) {
+				last_l1i_id = l1i_id;
+				l1i_count++;
+			}
+		}
+		if (processors[i].cache.l1d.size != 0) {
+			const uint32_t l1d_id = apic_id & ~bit_mask(processors[i].cache.l1d.apic_bits);
+			if (l1d_id != last_l1d_id) {
+				last_l1d_id = l1d_id;
+				l1d_count++;
+			}
+		}
+		if (processors[i].cache.l2.size != 0) {
+			const uint32_t l2_id = apic_id & ~bit_mask(processors[i].cache.l2.apic_bits);
+			if (l2_id !=
last_l2_id) { + last_l2_id = l2_id; + l2_count++; + } + } + if (processors[i].cache.l3.size != 0) { + const uint32_t l3_id = apic_id & ~bit_mask(processors[i].cache.l3.apic_bits); + if (l3_id != last_l3_id) { + last_l3_id = l3_id; + l3_count++; + } + } + if (processors[i].cache.l4.size != 0) { + const uint32_t l4_id = apic_id & ~bit_mask(processors[i].cache.l4.apic_bits); + if (l4_id != last_l4_id) { + last_l4_id = l4_id; + l4_count++; + } + } + } + *l1i_count_ptr = l1i_count; + *l1d_count_ptr = l1d_count; + *l2_count_ptr = l2_count; + *l3_count_ptr = l3_count; + *l4_count_ptr = l4_count; +} + +void cpuinfo_x86_linux_init(void) { + struct cpuinfo_x86_processor* x86_processors = NULL; + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + struct cpuinfo_cache* l3 = NULL; + struct cpuinfo_cache* l4 = NULL; + + cpu_set_t old_affinity; + if (sched_getaffinity(0, sizeof(cpu_set_t), &old_affinity) != 0) { + cpuinfo_log_error("sched_getaffinity failed: %s", strerror(errno)); + return; + } + + cpu_set_t present_set; + cpuinfo_linux_parse_cpuset("/sys/devices/system/cpu/present", &present_set); + + cpu_set_t possible_set; + cpuinfo_linux_parse_cpuset("/sys/devices/system/cpu/possible", &possible_set); + + cpu_set_t processors_set; + CPU_AND(&processors_set, &present_set, &possible_set); + const uint32_t processors_count = (uint32_t) CPU_COUNT(&processors_set); + cpuinfo_log_debug("detected %"PRIu32" logical processors", processors_count); + + x86_processors = calloc(processors_count, sizeof(struct cpuinfo_x86_processor)); + if (x86_processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %d x86 logical processors", + processors_count * sizeof(struct cpuinfo_x86_processor), processors_count); + goto cleanup; + } + + int processor_bit = 0; + for (int i = 0; i < processors_count; i++, processor_bit++) { + while (!CPU_ISSET(processor_bit, 
&processors_set)) { + processor_bit++; + } + cpu_set_t processor_set; + CPU_ZERO(&processor_set); + CPU_SET(processor_bit, &processor_set); + if (sched_setaffinity(0, sizeof(cpu_set_t), &processor_set) != 0) { + cpuinfo_log_error("sched_setaffinity for processor %d (bit %d) failed: %s", + i, processor_bit, strerror(errno)); + goto cleanup; + } + + cpuinfo_x86_init_processor(&x86_processors[i]); + x86_processors[i].topology.linux_id = processor_bit; + } + + qsort(x86_processors, (size_t) processors_count, sizeof(struct cpuinfo_x86_processor), + cmp_x86_processor_by_apic_id); + + processors = calloc((size_t) processors_count, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %d logical processors", + (size_t) processors_count * sizeof(struct cpuinfo_processor), processors_count); + goto cleanup; + } + + for (uint32_t i = 0; i < (uint32_t) processors_count; i++) { + processors[i].vendor = x86_processors[i].vendor; + processors[i].uarch = x86_processors[i].uarch; + + /* Initialize topology information */ + const uint32_t apic_id = x86_processors[i].topology.apic_id; + const uint32_t thread_mask = bit_mask(x86_processors[i].topology.thread_bits_length); + const uint32_t core_mask = bit_mask(x86_processors[i].topology.core_bits_length); + const uint32_t package_offset = max( + x86_processors[i].topology.thread_bits_offset, + x86_processors[i].topology.core_bits_offset); + processors[i].topology = (struct cpuinfo_topology) { + .thread_id = (apic_id >> x86_processors[i].topology.thread_bits_offset) & thread_mask, + .core_id = (apic_id >> x86_processors[i].topology.core_bits_offset) & core_mask, + .package_id = apic_id >> package_offset, + .linux_id = x86_processors[i].topology.linux_id, + .apic_id = x86_processors[i].topology.apic_id, + }; + } + + uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0; + cpuinfo_x86_count_caches(x86_processors, processors_count, + 
&l1i_count, &l1d_count, &l2_count, &l3_count, &l4_count); + + cpuinfo_log_info("detected %"PRIu32" L1I caches", l1i_count); + cpuinfo_log_info("detected %"PRIu32" L1D caches", l1d_count); + cpuinfo_log_info("detected %"PRIu32" L2 caches", l2_count); + cpuinfo_log_info("detected %"PRIu32" L3 caches", l3_count); + cpuinfo_log_info("detected %"PRIu32" L4 caches", l4_count); + + if (l1i_count != 0) { + l1i = calloc(l1i_count, sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + l1i_count * sizeof(struct cpuinfo_cache), l1i_count); + goto cleanup; + } + } + if (l1d_count != 0) { + l1d = calloc(l1d_count, sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + l1d_count * sizeof(struct cpuinfo_cache), l1d_count); + goto cleanup; + } + } + if (l2_count != 0) { + l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + } + if (l3_count != 0) { + l3 = calloc(l3_count, sizeof(struct cpuinfo_cache)); + if (l3 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches", + l3_count * sizeof(struct cpuinfo_cache), l3_count); + goto cleanup; + } + } + if (l4_count != 0) { + l4 = calloc(l4_count, sizeof(struct cpuinfo_cache)); + if (l4 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches", + l4_count * sizeof(struct cpuinfo_cache), l4_count); + goto cleanup; + } + } + + uint32_t l1i_index = 0, l1d_index = 0, l2_index = 0, l3_index = 0, l4_index = 0; + uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX; + uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX; + for (uint32_t i = 0; i < 
processors_count; i++) { + const uint32_t apic_id = processors[i].topology.apic_id; + if (x86_processors[i].cache.l1i.size != 0) { + const uint32_t l1i_id = apic_id & ~bit_mask(x86_processors[i].cache.l1i.apic_bits); + processors[i].cache.l1i = &l1i[l1i_index]; + if (l1i_id != last_l1i_id) { + /* new cache */ + last_l1i_id = l1i_id; + l1i[l1i_index++] = (struct cpuinfo_cache) { + .size = x86_processors[i].cache.l1i.size, + .associativity = x86_processors[i].cache.l1i.associativity, + .sets = x86_processors[i].cache.l1i.sets, + .partitions = x86_processors[i].cache.l1i.partitions, + .line_size = x86_processors[i].cache.l1i.line_size, + .flags = x86_processors[i].cache.l1i.flags, + .thread_start = i, + .thread_count = 1 + }; + } else { + /* another processor sharing the same cache */ + l1i[l1i_index - 1].thread_count += 1; + } + } else { + /* reset cache id */ + last_l1i_id = UINT32_MAX; + } + if (x86_processors[i].cache.l1d.size != 0) { + const uint32_t l1d_id = apic_id & ~bit_mask(x86_processors[i].cache.l1d.apic_bits); + processors[i].cache.l1d = &l1d[l1d_index]; + if (l1d_id != last_l1d_id) { + /* new cache */ + last_l1d_id = l1d_id; + l1d[l1d_index++] = (struct cpuinfo_cache) { + .size = x86_processors[i].cache.l1d.size, + .associativity = x86_processors[i].cache.l1d.associativity, + .sets = x86_processors[i].cache.l1d.sets, + .partitions = x86_processors[i].cache.l1d.partitions, + .line_size = x86_processors[i].cache.l1d.line_size, + .flags = x86_processors[i].cache.l1d.flags, + .thread_start = i, + .thread_count = 1 + }; + } else { + /* another processor sharing the same cache */ + l1d[l1d_index - 1].thread_count += 1; + } + } else { + /* reset cache id */ + last_l1d_id = UINT32_MAX; + } + if (x86_processors[i].cache.l2.size != 0) { + const uint32_t l2_id = apic_id & ~bit_mask(x86_processors[i].cache.l2.apic_bits); + processors[i].cache.l2 = &l2[l2_index]; + if (l2_id != last_l2_id) { + /* new cache */ + last_l2_id = l2_id; + l2[l2_index++] = (struct 
cpuinfo_cache) { + .size = x86_processors[i].cache.l2.size, + .associativity = x86_processors[i].cache.l2.associativity, + .sets = x86_processors[i].cache.l2.sets, + .partitions = x86_processors[i].cache.l2.partitions, + .line_size = x86_processors[i].cache.l2.line_size, + .flags = x86_processors[i].cache.l2.flags, + .thread_start = i, + .thread_count = 1 + }; + } else { + /* another processor sharing the same cache */ + l2[l2_index - 1].thread_count += 1; + } + } else { + /* reset cache id */ + last_l2_id = UINT32_MAX; + } + if (x86_processors[i].cache.l3.size != 0) { + const uint32_t l3_id = apic_id & ~bit_mask(x86_processors[i].cache.l3.apic_bits); + processors[i].cache.l3 = &l3[l3_index]; + if (l3_id != last_l3_id) { + /* new cache */ + last_l3_id = l3_id; + l3[l3_index++] = (struct cpuinfo_cache) { + .size = x86_processors[i].cache.l3.size, + .associativity = x86_processors[i].cache.l3.associativity, + .sets = x86_processors[i].cache.l3.sets, + .partitions = x86_processors[i].cache.l3.partitions, + .line_size = x86_processors[i].cache.l3.line_size, + .flags = x86_processors[i].cache.l3.flags, + .thread_start = i, + .thread_count = 1 + }; + } else { + /* another processor sharing the same cache */ + l3[l3_index - 1].thread_count += 1; + } + } else { + /* reset cache id */ + last_l3_id = UINT32_MAX; + } + if (x86_processors[i].cache.l4.size != 0) { + const uint32_t l4_id = apic_id & ~bit_mask(x86_processors[i].cache.l4.apic_bits); + processors[i].cache.l4 = &l4[l4_index]; + if (l4_id != last_l4_id) { + /* new cache */ + last_l4_id = l4_id; + l4[l4_index++] = (struct cpuinfo_cache) { + .size = x86_processors[i].cache.l4.size, + .associativity = x86_processors[i].cache.l4.associativity, + .sets = x86_processors[i].cache.l4.sets, + .partitions = x86_processors[i].cache.l4.partitions, + .line_size = x86_processors[i].cache.l4.line_size, + .flags = x86_processors[i].cache.l4.flags, + .thread_start = i, + .thread_count = 1 + }; + } else { + /* another processor 
sharing the same cache */
+				l4[l4_index - 1].thread_count += 1;
+			}
+		} else {
+			/* reset cache id */
+			last_l4_id = UINT32_MAX;
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
+	cpuinfo_cache[cpuinfo_cache_level_4] = l4;
+
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
+
+	cpuinfo_processors_count = processors_count;
+
+	processors = NULL;
+	l1i = l1d = l2 = l3 = l4 = NULL;
+
+cleanup:
+	if (sched_setaffinity(0, sizeof(cpu_set_t), &old_affinity) != 0) {
+		cpuinfo_log_warning("could not restore initial process affinity: "
+			"sched_setaffinity failed: %s", strerror(errno));
+	}
+
+	free(x86_processors);
+	free(processors);
+	free(l1i);
+	free(l1d);
+	free(l2);
+	free(l3);
+	free(l4);
+}
diff --git a/src/x86/mach/init.c b/src/x86/mach/init.c
index 9ad639a..5e6ab20 100644
--- a/src/x86/mach/init.c
+++ b/src/x86/mach/init.c
@@ -31,11 +31,12 @@ void cpuinfo_x86_mach_init(void) {
 		processors[t].uarch = x86_processor.uarch;
 
 		/* Reconstruct APIC IDs from topology components */
+		const uint32_t thread_bits_mask = (UINT32_C(1) << x86_processor.topology.thread_bits_length) - UINT32_C(1);
+		const uint32_t core_bits_mask = (UINT32_C(1) << x86_processor.topology.core_bits_length) - UINT32_C(1);
+
 		const uint32_t smt_id = t % threads_per_core;
 		const uint32_t core_id = t / threads_per_core;
 		const uint32_t package_id = t / threads_per_package;
-		const uint32_t smt_bits_mask = (UINT32_C(1) << x86_processor.topology.smt_bits_length) - UINT32_C(1);
-		const uint32_t core_bits_mask = (UINT32_C(1) << x86_processor.topology.core_bits_length) -
UINT32_C(1);
 		const uint32_t package_bits_offset = max(
 			x86_processor.topology.smt_bits_offset + x86_processor.topology.smt_bits_length,
 			x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
@@ -44,6 +45,11 @@ void cpuinfo_x86_mach_init(void) {
 			((smt_id & smt_bits_mask) << x86_processor.topology.smt_bits_offset) |
 			((core_id & core_bits_mask) << x86_processor.topology.core_bits_offset) |
 			(package_id << package_bits_offset);
+		processors[t].topology = (struct cpuinfo_topology) {
+			.thread_id = smt_id,
+			.core_id = core_id,
+			.package_id = package_id
+		};
 		cpuinfo_log_info("reconstructed APIC ID 0x%08"PRIx32" for thread %"PRIu32, apic_id, t);
 	}
diff --git a/src/x86/topology.c b/src/x86/topology.c
index d651388..89f2dae 100644
--- a/src/x86/topology.c
+++ b/src/x86/topology.c
@@ -33,7 +33,7 @@ void cpuinfo_x86_detect_topology(
 		const uint32_t log2_max_logical_processors = 32 - __builtin_clz(logical_processors - 1);
 		const uint32_t log2_max_threads_per_core = log2_max_logical_processors - topology->core_bits_length;
 		topology->core_bits_offset = log2_max_threads_per_core;
-		topology->smt_bits_length = log2_max_threads_per_core;
+		topology->thread_bits_length = log2_max_threads_per_core;
 	}
 	cpuinfo_log_debug("HTT: APIC ID = %08"PRIx32", logical processors = %"PRIu32, apic_id, logical_processors);
 }
@@ -47,13 +47,13 @@ void cpuinfo_x86_detect_topology(
 		uint32_t level = 0;
 		uint32_t type;
 		uint32_t total_shift = 0;
-		topology->smt_bits_offset = topology->smt_bits_length = 0;
-		topology->core_bits_offset = topology->core_bits_length = 0;
+		topology->thread_bits_offset = topology->thread_bits_length = 0;
+		topology->core_bits_offset = topology->core_bits_length = 0;
 		do {
-			const struct cpuid_regs leafB = cpuidex(UINT32_C(0xB), level++);
+			const struct cpuid_regs leafB = cpuidex(UINT32_C(0xB), level);
 			type = (leafB.ecx >> 8) & UINT32_C(0x000000FF);
 			const uint32_t level_shift = leafB.eax & UINT32_C(0x0000001F);
-			const uint32_t x2apic_id = leafB.edx;
+			const
uint32_t x2apic_id = leafB.edx; const uint32_t logical_processors = leafB.ebx & UINT32_C(0x0000FFFF); apic_id = x2apic_id; switch (type) { @@ -63,8 +63,8 @@ void cpuinfo_x86_detect_topology( cpuinfo_log_debug("x2 level %"PRIu32": APIC ID = %08"PRIx32", " "type SMT, shift %"PRIu32", total shift %"PRIu32, level, apic_id, level_shift, total_shift); - topology->smt_bits_offset = total_shift; - topology->smt_bits_length = level_shift; + topology->thread_bits_offset = total_shift; + topology->thread_bits_length = level_shift; break; case topology_type_core: cpuinfo_log_debug("x2 level %"PRIu32": APIC ID = %08"PRIx32", " @@ -79,10 +79,12 @@ void cpuinfo_x86_detect_topology( break; } total_shift += level_shift; + level += 1; } while (type != 0); cpuinfo_log_debug("x2APIC ID 0x%08"PRIx32", " "SMT offset %"PRIu32" length %"PRIu32", core offset %"PRIu32" length %"PRIu32, apic_id, - topology->smt_bits_offset, topology->smt_bits_length, topology->core_bits_offset, topology->core_bits_length); + topology->thread_bits_offset, topology->thread_bits_length, + topology->core_bits_offset, topology->core_bits_length); } topology->apic_id = apic_id; |