From c2092219e7c874783a00a62edb94ddc672f57ab3 Mon Sep 17 00:00:00 2001 From: Ashkan Aliabadi Date: Fri, 8 May 2020 20:40:33 -0700 Subject: Upstream cpuinfo updates in XNNPACK as of XNNPACK:d793f6c2ec145be3ddbffea951e6e5480f4646b8. --- CMakeLists.txt | 16 ++- README.md | 5 + bench/get-current.cc | 9 ++ configure.py | 3 +- include/cpuinfo.h | 19 ++- src/api.c | 30 +++++ src/arm/cache.c | 43 +++---- src/arm/linux/aarch32-isa.c | 2 +- src/arm/linux/clusters.c | 10 +- src/arm/linux/cpuinfo.c | 6 +- src/arm/tlb.c | 2 +- src/arm/uarch.c | 6 +- src/cpuinfo/internal-api.h | 4 +- src/emscripten/init.c | 277 ++++++++++++++++++++++++++++++++++++++++++++ src/init.c | 6 +- src/linux/mockfile.c | 2 +- src/x86/cache/descriptor.c | 14 +-- src/x86/mockcpuid.c | 2 +- src/x86/name.c | 9 +- src/x86/vendor.c | 2 +- src/x86/windows/init.c | 17 ++- tools/cpu-info.c | 6 + tools/gpu-dump.c | 2 +- 23 files changed, 423 insertions(+), 69 deletions(-) create mode 100644 src/emscripten/init.c diff --git a/CMakeLists.txt b/CMakeLists.txt index fefb60b..b85620f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,7 +79,7 @@ IF(NOT CMAKE_SYSTEM_NAME) "Target operating system is not specified. " "cpuinfo will compile, but cpuinfo_initialize() will always fail.") SET(CPUINFO_SUPPORTED_PLATFORM FALSE) -ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|Darwin|Linux|Android)$") +ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$") IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS") MESSAGE(WARNING "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. " @@ -125,7 +125,7 @@ SET(CPUINFO_SRCS src/cache.c) IF(CPUINFO_SUPPORTED_PLATFORM) - IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$") + IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")) LIST(APPEND CPUINFO_SRCS src/x86/init.c src/x86/info.c @@ -143,7 +143,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM) src/x86/linux/cpuinfo.c) ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c) - ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Windows") + ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$") LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c) ENDIF() ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$") @@ -175,6 +175,11 @@ IF(CPUINFO_SUPPORTED_PLATFORM) ENDIF() ENDIF() + IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + LIST(APPEND CPUINFO_SRCS + src/emscripten/init.c) + ENDIF() + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") LIST(APPEND CPUINFO_SRCS src/linux/smallfile.c @@ -205,6 +210,11 @@ ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS}) CPUINFO_TARGET_ENABLE_C99(cpuinfo) CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals) CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo) +IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$") + # Target Windows 7+ API + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _WIN32_WINNT=0x0601) + TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _WIN32_WINNT=0x0601) +ENDIF() SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h) TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include) TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src) diff --git a/README.md b/README.md index ee5fb82..97e65cd 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ Detect if target is a 32-bit or 64-bit ARM system: ``` Check if the host CPU support ARM NEON + ```c cpuinfo_initialize(); if (cpuinfo_has_arm_neon()) { @@ -57,6 +58,7 @@ if (cpuinfo_has_arm_neon()) { ``` Check if the host CPU supports x86 AVX + ```c cpuinfo_initialize(); if (cpuinfo_has_x86_avx()) { @@ -65,6 +67,7 @@ if (cpuinfo_has_x86_avx()) { ``` Check if the thread runs on a Cortex-A53 core + ```c cpuinfo_initialize(); switch (cpuinfo_get_current_core()->uarch) { @@ -78,12 +81,14 @@ switch (cpuinfo_get_current_core()->uarch) { ``` Get the size of level 1 data cache on the fastest core in the processor (e.g. big core in big.LITTLE ARM systems): + ```c cpuinfo_initialize(); const size_t l1_size = cpuinfo_get_processor(0)->cache.l1d->size; ``` Pin thread to cores sharing L2 cache with the current core (Linux or Android) + ```c cpuinfo_initialize(); cpu_set_t cpu_set; diff --git a/bench/get-current.cc b/bench/get-current.cc index b547df0..e475767 100644 --- a/bench/get-current.cc +++ b/bench/get-current.cc @@ -30,4 +30,13 @@ static void cpuinfo_get_current_uarch_index(benchmark::State& state) { } BENCHMARK(cpuinfo_get_current_uarch_index)->Unit(benchmark::kNanosecond); +static void cpuinfo_get_current_uarch_index_with_default(benchmark::State& state) { + cpuinfo_initialize(); + while (state.KeepRunning()) { + const uint32_t uarch_index = cpuinfo_get_current_uarch_index_with_default(0); + benchmark::DoNotOptimize(uarch_index); + } +} +BENCHMARK(cpuinfo_get_current_uarch_index_with_default)->Unit(benchmark::kNanosecond); + BENCHMARK_MAIN(); diff --git a/configure.py b/configure.py index 0e58dba..66f2ec9 100755 --- a/configure.py +++ b/configure.py @@ -23,7 +23,7 @@ def main(args): build.export_cpath("include", ["cpuinfo.h"]) with build.options(source_dir="src", macros=macros, extra_include_dirs="src", deps=build.deps.clog): - sources = ["init.c", "api.c"] + sources = ["api.c", "init.c", "cache.c"] if build.target.is_x86 or build.target.is_x86_64: sources += [ "x86/init.c", "x86/info.c", "x86/isa.c", "x86/vendor.c", @@ -61,7 +61,6 @@ def main(args): sources += ["mach/topology.c"] if build.target.is_linux or build.target.is_android: sources += [ - "linux/current.c", "linux/cpulist.c", "linux/smallfile.c", "linux/multiline.c", diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 903d1cf..e89a4c1 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -499,11 +499,11 @@ enum cpuinfo_uarch { /** Applied Micro X-Gene. */ cpuinfo_uarch_xgene = 0x00B00100, - /** Huawei hisilicon Kunpeng Series CPU. */ - cpuinfo_uarch_taishanv110 = 0x00C00100, - /* Hygon Dhyana (a modification of AMD Zen for Chinese market). */ cpuinfo_uarch_dhyana = 0x01000100, + + /** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). */ + cpuinfo_uarch_taishan_v110 = 0x00C00100, }; struct cpuinfo_processor { @@ -523,7 +523,7 @@ struct cpuinfo_processor { */ int linux_id; #endif -#if defined(_WIN32) +#if defined(_WIN32) || defined(__CYGWIN__) /** Windows-specific ID for the group containing the logical processor. */ uint16_t windows_group_id; /** @@ -1799,13 +1799,22 @@ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void); /** * Identify the microarchitecture index of the core that executes the current thread. - * If the system does not support such identification, the function return 0. + * If the system does not support such identification, the function returns 0. * * There is no guarantee that the thread will stay on the same type of core for any time. * Callers should treat the result as only a hint. */ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void); +/** + * Identify the microarchitecture index of the core that executes the current thread. + * If the system does not support such identification, the function returns the user-specified default value. + * + * There is no guarantee that the thread will stay on the same type of core for any time. + * Callers should treat the result as only a hint. + */ +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/api.c b/src/api.c index 38cea86..832b085 100644 --- a/src/api.c +++ b/src/api.c @@ -374,3 +374,33 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) { return 0; #endif } + +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index_with_default"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #ifdef __linux__ + if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) { + /* Special case: avoid syscall on systems with only a single type of cores */ + return 0; + } + + /* General case */ + unsigned cpu; + if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { + return default_uarch_index; + } + if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { + return default_uarch_index; + } + return cpuinfo_linux_cpu_to_uarch_index_map[cpu]; + #else + /* Fallback: no API to query current core, use default uarch index. */ + return default_uarch_index; + #endif + #else + /* Only ARM/ARM64 processors may include cores of different types in the same package. */ + return 0; + #endif +} diff --git a/src/arm/cache.c b/src/arm/cache.c index 70f11fd..1a8bf91 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -1448,23 +1448,24 @@ void cpuinfo_arm_decode_cache( .line_size = 64 /* assumption */ }; break; - case cpuinfo_uarch_taishanv110: + case cpuinfo_uarch_taishan_v110: /* - * Kunpeng920 series CPU designed by Huawei hisilicon for server, - * L1 and L2 cache is private to each core, L3 is shared with all cores. - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * | Kunpeng920-3226 | 32 | 64K | 64K | 512K | 32M | [1] | - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * | Kunpeng920-4826 | 48 | 64K | 64K | 512K | 48M | [2] | - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * | Kunpeng920-6426 | 64 | 64K | 64K | 512K | 64M | [3] | - * +--------------------+-------+-----------+-----------+-----------+----------+------------+ - * - * [1] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226 - * [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826 - * [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426 + * It features private 64 KiB L1 instruction and data caches as well as 512 KiB of private L2. [1] + * + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Kunpeng 920-3226 | 32 | 64K | 64K | 512K | 32M | [2] | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Kunpeng 920-4826 | 48 | 64K | 64K | 512K | 48M | [3] | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Kunpeng 920-6426 | 64 | 64K | 64K | 512K | 64M | [4] | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * + * [1] https://en.wikichip.org/wiki/hisilicon/microarchitectures/taishan_v110 + * [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226 + * [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826 + * [4] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426 */ *l1i = (struct cpuinfo_cache) { .size = 64 * 1024, @@ -1482,11 +1483,11 @@ void cpuinfo_arm_decode_cache( .line_size = 128 /* assumption */, .flags = CPUINFO_CACHE_INCLUSIVE /* assumption */, }; - *l3 = (struct cpuinfo_cache) { - .size = cluster_cores * 1024 * 1024, - .associativity = 16 /* assumption */, - .line_size = 128 /* assumption */, - }; + *l3 = (struct cpuinfo_cache) { + .size = cluster_cores * 1024 * 1024, + .associativity = 16 /* assumption */, + .line_size = 128 /* assumption */, + }; break; #endif case cpuinfo_uarch_cortex_a12: diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c index 92095e1..6aedda3 100644 --- a/src/arm/linux/aarch32-isa.c +++ b/src/arm/linux/aarch32-isa.c @@ -193,7 +193,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON; if ((architecture_version >= 7) || (features & vfpv3_mask)) { isa->vfpv3 = true; - + const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON; if (features & d32_mask) { isa->d32 = true; diff --git a/src/arm/linux/clusters.c b/src/arm/linux/clusters.c index 8daeae5..c7a4045 100644 --- a/src/arm/linux/clusters.c +++ b/src/arm/linux/clusters.c @@ -47,7 +47,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { * * @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags. * @param max_processors - number of elements in the @p processors array. - * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum + * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum * frequency, MIDR infromation, and core cluster (package siblings list) information. * * @retval true if the heuristic successfully assigned all processors into clusters of cores. @@ -308,7 +308,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic( * @p processors array have cluster information. * * @param max_processors - number of elements in the @p processors array. - * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum + * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum * frequency, MIDR infromation, and core cluster (package siblings list) information. * * @retval true if the heuristic successfully assigned all processors into clusters of cores. @@ -466,7 +466,7 @@ new_cluster: * This function should be called after all processors are assigned to core clusters. * * @param max_processors - number of elements in the @p processors array. - * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, + * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, * and decoded core cluster (package_leader_id) information. * The function expects the value of processors[i].package_processor_count to be zero. * Upon return, processors[i].package_processor_count will contain the number of logical @@ -482,12 +482,12 @@ void cpuinfo_arm_linux_count_cluster_processors( const uint32_t package_leader_id = processors[i].package_leader_id; processors[package_leader_id].package_processor_count += 1; } - } + } /* Second pass: copy the package_processor_count from the group leader processor */ for (uint32_t i = 0; i < max_processors; i++) { if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { const uint32_t package_leader_id = processors[i].package_leader_id; processors[i].package_processor_count = processors[package_leader_id].package_processor_count; } - } + } } diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c index 2df0c6e..c70055f 100644 --- a/src/arm/linux/cpuinfo.c +++ b/src/arm/linux/cpuinfo.c @@ -44,7 +44,7 @@ static uint32_t parse_processor_number( /* * Full list of ARM features reported in /proc/cpuinfo: - * + * * * swp - support for SWP instruction (deprecated in ARMv7, can be removed in future) * * half - support for half-word loads and stores. These instruction are part of ARMv4, * so no need to check it on supported CPUs. @@ -620,7 +620,7 @@ static void parse_cache_number( break; default: cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected", - number_name, (int) (number_end - number_start), number_start); + number_name, (int) (number_end - number_start), number_start); } } @@ -670,7 +670,7 @@ static bool parse_line( if (line_start == line_end) { return true; } - + /* Search for ':' on the line. */ const char* separator = line_start; for (; separator != line_end; separator++) { diff --git a/src/arm/tlb.c b/src/arm/tlb.c index ba42a3e..9beb832 100644 --- a/src/arm/tlb.c +++ b/src/arm/tlb.c @@ -6,7 +6,7 @@ switch (uarch) { * Cortex-A5 Technical Reference Manual: * 6.3.1. Micro TLB * The first level of caching for the page table information is a micro TLB of - * 10 entries that is implemented on each of the instruction and data sides. + * 10 entries that is implemented on each of the instruction and data sides. * 6.3.2. Main TLB * Misses from the instruction and data micro TLBs are handled by a unified main TLB. * The main TLB is 128-entry two-way set-associative. diff --git a/src/arm/uarch.c b/src/arm/uarch.c index e5e3cbc..63b1a55 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -155,9 +155,11 @@ void cpuinfo_arm_decode_vendor_uarch( case 'H': *vendor = cpuinfo_vendor_huawei; switch (midr_get_part(midr)) { - case 0xD01: /* Kunpeng920 ARM-base CPU*/ - *uarch = cpuinfo_uarch_taishanv110; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD01: /* Kunpeng 920 series */ + *uarch = cpuinfo_uarch_taishan_v110; break; +#endif case 0xD40: /* Kirin 980 Big/Medium cores -> Cortex-A76 */ *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a76; diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h index c6eed0b..9c23d7c 100644 --- a/src/cpuinfo/internal-api.h +++ b/src/cpuinfo/internal-api.h @@ -3,7 +3,7 @@ #include #include -#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) #include #endif @@ -50,7 +50,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void); CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void); -#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context); #endif CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void); diff --git a/src/emscripten/init.c b/src/emscripten/init.c new file mode 100644 index 0000000..ce4bdea --- /dev/null +++ b/src/emscripten/init.c @@ -0,0 +1,277 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + + +static const volatile float infinity = INFINITY; + +static struct cpuinfo_package static_package = { }; + +static struct cpuinfo_cache static_x86_l3 = { + .size = 2 * 1024 * 1024, + .associativity = 16, + .sets = 2048, + .partitions = 1, + .line_size = 64, +}; + +void cpuinfo_emscripten_init(void) { + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + + const bool is_x86 = signbit(infinity - infinity); + + int logical_cores_count = emscripten_num_logical_cores(); + if (logical_cores_count <= 0) { + logical_cores_count = 1; + } + uint32_t processor_count = (uint32_t) logical_cores_count; + uint32_t core_count = processor_count; + uint32_t cluster_count = 1; + uint32_t big_cluster_core_count = core_count; + uint32_t processors_per_core = 1; + if (is_x86) { + if (processor_count % 2 == 0) { + processors_per_core = 2; + core_count = processor_count / 2; + big_cluster_core_count = core_count; + } + } else { + /* Assume ARM/ARM64 */ + if (processor_count > 4) { + /* Assume big.LITTLE architecture */ + cluster_count = 2; + big_cluster_core_count = processor_count >= 8 ? 4 : 2; + } + } + uint32_t l2_count = is_x86 ? core_count : cluster_count; + + processors = calloc(processor_count, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + processor_count * sizeof(struct cpuinfo_processor), processor_count); + goto cleanup; + } + cores = calloc(processor_count, sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + processor_count * sizeof(struct cpuinfo_core), processor_count); + goto cleanup; + } + clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" clusters", + cluster_count * sizeof(struct cpuinfo_cluster), cluster_count); + goto cleanup; + } + + l1i = calloc(core_count, sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + core_count * sizeof(struct cpuinfo_cache), core_count); + goto cleanup; + } + + l1d = calloc(core_count, sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + core_count * sizeof(struct cpuinfo_cache), core_count); + goto cleanup; + } + + l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + + static_package.processor_count = processor_count; + static_package.core_count = core_count; + static_package.cluster_count = cluster_count; + if (is_x86) { + strncpy(static_package.name, "x86 vCPU", CPUINFO_PACKAGE_NAME_MAX); + } else { + strncpy(static_package.name, "ARM vCPU", CPUINFO_PACKAGE_NAME_MAX); + } + + for (uint32_t i = 0; i < core_count; i++) { + for (uint32_t j = 0; j < processors_per_core; j++) { + processors[i * processors_per_core + j] = (struct cpuinfo_processor) { + .smt_id = j, + .core = cores + i, + .cluster = clusters + (uint32_t) (i >= big_cluster_core_count), + .package = &static_package, + .cache.l1i = l1i + i, + .cache.l1d = l1d + i, + .cache.l2 = is_x86 ? l2 + i : l2 + (uint32_t) (i >= big_cluster_core_count), + .cache.l3 = is_x86 ? &static_x86_l3 : NULL, + }; + } + + cores[i] = (struct cpuinfo_core) { + .processor_start = i * processors_per_core, + .processor_count = processors_per_core, + .core_id = i, + .cluster = clusters + (uint32_t) (i >= big_cluster_core_count), + .package = &static_package, + .vendor = cpuinfo_vendor_unknown, + .uarch = cpuinfo_uarch_unknown, + .frequency = 0, + }; + + l1i[i] = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .sets = 128, + .partitions = 1, + .line_size = 64, + .processor_start = i * processors_per_core, + .processor_count = processors_per_core, + }; + + l1d[i] = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .sets = 128, + .partitions = 1, + .line_size = 64, + .processor_start = i * processors_per_core, + .processor_count = processors_per_core, + }; + + if (is_x86) { + l2[i] = (struct cpuinfo_cache) { + .size = 256 * 1024, + .associativity = 8, + .sets = 512, + .partitions = 1, + .line_size = 64, + .processor_start = i * processors_per_core, + .processor_count = processors_per_core, + }; + } + } + + if (is_x86) { + clusters[0] = (struct cpuinfo_cluster) { + .processor_start = 0, + .processor_count = processor_count, + .core_start = 0, + .core_count = core_count, + .cluster_id = 0, + .package = &static_package, + .vendor = cpuinfo_vendor_unknown, + .uarch = cpuinfo_uarch_unknown, + .frequency = 0, + }; + + static_x86_l3.processor_count = processor_count; + } else { + clusters[0] = (struct cpuinfo_cluster) { + .processor_start = 0, + .processor_count = big_cluster_core_count, + .core_start = 0, + .core_count = big_cluster_core_count, + .cluster_id = 0, + .package = &static_package, + .vendor = cpuinfo_vendor_unknown, + .uarch = cpuinfo_uarch_unknown, + .frequency = 0, + }; + + l2[0] = (struct cpuinfo_cache) { + .size = 1024 * 1024, + .associativity = 8, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .processor_start = 0, + .processor_count = big_cluster_core_count, + }; + + if (cluster_count > 1) { + l2[1] = (struct cpuinfo_cache) { + .size = 256 * 1024, + .associativity = 8, + .sets = 512, + .partitions = 1, + .line_size = 64, + .processor_start = big_cluster_core_count, + .processor_count = processor_count - big_cluster_core_count, + }; + + clusters[1] = (struct cpuinfo_cluster) { + .processor_start = big_cluster_core_count, + .processor_count = processor_count - big_cluster_core_count, + .core_start = big_cluster_core_count, + .core_count = processor_count - big_cluster_core_count, + .cluster_id = 1, + .package = &static_package, + .vendor = cpuinfo_vendor_unknown, + .uarch = cpuinfo_uarch_unknown, + .frequency = 0, + }; + } + } + + /* Commit changes */ + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + if (is_x86) { + cpuinfo_cache[cpuinfo_cache_level_3] = &static_x86_l3; + } + + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = &static_package; + + cpuinfo_cache_count[cpuinfo_cache_level_1i] = processor_count; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = processor_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + if (is_x86) { + cpuinfo_cache_count[cpuinfo_cache_level_3] = 1; + } + + cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { + .uarch = cpuinfo_uarch_unknown, + .processor_count = processor_count, + .core_count = core_count, + }; + + cpuinfo_processors_count = processor_count; + cpuinfo_cores_count = processor_count; + cpuinfo_clusters_count = cluster_count; + cpuinfo_packages_count = 1; + + cpuinfo_max_cache_size = is_x86 ? 128 * 1024 * 1024 : 8 * 1024 * 1024; + + cpuinfo_is_initialized = true; + + processors = NULL; + cores = NULL; + clusters = NULL; + l1i = l1d = l2 = NULL; + +cleanup: + free(processors); + free(cores); + free(clusters); + free(l1i); + free(l1d); + free(l2); +} diff --git a/src/init.c b/src/init.c index 10a1afc..0d8cc3b 100644 --- a/src/init.c +++ b/src/init.c @@ -1,4 +1,4 @@ -#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) #include #elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__) #include @@ -13,7 +13,7 @@ #endif -#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT; #elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__) static pthread_once_t init_guard = PTHREAD_ONCE_INIT; @@ -27,7 +27,7 @@ bool CPUINFO_ABI cpuinfo_initialize(void) { pthread_once(&init_guard, &cpuinfo_x86_mach_init); #elif defined(__linux__) pthread_once(&init_guard, &cpuinfo_x86_linux_init); - #elif defined(_WIN32) + #elif defined(_WIN32) || defined(__CYGWIN__) InitOnceExecuteOnce(&init_guard, &cpuinfo_x86_windows_init, NULL, NULL); #else cpuinfo_log_error("operating system is not supported in cpuinfo"); diff --git a/src/linux/mockfile.c b/src/linux/mockfile.c index 3fdd6bf..138acfe 100644 --- a/src/linux/mockfile.c +++ b/src/linux/mockfile.c @@ -34,7 +34,7 @@ void CPUINFO_ABI cpuinfo_mock_filesystem(struct cpuinfo_mock_file* files) { file_count += 1; } cpuinfo_mock_files = files; - cpuinfo_mock_file_count = file_count; + cpuinfo_mock_file_count = file_count; } int CPUINFO_ABI cpuinfo_mock_open(const char* path, int oflag) { diff --git a/src/x86/cache/descriptor.c b/src/x86/cache/descriptor.c index 6532e4d..69d38cc 100644 --- a/src/x86/cache/descriptor.c +++ b/src/x86/cache/descriptor.c @@ -353,7 +353,7 @@ void cpuinfo_x86_decode_cache_descriptor( }; break; case 0x39: - /* Where does this come from? */ + /* Where does this come from? */ cache->l2 = (struct cpuinfo_x86_cache) { .size = 128 * 1024, .associativity = 4, @@ -364,7 +364,7 @@ void cpuinfo_x86_decode_cache_descriptor( }; break; case 0x3A: - /* Where does this come from? */ + /* Where does this come from? */ cache->l2 = (struct cpuinfo_x86_cache) { .size = 192 * 1024, .associativity = 6, @@ -375,7 +375,7 @@ void cpuinfo_x86_decode_cache_descriptor( }; break; case 0x3B: - /* Where does this come from? */ + /* Where does this come from? */ cache->l2 = (struct cpuinfo_x86_cache) { .size = 128 * 1024, .associativity = 2, @@ -386,7 +386,7 @@ void cpuinfo_x86_decode_cache_descriptor( }; break; case 0x3C: - /* Where does this come from? */ + /* Where does this come from? */ cache->l2 = (struct cpuinfo_x86_cache) { .size = 256 * 1024, .associativity = 4, @@ -397,7 +397,7 @@ void cpuinfo_x86_decode_cache_descriptor( }; break; case 0x3D: - /* Where does this come from? */ + /* Where does this come from? */ cache->l2 = (struct cpuinfo_x86_cache) { .size = 384 * 1024, .associativity = 6, @@ -408,7 +408,7 @@ void cpuinfo_x86_decode_cache_descriptor( }; break; case 0x3E: - /* Where does this come from? */ + /* Where does this come from? */ cache->l2 = (struct cpuinfo_x86_cache) { .size = 512 * 1024, .associativity = 4, @@ -1011,7 +1011,7 @@ void cpuinfo_x86_decode_cache_descriptor( }; break; case 0x73: - /* Where does this come from? */ + /* Where does this come from? */ cache->trace = (struct cpuinfo_trace_cache) { .uops = 64 * 1024, .associativity = 8, diff --git a/src/x86/mockcpuid.c b/src/x86/mockcpuid.c index 6361dc2..2631f09 100644 --- a/src/x86/mockcpuid.c +++ b/src/x86/mockcpuid.c @@ -14,7 +14,7 @@ static uint32_t cpuinfo_mock_cpuid_leaf4_iteration = 0; void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries) { cpuinfo_mock_cpuid_data = dump; - cpuinfo_mock_cpuid_entries = entries; + cpuinfo_mock_cpuid_entries = entries; }; void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]) { diff --git a/src/x86/name.c b/src/x86/name.c index e0d5a5b..a7cc7c6 100644 --- a/src/x86/name.c +++ b/src/x86/name.c @@ -135,7 +135,7 @@ static inline bool is_frequency(const char* token_start, const char* token_end) const size_t token_length = (size_t) (token_end - token_start); if (token_length > 3 && token_end[-2] == 'H' && token_end[-1] == 'z') { switch (token_end[-3]) { - case 'K': + case 'K': case 'M': case 'G': return true; @@ -347,7 +347,7 @@ static bool transform_token(char* token_start, char* token_end, struct parser_st return false; } /* - * Erase "Mobile" when it is not part of the processor name, + * Erase "Mobile" when it is not part of the processor name, * e.g. in "AMD Turion(tm) X2 Ultra Dual-Core Mobile ZM-82" */ if (previousState.context_core != NULL) { @@ -540,8 +540,7 @@ uint32_t cpuinfo_x86_normalize_brand_string( char* name_end = &name[48]; while (name_end[-1] == '\0') { /* - * Adject name_end by 1 position and - * check that we didn't reach the start of the brand string. + * Adject name_end by 1 position and check that we didn't reach the start of the brand string. * This is possible if all characters are zero. */ if (--name_end == name) { @@ -704,6 +703,6 @@ uint32_t cpuinfo_x86_format_package_name( } else { snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "%s %s", vendor_string, normalized_brand_string); - return strlen(vendor_string) + 1; + return (uint32_t) strlen(vendor_string) + 1; } } diff --git a/src/x86/vendor.c b/src/x86/vendor.c index 2bba90d..bad50fa 100644 --- a/src/x86/vendor.c +++ b/src/x86/vendor.c @@ -79,7 +79,7 @@ enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32 case ineI: if (ecx == ntel) { /* "GenuineIntel" */ - return cpuinfo_vendor_intel; + return cpuinfo_vendor_intel; } break; #if CPUINFO_ARCH_X86 diff --git a/src/x86/windows/init.c b/src/x86/windows/init.c index 2c7e3cd..cf549d5 100644 --- a/src/x86/windows/init.c +++ b/src/x86/windows/init.c @@ -10,6 +10,13 @@ #include +#ifdef __GNUC__ + #define CPUINFO_ALLOCA __builtin_alloca +#else + #define CPUINFO_ALLOCA _alloca +#endif + + static inline uint32_t bit_mask(uint32_t bits) { return (UINT32_C(1) << bits) - UINT32_C(1); } @@ -118,7 +125,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV cpuinfo_log_debug("detected %"PRIu32" processor groups", max_group_count); uint32_t processors_count = 0; - uint32_t* processors_per_group = (uint32_t*) _alloca(max_group_count * sizeof(uint32_t)); + uint32_t* processors_per_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t)); for (uint32_t i = 0; i < max_group_count; i++) { processors_per_group[i] = GetMaximumProcessorCount((WORD) i); cpuinfo_log_debug("detected %"PRIu32" processors in group %"PRIu32, @@ -126,7 +133,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV processors_count += processors_per_group[i]; } - uint32_t* processors_before_group = (uint32_t*) _alloca(max_group_count * sizeof(uint32_t)); + uint32_t* processors_before_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t)); for (uint32_t i = 0, count = 0; i < max_group_count; i++) { processors_before_group[i] = count; cpuinfo_log_debug("detected %"PRIu32" processors before group %"PRIu32, @@ -196,7 +203,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV /* Iterate processor groups and set the package part of APIC ID */ for (uint32_t i = 0; i < package_info->Processor.GroupCount; i++) { const uint32_t group_id = package_info->Processor.GroupMask[i].Group; - /* Global index of the first logical processor belonging to this group */ + /* Global index of the first logical processor belonging to this group */ const uint32_t group_processors_start = processors_before_group[group_id]; /* Bitmask representing processors in this group belonging to this package */ KAFFINITY group_processors_mask = package_info->Processor.GroupMask[i].Mask; @@ -245,7 +252,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV /* Iterate processor groups and set the core & SMT parts of APIC ID */ for (uint32_t i = 0; i < core_info->Processor.GroupCount; i++) { const uint32_t group_id = core_info->Processor.GroupMask[i].Group; - /* Global index of the first logical processor belonging to this group */ + /* Global index of the first logical processor belonging to this group */ const uint32_t group_processors_start = processors_before_group[group_id]; /* Bitmask representing processors in this group belonging to this package */ KAFFINITY group_processors_mask = core_info->Processor.GroupMask[i].Mask; @@ -259,7 +266,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV current_package_apic_id = processors[processor_id].apic_id; } /* Core ID w.r.t package */ - const uint32_t package_core_id = core_id - package_core_start; + const uint32_t package_core_id = core_id - package_core_start; /* Update APIC ID with core and SMT parts */ processors[processor_id].apic_id |= diff --git a/tools/cpu-info.c b/tools/cpu-info.c index 7963c00..4453d88 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -14,6 +14,8 @@ static const char* vendor_to_string(enum cpuinfo_vendor vendor) { return "Intel"; case cpuinfo_vendor_amd: return "AMD"; + case cpuinfo_vendor_huawei: + return "Huawei"; case cpuinfo_vendor_hygon: return "Hygon"; case cpuinfo_vendor_arm: @@ -243,6 +245,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Brahma B53"; case cpuinfo_uarch_xgene: return "X-Gene"; + case cpuinfo_uarch_dhyana: + return "Dhyana"; + case cpuinfo_uarch_taishan_v110: + return "TaiShan v110"; default: return NULL; } diff --git a/tools/gpu-dump.c b/tools/gpu-dump.c index d7cfa9e..6d17374 100644 --- a/tools/gpu-dump.c +++ b/tools/gpu-dump.c @@ -314,7 +314,7 @@ void report_gles_attributes(void) { fprintf(stderr, "failed to get the number of EGL frame buffer configurations\n"); goto cleanup; } - + configs = (EGLConfig*) malloc(configs_count * sizeof(EGLConfig)); if (configs == NULL) { fprintf(stderr, "failed to allocate %zu bytes for %d frame buffer configurations\n", -- cgit v1.2.3 From edea24539488b3395bc3c3c049a2ea0f5ccbc7c9 Mon Sep 17 00:00:00 2001 From: Kamil Gurgul Date: Sun, 17 May 2020 18:10:26 +0200 Subject: Fix Android build for new NDK --- .vscode/settings.json | 7 +++++++ jni/Android.mk | 11 ++++------- jni/Application.mk | 4 ++-- 3 files changed, 13 insertions(+), 9 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..974b97c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "C_Cpp.default.configurationProvider": "vector-of-bool.cmake-tools", + "files.associations": { + "cpuinfo.h": "c", + "typeinfo": "c" + } +} \ No newline at end of file diff --git a/jni/Android.mk b/jni/Android.mk index c289df6..f66c5f8 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -5,12 +5,12 @@ LOCAL_MODULE := cpuinfo LOCAL_SRC_FILES := \ src/init.c \ src/api.c \ - src/linux/current.c \ + src/cache.c \ src/linux/processors.c \ src/linux/smallfile.c \ src/linux/multiline.c \ src/linux/cpulist.c -ifeq ($(TARGET_ARCH_ABI),$(filter $(TARGET_ARCH_ABI),armeabi armeabi-v7a arm64-v8a)) +ifeq ($(TARGET_ARCH_ABI),$(filter $(TARGET_ARCH_ABI),armeabi-v7a arm64-v8a)) LOCAL_SRC_FILES += \ src/arm/uarch.c \ src/arm/cache.c \ @@ -21,9 +21,6 @@ LOCAL_SRC_FILES += \ src/arm/linux/midr.c \ src/arm/linux/hwcap.c \ src/arm/android/properties.c -ifeq ($(TARGET_ARCH_ABI),armeabi) -LOCAL_SRC_FILES += src/arm/linux/aarch32-isa.c.arm -endif # armeabi ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) LOCAL_SRC_FILES += src/arm/linux/aarch32-isa.c endif # armeabi-v7a @@ -62,8 +59,8 @@ else LOCAL_CFLAGS += -DCPUINFO_LOG_LEVEL=0 endif LOCAL_STATIC_LIBRARIES := clog -include $(BUILD_STATIC_LIBRARY) - +# include $(BUILD_STATIC_LIBRARY) +include $(BUILD_SHARED_LIBRARY) $(call import-add-path,$(LOCAL_PATH)/deps) diff --git a/jni/Application.mk b/jni/Application.mk index 3667da3..04f7b0e 100644 --- a/jni/Application.mk +++ b/jni/Application.mk @@ -1,4 +1,4 @@ -APP_PLATFORM := android-15 +APP_PLATFORM := android-16 APP_PIE := true APP_STL := c++_static -APP_ABI := armeabi armeabi-v7a arm64-v8a x86 x86_64 +APP_ABI := armeabi-v7a arm64-v8a x86 x86_64 -- cgit v1.2.3 From d659a1bff8896c4589123d6cade4004e3c09273a Mon Sep 17 00:00:00 2001 From: Kamil Gurgul Date: Sun, 17 May 2020 18:55:49 +0200 Subject: Android fix cleanup --- .vscode/settings.json | 7 ------- jni/Android.mk | 5 ++--- 2 files changed, 2 insertions(+), 10 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 974b97c..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "C_Cpp.default.configurationProvider": "vector-of-bool.cmake-tools", - "files.associations": { - "cpuinfo.h": "c", - "typeinfo": "c" - } -} \ No newline at end of file diff --git a/jni/Android.mk b/jni/Android.mk index f66c5f8..2212625 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -27,7 +27,7 @@ endif # armeabi-v7a ifeq ($(TARGET_ARCH_ABI),arm64-v8a) LOCAL_SRC_FILES += src/arm/linux/aarch64-isa.c endif # arm64-v8a -endif # armeabi, armeabi-v7a, or arm64-v8a +endif # armeabi-v7a, or arm64-v8a ifeq ($(TARGET_ARCH_ABI),$(filter $(TARGET_ARCH_ABI),x86 x86_64)) LOCAL_SRC_FILES += \ src/x86/init.c \ @@ -59,8 +59,7 @@ else LOCAL_CFLAGS += -DCPUINFO_LOG_LEVEL=0 endif LOCAL_STATIC_LIBRARIES := clog -# include $(BUILD_STATIC_LIBRARY) -include $(BUILD_SHARED_LIBRARY) +include $(BUILD_STATIC_LIBRARY) $(call import-add-path,$(LOCAL_PATH)/deps) -- cgit v1.2.3 From 24e36b26eb28b1354e2625bd7d4530acbf3566c0 Mon Sep 17 00:00:00 2001 From: Syoyo Fujita Date: Fri, 22 May 2020 03:19:40 +0900 Subject: Fix build on MinGW + Linux cross-compile --- src/x86/windows/init.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/x86/windows/init.c b/src/x86/windows/init.c index cf549d5..f4e7a99 100644 --- a/src/x86/windows/init.c +++ b/src/x86/windows/init.c @@ -8,7 +8,12 @@ #include #include +#if defined(__MINGW32__) +// Windows header filename must be all lower case on MinGW +#include +#else #include +#endif #ifdef __GNUC__ #define CPUINFO_ALLOCA __builtin_alloca -- cgit v1.2.3 From ba2ca40c0534d355a99564e60e3e69d450f4e5b0 Mon Sep 17 00:00:00 2001 From: Syoyo Fujita Date: Fri, 22 May 2020 13:14:38 +0900 Subject: Using lower-case Win32 header filename should be always safe. --- src/x86/windows/init.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/x86/windows/init.c b/src/x86/windows/init.c index f4e7a99..9a23bd7 100644 --- a/src/x86/windows/init.c +++ b/src/x86/windows/init.c @@ -8,12 +8,7 @@ #include #include -#if defined(__MINGW32__) -// Windows header filename must be all lower case on MinGW #include -#else -#include -#endif #ifdef __GNUC__ #define CPUINFO_ALLOCA __builtin_alloca -- cgit v1.2.3 From a27ca5bf8b555530c3c5fd5432c7df506d14e15c Mon Sep 17 00:00:00 2001 From: Ashkan Aliabadi Date: Sun, 24 May 2020 22:33:14 -0700 Subject: Upstream cpuinfo updates in XNNPACK as of XNNPACK:33fcf7895be9cd64fef52c6e99a48d4dbc3f4b8b --- src/arm/cache.c | 9 ++++ src/arm/linux/aarch32-isa.c | 20 +++++--- src/arm/linux/aarch64-isa.c | 23 ++++++--- src/arm/linux/api.h | 8 +++ src/arm/linux/chipset.c | 117 +++++++++++++++++++++++++++++++++++++++++++- src/arm/linux/cpuinfo.c | 15 +++++- src/arm/linux/init.c | 10 ++-- test/arm-cache.cc | 88 +++++++++++++++++++++++++++++++++ test/get-current.cc | 6 +++ 9 files changed, 276 insertions(+), 20 deletions(-) diff --git a/src/arm/cache.c b/src/arm/cache.c index 1a8bf91..666ad78 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -635,6 +635,13 @@ void cpuinfo_arm_decode_cache( break; } break; + case cpuinfo_arm_chipset_series_broadcom_bcm: + switch (chipset->model) { + case 2837: /* BCM2837 */ + l2_size = 512 * 1024; + break; + } + break; case cpuinfo_arm_chipset_series_samsung_exynos: l1_size = 32 * 1024; break; @@ -922,11 +929,13 @@ void cpuinfo_arm_decode_cache( * | MediaTek Helio X23 | 2(+4+4) | ? | ? | ? | | * | MediaTek Helio X25 | 2(+4+4) | ? | ? | ? | | * | MediaTek Helio X27 | 2(+4+4) | ? | ? | ? | | + * | Broadcom BCM2711 | 4 | 32K | 48K | 1M | [4] | * +---------------------+---------+-----------+-----------+------------+-----------+ * * [1] http://pdadb.net/index.php?m=processor&id=578&c=qualcomm_snapdragon_618_msm8956__snapdragon_650 * [2] http://pdadb.net/index.php?m=processor&id=667&c=qualcomm_snapdragon_620_apq8076__snapdragon_652 * [3] http://pdadb.net/index.php?m=processor&id=692&c=qualcomm_snapdragon_653_msm8976sg__msm8976_pro + * [4] https://www.raspberrypi.org/documentation/hardware/raspberrypi/bcm2711/README.md */ uint32_t l2_size; switch (chipset->series) { diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c index 6aedda3..64dd168 100644 --- a/src/arm/linux/aarch32-isa.c +++ b/src/arm/linux/aarch32-isa.c @@ -77,18 +77,24 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( /* * NEON VDOT instructions are not indicated in /proc/cpuinfo. - * Use a MIDR-based heuristic to whitelist processors known to support it: - * - Processors with Qualcomm-modified Cortex-A76 cores - * - Kirin 980 processor + * Use a MIDR-based heuristic to whitelist processors known to support it. */ switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ + case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ + case UINT32_C(0x53000030): /* Exynos-M4 */ + case UINT32_C(0x53000040): /* Exynos-M5 */ isa->dot = true; break; - default: - if (chipset->series == cpuinfo_arm_chipset_series_hisilicon_kirin && chipset->model == 980) { - isa->dot = true; - } + case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 1); + break; + case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 2); break; } } else { diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c index f193e81..619cda5 100644 --- a/src/arm/linux/aarch64-isa.c +++ b/src/arm/linux/aarch64-isa.c @@ -67,21 +67,32 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( } /* * Many phones ship with an old kernel configuration that doesn't report UDOT/SDOT instructions. - * Use a MIDR-based heuristic to whitelist processors known to support it: - * - Processors with Qualcomm-modified Cortex-A76 cores - * - Kirin 980 processor + * Use a MIDR-based heuristic to whitelist processors known to support it. */ switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x4100D060): /* Cortex-A65 */ + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0C0): /* Neoverse N1 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D4A0): /* Neoverse E1 */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ + case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ + case UINT32_C(0x53000030): /* Exynos-M4 */ + case UINT32_C(0x53000040): /* Exynos-M5 */ isa->dot = true; break; + case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 1); + break; + case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 2); + break; default: if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) { isa->dot = true; } - if (chipset->series == cpuinfo_arm_chipset_series_hisilicon_kirin && chipset->model == 980) { - isa->dot = true; - } break; } if (features & CPUINFO_ARM_LINUX_FEATURE_JSCVT) { diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h index f99da66..2597e49 100644 --- a/src/arm/linux/api.h +++ b/src/arm/linux/api.h @@ -11,6 +11,8 @@ /* No hard limit in the kernel, maximum length observed on non-rogue kernels is 64 */ #define CPUINFO_HARDWARE_VALUE_MAX 64 +/* No hard limit in the kernel, maximum length on Raspberry Pi is 8. Add 1 symbol to detect overly large revision strings */ +#define CPUINFO_REVISION_VALUE_MAX 9 #ifdef __ANDROID__ /* As per include/sys/system_properties.h in Android NDK */ @@ -259,6 +261,7 @@ static inline bool cpuinfo_arm_linux_processor_not_equals( CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo( char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + char revision[restrict static CPUINFO_REVISION_VALUE_MAX], uint32_t max_processors_count, struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]); @@ -297,6 +300,7 @@ CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo( CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset( const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + const char revision[restrict static CPUINFO_REVISION_VALUE_MAX], uint32_t cores, uint32_t max_cpu_freq_max); #endif @@ -327,6 +331,10 @@ CPUINFO_INTERNAL struct cpuinfo_arm_chipset CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_hardware_chipname( const char ro_hardware_chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]); +#else + CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_revision( + const char proc_cpuinfo_revision[restrict static CPUINFO_REVISION_VALUE_MAX]); #endif CPUINFO_INTERNAL bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic( diff --git a/src/arm/linux/chipset.c b/src/arm/linux/chipset.c index 35058d9..e36283c 100644 --- a/src/arm/linux/chipset.c +++ b/src/arm/linux/chipset.c @@ -1011,12 +1011,59 @@ write_chipset: return true; } +/** + * Tries to match /BCM\d{4}$/ signature for Broadcom BCM chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_bcm( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect exactly 7 symbols: "BCM" (3 symbols) + 4-digit model number */ + if (start + 7 != end) { + return false; + } + + /* Check that the string starts with "BCM". + * The first three characters are loaded and compared as a 24-bit little endian word. + */ + const uint32_t expected_bcm = load_u24le(start); + if (expected_bcm != UINT32_C(0x004D4342) /* "MCB" = reverse("BCM") */) { + return false; + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 3; i < 7; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Return parsed chipset. */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_broadcom, + .series = cpuinfo_arm_chipset_series_broadcom_bcm, + .model = model, + }; + return true; +} + /** * Tries to match /OMAP\d{4}$/ signature for Texas Instruments OMAP chipsets. * If match successful, extracts model information into \p chipset argument. * * @param start - start of the /proc/cpuinfo Hardware string to match. - * @param end - end of the /proc/cpuinfo Hardaware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. * @param[out] chipset - location where chipset information will be stored upon a successful match. * * @returns true if signature matched, false otherwise. @@ -2328,6 +2375,14 @@ struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_ha return chipset; } + /* Check Broadcom BCM signature */ + if (match_bcm(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Broadcom BCM signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + #if CPUINFO_ARCH_ARM /* Check Texas Instruments OMAP signature */ if (match_omap(hardware, hardware_end, &chipset)) { @@ -3713,6 +3768,62 @@ void cpuinfo_arm_chipset_to_string( return chipset; } #else /* !defined(__ANDROID__) */ + /* + * Fix commonly misreported Broadcom BCM models on Raspberry Pi boards. + * + * @param[in,out] chipset - chipset name to fix. + * @param[in] revision - /proc/cpuinfo Revision string. + */ + void cpuinfo_arm_fixup_raspberry_pi_chipset( + struct cpuinfo_arm_chipset chipset[restrict static 1], + const char revision[restrict static CPUINFO_HARDWARE_VALUE_MAX]) + { + const size_t revision_length = strnlen(revision, CPUINFO_REVISION_VALUE_MAX); + + /* Parse revision codes according to https://www.raspberrypi.org/documentation/hardware/raspberrypi/revision-codes/README.md */ + #if CPUINFO_ARCH_ARM + if (revision_length == 4) { + /* + * Old-style revision codes. + * All Raspberry Pi models with old-style revision code use Broadcom BCM2835. + */ + + /* BCM2835 often misreported as BCM2708 */ + if (chipset->model == 2708) { + chipset->model = 2835; + } + return; + } + #endif + if ((size_t) (revision_length - 5) <= (size_t) (8 - 5) /* 5 <= length(revision) <= 8 */) { + /* New-style revision codes */ + + uint32_t model = 0; + switch (revision[revision_length - 4]) { + case '0': + /* BCM2835 */ + model = 2835; + break; + case '1': + /* BCM2836 */ + model = 2836; + break; + case '2': + /* BCM2837 */ + model = 2837; + break; + case '3': + /* BCM2711 */ + model = 2711; + break; + } + + if (model != 0) { + chipset->model = model; + chipset->suffix[0] = 0; + } + } + } /* * Decodes chipset name from /proc/cpuinfo Hardware string. @@ -3727,6 +3838,7 @@ void cpuinfo_arm_chipset_to_string( */ struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset( const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + const char revision[restrict static CPUINFO_REVISION_VALUE_MAX], uint32_t cores, uint32_t max_cpu_freq_max) { @@ -3736,6 +3848,9 @@ void cpuinfo_arm_chipset_to_string( if (chipset.vendor == cpuinfo_arm_chipset_vendor_unknown) { cpuinfo_log_warning( "chipset detection failed: /proc/cpuinfo Hardware string did not match known signatures"); + } else if (chipset.vendor == cpuinfo_arm_chipset_vendor_broadcom) { + /* Raspberry Pi kernel reports bogus chipset models; detect chipset from RPi revision */ + cpuinfo_arm_fixup_raspberry_pi_chipset(&chipset, revision); } else { cpuinfo_arm_fixup_chipset(&chipset, cores, max_cpu_freq_max); } diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c index c70055f..90e1631 100644 --- a/src/arm/linux/cpuinfo.c +++ b/src/arm/linux/cpuinfo.c @@ -631,6 +631,7 @@ static void parse_cache_number( struct proc_cpuinfo_parser_state { char* hardware; + char* revision; uint32_t processor_index; uint32_t max_processors_count; struct cpuinfo_arm_linux_processor* processors; @@ -791,7 +792,17 @@ static bool parse_line( memcpy(state->hardware, value_start, value_length); cpuinfo_log_debug("parsed /proc/cpuinfo Hardware = \"%.*s\"", (int) value_length, value_start); } else if (memcmp(line_start, "Revision", key_length) == 0) { - /* Board revision, no use for now */ + size_t value_length = value_end - value_start; + if (value_length > CPUINFO_REVISION_VALUE_MAX) { + cpuinfo_log_info( + "length of Revision value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the limit", + (int) value_length, value_start, CPUINFO_REVISION_VALUE_MAX); + value_length = CPUINFO_REVISION_VALUE_MAX; + } else { + state->revision[value_length] = '\0'; + } + memcpy(state->revision, value_start, value_length); + cpuinfo_log_debug("parsed /proc/cpuinfo Revision = \"%.*s\"", (int) value_length, value_start); } else { goto unknown; } @@ -881,11 +892,13 @@ static bool parse_line( bool cpuinfo_arm_linux_parse_proc_cpuinfo( char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + char revision[restrict static CPUINFO_REVISION_VALUE_MAX], uint32_t max_processors_count, struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]) { struct proc_cpuinfo_parser_state state = { .hardware = hardware, + .revision = revision, .processor_index = 0, .max_processors_count = max_processors_count, .processors = processors, diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c index 6272abf..89d957e 100644 --- a/src/arm/linux/init.c +++ b/src/arm/linux/init.c @@ -167,8 +167,9 @@ void cpuinfo_arm_linux_init(void) { struct cpuinfo_android_properties android_properties; cpuinfo_arm_android_parse_properties(&android_properties); #else - char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX] = { 0 }; + char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX]; #endif + char proc_cpuinfo_revision[CPUINFO_REVISION_VALUE_MAX]; if (!cpuinfo_arm_linux_parse_proc_cpuinfo( #if defined(__ANDROID__) @@ -176,6 +177,7 @@ void cpuinfo_arm_linux_init(void) { #else proc_cpuinfo_hardware, #endif + proc_cpuinfo_revision, arm_linux_processors_count, arm_linux_processors)) { cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo"); @@ -228,10 +230,8 @@ void cpuinfo_arm_linux_init(void) { const struct cpuinfo_arm_chipset chipset = cpuinfo_arm_android_decode_chipset(&android_properties, valid_processors, 0); #else - const struct cpuinfo_arm_chipset chipset = { - .vendor = cpuinfo_arm_chipset_vendor_unknown, - .series = cpuinfo_arm_chipset_series_unknown, - }; + const struct cpuinfo_arm_chipset chipset = + cpuinfo_arm_linux_decode_chipset(proc_cpuinfo_hardware, proc_cpuinfo_revision, valid_processors, 0); #endif #if CPUINFO_ARCH_ARM diff --git a/test/arm-cache.cc b/test/arm-cache.cc index 7d2e4a4..4d6218b 100644 --- a/test/arm-cache.cc +++ b/test/arm-cache.cc @@ -1664,3 +1664,91 @@ TEST(ROCKCHIP, rk3368) { EXPECT_EQ(256 * 1024, little_l2.size); EXPECT_EQ(0, little_l3.size); } + +TEST(BROADCOM, bcm2835) { + const struct cpuinfo_arm_chipset chipset = { + .vendor = cpuinfo_arm_chipset_vendor_broadcom, + .series = cpuinfo_arm_chipset_series_broadcom_bcm, + .model = 2835, + }; + + struct cpuinfo_cache l1i = { 0 }; + struct cpuinfo_cache l1d = { 0 }; + struct cpuinfo_cache l2 = { 0 }; + struct cpuinfo_cache l3 = { 0 }; + cpuinfo_arm_decode_cache( + cpuinfo_uarch_arm11, 4, UINT32_C(0x410FB767), + &chipset, 0, 4, + &l1i, &l1d, &l2, &l3); + + EXPECT_EQ(16 * 1024, l1i.size); + EXPECT_EQ(16 * 1024, l1d.size); + EXPECT_EQ(0, l2.size); + EXPECT_EQ(0, big_l3.size); +} + +TEST(BROADCOM, bcm2836) { + const struct cpuinfo_arm_chipset chipset = { + .vendor = cpuinfo_arm_chipset_vendor_broadcom, + .series = cpuinfo_arm_chipset_series_broadcom_bcm, + .model = 2836, + }; + + struct cpuinfo_cache l1i = { 0 }; + struct cpuinfo_cache l1d = { 0 }; + struct cpuinfo_cache l2 = { 0 }; + struct cpuinfo_cache l3 = { 0 }; + cpuinfo_arm_decode_cache( + cpuinfo_uarch_cortex_a7, 4, UINT32_C(0x410FC075), + &chipset, 0, 4, + &l1i, &l1d, &l2, &l3); + + EXPECT_EQ(32 * 1024, l1i.size); + EXPECT_EQ(32 * 1024, l1d.size); + EXPECT_EQ(512 * 1024, l2.size); + EXPECT_EQ(0, big_l3.size); +} + +TEST(BROADCOM, bcm2837) { + const struct cpuinfo_arm_chipset chipset = { + .vendor = cpuinfo_arm_chipset_vendor_broadcom, + .series = cpuinfo_arm_chipset_series_broadcom_bcm, + .model = 2837, + }; + + struct cpuinfo_cache l1i = { 0 }; + struct cpuinfo_cache l1d = { 0 }; + struct cpuinfo_cache l2 = { 0 }; + struct cpuinfo_cache l3 = { 0 }; + cpuinfo_arm_decode_cache( + cpuinfo_uarch_cortex_a53, 4, UINT32_C(0x410FD034), + &chipset, 0, 4, + &l1i, &l1d, &l2, &l3); + + EXPECT_EQ(16 * 1024, l1i.size); + EXPECT_EQ(16 * 1024, l1d.size); + EXPECT_EQ(512 * 1024, l2.size); + EXPECT_EQ(0, big_l3.size); +} + +TEST(BROADCOM, bcm2711) { + const struct cpuinfo_arm_chipset chipset = { + .vendor = cpuinfo_arm_chipset_vendor_broadcom, + .series = cpuinfo_arm_chipset_series_broadcom_bcm, + .model = 2711, + }; + + struct cpuinfo_cache l1i = { 0 }; + struct cpuinfo_cache l1d = { 0 }; + struct cpuinfo_cache l2 = { 0 }; + struct cpuinfo_cache l3 = { 0 }; + cpuinfo_arm_decode_cache( + cpuinfo_uarch_cortex_a72, 4, UINT32_C(0x410FD083), + &chipset, 0, 4, + &l1i, &l1d, &l2, &l3); + + EXPECT_EQ(48 * 1024, l1i.size); + EXPECT_EQ(32 * 1024, l1d.size); + EXPECT_EQ(1024 * 1024, l2.size); + EXPECT_EQ(0, big_l3.size); +} diff --git a/test/get-current.cc b/test/get-current.cc index f410b12..96b11dc 100644 --- a/test/get-current.cc +++ b/test/get-current.cc @@ -36,3 +36,9 @@ TEST(CURRENT_UARCH_INDEX, within_bounds) { ASSERT_LT(cpuinfo_get_current_uarch_index(), cpuinfo_get_uarchs_count()); } + +TEST(CURRENT_UARCH_INDEX_WITH_DEFAULT, within_bounds) { + ASSERT_TRUE(cpuinfo_initialize()); + + ASSERT_LE(cpuinfo_get_current_uarch_index_with_default(cpuinfo_get_uarchs_count()), cpuinfo_get_uarchs_count()); +} -- cgit v1.2.3 From 7e4c0099cc84b744d3844c9811f2470c50d6dcbf Mon Sep 17 00:00:00 2001 From: Ashkan Aliabadi Date: Mon, 1 Jun 2020 18:23:11 -0700 Subject: Upstream cpuinfo updates as of XNNPACK:5d67652eb23c2e94ffeeafd3e82a41745eb3ce41 --- include/cpuinfo.h | 22 +++++++++++----------- src/api.c | 12 ++++++++---- src/arm/cache.c | 1 - src/arm/mach/init.c | 9 +++++++++ src/arm/midr.h | 6 ++++++ src/arm/uarch.c | 4 ++-- tools/cpu-info.c | 2 -- 7 files changed, 36 insertions(+), 20 deletions(-) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index e89a4c1..6c67c34 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -46,14 +46,6 @@ #endif #endif -#if CPUINFO_ARCH_X86 && defined(_MSC_VER) - #define CPUINFO_ABI __cdecl -#elif CPUINFO_ARCH_X86 && defined(__GNUC__) - #define CPUINFO_ABI __attribute__((__cdecl__)) -#else - #define CPUINFO_ABI -#endif - /* Define other architecture-specific macros as 0 */ #ifndef CPUINFO_ARCH_X86 @@ -88,6 +80,14 @@ #define CPUINFO_ARCH_WASMSIMD 0 #endif +#if CPUINFO_ARCH_X86 && defined(_MSC_VER) + #define CPUINFO_ABI __cdecl +#elif CPUINFO_ARCH_X86 && defined(__GNUC__) + #define CPUINFO_ABI __attribute__((__cdecl__)) +#else + #define CPUINFO_ABI +#endif + #define CPUINFO_CACHE_UNIFIED 0x00000001 #define CPUINFO_CACHE_INCLUSIVE 0x00000002 #define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004 @@ -415,8 +415,6 @@ enum cpuinfo_uarch { cpuinfo_uarch_cortex_a75 = 0x00300375, /** ARM Cortex-A76. */ cpuinfo_uarch_cortex_a76 = 0x00300376, - /** ARM Cortex-A76AE. */ - cpuinfo_uarch_cortex_a76ae = 0x00300378, /** ARM Cortex-A77. */ cpuinfo_uarch_cortex_a77 = 0x00300377, @@ -454,7 +452,9 @@ enum cpuinfo_uarch { /** Samsung Exynos M5 (Exynos 9830 big cores). */ cpuinfo_uarch_exynos_m5 = 0x00600104, - /* Old names for Exynos. */ + /* Deprecated synonym for Cortex-A76 */ + cpuinfo_uarch_cortex_a76ae = 0x00300376, + /* Deprecated names for Exynos. */ cpuinfo_uarch_mongoose_m1 = 0x00600100, cpuinfo_uarch_mongoose_m2 = 0x00600101, cpuinfo_uarch_meerkat_m3 = 0x00600102, diff --git a/src/api.c b/src/api.c index 832b085..f91b421 100644 --- a/src/api.c +++ b/src/api.c @@ -314,7 +314,8 @@ const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor"); } #ifdef __linux__ - unsigned cpu; + /* Initializing this variable silences a MemorySanitizer error. */ + unsigned cpu = 0; if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { return 0; } @@ -332,7 +333,8 @@ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core"); } #ifdef __linux__ - unsigned cpu; + /* Initializing this variable silences a MemorySanitizer error. */ + unsigned cpu = 0; if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { return 0; } @@ -357,7 +359,8 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) { } /* General case */ - unsigned cpu; + /* Initializing this variable silences a MemorySanitizer error. */ + unsigned cpu = 0; if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { return 0; } @@ -387,7 +390,8 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t defau } /* General case */ - unsigned cpu; + /* Initializing this variable silences a MemorySanitizer error. */ + unsigned cpu = 0; if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { return default_uarch_index; } diff --git a/src/arm/cache.c b/src/arm/cache.c index 666ad78..446b02b 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -1115,7 +1115,6 @@ void cpuinfo_arm_decode_cache( break; } case cpuinfo_uarch_cortex_a76: - case cpuinfo_uarch_cortex_a76ae: { /* * ARM Cortex-A76 Core Technical Reference Manual diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index bd27259..058cfc2 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -347,6 +347,15 @@ void cpuinfo_arm_mach_init(void) { cpuinfo_isa.fp16arith = true; } + /* + * There does not yet seem to exist an OS mechanism to detect support for + * ARMv8.2 optional dot-product instructions, so we currently whitelist CPUs + * known to support these instruction. + */ + if (cpu_family == CPUFAMILY_ARM_LIGHTNING_THUNDER) { + cpuinfo_isa.dot = true; + } + uint32_t num_clusters = 1; for (uint32_t i = 0; i < mach_topology.cores; i++) { cores[i] = (struct cpuinfo_core) { diff --git a/src/arm/midr.h b/src/arm/midr.h index d5a28e3..34d7780 100644 --- a/src/arm/midr.h +++ b/src/arm/midr.h @@ -189,22 +189,28 @@ inline static uint32_t midr_score_core(uint32_t midr) { case UINT32_C(0x4100D0A0): /* Cortex-A75 */ case UINT32_C(0x4100D090): /* Cortex-A73 */ case UINT32_C(0x4100D080): /* Cortex-A72 */ +#if CPUINFO_ARCH_ARM case UINT32_C(0x4100C0F0): /* Cortex-A15 */ case UINT32_C(0x4100C0E0): /* Cortex-A17 */ case UINT32_C(0x4100C0D0): /* Rockchip RK3288 cores */ case UINT32_C(0x4100C0C0): /* Cortex-A12 */ +#endif /* CPUINFO_ARCH_ARM */ /* These cores are always in big role */ return 5; case UINT32_C(0x4100D070): /* Cortex-A57 */ /* Cortex-A57 can be in LITTLE role w.r.t. Denver 2, or in big role w.r.t. Cortex-A53 */ return 4; +#if CPUINFO_ARCH_ARM64 case UINT32_C(0x4100D060): /* Cortex-A65 */ +#endif /* CPUINFO_ARCH_ARM64 */ case UINT32_C(0x4100D050): /* Cortex-A55 */ case UINT32_C(0x4100D030): /* Cortex-A53 */ /* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */ return 2; case UINT32_C(0x4100D040): /* Cortex-A35 */ +#if CPUINFO_ARCH_ARM case UINT32_C(0x4100C070): /* Cortex-A7 */ +#endif /* CPUINFO_ARCH_ARM */ case UINT32_C(0x51008050): /* Kryo 485 Silver */ case UINT32_C(0x51008030): /* Kryo 385 Silver */ case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */ diff --git a/src/arm/uarch.c b/src/arm/uarch.c index 63b1a55..55b61df 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -88,8 +88,8 @@ void cpuinfo_arm_decode_vendor_uarch( case 0xD0D: *uarch = cpuinfo_uarch_cortex_a77; break; - case 0xD0E: - *uarch = cpuinfo_uarch_cortex_a76ae; + case 0xD0E: /* Cortex-A76AE */ + *uarch = cpuinfo_uarch_cortex_a76; break; #if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) case 0xD4A: diff --git a/tools/cpu-info.c b/tools/cpu-info.c index 4453d88..2759068 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -181,8 +181,6 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Cortex-A75"; case cpuinfo_uarch_cortex_a76: return "Cortex-A76"; - case cpuinfo_uarch_cortex_a76ae: - return "Cortex-A76AE"; case cpuinfo_uarch_cortex_a77: return "Cortex-A77"; case cpuinfo_uarch_scorpion: -- cgit v1.2.3 From f55824179793d83183f0c8caf20c4b57db2f5f3b Mon Sep 17 00:00:00 2001 From: Ashkan Aliabadi Date: Thu, 11 Jun 2020 21:17:20 -0700 Subject: Upstream cpuinfo updates as of XNNPACK:016e586e984c2d05a38a7d98fcade8f165a79278. --- include/cpuinfo.h | 23 +++++++++++++++++++++++ src/arm/linux/aarch32-isa.c | 1 + src/arm/mach/init.c | 1 + src/arm/midr.h | 1 + src/arm/uarch.c | 3 +++ tools/cpu-info.c | 2 ++ tools/isa-info.c | 1 + 7 files changed, 32 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 6c67c34..85ce174 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -417,6 +417,8 @@ enum cpuinfo_uarch { cpuinfo_uarch_cortex_a76 = 0x00300376, /** ARM Cortex-A77. */ cpuinfo_uarch_cortex_a77 = 0x00300377, + /** ARM Cortex-A78. */ + cpuinfo_uarch_cortex_a78 = 0x00300378, /** ARM Neoverse N1. */ cpuinfo_uarch_neoverse_n1 = 0x00300400, @@ -1434,6 +1436,7 @@ static inline bool cpuinfo_has_x86_sha(void) { bool armv6k; bool armv7; bool armv7mp; + bool armv8; bool idiv; bool vfpv2; @@ -1521,6 +1524,16 @@ static inline bool cpuinfo_has_arm_v7mp(void) { #endif } +static inline bool cpuinfo_has_arm_v8(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.armv8; + #else + return false; + #endif +} + static inline bool cpuinfo_has_arm_idiv(void) { #if CPUINFO_ARCH_ARM64 return true; @@ -1645,6 +1658,16 @@ static inline bool cpuinfo_has_arm_neon_fma(void) { #endif } +static inline bool cpuinfo_has_arm_neon_v8(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.armv8; + #else + return false; + #endif +} + static inline bool cpuinfo_has_arm_atomics(void) { #if CPUINFO_ARCH_ARM64 return cpuinfo_isa.atomics; diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c index 64dd168..41f9972 100644 --- a/src/arm/linux/aarch32-isa.c +++ b/src/arm/linux/aarch32-isa.c @@ -43,6 +43,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( isa->armv6k = true; isa->armv7 = true; isa->armv7mp = true; + isa->armv8 = true; isa->thumb = true; isa->thumb2 = true; isa->idiv = true; diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index 058cfc2..e912de6 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -307,6 +307,7 @@ void cpuinfo_arm_mach_init(void) { case CPU_TYPE_ARM: switch (cpu_subtype) { case CPU_SUBTYPE_ARM_V8: + cpuinfo_isa.armv8 = true; cpuinfo_isa.aes = true; cpuinfo_isa.sha1 = true; cpuinfo_isa.sha2 = true; diff --git a/src/arm/midr.h b/src/arm/midr.h index 34d7780..2638517 100644 --- a/src/arm/midr.h +++ b/src/arm/midr.h @@ -183,6 +183,7 @@ inline static uint32_t midr_score_core(uint32_t midr) { case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */ case UINT32_C(0x51002050): /* Kryo Gold */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x4100D410): /* Cortex-A78 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ case UINT32_C(0x4100D0B0): /* Cortex-A76 */ diff --git a/src/arm/uarch.c b/src/arm/uarch.c index 55b61df..0d7a7d7 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -91,6 +91,9 @@ void cpuinfo_arm_decode_vendor_uarch( case 0xD0E: /* Cortex-A76AE */ *uarch = cpuinfo_uarch_cortex_a76; break; + case 0xD41: /* Cortex-A78 */ + *uarch = cpuinfo_uarch_cortex_a78; + break; #if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) case 0xD4A: *uarch = cpuinfo_uarch_neoverse_e1; diff --git a/tools/cpu-info.c b/tools/cpu-info.c index 2759068..429bbfa 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -183,6 +183,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Cortex-A76"; case cpuinfo_uarch_cortex_a77: return "Cortex-A77"; + case cpuinfo_uarch_cortex_a78: + return "Cortex-A78"; case cpuinfo_uarch_scorpion: return "Scorpion"; case cpuinfo_uarch_krait: diff --git a/tools/isa-info.c b/tools/isa-info.c index 98ef919..8365846 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -121,6 +121,7 @@ int main(int argc, char** argv) { printf("\tARMv6-K: %s\n", cpuinfo_has_arm_v6k() ? "yes" : "no"); printf("\tARMv7: %s\n", cpuinfo_has_arm_v7() ? "yes" : "no"); printf("\tARMv7 MP: %s\n", cpuinfo_has_arm_v7mp() ? "yes" : "no"); + printf("\tARMv8: %s\n", cpuinfo_has_arm_v8() ? "yes" : "no"); printf("\tIDIV: %s\n", cpuinfo_has_arm_idiv() ? "yes" : "no"); printf("Floating-Point support:\n"); -- cgit v1.2.3