aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAshkan Aliabadi <ashkan.aliabadi@gmail.com>2020-05-08 20:40:33 -0700
committerDavid Reiss <dreiss@fb.com>2020-05-11 09:57:00 -0700
commitc2092219e7c874783a00a62edb94ddc672f57ab3 (patch)
tree87c8ea93fb7e78de4243ae874fc0686c20ef26fc
parent2b14e445016dd46f7de821cdf3093e2823b9ab21 (diff)
downloadcpuinfo-c2092219e7c874783a00a62edb94ddc672f57ab3.tar.gz
Upstream cpuinfo updates in XNNPACK as of XNNPACK:d793f6c2ec145be3ddbffea951e6e5480f4646b8.
-rw-r--r--CMakeLists.txt16
-rw-r--r--README.md5
-rw-r--r--bench/get-current.cc9
-rwxr-xr-xconfigure.py3
-rw-r--r--include/cpuinfo.h19
-rw-r--r--src/api.c30
-rw-r--r--src/arm/cache.c43
-rw-r--r--src/arm/linux/aarch32-isa.c2
-rw-r--r--src/arm/linux/clusters.c10
-rw-r--r--src/arm/linux/cpuinfo.c6
-rw-r--r--src/arm/tlb.c2
-rw-r--r--src/arm/uarch.c6
-rw-r--r--src/cpuinfo/internal-api.h4
-rw-r--r--src/emscripten/init.c277
-rw-r--r--src/init.c6
-rw-r--r--src/linux/mockfile.c2
-rw-r--r--src/x86/cache/descriptor.c14
-rw-r--r--src/x86/mockcpuid.c2
-rw-r--r--src/x86/name.c9
-rw-r--r--src/x86/vendor.c2
-rw-r--r--src/x86/windows/init.c17
-rw-r--r--tools/cpu-info.c6
-rw-r--r--tools/gpu-dump.c2
23 files changed, 423 insertions, 69 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fefb60b..b85620f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,7 +79,7 @@ IF(NOT CMAKE_SYSTEM_NAME)
"Target operating system is not specified. "
"cpuinfo will compile, but cpuinfo_initialize() will always fail.")
SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
-ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|Darwin|Linux|Android)$")
+ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$")
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
MESSAGE(WARNING
"Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. "
@@ -125,7 +125,7 @@ SET(CPUINFO_SRCS
src/cache.c)
IF(CPUINFO_SUPPORTED_PLATFORM)
- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
+ IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
LIST(APPEND CPUINFO_SRCS
src/x86/init.c
src/x86/info.c
@@ -143,7 +143,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
src/x86/linux/cpuinfo.c)
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c)
- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c)
ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$")
@@ -175,6 +175,11 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
ENDIF()
ENDIF()
+ IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ LIST(APPEND CPUINFO_SRCS
+ src/emscripten/init.c)
+ ENDIF()
+
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
LIST(APPEND CPUINFO_SRCS
src/linux/smallfile.c
@@ -205,6 +210,11 @@ ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS})
CPUINFO_TARGET_ENABLE_C99(cpuinfo)
CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals)
CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo)
+IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
+ # Target Windows 7+ API
+ TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _WIN32_WINNT=0x0601)
+ TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _WIN32_WINNT=0x0601)
+ENDIF()
SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h)
TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include)
TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src)
diff --git a/README.md b/README.md
index ee5fb82..97e65cd 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,7 @@ Detect if target is a 32-bit or 64-bit ARM system:
```
Check if the host CPU support ARM NEON
+
```c
cpuinfo_initialize();
if (cpuinfo_has_arm_neon()) {
@@ -57,6 +58,7 @@ if (cpuinfo_has_arm_neon()) {
```
Check if the host CPU supports x86 AVX
+
```c
cpuinfo_initialize();
if (cpuinfo_has_x86_avx()) {
@@ -65,6 +67,7 @@ if (cpuinfo_has_x86_avx()) {
```
Check if the thread runs on a Cortex-A53 core
+
```c
cpuinfo_initialize();
switch (cpuinfo_get_current_core()->uarch) {
@@ -78,12 +81,14 @@ switch (cpuinfo_get_current_core()->uarch) {
```
Get the size of level 1 data cache on the fastest core in the processor (e.g. big core in big.LITTLE ARM systems):
+
```c
cpuinfo_initialize();
const size_t l1_size = cpuinfo_get_processor(0)->cache.l1d->size;
```
Pin thread to cores sharing L2 cache with the current core (Linux or Android)
+
```c
cpuinfo_initialize();
cpu_set_t cpu_set;
diff --git a/bench/get-current.cc b/bench/get-current.cc
index b547df0..e475767 100644
--- a/bench/get-current.cc
+++ b/bench/get-current.cc
@@ -30,4 +30,13 @@ static void cpuinfo_get_current_uarch_index(benchmark::State& state) {
}
BENCHMARK(cpuinfo_get_current_uarch_index)->Unit(benchmark::kNanosecond);
+static void cpuinfo_get_current_uarch_index_with_default(benchmark::State& state) { // Benchmark: one cpuinfo_get_current_uarch_index_with_default() call per iteration.
+ cpuinfo_initialize(); // called once, before the benchmark loop
+ while (state.KeepRunning()) {
+ const uint32_t uarch_index = cpuinfo_get_current_uarch_index_with_default(0); // 0 is passed as the default uarch index
+ benchmark::DoNotOptimize(uarch_index); // consume the result so the call is not optimized away
+ }
+}
+BENCHMARK(cpuinfo_get_current_uarch_index_with_default)->Unit(benchmark::kNanosecond); // report timings in nanoseconds
+
BENCHMARK_MAIN();
diff --git a/configure.py b/configure.py
index 0e58dba..66f2ec9 100755
--- a/configure.py
+++ b/configure.py
@@ -23,7 +23,7 @@ def main(args):
build.export_cpath("include", ["cpuinfo.h"])
with build.options(source_dir="src", macros=macros, extra_include_dirs="src", deps=build.deps.clog):
- sources = ["init.c", "api.c"]
+ sources = ["api.c", "init.c", "cache.c"]
if build.target.is_x86 or build.target.is_x86_64:
sources += [
"x86/init.c", "x86/info.c", "x86/isa.c", "x86/vendor.c",
@@ -61,7 +61,6 @@ def main(args):
sources += ["mach/topology.c"]
if build.target.is_linux or build.target.is_android:
sources += [
- "linux/current.c",
"linux/cpulist.c",
"linux/smallfile.c",
"linux/multiline.c",
diff --git a/include/cpuinfo.h b/include/cpuinfo.h
index 903d1cf..e89a4c1 100644
--- a/include/cpuinfo.h
+++ b/include/cpuinfo.h
@@ -499,11 +499,11 @@ enum cpuinfo_uarch {
/** Applied Micro X-Gene. */
cpuinfo_uarch_xgene = 0x00B00100,
- /** Huawei hisilicon Kunpeng Series CPU. */
- cpuinfo_uarch_taishanv110 = 0x00C00100,
-
/* Hygon Dhyana (a modification of AMD Zen for Chinese market). */
cpuinfo_uarch_dhyana = 0x01000100,
+
+ /** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). */
+ cpuinfo_uarch_taishan_v110 = 0x00C00100,
};
struct cpuinfo_processor {
@@ -523,7 +523,7 @@ struct cpuinfo_processor {
*/
int linux_id;
#endif
-#if defined(_WIN32)
+#if defined(_WIN32) || defined(__CYGWIN__)
/** Windows-specific ID for the group containing the logical processor. */
uint16_t windows_group_id;
/**
@@ -1799,13 +1799,22 @@ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void);
/**
* Identify the microarchitecture index of the core that executes the current thread.
- * If the system does not support such identification, the function return 0.
+ * If the system does not support such identification, the function returns 0.
*
* There is no guarantee that the thread will stay on the same type of core for any time.
* Callers should treat the result as only a hint.
*/
uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void);
+/**
+ * Identify the microarchitecture index of the core that executes the current thread.
+ * If the system does not support such identification, the function returns the user-specified default value.
+ *
+ * There is no guarantee that the thread will stay on the same type of core for any time.
+ * Callers should treat the result as only a hint.
+ *
+ * @param default_uarch_index - value to return when the core that executes the current thread cannot be identified.
+ *
+ * @return the microarchitecture index for the current core, or @p default_uarch_index as a fallback.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/api.c b/src/api.c
index 38cea86..832b085 100644
--- a/src/api.c
+++ b/src/api.c
@@ -374,3 +374,33 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) {
return 0;
#endif
}
+/*
+ * Return the microarchitecture index of the core running the calling thread,
+ * falling back to default_uarch_index when it cannot be determined.
+ *
+ * A fatal message is logged through cpuinfo_log_fatal when cpuinfo_is_initialized is false.
+ *
+ * - ARM/ARM64 with __linux__: returns 0 when cpuinfo_linux_cpu_to_uarch_index_map is NULL;
+ *   otherwise the current CPU number is obtained with the getcpu syscall and looked up in that map.
+ *   default_uarch_index is returned if the syscall fails or the CPU number is not below cpuinfo_linux_cpu_max.
+ * - ARM/ARM64 without __linux__: always returns default_uarch_index.
+ * - Any other architecture: always returns 0.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index) {
+ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index_with_default");
+ }
+ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ #ifdef __linux__
+ if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) {
+ /* Special case: avoid syscall on systems with only a single type of cores */
+ return 0;
+ }
+
+ /* General case */
+ unsigned cpu;
+ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) {
+ return default_uarch_index;
+ }
+ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) {
+ return default_uarch_index;
+ }
+ return cpuinfo_linux_cpu_to_uarch_index_map[cpu];
+ #else
+ /* Fallback: no API to query current core, use default uarch index. */
+ return default_uarch_index;
+ #endif
+ #else
+ /* Only ARM/ARM64 processors may include cores of different types in the same package. */
+ return 0;
+ #endif
+}
diff --git a/src/arm/cache.c b/src/arm/cache.c
index 70f11fd..1a8bf91 100644
--- a/src/arm/cache.c
+++ b/src/arm/cache.c
@@ -1448,23 +1448,24 @@ void cpuinfo_arm_decode_cache(
.line_size = 64 /* assumption */
};
break;
- case cpuinfo_uarch_taishanv110:
+ case cpuinfo_uarch_taishan_v110:
/*
- * Kunpeng920 series CPU designed by Huawei hisilicon for server,
- * L1 and L2 cache is private to each core, L3 is shared with all cores.
- * +--------------------+-------+-----------+-----------+-----------+----------+------------+
- * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
- * +--------------------+-------+-----------+-----------+-----------+----------+------------+
- * | Kunpeng920-3226 | 32 | 64K | 64K | 512K | 32M | [1] |
- * +--------------------+-------+-----------+-----------+-----------+----------+------------+
- * | Kunpeng920-4826 | 48 | 64K | 64K | 512K | 48M | [2] |
- * +--------------------+-------+-----------+-----------+-----------+----------+------------+
- * | Kunpeng920-6426 | 64 | 64K | 64K | 512K | 64M | [3] |
- * +--------------------+-------+-----------+-----------+-----------+----------+------------+
- *
- * [1] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226
- * [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826
- * [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
+ * It features private 64 KiB L1 instruction and data caches as well as 512 KiB of private L2. [1]
+ *
+ * +------------------+-------+-----------+-----------+-----------+----------+-----------+
+ * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
+ * +------------------+-------+-----------+-----------+-----------+----------+-----------+
+ * | Kunpeng 920-3226 | 32 | 64K | 64K | 512K | 32M | [2] |
+ * +------------------+-------+-----------+-----------+-----------+----------+-----------+
+ * | Kunpeng 920-4826 | 48 | 64K | 64K | 512K | 48M | [3] |
+ * +------------------+-------+-----------+-----------+-----------+----------+-----------+
+ * | Kunpeng 920-6426 | 64 | 64K | 64K | 512K | 64M | [4] |
+ * +------------------+-------+-----------+-----------+-----------+----------+-----------+
+ *
+ * [1] https://en.wikichip.org/wiki/hisilicon/microarchitectures/taishan_v110
+ * [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226
+ * [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826
+ * [4] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
*/
*l1i = (struct cpuinfo_cache) {
.size = 64 * 1024,
@@ -1482,11 +1483,11 @@ void cpuinfo_arm_decode_cache(
.line_size = 128 /* assumption */,
.flags = CPUINFO_CACHE_INCLUSIVE /* assumption */,
};
- *l3 = (struct cpuinfo_cache) {
- .size = cluster_cores * 1024 * 1024,
- .associativity = 16 /* assumption */,
- .line_size = 128 /* assumption */,
- };
+ *l3 = (struct cpuinfo_cache) {
+ .size = cluster_cores * 1024 * 1024,
+ .associativity = 16 /* assumption */,
+ .line_size = 128 /* assumption */,
+ };
break;
#endif
case cpuinfo_uarch_cortex_a12:
diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c
index 92095e1..6aedda3 100644
--- a/src/arm/linux/aarch32-isa.c
+++ b/src/arm/linux/aarch32-isa.c
@@ -193,7 +193,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
if ((architecture_version >= 7) || (features & vfpv3_mask)) {
isa->vfpv3 = true;
-
+
const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON;
if (features & d32_mask) {
isa->d32 = true;
diff --git a/src/arm/linux/clusters.c b/src/arm/linux/clusters.c
index 8daeae5..c7a4045 100644
--- a/src/arm/linux/clusters.c
+++ b/src/arm/linux/clusters.c
@@ -47,7 +47,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
*
* @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags.
* @param max_processors - number of elements in the @p processors array.
- * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
* frequency, MIDR infromation, and core cluster (package siblings list) information.
*
* @retval true if the heuristic successfully assigned all processors into clusters of cores.
@@ -308,7 +308,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
* @p processors array have cluster information.
*
* @param max_processors - number of elements in the @p processors array.
- * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
* frequency, MIDR infromation, and core cluster (package siblings list) information.
*
* @retval true if the heuristic successfully assigned all processors into clusters of cores.
@@ -466,7 +466,7 @@ new_cluster:
* This function should be called after all processors are assigned to core clusters.
*
* @param max_processors - number of elements in the @p processors array.
- * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
* and decoded core cluster (package_leader_id) information.
* The function expects the value of processors[i].package_processor_count to be zero.
* Upon return, processors[i].package_processor_count will contain the number of logical
@@ -482,12 +482,12 @@ void cpuinfo_arm_linux_count_cluster_processors(
const uint32_t package_leader_id = processors[i].package_leader_id;
processors[package_leader_id].package_processor_count += 1;
}
- }
+ }
/* Second pass: copy the package_processor_count from the group leader processor */
for (uint32_t i = 0; i < max_processors; i++) {
if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
const uint32_t package_leader_id = processors[i].package_leader_id;
processors[i].package_processor_count = processors[package_leader_id].package_processor_count;
}
- }
+ }
}
diff --git a/src/arm/linux/cpuinfo.c b/src/arm/linux/cpuinfo.c
index 2df0c6e..c70055f 100644
--- a/src/arm/linux/cpuinfo.c
+++ b/src/arm/linux/cpuinfo.c
@@ -44,7 +44,7 @@ static uint32_t parse_processor_number(
/*
* Full list of ARM features reported in /proc/cpuinfo:
- *
+ *
* * swp - support for SWP instruction (deprecated in ARMv7, can be removed in future)
* * half - support for half-word loads and stores. These instruction are part of ARMv4,
* so no need to check it on supported CPUs.
@@ -620,7 +620,7 @@ static void parse_cache_number(
break;
default:
cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected",
- number_name, (int) (number_end - number_start), number_start);
+ number_name, (int) (number_end - number_start), number_start);
}
}
@@ -670,7 +670,7 @@ static bool parse_line(
if (line_start == line_end) {
return true;
}
-
+
/* Search for ':' on the line. */
const char* separator = line_start;
for (; separator != line_end; separator++) {
diff --git a/src/arm/tlb.c b/src/arm/tlb.c
index ba42a3e..9beb832 100644
--- a/src/arm/tlb.c
+++ b/src/arm/tlb.c
@@ -6,7 +6,7 @@ switch (uarch) {
* Cortex-A5 Technical Reference Manual:
* 6.3.1. Micro TLB
* The first level of caching for the page table information is a micro TLB of
- * 10 entries that is implemented on each of the instruction and data sides.
+ * 10 entries that is implemented on each of the instruction and data sides.
* 6.3.2. Main TLB
* Misses from the instruction and data micro TLBs are handled by a unified main TLB.
* The main TLB is 128-entry two-way set-associative.
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index e5e3cbc..63b1a55 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -155,9 +155,11 @@ void cpuinfo_arm_decode_vendor_uarch(
case 'H':
*vendor = cpuinfo_vendor_huawei;
switch (midr_get_part(midr)) {
- case 0xD01: /* Kunpeng920 ARM-base CPU*/
- *uarch = cpuinfo_uarch_taishanv110;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ case 0xD01: /* Kunpeng 920 series */
+ *uarch = cpuinfo_uarch_taishan_v110;
break;
+#endif
case 0xD40: /* Kirin 980 Big/Medium cores -> Cortex-A76 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a76;
diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h
index c6eed0b..9c23d7c 100644
--- a/src/cpuinfo/internal-api.h
+++ b/src/cpuinfo/internal-api.h
@@ -3,7 +3,7 @@
#include <stdint.h>
#include <stdbool.h>
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
#endif
@@ -50,7 +50,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__CYGWIN__)
CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
#endif
CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
diff --git a/src/emscripten/init.c b/src/emscripten/init.c
new file mode 100644
index 0000000..ce4bdea
--- /dev/null
+++ b/src/emscripten/init.c
@@ -0,0 +1,277 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include <emscripten/threading.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+
+static const volatile float infinity = INFINITY; /* read as (infinity - infinity) in cpuinfo_emscripten_init; presumably volatile so the subtraction is not folded at compile time - TODO confirm */
+/* The single package descriptor; its counts and name are filled in by cpuinfo_emscripten_init. */
+static struct cpuinfo_package static_package = { };
+/*
+ * Fixed L3 cache description, used only when the x86 heuristic in cpuinfo_emscripten_init matches:
+ * 16 ways x 2048 sets x 1 partition x 64-byte lines = 2 MiB.
+ * processor_count is filled in by cpuinfo_emscripten_init.
+ */
+static struct cpuinfo_cache static_x86_l3 = {
+ .size = 2 * 1024 * 1024,
+ .associativity = 16,
+ .sets = 2048,
+ .partitions = 1,
+ .line_size = 64,
+};
+
+/*
+ * Populate cpuinfo's global tables with a synthetic topology for Emscripten builds.
+ *
+ * The only value queried from the runtime is emscripten_num_logical_cores(); the SMT layout,
+ * cluster split and all cache parameters are fixed guesses selected by the is_x86 heuristic below.
+ *
+ * On success the freshly allocated arrays are published through the cpuinfo_* globals and
+ * cpuinfo_is_initialized is set to true. If any allocation fails, the error is logged, everything
+ * allocated so far is released, and cpuinfo_is_initialized is not modified.
+ */
+void cpuinfo_emscripten_init(void) {
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+
+	/*
+	 * Host architecture guess: the sign bit of the NaN produced by (infinity - infinity).
+	 * NOTE(review): presumably relies on x86 hardware generating a NaN with the sign bit set - confirm.
+	 */
+	const bool is_x86 = signbit(infinity - infinity);
+
+	int logical_cores_count = emscripten_num_logical_cores();
+	if (logical_cores_count <= 0) {
+		/* Treat a non-positive report as a single logical processor */
+		logical_cores_count = 1;
+	}
+	uint32_t processor_count = (uint32_t) logical_cores_count;
+	uint32_t core_count = processor_count;
+	uint32_t cluster_count = 1;
+	uint32_t big_cluster_core_count = core_count;
+	uint32_t processors_per_core = 1;
+	if (is_x86) {
+		/* An even number of logical processors is modelled as 2-way SMT */
+		if (processor_count % 2 == 0) {
+			processors_per_core = 2;
+			core_count = processor_count / 2;
+			big_cluster_core_count = core_count;
+		}
+	} else {
+		/* Assume ARM/ARM64 */
+		if (processor_count > 4) {
+			/* Assume big.LITTLE architecture */
+			cluster_count = 2;
+			big_cluster_core_count = processor_count >= 8 ? 4 : 2;
+		}
+	}
+	/* L2 is modelled as per-core on x86 and per-cluster otherwise */
+	uint32_t l2_count = is_x86 ? core_count : cluster_count;
+
+	processors = calloc(processor_count, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
+			processor_count * sizeof(struct cpuinfo_processor), processor_count);
+		goto cleanup;
+	}
+	/* One descriptor per core: only core_count entries are ever filled in and published */
+	cores = calloc(core_count, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
+			core_count * sizeof(struct cpuinfo_core), core_count);
+		goto cleanup;
+	}
+	clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" clusters",
+			cluster_count * sizeof(struct cpuinfo_cluster), cluster_count);
+		goto cleanup;
+	}
+
+	l1i = calloc(core_count, sizeof(struct cpuinfo_cache));
+	if (l1i == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
+			core_count * sizeof(struct cpuinfo_cache), core_count);
+		goto cleanup;
+	}
+
+	l1d = calloc(core_count, sizeof(struct cpuinfo_cache));
+	if (l1d == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
+			core_count * sizeof(struct cpuinfo_cache), core_count);
+		goto cleanup;
+	}
+
+	l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+	if (l2 == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
+			l2_count * sizeof(struct cpuinfo_cache), l2_count);
+		goto cleanup;
+	}
+
+	static_package.processor_count = processor_count;
+	static_package.core_count = core_count;
+	static_package.cluster_count = cluster_count;
+	if (is_x86) {
+		strncpy(static_package.name, "x86 vCPU", CPUINFO_PACKAGE_NAME_MAX);
+	} else {
+		strncpy(static_package.name, "ARM vCPU", CPUINFO_PACKAGE_NAME_MAX);
+	}
+
+	for (uint32_t i = 0; i < core_count; i++) {
+		/* Logical processors of core i occupy a contiguous range starting at i * processors_per_core */
+		for (uint32_t j = 0; j < processors_per_core; j++) {
+			processors[i * processors_per_core + j] = (struct cpuinfo_processor) {
+				.smt_id = j,
+				.core = cores + i,
+				.cluster = clusters + (uint32_t) (i >= big_cluster_core_count),
+				.package = &static_package,
+				.cache.l1i = l1i + i,
+				.cache.l1d = l1d + i,
+				.cache.l2 = is_x86 ? l2 + i : l2 + (uint32_t) (i >= big_cluster_core_count),
+				.cache.l3 = is_x86 ? &static_x86_l3 : NULL,
+			};
+		}
+
+		cores[i] = (struct cpuinfo_core) {
+			.processor_start = i * processors_per_core,
+			.processor_count = processors_per_core,
+			.core_id = i,
+			.cluster = clusters + (uint32_t) (i >= big_cluster_core_count),
+			.package = &static_package,
+			.vendor = cpuinfo_vendor_unknown,
+			.uarch = cpuinfo_uarch_unknown,
+			.frequency = 0,
+		};
+
+		l1i[i] = (struct cpuinfo_cache) {
+			.size = 32 * 1024,
+			.associativity = 4,
+			.sets = 128,
+			.partitions = 1,
+			.line_size = 64,
+			.processor_start = i * processors_per_core,
+			.processor_count = processors_per_core,
+		};
+
+		l1d[i] = (struct cpuinfo_cache) {
+			.size = 32 * 1024,
+			.associativity = 4,
+			.sets = 128,
+			.partitions = 1,
+			.line_size = 64,
+			.processor_start = i * processors_per_core,
+			.processor_count = processors_per_core,
+		};
+
+		if (is_x86) {
+			l2[i] = (struct cpuinfo_cache) {
+				.size = 256 * 1024,
+				.associativity = 8,
+				.sets = 512,
+				.partitions = 1,
+				.line_size = 64,
+				.processor_start = i * processors_per_core,
+				.processor_count = processors_per_core,
+			};
+		}
+	}
+
+	if (is_x86) {
+		clusters[0] = (struct cpuinfo_cluster) {
+			.processor_start = 0,
+			.processor_count = processor_count,
+			.core_start = 0,
+			.core_count = core_count,
+			.cluster_id = 0,
+			.package = &static_package,
+			.vendor = cpuinfo_vendor_unknown,
+			.uarch = cpuinfo_uarch_unknown,
+			.frequency = 0,
+		};
+
+		static_x86_l3.processor_count = processor_count;
+	} else {
+		/* On this path processors_per_core is 1, so core counts double as processor counts */
+		clusters[0] = (struct cpuinfo_cluster) {
+			.processor_start = 0,
+			.processor_count = big_cluster_core_count,
+			.core_start = 0,
+			.core_count = big_cluster_core_count,
+			.cluster_id = 0,
+			.package = &static_package,
+			.vendor = cpuinfo_vendor_unknown,
+			.uarch = cpuinfo_uarch_unknown,
+			.frequency = 0,
+		};
+
+		l2[0] = (struct cpuinfo_cache) {
+			.size = 1024 * 1024,
+			.associativity = 8,
+			.sets = 2048,
+			.partitions = 1,
+			.line_size = 64,
+			.processor_start = 0,
+			.processor_count = big_cluster_core_count,
+		};
+
+		if (cluster_count > 1) {
+			l2[1] = (struct cpuinfo_cache) {
+				.size = 256 * 1024,
+				.associativity = 8,
+				.sets = 512,
+				.partitions = 1,
+				.line_size = 64,
+				.processor_start = big_cluster_core_count,
+				.processor_count = processor_count - big_cluster_core_count,
+			};
+
+			clusters[1] = (struct cpuinfo_cluster) {
+				.processor_start = big_cluster_core_count,
+				.processor_count = processor_count - big_cluster_core_count,
+				.core_start = big_cluster_core_count,
+				.core_count = processor_count - big_cluster_core_count,
+				.cluster_id = 1,
+				.package = &static_package,
+				.vendor = cpuinfo_vendor_unknown,
+				.uarch = cpuinfo_uarch_unknown,
+				.frequency = 0,
+			};
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	if (is_x86) {
+		cpuinfo_cache[cpuinfo_cache_level_3] = &static_x86_l3;
+	}
+
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = &static_package;
+
+	/*
+	 * The L1 arrays hold one entry per core, so the published counts must be core_count:
+	 * with 2-way SMT processor_count is twice the number of allocated entries.
+	 */
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = core_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = core_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	if (is_x86) {
+		cpuinfo_cache_count[cpuinfo_cache_level_3] = 1;
+	}
+
+	cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
+		.uarch = cpuinfo_uarch_unknown,
+		.processor_count = processor_count,
+		.core_count = core_count,
+	};
+
+	cpuinfo_processors_count = processor_count;
+	/* Only core_count core descriptors exist; keep this consistent with cpuinfo_global_uarch.core_count */
+	cpuinfo_cores_count = core_count;
+	cpuinfo_clusters_count = cluster_count;
+	cpuinfo_packages_count = 1;
+
+	cpuinfo_max_cache_size = is_x86 ? 128 * 1024 * 1024 : 8 * 1024 * 1024;
+
+	cpuinfo_is_initialized = true;
+
+	/* Ownership moved to the globals above: clear the locals so the cleanup below frees nothing */
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	l1i = l1d = l2 = NULL;
+
+cleanup:
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(l1i);
+	free(l1d);
+	free(l2);
+}
diff --git a/src/init.c b/src/init.c
index 10a1afc..0d8cc3b 100644
--- a/src/init.c
+++ b/src/init.c
@@ -1,4 +1,4 @@
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
#elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
#include <pthread.h>
@@ -13,7 +13,7 @@
#endif
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__CYGWIN__)
static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT;
#elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
@@ -27,7 +27,7 @@ bool CPUINFO_ABI cpuinfo_initialize(void) {
pthread_once(&init_guard, &cpuinfo_x86_mach_init);
#elif defined(__linux__)
pthread_once(&init_guard, &cpuinfo_x86_linux_init);
- #elif defined(_WIN32)
+ #elif defined(_WIN32) || defined(__CYGWIN__)
InitOnceExecuteOnce(&init_guard, &cpuinfo_x86_windows_init, NULL, NULL);
#else
cpuinfo_log_error("operating system is not supported in cpuinfo");
diff --git a/src/linux/mockfile.c b/src/linux/mockfile.c
index 3fdd6bf..138acfe 100644
--- a/src/linux/mockfile.c
+++ b/src/linux/mockfile.c
@@ -34,7 +34,7 @@ void CPUINFO_ABI cpuinfo_mock_filesystem(struct cpuinfo_mock_file* files) {
file_count += 1;
}
cpuinfo_mock_files = files;
- cpuinfo_mock_file_count = file_count;
+ cpuinfo_mock_file_count = file_count;
}
int CPUINFO_ABI cpuinfo_mock_open(const char* path, int oflag) {
diff --git a/src/x86/cache/descriptor.c b/src/x86/cache/descriptor.c
index 6532e4d..69d38cc 100644
--- a/src/x86/cache/descriptor.c
+++ b/src/x86/cache/descriptor.c
@@ -353,7 +353,7 @@ void cpuinfo_x86_decode_cache_descriptor(
};
break;
case 0x39:
- /* Where does this come from? */
+ /* Where does this come from? */
cache->l2 = (struct cpuinfo_x86_cache) {
.size = 128 * 1024,
.associativity = 4,
@@ -364,7 +364,7 @@ void cpuinfo_x86_decode_cache_descriptor(
};
break;
case 0x3A:
- /* Where does this come from? */
+ /* Where does this come from? */
cache->l2 = (struct cpuinfo_x86_cache) {
.size = 192 * 1024,
.associativity = 6,
@@ -375,7 +375,7 @@ void cpuinfo_x86_decode_cache_descriptor(
};
break;
case 0x3B:
- /* Where does this come from? */
+ /* Where does this come from? */
cache->l2 = (struct cpuinfo_x86_cache) {
.size = 128 * 1024,
.associativity = 2,
@@ -386,7 +386,7 @@ void cpuinfo_x86_decode_cache_descriptor(
};
break;
case 0x3C:
- /* Where does this come from? */
+ /* Where does this come from? */
cache->l2 = (struct cpuinfo_x86_cache) {
.size = 256 * 1024,
.associativity = 4,
@@ -397,7 +397,7 @@ void cpuinfo_x86_decode_cache_descriptor(
};
break;
case 0x3D:
- /* Where does this come from? */
+ /* Where does this come from? */
cache->l2 = (struct cpuinfo_x86_cache) {
.size = 384 * 1024,
.associativity = 6,
@@ -408,7 +408,7 @@ void cpuinfo_x86_decode_cache_descriptor(
};
break;
case 0x3E:
- /* Where does this come from? */
+ /* Where does this come from? */
cache->l2 = (struct cpuinfo_x86_cache) {
.size = 512 * 1024,
.associativity = 4,
@@ -1011,7 +1011,7 @@ void cpuinfo_x86_decode_cache_descriptor(
};
break;
case 0x73:
- /* Where does this come from? */
+ /* Where does this come from? */
cache->trace = (struct cpuinfo_trace_cache) {
.uops = 64 * 1024,
.associativity = 8,
diff --git a/src/x86/mockcpuid.c b/src/x86/mockcpuid.c
index 6361dc2..2631f09 100644
--- a/src/x86/mockcpuid.c
+++ b/src/x86/mockcpuid.c
@@ -14,7 +14,7 @@ static uint32_t cpuinfo_mock_cpuid_leaf4_iteration = 0;
void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries) {
cpuinfo_mock_cpuid_data = dump;
- cpuinfo_mock_cpuid_entries = entries;
+ cpuinfo_mock_cpuid_entries = entries;
};
void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]) {
diff --git a/src/x86/name.c b/src/x86/name.c
index e0d5a5b..a7cc7c6 100644
--- a/src/x86/name.c
+++ b/src/x86/name.c
@@ -135,7 +135,7 @@ static inline bool is_frequency(const char* token_start, const char* token_end)
const size_t token_length = (size_t) (token_end - token_start);
if (token_length > 3 && token_end[-2] == 'H' && token_end[-1] == 'z') {
switch (token_end[-3]) {
- case 'K':
+ case 'K':
case 'M':
case 'G':
return true;
@@ -347,7 +347,7 @@ static bool transform_token(char* token_start, char* token_end, struct parser_st
return false;
}
/*
- * Erase "Mobile" when it is not part of the processor name,
+ * Erase "Mobile" when it is not part of the processor name,
* e.g. in "AMD Turion(tm) X2 Ultra Dual-Core Mobile ZM-82"
*/
if (previousState.context_core != NULL) {
@@ -540,8 +540,7 @@ uint32_t cpuinfo_x86_normalize_brand_string(
char* name_end = &name[48];
while (name_end[-1] == '\0') {
/*
- * Adject name_end by 1 position and
- * check that we didn't reach the start of the brand string.
+ * Adjust name_end by 1 position and check that we didn't reach the start of the brand string.
* This is possible if all characters are zero.
*/
if (--name_end == name) {
@@ -704,6 +703,6 @@ uint32_t cpuinfo_x86_format_package_name(
} else {
snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX,
"%s %s", vendor_string, normalized_brand_string);
- return strlen(vendor_string) + 1;
+ return (uint32_t) strlen(vendor_string) + 1;
}
}
diff --git a/src/x86/vendor.c b/src/x86/vendor.c
index 2bba90d..bad50fa 100644
--- a/src/x86/vendor.c
+++ b/src/x86/vendor.c
@@ -79,7 +79,7 @@ enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32
case ineI:
if (ecx == ntel) {
/* "GenuineIntel" */
- return cpuinfo_vendor_intel;
+ return cpuinfo_vendor_intel;
}
break;
#if CPUINFO_ARCH_X86
diff --git a/src/x86/windows/init.c b/src/x86/windows/init.c
index 2c7e3cd..cf549d5 100644
--- a/src/x86/windows/init.c
+++ b/src/x86/windows/init.c
@@ -10,6 +10,13 @@
#include <Windows.h>
+#ifdef __GNUC__
+ #define CPUINFO_ALLOCA __builtin_alloca
+#else
+ #define CPUINFO_ALLOCA _alloca
+#endif
+
+
static inline uint32_t bit_mask(uint32_t bits) {
return (UINT32_C(1) << bits) - UINT32_C(1);
}
@@ -118,7 +125,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
cpuinfo_log_debug("detected %"PRIu32" processor groups", max_group_count);
uint32_t processors_count = 0;
- uint32_t* processors_per_group = (uint32_t*) _alloca(max_group_count * sizeof(uint32_t));
+ uint32_t* processors_per_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
for (uint32_t i = 0; i < max_group_count; i++) {
processors_per_group[i] = GetMaximumProcessorCount((WORD) i);
cpuinfo_log_debug("detected %"PRIu32" processors in group %"PRIu32,
@@ -126,7 +133,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
processors_count += processors_per_group[i];
}
- uint32_t* processors_before_group = (uint32_t*) _alloca(max_group_count * sizeof(uint32_t));
+ uint32_t* processors_before_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
for (uint32_t i = 0, count = 0; i < max_group_count; i++) {
processors_before_group[i] = count;
cpuinfo_log_debug("detected %"PRIu32" processors before group %"PRIu32,
@@ -196,7 +203,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
/* Iterate processor groups and set the package part of APIC ID */
for (uint32_t i = 0; i < package_info->Processor.GroupCount; i++) {
const uint32_t group_id = package_info->Processor.GroupMask[i].Group;
- /* Global index of the first logical processor belonging to this group */
+ /* Global index of the first logical processor belonging to this group */
const uint32_t group_processors_start = processors_before_group[group_id];
/* Bitmask representing processors in this group belonging to this package */
KAFFINITY group_processors_mask = package_info->Processor.GroupMask[i].Mask;
@@ -245,7 +252,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
/* Iterate processor groups and set the core & SMT parts of APIC ID */
for (uint32_t i = 0; i < core_info->Processor.GroupCount; i++) {
const uint32_t group_id = core_info->Processor.GroupMask[i].Group;
- /* Global index of the first logical processor belonging to this group */
+ /* Global index of the first logical processor belonging to this group */
const uint32_t group_processors_start = processors_before_group[group_id];
/* Bitmask representing processors in this group belonging to this package */
KAFFINITY group_processors_mask = core_info->Processor.GroupMask[i].Mask;
@@ -259,7 +266,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
current_package_apic_id = processors[processor_id].apic_id;
}
/* Core ID w.r.t package */
- const uint32_t package_core_id = core_id - package_core_start;
+ const uint32_t package_core_id = core_id - package_core_start;
/* Update APIC ID with core and SMT parts */
processors[processor_id].apic_id |=
diff --git a/tools/cpu-info.c b/tools/cpu-info.c
index 7963c00..4453d88 100644
--- a/tools/cpu-info.c
+++ b/tools/cpu-info.c
@@ -14,6 +14,8 @@ static const char* vendor_to_string(enum cpuinfo_vendor vendor) {
return "Intel";
case cpuinfo_vendor_amd:
return "AMD";
+ case cpuinfo_vendor_huawei:
+ return "Huawei";
case cpuinfo_vendor_hygon:
return "Hygon";
case cpuinfo_vendor_arm:
@@ -243,6 +245,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Brahma B53";
case cpuinfo_uarch_xgene:
return "X-Gene";
+ case cpuinfo_uarch_dhyana:
+ return "Dhyana";
+ case cpuinfo_uarch_taishan_v110:
+ return "TaiShan v110";
default:
return NULL;
}
diff --git a/tools/gpu-dump.c b/tools/gpu-dump.c
index d7cfa9e..6d17374 100644
--- a/tools/gpu-dump.c
+++ b/tools/gpu-dump.c
@@ -314,7 +314,7 @@ void report_gles_attributes(void) {
fprintf(stderr, "failed to get the number of EGL frame buffer configurations\n");
goto cleanup;
}
-
+
configs = (EGLConfig*) malloc(configs_count * sizeof(EGLConfig));
if (configs == NULL) {
fprintf(stderr, "failed to allocate %zu bytes for %d frame buffer configurations\n",