author     Marat Dukhan <maratek@google.com>              2020-04-25 14:59:26 -0700
committer  XNNPACK Team <xnnpack-github-robot@google.com> 2020-04-25 14:59:57 -0700
commit     54f5917db8c2d2c06fa8c2dbc8f674a89e3d7512 (patch)
tree       bfda4daf4b8f74365048047a21d5ffafd9e4377e /cmake
parent     fc563f55701b450e0c0fdf6f0f8deb463459e691 (diff)
download   XNNPACK-54f5917db8c2d2c06fa8c2dbc8f674a89e3d7512.tar.gz
Update cpuinfo
Add patch for Cygwin support and cpuinfo_get_current_uarch_index_with_default function

PiperOrigin-RevId: 308441046
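For context, a minimal sketch of how a caller might use the cpuinfo_get_current_uarch_index_with_default() API introduced by this patch. The dispatch-table setup (kernel_table, generic_kernel, the table size of 8) is hypothetical and not part of this commit; only the cpuinfo calls come from the patched header.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#include <cpuinfo.h>

/* Hypothetical per-microarchitecture kernel table; slot 0 holds the generic fallback. */
typedef void (*kernel_fn)(void);
static void generic_kernel(void) { puts("running generic kernel"); }
#define KERNEL_TABLE_SIZE 8
static kernel_fn kernel_table[KERNEL_TABLE_SIZE] = { generic_kernel /* per-uarch variants would follow */ };

int main(void) {
    if (!cpuinfo_initialize()) {
        fprintf(stderr, "failed to initialize cpuinfo\n");
        return 1;
    }
    /* New in this commit: if the OS cannot report which core the thread runs on,
     * the call returns the caller-supplied default index (here 0, our generic slot)
     * instead of the hard-coded 0 returned by cpuinfo_get_current_uarch_index(). */
    const uint32_t uarch_index = cpuinfo_get_current_uarch_index_with_default(0);

    /* Dispatch to the matching kernel, falling back to the generic one. */
    kernel_fn kernel = generic_kernel;
    if (uarch_index < KERNEL_TABLE_SIZE && kernel_table[uarch_index] != NULL) {
        kernel = kernel_table[uarch_index];
    }
    kernel();

    cpuinfo_deinitialize();
    return 0;
}
```

Passing an out-of-range sentinel instead of 0 as the default would let a caller distinguish "could not identify the core" from "uarch index 0".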
Diffstat (limited to 'cmake')
-rw-r--r--  cmake/DownloadCpuinfo.cmake |   5
-rw-r--r--  cmake/cpuinfo.patch         | 870
2 files changed, 873 insertions, 2 deletions
diff --git a/cmake/DownloadCpuinfo.cmake b/cmake/DownloadCpuinfo.cmake
index 664948bc2..48f06fe36 100644
--- a/cmake/DownloadCpuinfo.cmake
+++ b/cmake/DownloadCpuinfo.cmake
@@ -12,11 +12,12 @@ PROJECT(cpuinfo-download NONE)
INCLUDE(ExternalProject)
ExternalProject_Add(cpuinfo
- URL https://github.com/pytorch/cpuinfo/archive/0cc563acb9baac39f2c1349bc42098c4a1da59e3.tar.gz
- URL_HASH SHA256=80625d0b69a3d69b70c2236f30db2c542d0922ccf9bb51a61bc39c49fac91a35
+ URL https://github.com/pytorch/cpuinfo/archive/a1e0b9571b51131cf80613d061d2aa123876bd0a.tar.gz
+ URL_HASH SHA256=18918d39a77616bdcf948c99ed57d03d52109a820487fcda9fb5ee9b530d3d64
SOURCE_DIR "${CMAKE_BINARY_DIR}/cpuinfo-source"
BINARY_DIR "${CMAKE_BINARY_DIR}/cpuinfo"
CONFIGURE_COMMAND ""
+ PATCH_COMMAND "patch -p0 -i ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cpuinfo.patch"
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
diff --git a/cmake/cpuinfo.patch b/cmake/cpuinfo.patch
new file mode 100644
index 000000000..63d8bacf5
--- /dev/null
+++ b/cmake/cpuinfo.patch
@@ -0,0 +1,870 @@
+diff --git CMakeLists.txt CMakeLists.txt
+index fefb60b..b85620f 100644
+--- CMakeLists.txt
++++ CMakeLists.txt
+@@ -79,7 +79,7 @@ IF(NOT CMAKE_SYSTEM_NAME)
+ "Target operating system is not specified. "
+ "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+ SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+-ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|Darwin|Linux|Android)$")
++ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$")
+ IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
+ MESSAGE(WARNING
+ "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. "
+@@ -125,7 +125,7 @@ SET(CPUINFO_SRCS
+ src/cache.c)
+
+ IF(CPUINFO_SUPPORTED_PLATFORM)
+- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
++ IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
+ LIST(APPEND CPUINFO_SRCS
+ src/x86/init.c
+ src/x86/info.c
+@@ -143,7 +143,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
+ src/x86/linux/cpuinfo.c)
+ ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
+ LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c)
+- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Windows")
++ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
+ LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c)
+ ENDIF()
+ ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$")
+@@ -175,6 +175,11 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
+ ENDIF()
+ ENDIF()
+
++ IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
++ LIST(APPEND CPUINFO_SRCS
++ src/emscripten/init.c)
++ ENDIF()
++
+ IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+ LIST(APPEND CPUINFO_SRCS
+ src/linux/smallfile.c
+@@ -205,6 +210,11 @@ ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS})
+ CPUINFO_TARGET_ENABLE_C99(cpuinfo)
+ CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals)
+ CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo)
++IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
++ # Target Windows 7+ API
++ TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _WIN32_WINNT=0x0601)
++ TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _WIN32_WINNT=0x0601)
++ENDIF()
+ SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h)
+ TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include)
+ TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src)
+diff --git README.md README.md
+index ee5fb82..97e65cd 100644
+--- README.md
++++ README.md
+@@ -49,6 +49,7 @@ Detect if target is a 32-bit or 64-bit ARM system:
+ ```
+
+ Check if the host CPU support ARM NEON
++
+ ```c
+ cpuinfo_initialize();
+ if (cpuinfo_has_arm_neon()) {
+@@ -57,6 +58,7 @@ if (cpuinfo_has_arm_neon()) {
+ ```
+
+ Check if the host CPU supports x86 AVX
++
+ ```c
+ cpuinfo_initialize();
+ if (cpuinfo_has_x86_avx()) {
+@@ -65,6 +67,7 @@ if (cpuinfo_has_x86_avx()) {
+ ```
+
+ Check if the thread runs on a Cortex-A53 core
++
+ ```c
+ cpuinfo_initialize();
+ switch (cpuinfo_get_current_core()->uarch) {
+@@ -78,12 +81,14 @@ switch (cpuinfo_get_current_core()->uarch) {
+ ```
+
+ Get the size of level 1 data cache on the fastest core in the processor (e.g. big core in big.LITTLE ARM systems):
++
+ ```c
+ cpuinfo_initialize();
+ const size_t l1_size = cpuinfo_get_processor(0)->cache.l1d->size;
+ ```
+
+ Pin thread to cores sharing L2 cache with the current core (Linux or Android)
++
+ ```c
+ cpuinfo_initialize();
+ cpu_set_t cpu_set;
+diff --git bench/get-current.cc bench/get-current.cc
+index b547df0..e475767 100644
+--- bench/get-current.cc
++++ bench/get-current.cc
+@@ -30,4 +30,13 @@ static void cpuinfo_get_current_uarch_index(benchmark::State& state) {
+ }
+ BENCHMARK(cpuinfo_get_current_uarch_index)->Unit(benchmark::kNanosecond);
+
++static void cpuinfo_get_current_uarch_index_with_default(benchmark::State& state) {
++ cpuinfo_initialize();
++ while (state.KeepRunning()) {
++ const uint32_t uarch_index = cpuinfo_get_current_uarch_index_with_default(0);
++ benchmark::DoNotOptimize(uarch_index);
++ }
++}
++BENCHMARK(cpuinfo_get_current_uarch_index_with_default)->Unit(benchmark::kNanosecond);
++
+ BENCHMARK_MAIN();
+diff --git configure.py configure.py
+index 0e58dba..66f2ec9 100755
+--- configure.py
++++ configure.py
+@@ -23,7 +23,7 @@ def main(args):
+ build.export_cpath("include", ["cpuinfo.h"])
+
+ with build.options(source_dir="src", macros=macros, extra_include_dirs="src", deps=build.deps.clog):
+- sources = ["init.c", "api.c"]
++ sources = ["api.c", "init.c", "cache.c"]
+ if build.target.is_x86 or build.target.is_x86_64:
+ sources += [
+ "x86/init.c", "x86/info.c", "x86/isa.c", "x86/vendor.c",
+@@ -61,7 +61,6 @@ def main(args):
+ sources += ["mach/topology.c"]
+ if build.target.is_linux or build.target.is_android:
+ sources += [
+- "linux/current.c",
+ "linux/cpulist.c",
+ "linux/smallfile.c",
+ "linux/multiline.c",
+diff --git include/cpuinfo.h include/cpuinfo.h
+index e4d2d0c..60b8533 100644
+--- include/cpuinfo.h
++++ include/cpuinfo.h
+@@ -520,7 +520,7 @@ struct cpuinfo_processor {
+ */
+ int linux_id;
+ #endif
+-#if defined(_WIN32)
++#if defined(_WIN32) || defined(__CYGWIN__)
+ /** Windows-specific ID for the group containing the logical processor. */
+ uint16_t windows_group_id;
+ /**
+@@ -1796,13 +1796,22 @@ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void);
+
+ /**
+ * Identify the microarchitecture index of the core that executes the current thread.
+- * If the system does not support such identification, the function return 0.
++ * If the system does not support such identification, the function returns 0.
+ *
+ * There is no guarantee that the thread will stay on the same type of core for any time.
+ * Callers should treat the result as only a hint.
+ */
+ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void);
+
++/**
++ * Identify the microarchitecture index of the core that executes the current thread.
++ * If the system does not support such identification, the function returns the user-specified default value.
++ *
++ * There is no guarantee that the thread will stay on the same type of core for any time.
++ * Callers should treat the result as only a hint.
++ */
++uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index);
++
+ #ifdef __cplusplus
+ } /* extern "C" */
+ #endif
+diff --git src/api.c src/api.c
+index 38cea86..832b085 100644
+--- src/api.c
++++ src/api.c
+@@ -374,3 +374,33 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) {
+ return 0;
+ #endif
+ }
++
++uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index) {
++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index_with_default");
++ }
++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
++ #ifdef __linux__
++ if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) {
++ /* Special case: avoid syscall on systems with only a single type of cores */
++ return 0;
++ }
++
++ /* General case */
++ unsigned cpu;
++ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) {
++ return default_uarch_index;
++ }
++ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) {
++ return default_uarch_index;
++ }
++ return cpuinfo_linux_cpu_to_uarch_index_map[cpu];
++ #else
++ /* Fallback: no API to query current core, use default uarch index. */
++ return default_uarch_index;
++ #endif
++ #else
++ /* Only ARM/ARM64 processors may include cores of different types in the same package. */
++ return 0;
++ #endif
++}
+diff --git src/arm/linux/aarch32-isa.c src/arm/linux/aarch32-isa.c
+index 92095e1..6aedda3 100644
+--- src/arm/linux/aarch32-isa.c
++++ src/arm/linux/aarch32-isa.c
+@@ -193,7 +193,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
+ CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
+ if ((architecture_version >= 7) || (features & vfpv3_mask)) {
+ isa->vfpv3 = true;
+-
++
+ const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON;
+ if (features & d32_mask) {
+ isa->d32 = true;
+diff --git src/arm/linux/clusters.c src/arm/linux/clusters.c
+index 8daeae5..c7a4045 100644
+--- src/arm/linux/clusters.c
++++ src/arm/linux/clusters.c
+@@ -47,7 +47,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
+ *
+ * @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags.
+ * @param max_processors - number of elements in the @p processors array.
+- * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
++ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
+ * frequency, MIDR infromation, and core cluster (package siblings list) information.
+ *
+ * @retval true if the heuristic successfully assigned all processors into clusters of cores.
+@@ -308,7 +308,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
+ * @p processors array have cluster information.
+ *
+ * @param max_processors - number of elements in the @p processors array.
+- * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
++ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
+ * frequency, MIDR infromation, and core cluster (package siblings list) information.
+ *
+ * @retval true if the heuristic successfully assigned all processors into clusters of cores.
+@@ -466,7 +466,7 @@ new_cluster:
+ * This function should be called after all processors are assigned to core clusters.
+ *
+ * @param max_processors - number of elements in the @p processors array.
+- * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
++ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
+ * and decoded core cluster (package_leader_id) information.
+ * The function expects the value of processors[i].package_processor_count to be zero.
+ * Upon return, processors[i].package_processor_count will contain the number of logical
+@@ -482,12 +482,12 @@ void cpuinfo_arm_linux_count_cluster_processors(
+ const uint32_t package_leader_id = processors[i].package_leader_id;
+ processors[package_leader_id].package_processor_count += 1;
+ }
+- }
++ }
+ /* Second pass: copy the package_processor_count from the group leader processor */
+ for (uint32_t i = 0; i < max_processors; i++) {
+ if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+ const uint32_t package_leader_id = processors[i].package_leader_id;
+ processors[i].package_processor_count = processors[package_leader_id].package_processor_count;
+ }
+- }
++ }
+ }
+diff --git src/arm/linux/cpuinfo.c src/arm/linux/cpuinfo.c
+index 2df0c6e..c70055f 100644
+--- src/arm/linux/cpuinfo.c
++++ src/arm/linux/cpuinfo.c
+@@ -44,7 +44,7 @@ static uint32_t parse_processor_number(
+
+ /*
+ * Full list of ARM features reported in /proc/cpuinfo:
+- *
++ *
+ * * swp - support for SWP instruction (deprecated in ARMv7, can be removed in future)
+ * * half - support for half-word loads and stores. These instruction are part of ARMv4,
+ * so no need to check it on supported CPUs.
+@@ -620,7 +620,7 @@ static void parse_cache_number(
+ break;
+ default:
+ cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected",
+- number_name, (int) (number_end - number_start), number_start);
++ number_name, (int) (number_end - number_start), number_start);
+ }
+ }
+
+@@ -670,7 +670,7 @@ static bool parse_line(
+ if (line_start == line_end) {
+ return true;
+ }
+-
++
+ /* Search for ':' on the line. */
+ const char* separator = line_start;
+ for (; separator != line_end; separator++) {
+diff --git src/arm/tlb.c src/arm/tlb.c
+index ba42a3e..9beb832 100644
+--- src/arm/tlb.c
++++ src/arm/tlb.c
+@@ -6,7 +6,7 @@ switch (uarch) {
+ * Cortex-A5 Technical Reference Manual:
+ * 6.3.1. Micro TLB
+ * The first level of caching for the page table information is a micro TLB of
+- * 10 entries that is implemented on each of the instruction and data sides.
++ * 10 entries that is implemented on each of the instruction and data sides.
+ * 6.3.2. Main TLB
+ * Misses from the instruction and data micro TLBs are handled by a unified main TLB.
+ * The main TLB is 128-entry two-way set-associative.
+diff --git src/cpuinfo/internal-api.h src/cpuinfo/internal-api.h
+index c6eed0b..9c23d7c 100644
+--- src/cpuinfo/internal-api.h
++++ src/cpuinfo/internal-api.h
+@@ -3,7 +3,7 @@
+ #include <stdint.h>
+ #include <stdbool.h>
+
+-#ifdef _WIN32
++#if defined(_WIN32) || defined(__CYGWIN__)
+ #include <windows.h>
+ #endif
+
+@@ -50,7 +50,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
+
+ CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
+ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
+-#ifdef _WIN32
++#if defined(_WIN32) || defined(__CYGWIN__)
+ CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+ #endif
+ CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
+diff --git src/emscripten/init.c src/emscripten/init.c
+new file mode 100644
+index 0000000..ce4bdea
+--- /dev/null
++++ src/emscripten/init.c
+@@ -0,0 +1,277 @@
++#include <stdbool.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <string.h>
++#include <math.h>
++
++#include <emscripten/threading.h>
++
++#include <cpuinfo.h>
++#include <cpuinfo/internal-api.h>
++#include <cpuinfo/log.h>
++
++
++static const volatile float infinity = INFINITY;
++
++static struct cpuinfo_package static_package = { };
++
++static struct cpuinfo_cache static_x86_l3 = {
++ .size = 2 * 1024 * 1024,
++ .associativity = 16,
++ .sets = 2048,
++ .partitions = 1,
++ .line_size = 64,
++};
++
++void cpuinfo_emscripten_init(void) {
++ struct cpuinfo_processor* processors = NULL;
++ struct cpuinfo_core* cores = NULL;
++ struct cpuinfo_cluster* clusters = NULL;
++ struct cpuinfo_cache* l1i = NULL;
++ struct cpuinfo_cache* l1d = NULL;
++ struct cpuinfo_cache* l2 = NULL;
++
++ const bool is_x86 = signbit(infinity - infinity);
++
++ int logical_cores_count = emscripten_num_logical_cores();
++ if (logical_cores_count <= 0) {
++ logical_cores_count = 1;
++ }
++ uint32_t processor_count = (uint32_t) logical_cores_count;
++ uint32_t core_count = processor_count;
++ uint32_t cluster_count = 1;
++ uint32_t big_cluster_core_count = core_count;
++ uint32_t processors_per_core = 1;
++ if (is_x86) {
++ if (processor_count % 2 == 0) {
++ processors_per_core = 2;
++ core_count = processor_count / 2;
++ big_cluster_core_count = core_count;
++ }
++ } else {
++ /* Assume ARM/ARM64 */
++ if (processor_count > 4) {
++ /* Assume big.LITTLE architecture */
++ cluster_count = 2;
++ big_cluster_core_count = processor_count >= 8 ? 4 : 2;
++ }
++ }
++ uint32_t l2_count = is_x86 ? core_count : cluster_count;
++
++ processors = calloc(processor_count, sizeof(struct cpuinfo_processor));
++ if (processors == NULL) {
++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
++ processor_count * sizeof(struct cpuinfo_processor), processor_count);
++ goto cleanup;
++ }
++ cores = calloc(processor_count, sizeof(struct cpuinfo_core));
++ if (cores == NULL) {
++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
++ processor_count * sizeof(struct cpuinfo_core), processor_count);
++ goto cleanup;
++ }
++ clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster));
++ if (clusters == NULL) {
++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" clusters",
++ cluster_count * sizeof(struct cpuinfo_cluster), cluster_count);
++ goto cleanup;
++ }
++
++ l1i = calloc(core_count, sizeof(struct cpuinfo_cache));
++ if (l1i == NULL) {
++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
++ core_count * sizeof(struct cpuinfo_cache), core_count);
++ goto cleanup;
++ }
++
++ l1d = calloc(core_count, sizeof(struct cpuinfo_cache));
++ if (l1d == NULL) {
++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
++ core_count * sizeof(struct cpuinfo_cache), core_count);
++ goto cleanup;
++ }
++
++ l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
++ if (l2 == NULL) {
++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
++ l2_count * sizeof(struct cpuinfo_cache), l2_count);
++ goto cleanup;
++ }
++
++ static_package.processor_count = processor_count;
++ static_package.core_count = core_count;
++ static_package.cluster_count = cluster_count;
++ if (is_x86) {
++ strncpy(static_package.name, "x86 vCPU", CPUINFO_PACKAGE_NAME_MAX);
++ } else {
++ strncpy(static_package.name, "ARM vCPU", CPUINFO_PACKAGE_NAME_MAX);
++ }
++
++ for (uint32_t i = 0; i < core_count; i++) {
++ for (uint32_t j = 0; j < processors_per_core; j++) {
++ processors[i * processors_per_core + j] = (struct cpuinfo_processor) {
++ .smt_id = j,
++ .core = cores + i,
++ .cluster = clusters + (uint32_t) (i >= big_cluster_core_count),
++ .package = &static_package,
++ .cache.l1i = l1i + i,
++ .cache.l1d = l1d + i,
++ .cache.l2 = is_x86 ? l2 + i : l2 + (uint32_t) (i >= big_cluster_core_count),
++ .cache.l3 = is_x86 ? &static_x86_l3 : NULL,
++ };
++ }
++
++ cores[i] = (struct cpuinfo_core) {
++ .processor_start = i * processors_per_core,
++ .processor_count = processors_per_core,
++ .core_id = i,
++ .cluster = clusters + (uint32_t) (i >= big_cluster_core_count),
++ .package = &static_package,
++ .vendor = cpuinfo_vendor_unknown,
++ .uarch = cpuinfo_uarch_unknown,
++ .frequency = 0,
++ };
++
++ l1i[i] = (struct cpuinfo_cache) {
++ .size = 32 * 1024,
++ .associativity = 4,
++ .sets = 128,
++ .partitions = 1,
++ .line_size = 64,
++ .processor_start = i * processors_per_core,
++ .processor_count = processors_per_core,
++ };
++
++ l1d[i] = (struct cpuinfo_cache) {
++ .size = 32 * 1024,
++ .associativity = 4,
++ .sets = 128,
++ .partitions = 1,
++ .line_size = 64,
++ .processor_start = i * processors_per_core,
++ .processor_count = processors_per_core,
++ };
++
++ if (is_x86) {
++ l2[i] = (struct cpuinfo_cache) {
++ .size = 256 * 1024,
++ .associativity = 8,
++ .sets = 512,
++ .partitions = 1,
++ .line_size = 64,
++ .processor_start = i * processors_per_core,
++ .processor_count = processors_per_core,
++ };
++ }
++ }
++
++ if (is_x86) {
++ clusters[0] = (struct cpuinfo_cluster) {
++ .processor_start = 0,
++ .processor_count = processor_count,
++ .core_start = 0,
++ .core_count = core_count,
++ .cluster_id = 0,
++ .package = &static_package,
++ .vendor = cpuinfo_vendor_unknown,
++ .uarch = cpuinfo_uarch_unknown,
++ .frequency = 0,
++ };
++
++ static_x86_l3.processor_count = processor_count;
++ } else {
++ clusters[0] = (struct cpuinfo_cluster) {
++ .processor_start = 0,
++ .processor_count = big_cluster_core_count,
++ .core_start = 0,
++ .core_count = big_cluster_core_count,
++ .cluster_id = 0,
++ .package = &static_package,
++ .vendor = cpuinfo_vendor_unknown,
++ .uarch = cpuinfo_uarch_unknown,
++ .frequency = 0,
++ };
++
++ l2[0] = (struct cpuinfo_cache) {
++ .size = 1024 * 1024,
++ .associativity = 8,
++ .sets = 2048,
++ .partitions = 1,
++ .line_size = 64,
++ .processor_start = 0,
++ .processor_count = big_cluster_core_count,
++ };
++
++ if (cluster_count > 1) {
++ l2[1] = (struct cpuinfo_cache) {
++ .size = 256 * 1024,
++ .associativity = 8,
++ .sets = 512,
++ .partitions = 1,
++ .line_size = 64,
++ .processor_start = big_cluster_core_count,
++ .processor_count = processor_count - big_cluster_core_count,
++ };
++
++ clusters[1] = (struct cpuinfo_cluster) {
++ .processor_start = big_cluster_core_count,
++ .processor_count = processor_count - big_cluster_core_count,
++ .core_start = big_cluster_core_count,
++ .core_count = processor_count - big_cluster_core_count,
++ .cluster_id = 1,
++ .package = &static_package,
++ .vendor = cpuinfo_vendor_unknown,
++ .uarch = cpuinfo_uarch_unknown,
++ .frequency = 0,
++ };
++ }
++ }
++
++ /* Commit changes */
++ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
++ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
++ cpuinfo_cache[cpuinfo_cache_level_2] = l2;
++ if (is_x86) {
++ cpuinfo_cache[cpuinfo_cache_level_3] = &static_x86_l3;
++ }
++
++ cpuinfo_processors = processors;
++ cpuinfo_cores = cores;
++ cpuinfo_clusters = clusters;
++ cpuinfo_packages = &static_package;
++
++ cpuinfo_cache_count[cpuinfo_cache_level_1i] = processor_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_1d] = processor_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
++ if (is_x86) {
++ cpuinfo_cache_count[cpuinfo_cache_level_3] = 1;
++ }
++
++ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
++ .uarch = cpuinfo_uarch_unknown,
++ .processor_count = processor_count,
++ .core_count = core_count,
++ };
++
++ cpuinfo_processors_count = processor_count;
++ cpuinfo_cores_count = processor_count;
++ cpuinfo_clusters_count = cluster_count;
++ cpuinfo_packages_count = 1;
++
++ cpuinfo_max_cache_size = is_x86 ? 128 * 1024 * 1024 : 8 * 1024 * 1024;
++
++ cpuinfo_is_initialized = true;
++
++ processors = NULL;
++ cores = NULL;
++ clusters = NULL;
++ l1i = l1d = l2 = NULL;
++
++cleanup:
++ free(processors);
++ free(cores);
++ free(clusters);
++ free(l1i);
++ free(l1d);
++ free(l2);
++}
+diff --git src/init.c src/init.c
+index 10a1afc..0d8cc3b 100644
+--- src/init.c
++++ src/init.c
+@@ -1,4 +1,4 @@
+-#ifdef _WIN32
++#if defined(_WIN32) || defined(__CYGWIN__)
+ #include <windows.h>
+ #elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
+ #include <pthread.h>
+@@ -13,7 +13,7 @@
+ #endif
+
+
+-#ifdef _WIN32
++#if defined(_WIN32) || defined(__CYGWIN__)
+ static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT;
+ #elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
+ static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
+@@ -27,7 +27,7 @@ bool CPUINFO_ABI cpuinfo_initialize(void) {
+ pthread_once(&init_guard, &cpuinfo_x86_mach_init);
+ #elif defined(__linux__)
+ pthread_once(&init_guard, &cpuinfo_x86_linux_init);
+- #elif defined(_WIN32)
++ #elif defined(_WIN32) || defined(__CYGWIN__)
+ InitOnceExecuteOnce(&init_guard, &cpuinfo_x86_windows_init, NULL, NULL);
+ #else
+ cpuinfo_log_error("operating system is not supported in cpuinfo");
+diff --git src/linux/mockfile.c src/linux/mockfile.c
+index 3fdd6bf..138acfe 100644
+--- src/linux/mockfile.c
++++ src/linux/mockfile.c
+@@ -34,7 +34,7 @@ void CPUINFO_ABI cpuinfo_mock_filesystem(struct cpuinfo_mock_file* files) {
+ file_count += 1;
+ }
+ cpuinfo_mock_files = files;
+- cpuinfo_mock_file_count = file_count;
++ cpuinfo_mock_file_count = file_count;
+ }
+
+ int CPUINFO_ABI cpuinfo_mock_open(const char* path, int oflag) {
+diff --git src/x86/cache/descriptor.c src/x86/cache/descriptor.c
+index 6532e4d..69d38cc 100644
+--- src/x86/cache/descriptor.c
++++ src/x86/cache/descriptor.c
+@@ -353,7 +353,7 @@ void cpuinfo_x86_decode_cache_descriptor(
+ };
+ break;
+ case 0x39:
+- /* Where does this come from? */
++ /* Where does this come from? */
+ cache->l2 = (struct cpuinfo_x86_cache) {
+ .size = 128 * 1024,
+ .associativity = 4,
+@@ -364,7 +364,7 @@ void cpuinfo_x86_decode_cache_descriptor(
+ };
+ break;
+ case 0x3A:
+- /* Where does this come from? */
++ /* Where does this come from? */
+ cache->l2 = (struct cpuinfo_x86_cache) {
+ .size = 192 * 1024,
+ .associativity = 6,
+@@ -375,7 +375,7 @@ void cpuinfo_x86_decode_cache_descriptor(
+ };
+ break;
+ case 0x3B:
+- /* Where does this come from? */
++ /* Where does this come from? */
+ cache->l2 = (struct cpuinfo_x86_cache) {
+ .size = 128 * 1024,
+ .associativity = 2,
+@@ -386,7 +386,7 @@ void cpuinfo_x86_decode_cache_descriptor(
+ };
+ break;
+ case 0x3C:
+- /* Where does this come from? */
++ /* Where does this come from? */
+ cache->l2 = (struct cpuinfo_x86_cache) {
+ .size = 256 * 1024,
+ .associativity = 4,
+@@ -397,7 +397,7 @@ void cpuinfo_x86_decode_cache_descriptor(
+ };
+ break;
+ case 0x3D:
+- /* Where does this come from? */
++ /* Where does this come from? */
+ cache->l2 = (struct cpuinfo_x86_cache) {
+ .size = 384 * 1024,
+ .associativity = 6,
+@@ -408,7 +408,7 @@ void cpuinfo_x86_decode_cache_descriptor(
+ };
+ break;
+ case 0x3E:
+- /* Where does this come from? */
++ /* Where does this come from? */
+ cache->l2 = (struct cpuinfo_x86_cache) {
+ .size = 512 * 1024,
+ .associativity = 4,
+@@ -1011,7 +1011,7 @@ void cpuinfo_x86_decode_cache_descriptor(
+ };
+ break;
+ case 0x73:
+- /* Where does this come from? */
++ /* Where does this come from? */
+ cache->trace = (struct cpuinfo_trace_cache) {
+ .uops = 64 * 1024,
+ .associativity = 8,
+diff --git src/x86/mockcpuid.c src/x86/mockcpuid.c
+index 6361dc2..2631f09 100644
+--- src/x86/mockcpuid.c
++++ src/x86/mockcpuid.c
+@@ -14,7 +14,7 @@ static uint32_t cpuinfo_mock_cpuid_leaf4_iteration = 0;
+
+ void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries) {
+ cpuinfo_mock_cpuid_data = dump;
+- cpuinfo_mock_cpuid_entries = entries;
++ cpuinfo_mock_cpuid_entries = entries;
+ };
+
+ void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]) {
+diff --git src/x86/name.c src/x86/name.c
+index e0d5a5b..a7cc7c6 100644
+--- src/x86/name.c
++++ src/x86/name.c
+@@ -135,7 +135,7 @@ static inline bool is_frequency(const char* token_start, const char* token_end)
+ const size_t token_length = (size_t) (token_end - token_start);
+ if (token_length > 3 && token_end[-2] == 'H' && token_end[-1] == 'z') {
+ switch (token_end[-3]) {
+- case 'K':
++ case 'K':
+ case 'M':
+ case 'G':
+ return true;
+@@ -347,7 +347,7 @@ static bool transform_token(char* token_start, char* token_end, struct parser_st
+ return false;
+ }
+ /*
+- * Erase "Mobile" when it is not part of the processor name,
++ * Erase "Mobile" when it is not part of the processor name,
+ * e.g. in "AMD Turion(tm) X2 Ultra Dual-Core Mobile ZM-82"
+ */
+ if (previousState.context_core != NULL) {
+@@ -540,8 +540,7 @@ uint32_t cpuinfo_x86_normalize_brand_string(
+ char* name_end = &name[48];
+ while (name_end[-1] == '\0') {
+ /*
+- * Adject name_end by 1 position and
+- * check that we didn't reach the start of the brand string.
++ * Adject name_end by 1 position and check that we didn't reach the start of the brand string.
+ * This is possible if all characters are zero.
+ */
+ if (--name_end == name) {
+@@ -704,6 +703,6 @@ uint32_t cpuinfo_x86_format_package_name(
+ } else {
+ snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX,
+ "%s %s", vendor_string, normalized_brand_string);
+- return strlen(vendor_string) + 1;
++ return (uint32_t) strlen(vendor_string) + 1;
+ }
+ }
+diff --git src/x86/vendor.c src/x86/vendor.c
+index 2bba90d..bad50fa 100644
+--- src/x86/vendor.c
++++ src/x86/vendor.c
+@@ -79,7 +79,7 @@ enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32
+ case ineI:
+ if (ecx == ntel) {
+ /* "GenuineIntel" */
+- return cpuinfo_vendor_intel;
++ return cpuinfo_vendor_intel;
+ }
+ break;
+ #if CPUINFO_ARCH_X86
+diff --git src/x86/windows/init.c src/x86/windows/init.c
+index 2c7e3cd..cf549d5 100644
+--- src/x86/windows/init.c
++++ src/x86/windows/init.c
+@@ -10,6 +10,13 @@
+
+ #include <Windows.h>
+
++#ifdef __GNUC__
++ #define CPUINFO_ALLOCA __builtin_alloca
++#else
++ #define CPUINFO_ALLOCA _alloca
++#endif
++
++
+ static inline uint32_t bit_mask(uint32_t bits) {
+ return (UINT32_C(1) << bits) - UINT32_C(1);
+ }
+@@ -118,7 +125,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
+ cpuinfo_log_debug("detected %"PRIu32" processor groups", max_group_count);
+
+ uint32_t processors_count = 0;
+- uint32_t* processors_per_group = (uint32_t*) _alloca(max_group_count * sizeof(uint32_t));
++ uint32_t* processors_per_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
+ for (uint32_t i = 0; i < max_group_count; i++) {
+ processors_per_group[i] = GetMaximumProcessorCount((WORD) i);
+ cpuinfo_log_debug("detected %"PRIu32" processors in group %"PRIu32,
+@@ -126,7 +133,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
+ processors_count += processors_per_group[i];
+ }
+
+- uint32_t* processors_before_group = (uint32_t*) _alloca(max_group_count * sizeof(uint32_t));
++ uint32_t* processors_before_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
+ for (uint32_t i = 0, count = 0; i < max_group_count; i++) {
+ processors_before_group[i] = count;
+ cpuinfo_log_debug("detected %"PRIu32" processors before group %"PRIu32,
+@@ -196,7 +203,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
+ /* Iterate processor groups and set the package part of APIC ID */
+ for (uint32_t i = 0; i < package_info->Processor.GroupCount; i++) {
+ const uint32_t group_id = package_info->Processor.GroupMask[i].Group;
+- /* Global index of the first logical processor belonging to this group */
++ /* Global index of the first logical processor belonging to this group */
+ const uint32_t group_processors_start = processors_before_group[group_id];
+ /* Bitmask representing processors in this group belonging to this package */
+ KAFFINITY group_processors_mask = package_info->Processor.GroupMask[i].Mask;
+@@ -245,7 +252,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
+ /* Iterate processor groups and set the core & SMT parts of APIC ID */
+ for (uint32_t i = 0; i < core_info->Processor.GroupCount; i++) {
+ const uint32_t group_id = core_info->Processor.GroupMask[i].Group;
+- /* Global index of the first logical processor belonging to this group */
++ /* Global index of the first logical processor belonging to this group */
+ const uint32_t group_processors_start = processors_before_group[group_id];
+ /* Bitmask representing processors in this group belonging to this package */
+ KAFFINITY group_processors_mask = core_info->Processor.GroupMask[i].Mask;
+@@ -259,7 +266,7 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
+ current_package_apic_id = processors[processor_id].apic_id;
+ }
+ /* Core ID w.r.t package */
+- const uint32_t package_core_id = core_id - package_core_start;
++ const uint32_t package_core_id = core_id - package_core_start;
+
+ /* Update APIC ID with core and SMT parts */
+ processors[processor_id].apic_id |=
+diff --git tools/gpu-dump.c tools/gpu-dump.c
+index d7cfa9e..6d17374 100644
+--- tools/gpu-dump.c
++++ tools/gpu-dump.c
+@@ -314,7 +314,7 @@ void report_gles_attributes(void) {
+ fprintf(stderr, "failed to get the number of EGL frame buffer configurations\n");
+ goto cleanup;
+ }
+-
++
+ configs = (EGLConfig*) malloc(configs_count * sizeof(EGLConfig));
+ if (configs == NULL) {
+ fprintf(stderr, "failed to allocate %zu bytes for %d frame buffer configurations\n",