aboutsummaryrefslogtreecommitdiff
path: root/vpx_ports/aarch64_cpudetect.c
blob: 539d09bb392b610440077b01c81296f1888b9905 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
/*
 *  Copyright (c) 2023 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_config.h"
#include "arm_cpudetect.h"

#if defined(__APPLE__)
#include <sys/sysctl.h>
#endif

#if !CONFIG_RUNTIME_CPU_DETECT

// Capability mask used when runtime CPU detection is compiled out. In this
// configuration no RTCD tables exist and functions are called statically, so
// the value simply mirrors the build configuration: HAS_NEON when HAVE_NEON is
// enabled, otherwise 0.
static int arm_get_cpu_caps(void) {
#if HAVE_NEON
  return HAS_NEON;
#else
  return 0;
#endif  // HAVE_NEON
}

#elif defined(__APPLE__)  // end !CONFIG_RUNTIME_CPU_DETECT

// sysctlbyname() parameter documentation for instruction set characteristics:
// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
// Reads the sysctl entry named by |feature| into a zero-initialized 64-bit
// value and returns it. Returns 0 when sysctlbyname() reports failure.
static INLINE int64_t have_feature(const char *feature) {
  int64_t value = 0;
  size_t value_size = sizeof(value);
  const int rc = sysctlbyname(feature, &value, &value_size, NULL, 0);
  return (rc == 0) ? value : 0;
}

// Builds the capability mask for Apple platforms. HAS_NEON is set whenever
// Neon is compiled in; the DotProd and I8MM flags are set only when the
// matching hw.optional.arm.* sysctl entry yields a nonzero value.
static int arm_get_cpu_caps(void) {
  int caps = 0;
#if HAVE_NEON
  caps |= HAS_NEON;
#endif  // HAVE_NEON
#if HAVE_NEON_DOTPROD
  caps |= have_feature("hw.optional.arm.FEAT_DotProd") ? HAS_NEON_DOTPROD : 0;
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  caps |= have_feature("hw.optional.arm.FEAT_I8MM") ? HAS_NEON_I8MM : 0;
#endif  // HAVE_NEON_I8MM
  return caps;
}

#elif defined(_WIN32)  // end __APPLE__

// Builds the capability mask for Windows.
// IsProcessorFeaturePresent() parameter documentation:
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
static int arm_get_cpu_caps(void) {
  int caps = 0;
#if HAVE_NEON
  caps |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
// Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK
// 20348, supported by Windows 11 and Windows Server 2022. When the SDK in use
// does not define it, DotProd is never reported.
#if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
    caps |= HAS_NEON_DOTPROD;
  }
#endif  // HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
  // No I8MM or SVE feature detection available on Windows at time of writing.
  return caps;
}

#elif defined(ANDROID_USE_CPU_FEATURES_LIB)

// Capability mask for builds where ANDROID_USE_CPU_FEATURES_LIB is defined.
// Only the compile-time Neon flag is reported; no other feature is probed in
// this branch.
static int arm_get_cpu_caps(void) {
#if HAVE_NEON
  return HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#else
  return 0;
#endif  // HAVE_NEON
}

#elif defined(__linux__)  // end defined(ANDROID_USE_CPU_FEATURES_LIB)

#include <sys/auxv.h>

// Define hwcap values ourselves: building with an old auxv header where these
// hwcap values are not defined should not prevent features from being enabled.
#define VPX_AARCH64_HWCAP_ASIMDDP (1 << 20)
#define VPX_AARCH64_HWCAP_SVE (1 << 22)
#define VPX_AARCH64_HWCAP2_I8MM (1 << 13)

// Builds the capability mask on Linux from the ELF auxiliary vector.
//
// HAS_NEON is set whenever Neon is compiled in. DotProd and SVE are read from
// AT_HWCAP, and I8MM from AT_HWCAP2, using the VPX_AARCH64_HWCAP* bit values
// defined in this file.
//
// Each auxv word is fetched only when a feature that tests it is enabled at
// build time. Fetching it unconditionally leaves an unused local (and a
// -Wunused-variable warning) in configurations where every consumer of that
// word is disabled.
static int arm_get_cpu_caps(void) {
  int flags = 0;
#if HAVE_NEON_DOTPROD || HAVE_SVE
  const unsigned long hwcap = getauxval(AT_HWCAP);
#endif  // HAVE_NEON_DOTPROD || HAVE_SVE
#if HAVE_NEON_I8MM
  const unsigned long hwcap2 = getauxval(AT_HWCAP2);
#endif  // HAVE_NEON_I8MM
#if HAVE_NEON
  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
#if HAVE_NEON_DOTPROD
  if (hwcap & VPX_AARCH64_HWCAP_ASIMDDP) {
    flags |= HAS_NEON_DOTPROD;
  }
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  if (hwcap2 & VPX_AARCH64_HWCAP2_I8MM) {
    flags |= HAS_NEON_I8MM;
  }
#endif  // HAVE_NEON_I8MM
#if HAVE_SVE
  if (hwcap & VPX_AARCH64_HWCAP_SVE) {
    flags |= HAS_SVE;
  }
#endif  // HAVE_SVE
  return flags;
}

#elif defined(__Fuchsia__)  // end __linux__

#include <zircon/features.h>
#include <zircon/syscalls.h>

// Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/894282.
#ifndef ZX_ARM64_FEATURE_ISA_I8MM
#define ZX_ARM64_FEATURE_ISA_I8MM ((uint32_t)(1u << 19))
#endif
// Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/895083.
#ifndef ZX_ARM64_FEATURE_ISA_SVE
#define ZX_ARM64_FEATURE_ISA_SVE ((uint32_t)(1u << 20))
#endif

// Builds the capability mask on Fuchsia. HAS_NEON is set whenever Neon is
// compiled in. The remaining flags are derived from the CPU feature word
// returned by zx_system_get_features(); if that call does not return ZX_OK,
// only the Neon flag (if any) is reported.
static int arm_get_cpu_caps(void) {
  int caps = 0;
#if HAVE_NEON
  caps |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
  uint32_t cpu_features;
  if (zx_system_get_features(ZX_FEATURE_KIND_CPU, &cpu_features) == ZX_OK) {
#if HAVE_NEON_DOTPROD
    caps |= (cpu_features & ZX_ARM64_FEATURE_ISA_DP) ? HAS_NEON_DOTPROD : 0;
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
    caps |= (cpu_features & ZX_ARM64_FEATURE_ISA_I8MM) ? HAS_NEON_I8MM : 0;
#endif  // HAVE_NEON_I8MM
#if HAVE_SVE
    caps |= (cpu_features & ZX_ARM64_FEATURE_ISA_SVE) ? HAS_SVE : 0;
#endif  // HAVE_SVE
  }
  return caps;
}

#else  // end __Fuchsia__
#error \
    "Runtime CPU detection selected, but no CPU detection method available" \
"for your platform. Rerun configure with --disable-runtime-cpu-detect."
#endif

// Returns the final Arm capability mask.
//
// When arm_cpu_env_flags() returns nonzero, the value it stored is used as the
// starting mask. Otherwise the mask is the platform result from
// arm_get_cpu_caps() ANDed with arm_cpu_env_mask(). The mask is then pruned so
// that no feature is reported without the features it is assumed to imply.
int arm_cpu_caps(void) {
  int caps = 0;
  if (!arm_cpu_env_flags(&caps)) {
    caps = arm_get_cpu_caps() & arm_cpu_env_mask();
  }

  // FEAT_I8MM and FEAT_SVE both assume that FEAT_DotProd is available.
  if ((caps & HAS_NEON_DOTPROD) == 0) {
    caps &= ~(HAS_NEON_I8MM | HAS_SVE);
  }

  // FEAT_SVE additionally assumes that FEAT_I8MM is available.
  if ((caps & HAS_NEON_I8MM) == 0) {
    caps &= ~HAS_SVE;
  }

  return caps;
}