aboutsummaryrefslogtreecommitdiff
path: root/source/cpu_id.cc
blob: eedce16b44ffb802dc1e5b76665a4dab604a7083 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/cpu_id.h"

#if defined(_MSC_VER)
#include <intrin.h>  // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) &&                           \
    !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
    defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h>  // For _xgetbv()
#endif

// For ArmCpuCaps() but unittested on all platforms
#include <stdio.h>  // For fopen()
#include <string.h>

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check.
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \
    !defined(__clang__)
#define SAFEBUFFERS __declspec(safebuffers)
#else
#define SAFEBUFFERS
#endif

// cpu_info_ variable for SIMD instruction sets detected.
LIBYUV_API int cpu_info_ = 0;

// Low level cpuid for X86.
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
     defined(__x86_64__)) &&                                     \
    !defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API
void CpuId(int info_eax, int info_ecx, int* cpu_info) {
#if defined(_MSC_VER)
// Visual C version uses intrinsic or inline x86 assembly.
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
  __cpuidex(cpu_info, info_eax, info_ecx);
#elif defined(_M_IX86)
  __asm {
    mov        eax, info_eax
    mov        ecx, info_ecx
    mov        edi, cpu_info
    cpuid
    mov        [edi], eax
    mov        [edi + 4], ebx
    mov        [edi + 8], ecx
    mov        [edi + 12], edx
  }
#else  // Visual C but not x86
  if (info_ecx == 0) {
    __cpuid(cpu_info, info_eax);
  } else {
    cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0u;
  }
#endif
// GCC version uses inline x86 assembly.
#else  // defined(_MSC_VER)
  int info_ebx, info_edx;
  asm volatile(
#if defined(__i386__) && defined(__PIC__)
      // Preserve ebx for fpic 32 bit.
      "mov         %%ebx, %%edi                  \n"
      "cpuid                                     \n"
      "xchg        %%edi, %%ebx                  \n"
      : "=D"(info_ebx),
#else
      "cpuid                                     \n"
      : "=b"(info_ebx),
#endif  //  defined( __i386__) && defined(__PIC__)
        "+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
  cpu_info[0] = info_eax;
  cpu_info[1] = info_ebx;
  cpu_info[2] = info_ecx;
  cpu_info[3] = info_edx;
#endif  // defined(_MSC_VER)
}
#else  // (defined(_M_IX86) || defined(_M_X64) ...
LIBYUV_API
void CpuId(int eax, int ecx, int* cpu_info) {
  (void)eax;
  (void)ecx;
  cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
}
#endif

// For VS2010 and earlier emit can be used:
//   _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
//  __asm {
//    xor        ecx, ecx    // xcr 0
//    xgetbv
//    mov        xcr0, eax
//  }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
     defined(__x86_64__)) &&                                     \
    !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
static int GetXCR0() {
  int xcr0 = 0;
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
  xcr0 = (int)_xgetbv(0);  // VS2010 SP1 required.  NOLINT
#elif defined(__i386__) || defined(__x86_64__)
  asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
#endif  // defined(__i386__) || defined(__x86_64__)
  return xcr0;
}
#else
// xgetbv unavailable to query for OSSave support.  Return 0.
#define GetXCR0() 0
#endif  // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif

// Based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
  char cpuinfo_line[512];
  FILE* f = fopen(cpuinfo_name, "re");
  if (!f) {
    // Assume Neon if /proc/cpuinfo is unavailable.
    // This will occur for Chrome sandbox for Pepper or Render process.
    return kCpuHasNEON;
  }
  memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
  while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
    if (memcmp(cpuinfo_line, "Features", 8) == 0) {
      char* p = strstr(cpuinfo_line, " neon");
      if (p && (p[5] == ' ' || p[5] == '\n')) {
        fclose(f);
        return kCpuHasNEON;
      }
      // aarch64 uses asimd for Neon.
      p = strstr(cpuinfo_line, " asimd");
      if (p) {
        fclose(f);
        return kCpuHasNEON;
      }
    }
  }
  fclose(f);
  return 0;
}

LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) {
  char cpuinfo_line[512];
  int flag = 0;
  FILE* f = fopen(cpuinfo_name, "re");
  if (!f) {
#if defined(__riscv_vector)
    // Assume RVV if /proc/cpuinfo is unavailable.
    // This will occur for Chrome sandbox for Pepper or Render process.
    return kCpuHasRVV;
#else
    return 0;
#endif
  }
  memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
  while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
    if (memcmp(cpuinfo_line, "isa", 3) == 0) {
      // ISA string must begin with rv64{i,e,g} for a 64-bit processor.
      char* isa = strstr(cpuinfo_line, "rv64");
      if (isa) {
        size_t isa_len = strlen(isa);
        char* extensions;
        size_t extensions_len = 0;
        size_t std_isa_len;
        // Remove the new-line character at the end of string
        if (isa[isa_len - 1] == '\n') {
          isa[--isa_len] = '\0';
        }
        // 5 ISA characters
        if (isa_len < 5) {
          fclose(f);
          return 0;
        }
        // Skip {i,e,g} canonical checking.
        // Skip rvxxx
        isa += 5;
        // Find the very first occurrence of 's', 'x' or 'z'.
        // To detect multi-letter standard, non-standard, and
        // supervisor-level extensions.
        extensions = strpbrk(isa, "zxs");
        if (extensions) {
          // Multi-letter extensions are seperated by a single underscore
          // as described in RISC-V User-Level ISA V2.2.
          char* ext = strtok(extensions, "_");
          extensions_len = strlen(extensions);
          while (ext) {
            // Search for the ZVFH (Vector FP16) extension.
            if (!strcmp(ext, "zvfh")) {
              flag |= kCpuHasRVVZVFH;
            }
            ext = strtok(NULL, "_");
          }
        }
        std_isa_len = isa_len - extensions_len - 5;
        // Detect the v in the standard single-letter extensions.
        if (memchr(isa, 'v', std_isa_len)) {
          // The RVV implied the F extension.
          flag |= kCpuHasRVV;
        }
      }
    }
#if defined(__riscv_vector)
    // Assume RVV if /proc/cpuinfo is from x86 host running QEMU.
    else if ((memcmp(cpuinfo_line, "vendor_id\t: GenuineIntel", 24) == 0) ||
             (memcmp(cpuinfo_line, "vendor_id\t: AuthenticAMD", 24) == 0)) {
      fclose(f);
      return kCpuHasRVV;
    }
#endif
  }
  fclose(f);
  return flag;
}

LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
  char cpuinfo_line[512];
  int flag = 0;
  FILE* f = fopen(cpuinfo_name, "re");
  if (!f) {
    // Assume nothing if /proc/cpuinfo is unavailable.
    // This will occur for Chrome sandbox for Pepper or Render process.
    return 0;
  }
  memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
  while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
    if (memcmp(cpuinfo_line, "cpu model", 9) == 0) {
      // Workaround early kernel without MSA in ASEs line.
      if (strstr(cpuinfo_line, "Loongson-2K")) {
        flag |= kCpuHasMSA;
      }
    }
    if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
      if (strstr(cpuinfo_line, "msa")) {
        flag |= kCpuHasMSA;
      }
      // ASEs is the last line, so we can break here.
      break;
    }
  }
  fclose(f);
  return flag;
}

#define LOONGARCH_CFG2 0x2
#define LOONGARCH_CFG2_LSX (1 << 6)
#define LOONGARCH_CFG2_LASX (1 << 7)

#if defined(__loongarch__)
LIBYUV_API SAFEBUFFERS int LoongarchCpuCaps(void) {
  int flag = 0;
  uint32_t cfg2 = 0;

  __asm__ volatile("cpucfg %0, %1 \n\t" : "+&r"(cfg2) : "r"(LOONGARCH_CFG2));

  if (cfg2 & LOONGARCH_CFG2_LSX)
    flag |= kCpuHasLSX;

  if (cfg2 & LOONGARCH_CFG2_LASX)
    flag |= kCpuHasLASX;
  return flag;
}
#endif

static SAFEBUFFERS int GetCpuFlags(void) {
  int cpu_info = 0;
#if !defined(__pnacl__) && !defined(__CLR_VER) &&                   \
    (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
     defined(_M_IX86))
  int cpu_info0[4] = {0, 0, 0, 0};
  int cpu_info1[4] = {0, 0, 0, 0};
  int cpu_info7[4] = {0, 0, 0, 0};
  int cpu_einfo7[4] = {0, 0, 0, 0};
  CpuId(0, 0, cpu_info0);
  CpuId(1, 0, cpu_info1);
  if (cpu_info0[0] >= 7) {
    CpuId(7, 0, cpu_info7);
    CpuId(7, 1, cpu_einfo7);
  }
  cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
             ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
             ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
             ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
             ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);

  // AVX requires OS saves YMM registers.
  if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) &&  // AVX and OSXSave
      ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers
    cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
                ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
                ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0) |
                ((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) |
                ((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0);

    // Detect AVX512bw
    if ((GetXCR0() & 0xe0) == 0xe0) {
      cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0;
      cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0;
      cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0;
      cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
      cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0;
      cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
      cpu_info |= (cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0;
    }
  }
#endif
#if defined(__mips__) && defined(__linux__)
  cpu_info = MipsCpuCaps("/proc/cpuinfo");
  cpu_info |= kCpuHasMIPS;
#endif
#if defined(__loongarch__) && defined(__linux__)
  cpu_info = LoongarchCpuCaps();
  cpu_info |= kCpuHasLOONGARCH;
#endif
#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
  cpu_info = kCpuHasNEON;
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
// flag in it.
// So for aarch64, neon enabling is hard coded here.
#endif
#if defined(__aarch64__)
  cpu_info = kCpuHasNEON;
#else
  // Linux arm parse text file for neon detect.
  cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif
  cpu_info |= kCpuHasARM;
#endif  // __arm__
#if defined(__riscv) && defined(__linux__)
  cpu_info = RiscvCpuCaps("/proc/cpuinfo");
  cpu_info |= kCpuHasRISCV;
#endif  // __riscv
  cpu_info |= kCpuInitialized;
  return cpu_info;
}

// Note that use of this function is not thread safe.
LIBYUV_API
int MaskCpuFlags(int enable_flags) {
  int cpu_info = GetCpuFlags() & enable_flags;
  SetCpuFlags(cpu_info);
  return cpu_info;
}

LIBYUV_API
int InitCpuFlags(void) {
  return MaskCpuFlags(-1);
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif