1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only 2 /* 3 * Copyright (c) Meta Platforms, Inc. and affiliates. 4 * All rights reserved. 5 * 6 * This source code is licensed under both the BSD-style license (found in the 7 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 8 * in the COPYING file in the root directory of this source tree). 9 * You may select, at your option, one of the above-listed licenses. 10 */ 11 12 #ifndef ZSTD_COMMON_CPU_H 13 #define ZSTD_COMMON_CPU_H 14 15 /** 16 * Implementation taken from folly/CpuId.h 17 * https://github.com/facebook/folly/blob/master/folly/CpuId.h 18 */ 19 20 #include "mem.h" 21 22 #ifdef _MSC_VER 23 #include <intrin.h> 24 #endif 25 26 typedef struct { 27 U32 f1c; 28 U32 f1d; 29 U32 f7b; 30 U32 f7c; 31 } ZSTD_cpuid_t; 32 33 MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { 34 U32 f1c = 0; 35 U32 f1d = 0; 36 U32 f7b = 0; 37 U32 f7c = 0; 38 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) 39 #if !defined(_M_X64) || !defined(__clang__) || __clang_major__ >= 16 40 int reg[4]; 41 __cpuid((int*)reg, 0); 42 { 43 int const n = reg[0]; 44 if (n >= 1) { 45 __cpuid((int*)reg, 1); 46 f1c = (U32)reg[2]; 47 f1d = (U32)reg[3]; 48 } 49 if (n >= 7) { 50 __cpuidex((int*)reg, 7, 0); 51 f7b = (U32)reg[1]; 52 f7c = (U32)reg[2]; 53 } 54 } 55 #else 56 /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in 57 * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs 58 * to due to being a reserved register. So in that case, do the `cpuid` 59 * ourselves. Clang supports inline assembly anyway. 60 */ 61 U32 n; 62 __asm__( 63 "pushq %%rbx\n\t" 64 "cpuid\n\t" 65 "popq %%rbx\n\t" 66 : "=a"(n) 67 : "a"(0) 68 : "rcx", "rdx"); 69 if (n >= 1) { 70 U32 f1a; 71 __asm__( 72 "pushq %%rbx\n\t" 73 "cpuid\n\t" 74 "popq %%rbx\n\t" 75 : "=a"(f1a), "=c"(f1c), "=d"(f1d) 76 : "a"(1) 77 :); 78 } 79 if (n >= 7) { 80 __asm__( 81 "pushq %%rbx\n\t" 82 "cpuid\n\t" 83 "movq %%rbx, %%rax\n\t" 84 "popq %%rbx" 85 : "=a"(f7b), "=c"(f7c) 86 : "a"(7), "c"(0) 87 : "rdx"); 88 } 89 #endif 90 #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) 91 /* The following block like the normal cpuid branch below, but gcc 92 * reserves ebx for use of its pic register so we must specially 93 * handle the save and restore to avoid clobbering the register 94 */ 95 U32 n; 96 __asm__( 97 "pushl %%ebx\n\t" 98 "cpuid\n\t" 99 "popl %%ebx\n\t" 100 : "=a"(n) 101 : "a"(0) 102 : "ecx", "edx"); 103 if (n >= 1) { 104 U32 f1a; 105 __asm__( 106 "pushl %%ebx\n\t" 107 "cpuid\n\t" 108 "popl %%ebx\n\t" 109 : "=a"(f1a), "=c"(f1c), "=d"(f1d) 110 : "a"(1)); 111 } 112 if (n >= 7) { 113 __asm__( 114 "pushl %%ebx\n\t" 115 "cpuid\n\t" 116 "movl %%ebx, %%eax\n\t" 117 "popl %%ebx" 118 : "=a"(f7b), "=c"(f7c) 119 : "a"(7), "c"(0) 120 : "edx"); 121 } 122 #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) 123 U32 n; 124 __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); 125 if (n >= 1) { 126 U32 f1a; 127 __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); 128 } 129 if (n >= 7) { 130 U32 f7a; 131 __asm__("cpuid" 132 : "=a"(f7a), "=b"(f7b), "=c"(f7c) 133 : "a"(7), "c"(0) 134 : "edx"); 135 } 136 #endif 137 { 138 ZSTD_cpuid_t cpuid; 139 cpuid.f1c = f1c; 140 cpuid.f1d = f1d; 141 cpuid.f7b = f7b; 142 cpuid.f7c = f7c; 143 return cpuid; 144 } 145 } 146 147 #define X(name, r, bit) \ 148 MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ 149 return ((cpuid.r) & (1U << bit)) != 0; \ 150 } 151 152 /* cpuid(1): Processor Info and Feature Bits. */ 153 #define C(name, bit) X(name, f1c, bit) 154 C(sse3, 0) 155 C(pclmuldq, 1) 156 C(dtes64, 2) 157 C(monitor, 3) 158 C(dscpl, 4) 159 C(vmx, 5) 160 C(smx, 6) 161 C(eist, 7) 162 C(tm2, 8) 163 C(ssse3, 9) 164 C(cnxtid, 10) 165 C(fma, 12) 166 C(cx16, 13) 167 C(xtpr, 14) 168 C(pdcm, 15) 169 C(pcid, 17) 170 C(dca, 18) 171 C(sse41, 19) 172 C(sse42, 20) 173 C(x2apic, 21) 174 C(movbe, 22) 175 C(popcnt, 23) 176 C(tscdeadline, 24) 177 C(aes, 25) 178 C(xsave, 26) 179 C(osxsave, 27) 180 C(avx, 28) 181 C(f16c, 29) 182 C(rdrand, 30) 183 #undef C 184 #define D(name, bit) X(name, f1d, bit) 185 D(fpu, 0) 186 D(vme, 1) 187 D(de, 2) 188 D(pse, 3) 189 D(tsc, 4) 190 D(msr, 5) 191 D(pae, 6) 192 D(mce, 7) 193 D(cx8, 8) 194 D(apic, 9) 195 D(sep, 11) 196 D(mtrr, 12) 197 D(pge, 13) 198 D(mca, 14) 199 D(cmov, 15) 200 D(pat, 16) 201 D(pse36, 17) 202 D(psn, 18) 203 D(clfsh, 19) 204 D(ds, 21) 205 D(acpi, 22) 206 D(mmx, 23) 207 D(fxsr, 24) 208 D(sse, 25) 209 D(sse2, 26) 210 D(ss, 27) 211 D(htt, 28) 212 D(tm, 29) 213 D(pbe, 31) 214 #undef D 215 216 /* cpuid(7): Extended Features. */ 217 #define B(name, bit) X(name, f7b, bit) 218 B(bmi1, 3) 219 B(hle, 4) 220 B(avx2, 5) 221 B(smep, 7) 222 B(bmi2, 8) 223 B(erms, 9) 224 B(invpcid, 10) 225 B(rtm, 11) 226 B(mpx, 14) 227 B(avx512f, 16) 228 B(avx512dq, 17) 229 B(rdseed, 18) 230 B(adx, 19) 231 B(smap, 20) 232 B(avx512ifma, 21) 233 B(pcommit, 22) 234 B(clflushopt, 23) 235 B(clwb, 24) 236 B(avx512pf, 26) 237 B(avx512er, 27) 238 B(avx512cd, 28) 239 B(sha, 29) 240 B(avx512bw, 30) 241 B(avx512vl, 31) 242 #undef B 243 #define C(name, bit) X(name, f7c, bit) 244 C(prefetchwt1, 0) 245 C(avx512vbmi, 1) 246 #undef C 247 248 #undef X 249 250 #endif /* ZSTD_COMMON_CPU_H */ 251