1 /* 2 * Copyright (c) Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 #ifndef ZSTD_COMMON_CPU_H 12 #define ZSTD_COMMON_CPU_H 13 14 /** 15 * Implementation taken from folly/CpuId.h 16 * https://github.com/facebook/folly/blob/master/folly/CpuId.h 17 */ 18 19 #include "mem.h" 20 21 #ifdef _MSC_VER 22 #include <intrin.h> 23 #endif 24 25 typedef struct { 26 U32 f1c; 27 U32 f1d; 28 U32 f7b; 29 U32 f7c; 30 } ZSTD_cpuid_t; 31 32 MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { 33 U32 f1c = 0; 34 U32 f1d = 0; 35 U32 f7b = 0; 36 U32 f7c = 0; 37 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) 38 int reg[4]; 39 __cpuid((int*)reg, 0); 40 { 41 int const n = reg[0]; 42 if (n >= 1) { 43 __cpuid((int*)reg, 1); 44 f1c = (U32)reg[2]; 45 f1d = (U32)reg[3]; 46 } 47 if (n >= 7) { 48 __cpuidex((int*)reg, 7, 0); 49 f7b = (U32)reg[1]; 50 f7c = (U32)reg[2]; 51 } 52 } 53 #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) 54 /* The following block like the normal cpuid branch below, but gcc 55 * reserves ebx for use of its pic register so we must specially 56 * handle the save and restore to avoid clobbering the register 57 */ 58 U32 n; 59 __asm__( 60 "pushl %%ebx\n\t" 61 "cpuid\n\t" 62 "popl %%ebx\n\t" 63 : "=a"(n) 64 : "a"(0) 65 : "ecx", "edx"); 66 if (n >= 1) { 67 U32 f1a; 68 __asm__( 69 "pushl %%ebx\n\t" 70 "cpuid\n\t" 71 "popl %%ebx\n\t" 72 : "=a"(f1a), "=c"(f1c), "=d"(f1d) 73 : "a"(1)); 74 } 75 if (n >= 7) { 76 __asm__( 77 "pushl %%ebx\n\t" 78 "cpuid\n\t" 79 "movl %%ebx, %%eax\n\t" 80 "popl %%ebx" 81 : "=a"(f7b), "=c"(f7c) 82 : "a"(7), "c"(0) 83 : "edx"); 84 } 85 #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) 86 U32 n; 87 __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); 88 if (n >= 1) { 89 U32 f1a; 90 __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); 91 } 92 if (n >= 7) { 93 U32 f7a; 94 __asm__("cpuid" 95 : "=a"(f7a), "=b"(f7b), "=c"(f7c) 96 : "a"(7), "c"(0) 97 : "edx"); 98 } 99 #endif 100 { 101 ZSTD_cpuid_t cpuid; 102 cpuid.f1c = f1c; 103 cpuid.f1d = f1d; 104 cpuid.f7b = f7b; 105 cpuid.f7c = f7c; 106 return cpuid; 107 } 108 } 109 110 #define X(name, r, bit) \ 111 MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ 112 return ((cpuid.r) & (1U << bit)) != 0; \ 113 } 114 115 /* cpuid(1): Processor Info and Feature Bits. */ 116 #define C(name, bit) X(name, f1c, bit) 117 C(sse3, 0) 118 C(pclmuldq, 1) 119 C(dtes64, 2) 120 C(monitor, 3) 121 C(dscpl, 4) 122 C(vmx, 5) 123 C(smx, 6) 124 C(eist, 7) 125 C(tm2, 8) 126 C(ssse3, 9) 127 C(cnxtid, 10) 128 C(fma, 12) 129 C(cx16, 13) 130 C(xtpr, 14) 131 C(pdcm, 15) 132 C(pcid, 17) 133 C(dca, 18) 134 C(sse41, 19) 135 C(sse42, 20) 136 C(x2apic, 21) 137 C(movbe, 22) 138 C(popcnt, 23) 139 C(tscdeadline, 24) 140 C(aes, 25) 141 C(xsave, 26) 142 C(osxsave, 27) 143 C(avx, 28) 144 C(f16c, 29) 145 C(rdrand, 30) 146 #undef C 147 #define D(name, bit) X(name, f1d, bit) 148 D(fpu, 0) 149 D(vme, 1) 150 D(de, 2) 151 D(pse, 3) 152 D(tsc, 4) 153 D(msr, 5) 154 D(pae, 6) 155 D(mce, 7) 156 D(cx8, 8) 157 D(apic, 9) 158 D(sep, 11) 159 D(mtrr, 12) 160 D(pge, 13) 161 D(mca, 14) 162 D(cmov, 15) 163 D(pat, 16) 164 D(pse36, 17) 165 D(psn, 18) 166 D(clfsh, 19) 167 D(ds, 21) 168 D(acpi, 22) 169 D(mmx, 23) 170 D(fxsr, 24) 171 D(sse, 25) 172 D(sse2, 26) 173 D(ss, 27) 174 D(htt, 28) 175 D(tm, 29) 176 D(pbe, 31) 177 #undef D 178 179 /* cpuid(7): Extended Features. */ 180 #define B(name, bit) X(name, f7b, bit) 181 B(bmi1, 3) 182 B(hle, 4) 183 B(avx2, 5) 184 B(smep, 7) 185 B(bmi2, 8) 186 B(erms, 9) 187 B(invpcid, 10) 188 B(rtm, 11) 189 B(mpx, 14) 190 B(avx512f, 16) 191 B(avx512dq, 17) 192 B(rdseed, 18) 193 B(adx, 19) 194 B(smap, 20) 195 B(avx512ifma, 21) 196 B(pcommit, 22) 197 B(clflushopt, 23) 198 B(clwb, 24) 199 B(avx512pf, 26) 200 B(avx512er, 27) 201 B(avx512cd, 28) 202 B(sha, 29) 203 B(avx512bw, 30) 204 B(avx512vl, 31) 205 #undef B 206 #define C(name, bit) X(name, f7c, bit) 207 C(prefetchwt1, 0) 208 C(avx512vbmi, 1) 209 #undef C 210 211 #undef X 212 213 #endif /* ZSTD_COMMON_CPU_H */ 214