1*19fcbaf1SConrad Meyer /* 2*19fcbaf1SConrad Meyer * Copyright (c) 2018-present, Facebook, Inc. 3*19fcbaf1SConrad Meyer * All rights reserved. 4*19fcbaf1SConrad Meyer * 5*19fcbaf1SConrad Meyer * This source code is licensed under both the BSD-style license (found in the 6*19fcbaf1SConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*19fcbaf1SConrad Meyer * in the COPYING file in the root directory of this source tree). 8*19fcbaf1SConrad Meyer * You may select, at your option, one of the above-listed licenses. 9*19fcbaf1SConrad Meyer */ 10*19fcbaf1SConrad Meyer 11*19fcbaf1SConrad Meyer #ifndef ZSTD_COMMON_CPU_H 12*19fcbaf1SConrad Meyer #define ZSTD_COMMON_CPU_H 13*19fcbaf1SConrad Meyer 14*19fcbaf1SConrad Meyer /** 15*19fcbaf1SConrad Meyer * Implementation taken from folly/CpuId.h 16*19fcbaf1SConrad Meyer * https://github.com/facebook/folly/blob/master/folly/CpuId.h 17*19fcbaf1SConrad Meyer */ 18*19fcbaf1SConrad Meyer 19*19fcbaf1SConrad Meyer #include <string.h> 20*19fcbaf1SConrad Meyer 21*19fcbaf1SConrad Meyer #include "mem.h" 22*19fcbaf1SConrad Meyer 23*19fcbaf1SConrad Meyer #ifdef _MSC_VER 24*19fcbaf1SConrad Meyer #include <intrin.h> 25*19fcbaf1SConrad Meyer #endif 26*19fcbaf1SConrad Meyer 27*19fcbaf1SConrad Meyer typedef struct { 28*19fcbaf1SConrad Meyer U32 f1c; 29*19fcbaf1SConrad Meyer U32 f1d; 30*19fcbaf1SConrad Meyer U32 f7b; 31*19fcbaf1SConrad Meyer U32 f7c; 32*19fcbaf1SConrad Meyer } ZSTD_cpuid_t; 33*19fcbaf1SConrad Meyer 34*19fcbaf1SConrad Meyer MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { 35*19fcbaf1SConrad Meyer U32 f1c = 0; 36*19fcbaf1SConrad Meyer U32 f1d = 0; 37*19fcbaf1SConrad Meyer U32 f7b = 0; 38*19fcbaf1SConrad Meyer U32 f7c = 0; 39*19fcbaf1SConrad Meyer #ifdef _MSC_VER 40*19fcbaf1SConrad Meyer int reg[4]; 41*19fcbaf1SConrad Meyer __cpuid((int*)reg, 0); 42*19fcbaf1SConrad Meyer { 43*19fcbaf1SConrad Meyer int const n = reg[0]; 44*19fcbaf1SConrad Meyer if (n >= 1) { 45*19fcbaf1SConrad Meyer __cpuid((int*)reg, 1); 46*19fcbaf1SConrad Meyer f1c = (U32)reg[2]; 47*19fcbaf1SConrad Meyer f1d = (U32)reg[3]; 48*19fcbaf1SConrad Meyer } 49*19fcbaf1SConrad Meyer if (n >= 7) { 50*19fcbaf1SConrad Meyer __cpuidex((int*)reg, 7, 0); 51*19fcbaf1SConrad Meyer f7b = (U32)reg[1]; 52*19fcbaf1SConrad Meyer f7c = (U32)reg[2]; 53*19fcbaf1SConrad Meyer } 54*19fcbaf1SConrad Meyer } 55*19fcbaf1SConrad Meyer #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) 56*19fcbaf1SConrad Meyer /* The following block like the normal cpuid branch below, but gcc 57*19fcbaf1SConrad Meyer * reserves ebx for use of its pic register so we must specially 58*19fcbaf1SConrad Meyer * handle the save and restore to avoid clobbering the register 59*19fcbaf1SConrad Meyer */ 60*19fcbaf1SConrad Meyer U32 n; 61*19fcbaf1SConrad Meyer __asm__( 62*19fcbaf1SConrad Meyer "pushl %%ebx\n\t" 63*19fcbaf1SConrad Meyer "cpuid\n\t" 64*19fcbaf1SConrad Meyer "popl %%ebx\n\t" 65*19fcbaf1SConrad Meyer : "=a"(n) 66*19fcbaf1SConrad Meyer : "a"(0) 67*19fcbaf1SConrad Meyer : "ecx", "edx"); 68*19fcbaf1SConrad Meyer if (n >= 1) { 69*19fcbaf1SConrad Meyer U32 f1a; 70*19fcbaf1SConrad Meyer __asm__( 71*19fcbaf1SConrad Meyer "pushl %%ebx\n\t" 72*19fcbaf1SConrad Meyer "cpuid\n\t" 73*19fcbaf1SConrad Meyer "popl %%ebx\n\t" 74*19fcbaf1SConrad Meyer : "=a"(f1a), "=c"(f1c), "=d"(f1d) 75*19fcbaf1SConrad Meyer : "a"(1) 76*19fcbaf1SConrad Meyer :); 77*19fcbaf1SConrad Meyer } 78*19fcbaf1SConrad Meyer if (n >= 7) { 79*19fcbaf1SConrad Meyer __asm__( 80*19fcbaf1SConrad Meyer "pushl %%ebx\n\t" 81*19fcbaf1SConrad Meyer "cpuid\n\t" 82*19fcbaf1SConrad Meyer "movl %%ebx, %%eax\n\r" 83*19fcbaf1SConrad Meyer "popl %%ebx" 84*19fcbaf1SConrad Meyer : "=a"(f7b), "=c"(f7c) 85*19fcbaf1SConrad Meyer : "a"(7), "c"(0) 86*19fcbaf1SConrad Meyer : "edx"); 87*19fcbaf1SConrad Meyer } 88*19fcbaf1SConrad Meyer #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) 89*19fcbaf1SConrad Meyer U32 n; 90*19fcbaf1SConrad Meyer __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); 91*19fcbaf1SConrad Meyer if (n >= 1) { 92*19fcbaf1SConrad Meyer U32 f1a; 93*19fcbaf1SConrad Meyer __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); 94*19fcbaf1SConrad Meyer } 95*19fcbaf1SConrad Meyer if (n >= 7) { 96*19fcbaf1SConrad Meyer U32 f7a; 97*19fcbaf1SConrad Meyer __asm__("cpuid" 98*19fcbaf1SConrad Meyer : "=a"(f7a), "=b"(f7b), "=c"(f7c) 99*19fcbaf1SConrad Meyer : "a"(7), "c"(0) 100*19fcbaf1SConrad Meyer : "edx"); 101*19fcbaf1SConrad Meyer } 102*19fcbaf1SConrad Meyer #endif 103*19fcbaf1SConrad Meyer { 104*19fcbaf1SConrad Meyer ZSTD_cpuid_t cpuid; 105*19fcbaf1SConrad Meyer cpuid.f1c = f1c; 106*19fcbaf1SConrad Meyer cpuid.f1d = f1d; 107*19fcbaf1SConrad Meyer cpuid.f7b = f7b; 108*19fcbaf1SConrad Meyer cpuid.f7c = f7c; 109*19fcbaf1SConrad Meyer return cpuid; 110*19fcbaf1SConrad Meyer } 111*19fcbaf1SConrad Meyer } 112*19fcbaf1SConrad Meyer 113*19fcbaf1SConrad Meyer #define X(name, r, bit) \ 114*19fcbaf1SConrad Meyer MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ 115*19fcbaf1SConrad Meyer return ((cpuid.r) & (1U << bit)) != 0; \ 116*19fcbaf1SConrad Meyer } 117*19fcbaf1SConrad Meyer 118*19fcbaf1SConrad Meyer /* cpuid(1): Processor Info and Feature Bits. */ 119*19fcbaf1SConrad Meyer #define C(name, bit) X(name, f1c, bit) 120*19fcbaf1SConrad Meyer C(sse3, 0) 121*19fcbaf1SConrad Meyer C(pclmuldq, 1) 122*19fcbaf1SConrad Meyer C(dtes64, 2) 123*19fcbaf1SConrad Meyer C(monitor, 3) 124*19fcbaf1SConrad Meyer C(dscpl, 4) 125*19fcbaf1SConrad Meyer C(vmx, 5) 126*19fcbaf1SConrad Meyer C(smx, 6) 127*19fcbaf1SConrad Meyer C(eist, 7) 128*19fcbaf1SConrad Meyer C(tm2, 8) 129*19fcbaf1SConrad Meyer C(ssse3, 9) 130*19fcbaf1SConrad Meyer C(cnxtid, 10) 131*19fcbaf1SConrad Meyer C(fma, 12) 132*19fcbaf1SConrad Meyer C(cx16, 13) 133*19fcbaf1SConrad Meyer C(xtpr, 14) 134*19fcbaf1SConrad Meyer C(pdcm, 15) 135*19fcbaf1SConrad Meyer C(pcid, 17) 136*19fcbaf1SConrad Meyer C(dca, 18) 137*19fcbaf1SConrad Meyer C(sse41, 19) 138*19fcbaf1SConrad Meyer C(sse42, 20) 139*19fcbaf1SConrad Meyer C(x2apic, 21) 140*19fcbaf1SConrad Meyer C(movbe, 22) 141*19fcbaf1SConrad Meyer C(popcnt, 23) 142*19fcbaf1SConrad Meyer C(tscdeadline, 24) 143*19fcbaf1SConrad Meyer C(aes, 25) 144*19fcbaf1SConrad Meyer C(xsave, 26) 145*19fcbaf1SConrad Meyer C(osxsave, 27) 146*19fcbaf1SConrad Meyer C(avx, 28) 147*19fcbaf1SConrad Meyer C(f16c, 29) 148*19fcbaf1SConrad Meyer C(rdrand, 30) 149*19fcbaf1SConrad Meyer #undef C 150*19fcbaf1SConrad Meyer #define D(name, bit) X(name, f1d, bit) 151*19fcbaf1SConrad Meyer D(fpu, 0) 152*19fcbaf1SConrad Meyer D(vme, 1) 153*19fcbaf1SConrad Meyer D(de, 2) 154*19fcbaf1SConrad Meyer D(pse, 3) 155*19fcbaf1SConrad Meyer D(tsc, 4) 156*19fcbaf1SConrad Meyer D(msr, 5) 157*19fcbaf1SConrad Meyer D(pae, 6) 158*19fcbaf1SConrad Meyer D(mce, 7) 159*19fcbaf1SConrad Meyer D(cx8, 8) 160*19fcbaf1SConrad Meyer D(apic, 9) 161*19fcbaf1SConrad Meyer D(sep, 11) 162*19fcbaf1SConrad Meyer D(mtrr, 12) 163*19fcbaf1SConrad Meyer D(pge, 13) 164*19fcbaf1SConrad Meyer D(mca, 14) 165*19fcbaf1SConrad Meyer D(cmov, 15) 166*19fcbaf1SConrad Meyer D(pat, 16) 167*19fcbaf1SConrad Meyer D(pse36, 17) 168*19fcbaf1SConrad Meyer D(psn, 18) 169*19fcbaf1SConrad Meyer D(clfsh, 19) 170*19fcbaf1SConrad Meyer D(ds, 21) 171*19fcbaf1SConrad Meyer D(acpi, 22) 172*19fcbaf1SConrad Meyer D(mmx, 23) 173*19fcbaf1SConrad Meyer D(fxsr, 24) 174*19fcbaf1SConrad Meyer D(sse, 25) 175*19fcbaf1SConrad Meyer D(sse2, 26) 176*19fcbaf1SConrad Meyer D(ss, 27) 177*19fcbaf1SConrad Meyer D(htt, 28) 178*19fcbaf1SConrad Meyer D(tm, 29) 179*19fcbaf1SConrad Meyer D(pbe, 31) 180*19fcbaf1SConrad Meyer #undef D 181*19fcbaf1SConrad Meyer 182*19fcbaf1SConrad Meyer /* cpuid(7): Extended Features. */ 183*19fcbaf1SConrad Meyer #define B(name, bit) X(name, f7b, bit) 184*19fcbaf1SConrad Meyer B(bmi1, 3) 185*19fcbaf1SConrad Meyer B(hle, 4) 186*19fcbaf1SConrad Meyer B(avx2, 5) 187*19fcbaf1SConrad Meyer B(smep, 7) 188*19fcbaf1SConrad Meyer B(bmi2, 8) 189*19fcbaf1SConrad Meyer B(erms, 9) 190*19fcbaf1SConrad Meyer B(invpcid, 10) 191*19fcbaf1SConrad Meyer B(rtm, 11) 192*19fcbaf1SConrad Meyer B(mpx, 14) 193*19fcbaf1SConrad Meyer B(avx512f, 16) 194*19fcbaf1SConrad Meyer B(avx512dq, 17) 195*19fcbaf1SConrad Meyer B(rdseed, 18) 196*19fcbaf1SConrad Meyer B(adx, 19) 197*19fcbaf1SConrad Meyer B(smap, 20) 198*19fcbaf1SConrad Meyer B(avx512ifma, 21) 199*19fcbaf1SConrad Meyer B(pcommit, 22) 200*19fcbaf1SConrad Meyer B(clflushopt, 23) 201*19fcbaf1SConrad Meyer B(clwb, 24) 202*19fcbaf1SConrad Meyer B(avx512pf, 26) 203*19fcbaf1SConrad Meyer B(avx512er, 27) 204*19fcbaf1SConrad Meyer B(avx512cd, 28) 205*19fcbaf1SConrad Meyer B(sha, 29) 206*19fcbaf1SConrad Meyer B(avx512bw, 30) 207*19fcbaf1SConrad Meyer B(avx512vl, 31) 208*19fcbaf1SConrad Meyer #undef B 209*19fcbaf1SConrad Meyer #define C(name, bit) X(name, f7c, bit) 210*19fcbaf1SConrad Meyer C(prefetchwt1, 0) 211*19fcbaf1SConrad Meyer C(avx512vbmi, 1) 212*19fcbaf1SConrad Meyer #undef C 213*19fcbaf1SConrad Meyer 214*19fcbaf1SConrad Meyer #undef X 215*19fcbaf1SConrad Meyer 216*19fcbaf1SConrad Meyer #endif /* ZSTD_COMMON_CPU_H */ 217