1 /*- 2 * Copyright (c) 2023 The FreeBSD Foundation 3 * 4 * This software was developed by Robert Clausecker <fuz@FreeBSD.org> 5 * under sponsorship from the FreeBSD Foundation. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE 27 */ 28 29 #include <sys/types.h> 30 31 #include <machine/atomic.h> 32 #include <machine/cpufunc.h> 33 #include <machine/specialreg.h> 34 35 #include <stddef.h> 36 #include <string.h> 37 38 #include "amd64_archlevel.h" 39 #include "libc_private.h" 40 41 #define ARCHLEVEL_ENV "ARCHLEVEL" 42 43 static volatile int amd64_archlevel = X86_64_UNDEFINED; 44 45 static const struct archlevel { 46 char name[10]; 47 /* CPUID feature bits that need to be present */ 48 u_int feat_edx, feat_ecx, amd_ecx, ext_ebx; 49 } levels[] = { 50 { 51 .name = "scalar", 52 .feat_edx = 0, 53 .feat_ecx = 0, 54 .amd_ecx = 0, 55 .ext_ebx = 0, 56 }, { 57 #define FEAT_EDX_BASELINE (CPUID_FPU | CPUID_CX8 | CPUID_CMOV | CPUID_MMX | \ 58 CPUID_FXSR | CPUID_SSE | CPUID_SSE2) 59 .name = "baseline", 60 .feat_edx = FEAT_EDX_BASELINE, 61 .feat_ecx = 0, 62 .amd_ecx = 0, 63 .ext_ebx = 0, 64 }, { 65 #define FEAT_ECX_V2 (CPUID2_SSE3 | CPUID2_SSSE3 | CPUID2_CX16 | CPUID2_SSE41 | \ 66 CPUID2_SSE42 | CPUID2_POPCNT) 67 #define AMD_ECX_V2 AMDID2_LAHF 68 .name = "x86-64-v2", 69 .feat_edx = FEAT_EDX_BASELINE, 70 .feat_ecx = FEAT_ECX_V2, 71 .amd_ecx = AMD_ECX_V2, 72 .ext_ebx = 0, 73 }, { 74 #define FEAT_ECX_V3 (FEAT_ECX_V2 | CPUID2_FMA | CPUID2_MOVBE | \ 75 CPUID2_OSXSAVE | CPUID2_AVX | CPUID2_F16C) 76 #define AMD_ECX_V3 (AMD_ECX_V2 | AMDID2_ABM) 77 #define EXT_EBX_V3 (CPUID_STDEXT_BMI1 | CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2) 78 .name = "x86-64-v3", 79 .feat_edx = FEAT_EDX_BASELINE, 80 .feat_ecx = FEAT_ECX_V3, 81 .amd_ecx = AMD_ECX_V3, 82 .ext_ebx = EXT_EBX_V3, 83 }, { 84 #define EXT_EBX_V4 (EXT_EBX_V3 | CPUID_STDEXT_AVX512F | \ 85 CPUID_STDEXT_AVX512DQ | CPUID_STDEXT_AVX512CD | \ 86 CPUID_STDEXT_AVX512BW | CPUID_STDEXT_AVX512VL) 87 .name = "x86-64-v4", 88 .feat_edx = FEAT_EDX_BASELINE, 89 .feat_ecx = FEAT_ECX_V3, 90 .amd_ecx = AMD_ECX_V3, 91 .ext_ebx = EXT_EBX_V4, 92 } 93 }; 94 95 static int 96 supported_archlevel(u_int feat_edx, u_int feat_ecx, u_int ext_ebx, u_int ext_ecx) 97 { 98 int level; 99 u_int p[4], max_leaf; 100 u_int amd_ecx = 0; 101 102 (void)ext_ecx; 103 104 do_cpuid(0x80000000, p); 105 max_leaf = p[0]; 106 107 if (max_leaf >= 0x80000001) { 108 do_cpuid(0x80000001, p); 109 amd_ecx = p[2]; 110 } 111 112 for (level = X86_64_BASELINE; level <= X86_64_MAX; level++) { 113 const struct archlevel *lvl = &levels[level]; 114 115 if ((lvl->feat_edx & feat_edx) != lvl->feat_edx || 116 (lvl->feat_ecx & feat_ecx) != lvl->feat_ecx || 117 (lvl->amd_ecx & amd_ecx) != lvl->amd_ecx || 118 (lvl->ext_ebx & ext_ebx) != lvl->ext_ebx) 119 return (level - 1); 120 } 121 122 return (X86_64_MAX); 123 } 124 125 static int 126 match_archlevel(const char *str, int *force) 127 { 128 int level, want_force = 0; 129 130 *force = 0; 131 132 if (str[0] == '!') { 133 str++; 134 want_force = 1; 135 } 136 137 for (level = 0; level <= X86_64_MAX; level++) { 138 size_t i; 139 const char *candidate = levels[level].name; 140 141 /* can't use strcmp here: would recurse during ifunc resolution */ 142 for (i = 0; str[i] == candidate[i]; i++) 143 /* suffixes starting with : or + are ignored for future extensions */ 144 if (str[i] == '\0' || str[i] == ':' || str[i] == '+') { 145 if (want_force) 146 *force = 1; 147 148 return (level); 149 } 150 } 151 152 return (X86_64_UNDEFINED); 153 } 154 155 /* 156 * We can't use getenv(), strcmp(), and a bunch of other functions here as 157 * they may in turn call SIMD-optimised string functions. 158 * 159 * *force is set to 1 if the architecture level is valid and begins with a ! 160 * and to 0 otherwise. 161 */ 162 static int 163 env_archlevel(int *force) 164 { 165 size_t i; 166 167 if (environ == NULL) 168 return (X86_64_UNDEFINED); 169 170 for (i = 0; environ[i] != NULL; i++) { 171 size_t j; 172 173 for (j = 0; environ[i][j] == ARCHLEVEL_ENV "="[j]; j++) 174 if (environ[i][j] == '=') 175 return (match_archlevel(&environ[i][j + 1], force)); 176 } 177 178 *force = 0; 179 180 return (X86_64_UNDEFINED); 181 182 } 183 184 /* 185 * Determine the architecture level by checking the CPU capabilities 186 * and the environment: 187 * 188 * 1. If environment variable ARCHLEVEL starts with a ! and is followed 189 * by a valid architecture level, that level is returned. 190 * 2. Else if ARCHLEVEL is set to a valid architecture level that is 191 * supported by the CPU, that level is returned. 192 * 3. Else the highest architecture level supported by the CPU is 193 * returned. 194 * 195 * Valid architecture levels are those defined in the levels array. 196 * The architecture level "scalar" indicates that SIMD enhancements 197 * shall not be used. 198 */ 199 static int 200 archlevel(u_int feat_edx, u_int feat_ecx, u_int ext_ebx, u_int ext_ecx) 201 { 202 int islevel, wantlevel, hwlevel, force; 203 204 islevel = atomic_load_int(&amd64_archlevel); 205 if (islevel != X86_64_UNDEFINED) 206 return (islevel); 207 208 wantlevel = env_archlevel(&force); 209 if (!force) { 210 hwlevel = supported_archlevel(feat_edx, feat_ecx, ext_ebx, ext_ecx); 211 if (wantlevel == X86_64_UNDEFINED || wantlevel > hwlevel) 212 wantlevel = hwlevel; 213 } 214 215 /* 216 * Ensure amd64_archlevel is set only once and 217 * all calls agree on what it was set to. 218 */ 219 if (atomic_cmpset_int(&amd64_archlevel, islevel, wantlevel)) 220 return (wantlevel); 221 else 222 return (atomic_load_int(&amd64_archlevel)); 223 } 224 225 /* 226 * Helper function for SIMD ifunc dispatch: select the highest level 227 * implementation up to the current architecture level. 228 */ 229 dlfunc_t 230 __archlevel_resolve(u_int feat_edx, u_int feat_ecx, u_int ext_ebx, 231 u_int ext_ecx, int32_t funcs[static X86_64_MAX + 1]) 232 { 233 int level; 234 235 for (level = archlevel(feat_edx, feat_ecx, ext_ebx, ext_ecx); level >= 0; level--) 236 if (funcs[level] != 0) 237 return (dlfunc_t)((uintptr_t)funcs + (ptrdiff_t)funcs[level]); 238 239 /* no function is present -- what now? */ 240 __builtin_trap(); 241 } 242