1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Various routines to handle identification 30 * and classification of x86 processors. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/archsystm.h> 35 #include <sys/x86_archext.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 #include <sys/cmn_err.h> 39 #include <sys/sunddi.h> 40 #include <sys/sunndi.h> 41 #include <sys/cpuvar.h> 42 #include <sys/processor.h> 43 #include <sys/sysmacros.h> 44 #include <sys/pg.h> 45 #include <sys/fp.h> 46 #include <sys/controlregs.h> 47 #include <sys/auxv_386.h> 48 #include <sys/bitmap.h> 49 #include <sys/memnode.h> 50 51 /* 52 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 53 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 54 * them accordingly. For most modern processors, feature detection occurs here 55 * in pass 1. 
56 * 57 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 58 * for the boot CPU and does the basic analysis that the early kernel needs. 59 * x86_feature is set based on the return value of cpuid_pass1() of the boot 60 * CPU. 61 * 62 * Pass 1 includes: 63 * 64 * o Determining vendor/model/family/stepping and setting x86_type and 65 * x86_vendor accordingly. 66 * o Processing the feature flags returned by the cpuid instruction while 67 * applying any workarounds or tricks for the specific processor. 68 * o Mapping the feature flags into Solaris feature bits (X86_*). 69 * o Processing extended feature flags if supported by the processor, 70 * again while applying specific processor knowledge. 71 * o Determining the CMT characteristics of the system. 72 * 73 * Pass 1 is done on non-boot CPUs during their initialization and the results 74 * are used only as a meager attempt at ensuring that all processors within the 75 * system support the same features. 76 * 77 * Pass 2 of cpuid feature analysis happens just at the beginning 78 * of startup(). It just copies in and corrects the remainder 79 * of the cpuid data we depend on: standard cpuid functions that we didn't 80 * need for pass1 feature analysis, and extended cpuid functions beyond the 81 * simple feature processing done in pass1. 82 * 83 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 84 * particular kernel memory allocation has been made available. It creates a 85 * readable brand string based on the data collected in the first two passes. 86 * 87 * Pass 4 of cpuid analysis is invoked after post_startup() when all 88 * the support infrastructure for various hardware features has been 89 * initialized. It determines which processor features will be reported 90 * to userland via the aux vector. 91 * 92 * All passes are executed on all CPUs, but only the boot CPU determines what 93 * features the kernel will use. 
94 * 95 * Much of the worst junk in this file is for the support of processors 96 * that didn't really implement the cpuid instruction properly. 97 * 98 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 99 * the pass numbers. Accordingly, changes to the pass code may require changes 100 * to the accessor code. 101 */ 102 103 uint_t x86_feature = 0; 104 uint_t x86_vendor = X86_VENDOR_IntelClone; 105 uint_t x86_type = X86_TYPE_OTHER; 106 107 uint_t pentiumpro_bug4046376; 108 uint_t pentiumpro_bug4064495; 109 110 uint_t enable486; 111 112 /* 113 * This set of strings are for processors rumored to support the cpuid 114 * instruction, and is used by locore.s to figure out how to set x86_vendor 115 */ 116 const char CyrixInstead[] = "CyrixInstead"; 117 118 /* 119 * monitor/mwait info. 120 * 121 * size_actual and buf_actual are the real address and size allocated to get 122 * proper mwait_buf alignement. buf_actual and size_actual should be passed 123 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 124 * processor cache-line alignment, but this is not guarantied in the furture. 125 */ 126 struct mwait_info { 127 size_t mon_min; /* min size to avoid missed wakeups */ 128 size_t mon_max; /* size to avoid false wakeups */ 129 size_t size_actual; /* size actually allocated */ 130 void *buf_actual; /* memory actually allocated */ 131 uint32_t support; /* processor support of monitor/mwait */ 132 }; 133 134 /* 135 * These constants determine how many of the elements of the 136 * cpuid we cache in the cpuid_info data structure; the 137 * remaining elements are accessible via the cpuid instruction. 138 */ 139 140 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 141 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 
0x80000008 */ 142 143 struct cpuid_info { 144 uint_t cpi_pass; /* last pass completed */ 145 /* 146 * standard function information 147 */ 148 uint_t cpi_maxeax; /* fn 0: %eax */ 149 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 150 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 151 152 uint_t cpi_family; /* fn 1: extended family */ 153 uint_t cpi_model; /* fn 1: extended model */ 154 uint_t cpi_step; /* fn 1: stepping */ 155 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 156 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 157 int cpi_clogid; /* fn 1: %ebx: thread # */ 158 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 159 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 160 uint_t cpi_ncache; /* fn 2: number of elements */ 161 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 162 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 163 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 164 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 165 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 166 /* 167 * extended function information 168 */ 169 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 170 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 171 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 172 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 173 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 174 id_t cpi_coreid; 175 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 176 /* Intel: fn 4: %eax[31-26] */ 177 /* 178 * supported feature information 179 */ 180 uint32_t cpi_support[5]; 181 #define STD_EDX_FEATURES 0 182 #define AMD_EDX_FEATURES 1 183 #define TM_EDX_FEATURES 2 184 #define STD_ECX_FEATURES 3 185 #define AMD_ECX_FEATURES 4 186 /* 187 * Synthesized information, where known. 
188 */ 189 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 190 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 191 uint32_t cpi_socket; /* Chip package/socket type */ 192 193 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 194 }; 195 196 197 static struct cpuid_info cpuid_info0; 198 199 /* 200 * These bit fields are defined by the Intel Application Note AP-485 201 * "Intel Processor Identification and the CPUID Instruction" 202 */ 203 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 204 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 205 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 206 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 207 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 208 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 209 210 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 211 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 212 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 213 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 214 215 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 216 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 217 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 218 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 219 220 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 221 #define CPI_XMAXEAX_MAX 0x80000100 222 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 223 224 /* 225 * Function 4 (Deterministic Cache Parameters) macros 226 * Defined by Intel Application Note AP-485 227 */ 228 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 229 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 230 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 231 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 232 #define CPI_CACHE_LVL(regs) 
BITX((regs)->cp_eax, 7, 5) 233 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 234 235 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 236 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 237 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 238 239 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 240 241 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 242 243 244 /* 245 * A couple of shorthand macros to identify "later" P6-family chips 246 * like the Pentium M and Core. First, the "older" P6-based stuff 247 * (loosely defined as "pre-Pentium-4"): 248 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 249 */ 250 251 #define IS_LEGACY_P6(cpi) ( \ 252 cpi->cpi_family == 6 && \ 253 (cpi->cpi_model == 1 || \ 254 cpi->cpi_model == 3 || \ 255 cpi->cpi_model == 5 || \ 256 cpi->cpi_model == 6 || \ 257 cpi->cpi_model == 7 || \ 258 cpi->cpi_model == 8 || \ 259 cpi->cpi_model == 0xA || \ 260 cpi->cpi_model == 0xB) \ 261 ) 262 263 /* A "new F6" is everything with family 6 that's not the above */ 264 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 265 266 /* Extended family/model support */ 267 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 268 cpi->cpi_family >= 0xf) 269 270 /* 271 * AMD family 0xf socket types. 272 * First index is 0 for revs B thru E, 1 for F and G. 273 * Second index by (model & 0x3) 274 */ 275 static uint32_t amd_skts[2][4] = { 276 { 277 X86_SOCKET_754, /* 0b00 */ 278 X86_SOCKET_940, /* 0b01 */ 279 X86_SOCKET_754, /* 0b10 */ 280 X86_SOCKET_939 /* 0b11 */ 281 }, 282 { 283 X86_SOCKET_S1g1, /* 0b00 */ 284 X86_SOCKET_F1207, /* 0b01 */ 285 X86_SOCKET_UNKNOWN, /* 0b10 */ 286 X86_SOCKET_AM2 /* 0b11 */ 287 } 288 }; 289 290 /* 291 * Table for mapping AMD Family 0xf model/stepping combination to 292 * chip "revision" and socket type. 
Only rm_family 0xf is used at the 293 * moment, but AMD family 0x10 will extend the exsiting revision names 294 * so will likely also use this table. 295 * 296 * The first member of this array that matches a given family, extended model 297 * plus model range, and stepping range will be considered a match. 298 */ 299 static const struct amd_rev_mapent { 300 uint_t rm_family; 301 uint_t rm_modello; 302 uint_t rm_modelhi; 303 uint_t rm_steplo; 304 uint_t rm_stephi; 305 uint32_t rm_chiprev; 306 const char *rm_chiprevstr; 307 int rm_sktidx; 308 } amd_revmap[] = { 309 /* 310 * Rev B includes model 0x4 stepping 0 and model 0x5 stepping 0 and 1. 311 */ 312 { 0xf, 0x04, 0x04, 0x0, 0x0, X86_CHIPREV_AMD_F_REV_B, "B", 0 }, 313 { 0xf, 0x05, 0x05, 0x0, 0x1, X86_CHIPREV_AMD_F_REV_B, "B", 0 }, 314 /* 315 * Rev C0 includes model 0x4 stepping 8 and model 0x5 stepping 8 316 */ 317 { 0xf, 0x04, 0x05, 0x8, 0x8, X86_CHIPREV_AMD_F_REV_C0, "C0", 0 }, 318 /* 319 * Rev CG is the rest of extended model 0x0 - i.e., everything 320 * but the rev B and C0 combinations covered above. 321 */ 322 { 0xf, 0x00, 0x0f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_CG, "CG", 0 }, 323 /* 324 * Rev D has extended model 0x1. 325 */ 326 { 0xf, 0x10, 0x1f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_D, "D", 0 }, 327 /* 328 * Rev E has extended model 0x2. 329 * Extended model 0x3 is unused but available to grow into. 330 */ 331 { 0xf, 0x20, 0x3f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_E, "E", 0 }, 332 /* 333 * Rev F has extended models 0x4 and 0x5. 334 */ 335 { 0xf, 0x40, 0x5f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_F, "F", 1 }, 336 /* 337 * Rev G has extended model 0x6. 338 */ 339 { 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", 1 }, 340 }; 341 342 /* 343 * Info for monitor/mwait idle loop. 344 * 345 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 346 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 347 * 2006. 
348 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 349 * Documentation Updates" #33633, Rev 2.05, December 2006. 350 */ 351 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 352 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 353 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 354 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 355 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 356 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 357 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 358 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 359 /* 360 * Number of sub-cstates for a given c-state. 361 */ 362 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 363 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 364 365 static void intel_cpuid_4_cache_info(void *, struct cpuid_info *); 366 367 static void 368 synth_amd_info(struct cpuid_info *cpi) 369 { 370 const struct amd_rev_mapent *rmp; 371 uint_t family, model, step; 372 int i; 373 374 /* 375 * Currently only AMD family 0xf uses these fields. 
376 */ 377 if (cpi->cpi_family != 0xf) 378 return; 379 380 family = cpi->cpi_family; 381 model = cpi->cpi_model; 382 step = cpi->cpi_step; 383 384 for (i = 0, rmp = amd_revmap; i < sizeof (amd_revmap) / sizeof (*rmp); 385 i++, rmp++) { 386 if (family == rmp->rm_family && 387 model >= rmp->rm_modello && model <= rmp->rm_modelhi && 388 step >= rmp->rm_steplo && step <= rmp->rm_stephi) { 389 cpi->cpi_chiprev = rmp->rm_chiprev; 390 cpi->cpi_chiprevstr = rmp->rm_chiprevstr; 391 cpi->cpi_socket = amd_skts[rmp->rm_sktidx][model & 0x3]; 392 return; 393 } 394 } 395 } 396 397 static void 398 synth_info(struct cpuid_info *cpi) 399 { 400 cpi->cpi_chiprev = X86_CHIPREV_UNKNOWN; 401 cpi->cpi_chiprevstr = "Unknown"; 402 cpi->cpi_socket = X86_SOCKET_UNKNOWN; 403 404 switch (cpi->cpi_vendor) { 405 case X86_VENDOR_AMD: 406 synth_amd_info(cpi); 407 break; 408 409 default: 410 break; 411 412 } 413 } 414 415 /* 416 * Apply up various platform-dependent restrictions where the 417 * underlying platform restrictions mean the CPU can be marked 418 * as less capable than its cpuid instruction would imply. 419 */ 420 #if defined(__xpv) 421 static void 422 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 423 { 424 switch (eax) { 425 case 1: 426 cp->cp_edx &= 427 ~(CPUID_INTC_EDX_PSE | 428 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 429 CPUID_INTC_EDX_MCA | /* XXPV true on dom0? 
*/ 430 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 431 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 432 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 433 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 434 break; 435 436 case 0x80000001: 437 cp->cp_edx &= 438 ~(CPUID_AMD_EDX_PSE | 439 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 440 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 441 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 442 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 443 CPUID_AMD_EDX_TSCP); 444 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 445 break; 446 default: 447 break; 448 } 449 450 switch (vendor) { 451 case X86_VENDOR_Intel: 452 switch (eax) { 453 case 4: 454 /* 455 * Zero out the (ncores-per-chip - 1) field 456 */ 457 cp->cp_eax &= 0x03fffffff; 458 break; 459 default: 460 break; 461 } 462 break; 463 case X86_VENDOR_AMD: 464 switch (eax) { 465 case 0x80000008: 466 /* 467 * Zero out the (ncores-per-chip - 1) field 468 */ 469 cp->cp_ecx &= 0xffffff00; 470 break; 471 default: 472 break; 473 } 474 break; 475 default: 476 break; 477 } 478 } 479 #else 480 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 481 #endif 482 483 /* 484 * Some undocumented ways of patching the results of the cpuid 485 * instruction to permit running Solaris 10 on future cpus that 486 * we don't currently support. Could be set to non-zero values 487 * via settings in eeprom. 488 */ 489 490 uint32_t cpuid_feature_ecx_include; 491 uint32_t cpuid_feature_ecx_exclude; 492 uint32_t cpuid_feature_edx_include; 493 uint32_t cpuid_feature_edx_exclude; 494 495 void 496 cpuid_alloc_space(cpu_t *cpu) 497 { 498 /* 499 * By convention, cpu0 is the boot cpu, which is set up 500 * before memory allocation is available. All other cpus get 501 * their cpuid_info struct allocated here. 
502 */ 503 ASSERT(cpu->cpu_id != 0); 504 cpu->cpu_m.mcpu_cpi = 505 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 506 } 507 508 void 509 cpuid_free_space(cpu_t *cpu) 510 { 511 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 512 int i; 513 514 ASSERT(cpu->cpu_id != 0); 515 516 /* 517 * Free up any function 4 related dynamic storage 518 */ 519 for (i = 1; i < cpi->cpi_std_4_size; i++) 520 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 521 if (cpi->cpi_std_4_size > 0) 522 kmem_free(cpi->cpi_std_4, 523 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 524 525 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 526 } 527 528 uint_t 529 cpuid_pass1(cpu_t *cpu) 530 { 531 uint32_t mask_ecx, mask_edx; 532 uint_t feature = X86_CPUID; 533 struct cpuid_info *cpi; 534 struct cpuid_regs *cp; 535 int xcpuid; 536 #if !defined(__xpv) 537 extern int idle_cpu_prefer_mwait; 538 #endif 539 540 /* 541 * Space statically allocated for cpu0, ensure pointer is set 542 */ 543 if (cpu->cpu_id == 0) 544 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 545 cpi = cpu->cpu_m.mcpu_cpi; 546 ASSERT(cpi != NULL); 547 cp = &cpi->cpi_std[0]; 548 cp->cp_eax = 0; 549 cpi->cpi_maxeax = __cpuid_insn(cp); 550 { 551 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 552 *iptr++ = cp->cp_ebx; 553 *iptr++ = cp->cp_edx; 554 *iptr++ = cp->cp_ecx; 555 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 556 } 557 558 /* 559 * Map the vendor string to a type code 560 */ 561 if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0) 562 cpi->cpi_vendor = X86_VENDOR_Intel; 563 else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0) 564 cpi->cpi_vendor = X86_VENDOR_AMD; 565 else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0) 566 cpi->cpi_vendor = X86_VENDOR_TM; 567 else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0) 568 /* 569 * CyrixInstead is a variable used by the Cyrix detection code 570 * in locore. 
571 */ 572 cpi->cpi_vendor = X86_VENDOR_Cyrix; 573 else if (strcmp(cpi->cpi_vendorstr, "UMC UMC UMC ") == 0) 574 cpi->cpi_vendor = X86_VENDOR_UMC; 575 else if (strcmp(cpi->cpi_vendorstr, "NexGenDriven") == 0) 576 cpi->cpi_vendor = X86_VENDOR_NexGen; 577 else if (strcmp(cpi->cpi_vendorstr, "CentaurHauls") == 0) 578 cpi->cpi_vendor = X86_VENDOR_Centaur; 579 else if (strcmp(cpi->cpi_vendorstr, "RiseRiseRise") == 0) 580 cpi->cpi_vendor = X86_VENDOR_Rise; 581 else if (strcmp(cpi->cpi_vendorstr, "SiS SiS SiS ") == 0) 582 cpi->cpi_vendor = X86_VENDOR_SiS; 583 else if (strcmp(cpi->cpi_vendorstr, "Geode by NSC") == 0) 584 cpi->cpi_vendor = X86_VENDOR_NSC; 585 else 586 cpi->cpi_vendor = X86_VENDOR_IntelClone; 587 588 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 589 590 /* 591 * Limit the range in case of weird hardware 592 */ 593 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 594 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 595 if (cpi->cpi_maxeax < 1) 596 goto pass1_done; 597 598 cp = &cpi->cpi_std[1]; 599 cp->cp_eax = 1; 600 (void) __cpuid_insn(cp); 601 602 /* 603 * Extract identifying constants for easy access. 604 */ 605 cpi->cpi_model = CPI_MODEL(cpi); 606 cpi->cpi_family = CPI_FAMILY(cpi); 607 608 if (cpi->cpi_family == 0xf) 609 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 610 611 /* 612 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 613 * Intel, and presumably everyone else, uses model == 0xf, as 614 * one would expect (max value means possible overflow). Sigh. 
615 */ 616 617 switch (cpi->cpi_vendor) { 618 case X86_VENDOR_Intel: 619 if (IS_EXTENDED_MODEL_INTEL(cpi)) 620 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 621 break; 622 case X86_VENDOR_AMD: 623 if (CPI_FAMILY(cpi) == 0xf) 624 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 625 break; 626 default: 627 if (cpi->cpi_model == 0xf) 628 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 629 break; 630 } 631 632 cpi->cpi_step = CPI_STEP(cpi); 633 cpi->cpi_brandid = CPI_BRANDID(cpi); 634 635 /* 636 * *default* assumptions: 637 * - believe %edx feature word 638 * - ignore %ecx feature word 639 * - 32-bit virtual and physical addressing 640 */ 641 mask_edx = 0xffffffff; 642 mask_ecx = 0; 643 644 cpi->cpi_pabits = cpi->cpi_vabits = 32; 645 646 switch (cpi->cpi_vendor) { 647 case X86_VENDOR_Intel: 648 if (cpi->cpi_family == 5) 649 x86_type = X86_TYPE_P5; 650 else if (IS_LEGACY_P6(cpi)) { 651 x86_type = X86_TYPE_P6; 652 pentiumpro_bug4046376 = 1; 653 pentiumpro_bug4064495 = 1; 654 /* 655 * Clear the SEP bit when it was set erroneously 656 */ 657 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 658 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 659 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 660 x86_type = X86_TYPE_P4; 661 /* 662 * We don't currently depend on any of the %ecx 663 * features until Prescott, so we'll only check 664 * this from P4 onwards. We might want to revisit 665 * that idea later. 666 */ 667 mask_ecx = 0xffffffff; 668 } else if (cpi->cpi_family > 0xf) 669 mask_ecx = 0xffffffff; 670 /* 671 * We don't support MONITOR/MWAIT if leaf 5 is not available 672 * to obtain the monitor linesize. 
673 */ 674 if (cpi->cpi_maxeax < 5) 675 mask_ecx &= ~CPUID_INTC_ECX_MON; 676 break; 677 case X86_VENDOR_IntelClone: 678 default: 679 break; 680 case X86_VENDOR_AMD: 681 #if defined(OPTERON_ERRATUM_108) 682 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 683 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 684 cpi->cpi_model = 0xc; 685 } else 686 #endif 687 if (cpi->cpi_family == 5) { 688 /* 689 * AMD K5 and K6 690 * 691 * These CPUs have an incomplete implementation 692 * of MCA/MCE which we mask away. 693 */ 694 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 695 696 /* 697 * Model 0 uses the wrong (APIC) bit 698 * to indicate PGE. Fix it here. 699 */ 700 if (cpi->cpi_model == 0) { 701 if (cp->cp_edx & 0x200) { 702 cp->cp_edx &= ~0x200; 703 cp->cp_edx |= CPUID_INTC_EDX_PGE; 704 } 705 } 706 707 /* 708 * Early models had problems w/ MMX; disable. 709 */ 710 if (cpi->cpi_model < 6) 711 mask_edx &= ~CPUID_INTC_EDX_MMX; 712 } 713 714 /* 715 * For newer families, SSE3 and CX16, at least, are valid; 716 * enable all 717 */ 718 if (cpi->cpi_family >= 0xf) 719 mask_ecx = 0xffffffff; 720 /* 721 * We don't support MONITOR/MWAIT if leaf 5 is not available 722 * to obtain the monitor linesize. 723 */ 724 if (cpi->cpi_maxeax < 5) 725 mask_ecx &= ~CPUID_INTC_ECX_MON; 726 727 #if !defined(__xpv) 728 /* 729 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 730 * processors. AMD does not intend MWAIT to be used in the cpu 731 * idle loop on current and future processors. 10h and future 732 * AMD processors use more power in MWAIT than HLT. 733 * Pre-family-10h Opterons do not have the MWAIT instruction. 
734 */ 735 idle_cpu_prefer_mwait = 0; 736 #endif 737 738 break; 739 case X86_VENDOR_TM: 740 /* 741 * workaround the NT workaround in CMS 4.1 742 */ 743 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 744 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 745 cp->cp_edx |= CPUID_INTC_EDX_CX8; 746 break; 747 case X86_VENDOR_Centaur: 748 /* 749 * workaround the NT workarounds again 750 */ 751 if (cpi->cpi_family == 6) 752 cp->cp_edx |= CPUID_INTC_EDX_CX8; 753 break; 754 case X86_VENDOR_Cyrix: 755 /* 756 * We rely heavily on the probing in locore 757 * to actually figure out what parts, if any, 758 * of the Cyrix cpuid instruction to believe. 759 */ 760 switch (x86_type) { 761 case X86_TYPE_CYRIX_486: 762 mask_edx = 0; 763 break; 764 case X86_TYPE_CYRIX_6x86: 765 mask_edx = 0; 766 break; 767 case X86_TYPE_CYRIX_6x86L: 768 mask_edx = 769 CPUID_INTC_EDX_DE | 770 CPUID_INTC_EDX_CX8; 771 break; 772 case X86_TYPE_CYRIX_6x86MX: 773 mask_edx = 774 CPUID_INTC_EDX_DE | 775 CPUID_INTC_EDX_MSR | 776 CPUID_INTC_EDX_CX8 | 777 CPUID_INTC_EDX_PGE | 778 CPUID_INTC_EDX_CMOV | 779 CPUID_INTC_EDX_MMX; 780 break; 781 case X86_TYPE_CYRIX_GXm: 782 mask_edx = 783 CPUID_INTC_EDX_MSR | 784 CPUID_INTC_EDX_CX8 | 785 CPUID_INTC_EDX_CMOV | 786 CPUID_INTC_EDX_MMX; 787 break; 788 case X86_TYPE_CYRIX_MediaGX: 789 break; 790 case X86_TYPE_CYRIX_MII: 791 case X86_TYPE_VIA_CYRIX_III: 792 mask_edx = 793 CPUID_INTC_EDX_DE | 794 CPUID_INTC_EDX_TSC | 795 CPUID_INTC_EDX_MSR | 796 CPUID_INTC_EDX_CX8 | 797 CPUID_INTC_EDX_PGE | 798 CPUID_INTC_EDX_CMOV | 799 CPUID_INTC_EDX_MMX; 800 break; 801 default: 802 break; 803 } 804 break; 805 } 806 807 #if defined(__xpv) 808 /* 809 * Do not support MONITOR/MWAIT under a hypervisor 810 */ 811 mask_ecx &= ~CPUID_INTC_ECX_MON; 812 #endif /* __xpv */ 813 814 /* 815 * Now we've figured out the masks that determine 816 * which bits we choose to believe, apply the masks 817 * to the feature words, then map the kernel's view 818 * of these feature words into its feature word. 
819 */ 820 cp->cp_edx &= mask_edx; 821 cp->cp_ecx &= mask_ecx; 822 823 /* 824 * apply any platform restrictions (we don't call this 825 * immediately after __cpuid_insn here, because we need the 826 * workarounds applied above first) 827 */ 828 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 829 830 /* 831 * fold in overrides from the "eeprom" mechanism 832 */ 833 cp->cp_edx |= cpuid_feature_edx_include; 834 cp->cp_edx &= ~cpuid_feature_edx_exclude; 835 836 cp->cp_ecx |= cpuid_feature_ecx_include; 837 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 838 839 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 840 feature |= X86_LARGEPAGE; 841 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 842 feature |= X86_TSC; 843 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 844 feature |= X86_MSR; 845 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 846 feature |= X86_MTRR; 847 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 848 feature |= X86_PGE; 849 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 850 feature |= X86_CMOV; 851 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 852 feature |= X86_MMX; 853 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 854 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 855 feature |= X86_MCA; 856 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 857 feature |= X86_PAE; 858 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 859 feature |= X86_CX8; 860 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 861 feature |= X86_CX16; 862 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 863 feature |= X86_PAT; 864 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 865 feature |= X86_SEP; 866 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 867 /* 868 * In our implementation, fxsave/fxrstor 869 * are prerequisites before we'll even 870 * try and do SSE things. 
871 */ 872 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 873 feature |= X86_SSE; 874 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 875 feature |= X86_SSE2; 876 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 877 feature |= X86_SSE3; 878 } 879 if (cp->cp_edx & CPUID_INTC_EDX_DE) 880 feature |= X86_DE; 881 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 882 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 883 feature |= X86_MWAIT; 884 } 885 886 if (feature & X86_PAE) 887 cpi->cpi_pabits = 36; 888 889 /* 890 * Hyperthreading configuration is slightly tricky on Intel 891 * and pure clones, and even trickier on AMD. 892 * 893 * (AMD chose to set the HTT bit on their CMP processors, 894 * even though they're not actually hyperthreaded. Thus it 895 * takes a bit more work to figure out what's really going 896 * on ... see the handling of the CMP_LGCY bit below) 897 */ 898 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 899 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 900 if (cpi->cpi_ncpu_per_chip > 1) 901 feature |= X86_HTT; 902 } else { 903 cpi->cpi_ncpu_per_chip = 1; 904 } 905 906 /* 907 * Work on the "extended" feature information, doing 908 * some basic initialization for cpuid_pass2() 909 */ 910 xcpuid = 0; 911 switch (cpi->cpi_vendor) { 912 case X86_VENDOR_Intel: 913 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 914 xcpuid++; 915 break; 916 case X86_VENDOR_AMD: 917 if (cpi->cpi_family > 5 || 918 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 919 xcpuid++; 920 break; 921 case X86_VENDOR_Cyrix: 922 /* 923 * Only these Cyrix CPUs are -known- to support 924 * extended cpuid operations. 
925 */ 926 if (x86_type == X86_TYPE_VIA_CYRIX_III || 927 x86_type == X86_TYPE_CYRIX_GXm) 928 xcpuid++; 929 break; 930 case X86_VENDOR_Centaur: 931 case X86_VENDOR_TM: 932 default: 933 xcpuid++; 934 break; 935 } 936 937 if (xcpuid) { 938 cp = &cpi->cpi_extd[0]; 939 cp->cp_eax = 0x80000000; 940 cpi->cpi_xmaxeax = __cpuid_insn(cp); 941 } 942 943 if (cpi->cpi_xmaxeax & 0x80000000) { 944 945 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 946 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 947 948 switch (cpi->cpi_vendor) { 949 case X86_VENDOR_Intel: 950 case X86_VENDOR_AMD: 951 if (cpi->cpi_xmaxeax < 0x80000001) 952 break; 953 cp = &cpi->cpi_extd[1]; 954 cp->cp_eax = 0x80000001; 955 (void) __cpuid_insn(cp); 956 957 if (cpi->cpi_vendor == X86_VENDOR_AMD && 958 cpi->cpi_family == 5 && 959 cpi->cpi_model == 6 && 960 cpi->cpi_step == 6) { 961 /* 962 * K6 model 6 uses bit 10 to indicate SYSC 963 * Later models use bit 11. Fix it here. 964 */ 965 if (cp->cp_edx & 0x400) { 966 cp->cp_edx &= ~0x400; 967 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 968 } 969 } 970 971 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 972 973 /* 974 * Compute the additions to the kernel's feature word. 975 */ 976 if (cp->cp_edx & CPUID_AMD_EDX_NX) 977 feature |= X86_NX; 978 979 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 980 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 981 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 982 feature |= X86_SSE4A; 983 984 /* 985 * If both the HTT and CMP_LGCY bits are set, 986 * then we're not actually HyperThreaded. Read 987 * "AMD CPUID Specification" for more details. 988 */ 989 if (cpi->cpi_vendor == X86_VENDOR_AMD && 990 (feature & X86_HTT) && 991 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 992 feature &= ~X86_HTT; 993 feature |= X86_CMP; 994 } 995 #if defined(__amd64) 996 /* 997 * It's really tricky to support syscall/sysret in 998 * the i386 kernel; we rely on sysenter/sysexit 999 * instead. In the amd64 kernel, things are -way- 1000 * better. 
1001 */ 1002 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 1003 feature |= X86_ASYSC; 1004 1005 /* 1006 * While we're thinking about system calls, note 1007 * that AMD processors don't support sysenter 1008 * in long mode at all, so don't try to program them. 1009 */ 1010 if (x86_vendor == X86_VENDOR_AMD) 1011 feature &= ~X86_SEP; 1012 #endif 1013 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1014 feature |= X86_TSCP; 1015 break; 1016 default: 1017 break; 1018 } 1019 1020 /* 1021 * Get CPUID data about processor cores and hyperthreads. 1022 */ 1023 switch (cpi->cpi_vendor) { 1024 case X86_VENDOR_Intel: 1025 if (cpi->cpi_maxeax >= 4) { 1026 cp = &cpi->cpi_std[4]; 1027 cp->cp_eax = 4; 1028 cp->cp_ecx = 0; 1029 (void) __cpuid_insn(cp); 1030 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1031 } 1032 /*FALLTHROUGH*/ 1033 case X86_VENDOR_AMD: 1034 if (cpi->cpi_xmaxeax < 0x80000008) 1035 break; 1036 cp = &cpi->cpi_extd[8]; 1037 cp->cp_eax = 0x80000008; 1038 (void) __cpuid_insn(cp); 1039 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1040 1041 /* 1042 * Virtual and physical address limits from 1043 * cpuid override previously guessed values. 1044 */ 1045 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1046 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1047 break; 1048 default: 1049 break; 1050 } 1051 1052 /* 1053 * Derive the number of cores per chip 1054 */ 1055 switch (cpi->cpi_vendor) { 1056 case X86_VENDOR_Intel: 1057 if (cpi->cpi_maxeax < 4) { 1058 cpi->cpi_ncore_per_chip = 1; 1059 break; 1060 } else { 1061 cpi->cpi_ncore_per_chip = 1062 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1063 } 1064 break; 1065 case X86_VENDOR_AMD: 1066 if (cpi->cpi_xmaxeax < 0x80000008) { 1067 cpi->cpi_ncore_per_chip = 1; 1068 break; 1069 } else { 1070 cpi->cpi_ncore_per_chip = 1071 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1072 } 1073 break; 1074 default: 1075 cpi->cpi_ncore_per_chip = 1; 1076 break; 1077 } 1078 } 1079 1080 /* 1081 * If more than one core, then this processor is CMP. 
1082 */ 1083 if (cpi->cpi_ncore_per_chip > 1) 1084 feature |= X86_CMP; 1085 1086 /* 1087 * If the number of cores is the same as the number 1088 * of CPUs, then we cannot have HyperThreading. 1089 */ 1090 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1091 feature &= ~X86_HTT; 1092 1093 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1094 /* 1095 * Single-core single-threaded processors. 1096 */ 1097 cpi->cpi_chipid = -1; 1098 cpi->cpi_clogid = 0; 1099 cpi->cpi_coreid = cpu->cpu_id; 1100 } else if (cpi->cpi_ncpu_per_chip > 1) { 1101 uint_t i; 1102 uint_t chipid_shift = 0; 1103 uint_t coreid_shift = 0; 1104 uint_t apic_id = CPI_APIC_ID(cpi); 1105 1106 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1107 chipid_shift++; 1108 cpi->cpi_chipid = apic_id >> chipid_shift; 1109 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1110 1111 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1112 if (feature & X86_CMP) { 1113 /* 1114 * Multi-core (and possibly multi-threaded) 1115 * processors. 1116 */ 1117 uint_t ncpu_per_core; 1118 if (cpi->cpi_ncore_per_chip == 1) 1119 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1120 else if (cpi->cpi_ncore_per_chip > 1) 1121 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1122 cpi->cpi_ncore_per_chip; 1123 /* 1124 * 8bit APIC IDs on dual core Pentiums 1125 * look like this: 1126 * 1127 * +-----------------------+------+------+ 1128 * | Physical Package ID | MC | HT | 1129 * +-----------------------+------+------+ 1130 * <------- chipid --------> 1131 * <------- coreid ---------------> 1132 * <--- clogid --> 1133 * 1134 * Where the number of bits necessary to 1135 * represent MC and HT fields together equals 1136 * to the minimum number of bits necessary to 1137 * store the value of cpi->cpi_ncpu_per_chip. 1138 * Of those bits, the MC part uses the number 1139 * of bits necessary to store the value of 1140 * cpi->cpi_ncore_per_chip. 
1141 */ 1142 for (i = 1; i < ncpu_per_core; i <<= 1) 1143 coreid_shift++; 1144 cpi->cpi_coreid = apic_id >> coreid_shift; 1145 } else if (feature & X86_HTT) { 1146 /* 1147 * Single-core multi-threaded processors. 1148 */ 1149 cpi->cpi_coreid = cpi->cpi_chipid; 1150 } 1151 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1152 /* 1153 * AMD currently only has dual-core processors with 1154 * single-threaded cores. If they ever release 1155 * multi-threaded processors, then this code 1156 * will have to be updated. 1157 */ 1158 cpi->cpi_coreid = cpu->cpu_id; 1159 } else { 1160 /* 1161 * All other processors are currently 1162 * assumed to have single cores. 1163 */ 1164 cpi->cpi_coreid = cpi->cpi_chipid; 1165 } 1166 } 1167 1168 /* 1169 * Synthesize chip "revision" and socket type 1170 */ 1171 synth_info(cpi); 1172 1173 pass1_done: 1174 cpi->cpi_pass = 1; 1175 return (feature); 1176 } 1177 1178 /* 1179 * Make copies of the cpuid table entries we depend on, in 1180 * part for ease of parsing now, in part so that we have only 1181 * one place to correct any of it, in part for ease of 1182 * later export to userland, and in part so we can look at 1183 * this stuff in a crash dump. 1184 */ 1185 1186 /*ARGSUSED*/ 1187 void 1188 cpuid_pass2(cpu_t *cpu) 1189 { 1190 uint_t n, nmax; 1191 int i; 1192 struct cpuid_regs *cp; 1193 uint8_t *dp; 1194 uint32_t *iptr; 1195 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1196 1197 ASSERT(cpi->cpi_pass == 1); 1198 1199 if (cpi->cpi_maxeax < 1) 1200 goto pass2_done; 1201 1202 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1203 nmax = NMAX_CPI_STD; 1204 /* 1205 * (We already handled n == 0 and n == 1 in pass 1) 1206 */ 1207 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1208 cp->cp_eax = n; 1209 1210 /* 1211 * CPUID function 4 expects %ecx to be initialized 1212 * with an index which indicates which cache to return 1213 * information about. The OS is expected to call function 4 1214 * with %ecx set to 0, 1, 2, ... 
until it returns with 1215 * EAX[4:0] set to 0, which indicates there are no more 1216 * caches. 1217 * 1218 * Here, populate cpi_std[4] with the information returned by 1219 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1220 * when dynamic memory allocation becomes available. 1221 * 1222 * Note: we need to explicitly initialize %ecx here, since 1223 * function 4 may have been previously invoked. 1224 */ 1225 if (n == 4) 1226 cp->cp_ecx = 0; 1227 1228 (void) __cpuid_insn(cp); 1229 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1230 switch (n) { 1231 case 2: 1232 /* 1233 * "the lower 8 bits of the %eax register 1234 * contain a value that identifies the number 1235 * of times the cpuid [instruction] has to be 1236 * executed to obtain a complete image of the 1237 * processor's caching systems." 1238 * 1239 * How *do* they make this stuff up? 1240 */ 1241 cpi->cpi_ncache = sizeof (*cp) * 1242 BITX(cp->cp_eax, 7, 0); 1243 if (cpi->cpi_ncache == 0) 1244 break; 1245 cpi->cpi_ncache--; /* skip count byte */ 1246 1247 /* 1248 * Well, for now, rather than attempt to implement 1249 * this slightly dubious algorithm, we just look 1250 * at the first 15 .. 
1251 */ 1252 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1253 cpi->cpi_ncache = sizeof (*cp) - 1; 1254 1255 dp = cpi->cpi_cacheinfo; 1256 if (BITX(cp->cp_eax, 31, 31) == 0) { 1257 uint8_t *p = (void *)&cp->cp_eax; 1258 for (i = 1; i < 3; i++) 1259 if (p[i] != 0) 1260 *dp++ = p[i]; 1261 } 1262 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1263 uint8_t *p = (void *)&cp->cp_ebx; 1264 for (i = 0; i < 4; i++) 1265 if (p[i] != 0) 1266 *dp++ = p[i]; 1267 } 1268 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1269 uint8_t *p = (void *)&cp->cp_ecx; 1270 for (i = 0; i < 4; i++) 1271 if (p[i] != 0) 1272 *dp++ = p[i]; 1273 } 1274 if (BITX(cp->cp_edx, 31, 31) == 0) { 1275 uint8_t *p = (void *)&cp->cp_edx; 1276 for (i = 0; i < 4; i++) 1277 if (p[i] != 0) 1278 *dp++ = p[i]; 1279 } 1280 break; 1281 1282 case 3: /* Processor serial number, if PSN supported */ 1283 break; 1284 1285 case 4: /* Deterministic cache parameters */ 1286 break; 1287 1288 case 5: /* Monitor/Mwait parameters */ 1289 { 1290 size_t mwait_size; 1291 1292 /* 1293 * check cpi_mwait.support which was set in cpuid_pass1 1294 */ 1295 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1296 break; 1297 1298 /* 1299 * Protect ourself from insane mwait line size. 1300 * Workaround for incomplete hardware emulator(s). 
1301 */ 1302 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1303 if (mwait_size < sizeof (uint32_t) || 1304 !ISP2(mwait_size)) { 1305 #if DEBUG 1306 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1307 "size %ld", 1308 cpu->cpu_id, (long)mwait_size); 1309 #endif 1310 break; 1311 } 1312 1313 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1314 cpi->cpi_mwait.mon_max = mwait_size; 1315 if (MWAIT_EXTENSION(cpi)) { 1316 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1317 if (MWAIT_INT_ENABLE(cpi)) 1318 cpi->cpi_mwait.support |= 1319 MWAIT_ECX_INT_ENABLE; 1320 } 1321 break; 1322 } 1323 default: 1324 break; 1325 } 1326 } 1327 1328 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1329 goto pass2_done; 1330 1331 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1332 nmax = NMAX_CPI_EXTD; 1333 /* 1334 * Copy the extended properties, fixing them as we go. 1335 * (We already handled n == 0 and n == 1 in pass 1) 1336 */ 1337 iptr = (void *)cpi->cpi_brandstr; 1338 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1339 cp->cp_eax = 0x80000000 + n; 1340 (void) __cpuid_insn(cp); 1341 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1342 switch (n) { 1343 case 2: 1344 case 3: 1345 case 4: 1346 /* 1347 * Extract the brand string 1348 */ 1349 *iptr++ = cp->cp_eax; 1350 *iptr++ = cp->cp_ebx; 1351 *iptr++ = cp->cp_ecx; 1352 *iptr++ = cp->cp_edx; 1353 break; 1354 case 5: 1355 switch (cpi->cpi_vendor) { 1356 case X86_VENDOR_AMD: 1357 /* 1358 * The Athlon and Duron were the first 1359 * parts to report the sizes of the 1360 * TLB for large pages. Before then, 1361 * we don't trust the data. 1362 */ 1363 if (cpi->cpi_family < 6 || 1364 (cpi->cpi_family == 6 && 1365 cpi->cpi_model < 1)) 1366 cp->cp_eax = 0; 1367 break; 1368 default: 1369 break; 1370 } 1371 break; 1372 case 6: 1373 switch (cpi->cpi_vendor) { 1374 case X86_VENDOR_AMD: 1375 /* 1376 * The Athlon and Duron were the first 1377 * AMD parts with L2 TLB's. 1378 * Before then, don't trust the data. 
1379 */ 1380 if (cpi->cpi_family < 6 || 1381 cpi->cpi_family == 6 && 1382 cpi->cpi_model < 1) 1383 cp->cp_eax = cp->cp_ebx = 0; 1384 /* 1385 * AMD Duron rev A0 reports L2 1386 * cache size incorrectly as 1K 1387 * when it is really 64K 1388 */ 1389 if (cpi->cpi_family == 6 && 1390 cpi->cpi_model == 3 && 1391 cpi->cpi_step == 0) { 1392 cp->cp_ecx &= 0xffff; 1393 cp->cp_ecx |= 0x400000; 1394 } 1395 break; 1396 case X86_VENDOR_Cyrix: /* VIA C3 */ 1397 /* 1398 * VIA C3 processors are a bit messed 1399 * up w.r.t. encoding cache sizes in %ecx 1400 */ 1401 if (cpi->cpi_family != 6) 1402 break; 1403 /* 1404 * model 7 and 8 were incorrectly encoded 1405 * 1406 * xxx is model 8 really broken? 1407 */ 1408 if (cpi->cpi_model == 7 || 1409 cpi->cpi_model == 8) 1410 cp->cp_ecx = 1411 BITX(cp->cp_ecx, 31, 24) << 16 | 1412 BITX(cp->cp_ecx, 23, 16) << 12 | 1413 BITX(cp->cp_ecx, 15, 8) << 8 | 1414 BITX(cp->cp_ecx, 7, 0); 1415 /* 1416 * model 9 stepping 1 has wrong associativity 1417 */ 1418 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1419 cp->cp_ecx |= 8 << 12; 1420 break; 1421 case X86_VENDOR_Intel: 1422 /* 1423 * Extended L2 Cache features function. 1424 * First appeared on Prescott. 
1425 */ 1426 default: 1427 break; 1428 } 1429 break; 1430 default: 1431 break; 1432 } 1433 } 1434 1435 pass2_done: 1436 cpi->cpi_pass = 2; 1437 } 1438 1439 static const char * 1440 intel_cpubrand(const struct cpuid_info *cpi) 1441 { 1442 int i; 1443 1444 if ((x86_feature & X86_CPUID) == 0 || 1445 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1446 return ("i486"); 1447 1448 switch (cpi->cpi_family) { 1449 case 5: 1450 return ("Intel Pentium(r)"); 1451 case 6: 1452 switch (cpi->cpi_model) { 1453 uint_t celeron, xeon; 1454 const struct cpuid_regs *cp; 1455 case 0: 1456 case 1: 1457 case 2: 1458 return ("Intel Pentium(r) Pro"); 1459 case 3: 1460 case 4: 1461 return ("Intel Pentium(r) II"); 1462 case 6: 1463 return ("Intel Celeron(r)"); 1464 case 5: 1465 case 7: 1466 celeron = xeon = 0; 1467 cp = &cpi->cpi_std[2]; /* cache info */ 1468 1469 for (i = 1; i < 3; i++) { 1470 uint_t tmp; 1471 1472 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1473 if (tmp == 0x40) 1474 celeron++; 1475 if (tmp >= 0x44 && tmp <= 0x45) 1476 xeon++; 1477 } 1478 1479 for (i = 0; i < 2; i++) { 1480 uint_t tmp; 1481 1482 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1483 if (tmp == 0x40) 1484 celeron++; 1485 else if (tmp >= 0x44 && tmp <= 0x45) 1486 xeon++; 1487 } 1488 1489 for (i = 0; i < 4; i++) { 1490 uint_t tmp; 1491 1492 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1493 if (tmp == 0x40) 1494 celeron++; 1495 else if (tmp >= 0x44 && tmp <= 0x45) 1496 xeon++; 1497 } 1498 1499 for (i = 0; i < 4; i++) { 1500 uint_t tmp; 1501 1502 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1503 if (tmp == 0x40) 1504 celeron++; 1505 else if (tmp >= 0x44 && tmp <= 0x45) 1506 xeon++; 1507 } 1508 1509 if (celeron) 1510 return ("Intel Celeron(r)"); 1511 if (xeon) 1512 return (cpi->cpi_model == 5 ? 1513 "Intel Pentium(r) II Xeon(tm)" : 1514 "Intel Pentium(r) III Xeon(tm)"); 1515 return (cpi->cpi_model == 5 ? 
1516 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1517 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1518 default: 1519 break; 1520 } 1521 default: 1522 break; 1523 } 1524 1525 /* BrandID is present if the field is nonzero */ 1526 if (cpi->cpi_brandid != 0) { 1527 static const struct { 1528 uint_t bt_bid; 1529 const char *bt_str; 1530 } brand_tbl[] = { 1531 { 0x1, "Intel(r) Celeron(r)" }, 1532 { 0x2, "Intel(r) Pentium(r) III" }, 1533 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1534 { 0x4, "Intel(r) Pentium(r) III" }, 1535 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1536 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1537 { 0x8, "Intel(r) Pentium(r) 4" }, 1538 { 0x9, "Intel(r) Pentium(r) 4" }, 1539 { 0xa, "Intel(r) Celeron(r)" }, 1540 { 0xb, "Intel(r) Xeon(tm)" }, 1541 { 0xc, "Intel(r) Xeon(tm) MP" }, 1542 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1543 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1544 { 0x11, "Mobile Genuine Intel(r)" }, 1545 { 0x12, "Intel(r) Celeron(r) M" }, 1546 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1547 { 0x14, "Intel(r) Celeron(r)" }, 1548 { 0x15, "Mobile Genuine Intel(r)" }, 1549 { 0x16, "Intel(r) Pentium(r) M" }, 1550 { 0x17, "Mobile Intel(r) Celeron(r)" } 1551 }; 1552 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1553 uint_t sgn; 1554 1555 sgn = (cpi->cpi_family << 8) | 1556 (cpi->cpi_model << 4) | cpi->cpi_step; 1557 1558 for (i = 0; i < btblmax; i++) 1559 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1560 break; 1561 if (i < btblmax) { 1562 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1563 return ("Intel(r) Celeron(r)"); 1564 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1565 return ("Intel(r) Xeon(tm) MP"); 1566 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1567 return ("Intel(r) Xeon(tm)"); 1568 return (brand_tbl[i].bt_str); 1569 } 1570 } 1571 1572 return (NULL); 1573 } 1574 1575 static const char * 1576 amd_cpubrand(const struct cpuid_info *cpi) 1577 { 1578 if ((x86_feature & X86_CPUID) == 0 || 1579 cpi->cpi_maxeax < 1 || 
cpi->cpi_family < 5) 1580 return ("i486 compatible"); 1581 1582 switch (cpi->cpi_family) { 1583 case 5: 1584 switch (cpi->cpi_model) { 1585 case 0: 1586 case 1: 1587 case 2: 1588 case 3: 1589 case 4: 1590 case 5: 1591 return ("AMD-K5(r)"); 1592 case 6: 1593 case 7: 1594 return ("AMD-K6(r)"); 1595 case 8: 1596 return ("AMD-K6(r)-2"); 1597 case 9: 1598 return ("AMD-K6(r)-III"); 1599 default: 1600 return ("AMD (family 5)"); 1601 } 1602 case 6: 1603 switch (cpi->cpi_model) { 1604 case 1: 1605 return ("AMD-K7(tm)"); 1606 case 0: 1607 case 2: 1608 case 4: 1609 return ("AMD Athlon(tm)"); 1610 case 3: 1611 case 7: 1612 return ("AMD Duron(tm)"); 1613 case 6: 1614 case 8: 1615 case 10: 1616 /* 1617 * Use the L2 cache size to distinguish 1618 */ 1619 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1620 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1621 default: 1622 return ("AMD (family 6)"); 1623 } 1624 default: 1625 break; 1626 } 1627 1628 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1629 cpi->cpi_brandid != 0) { 1630 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1631 case 3: 1632 return ("AMD Opteron(tm) UP 1xx"); 1633 case 4: 1634 return ("AMD Opteron(tm) DP 2xx"); 1635 case 5: 1636 return ("AMD Opteron(tm) MP 8xx"); 1637 default: 1638 return ("AMD Opteron(tm)"); 1639 } 1640 } 1641 1642 return (NULL); 1643 } 1644 1645 static const char * 1646 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1647 { 1648 if ((x86_feature & X86_CPUID) == 0 || 1649 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1650 type == X86_TYPE_CYRIX_486) 1651 return ("i486 compatible"); 1652 1653 switch (type) { 1654 case X86_TYPE_CYRIX_6x86: 1655 return ("Cyrix 6x86"); 1656 case X86_TYPE_CYRIX_6x86L: 1657 return ("Cyrix 6x86L"); 1658 case X86_TYPE_CYRIX_6x86MX: 1659 return ("Cyrix 6x86MX"); 1660 case X86_TYPE_CYRIX_GXm: 1661 return ("Cyrix GXm"); 1662 case X86_TYPE_CYRIX_MediaGX: 1663 return ("Cyrix MediaGX"); 1664 case X86_TYPE_CYRIX_MII: 1665 return ("Cyrix M2"); 1666 case X86_TYPE_VIA_CYRIX_III: 1667 
return ("VIA Cyrix M3"); 1668 default: 1669 /* 1670 * Have another wild guess .. 1671 */ 1672 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1673 return ("Cyrix 5x86"); 1674 else if (cpi->cpi_family == 5) { 1675 switch (cpi->cpi_model) { 1676 case 2: 1677 return ("Cyrix 6x86"); /* Cyrix M1 */ 1678 case 4: 1679 return ("Cyrix MediaGX"); 1680 default: 1681 break; 1682 } 1683 } else if (cpi->cpi_family == 6) { 1684 switch (cpi->cpi_model) { 1685 case 0: 1686 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1687 case 5: 1688 case 6: 1689 case 7: 1690 case 8: 1691 case 9: 1692 return ("VIA C3"); 1693 default: 1694 break; 1695 } 1696 } 1697 break; 1698 } 1699 return (NULL); 1700 } 1701 1702 /* 1703 * This only gets called in the case that the CPU extended 1704 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1705 * aren't available, or contain null bytes for some reason. 1706 */ 1707 static void 1708 fabricate_brandstr(struct cpuid_info *cpi) 1709 { 1710 const char *brand = NULL; 1711 1712 switch (cpi->cpi_vendor) { 1713 case X86_VENDOR_Intel: 1714 brand = intel_cpubrand(cpi); 1715 break; 1716 case X86_VENDOR_AMD: 1717 brand = amd_cpubrand(cpi); 1718 break; 1719 case X86_VENDOR_Cyrix: 1720 brand = cyrix_cpubrand(cpi, x86_type); 1721 break; 1722 case X86_VENDOR_NexGen: 1723 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1724 brand = "NexGen Nx586"; 1725 break; 1726 case X86_VENDOR_Centaur: 1727 if (cpi->cpi_family == 5) 1728 switch (cpi->cpi_model) { 1729 case 4: 1730 brand = "Centaur C6"; 1731 break; 1732 case 8: 1733 brand = "Centaur C2"; 1734 break; 1735 case 9: 1736 brand = "Centaur C3"; 1737 break; 1738 default: 1739 break; 1740 } 1741 break; 1742 case X86_VENDOR_Rise: 1743 if (cpi->cpi_family == 5 && 1744 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1745 brand = "Rise mP6"; 1746 break; 1747 case X86_VENDOR_SiS: 1748 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1749 brand = "SiS 55x"; 1750 break; 1751 case X86_VENDOR_TM: 1752 if (cpi->cpi_family == 5 && 
cpi->cpi_model == 4) 1753 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1754 break; 1755 case X86_VENDOR_NSC: 1756 case X86_VENDOR_UMC: 1757 default: 1758 break; 1759 } 1760 if (brand) { 1761 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1762 return; 1763 } 1764 1765 /* 1766 * If all else fails ... 1767 */ 1768 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1769 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1770 cpi->cpi_model, cpi->cpi_step); 1771 } 1772 1773 /* 1774 * This routine is called just after kernel memory allocation 1775 * becomes available on cpu0, and as part of mp_startup() on 1776 * the other cpus. 1777 * 1778 * Fixup the brand string, and collect any information from cpuid 1779 * that requires dynamicically allocated storage to represent. 1780 */ 1781 /*ARGSUSED*/ 1782 void 1783 cpuid_pass3(cpu_t *cpu) 1784 { 1785 int i, max, shft, level, size; 1786 struct cpuid_regs regs; 1787 struct cpuid_regs *cp; 1788 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1789 1790 ASSERT(cpi->cpi_pass == 2); 1791 1792 /* 1793 * Function 4: Deterministic cache parameters 1794 * 1795 * Take this opportunity to detect the number of threads 1796 * sharing the last level cache, and construct a corresponding 1797 * cache id. The respective cpuid_info members are initialized 1798 * to the default case of "no last level cache sharing". 1799 */ 1800 cpi->cpi_ncpu_shr_last_cache = 1; 1801 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1802 1803 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1804 1805 /* 1806 * Find the # of elements (size) returned by fn 4, and along 1807 * the way detect last level cache sharing details. 
1808 */ 1809 bzero(®s, sizeof (regs)); 1810 cp = ®s; 1811 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1812 cp->cp_eax = 4; 1813 cp->cp_ecx = i; 1814 1815 (void) __cpuid_insn(cp); 1816 1817 if (CPI_CACHE_TYPE(cp) == 0) 1818 break; 1819 level = CPI_CACHE_LVL(cp); 1820 if (level > max) { 1821 max = level; 1822 cpi->cpi_ncpu_shr_last_cache = 1823 CPI_NTHR_SHR_CACHE(cp) + 1; 1824 } 1825 } 1826 cpi->cpi_std_4_size = size = i; 1827 1828 /* 1829 * Allocate the cpi_std_4 array. The first element 1830 * references the regs for fn 4, %ecx == 0, which 1831 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1832 */ 1833 if (size > 0) { 1834 cpi->cpi_std_4 = 1835 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1836 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1837 1838 /* 1839 * Allocate storage to hold the additional regs 1840 * for function 4, %ecx == 1 .. cpi_std_4_size. 1841 * 1842 * The regs for fn 4, %ecx == 0 has already 1843 * been allocated as indicated above. 1844 */ 1845 for (i = 1; i < size; i++) { 1846 cp = cpi->cpi_std_4[i] = 1847 kmem_zalloc(sizeof (regs), KM_SLEEP); 1848 cp->cp_eax = 4; 1849 cp->cp_ecx = i; 1850 1851 (void) __cpuid_insn(cp); 1852 } 1853 } 1854 /* 1855 * Determine the number of bits needed to represent 1856 * the number of CPUs sharing the last level cache. 1857 * 1858 * Shift off that number of bits from the APIC id to 1859 * derive the cache id. 1860 */ 1861 shft = 0; 1862 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1863 shft++; 1864 cpi->cpi_last_lvl_cacheid = CPI_APIC_ID(cpi) >> shft; 1865 } 1866 1867 /* 1868 * Now fixup the brand string 1869 */ 1870 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1871 fabricate_brandstr(cpi); 1872 } else { 1873 1874 /* 1875 * If we successfully extracted a brand string from the cpuid 1876 * instruction, clean it up by removing leading spaces and 1877 * similar junk. 
1878 */ 1879 if (cpi->cpi_brandstr[0]) { 1880 size_t maxlen = sizeof (cpi->cpi_brandstr); 1881 char *src, *dst; 1882 1883 dst = src = (char *)cpi->cpi_brandstr; 1884 src[maxlen - 1] = '\0'; 1885 /* 1886 * strip leading spaces 1887 */ 1888 while (*src == ' ') 1889 src++; 1890 /* 1891 * Remove any 'Genuine' or "Authentic" prefixes 1892 */ 1893 if (strncmp(src, "Genuine ", 8) == 0) 1894 src += 8; 1895 if (strncmp(src, "Authentic ", 10) == 0) 1896 src += 10; 1897 1898 /* 1899 * Now do an in-place copy. 1900 * Map (R) to (r) and (TM) to (tm). 1901 * The era of teletypes is long gone, and there's 1902 * -really- no need to shout. 1903 */ 1904 while (*src != '\0') { 1905 if (src[0] == '(') { 1906 if (strncmp(src + 1, "R)", 2) == 0) { 1907 (void) strncpy(dst, "(r)", 3); 1908 src += 3; 1909 dst += 3; 1910 continue; 1911 } 1912 if (strncmp(src + 1, "TM)", 3) == 0) { 1913 (void) strncpy(dst, "(tm)", 4); 1914 src += 4; 1915 dst += 4; 1916 continue; 1917 } 1918 } 1919 *dst++ = *src++; 1920 } 1921 *dst = '\0'; 1922 1923 /* 1924 * Finally, remove any trailing spaces 1925 */ 1926 while (--dst > cpi->cpi_brandstr) 1927 if (*dst == ' ') 1928 *dst = '\0'; 1929 else 1930 break; 1931 } else 1932 fabricate_brandstr(cpi); 1933 } 1934 cpi->cpi_pass = 3; 1935 } 1936 1937 /* 1938 * This routine is called out of bind_hwcap() much later in the life 1939 * of the kernel (post_startup()). The job of this routine is to resolve 1940 * the hardware feature support and kernel support for those features into 1941 * what we're actually going to tell applications via the aux vector. 
1942 */ 1943 uint_t 1944 cpuid_pass4(cpu_t *cpu) 1945 { 1946 struct cpuid_info *cpi; 1947 uint_t hwcap_flags = 0; 1948 1949 if (cpu == NULL) 1950 cpu = CPU; 1951 cpi = cpu->cpu_m.mcpu_cpi; 1952 1953 ASSERT(cpi->cpi_pass == 3); 1954 1955 if (cpi->cpi_maxeax >= 1) { 1956 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 1957 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 1958 1959 *edx = CPI_FEATURES_EDX(cpi); 1960 *ecx = CPI_FEATURES_ECX(cpi); 1961 1962 /* 1963 * [these require explicit kernel support] 1964 */ 1965 if ((x86_feature & X86_SEP) == 0) 1966 *edx &= ~CPUID_INTC_EDX_SEP; 1967 1968 if ((x86_feature & X86_SSE) == 0) 1969 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 1970 if ((x86_feature & X86_SSE2) == 0) 1971 *edx &= ~CPUID_INTC_EDX_SSE2; 1972 1973 if ((x86_feature & X86_HTT) == 0) 1974 *edx &= ~CPUID_INTC_EDX_HTT; 1975 1976 if ((x86_feature & X86_SSE3) == 0) 1977 *ecx &= ~CPUID_INTC_ECX_SSE3; 1978 1979 /* 1980 * [no explicit support required beyond x87 fp context] 1981 */ 1982 if (!fpu_exists) 1983 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 1984 1985 /* 1986 * Now map the supported feature vector to things that we 1987 * think userland will care about. 
1988 */ 1989 if (*edx & CPUID_INTC_EDX_SEP) 1990 hwcap_flags |= AV_386_SEP; 1991 if (*edx & CPUID_INTC_EDX_SSE) 1992 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 1993 if (*edx & CPUID_INTC_EDX_SSE2) 1994 hwcap_flags |= AV_386_SSE2; 1995 if (*ecx & CPUID_INTC_ECX_SSE3) 1996 hwcap_flags |= AV_386_SSE3; 1997 if (*ecx & CPUID_INTC_ECX_POPCNT) 1998 hwcap_flags |= AV_386_POPCNT; 1999 if (*edx & CPUID_INTC_EDX_FPU) 2000 hwcap_flags |= AV_386_FPU; 2001 if (*edx & CPUID_INTC_EDX_MMX) 2002 hwcap_flags |= AV_386_MMX; 2003 2004 if (*edx & CPUID_INTC_EDX_TSC) 2005 hwcap_flags |= AV_386_TSC; 2006 if (*edx & CPUID_INTC_EDX_CX8) 2007 hwcap_flags |= AV_386_CX8; 2008 if (*edx & CPUID_INTC_EDX_CMOV) 2009 hwcap_flags |= AV_386_CMOV; 2010 if (*ecx & CPUID_INTC_ECX_MON) 2011 hwcap_flags |= AV_386_MON; 2012 if (*ecx & CPUID_INTC_ECX_CX16) 2013 hwcap_flags |= AV_386_CX16; 2014 } 2015 2016 if (x86_feature & X86_HTT) 2017 hwcap_flags |= AV_386_PAUSE; 2018 2019 if (cpi->cpi_xmaxeax < 0x80000001) 2020 goto pass4_done; 2021 2022 switch (cpi->cpi_vendor) { 2023 struct cpuid_regs cp; 2024 uint32_t *edx, *ecx; 2025 2026 case X86_VENDOR_Intel: 2027 /* 2028 * Seems like Intel duplicated what we necessary 2029 * here to make the initial crop of 64-bit OS's work. 2030 * Hopefully, those are the only "extended" bits 2031 * they'll add. 
2032 */ 2033 /*FALLTHROUGH*/ 2034 2035 case X86_VENDOR_AMD: 2036 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2037 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2038 2039 *edx = CPI_FEATURES_XTD_EDX(cpi); 2040 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2041 2042 /* 2043 * [these features require explicit kernel support] 2044 */ 2045 switch (cpi->cpi_vendor) { 2046 case X86_VENDOR_Intel: 2047 break; 2048 2049 case X86_VENDOR_AMD: 2050 if ((x86_feature & X86_TSCP) == 0) 2051 *edx &= ~CPUID_AMD_EDX_TSCP; 2052 if ((x86_feature & X86_SSE4A) == 0) 2053 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2054 break; 2055 2056 default: 2057 break; 2058 } 2059 2060 /* 2061 * [no explicit support required beyond 2062 * x87 fp context and exception handlers] 2063 */ 2064 if (!fpu_exists) 2065 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2066 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2067 2068 if ((x86_feature & X86_NX) == 0) 2069 *edx &= ~CPUID_AMD_EDX_NX; 2070 #if !defined(__amd64) 2071 *edx &= ~CPUID_AMD_EDX_LM; 2072 #endif 2073 /* 2074 * Now map the supported feature vector to 2075 * things that we think userland will care about. 2076 */ 2077 #if defined(__amd64) 2078 if (*edx & CPUID_AMD_EDX_SYSC) 2079 hwcap_flags |= AV_386_AMD_SYSC; 2080 #endif 2081 if (*edx & CPUID_AMD_EDX_MMXamd) 2082 hwcap_flags |= AV_386_AMD_MMX; 2083 if (*edx & CPUID_AMD_EDX_3DNow) 2084 hwcap_flags |= AV_386_AMD_3DNow; 2085 if (*edx & CPUID_AMD_EDX_3DNowx) 2086 hwcap_flags |= AV_386_AMD_3DNowx; 2087 2088 switch (cpi->cpi_vendor) { 2089 case X86_VENDOR_AMD: 2090 if (*edx & CPUID_AMD_EDX_TSCP) 2091 hwcap_flags |= AV_386_TSCP; 2092 if (*ecx & CPUID_AMD_ECX_AHF64) 2093 hwcap_flags |= AV_386_AHF; 2094 if (*ecx & CPUID_AMD_ECX_SSE4A) 2095 hwcap_flags |= AV_386_AMD_SSE4A; 2096 if (*ecx & CPUID_AMD_ECX_LZCNT) 2097 hwcap_flags |= AV_386_AMD_LZCNT; 2098 break; 2099 2100 case X86_VENDOR_Intel: 2101 /* 2102 * Aarrgh. 2103 * Intel uses a different bit in the same word. 
2104 */ 2105 if (*ecx & CPUID_INTC_ECX_AHF64) 2106 hwcap_flags |= AV_386_AHF; 2107 break; 2108 2109 default: 2110 break; 2111 } 2112 break; 2113 2114 case X86_VENDOR_TM: 2115 cp.cp_eax = 0x80860001; 2116 (void) __cpuid_insn(&cp); 2117 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2118 break; 2119 2120 default: 2121 break; 2122 } 2123 2124 pass4_done: 2125 cpi->cpi_pass = 4; 2126 return (hwcap_flags); 2127 } 2128 2129 2130 /* 2131 * Simulate the cpuid instruction using the data we previously 2132 * captured about this CPU. We try our best to return the truth 2133 * about the hardware, independently of kernel support. 2134 */ 2135 uint32_t 2136 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2137 { 2138 struct cpuid_info *cpi; 2139 struct cpuid_regs *xcp; 2140 2141 if (cpu == NULL) 2142 cpu = CPU; 2143 cpi = cpu->cpu_m.mcpu_cpi; 2144 2145 ASSERT(cpuid_checkpass(cpu, 3)); 2146 2147 /* 2148 * CPUID data is cached in two separate places: cpi_std for standard 2149 * CPUID functions, and cpi_extd for extended CPUID functions. 2150 */ 2151 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2152 xcp = &cpi->cpi_std[cp->cp_eax]; 2153 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2154 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2155 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2156 else 2157 /* 2158 * The caller is asking for data from an input parameter which 2159 * the kernel has not cached. In this case we go fetch from 2160 * the hardware and return the data directly to the user. 
2161 */ 2162 return (__cpuid_insn(cp)); 2163 2164 cp->cp_eax = xcp->cp_eax; 2165 cp->cp_ebx = xcp->cp_ebx; 2166 cp->cp_ecx = xcp->cp_ecx; 2167 cp->cp_edx = xcp->cp_edx; 2168 return (cp->cp_eax); 2169 } 2170 2171 int 2172 cpuid_checkpass(cpu_t *cpu, int pass) 2173 { 2174 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2175 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2176 } 2177 2178 int 2179 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2180 { 2181 ASSERT(cpuid_checkpass(cpu, 3)); 2182 2183 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2184 } 2185 2186 int 2187 cpuid_is_cmt(cpu_t *cpu) 2188 { 2189 if (cpu == NULL) 2190 cpu = CPU; 2191 2192 ASSERT(cpuid_checkpass(cpu, 1)); 2193 2194 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2195 } 2196 2197 /* 2198 * AMD and Intel both implement the 64-bit variant of the syscall 2199 * instruction (syscallq), so if there's -any- support for syscall, 2200 * cpuid currently says "yes, we support this". 2201 * 2202 * However, Intel decided to -not- implement the 32-bit variant of the 2203 * syscall instruction, so we provide a predicate to allow our caller 2204 * to test that subtlety here. 2205 * 2206 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2207 * even in the case where the hardware would in fact support it. 2208 */ 2209 /*ARGSUSED*/ 2210 int 2211 cpuid_syscall32_insn(cpu_t *cpu) 2212 { 2213 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 2214 2215 #if !defined(__xpv) 2216 if (cpu == NULL) 2217 cpu = CPU; 2218 2219 /*CSTYLED*/ 2220 { 2221 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2222 2223 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2224 cpi->cpi_xmaxeax >= 0x80000001 && 2225 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2226 return (1); 2227 } 2228 #endif 2229 return (0); 2230 } 2231 2232 int 2233 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2234 { 2235 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2236 2237 static const char fmt[] = 2238 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2239 static const char fmt_ht[] = 2240 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2241 2242 ASSERT(cpuid_checkpass(cpu, 1)); 2243 2244 if (cpuid_is_cmt(cpu)) 2245 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2246 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2247 cpi->cpi_family, cpi->cpi_model, 2248 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2249 return (snprintf(s, n, fmt, 2250 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2251 cpi->cpi_family, cpi->cpi_model, 2252 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2253 } 2254 2255 const char * 2256 cpuid_getvendorstr(cpu_t *cpu) 2257 { 2258 ASSERT(cpuid_checkpass(cpu, 1)); 2259 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2260 } 2261 2262 uint_t 2263 cpuid_getvendor(cpu_t *cpu) 2264 { 2265 ASSERT(cpuid_checkpass(cpu, 1)); 2266 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2267 } 2268 2269 uint_t 2270 cpuid_getfamily(cpu_t *cpu) 2271 { 2272 ASSERT(cpuid_checkpass(cpu, 1)); 2273 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2274 } 2275 2276 uint_t 2277 cpuid_getmodel(cpu_t *cpu) 2278 { 2279 ASSERT(cpuid_checkpass(cpu, 1)); 2280 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2281 } 2282 2283 uint_t 2284 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2285 { 2286 ASSERT(cpuid_checkpass(cpu, 1)); 2287 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2288 } 2289 2290 uint_t 2291 cpuid_get_ncore_per_chip(cpu_t *cpu) 2292 { 2293 
ASSERT(cpuid_checkpass(cpu, 1)); 2294 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2295 } 2296 2297 uint_t 2298 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2299 { 2300 ASSERT(cpuid_checkpass(cpu, 2)); 2301 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2302 } 2303 2304 id_t 2305 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2306 { 2307 ASSERT(cpuid_checkpass(cpu, 2)); 2308 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2309 } 2310 2311 uint_t 2312 cpuid_getstep(cpu_t *cpu) 2313 { 2314 ASSERT(cpuid_checkpass(cpu, 1)); 2315 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2316 } 2317 2318 uint_t 2319 cpuid_getsig(struct cpu *cpu) 2320 { 2321 ASSERT(cpuid_checkpass(cpu, 1)); 2322 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2323 } 2324 2325 uint32_t 2326 cpuid_getchiprev(struct cpu *cpu) 2327 { 2328 ASSERT(cpuid_checkpass(cpu, 1)); 2329 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2330 } 2331 2332 const char * 2333 cpuid_getchiprevstr(struct cpu *cpu) 2334 { 2335 ASSERT(cpuid_checkpass(cpu, 1)); 2336 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2337 } 2338 2339 uint32_t 2340 cpuid_getsockettype(struct cpu *cpu) 2341 { 2342 ASSERT(cpuid_checkpass(cpu, 1)); 2343 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2344 } 2345 2346 int 2347 cpuid_get_chipid(cpu_t *cpu) 2348 { 2349 ASSERT(cpuid_checkpass(cpu, 1)); 2350 2351 if (cpuid_is_cmt(cpu)) 2352 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2353 return (cpu->cpu_id); 2354 } 2355 2356 id_t 2357 cpuid_get_coreid(cpu_t *cpu) 2358 { 2359 ASSERT(cpuid_checkpass(cpu, 1)); 2360 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2361 } 2362 2363 int 2364 cpuid_get_clogid(cpu_t *cpu) 2365 { 2366 ASSERT(cpuid_checkpass(cpu, 1)); 2367 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2368 } 2369 2370 void 2371 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2372 { 2373 struct cpuid_info *cpi; 2374 2375 if (cpu == NULL) 2376 cpu = CPU; 2377 cpi = cpu->cpu_m.mcpu_cpi; 2378 2379 ASSERT(cpuid_checkpass(cpu, 1)); 2380 2381 if (pabits) 2382 
*pabits = cpi->cpi_pabits; 2383 if (vabits) 2384 *vabits = cpi->cpi_vabits; 2385 } 2386 2387 /* 2388 * Returns the number of data TLB entries for a corresponding 2389 * pagesize. If it can't be computed, or isn't known, the 2390 * routine returns zero. If you ask about an architecturally 2391 * impossible pagesize, the routine will panic (so that the 2392 * hat implementor knows that things are inconsistent.) 2393 */ 2394 uint_t 2395 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2396 { 2397 struct cpuid_info *cpi; 2398 uint_t dtlb_nent = 0; 2399 2400 if (cpu == NULL) 2401 cpu = CPU; 2402 cpi = cpu->cpu_m.mcpu_cpi; 2403 2404 ASSERT(cpuid_checkpass(cpu, 1)); 2405 2406 /* 2407 * Check the L2 TLB info 2408 */ 2409 if (cpi->cpi_xmaxeax >= 0x80000006) { 2410 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2411 2412 switch (pagesize) { 2413 2414 case 4 * 1024: 2415 /* 2416 * All zero in the top 16 bits of the register 2417 * indicates a unified TLB. Size is in low 16 bits. 2418 */ 2419 if ((cp->cp_ebx & 0xffff0000) == 0) 2420 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2421 else 2422 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2423 break; 2424 2425 case 2 * 1024 * 1024: 2426 if ((cp->cp_eax & 0xffff0000) == 0) 2427 dtlb_nent = cp->cp_eax & 0x0000ffff; 2428 else 2429 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2430 break; 2431 2432 default: 2433 panic("unknown L2 pagesize"); 2434 /*NOTREACHED*/ 2435 } 2436 } 2437 2438 if (dtlb_nent != 0) 2439 return (dtlb_nent); 2440 2441 /* 2442 * No L2 TLB support for this size, try L1. 
2443 */ 2444 if (cpi->cpi_xmaxeax >= 0x80000005) { 2445 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2446 2447 switch (pagesize) { 2448 case 4 * 1024: 2449 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2450 break; 2451 case 2 * 1024 * 1024: 2452 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2453 break; 2454 default: 2455 panic("unknown L1 d-TLB pagesize"); 2456 /*NOTREACHED*/ 2457 } 2458 } 2459 2460 return (dtlb_nent); 2461 } 2462 2463 /* 2464 * Return 0 if the erratum is not present or not applicable, positive 2465 * if it is, and negative if the status of the erratum is unknown. 2466 * 2467 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2468 * Processors" #25759, Rev 3.57, August 2005 2469 */ 2470 int 2471 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2472 { 2473 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2474 uint_t eax; 2475 2476 /* 2477 * Bail out if this CPU isn't an AMD CPU, or if it's 2478 * a legacy (32-bit) AMD CPU. 2479 */ 2480 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2481 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2482 cpi->cpi_family == 6) 2483 2484 return (0); 2485 2486 eax = cpi->cpi_std[1].cp_eax; 2487 2488 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2489 #define SH_B3(eax) (eax == 0xf51) 2490 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2491 2492 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2493 2494 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2495 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2496 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2497 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2498 2499 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2500 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2501 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2502 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2503 2504 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2505 #define JH_E1(eax) (eax == 0x20f10) /* 
JH8_E0 had 0x20f30 */ 2506 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2507 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2508 #define BH_E4(eax) (eax == 0x20fb1) 2509 #define SH_E5(eax) (eax == 0x20f42) 2510 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2511 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2512 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2513 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2514 DH_E6(eax) || JH_E6(eax)) 2515 2516 switch (erratum) { 2517 case 1: 2518 return (cpi->cpi_family < 0x10); 2519 case 51: /* what does the asterisk mean? */ 2520 return (B(eax) || SH_C0(eax) || CG(eax)); 2521 case 52: 2522 return (B(eax)); 2523 case 57: 2524 return (cpi->cpi_family <= 0x10); 2525 case 58: 2526 return (B(eax)); 2527 case 60: 2528 return (cpi->cpi_family <= 0x10); 2529 case 61: 2530 case 62: 2531 case 63: 2532 case 64: 2533 case 65: 2534 case 66: 2535 case 68: 2536 case 69: 2537 case 70: 2538 case 71: 2539 return (B(eax)); 2540 case 72: 2541 return (SH_B0(eax)); 2542 case 74: 2543 return (B(eax)); 2544 case 75: 2545 return (cpi->cpi_family < 0x10); 2546 case 76: 2547 return (B(eax)); 2548 case 77: 2549 return (cpi->cpi_family <= 0x10); 2550 case 78: 2551 return (B(eax) || SH_C0(eax)); 2552 case 79: 2553 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2554 case 80: 2555 case 81: 2556 case 82: 2557 return (B(eax)); 2558 case 83: 2559 return (B(eax) || SH_C0(eax) || CG(eax)); 2560 case 85: 2561 return (cpi->cpi_family < 0x10); 2562 case 86: 2563 return (SH_C0(eax) || CG(eax)); 2564 case 88: 2565 #if !defined(__amd64) 2566 return (0); 2567 #else 2568 return (B(eax) || SH_C0(eax)); 2569 #endif 2570 case 89: 2571 return (cpi->cpi_family < 0x10); 2572 case 90: 2573 return (B(eax) || SH_C0(eax) || CG(eax)); 2574 case 91: 2575 case 92: 2576 return (B(eax) || SH_C0(eax)); 2577 case 93: 2578 return (SH_C0(eax)); 2579 case 94: 2580 return (B(eax) || SH_C0(eax) || CG(eax)); 2581 case 95: 2582 #if 
!defined(__amd64) 2583 return (0); 2584 #else 2585 return (B(eax) || SH_C0(eax)); 2586 #endif 2587 case 96: 2588 return (B(eax) || SH_C0(eax) || CG(eax)); 2589 case 97: 2590 case 98: 2591 return (SH_C0(eax) || CG(eax)); 2592 case 99: 2593 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2594 case 100: 2595 return (B(eax) || SH_C0(eax)); 2596 case 101: 2597 case 103: 2598 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2599 case 104: 2600 return (SH_C0(eax) || CG(eax) || D0(eax)); 2601 case 105: 2602 case 106: 2603 case 107: 2604 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2605 case 108: 2606 return (DH_CG(eax)); 2607 case 109: 2608 return (SH_C0(eax) || CG(eax) || D0(eax)); 2609 case 110: 2610 return (D0(eax) || EX(eax)); 2611 case 111: 2612 return (CG(eax)); 2613 case 112: 2614 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2615 case 113: 2616 return (eax == 0x20fc0); 2617 case 114: 2618 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2619 case 115: 2620 return (SH_E0(eax) || JH_E1(eax)); 2621 case 116: 2622 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2623 case 117: 2624 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2625 case 118: 2626 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2627 JH_E6(eax)); 2628 case 121: 2629 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2630 case 122: 2631 return (cpi->cpi_family < 0x10); 2632 case 123: 2633 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2634 case 131: 2635 return (cpi->cpi_family < 0x10); 2636 case 6336786: 2637 /* 2638 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2639 * if this is a K8 family or newer processor 2640 */ 2641 if (CPI_FAMILY(cpi) == 0xf) { 2642 struct cpuid_regs regs; 2643 regs.cp_eax = 0x80000007; 2644 (void) __cpuid_insn(®s); 2645 return (!(regs.cp_edx & 0x100)); 2646 } 2647 return (0); 2648 case 6323525: 2649 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2650 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 
0xf40); 2651 2652 default: 2653 return (-1); 2654 } 2655 } 2656 2657 static const char assoc_str[] = "associativity"; 2658 static const char line_str[] = "line-size"; 2659 static const char size_str[] = "size"; 2660 2661 static void 2662 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2663 uint32_t val) 2664 { 2665 char buf[128]; 2666 2667 /* 2668 * ndi_prop_update_int() is used because it is desirable for 2669 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2670 */ 2671 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2672 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2673 } 2674 2675 /* 2676 * Intel-style cache/tlb description 2677 * 2678 * Standard cpuid level 2 gives a randomly ordered 2679 * selection of tags that index into a table that describes 2680 * cache and tlb properties. 2681 */ 2682 2683 static const char l1_icache_str[] = "l1-icache"; 2684 static const char l1_dcache_str[] = "l1-dcache"; 2685 static const char l2_cache_str[] = "l2-cache"; 2686 static const char l3_cache_str[] = "l3-cache"; 2687 static const char itlb4k_str[] = "itlb-4K"; 2688 static const char dtlb4k_str[] = "dtlb-4K"; 2689 static const char itlb4M_str[] = "itlb-4M"; 2690 static const char dtlb4M_str[] = "dtlb-4M"; 2691 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2692 static const char dtlb44_str[] = "dtlb-4K-4M"; 2693 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2694 static const char sl2_cache_str[] = "sectored-l2-cache"; 2695 static const char itrace_str[] = "itrace-cache"; 2696 static const char sl3_cache_str[] = "sectored-l3-cache"; 2697 2698 static const struct cachetab { 2699 uint8_t ct_code; 2700 uint8_t ct_assoc; 2701 uint16_t ct_line_size; 2702 size_t ct_size; 2703 const char *ct_label; 2704 } intel_ctab[] = { 2705 /* maintain descending order! 
*/
	{ 0xb4, 4, 0, 256, dtlb4k_str },
	{ 0xb3, 4, 0, 128, dtlb4k_str },
	{ 0xb0, 4, 0, 128, itlb4k_str },
	{ 0x87, 8, 64, 1024*1024, l2_cache_str},
	{ 0x86, 4, 64, 512*1024, l2_cache_str},
	{ 0x85, 8, 32, 2*1024*1024, l2_cache_str},
	{ 0x84, 8, 32, 1024*1024, l2_cache_str},
	{ 0x83, 8, 32, 512*1024, l2_cache_str},
	{ 0x82, 8, 32, 256*1024, l2_cache_str},
	{ 0x7f, 2, 64, 512*1024, l2_cache_str},
	{ 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
	{ 0x7c, 8, 64, 1024*1024, sl2_cache_str},
	{ 0x7b, 8, 64, 512*1024, sl2_cache_str},
	{ 0x7a, 8, 64, 256*1024, sl2_cache_str},
	{ 0x79, 8, 64, 128*1024, sl2_cache_str},
	{ 0x78, 8, 64, 1024*1024, l2_cache_str},
	{ 0x73, 8, 0, 64*1024, itrace_str},
	{ 0x72, 8, 0, 32*1024, itrace_str},
	{ 0x71, 8, 0, 16*1024, itrace_str},
	{ 0x70, 8, 0, 12*1024, itrace_str},
	{ 0x68, 4, 64, 32*1024, sl1_dcache_str},
	{ 0x67, 4, 64, 16*1024, sl1_dcache_str},
	{ 0x66, 4, 64, 8*1024, sl1_dcache_str},
	{ 0x60, 8, 64, 16*1024, sl1_dcache_str},
	{ 0x5d, 0, 0, 256, dtlb44_str},
	{ 0x5c, 0, 0, 128, dtlb44_str},
	{ 0x5b, 0, 0, 64, dtlb44_str},
	{ 0x52, 0, 0, 256, itlb424_str},
	{ 0x51, 0, 0, 128, itlb424_str},
	{ 0x50, 0, 0, 64, itlb424_str},
	{ 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
	{ 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
	{ 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
	{ 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
	{ 0x49, 16, 64, 4*1024*1024, l3_cache_str},
	{ 0x47, 8, 64, 8*1024*1024, l3_cache_str},
	{ 0x46, 4, 64, 4*1024*1024, l3_cache_str},
	{ 0x45, 4, 32, 2*1024*1024, l2_cache_str},
	{ 0x44, 4, 32, 1024*1024, l2_cache_str},
	{ 0x43, 4, 32, 512*1024, l2_cache_str},
	{ 0x42, 4, 32, 256*1024, l2_cache_str},
	{ 0x41, 4, 32, 128*1024, l2_cache_str},
	{ 0x3e, 4, 64, 512*1024, sl2_cache_str},
	{ 0x3d, 6, 64, 384*1024, sl2_cache_str},
	{ 0x3c, 4, 64, 256*1024, sl2_cache_str},
	{ 0x3b, 2, 64, 128*1024, sl2_cache_str},
	{ 0x3a, 6, 64, 192*1024, sl2_cache_str},
	{ 0x39, 4, 64, 128*1024, sl2_cache_str},
	{ 0x30, 8, 64, 32*1024, l1_icache_str},
	{ 0x2c, 8, 64, 32*1024, l1_dcache_str},
	{ 0x29, 8, 64, 4096*1024, sl3_cache_str},
	{ 0x25, 8, 64, 2048*1024, sl3_cache_str},
	{ 0x23, 8, 64, 1024*1024, sl3_cache_str},
	{ 0x22, 4, 64, 512*1024, sl3_cache_str},
	{ 0x0c, 4, 32, 16*1024, l1_dcache_str},
	{ 0x0b, 4, 0, 4, itlb4M_str},
	{ 0x0a, 2, 32, 8*1024, l1_dcache_str},
	{ 0x08, 4, 32, 16*1024, l1_icache_str},
	{ 0x06, 4, 32, 8*1024, l1_icache_str},
	{ 0x04, 4, 0, 8, dtlb4M_str},
	{ 0x03, 4, 0, 64, dtlb4k_str},
	{ 0x02, 4, 0, 2, itlb4M_str},
	{ 0x01, 4, 0, 32, itlb4k_str},
	{ 0 }
};

/*
 * Cyrix-specific descriptors.
 *
 * Like intel_ctab[], this table must be kept in descending ct_code order:
 * find_cacheent() stops at the first row whose code is <= the one being
 * looked up, so with 0x70 listed ahead of 0x80 the 0x80 row could never
 * be matched.
 */
static const struct cachetab cyrix_ctab[] = {
	{ 0x80, 4, 16, 16*1024, "l1-cache" },
	{ 0x70, 4, 0, 32, "tlb-4K" },
	{ 0 }
};

/*
 * Search a cache table for a matching entry
 *
 * 'ct' must be sorted by descending ct_code and end with a row whose
 * ct_code is 0.  Returns the row whose code equals 'code', or NULL when
 * there is none (code 0 never matches).
 */
static const struct cachetab *
find_cacheent(const struct cachetab *ct, uint_t code)
{
	if (code != 0) {
		/* skip every row that sorts above the requested code */
		for (; ct->ct_code != 0; ct++)
			if (ct->ct_code <= code)
				break;
		if (ct->ct_code == code)
			return (ct);
	}
	return (NULL);
}

/*
 * Walk state handed to intel_cpuid_4_cache_info() through its opaque
 * first argument, so that the caches it synthesizes from cpuid function 4
 * go to the same walker callback as table-driven descriptors do.
 */
struct cpuid4_walk {
	void	*c4w_arg;	/* caller's walker argument */
	int	(*c4w_func)(void *, const struct cachetab *);
	int	c4w_stop;	/* set once the walker returned non-zero */
};

/*
 * Walk the cacheinfo descriptor, applying 'func' to every valid element
 * The walk is terminated if the walker returns non-zero.
 */
static void
intel_walk_cacheinfo(struct cpuid_info *cpi,
    void *arg, int (*func)(void *, const struct cachetab *))
{
	const struct cachetab *ct;
	uint8_t *dp;
	int i;

	if ((dp = cpi->cpi_cacheinfo) == NULL)
		return;
	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
		/*
		 * For overloaded descriptor 0x49 we use cpuid function 4
		 * if supported by the current processor, to update
		 * cache information.
		 *
		 * The entries built from function 4 are passed to 'func'
		 * with 'arg', exactly like table entries.  They must not
		 * be sent straight to add_cacheent_props(): 'arg' is only
		 * a dev_info_t when that is the walker in use, and
		 * getl2cacheinfo() walks with a struct l2info instead.
		 */
		if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4) {
			struct cpuid4_walk c4w;

			c4w.c4w_arg = arg;
			c4w.c4w_func = func;
			c4w.c4w_stop = 0;
			intel_cpuid_4_cache_info(&c4w, cpi);
			if (c4w.c4w_stop)
				break;
			continue;
		}

		if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
			if (func(arg, ct) != 0)
				break;
		}
	}
}

/*
 * (Like the Intel one, except for Cyrix CPUs)
 */
static void
cyrix_walk_cacheinfo(struct cpuid_info *cpi,
    void *arg, int (*func)(void *, const struct cachetab *))
{
	const struct cachetab *ct;
	uint8_t *dp;
	int i;

	if ((dp = cpi->cpi_cacheinfo) == NULL)
		return;
	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
		/*
		 * Search Cyrix-specific descriptor table first ..
		 */
		if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
			if (func(arg, ct) != 0)
				break;
			continue;
		}
		/*
		 * .. else fall back to the Intel one
		 */
		if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
			if (func(arg, ct) != 0)
				break;
			continue;
		}
	}
}

/*
 * A cacheinfo walker that adds associativity, line-size, and size properties
 * to the devinfo node it is passed as an argument.
 *
 * Always returns 0, so it never terminates a walk early.
 */
static int
add_cacheent_props(void *arg, const struct cachetab *ct)
{
	dev_info_t *devi = arg;

	add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
	if (ct->ct_line_size != 0)
		add_cache_prop(devi, ct->ct_label, line_str,
		    ct->ct_line_size);
	add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
	return (0);
}

/*
 * Add L2 or L3 cache-information using cpuid function 4. This
 * function is called from intel_walk_cacheinfo() when descriptor
 * 0x49 is encountered.
 *
 * 'arg' points at the struct cpuid4_walk prepared by
 * intel_walk_cacheinfo().  For every level 2 or level 3 cache found in
 * cpi_std_4[] a temporary cachetab entry is built and given to the
 * walker recorded there; c4w_stop is set, and the scan ends, as soon as
 * that walker returns non-zero.
 */
static void
intel_cpuid_4_cache_info(void *arg, struct cpuid_info *cpi)
{
	struct cpuid4_walk *c4w = arg;
	uint32_t level, i;

	struct cachetab ct;

	for (i = 0; i < cpi->cpi_std_4_size; i++) {
		level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);

		if (level != 2 && level != 3)
			continue;

		/* the entry stands in for descriptor 0x49 */
		ct.ct_code = 0x49;
		/* each field is reported as (value - 1), hence the "+ 1"s */
		ct.ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
		ct.ct_line_size =
		    CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
		ct.ct_size = ct.ct_assoc *
		    (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
		    ct.ct_line_size *
		    (cpi->cpi_std_4[i]->cp_ecx + 1);
		ct.ct_label = (level == 2) ? l2_cache_str : l3_cache_str;

		if (c4w->c4w_func(c4w->c4w_arg,
		    (const struct cachetab *)(&ct)) != 0) {
			c4w->c4w_stop = 1;
			break;
		}
	}
}

static const char fully_assoc[] = "fully-associative?";

/*
 * AMD style cache/tlb description
 *
 * Extended functions 5 and 6 directly describe properties of
 * tlbs and various cache levels.
 */

/*
 * Report an L1-style (8 bit) associativity value for 'label': 0 is
 * ignored, 0xff is reported through the "fully-associative?" property,
 * and any other value is reported as the associativity itself.
 */
static void
add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
{
	switch (assoc) {
	case 0:	/* reserved; ignore */
		break;
	default:
		add_cache_prop(devi, label, assoc_str, assoc);
		break;
	case 0xff:
		add_cache_prop(devi, label, fully_assoc, 1);
		break;
	}
}

/*
 * Report an L1 TLB of 'size' entries under 'label'; nothing is reported
 * when 'size' is 0.
 */
static void
add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
{
	if (size == 0)
		return;
	add_cache_prop(devi, label, size_str, size);
	add_amd_assoc(devi, label, assoc);
}

/*
 * Report an L1 cache under 'label'.  'size' is multiplied by 1024 before
 * being reported; nothing is reported when 'size' or 'line_size' is 0.
 */
static void
add_amd_cache(dev_info_t *devi, const char *label,
    uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
{
	if (size == 0 || line_size == 0)
		return;
	add_amd_assoc(devi, label, assoc);
	/*
	 * Most AMD parts have a sectored cache. Multiple cache lines are
	 * associated with each tag.
A sector consists of all cache lines 2954 * associated with a tag. For example, the AMD K6-III has a sector 2955 * size of 2 cache lines per tag. 2956 */ 2957 if (lines_per_tag != 0) 2958 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 2959 add_cache_prop(devi, label, line_str, line_size); 2960 add_cache_prop(devi, label, size_str, size * 1024); 2961 } 2962 2963 static void 2964 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 2965 { 2966 switch (assoc) { 2967 case 0: /* off */ 2968 break; 2969 case 1: 2970 case 2: 2971 case 4: 2972 add_cache_prop(devi, label, assoc_str, assoc); 2973 break; 2974 case 6: 2975 add_cache_prop(devi, label, assoc_str, 8); 2976 break; 2977 case 8: 2978 add_cache_prop(devi, label, assoc_str, 16); 2979 break; 2980 case 0xf: 2981 add_cache_prop(devi, label, fully_assoc, 1); 2982 break; 2983 default: /* reserved; ignore */ 2984 break; 2985 } 2986 } 2987 2988 static void 2989 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 2990 { 2991 if (size == 0 || assoc == 0) 2992 return; 2993 add_amd_l2_assoc(devi, label, assoc); 2994 add_cache_prop(devi, label, size_str, size); 2995 } 2996 2997 static void 2998 add_amd_l2_cache(dev_info_t *devi, const char *label, 2999 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3000 { 3001 if (size == 0 || assoc == 0 || line_size == 0) 3002 return; 3003 add_amd_l2_assoc(devi, label, assoc); 3004 if (lines_per_tag != 0) 3005 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3006 add_cache_prop(devi, label, line_str, line_size); 3007 add_cache_prop(devi, label, size_str, size * 1024); 3008 } 3009 3010 static void 3011 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3012 { 3013 struct cpuid_regs *cp; 3014 3015 if (cpi->cpi_xmaxeax < 0x80000005) 3016 return; 3017 cp = &cpi->cpi_extd[5]; 3018 3019 /* 3020 * 4M/2M L1 TLB configuration 3021 * 3022 * We report the size for 2M pages because AMD uses two 3023 * TLB entries 
for one 4M page. 3024 */ 3025 add_amd_tlb(devi, "dtlb-2M", 3026 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3027 add_amd_tlb(devi, "itlb-2M", 3028 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3029 3030 /* 3031 * 4K L1 TLB configuration 3032 */ 3033 3034 switch (cpi->cpi_vendor) { 3035 uint_t nentries; 3036 case X86_VENDOR_TM: 3037 if (cpi->cpi_family >= 5) { 3038 /* 3039 * Crusoe processors have 256 TLB entries, but 3040 * cpuid data format constrains them to only 3041 * reporting 255 of them. 3042 */ 3043 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3044 nentries = 256; 3045 /* 3046 * Crusoe processors also have a unified TLB 3047 */ 3048 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3049 nentries); 3050 break; 3051 } 3052 /*FALLTHROUGH*/ 3053 default: 3054 add_amd_tlb(devi, itlb4k_str, 3055 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3056 add_amd_tlb(devi, dtlb4k_str, 3057 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3058 break; 3059 } 3060 3061 /* 3062 * data L1 cache configuration 3063 */ 3064 3065 add_amd_cache(devi, l1_dcache_str, 3066 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3067 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3068 3069 /* 3070 * code L1 cache configuration 3071 */ 3072 3073 add_amd_cache(devi, l1_icache_str, 3074 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3075 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3076 3077 if (cpi->cpi_xmaxeax < 0x80000006) 3078 return; 3079 cp = &cpi->cpi_extd[6]; 3080 3081 /* Check for a unified L2 TLB for large pages */ 3082 3083 if (BITX(cp->cp_eax, 31, 16) == 0) 3084 add_amd_l2_tlb(devi, "l2-tlb-2M", 3085 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3086 else { 3087 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3088 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3089 add_amd_l2_tlb(devi, "l2-itlb-2M", 3090 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3091 } 3092 3093 /* Check for a unified L2 TLB for 4K pages */ 3094 3095 if 
(BITX(cp->cp_ebx, 31, 16) == 0) { 3096 add_amd_l2_tlb(devi, "l2-tlb-4K", 3097 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3098 } else { 3099 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3100 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3101 add_amd_l2_tlb(devi, "l2-itlb-4K", 3102 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3103 } 3104 3105 add_amd_l2_cache(devi, l2_cache_str, 3106 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3107 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3108 } 3109 3110 /* 3111 * There are two basic ways that the x86 world describes it cache 3112 * and tlb architecture - Intel's way and AMD's way. 3113 * 3114 * Return which flavor of cache architecture we should use 3115 */ 3116 static int 3117 x86_which_cacheinfo(struct cpuid_info *cpi) 3118 { 3119 switch (cpi->cpi_vendor) { 3120 case X86_VENDOR_Intel: 3121 if (cpi->cpi_maxeax >= 2) 3122 return (X86_VENDOR_Intel); 3123 break; 3124 case X86_VENDOR_AMD: 3125 /* 3126 * The K5 model 1 was the first part from AMD that reported 3127 * cache sizes via extended cpuid functions. 3128 */ 3129 if (cpi->cpi_family > 5 || 3130 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3131 return (X86_VENDOR_AMD); 3132 break; 3133 case X86_VENDOR_TM: 3134 if (cpi->cpi_family >= 5) 3135 return (X86_VENDOR_AMD); 3136 /*FALLTHROUGH*/ 3137 default: 3138 /* 3139 * If they have extended CPU data for 0x80000005 3140 * then we assume they have AMD-format cache 3141 * information. 3142 * 3143 * If not, and the vendor happens to be Cyrix, 3144 * then try our-Cyrix specific handler. 3145 * 3146 * If we're not Cyrix, then assume we're using Intel's 3147 * table-driven format instead. 
*/
		if (cpi->cpi_xmaxeax >= 0x80000005)
			return (X86_VENDOR_AMD);
		else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
			return (X86_VENDOR_Cyrix);
		else if (cpi->cpi_maxeax >= 2)
			return (X86_VENDOR_Intel);
		break;
	}
	return (-1);
}

/*
 * State for add_cpunode2devtree(): the "cpus" nexus node, allocated under
 * the root node the first time a cpu node is added, and the lock that
 * add_cpunode2devtree() holds for its whole body.
 */
static dev_info_t *cpu_nex_devi = NULL;
static kmutex_t cpu_node_lock;

/*
 * Create a "cpu" child node for 'cpu_id' under the "cpus" nexus and
 * decorate it with properties taken from 'cpi'.
 *
 * Called from post_startup() and mp_startup()
 *
 * The routine returns silently, after dropping cpu_node_lock, if a node
 * cannot be created.  Property creation stops early when the cpuid
 * instruction is not supported (only device_type, reg and the clock
 * properties exist then) or when cpuid function 0 is the highest standard
 * function (vendor-id is then the last property added).
 */
void
add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi)
{
	dev_info_t *cpu_devi;
	int create;	/* per-property flag: does this vendor/part have it? */

	mutex_enter(&cpu_node_lock);

	/*
	 * create a nexus node for all cpus identified as 'cpu_id' under
	 * the root node.
	 */
	if (cpu_nex_devi == NULL) {
		if (ndi_devi_alloc(ddi_root_node(), "cpus",
		    (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) {
			mutex_exit(&cpu_node_lock);
			return;
		}
		(void) ndi_devi_online(cpu_nex_devi, 0);
	}

	/*
	 * create a child node for cpu identified as 'cpu_id'
	 */
	cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID,
	    cpu_id);
	if (cpu_devi == NULL) {
		mutex_exit(&cpu_node_lock);
		return;
	}

	/* device_type */

	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
	    "device_type", "cpu");

	/* reg */

	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "reg", cpu_id);

	/* cpu-mhz, and clock-frequency */

	if (cpu_freq > 0) {
		long long mul;

		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "cpu-mhz", cpu_freq);

		/* clock-frequency is only created when it fits in an int */
		if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
			    "clock-frequency", (int)mul);
	}

	(void) ndi_devi_online(cpu_devi, 0);

	/* everything below is derived from cpuid data */
	if ((x86_feature & X86_CPUID) == 0) {
		mutex_exit(&cpu_node_lock);
		return;
	}

	/* vendor-id */

	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
	    "vendor-id", cpi->cpi_vendorstr);

	if (cpi->cpi_maxeax == 0) {
		mutex_exit(&cpu_node_lock);
		return;
	}

	/*
	 * family, model, and step
	 */
	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "family", CPI_FAMILY(cpi));
	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "cpu-model", CPI_MODEL(cpi));
	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "stepping-id", CPI_STEP(cpi));

	/* type */

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		create = 1;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "type", CPI_TYPE(cpi));

	/* ext-family */

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
	case X86_VENDOR_AMD:
		create = cpi->cpi_family >= 0xf;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "ext-family", CPI_FAMILY_XTD(cpi));

	/* ext-model */

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		create = CPI_MODEL(cpi) == 0xf;
		break;
	case X86_VENDOR_AMD:
		create = CPI_FAMILY(cpi) == 0xf;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "ext-model", CPI_MODEL_XTD(cpi));

	/* generation */

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_AMD:
		/*
		 * AMD K5 model 1 was the first part to support this
		 */
		create = cpi->cpi_xmaxeax >= 0x80000001;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));

	/* brand-id */

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		/*
		 * brand id first appeared on Pentium III Xeon model 8,
		 * and Celeron model 8 processors and Opteron
		 */
		create = cpi->cpi_family > 6 ||
		    (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
		break;
	case X86_VENDOR_AMD:
		create = cpi->cpi_family >= 0xf;
		break;
	default:
		create = 0;
		break;
	}
	/* a brand id of 0 is never reported */
	if (create && cpi->cpi_brandid != 0) {
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "brand-id", cpi->cpi_brandid);
	}

	/* chunks, and apic-id */

	switch (cpi->cpi_vendor) {
		/*
		 * first available on Pentium IV and Opteron (K8)
		 */
	case X86_VENDOR_Intel:
		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
		break;
	case X86_VENDOR_AMD:
		create = cpi->cpi_family >= 0xf;
		break;
	default:
		create = 0;
		break;
	}
	if (create) {
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "chunks", CPI_CHUNKS(cpi));
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "apic-id", CPI_APIC_ID(cpi));
		/* chip# and clog# are only added for a non-negative chip id */
		if (cpi->cpi_chipid >= 0) {
			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
			    "chip#", cpi->cpi_chipid);
			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
			    "clog#", cpi->cpi_clogid);
		}
	}

	/* cpuid-features */

	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "cpuid-features", CPI_FEATURES_EDX(cpi));


	/* cpuid-features-ecx */

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));

	/* ext-cpuid-features */

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
	case X86_VENDOR_AMD:
	case X86_VENDOR_Cyrix:
	case X86_VENDOR_TM:
	case X86_VENDOR_Centaur:
		create = cpi->cpi_xmaxeax >= 0x80000001;
		break;
	default:
		create = 0;
		break;
	}
	if (create) {
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
	}

	/*
	 * Brand String first appeared in Intel Pentium IV, AMD K5
	 * model 1, and Cyrix GXm.  On earlier models we try and
	 * simulate something similar .. so this string should always
	 * say -something- about the processor, however lame.
	 */
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
	    "brand-string", cpi->cpi_brandstr);

	/*
	 * Finally, cache and tlb information
	 */
	switch (x86_which_cacheinfo(cpi)) {
	case X86_VENDOR_Intel:
		intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
		break;
	case X86_VENDOR_Cyrix:
		cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
		break;
	case X86_VENDOR_AMD:
		amd_cache_info(cpi, cpu_devi);
		break;
	default:
		break;
	}

	mutex_exit(&cpu_node_lock);
}

/*
 * Request/result block for an L2 cache lookup.  Each of the three
 * pointers may be NULL when the caller does not want that value; l2i_ret
 * receives the cache size once an L2 cache has been found.
 */
struct l2info {
	int *l2i_csz;		/* where to store the cache size */
	int *l2i_lsz;		/* where to store the line size */
	int *l2i_assoc;		/* where to store the associativity */
	int l2i_ret;		/* cache size found by the lookup */
};

/*
 * A cacheinfo walker that fetches the size, line-size and associativity
 * of the L2 cache
 *
 * 'arg' is the struct l2info to fill in.  Entries are recognized as L2
 * by comparing their label pointer against the two L2 label strings.
 */
static int
intel_l2cinfo(void *arg, const struct cachetab *ct)
{
	struct l2info *l2i = arg;
	int *ip;

	if (ct->ct_label != l2_cache_str &&
	    ct->ct_label != sl2_cache_str)
		return (0);	/* not an L2 -- keep walking */

	if ((ip = l2i->l2i_csz) != NULL)
		*ip = ct->ct_size;
	if ((ip = l2i->l2i_lsz) != NULL)
		*ip = ct->ct_line_size;
	if ((ip = l2i->l2i_assoc) != NULL)
		*ip = ct->ct_assoc;
	l2i->l2i_ret = ct->ct_size;
3464 return (1); /* was an L2 -- terminate walk */ 3465 } 3466 3467 /* 3468 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3469 * 3470 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3471 * value is the associativity, the associativity for the L2 cache and 3472 * tlb is encoded in the following table. The 4 bit L2 value serves as 3473 * an index into the amd_afd[] array to determine the associativity. 3474 * -1 is undefined. 0 is fully associative. 3475 */ 3476 3477 static int amd_afd[] = 3478 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3479 3480 static void 3481 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3482 { 3483 struct cpuid_regs *cp; 3484 uint_t size, assoc; 3485 int i; 3486 int *ip; 3487 3488 if (cpi->cpi_xmaxeax < 0x80000006) 3489 return; 3490 cp = &cpi->cpi_extd[6]; 3491 3492 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3493 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3494 uint_t cachesz = size * 1024; 3495 assoc = amd_afd[i]; 3496 3497 ASSERT(assoc != -1); 3498 3499 if ((ip = l2i->l2i_csz) != NULL) 3500 *ip = cachesz; 3501 if ((ip = l2i->l2i_lsz) != NULL) 3502 *ip = BITX(cp->cp_ecx, 7, 0); 3503 if ((ip = l2i->l2i_assoc) != NULL) 3504 *ip = assoc; 3505 l2i->l2i_ret = cachesz; 3506 } 3507 } 3508 3509 int 3510 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3511 { 3512 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3513 struct l2info __l2info, *l2i = &__l2info; 3514 3515 l2i->l2i_csz = csz; 3516 l2i->l2i_lsz = lsz; 3517 l2i->l2i_assoc = assoc; 3518 l2i->l2i_ret = -1; 3519 3520 switch (x86_which_cacheinfo(cpi)) { 3521 case X86_VENDOR_Intel: 3522 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3523 break; 3524 case X86_VENDOR_Cyrix: 3525 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3526 break; 3527 case X86_VENDOR_AMD: 3528 amd_l2cacheinfo(cpi, l2i); 3529 break; 3530 default: 3531 break; 3532 } 3533 return (l2i->l2i_ret); 3534 } 3535 3536 #if !defined(__xpv) 3537 3538 uint32_t * 3539 
cpuid_mwait_alloc(cpu_t *cpu) 3540 { 3541 uint32_t *ret; 3542 size_t mwait_size; 3543 3544 ASSERT(cpuid_checkpass(cpu, 2)); 3545 3546 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3547 if (mwait_size == 0) 3548 return (NULL); 3549 3550 /* 3551 * kmem_alloc() returns cache line size aligned data for mwait_size 3552 * allocations. mwait_size is currently cache line sized. Neither 3553 * of these implementation details are guarantied to be true in the 3554 * future. 3555 * 3556 * First try allocating mwait_size as kmem_alloc() currently returns 3557 * correctly aligned memory. If kmem_alloc() does not return 3558 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3559 * 3560 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3561 * decide to free this memory. 3562 */ 3563 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3564 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3565 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3566 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3567 *ret = MWAIT_RUNNING; 3568 return (ret); 3569 } else { 3570 kmem_free(ret, mwait_size); 3571 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3572 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3573 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3574 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3575 *ret = MWAIT_RUNNING; 3576 return (ret); 3577 } 3578 } 3579 3580 void 3581 cpuid_mwait_free(cpu_t *cpu) 3582 { 3583 ASSERT(cpuid_checkpass(cpu, 2)); 3584 3585 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3586 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3587 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3588 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3589 } 3590 3591 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3592 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3593 } 3594 3595 #endif /* !__xpv */ 3596