1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright (c) 2009, Intel Corporation. 26 * All rights reserved. 27 */ 28 /* 29 * Portions Copyright 2009 Advanced Micro Devices, Inc. 30 */ 31 32 /* 33 * Various routines to handle identification 34 * and classification of x86 processors. 35 */ 36 37 #include <sys/types.h> 38 #include <sys/archsystm.h> 39 #include <sys/x86_archext.h> 40 #include <sys/kmem.h> 41 #include <sys/systm.h> 42 #include <sys/cmn_err.h> 43 #include <sys/sunddi.h> 44 #include <sys/sunndi.h> 45 #include <sys/cpuvar.h> 46 #include <sys/processor.h> 47 #include <sys/sysmacros.h> 48 #include <sys/pg.h> 49 #include <sys/fp.h> 50 #include <sys/controlregs.h> 51 #include <sys/auxv_386.h> 52 #include <sys/bitmap.h> 53 #include <sys/memnode.h> 54 #include <sys/pci_cfgspace.h> 55 56 #ifdef __xpv 57 #include <sys/hypervisor.h> 58 #else 59 #include <sys/ontrap.h> 60 #endif 61 62 /* 63 * Pass 0 of cpuid feature analysis happens in locore. 
It contains special code 64 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 65 * them accordingly. For most modern processors, feature detection occurs here 66 * in pass 1. 67 * 68 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 69 * for the boot CPU and does the basic analysis that the early kernel needs. 70 * x86_feature is set based on the return value of cpuid_pass1() of the boot 71 * CPU. 72 * 73 * Pass 1 includes: 74 * 75 * o Determining vendor/model/family/stepping and setting x86_type and 76 * x86_vendor accordingly. 77 * o Processing the feature flags returned by the cpuid instruction while 78 * applying any workarounds or tricks for the specific processor. 79 * o Mapping the feature flags into Solaris feature bits (X86_*). 80 * o Processing extended feature flags if supported by the processor, 81 * again while applying specific processor knowledge. 82 * o Determining the CMT characteristics of the system. 83 * 84 * Pass 1 is done on non-boot CPUs during their initialization and the results 85 * are used only as a meager attempt at ensuring that all processors within the 86 * system support the same features. 87 * 88 * Pass 2 of cpuid feature analysis happens just at the beginning 89 * of startup(). It just copies in and corrects the remainder 90 * of the cpuid data we depend on: standard cpuid functions that we didn't 91 * need for pass1 feature analysis, and extended cpuid functions beyond the 92 * simple feature processing done in pass1. 93 * 94 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 95 * particular kernel memory allocation has been made available. It creates a 96 * readable brand string based on the data collected in the first two passes. 97 * 98 * Pass 4 of cpuid analysis is invoked after post_startup() when all 99 * the support infrastructure for various hardware features has been 100 * initialized. 
It determines which processor features will be reported 101 * to userland via the aux vector. 102 * 103 * All passes are executed on all CPUs, but only the boot CPU determines what 104 * features the kernel will use. 105 * 106 * Much of the worst junk in this file is for the support of processors 107 * that didn't really implement the cpuid instruction properly. 108 * 109 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 110 * the pass numbers. Accordingly, changes to the pass code may require changes 111 * to the accessor code. 112 */ 113 114 uint_t x86_feature = 0; 115 uint_t x86_vendor = X86_VENDOR_IntelClone; 116 uint_t x86_type = X86_TYPE_OTHER; 117 uint_t x86_clflush_size = 0; 118 119 uint_t pentiumpro_bug4046376; 120 uint_t pentiumpro_bug4064495; 121 122 uint_t enable486; 123 /* 124 * This is set to platform type Solaris is running on. 125 */ 126 static int platform_type = -1; 127 128 #if !defined(__xpv) 129 /* 130 * Variable to patch if hypervisor platform detection needs to be 131 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0). 132 */ 133 int enable_platform_detection = 1; 134 #endif 135 136 /* 137 * monitor/mwait info. 138 * 139 * size_actual and buf_actual are the real address and size allocated to get 140 * proper mwait_buf alignment. buf_actual and size_actual should be passed 141 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 142 * processor cache-line alignment, but this is not guaranteed in the future. 
143 */ 144 struct mwait_info { 145 size_t mon_min; /* min size to avoid missed wakeups */ 146 size_t mon_max; /* size to avoid false wakeups */ 147 size_t size_actual; /* size actually allocated */ 148 void *buf_actual; /* memory actually allocated */ 149 uint32_t support; /* processor support of monitor/mwait */ 150 }; 151 152 /* 153 * These constants determine how many of the elements of the 154 * cpuid we cache in the cpuid_info data structure; the 155 * remaining elements are accessible via the cpuid instruction. 156 */ 157 158 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 159 #define NMAX_CPI_EXTD 0x1c /* eax = 0x80000000 .. 0x8000001b */ 160 161 /* 162 * Some terminology needs to be explained: 163 * - Socket: Something that can be plugged into a motherboard. 164 * - Package: Same as socket 165 * - Chip: Same as socket. Note that AMD's documentation uses term "chip" 166 * differently: there, chip is the same as processor node (below) 167 * - Processor node: Some AMD processors have more than one 168 * "subprocessor" embedded in a package. These subprocessors (nodes) 169 * are fully-functional processors themselves with cores, caches, 170 * memory controllers, PCI configuration spaces. They are connected 171 * inside the package with Hypertransport links. On single-node 172 * processors, processor node is equivalent to chip/socket/package. 
173 */ 174 175 struct cpuid_info { 176 uint_t cpi_pass; /* last pass completed */ 177 /* 178 * standard function information 179 */ 180 uint_t cpi_maxeax; /* fn 0: %eax */ 181 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 182 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 183 184 uint_t cpi_family; /* fn 1: extended family */ 185 uint_t cpi_model; /* fn 1: extended model */ 186 uint_t cpi_step; /* fn 1: stepping */ 187 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */ 188 /* AMD: package/socket # */ 189 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 190 int cpi_clogid; /* fn 1: %ebx: thread # */ 191 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 192 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 193 uint_t cpi_ncache; /* fn 2: number of elements */ 194 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 195 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 196 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 197 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 198 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 199 /* 200 * extended function information 201 */ 202 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 203 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 204 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 205 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 206 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */ 207 208 id_t cpi_coreid; /* same coreid => strands share core */ 209 int cpi_pkgcoreid; /* core number within single package */ 210 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 211 /* Intel: fn 4: %eax[31-26] */ 212 /* 213 * supported feature information 214 */ 215 uint32_t cpi_support[5]; 216 #define STD_EDX_FEATURES 0 217 #define AMD_EDX_FEATURES 1 218 #define TM_EDX_FEATURES 2 219 #define STD_ECX_FEATURES 3 220 #define AMD_ECX_FEATURES 4 221 /* 222 * Synthesized information, where known. 
223 */ 224 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 225 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 226 uint32_t cpi_socket; /* Chip package/socket type */ 227 228 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 229 uint32_t cpi_apicid; 230 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 231 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 232 /* Intel: 1 */ 233 }; 234 235 236 static struct cpuid_info cpuid_info0; 237 238 /* 239 * These bit fields are defined by the Intel Application Note AP-485 240 * "Intel Processor Identification and the CPUID Instruction" 241 */ 242 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 243 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 244 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 245 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 246 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 247 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 248 249 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 250 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 251 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 252 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 253 254 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 255 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 256 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 257 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 258 259 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 260 #define CPI_XMAXEAX_MAX 0x80000100 261 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 262 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 263 264 /* 265 * Function 4 (Deterministic Cache Parameters) macros 266 * Defined by Intel Application Note AP-485 267 */ 268 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 
269 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 270 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 271 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 272 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 273 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 274 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 275 276 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 277 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 278 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 279 280 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 281 282 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 283 284 285 /* 286 * A couple of shorthand macros to identify "later" P6-family chips 287 * like the Pentium M and Core. First, the "older" P6-based stuff 288 * (loosely defined as "pre-Pentium-4"): 289 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 290 */ 291 292 #define IS_LEGACY_P6(cpi) ( \ 293 cpi->cpi_family == 6 && \ 294 (cpi->cpi_model == 1 || \ 295 cpi->cpi_model == 3 || \ 296 cpi->cpi_model == 5 || \ 297 cpi->cpi_model == 6 || \ 298 cpi->cpi_model == 7 || \ 299 cpi->cpi_model == 8 || \ 300 cpi->cpi_model == 0xA || \ 301 cpi->cpi_model == 0xB) \ 302 ) 303 304 /* A "new F6" is everything with family 6 that's not the above */ 305 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 306 307 /* Extended family/model support */ 308 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 309 cpi->cpi_family >= 0xf) 310 311 /* 312 * Info for monitor/mwait idle loop. 313 * 314 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 315 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 316 * 2006. 317 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 318 * Documentation Updates" #33633, Rev 2.05, December 2006. 
319 */ 320 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 321 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 322 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 323 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 324 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 325 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 326 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 327 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 328 /* 329 * Number of sub-cstates for a given c-state. 330 */ 331 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 332 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 333 334 /* 335 * Functions we consune from cpuid_subr.c; don't publish these in a header 336 * file to try and keep people using the expected cpuid_* interfaces. 337 */ 338 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 339 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 340 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 341 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 342 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 343 344 /* 345 * Apply up various platform-dependent restrictions where the 346 * underlying platform restrictions mean the CPU can be marked 347 * as less capable than its cpuid instruction would imply. 348 */ 349 #if defined(__xpv) 350 static void 351 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 352 { 353 switch (eax) { 354 case 1: { 355 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 
356 0 : CPUID_INTC_EDX_MCA; 357 cp->cp_edx &= 358 ~(mcamask | 359 CPUID_INTC_EDX_PSE | 360 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 361 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 362 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 363 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 364 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 365 break; 366 } 367 368 case 0x80000001: 369 cp->cp_edx &= 370 ~(CPUID_AMD_EDX_PSE | 371 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 372 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 373 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 374 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 375 CPUID_AMD_EDX_TSCP); 376 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 377 break; 378 default: 379 break; 380 } 381 382 switch (vendor) { 383 case X86_VENDOR_Intel: 384 switch (eax) { 385 case 4: 386 /* 387 * Zero out the (ncores-per-chip - 1) field 388 */ 389 cp->cp_eax &= 0x03fffffff; 390 break; 391 default: 392 break; 393 } 394 break; 395 case X86_VENDOR_AMD: 396 switch (eax) { 397 398 case 0x80000001: 399 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 400 break; 401 402 case 0x80000008: 403 /* 404 * Zero out the (ncores-per-chip - 1) field 405 */ 406 cp->cp_ecx &= 0xffffff00; 407 break; 408 default: 409 break; 410 } 411 break; 412 default: 413 break; 414 } 415 } 416 #else 417 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 418 #endif 419 420 /* 421 * Some undocumented ways of patching the results of the cpuid 422 * instruction to permit running Solaris 10 on future cpus that 423 * we don't currently support. Could be set to non-zero values 424 * via settings in eeprom. 425 */ 426 427 uint32_t cpuid_feature_ecx_include; 428 uint32_t cpuid_feature_ecx_exclude; 429 uint32_t cpuid_feature_edx_include; 430 uint32_t cpuid_feature_edx_exclude; 431 432 /* 433 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. 
434 */ 435 void 436 cpuid_alloc_space(cpu_t *cpu) 437 { 438 /* 439 * By convention, cpu0 is the boot cpu, which is set up 440 * before memory allocation is available. All other cpus get 441 * their cpuid_info struct allocated here. 442 */ 443 ASSERT(cpu->cpu_id != 0); 444 ASSERT(cpu->cpu_m.mcpu_cpi == NULL); 445 cpu->cpu_m.mcpu_cpi = 446 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 447 } 448 449 void 450 cpuid_free_space(cpu_t *cpu) 451 { 452 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 453 int i; 454 455 ASSERT(cpi != NULL); 456 ASSERT(cpi != &cpuid_info0); 457 458 /* 459 * Free up any function 4 related dynamic storage 460 */ 461 for (i = 1; i < cpi->cpi_std_4_size; i++) 462 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 463 if (cpi->cpi_std_4_size > 0) 464 kmem_free(cpi->cpi_std_4, 465 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 466 467 kmem_free(cpi, sizeof (*cpi)); 468 cpu->cpu_m.mcpu_cpi = NULL; 469 } 470 471 #if !defined(__xpv) 472 473 static void 474 determine_platform() 475 { 476 struct cpuid_regs cp; 477 char *xen_str; 478 uint32_t xen_signature[4], base; 479 480 platform_type = HW_NATIVE; 481 482 if (!enable_platform_detection) 483 return; 484 485 /* 486 * In a fully virtualized domain, Xen's pseudo-cpuid function 487 * returns a string representing the Xen signature in %ebx, %ecx, 488 * and %edx. %eax contains the maximum supported cpuid function. 489 * We need at least a (base + 2) leaf value to do what we want 490 * to do. Try different base values, since the hypervisor might 491 * use a different one depending on whether hyper-v emulation 492 * is switched on by default or not. 
493 */ 494 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 495 cp.cp_eax = base; 496 (void) __cpuid_insn(&cp); 497 xen_signature[0] = cp.cp_ebx; 498 xen_signature[1] = cp.cp_ecx; 499 xen_signature[2] = cp.cp_edx; 500 xen_signature[3] = 0; 501 xen_str = (char *)xen_signature; 502 if (strcmp("XenVMMXenVMM", xen_str) == 0 && 503 cp.cp_eax >= (base + 2)) { 504 platform_type = HW_XEN_HVM; 505 return; 506 } 507 } 508 509 if (vmware_platform()) /* running under vmware hypervisor? */ 510 platform_type = HW_VMWARE; 511 } 512 513 int 514 get_hwenv(void) 515 { 516 if (platform_type == -1) 517 determine_platform(); 518 519 return (platform_type); 520 } 521 522 int 523 is_controldom(void) 524 { 525 return (0); 526 } 527 528 #else 529 530 int 531 get_hwenv(void) 532 { 533 return (HW_XEN_PV); 534 } 535 536 int 537 is_controldom(void) 538 { 539 return (DOMAIN_IS_INITDOMAIN(xen_info)); 540 } 541 542 #endif /* __xpv */ 543 544 static void 545 cpuid_intel_getids(cpu_t *cpu, uint_t feature) 546 { 547 uint_t i; 548 uint_t chipid_shift = 0; 549 uint_t coreid_shift = 0; 550 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 551 552 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 553 chipid_shift++; 554 555 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 556 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 557 558 if (feature & X86_CMP) { 559 /* 560 * Multi-core (and possibly multi-threaded) 561 * processors. 
562 */ 563 uint_t ncpu_per_core; 564 if (cpi->cpi_ncore_per_chip == 1) 565 ncpu_per_core = cpi->cpi_ncpu_per_chip; 566 else if (cpi->cpi_ncore_per_chip > 1) 567 ncpu_per_core = cpi->cpi_ncpu_per_chip / 568 cpi->cpi_ncore_per_chip; 569 /* 570 * 8bit APIC IDs on dual core Pentiums 571 * look like this: 572 * 573 * +-----------------------+------+------+ 574 * | Physical Package ID | MC | HT | 575 * +-----------------------+------+------+ 576 * <------- chipid --------> 577 * <------- coreid ---------------> 578 * <--- clogid --> 579 * <------> 580 * pkgcoreid 581 * 582 * Where the number of bits necessary to 583 * represent MC and HT fields together equals 584 * to the minimum number of bits necessary to 585 * store the value of cpi->cpi_ncpu_per_chip. 586 * Of those bits, the MC part uses the number 587 * of bits necessary to store the value of 588 * cpi->cpi_ncore_per_chip. 589 */ 590 for (i = 1; i < ncpu_per_core; i <<= 1) 591 coreid_shift++; 592 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 593 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 594 } else if (feature & X86_HTT) { 595 /* 596 * Single-core multi-threaded processors. 597 */ 598 cpi->cpi_coreid = cpi->cpi_chipid; 599 cpi->cpi_pkgcoreid = 0; 600 } 601 cpi->cpi_procnodeid = cpi->cpi_chipid; 602 } 603 604 static void 605 cpuid_amd_getids(cpu_t *cpu) 606 { 607 int i, first_half, coreidsz; 608 uint32_t nb_caps_reg; 609 uint_t node2_1; 610 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 611 612 /* 613 * AMD CMP chips currently have a single thread per core. 614 * 615 * Since no two cpus share a core we must assign a distinct coreid 616 * per cpu, and we do this by using the cpu_id. 
This scheme does not, 617 * however, guarantee that sibling cores of a chip will have sequential 618 * coreids starting at a multiple of the number of cores per chip - 619 * that is usually the case, but if the ACPI MADT table is presented 620 * in a different order then we need to perform a few more gymnastics 621 * for the pkgcoreid. 622 * 623 * All processors in the system have the same number of enabled 624 * cores. Cores within a processor are always numbered sequentially 625 * from 0 regardless of how many or which are disabled, and there 626 * is no way for operating system to discover the real core id when some 627 * are disabled. 628 */ 629 630 cpi->cpi_coreid = cpu->cpu_id; 631 632 if (cpi->cpi_xmaxeax >= 0x80000008) { 633 634 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 635 636 /* 637 * In AMD parlance chip is really a node while Solaris 638 * sees chip as equivalent to socket/package. 639 */ 640 cpi->cpi_ncore_per_chip = 641 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 642 if (coreidsz == 0) { 643 /* Use legacy method */ 644 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 645 coreidsz++; 646 if (coreidsz == 0) 647 coreidsz = 1; 648 } 649 } else { 650 /* Assume single-core part */ 651 cpi->cpi_ncore_per_chip = 1; 652 } 653 654 cpi->cpi_clogid = cpi->cpi_pkgcoreid = 655 cpi->cpi_apicid & ((1<<coreidsz) - 1); 656 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip; 657 658 /* Get nodeID */ 659 if (cpi->cpi_family == 0xf) { 660 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 661 cpi->cpi_chipid = cpi->cpi_procnodeid; 662 } else if (cpi->cpi_family == 0x10) { 663 /* 664 * See if we are a multi-node processor. 
665 * All processors in the system have the same number of nodes 666 */ 667 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 668 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 669 /* Single-node */ 670 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 671 coreidsz); 672 cpi->cpi_chipid = cpi->cpi_procnodeid; 673 } else { 674 675 /* 676 * Multi-node revision D (2 nodes per package 677 * are supported) 678 */ 679 cpi->cpi_procnodes_per_pkg = 2; 680 681 first_half = (cpi->cpi_pkgcoreid <= 682 (cpi->cpi_ncore_per_chip/2 - 1)); 683 684 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 685 /* We are BSP */ 686 cpi->cpi_procnodeid = (first_half ? 0 : 1); 687 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 688 } else { 689 690 /* We are AP */ 691 /* NodeId[2:1] bits to use for reading F3xe8 */ 692 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 693 694 nb_caps_reg = 695 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 696 697 /* 698 * Check IntNodeNum bit (31:30, but bit 31 is 699 * always 0 on dual-node processors) 700 */ 701 if (BITX(nb_caps_reg, 30, 30) == 0) 702 cpi->cpi_procnodeid = node2_1 + 703 !first_half; 704 else 705 cpi->cpi_procnodeid = node2_1 + 706 first_half; 707 708 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 709 } 710 } 711 } else if (cpi->cpi_family >= 0x11) { 712 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 713 cpi->cpi_chipid = cpi->cpi_procnodeid; 714 } else { 715 cpi->cpi_procnodeid = 0; 716 cpi->cpi_chipid = cpi->cpi_procnodeid; 717 } 718 } 719 720 uint_t 721 cpuid_pass1(cpu_t *cpu) 722 { 723 uint32_t mask_ecx, mask_edx; 724 uint_t feature = X86_CPUID; 725 struct cpuid_info *cpi; 726 struct cpuid_regs *cp; 727 int xcpuid; 728 #if !defined(__xpv) 729 extern int idle_cpu_prefer_mwait; 730 #endif 731 732 733 #if !defined(__xpv) 734 determine_platform(); 735 #endif 736 /* 737 * Space statically allocated for BSP, ensure pointer is set 738 */ 739 if (cpu->cpu_id == 0 && cpu->cpu_m.mcpu_cpi == NULL) 740 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 741 cpi = 
cpu->cpu_m.mcpu_cpi; 742 ASSERT(cpi != NULL); 743 cp = &cpi->cpi_std[0]; 744 cp->cp_eax = 0; 745 cpi->cpi_maxeax = __cpuid_insn(cp); 746 { 747 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 748 *iptr++ = cp->cp_ebx; 749 *iptr++ = cp->cp_edx; 750 *iptr++ = cp->cp_ecx; 751 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 752 } 753 754 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 755 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 756 757 /* 758 * Limit the range in case of weird hardware 759 */ 760 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 761 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 762 if (cpi->cpi_maxeax < 1) 763 goto pass1_done; 764 765 cp = &cpi->cpi_std[1]; 766 cp->cp_eax = 1; 767 (void) __cpuid_insn(cp); 768 769 /* 770 * Extract identifying constants for easy access. 771 */ 772 cpi->cpi_model = CPI_MODEL(cpi); 773 cpi->cpi_family = CPI_FAMILY(cpi); 774 775 if (cpi->cpi_family == 0xf) 776 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 777 778 /* 779 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 780 * Intel, and presumably everyone else, uses model == 0xf, as 781 * one would expect (max value means possible overflow). Sigh. 
782 */ 783 784 switch (cpi->cpi_vendor) { 785 case X86_VENDOR_Intel: 786 if (IS_EXTENDED_MODEL_INTEL(cpi)) 787 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 788 break; 789 case X86_VENDOR_AMD: 790 if (CPI_FAMILY(cpi) == 0xf) 791 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 792 break; 793 default: 794 if (cpi->cpi_model == 0xf) 795 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 796 break; 797 } 798 799 cpi->cpi_step = CPI_STEP(cpi); 800 cpi->cpi_brandid = CPI_BRANDID(cpi); 801 802 /* 803 * *default* assumptions: 804 * - believe %edx feature word 805 * - ignore %ecx feature word 806 * - 32-bit virtual and physical addressing 807 */ 808 mask_edx = 0xffffffff; 809 mask_ecx = 0; 810 811 cpi->cpi_pabits = cpi->cpi_vabits = 32; 812 813 switch (cpi->cpi_vendor) { 814 case X86_VENDOR_Intel: 815 if (cpi->cpi_family == 5) 816 x86_type = X86_TYPE_P5; 817 else if (IS_LEGACY_P6(cpi)) { 818 x86_type = X86_TYPE_P6; 819 pentiumpro_bug4046376 = 1; 820 pentiumpro_bug4064495 = 1; 821 /* 822 * Clear the SEP bit when it was set erroneously 823 */ 824 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 825 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 826 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 827 x86_type = X86_TYPE_P4; 828 /* 829 * We don't currently depend on any of the %ecx 830 * features until Prescott, so we'll only check 831 * this from P4 onwards. We might want to revisit 832 * that idea later. 833 */ 834 mask_ecx = 0xffffffff; 835 } else if (cpi->cpi_family > 0xf) 836 mask_ecx = 0xffffffff; 837 /* 838 * We don't support MONITOR/MWAIT if leaf 5 is not available 839 * to obtain the monitor linesize. 
840 */ 841 if (cpi->cpi_maxeax < 5) 842 mask_ecx &= ~CPUID_INTC_ECX_MON; 843 break; 844 case X86_VENDOR_IntelClone: 845 default: 846 break; 847 case X86_VENDOR_AMD: 848 #if defined(OPTERON_ERRATUM_108) 849 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 850 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 851 cpi->cpi_model = 0xc; 852 } else 853 #endif 854 if (cpi->cpi_family == 5) { 855 /* 856 * AMD K5 and K6 857 * 858 * These CPUs have an incomplete implementation 859 * of MCA/MCE which we mask away. 860 */ 861 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 862 863 /* 864 * Model 0 uses the wrong (APIC) bit 865 * to indicate PGE. Fix it here. 866 */ 867 if (cpi->cpi_model == 0) { 868 if (cp->cp_edx & 0x200) { 869 cp->cp_edx &= ~0x200; 870 cp->cp_edx |= CPUID_INTC_EDX_PGE; 871 } 872 } 873 874 /* 875 * Early models had problems w/ MMX; disable. 876 */ 877 if (cpi->cpi_model < 6) 878 mask_edx &= ~CPUID_INTC_EDX_MMX; 879 } 880 881 /* 882 * For newer families, SSE3 and CX16, at least, are valid; 883 * enable all 884 */ 885 if (cpi->cpi_family >= 0xf) 886 mask_ecx = 0xffffffff; 887 /* 888 * We don't support MONITOR/MWAIT if leaf 5 is not available 889 * to obtain the monitor linesize. 890 */ 891 if (cpi->cpi_maxeax < 5) 892 mask_ecx &= ~CPUID_INTC_ECX_MON; 893 894 #if !defined(__xpv) 895 /* 896 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 897 * processors. AMD does not intend MWAIT to be used in the cpu 898 * idle loop on current and future processors. 10h and future 899 * AMD processors use more power in MWAIT than HLT. 900 * Pre-family-10h Opterons do not have the MWAIT instruction. 
901 */ 902 idle_cpu_prefer_mwait = 0; 903 #endif 904 905 break; 906 case X86_VENDOR_TM: 907 /* 908 * workaround the NT workaround in CMS 4.1 909 */ 910 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 911 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 912 cp->cp_edx |= CPUID_INTC_EDX_CX8; 913 break; 914 case X86_VENDOR_Centaur: 915 /* 916 * workaround the NT workarounds again 917 */ 918 if (cpi->cpi_family == 6) 919 cp->cp_edx |= CPUID_INTC_EDX_CX8; 920 break; 921 case X86_VENDOR_Cyrix: 922 /* 923 * We rely heavily on the probing in locore 924 * to actually figure out what parts, if any, 925 * of the Cyrix cpuid instruction to believe. 926 */ 927 switch (x86_type) { 928 case X86_TYPE_CYRIX_486: 929 mask_edx = 0; 930 break; 931 case X86_TYPE_CYRIX_6x86: 932 mask_edx = 0; 933 break; 934 case X86_TYPE_CYRIX_6x86L: 935 mask_edx = 936 CPUID_INTC_EDX_DE | 937 CPUID_INTC_EDX_CX8; 938 break; 939 case X86_TYPE_CYRIX_6x86MX: 940 mask_edx = 941 CPUID_INTC_EDX_DE | 942 CPUID_INTC_EDX_MSR | 943 CPUID_INTC_EDX_CX8 | 944 CPUID_INTC_EDX_PGE | 945 CPUID_INTC_EDX_CMOV | 946 CPUID_INTC_EDX_MMX; 947 break; 948 case X86_TYPE_CYRIX_GXm: 949 mask_edx = 950 CPUID_INTC_EDX_MSR | 951 CPUID_INTC_EDX_CX8 | 952 CPUID_INTC_EDX_CMOV | 953 CPUID_INTC_EDX_MMX; 954 break; 955 case X86_TYPE_CYRIX_MediaGX: 956 break; 957 case X86_TYPE_CYRIX_MII: 958 case X86_TYPE_VIA_CYRIX_III: 959 mask_edx = 960 CPUID_INTC_EDX_DE | 961 CPUID_INTC_EDX_TSC | 962 CPUID_INTC_EDX_MSR | 963 CPUID_INTC_EDX_CX8 | 964 CPUID_INTC_EDX_PGE | 965 CPUID_INTC_EDX_CMOV | 966 CPUID_INTC_EDX_MMX; 967 break; 968 default: 969 break; 970 } 971 break; 972 } 973 974 #if defined(__xpv) 975 /* 976 * Do not support MONITOR/MWAIT under a hypervisor 977 */ 978 mask_ecx &= ~CPUID_INTC_ECX_MON; 979 #endif /* __xpv */ 980 981 /* 982 * Now we've figured out the masks that determine 983 * which bits we choose to believe, apply the masks 984 * to the feature words, then map the kernel's view 985 * of these feature words into its feature word. 
986 */ 987 cp->cp_edx &= mask_edx; 988 cp->cp_ecx &= mask_ecx; 989 990 /* 991 * apply any platform restrictions (we don't call this 992 * immediately after __cpuid_insn here, because we need the 993 * workarounds applied above first) 994 */ 995 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 996 997 /* 998 * fold in overrides from the "eeprom" mechanism 999 */ 1000 cp->cp_edx |= cpuid_feature_edx_include; 1001 cp->cp_edx &= ~cpuid_feature_edx_exclude; 1002 1003 cp->cp_ecx |= cpuid_feature_ecx_include; 1004 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 1005 1006 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 1007 feature |= X86_LARGEPAGE; 1008 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 1009 feature |= X86_TSC; 1010 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 1011 feature |= X86_MSR; 1012 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 1013 feature |= X86_MTRR; 1014 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 1015 feature |= X86_PGE; 1016 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 1017 feature |= X86_CMOV; 1018 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 1019 feature |= X86_MMX; 1020 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 1021 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 1022 feature |= X86_MCA; 1023 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 1024 feature |= X86_PAE; 1025 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 1026 feature |= X86_CX8; 1027 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 1028 feature |= X86_CX16; 1029 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 1030 feature |= X86_PAT; 1031 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 1032 feature |= X86_SEP; 1033 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 1034 /* 1035 * In our implementation, fxsave/fxrstor 1036 * are prerequisites before we'll even 1037 * try and do SSE things. 
1038 */ 1039 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 1040 feature |= X86_SSE; 1041 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 1042 feature |= X86_SSE2; 1043 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 1044 feature |= X86_SSE3; 1045 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1046 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 1047 feature |= X86_SSSE3; 1048 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 1049 feature |= X86_SSE4_1; 1050 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 1051 feature |= X86_SSE4_2; 1052 if (cp->cp_ecx & CPUID_INTC_ECX_AES) 1053 feature |= X86_AES; 1054 } 1055 } 1056 if (cp->cp_edx & CPUID_INTC_EDX_DE) 1057 feature |= X86_DE; 1058 #if !defined(__xpv) 1059 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 1060 1061 /* 1062 * We require the CLFLUSH instruction for erratum workaround 1063 * to use MONITOR/MWAIT. 1064 */ 1065 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1066 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 1067 feature |= X86_MWAIT; 1068 } else { 1069 extern int idle_cpu_assert_cflush_monitor; 1070 1071 /* 1072 * All processors we are aware of which have 1073 * MONITOR/MWAIT also have CLFLUSH. 1074 */ 1075 if (idle_cpu_assert_cflush_monitor) { 1076 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 1077 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 1078 } 1079 } 1080 } 1081 #endif /* __xpv */ 1082 1083 /* 1084 * Only need it first time, rest of the cpus would follow suite. 1085 * we only capture this for the bootcpu. 1086 */ 1087 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1088 feature |= X86_CLFSH; 1089 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 1090 } 1091 1092 if (feature & X86_PAE) 1093 cpi->cpi_pabits = 36; 1094 1095 /* 1096 * Hyperthreading configuration is slightly tricky on Intel 1097 * and pure clones, and even trickier on AMD. 1098 * 1099 * (AMD chose to set the HTT bit on their CMP processors, 1100 * even though they're not actually hyperthreaded. Thus it 1101 * takes a bit more work to figure out what's really going 1102 * on ... 
see the handling of the CMP_LGCY bit below) 1103 */ 1104 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 1105 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 1106 if (cpi->cpi_ncpu_per_chip > 1) 1107 feature |= X86_HTT; 1108 } else { 1109 cpi->cpi_ncpu_per_chip = 1; 1110 } 1111 1112 /* 1113 * Work on the "extended" feature information, doing 1114 * some basic initialization for cpuid_pass2() 1115 */ 1116 xcpuid = 0; 1117 switch (cpi->cpi_vendor) { 1118 case X86_VENDOR_Intel: 1119 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 1120 xcpuid++; 1121 break; 1122 case X86_VENDOR_AMD: 1123 if (cpi->cpi_family > 5 || 1124 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 1125 xcpuid++; 1126 break; 1127 case X86_VENDOR_Cyrix: 1128 /* 1129 * Only these Cyrix CPUs are -known- to support 1130 * extended cpuid operations. 1131 */ 1132 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1133 x86_type == X86_TYPE_CYRIX_GXm) 1134 xcpuid++; 1135 break; 1136 case X86_VENDOR_Centaur: 1137 case X86_VENDOR_TM: 1138 default: 1139 xcpuid++; 1140 break; 1141 } 1142 1143 if (xcpuid) { 1144 cp = &cpi->cpi_extd[0]; 1145 cp->cp_eax = 0x80000000; 1146 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1147 } 1148 1149 if (cpi->cpi_xmaxeax & 0x80000000) { 1150 1151 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1152 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1153 1154 switch (cpi->cpi_vendor) { 1155 case X86_VENDOR_Intel: 1156 case X86_VENDOR_AMD: 1157 if (cpi->cpi_xmaxeax < 0x80000001) 1158 break; 1159 cp = &cpi->cpi_extd[1]; 1160 cp->cp_eax = 0x80000001; 1161 (void) __cpuid_insn(cp); 1162 1163 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1164 cpi->cpi_family == 5 && 1165 cpi->cpi_model == 6 && 1166 cpi->cpi_step == 6) { 1167 /* 1168 * K6 model 6 uses bit 10 to indicate SYSC 1169 * Later models use bit 11. Fix it here. 
1170 */ 1171 if (cp->cp_edx & 0x400) { 1172 cp->cp_edx &= ~0x400; 1173 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1174 } 1175 } 1176 1177 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1178 1179 /* 1180 * Compute the additions to the kernel's feature word. 1181 */ 1182 if (cp->cp_edx & CPUID_AMD_EDX_NX) 1183 feature |= X86_NX; 1184 1185 /* 1186 * Regardless whether or not we boot 64-bit, 1187 * we should have a way to identify whether 1188 * the CPU is capable of running 64-bit. 1189 */ 1190 if (cp->cp_edx & CPUID_AMD_EDX_LM) 1191 feature |= X86_64; 1192 1193 #if defined(__amd64) 1194 /* 1 GB large page - enable only for 64 bit kernel */ 1195 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 1196 feature |= X86_1GPG; 1197 #endif 1198 1199 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1200 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1201 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 1202 feature |= X86_SSE4A; 1203 1204 /* 1205 * If both the HTT and CMP_LGCY bits are set, 1206 * then we're not actually HyperThreaded. Read 1207 * "AMD CPUID Specification" for more details. 1208 */ 1209 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1210 (feature & X86_HTT) && 1211 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1212 feature &= ~X86_HTT; 1213 feature |= X86_CMP; 1214 } 1215 #if defined(__amd64) 1216 /* 1217 * It's really tricky to support syscall/sysret in 1218 * the i386 kernel; we rely on sysenter/sysexit 1219 * instead. In the amd64 kernel, things are -way- 1220 * better. 1221 */ 1222 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 1223 feature |= X86_ASYSC; 1224 1225 /* 1226 * While we're thinking about system calls, note 1227 * that AMD processors don't support sysenter 1228 * in long mode at all, so don't try to program them. 1229 */ 1230 if (x86_vendor == X86_VENDOR_AMD) 1231 feature &= ~X86_SEP; 1232 #endif 1233 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1234 feature |= X86_TSCP; 1235 break; 1236 default: 1237 break; 1238 } 1239 1240 /* 1241 * Get CPUID data about processor cores and hyperthreads. 
1242 */ 1243 switch (cpi->cpi_vendor) { 1244 case X86_VENDOR_Intel: 1245 if (cpi->cpi_maxeax >= 4) { 1246 cp = &cpi->cpi_std[4]; 1247 cp->cp_eax = 4; 1248 cp->cp_ecx = 0; 1249 (void) __cpuid_insn(cp); 1250 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1251 } 1252 /*FALLTHROUGH*/ 1253 case X86_VENDOR_AMD: 1254 if (cpi->cpi_xmaxeax < 0x80000008) 1255 break; 1256 cp = &cpi->cpi_extd[8]; 1257 cp->cp_eax = 0x80000008; 1258 (void) __cpuid_insn(cp); 1259 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1260 1261 /* 1262 * Virtual and physical address limits from 1263 * cpuid override previously guessed values. 1264 */ 1265 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1266 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1267 break; 1268 default: 1269 break; 1270 } 1271 1272 /* 1273 * Derive the number of cores per chip 1274 */ 1275 switch (cpi->cpi_vendor) { 1276 case X86_VENDOR_Intel: 1277 if (cpi->cpi_maxeax < 4) { 1278 cpi->cpi_ncore_per_chip = 1; 1279 break; 1280 } else { 1281 cpi->cpi_ncore_per_chip = 1282 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1283 } 1284 break; 1285 case X86_VENDOR_AMD: 1286 if (cpi->cpi_xmaxeax < 0x80000008) { 1287 cpi->cpi_ncore_per_chip = 1; 1288 break; 1289 } else { 1290 /* 1291 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1292 * 1 less than the number of physical cores on 1293 * the chip. In family 0x10 this value can 1294 * be affected by "downcoring" - it reflects 1295 * 1 less than the number of cores actually 1296 * enabled on this node. 1297 */ 1298 cpi->cpi_ncore_per_chip = 1299 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1300 } 1301 break; 1302 default: 1303 cpi->cpi_ncore_per_chip = 1; 1304 break; 1305 } 1306 1307 /* 1308 * Get CPUID data about TSC Invariance in Deep C-State. 
1309 */ 1310 switch (cpi->cpi_vendor) { 1311 case X86_VENDOR_Intel: 1312 if (cpi->cpi_maxeax >= 7) { 1313 cp = &cpi->cpi_extd[7]; 1314 cp->cp_eax = 0x80000007; 1315 cp->cp_ecx = 0; 1316 (void) __cpuid_insn(cp); 1317 } 1318 break; 1319 default: 1320 break; 1321 } 1322 } else { 1323 cpi->cpi_ncore_per_chip = 1; 1324 } 1325 1326 /* 1327 * If more than one core, then this processor is CMP. 1328 */ 1329 if (cpi->cpi_ncore_per_chip > 1) 1330 feature |= X86_CMP; 1331 1332 /* 1333 * If the number of cores is the same as the number 1334 * of CPUs, then we cannot have HyperThreading. 1335 */ 1336 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1337 feature &= ~X86_HTT; 1338 1339 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1340 cpi->cpi_procnodes_per_pkg = 1; 1341 1342 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1343 /* 1344 * Single-core single-threaded processors. 1345 */ 1346 cpi->cpi_chipid = -1; 1347 cpi->cpi_clogid = 0; 1348 cpi->cpi_coreid = cpu->cpu_id; 1349 cpi->cpi_pkgcoreid = 0; 1350 if (cpi->cpi_vendor == X86_VENDOR_AMD) 1351 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 1352 else 1353 cpi->cpi_procnodeid = cpi->cpi_chipid; 1354 } else if (cpi->cpi_ncpu_per_chip > 1) { 1355 if (cpi->cpi_vendor == X86_VENDOR_Intel) 1356 cpuid_intel_getids(cpu, feature); 1357 else if (cpi->cpi_vendor == X86_VENDOR_AMD) 1358 cpuid_amd_getids(cpu); 1359 else { 1360 /* 1361 * All other processors are currently 1362 * assumed to have single cores. 
1363 */ 1364 cpi->cpi_coreid = cpi->cpi_chipid; 1365 cpi->cpi_pkgcoreid = 0; 1366 cpi->cpi_procnodeid = cpi->cpi_chipid; 1367 } 1368 } 1369 1370 /* 1371 * Synthesize chip "revision" and socket type 1372 */ 1373 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1374 cpi->cpi_model, cpi->cpi_step); 1375 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1376 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1377 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1378 cpi->cpi_model, cpi->cpi_step); 1379 1380 pass1_done: 1381 cpi->cpi_pass = 1; 1382 return (feature); 1383 } 1384 1385 /* 1386 * Make copies of the cpuid table entries we depend on, in 1387 * part for ease of parsing now, in part so that we have only 1388 * one place to correct any of it, in part for ease of 1389 * later export to userland, and in part so we can look at 1390 * this stuff in a crash dump. 1391 */ 1392 1393 /*ARGSUSED*/ 1394 void 1395 cpuid_pass2(cpu_t *cpu) 1396 { 1397 uint_t n, nmax; 1398 int i; 1399 struct cpuid_regs *cp; 1400 uint8_t *dp; 1401 uint32_t *iptr; 1402 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1403 1404 ASSERT(cpi->cpi_pass == 1); 1405 1406 if (cpi->cpi_maxeax < 1) 1407 goto pass2_done; 1408 1409 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1410 nmax = NMAX_CPI_STD; 1411 /* 1412 * (We already handled n == 0 and n == 1 in pass 1) 1413 */ 1414 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1415 cp->cp_eax = n; 1416 1417 /* 1418 * CPUID function 4 expects %ecx to be initialized 1419 * with an index which indicates which cache to return 1420 * information about. The OS is expected to call function 4 1421 * with %ecx set to 0, 1, 2, ... until it returns with 1422 * EAX[4:0] set to 0, which indicates there are no more 1423 * caches. 1424 * 1425 * Here, populate cpi_std[4] with the information returned by 1426 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1427 * when dynamic memory allocation becomes available. 
1428 * 1429 * Note: we need to explicitly initialize %ecx here, since 1430 * function 4 may have been previously invoked. 1431 */ 1432 if (n == 4) 1433 cp->cp_ecx = 0; 1434 1435 (void) __cpuid_insn(cp); 1436 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1437 switch (n) { 1438 case 2: 1439 /* 1440 * "the lower 8 bits of the %eax register 1441 * contain a value that identifies the number 1442 * of times the cpuid [instruction] has to be 1443 * executed to obtain a complete image of the 1444 * processor's caching systems." 1445 * 1446 * How *do* they make this stuff up? 1447 */ 1448 cpi->cpi_ncache = sizeof (*cp) * 1449 BITX(cp->cp_eax, 7, 0); 1450 if (cpi->cpi_ncache == 0) 1451 break; 1452 cpi->cpi_ncache--; /* skip count byte */ 1453 1454 /* 1455 * Well, for now, rather than attempt to implement 1456 * this slightly dubious algorithm, we just look 1457 * at the first 15 .. 1458 */ 1459 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1460 cpi->cpi_ncache = sizeof (*cp) - 1; 1461 1462 dp = cpi->cpi_cacheinfo; 1463 if (BITX(cp->cp_eax, 31, 31) == 0) { 1464 uint8_t *p = (void *)&cp->cp_eax; 1465 for (i = 1; i < 4; i++) 1466 if (p[i] != 0) 1467 *dp++ = p[i]; 1468 } 1469 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1470 uint8_t *p = (void *)&cp->cp_ebx; 1471 for (i = 0; i < 4; i++) 1472 if (p[i] != 0) 1473 *dp++ = p[i]; 1474 } 1475 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1476 uint8_t *p = (void *)&cp->cp_ecx; 1477 for (i = 0; i < 4; i++) 1478 if (p[i] != 0) 1479 *dp++ = p[i]; 1480 } 1481 if (BITX(cp->cp_edx, 31, 31) == 0) { 1482 uint8_t *p = (void *)&cp->cp_edx; 1483 for (i = 0; i < 4; i++) 1484 if (p[i] != 0) 1485 *dp++ = p[i]; 1486 } 1487 break; 1488 1489 case 3: /* Processor serial number, if PSN supported */ 1490 break; 1491 1492 case 4: /* Deterministic cache parameters */ 1493 break; 1494 1495 case 5: /* Monitor/Mwait parameters */ 1496 { 1497 size_t mwait_size; 1498 1499 /* 1500 * check cpi_mwait.support which was set in cpuid_pass1 1501 */ 1502 if (!(cpi->cpi_mwait.support & 
MWAIT_SUPPORT)) 1503 break; 1504 1505 /* 1506 * Protect ourself from insane mwait line size. 1507 * Workaround for incomplete hardware emulator(s). 1508 */ 1509 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1510 if (mwait_size < sizeof (uint32_t) || 1511 !ISP2(mwait_size)) { 1512 #if DEBUG 1513 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1514 "size %ld", cpu->cpu_id, (long)mwait_size); 1515 #endif 1516 break; 1517 } 1518 1519 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1520 cpi->cpi_mwait.mon_max = mwait_size; 1521 if (MWAIT_EXTENSION(cpi)) { 1522 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1523 if (MWAIT_INT_ENABLE(cpi)) 1524 cpi->cpi_mwait.support |= 1525 MWAIT_ECX_INT_ENABLE; 1526 } 1527 break; 1528 } 1529 default: 1530 break; 1531 } 1532 } 1533 1534 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1535 struct cpuid_regs regs; 1536 1537 cp = ®s; 1538 cp->cp_eax = 0xB; 1539 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1540 1541 (void) __cpuid_insn(cp); 1542 1543 /* 1544 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1545 * indicates that the extended topology enumeration leaf is 1546 * available. 
1547 */ 1548 if (cp->cp_ebx) { 1549 uint32_t x2apic_id; 1550 uint_t coreid_shift = 0; 1551 uint_t ncpu_per_core = 1; 1552 uint_t chipid_shift = 0; 1553 uint_t ncpu_per_chip = 1; 1554 uint_t i; 1555 uint_t level; 1556 1557 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1558 cp->cp_eax = 0xB; 1559 cp->cp_ecx = i; 1560 1561 (void) __cpuid_insn(cp); 1562 level = CPI_CPU_LEVEL_TYPE(cp); 1563 1564 if (level == 1) { 1565 x2apic_id = cp->cp_edx; 1566 coreid_shift = BITX(cp->cp_eax, 4, 0); 1567 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1568 } else if (level == 2) { 1569 x2apic_id = cp->cp_edx; 1570 chipid_shift = BITX(cp->cp_eax, 4, 0); 1571 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1572 } 1573 } 1574 1575 cpi->cpi_apicid = x2apic_id; 1576 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1577 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1578 ncpu_per_core; 1579 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1580 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1581 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1582 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1583 } 1584 1585 /* Make cp NULL so that we don't stumble on others */ 1586 cp = NULL; 1587 } 1588 1589 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1590 goto pass2_done; 1591 1592 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1593 nmax = NMAX_CPI_EXTD; 1594 /* 1595 * Copy the extended properties, fixing them as we go. 
1596 * (We already handled n == 0 and n == 1 in pass 1) 1597 */ 1598 iptr = (void *)cpi->cpi_brandstr; 1599 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1600 cp->cp_eax = 0x80000000 + n; 1601 (void) __cpuid_insn(cp); 1602 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1603 switch (n) { 1604 case 2: 1605 case 3: 1606 case 4: 1607 /* 1608 * Extract the brand string 1609 */ 1610 *iptr++ = cp->cp_eax; 1611 *iptr++ = cp->cp_ebx; 1612 *iptr++ = cp->cp_ecx; 1613 *iptr++ = cp->cp_edx; 1614 break; 1615 case 5: 1616 switch (cpi->cpi_vendor) { 1617 case X86_VENDOR_AMD: 1618 /* 1619 * The Athlon and Duron were the first 1620 * parts to report the sizes of the 1621 * TLB for large pages. Before then, 1622 * we don't trust the data. 1623 */ 1624 if (cpi->cpi_family < 6 || 1625 (cpi->cpi_family == 6 && 1626 cpi->cpi_model < 1)) 1627 cp->cp_eax = 0; 1628 break; 1629 default: 1630 break; 1631 } 1632 break; 1633 case 6: 1634 switch (cpi->cpi_vendor) { 1635 case X86_VENDOR_AMD: 1636 /* 1637 * The Athlon and Duron were the first 1638 * AMD parts with L2 TLB's. 1639 * Before then, don't trust the data. 1640 */ 1641 if (cpi->cpi_family < 6 || 1642 cpi->cpi_family == 6 && 1643 cpi->cpi_model < 1) 1644 cp->cp_eax = cp->cp_ebx = 0; 1645 /* 1646 * AMD Duron rev A0 reports L2 1647 * cache size incorrectly as 1K 1648 * when it is really 64K 1649 */ 1650 if (cpi->cpi_family == 6 && 1651 cpi->cpi_model == 3 && 1652 cpi->cpi_step == 0) { 1653 cp->cp_ecx &= 0xffff; 1654 cp->cp_ecx |= 0x400000; 1655 } 1656 break; 1657 case X86_VENDOR_Cyrix: /* VIA C3 */ 1658 /* 1659 * VIA C3 processors are a bit messed 1660 * up w.r.t. encoding cache sizes in %ecx 1661 */ 1662 if (cpi->cpi_family != 6) 1663 break; 1664 /* 1665 * model 7 and 8 were incorrectly encoded 1666 * 1667 * xxx is model 8 really broken? 
1668 */ 1669 if (cpi->cpi_model == 7 || 1670 cpi->cpi_model == 8) 1671 cp->cp_ecx = 1672 BITX(cp->cp_ecx, 31, 24) << 16 | 1673 BITX(cp->cp_ecx, 23, 16) << 12 | 1674 BITX(cp->cp_ecx, 15, 8) << 8 | 1675 BITX(cp->cp_ecx, 7, 0); 1676 /* 1677 * model 9 stepping 1 has wrong associativity 1678 */ 1679 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1680 cp->cp_ecx |= 8 << 12; 1681 break; 1682 case X86_VENDOR_Intel: 1683 /* 1684 * Extended L2 Cache features function. 1685 * First appeared on Prescott. 1686 */ 1687 default: 1688 break; 1689 } 1690 break; 1691 default: 1692 break; 1693 } 1694 } 1695 1696 pass2_done: 1697 cpi->cpi_pass = 2; 1698 } 1699 1700 static const char * 1701 intel_cpubrand(const struct cpuid_info *cpi) 1702 { 1703 int i; 1704 1705 if ((x86_feature & X86_CPUID) == 0 || 1706 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1707 return ("i486"); 1708 1709 switch (cpi->cpi_family) { 1710 case 5: 1711 return ("Intel Pentium(r)"); 1712 case 6: 1713 switch (cpi->cpi_model) { 1714 uint_t celeron, xeon; 1715 const struct cpuid_regs *cp; 1716 case 0: 1717 case 1: 1718 case 2: 1719 return ("Intel Pentium(r) Pro"); 1720 case 3: 1721 case 4: 1722 return ("Intel Pentium(r) II"); 1723 case 6: 1724 return ("Intel Celeron(r)"); 1725 case 5: 1726 case 7: 1727 celeron = xeon = 0; 1728 cp = &cpi->cpi_std[2]; /* cache info */ 1729 1730 for (i = 1; i < 4; i++) { 1731 uint_t tmp; 1732 1733 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1734 if (tmp == 0x40) 1735 celeron++; 1736 if (tmp >= 0x44 && tmp <= 0x45) 1737 xeon++; 1738 } 1739 1740 for (i = 0; i < 2; i++) { 1741 uint_t tmp; 1742 1743 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1744 if (tmp == 0x40) 1745 celeron++; 1746 else if (tmp >= 0x44 && tmp <= 0x45) 1747 xeon++; 1748 } 1749 1750 for (i = 0; i < 4; i++) { 1751 uint_t tmp; 1752 1753 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1754 if (tmp == 0x40) 1755 celeron++; 1756 else if (tmp >= 0x44 && tmp <= 0x45) 1757 xeon++; 1758 } 1759 1760 for (i = 0; i < 4; i++) { 1761 uint_t tmp; 1762 1763 tmp 
= (cp->cp_edx >> (8 * i)) & 0xff; 1764 if (tmp == 0x40) 1765 celeron++; 1766 else if (tmp >= 0x44 && tmp <= 0x45) 1767 xeon++; 1768 } 1769 1770 if (celeron) 1771 return ("Intel Celeron(r)"); 1772 if (xeon) 1773 return (cpi->cpi_model == 5 ? 1774 "Intel Pentium(r) II Xeon(tm)" : 1775 "Intel Pentium(r) III Xeon(tm)"); 1776 return (cpi->cpi_model == 5 ? 1777 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1778 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1779 default: 1780 break; 1781 } 1782 default: 1783 break; 1784 } 1785 1786 /* BrandID is present if the field is nonzero */ 1787 if (cpi->cpi_brandid != 0) { 1788 static const struct { 1789 uint_t bt_bid; 1790 const char *bt_str; 1791 } brand_tbl[] = { 1792 { 0x1, "Intel(r) Celeron(r)" }, 1793 { 0x2, "Intel(r) Pentium(r) III" }, 1794 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1795 { 0x4, "Intel(r) Pentium(r) III" }, 1796 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1797 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1798 { 0x8, "Intel(r) Pentium(r) 4" }, 1799 { 0x9, "Intel(r) Pentium(r) 4" }, 1800 { 0xa, "Intel(r) Celeron(r)" }, 1801 { 0xb, "Intel(r) Xeon(tm)" }, 1802 { 0xc, "Intel(r) Xeon(tm) MP" }, 1803 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1804 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1805 { 0x11, "Mobile Genuine Intel(r)" }, 1806 { 0x12, "Intel(r) Celeron(r) M" }, 1807 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1808 { 0x14, "Intel(r) Celeron(r)" }, 1809 { 0x15, "Mobile Genuine Intel(r)" }, 1810 { 0x16, "Intel(r) Pentium(r) M" }, 1811 { 0x17, "Mobile Intel(r) Celeron(r)" } 1812 }; 1813 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1814 uint_t sgn; 1815 1816 sgn = (cpi->cpi_family << 8) | 1817 (cpi->cpi_model << 4) | cpi->cpi_step; 1818 1819 for (i = 0; i < btblmax; i++) 1820 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1821 break; 1822 if (i < btblmax) { 1823 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1824 return ("Intel(r) Celeron(r)"); 1825 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1826 return 
("Intel(r) Xeon(tm) MP"); 1827 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1828 return ("Intel(r) Xeon(tm)"); 1829 return (brand_tbl[i].bt_str); 1830 } 1831 } 1832 1833 return (NULL); 1834 } 1835 1836 static const char * 1837 amd_cpubrand(const struct cpuid_info *cpi) 1838 { 1839 if ((x86_feature & X86_CPUID) == 0 || 1840 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1841 return ("i486 compatible"); 1842 1843 switch (cpi->cpi_family) { 1844 case 5: 1845 switch (cpi->cpi_model) { 1846 case 0: 1847 case 1: 1848 case 2: 1849 case 3: 1850 case 4: 1851 case 5: 1852 return ("AMD-K5(r)"); 1853 case 6: 1854 case 7: 1855 return ("AMD-K6(r)"); 1856 case 8: 1857 return ("AMD-K6(r)-2"); 1858 case 9: 1859 return ("AMD-K6(r)-III"); 1860 default: 1861 return ("AMD (family 5)"); 1862 } 1863 case 6: 1864 switch (cpi->cpi_model) { 1865 case 1: 1866 return ("AMD-K7(tm)"); 1867 case 0: 1868 case 2: 1869 case 4: 1870 return ("AMD Athlon(tm)"); 1871 case 3: 1872 case 7: 1873 return ("AMD Duron(tm)"); 1874 case 6: 1875 case 8: 1876 case 10: 1877 /* 1878 * Use the L2 cache size to distinguish 1879 */ 1880 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
1881 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1882 default: 1883 return ("AMD (family 6)"); 1884 } 1885 default: 1886 break; 1887 } 1888 1889 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1890 cpi->cpi_brandid != 0) { 1891 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1892 case 3: 1893 return ("AMD Opteron(tm) UP 1xx"); 1894 case 4: 1895 return ("AMD Opteron(tm) DP 2xx"); 1896 case 5: 1897 return ("AMD Opteron(tm) MP 8xx"); 1898 default: 1899 return ("AMD Opteron(tm)"); 1900 } 1901 } 1902 1903 return (NULL); 1904 } 1905 1906 static const char * 1907 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1908 { 1909 if ((x86_feature & X86_CPUID) == 0 || 1910 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1911 type == X86_TYPE_CYRIX_486) 1912 return ("i486 compatible"); 1913 1914 switch (type) { 1915 case X86_TYPE_CYRIX_6x86: 1916 return ("Cyrix 6x86"); 1917 case X86_TYPE_CYRIX_6x86L: 1918 return ("Cyrix 6x86L"); 1919 case X86_TYPE_CYRIX_6x86MX: 1920 return ("Cyrix 6x86MX"); 1921 case X86_TYPE_CYRIX_GXm: 1922 return ("Cyrix GXm"); 1923 case X86_TYPE_CYRIX_MediaGX: 1924 return ("Cyrix MediaGX"); 1925 case X86_TYPE_CYRIX_MII: 1926 return ("Cyrix M2"); 1927 case X86_TYPE_VIA_CYRIX_III: 1928 return ("VIA Cyrix M3"); 1929 default: 1930 /* 1931 * Have another wild guess .. 1932 */ 1933 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1934 return ("Cyrix 5x86"); 1935 else if (cpi->cpi_family == 5) { 1936 switch (cpi->cpi_model) { 1937 case 2: 1938 return ("Cyrix 6x86"); /* Cyrix M1 */ 1939 case 4: 1940 return ("Cyrix MediaGX"); 1941 default: 1942 break; 1943 } 1944 } else if (cpi->cpi_family == 6) { 1945 switch (cpi->cpi_model) { 1946 case 0: 1947 return ("Cyrix 6x86MX"); /* Cyrix M2? 
*/ 1948 case 5: 1949 case 6: 1950 case 7: 1951 case 8: 1952 case 9: 1953 return ("VIA C3"); 1954 default: 1955 break; 1956 } 1957 } 1958 break; 1959 } 1960 return (NULL); 1961 } 1962 1963 /* 1964 * This only gets called in the case that the CPU extended 1965 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1966 * aren't available, or contain null bytes for some reason. 1967 */ 1968 static void 1969 fabricate_brandstr(struct cpuid_info *cpi) 1970 { 1971 const char *brand = NULL; 1972 1973 switch (cpi->cpi_vendor) { 1974 case X86_VENDOR_Intel: 1975 brand = intel_cpubrand(cpi); 1976 break; 1977 case X86_VENDOR_AMD: 1978 brand = amd_cpubrand(cpi); 1979 break; 1980 case X86_VENDOR_Cyrix: 1981 brand = cyrix_cpubrand(cpi, x86_type); 1982 break; 1983 case X86_VENDOR_NexGen: 1984 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1985 brand = "NexGen Nx586"; 1986 break; 1987 case X86_VENDOR_Centaur: 1988 if (cpi->cpi_family == 5) 1989 switch (cpi->cpi_model) { 1990 case 4: 1991 brand = "Centaur C6"; 1992 break; 1993 case 8: 1994 brand = "Centaur C2"; 1995 break; 1996 case 9: 1997 brand = "Centaur C3"; 1998 break; 1999 default: 2000 break; 2001 } 2002 break; 2003 case X86_VENDOR_Rise: 2004 if (cpi->cpi_family == 5 && 2005 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 2006 brand = "Rise mP6"; 2007 break; 2008 case X86_VENDOR_SiS: 2009 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2010 brand = "SiS 55x"; 2011 break; 2012 case X86_VENDOR_TM: 2013 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 2014 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 2015 break; 2016 case X86_VENDOR_NSC: 2017 case X86_VENDOR_UMC: 2018 default: 2019 break; 2020 } 2021 if (brand) { 2022 (void) strcpy((char *)cpi->cpi_brandstr, brand); 2023 return; 2024 } 2025 2026 /* 2027 * If all else fails ... 
2028 */ 2029 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 2030 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 2031 cpi->cpi_model, cpi->cpi_step); 2032 } 2033 2034 /* 2035 * This routine is called just after kernel memory allocation 2036 * becomes available on cpu0, and as part of mp_startup() on 2037 * the other cpus. 2038 * 2039 * Fixup the brand string, and collect any information from cpuid 2040 * that requires dynamicically allocated storage to represent. 2041 */ 2042 /*ARGSUSED*/ 2043 void 2044 cpuid_pass3(cpu_t *cpu) 2045 { 2046 int i, max, shft, level, size; 2047 struct cpuid_regs regs; 2048 struct cpuid_regs *cp; 2049 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2050 2051 ASSERT(cpi->cpi_pass == 2); 2052 2053 /* 2054 * Function 4: Deterministic cache parameters 2055 * 2056 * Take this opportunity to detect the number of threads 2057 * sharing the last level cache, and construct a corresponding 2058 * cache id. The respective cpuid_info members are initialized 2059 * to the default case of "no last level cache sharing". 2060 */ 2061 cpi->cpi_ncpu_shr_last_cache = 1; 2062 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 2063 2064 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 2065 2066 /* 2067 * Find the # of elements (size) returned by fn 4, and along 2068 * the way detect last level cache sharing details. 2069 */ 2070 bzero(®s, sizeof (regs)); 2071 cp = ®s; 2072 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 2073 cp->cp_eax = 4; 2074 cp->cp_ecx = i; 2075 2076 (void) __cpuid_insn(cp); 2077 2078 if (CPI_CACHE_TYPE(cp) == 0) 2079 break; 2080 level = CPI_CACHE_LVL(cp); 2081 if (level > max) { 2082 max = level; 2083 cpi->cpi_ncpu_shr_last_cache = 2084 CPI_NTHR_SHR_CACHE(cp) + 1; 2085 } 2086 } 2087 cpi->cpi_std_4_size = size = i; 2088 2089 /* 2090 * Allocate the cpi_std_4 array. The first element 2091 * references the regs for fn 4, %ecx == 0, which 2092 * cpuid_pass2() stashed in cpi->cpi_std[4]. 
2093 */ 2094 if (size > 0) { 2095 cpi->cpi_std_4 = 2096 kmem_alloc(size * sizeof (cp), KM_SLEEP); 2097 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 2098 2099 /* 2100 * Allocate storage to hold the additional regs 2101 * for function 4, %ecx == 1 .. cpi_std_4_size. 2102 * 2103 * The regs for fn 4, %ecx == 0 has already 2104 * been allocated as indicated above. 2105 */ 2106 for (i = 1; i < size; i++) { 2107 cp = cpi->cpi_std_4[i] = 2108 kmem_zalloc(sizeof (regs), KM_SLEEP); 2109 cp->cp_eax = 4; 2110 cp->cp_ecx = i; 2111 2112 (void) __cpuid_insn(cp); 2113 } 2114 } 2115 /* 2116 * Determine the number of bits needed to represent 2117 * the number of CPUs sharing the last level cache. 2118 * 2119 * Shift off that number of bits from the APIC id to 2120 * derive the cache id. 2121 */ 2122 shft = 0; 2123 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 2124 shft++; 2125 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 2126 } 2127 2128 /* 2129 * Now fixup the brand string 2130 */ 2131 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 2132 fabricate_brandstr(cpi); 2133 } else { 2134 2135 /* 2136 * If we successfully extracted a brand string from the cpuid 2137 * instruction, clean it up by removing leading spaces and 2138 * similar junk. 2139 */ 2140 if (cpi->cpi_brandstr[0]) { 2141 size_t maxlen = sizeof (cpi->cpi_brandstr); 2142 char *src, *dst; 2143 2144 dst = src = (char *)cpi->cpi_brandstr; 2145 src[maxlen - 1] = '\0'; 2146 /* 2147 * strip leading spaces 2148 */ 2149 while (*src == ' ') 2150 src++; 2151 /* 2152 * Remove any 'Genuine' or "Authentic" prefixes 2153 */ 2154 if (strncmp(src, "Genuine ", 8) == 0) 2155 src += 8; 2156 if (strncmp(src, "Authentic ", 10) == 0) 2157 src += 10; 2158 2159 /* 2160 * Now do an in-place copy. 2161 * Map (R) to (r) and (TM) to (tm). 2162 * The era of teletypes is long gone, and there's 2163 * -really- no need to shout. 
2164 */ 2165 while (*src != '\0') { 2166 if (src[0] == '(') { 2167 if (strncmp(src + 1, "R)", 2) == 0) { 2168 (void) strncpy(dst, "(r)", 3); 2169 src += 3; 2170 dst += 3; 2171 continue; 2172 } 2173 if (strncmp(src + 1, "TM)", 3) == 0) { 2174 (void) strncpy(dst, "(tm)", 4); 2175 src += 4; 2176 dst += 4; 2177 continue; 2178 } 2179 } 2180 *dst++ = *src++; 2181 } 2182 *dst = '\0'; 2183 2184 /* 2185 * Finally, remove any trailing spaces 2186 */ 2187 while (--dst > cpi->cpi_brandstr) 2188 if (*dst == ' ') 2189 *dst = '\0'; 2190 else 2191 break; 2192 } else 2193 fabricate_brandstr(cpi); 2194 } 2195 cpi->cpi_pass = 3; 2196 } 2197 2198 /* 2199 * This routine is called out of bind_hwcap() much later in the life 2200 * of the kernel (post_startup()). The job of this routine is to resolve 2201 * the hardware feature support and kernel support for those features into 2202 * what we're actually going to tell applications via the aux vector. 2203 */ 2204 uint_t 2205 cpuid_pass4(cpu_t *cpu) 2206 { 2207 struct cpuid_info *cpi; 2208 uint_t hwcap_flags = 0; 2209 2210 if (cpu == NULL) 2211 cpu = CPU; 2212 cpi = cpu->cpu_m.mcpu_cpi; 2213 2214 ASSERT(cpi->cpi_pass == 3); 2215 2216 if (cpi->cpi_maxeax >= 1) { 2217 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2218 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2219 2220 *edx = CPI_FEATURES_EDX(cpi); 2221 *ecx = CPI_FEATURES_ECX(cpi); 2222 2223 /* 2224 * [these require explicit kernel support] 2225 */ 2226 if ((x86_feature & X86_SEP) == 0) 2227 *edx &= ~CPUID_INTC_EDX_SEP; 2228 2229 if ((x86_feature & X86_SSE) == 0) 2230 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2231 if ((x86_feature & X86_SSE2) == 0) 2232 *edx &= ~CPUID_INTC_EDX_SSE2; 2233 2234 if ((x86_feature & X86_HTT) == 0) 2235 *edx &= ~CPUID_INTC_EDX_HTT; 2236 2237 if ((x86_feature & X86_SSE3) == 0) 2238 *ecx &= ~CPUID_INTC_ECX_SSE3; 2239 2240 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2241 if ((x86_feature & X86_SSSE3) == 0) 2242 *ecx &= 
~CPUID_INTC_ECX_SSSE3; 2243 if ((x86_feature & X86_SSE4_1) == 0) 2244 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2245 if ((x86_feature & X86_SSE4_2) == 0) 2246 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2247 if ((x86_feature & X86_AES) == 0) 2248 *ecx &= ~CPUID_INTC_ECX_AES; 2249 } 2250 2251 /* 2252 * [no explicit support required beyond x87 fp context] 2253 */ 2254 if (!fpu_exists) 2255 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2256 2257 /* 2258 * Now map the supported feature vector to things that we 2259 * think userland will care about. 2260 */ 2261 if (*edx & CPUID_INTC_EDX_SEP) 2262 hwcap_flags |= AV_386_SEP; 2263 if (*edx & CPUID_INTC_EDX_SSE) 2264 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2265 if (*edx & CPUID_INTC_EDX_SSE2) 2266 hwcap_flags |= AV_386_SSE2; 2267 if (*ecx & CPUID_INTC_ECX_SSE3) 2268 hwcap_flags |= AV_386_SSE3; 2269 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2270 if (*ecx & CPUID_INTC_ECX_SSSE3) 2271 hwcap_flags |= AV_386_SSSE3; 2272 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2273 hwcap_flags |= AV_386_SSE4_1; 2274 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2275 hwcap_flags |= AV_386_SSE4_2; 2276 if (*ecx & CPUID_INTC_ECX_MOVBE) 2277 hwcap_flags |= AV_386_MOVBE; 2278 if (*ecx & CPUID_INTC_ECX_AES) 2279 hwcap_flags |= AV_386_AES; 2280 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2281 hwcap_flags |= AV_386_PCLMULQDQ; 2282 } 2283 if (*ecx & CPUID_INTC_ECX_POPCNT) 2284 hwcap_flags |= AV_386_POPCNT; 2285 if (*edx & CPUID_INTC_EDX_FPU) 2286 hwcap_flags |= AV_386_FPU; 2287 if (*edx & CPUID_INTC_EDX_MMX) 2288 hwcap_flags |= AV_386_MMX; 2289 2290 if (*edx & CPUID_INTC_EDX_TSC) 2291 hwcap_flags |= AV_386_TSC; 2292 if (*edx & CPUID_INTC_EDX_CX8) 2293 hwcap_flags |= AV_386_CX8; 2294 if (*edx & CPUID_INTC_EDX_CMOV) 2295 hwcap_flags |= AV_386_CMOV; 2296 if (*ecx & CPUID_INTC_ECX_CX16) 2297 hwcap_flags |= AV_386_CX16; 2298 } 2299 2300 if (cpi->cpi_xmaxeax < 0x80000001) 2301 goto pass4_done; 2302 2303 switch (cpi->cpi_vendor) { 2304 struct cpuid_regs cp; 2305 uint32_t *edx, *ecx; 2306 2307 case 
X86_VENDOR_Intel: 2308 /* 2309 * Seems like Intel duplicated what we necessary 2310 * here to make the initial crop of 64-bit OS's work. 2311 * Hopefully, those are the only "extended" bits 2312 * they'll add. 2313 */ 2314 /*FALLTHROUGH*/ 2315 2316 case X86_VENDOR_AMD: 2317 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2318 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2319 2320 *edx = CPI_FEATURES_XTD_EDX(cpi); 2321 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2322 2323 /* 2324 * [these features require explicit kernel support] 2325 */ 2326 switch (cpi->cpi_vendor) { 2327 case X86_VENDOR_Intel: 2328 if ((x86_feature & X86_TSCP) == 0) 2329 *edx &= ~CPUID_AMD_EDX_TSCP; 2330 break; 2331 2332 case X86_VENDOR_AMD: 2333 if ((x86_feature & X86_TSCP) == 0) 2334 *edx &= ~CPUID_AMD_EDX_TSCP; 2335 if ((x86_feature & X86_SSE4A) == 0) 2336 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2337 break; 2338 2339 default: 2340 break; 2341 } 2342 2343 /* 2344 * [no explicit support required beyond 2345 * x87 fp context and exception handlers] 2346 */ 2347 if (!fpu_exists) 2348 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2349 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2350 2351 if ((x86_feature & X86_NX) == 0) 2352 *edx &= ~CPUID_AMD_EDX_NX; 2353 #if !defined(__amd64) 2354 *edx &= ~CPUID_AMD_EDX_LM; 2355 #endif 2356 /* 2357 * Now map the supported feature vector to 2358 * things that we think userland will care about. 
2359 */ 2360 #if defined(__amd64) 2361 if (*edx & CPUID_AMD_EDX_SYSC) 2362 hwcap_flags |= AV_386_AMD_SYSC; 2363 #endif 2364 if (*edx & CPUID_AMD_EDX_MMXamd) 2365 hwcap_flags |= AV_386_AMD_MMX; 2366 if (*edx & CPUID_AMD_EDX_3DNow) 2367 hwcap_flags |= AV_386_AMD_3DNow; 2368 if (*edx & CPUID_AMD_EDX_3DNowx) 2369 hwcap_flags |= AV_386_AMD_3DNowx; 2370 2371 switch (cpi->cpi_vendor) { 2372 case X86_VENDOR_AMD: 2373 if (*edx & CPUID_AMD_EDX_TSCP) 2374 hwcap_flags |= AV_386_TSCP; 2375 if (*ecx & CPUID_AMD_ECX_AHF64) 2376 hwcap_flags |= AV_386_AHF; 2377 if (*ecx & CPUID_AMD_ECX_SSE4A) 2378 hwcap_flags |= AV_386_AMD_SSE4A; 2379 if (*ecx & CPUID_AMD_ECX_LZCNT) 2380 hwcap_flags |= AV_386_AMD_LZCNT; 2381 break; 2382 2383 case X86_VENDOR_Intel: 2384 if (*edx & CPUID_AMD_EDX_TSCP) 2385 hwcap_flags |= AV_386_TSCP; 2386 /* 2387 * Aarrgh. 2388 * Intel uses a different bit in the same word. 2389 */ 2390 if (*ecx & CPUID_INTC_ECX_AHF64) 2391 hwcap_flags |= AV_386_AHF; 2392 break; 2393 2394 default: 2395 break; 2396 } 2397 break; 2398 2399 case X86_VENDOR_TM: 2400 cp.cp_eax = 0x80860001; 2401 (void) __cpuid_insn(&cp); 2402 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2403 break; 2404 2405 default: 2406 break; 2407 } 2408 2409 pass4_done: 2410 cpi->cpi_pass = 4; 2411 return (hwcap_flags); 2412 } 2413 2414 2415 /* 2416 * Simulate the cpuid instruction using the data we previously 2417 * captured about this CPU. We try our best to return the truth 2418 * about the hardware, independently of kernel support. 2419 */ 2420 uint32_t 2421 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2422 { 2423 struct cpuid_info *cpi; 2424 struct cpuid_regs *xcp; 2425 2426 if (cpu == NULL) 2427 cpu = CPU; 2428 cpi = cpu->cpu_m.mcpu_cpi; 2429 2430 ASSERT(cpuid_checkpass(cpu, 3)); 2431 2432 /* 2433 * CPUID data is cached in two separate places: cpi_std for standard 2434 * CPUID functions, and cpi_extd for extended CPUID functions. 
2435 */ 2436 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2437 xcp = &cpi->cpi_std[cp->cp_eax]; 2438 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2439 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2440 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2441 else 2442 /* 2443 * The caller is asking for data from an input parameter which 2444 * the kernel has not cached. In this case we go fetch from 2445 * the hardware and return the data directly to the user. 2446 */ 2447 return (__cpuid_insn(cp)); 2448 2449 cp->cp_eax = xcp->cp_eax; 2450 cp->cp_ebx = xcp->cp_ebx; 2451 cp->cp_ecx = xcp->cp_ecx; 2452 cp->cp_edx = xcp->cp_edx; 2453 return (cp->cp_eax); 2454 } 2455 2456 int 2457 cpuid_checkpass(cpu_t *cpu, int pass) 2458 { 2459 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2460 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2461 } 2462 2463 int 2464 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2465 { 2466 ASSERT(cpuid_checkpass(cpu, 3)); 2467 2468 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2469 } 2470 2471 int 2472 cpuid_is_cmt(cpu_t *cpu) 2473 { 2474 if (cpu == NULL) 2475 cpu = CPU; 2476 2477 ASSERT(cpuid_checkpass(cpu, 1)); 2478 2479 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2480 } 2481 2482 /* 2483 * AMD and Intel both implement the 64-bit variant of the syscall 2484 * instruction (syscallq), so if there's -any- support for syscall, 2485 * cpuid currently says "yes, we support this". 2486 * 2487 * However, Intel decided to -not- implement the 32-bit variant of the 2488 * syscall instruction, so we provide a predicate to allow our caller 2489 * to test that subtlety here. 2490 * 2491 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2492 * even in the case where the hardware would in fact support it. 2493 */ 2494 /*ARGSUSED*/ 2495 int 2496 cpuid_syscall32_insn(cpu_t *cpu) 2497 { 2498 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 2499 2500 #if !defined(__xpv) 2501 if (cpu == NULL) 2502 cpu = CPU; 2503 2504 /*CSTYLED*/ 2505 { 2506 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2507 2508 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2509 cpi->cpi_xmaxeax >= 0x80000001 && 2510 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2511 return (1); 2512 } 2513 #endif 2514 return (0); 2515 } 2516 2517 int 2518 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2519 { 2520 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2521 2522 static const char fmt[] = 2523 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2524 static const char fmt_ht[] = 2525 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2526 2527 ASSERT(cpuid_checkpass(cpu, 1)); 2528 2529 if (cpuid_is_cmt(cpu)) 2530 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2531 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2532 cpi->cpi_family, cpi->cpi_model, 2533 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2534 return (snprintf(s, n, fmt, 2535 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2536 cpi->cpi_family, cpi->cpi_model, 2537 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2538 } 2539 2540 const char * 2541 cpuid_getvendorstr(cpu_t *cpu) 2542 { 2543 ASSERT(cpuid_checkpass(cpu, 1)); 2544 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2545 } 2546 2547 uint_t 2548 cpuid_getvendor(cpu_t *cpu) 2549 { 2550 ASSERT(cpuid_checkpass(cpu, 1)); 2551 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2552 } 2553 2554 uint_t 2555 cpuid_getfamily(cpu_t *cpu) 2556 { 2557 ASSERT(cpuid_checkpass(cpu, 1)); 2558 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2559 } 2560 2561 uint_t 2562 cpuid_getmodel(cpu_t *cpu) 2563 { 2564 ASSERT(cpuid_checkpass(cpu, 1)); 2565 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2566 } 2567 2568 uint_t 2569 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2570 { 2571 ASSERT(cpuid_checkpass(cpu, 1)); 2572 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2573 } 2574 2575 uint_t 2576 cpuid_get_ncore_per_chip(cpu_t *cpu) 2577 { 2578 
ASSERT(cpuid_checkpass(cpu, 1)); 2579 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2580 } 2581 2582 uint_t 2583 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2584 { 2585 ASSERT(cpuid_checkpass(cpu, 2)); 2586 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2587 } 2588 2589 id_t 2590 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2591 { 2592 ASSERT(cpuid_checkpass(cpu, 2)); 2593 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2594 } 2595 2596 uint_t 2597 cpuid_getstep(cpu_t *cpu) 2598 { 2599 ASSERT(cpuid_checkpass(cpu, 1)); 2600 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2601 } 2602 2603 uint_t 2604 cpuid_getsig(struct cpu *cpu) 2605 { 2606 ASSERT(cpuid_checkpass(cpu, 1)); 2607 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2608 } 2609 2610 uint32_t 2611 cpuid_getchiprev(struct cpu *cpu) 2612 { 2613 ASSERT(cpuid_checkpass(cpu, 1)); 2614 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2615 } 2616 2617 const char * 2618 cpuid_getchiprevstr(struct cpu *cpu) 2619 { 2620 ASSERT(cpuid_checkpass(cpu, 1)); 2621 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2622 } 2623 2624 uint32_t 2625 cpuid_getsockettype(struct cpu *cpu) 2626 { 2627 ASSERT(cpuid_checkpass(cpu, 1)); 2628 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2629 } 2630 2631 const char * 2632 cpuid_getsocketstr(cpu_t *cpu) 2633 { 2634 static const char *socketstr = NULL; 2635 struct cpuid_info *cpi; 2636 2637 ASSERT(cpuid_checkpass(cpu, 1)); 2638 cpi = cpu->cpu_m.mcpu_cpi; 2639 2640 /* Assume that socket types are the same across the system */ 2641 if (socketstr == NULL) 2642 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 2643 cpi->cpi_model, cpi->cpi_step); 2644 2645 2646 return (socketstr); 2647 } 2648 2649 int 2650 cpuid_get_chipid(cpu_t *cpu) 2651 { 2652 ASSERT(cpuid_checkpass(cpu, 1)); 2653 2654 if (cpuid_is_cmt(cpu)) 2655 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2656 return (cpu->cpu_id); 2657 } 2658 2659 id_t 2660 cpuid_get_coreid(cpu_t *cpu) 2661 { 2662 ASSERT(cpuid_checkpass(cpu, 1)); 2663 
return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2664 } 2665 2666 int 2667 cpuid_get_pkgcoreid(cpu_t *cpu) 2668 { 2669 ASSERT(cpuid_checkpass(cpu, 1)); 2670 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2671 } 2672 2673 int 2674 cpuid_get_clogid(cpu_t *cpu) 2675 { 2676 ASSERT(cpuid_checkpass(cpu, 1)); 2677 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2678 } 2679 2680 int 2681 cpuid_get_cacheid(cpu_t *cpu) 2682 { 2683 ASSERT(cpuid_checkpass(cpu, 1)); 2684 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2685 } 2686 2687 uint_t 2688 cpuid_get_procnodeid(cpu_t *cpu) 2689 { 2690 ASSERT(cpuid_checkpass(cpu, 1)); 2691 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid); 2692 } 2693 2694 uint_t 2695 cpuid_get_procnodes_per_pkg(cpu_t *cpu) 2696 { 2697 ASSERT(cpuid_checkpass(cpu, 1)); 2698 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg); 2699 } 2700 2701 /*ARGSUSED*/ 2702 int 2703 cpuid_have_cr8access(cpu_t *cpu) 2704 { 2705 #if defined(__amd64) 2706 return (1); 2707 #else 2708 struct cpuid_info *cpi; 2709 2710 ASSERT(cpu != NULL); 2711 cpi = cpu->cpu_m.mcpu_cpi; 2712 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 2713 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 2714 return (1); 2715 return (0); 2716 #endif 2717 } 2718 2719 uint32_t 2720 cpuid_get_apicid(cpu_t *cpu) 2721 { 2722 ASSERT(cpuid_checkpass(cpu, 1)); 2723 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 2724 return (UINT32_MAX); 2725 } else { 2726 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 2727 } 2728 } 2729 2730 void 2731 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2732 { 2733 struct cpuid_info *cpi; 2734 2735 if (cpu == NULL) 2736 cpu = CPU; 2737 cpi = cpu->cpu_m.mcpu_cpi; 2738 2739 ASSERT(cpuid_checkpass(cpu, 1)); 2740 2741 if (pabits) 2742 *pabits = cpi->cpi_pabits; 2743 if (vabits) 2744 *vabits = cpi->cpi_vabits; 2745 } 2746 2747 /* 2748 * Returns the number of data TLB entries for a corresponding 2749 * pagesize. 
If it can't be computed, or isn't known, the 2750 * routine returns zero. If you ask about an architecturally 2751 * impossible pagesize, the routine will panic (so that the 2752 * hat implementor knows that things are inconsistent.) 2753 */ 2754 uint_t 2755 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2756 { 2757 struct cpuid_info *cpi; 2758 uint_t dtlb_nent = 0; 2759 2760 if (cpu == NULL) 2761 cpu = CPU; 2762 cpi = cpu->cpu_m.mcpu_cpi; 2763 2764 ASSERT(cpuid_checkpass(cpu, 1)); 2765 2766 /* 2767 * Check the L2 TLB info 2768 */ 2769 if (cpi->cpi_xmaxeax >= 0x80000006) { 2770 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2771 2772 switch (pagesize) { 2773 2774 case 4 * 1024: 2775 /* 2776 * All zero in the top 16 bits of the register 2777 * indicates a unified TLB. Size is in low 16 bits. 2778 */ 2779 if ((cp->cp_ebx & 0xffff0000) == 0) 2780 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2781 else 2782 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2783 break; 2784 2785 case 2 * 1024 * 1024: 2786 if ((cp->cp_eax & 0xffff0000) == 0) 2787 dtlb_nent = cp->cp_eax & 0x0000ffff; 2788 else 2789 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2790 break; 2791 2792 default: 2793 panic("unknown L2 pagesize"); 2794 /*NOTREACHED*/ 2795 } 2796 } 2797 2798 if (dtlb_nent != 0) 2799 return (dtlb_nent); 2800 2801 /* 2802 * No L2 TLB support for this size, try L1. 2803 */ 2804 if (cpi->cpi_xmaxeax >= 0x80000005) { 2805 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2806 2807 switch (pagesize) { 2808 case 4 * 1024: 2809 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2810 break; 2811 case 2 * 1024 * 1024: 2812 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2813 break; 2814 default: 2815 panic("unknown L1 d-TLB pagesize"); 2816 /*NOTREACHED*/ 2817 } 2818 } 2819 2820 return (dtlb_nent); 2821 } 2822 2823 /* 2824 * Return 0 if the erratum is not present or not applicable, positive 2825 * if it is, and negative if the status of the erratum is unknown. 
2826 * 2827 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2828 * Processors" #25759, Rev 3.57, August 2005 2829 */ 2830 int 2831 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2832 { 2833 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2834 uint_t eax; 2835 2836 /* 2837 * Bail out if this CPU isn't an AMD CPU, or if it's 2838 * a legacy (32-bit) AMD CPU. 2839 */ 2840 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2841 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2842 cpi->cpi_family == 6) 2843 2844 return (0); 2845 2846 eax = cpi->cpi_std[1].cp_eax; 2847 2848 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2849 #define SH_B3(eax) (eax == 0xf51) 2850 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2851 2852 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2853 2854 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2855 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2856 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2857 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2858 2859 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2860 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2861 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2862 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2863 2864 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2865 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2866 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2867 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2868 #define BH_E4(eax) (eax == 0x20fb1) 2869 #define SH_E5(eax) (eax == 0x20f42) 2870 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2871 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2872 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2873 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2874 DH_E6(eax) || JH_E6(eax)) 2875 2876 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 
0x100f02) 2877 #define DR_B0(eax) (eax == 0x100f20) 2878 #define DR_B1(eax) (eax == 0x100f21) 2879 #define DR_BA(eax) (eax == 0x100f2a) 2880 #define DR_B2(eax) (eax == 0x100f22) 2881 #define DR_B3(eax) (eax == 0x100f23) 2882 #define RB_C0(eax) (eax == 0x100f40) 2883 2884 switch (erratum) { 2885 case 1: 2886 return (cpi->cpi_family < 0x10); 2887 case 51: /* what does the asterisk mean? */ 2888 return (B(eax) || SH_C0(eax) || CG(eax)); 2889 case 52: 2890 return (B(eax)); 2891 case 57: 2892 return (cpi->cpi_family <= 0x11); 2893 case 58: 2894 return (B(eax)); 2895 case 60: 2896 return (cpi->cpi_family <= 0x11); 2897 case 61: 2898 case 62: 2899 case 63: 2900 case 64: 2901 case 65: 2902 case 66: 2903 case 68: 2904 case 69: 2905 case 70: 2906 case 71: 2907 return (B(eax)); 2908 case 72: 2909 return (SH_B0(eax)); 2910 case 74: 2911 return (B(eax)); 2912 case 75: 2913 return (cpi->cpi_family < 0x10); 2914 case 76: 2915 return (B(eax)); 2916 case 77: 2917 return (cpi->cpi_family <= 0x11); 2918 case 78: 2919 return (B(eax) || SH_C0(eax)); 2920 case 79: 2921 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2922 case 80: 2923 case 81: 2924 case 82: 2925 return (B(eax)); 2926 case 83: 2927 return (B(eax) || SH_C0(eax) || CG(eax)); 2928 case 85: 2929 return (cpi->cpi_family < 0x10); 2930 case 86: 2931 return (SH_C0(eax) || CG(eax)); 2932 case 88: 2933 #if !defined(__amd64) 2934 return (0); 2935 #else 2936 return (B(eax) || SH_C0(eax)); 2937 #endif 2938 case 89: 2939 return (cpi->cpi_family < 0x10); 2940 case 90: 2941 return (B(eax) || SH_C0(eax) || CG(eax)); 2942 case 91: 2943 case 92: 2944 return (B(eax) || SH_C0(eax)); 2945 case 93: 2946 return (SH_C0(eax)); 2947 case 94: 2948 return (B(eax) || SH_C0(eax) || CG(eax)); 2949 case 95: 2950 #if !defined(__amd64) 2951 return (0); 2952 #else 2953 return (B(eax) || SH_C0(eax)); 2954 #endif 2955 case 96: 2956 return (B(eax) || SH_C0(eax) || CG(eax)); 2957 case 97: 2958 case 98: 2959 return (SH_C0(eax) || CG(eax)); 2960 
case 99: 2961 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2962 case 100: 2963 return (B(eax) || SH_C0(eax)); 2964 case 101: 2965 case 103: 2966 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2967 case 104: 2968 return (SH_C0(eax) || CG(eax) || D0(eax)); 2969 case 105: 2970 case 106: 2971 case 107: 2972 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2973 case 108: 2974 return (DH_CG(eax)); 2975 case 109: 2976 return (SH_C0(eax) || CG(eax) || D0(eax)); 2977 case 110: 2978 return (D0(eax) || EX(eax)); 2979 case 111: 2980 return (CG(eax)); 2981 case 112: 2982 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2983 case 113: 2984 return (eax == 0x20fc0); 2985 case 114: 2986 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2987 case 115: 2988 return (SH_E0(eax) || JH_E1(eax)); 2989 case 116: 2990 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2991 case 117: 2992 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2993 case 118: 2994 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2995 JH_E6(eax)); 2996 case 121: 2997 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2998 case 122: 2999 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 3000 case 123: 3001 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 3002 case 131: 3003 return (cpi->cpi_family < 0x10); 3004 case 6336786: 3005 /* 3006 * Test for AdvPowerMgmtInfo.TscPStateInvariant 3007 * if this is a K8 family or newer processor 3008 */ 3009 if (CPI_FAMILY(cpi) == 0xf) { 3010 struct cpuid_regs regs; 3011 regs.cp_eax = 0x80000007; 3012 (void) __cpuid_insn(®s); 3013 return (!(regs.cp_edx & 0x100)); 3014 } 3015 return (0); 3016 case 6323525: 3017 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 3018 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 3019 3020 case 6671130: 3021 /* 3022 * check for processors (pre-Shanghai) that do not provide 3023 * optimal management of 1gb ptes in its tlb. 
3024 */ 3025 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 3026 3027 case 298: 3028 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 3029 DR_B2(eax) || RB_C0(eax)); 3030 3031 default: 3032 return (-1); 3033 3034 } 3035 } 3036 3037 /* 3038 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 3039 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 3040 */ 3041 int 3042 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 3043 { 3044 struct cpuid_info *cpi; 3045 uint_t osvwid; 3046 static int osvwfeature = -1; 3047 uint64_t osvwlength; 3048 3049 3050 cpi = cpu->cpu_m.mcpu_cpi; 3051 3052 /* confirm OSVW supported */ 3053 if (osvwfeature == -1) { 3054 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 3055 } else { 3056 /* assert that osvw feature setting is consistent on all cpus */ 3057 ASSERT(osvwfeature == 3058 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 3059 } 3060 if (!osvwfeature) 3061 return (-1); 3062 3063 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 3064 3065 switch (erratum) { 3066 case 298: /* osvwid is 0 */ 3067 osvwid = 0; 3068 if (osvwlength <= (uint64_t)osvwid) { 3069 /* osvwid 0 is unknown */ 3070 return (-1); 3071 } 3072 3073 /* 3074 * Check the OSVW STATUS MSR to determine the state 3075 * of the erratum where: 3076 * 0 - fixed by HW 3077 * 1 - BIOS has applied the workaround when BIOS 3078 * workaround is available. (Or for other errata, 3079 * OS workaround is required.) 3080 * For a value of 1, caller will confirm that the 3081 * erratum 298 workaround has indeed been applied by BIOS. 3082 * 3083 * A 1 may be set in cpus that have a HW fix 3084 * in a mixed cpu system. Regarding erratum 298: 3085 * In a multiprocessor platform, the workaround above 3086 * should be applied to all processors regardless of 3087 * silicon revision when an affected processor is 3088 * present. 
3089 */ 3090 3091 return (rdmsr(MSR_AMD_OSVW_STATUS + 3092 (osvwid / OSVW_ID_CNT_PER_MSR)) & 3093 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 3094 3095 default: 3096 return (-1); 3097 } 3098 } 3099 3100 static const char assoc_str[] = "associativity"; 3101 static const char line_str[] = "line-size"; 3102 static const char size_str[] = "size"; 3103 3104 static void 3105 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 3106 uint32_t val) 3107 { 3108 char buf[128]; 3109 3110 /* 3111 * ndi_prop_update_int() is used because it is desirable for 3112 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 3113 */ 3114 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 3115 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 3116 } 3117 3118 /* 3119 * Intel-style cache/tlb description 3120 * 3121 * Standard cpuid level 2 gives a randomly ordered 3122 * selection of tags that index into a table that describes 3123 * cache and tlb properties. 3124 */ 3125 3126 static const char l1_icache_str[] = "l1-icache"; 3127 static const char l1_dcache_str[] = "l1-dcache"; 3128 static const char l2_cache_str[] = "l2-cache"; 3129 static const char l3_cache_str[] = "l3-cache"; 3130 static const char itlb4k_str[] = "itlb-4K"; 3131 static const char dtlb4k_str[] = "dtlb-4K"; 3132 static const char itlb2M_str[] = "itlb-2M"; 3133 static const char itlb4M_str[] = "itlb-4M"; 3134 static const char dtlb4M_str[] = "dtlb-4M"; 3135 static const char dtlb24_str[] = "dtlb0-2M-4M"; 3136 static const char itlb424_str[] = "itlb-4K-2M-4M"; 3137 static const char itlb24_str[] = "itlb-2M-4M"; 3138 static const char dtlb44_str[] = "dtlb-4K-4M"; 3139 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 3140 static const char sl2_cache_str[] = "sectored-l2-cache"; 3141 static const char itrace_str[] = "itrace-cache"; 3142 static const char sl3_cache_str[] = "sectored-l3-cache"; 3143 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3144 3145 static 
const struct cachetab { 3146 uint8_t ct_code; 3147 uint8_t ct_assoc; 3148 uint16_t ct_line_size; 3149 size_t ct_size; 3150 const char *ct_label; 3151 } intel_ctab[] = { 3152 /* 3153 * maintain descending order! 3154 * 3155 * Codes ignored - Reason 3156 * ---------------------- 3157 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3158 * f0H/f1H - Currently we do not interpret prefetch size by design 3159 */ 3160 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3161 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3162 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3163 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3164 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3165 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3166 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3167 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3168 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3169 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3170 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3171 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3172 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3173 { 0xc0, 4, 0, 8, dtlb44_str }, 3174 { 0xba, 4, 0, 64, dtlb4k_str }, 3175 { 0xb4, 4, 0, 256, dtlb4k_str }, 3176 { 0xb3, 4, 0, 128, dtlb4k_str }, 3177 { 0xb2, 4, 0, 64, itlb4k_str }, 3178 { 0xb0, 4, 0, 128, itlb4k_str }, 3179 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3180 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3181 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3182 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3183 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3184 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3185 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3186 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3187 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3188 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3189 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3190 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3191 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3192 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3193 { 0x73, 8, 0, 64*1024, itrace_str}, 3194 { 0x72, 8, 0, 32*1024, 
itrace_str}, 3195 { 0x71, 8, 0, 16*1024, itrace_str}, 3196 { 0x70, 8, 0, 12*1024, itrace_str}, 3197 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3198 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3199 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3200 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3201 { 0x5d, 0, 0, 256, dtlb44_str}, 3202 { 0x5c, 0, 0, 128, dtlb44_str}, 3203 { 0x5b, 0, 0, 64, dtlb44_str}, 3204 { 0x5a, 4, 0, 32, dtlb24_str}, 3205 { 0x59, 0, 0, 16, dtlb4k_str}, 3206 { 0x57, 4, 0, 16, dtlb4k_str}, 3207 { 0x56, 4, 0, 16, dtlb4M_str}, 3208 { 0x55, 0, 0, 7, itlb24_str}, 3209 { 0x52, 0, 0, 256, itlb424_str}, 3210 { 0x51, 0, 0, 128, itlb424_str}, 3211 { 0x50, 0, 0, 64, itlb424_str}, 3212 { 0x4f, 0, 0, 32, itlb4k_str}, 3213 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3214 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3215 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3216 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3217 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3218 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3219 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3220 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3221 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3222 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3223 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3224 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3225 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3226 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3227 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3228 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3229 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3230 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3231 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3232 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3233 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3234 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3235 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3236 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3237 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3238 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3239 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3240 { 0x0d, 4, 
32, 16*1024, l1_dcache_str}, 3241 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3242 { 0x0b, 4, 0, 4, itlb4M_str}, 3243 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3244 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3245 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3246 { 0x05, 4, 0, 32, dtlb4M_str}, 3247 { 0x04, 4, 0, 8, dtlb4M_str}, 3248 { 0x03, 4, 0, 64, dtlb4k_str}, 3249 { 0x02, 4, 0, 2, itlb4M_str}, 3250 { 0x01, 4, 0, 32, itlb4k_str}, 3251 { 0 } 3252 }; 3253 3254 static const struct cachetab cyrix_ctab[] = { 3255 { 0x70, 4, 0, 32, "tlb-4K" }, 3256 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3257 { 0 } 3258 }; 3259 3260 /* 3261 * Search a cache table for a matching entry 3262 */ 3263 static const struct cachetab * 3264 find_cacheent(const struct cachetab *ct, uint_t code) 3265 { 3266 if (code != 0) { 3267 for (; ct->ct_code != 0; ct++) 3268 if (ct->ct_code <= code) 3269 break; 3270 if (ct->ct_code == code) 3271 return (ct); 3272 } 3273 return (NULL); 3274 } 3275 3276 /* 3277 * Populate cachetab entry with L2 or L3 cache-information using 3278 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3279 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3280 * information is found. 
3281 */ 3282 static int 3283 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3284 { 3285 uint32_t level, i; 3286 int ret = 0; 3287 3288 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3289 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3290 3291 if (level == 2 || level == 3) { 3292 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3293 ct->ct_line_size = 3294 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3295 ct->ct_size = ct->ct_assoc * 3296 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3297 ct->ct_line_size * 3298 (cpi->cpi_std_4[i]->cp_ecx + 1); 3299 3300 if (level == 2) { 3301 ct->ct_label = l2_cache_str; 3302 } else if (level == 3) { 3303 ct->ct_label = l3_cache_str; 3304 } 3305 ret = 1; 3306 } 3307 } 3308 3309 return (ret); 3310 } 3311 3312 /* 3313 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3314 * The walk is terminated if the walker returns non-zero. 3315 */ 3316 static void 3317 intel_walk_cacheinfo(struct cpuid_info *cpi, 3318 void *arg, int (*func)(void *, const struct cachetab *)) 3319 { 3320 const struct cachetab *ct; 3321 struct cachetab des_49_ct, des_b1_ct; 3322 uint8_t *dp; 3323 int i; 3324 3325 if ((dp = cpi->cpi_cacheinfo) == NULL) 3326 return; 3327 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3328 /* 3329 * For overloaded descriptor 0x49 we use cpuid function 4 3330 * if supported by the current processor, to create 3331 * cache information. 3332 * For overloaded descriptor 0xb1 we use X86_PAE flag 3333 * to disambiguate the cache information. 
3334 */ 3335 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3336 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3337 ct = &des_49_ct; 3338 } else if (*dp == 0xb1) { 3339 des_b1_ct.ct_code = 0xb1; 3340 des_b1_ct.ct_assoc = 4; 3341 des_b1_ct.ct_line_size = 0; 3342 if (x86_feature & X86_PAE) { 3343 des_b1_ct.ct_size = 8; 3344 des_b1_ct.ct_label = itlb2M_str; 3345 } else { 3346 des_b1_ct.ct_size = 4; 3347 des_b1_ct.ct_label = itlb4M_str; 3348 } 3349 ct = &des_b1_ct; 3350 } else { 3351 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3352 continue; 3353 } 3354 } 3355 3356 if (func(arg, ct) != 0) { 3357 break; 3358 } 3359 } 3360 } 3361 3362 /* 3363 * (Like the Intel one, except for Cyrix CPUs) 3364 */ 3365 static void 3366 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3367 void *arg, int (*func)(void *, const struct cachetab *)) 3368 { 3369 const struct cachetab *ct; 3370 uint8_t *dp; 3371 int i; 3372 3373 if ((dp = cpi->cpi_cacheinfo) == NULL) 3374 return; 3375 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3376 /* 3377 * Search Cyrix-specific descriptor table first .. 3378 */ 3379 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3380 if (func(arg, ct) != 0) 3381 break; 3382 continue; 3383 } 3384 /* 3385 * .. else fall back to the Intel one 3386 */ 3387 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3388 if (func(arg, ct) != 0) 3389 break; 3390 continue; 3391 } 3392 } 3393 } 3394 3395 /* 3396 * A cacheinfo walker that adds associativity, line-size, and size properties 3397 * to the devinfo node it is passed as an argument. 
3398 */ 3399 static int 3400 add_cacheent_props(void *arg, const struct cachetab *ct) 3401 { 3402 dev_info_t *devi = arg; 3403 3404 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3405 if (ct->ct_line_size != 0) 3406 add_cache_prop(devi, ct->ct_label, line_str, 3407 ct->ct_line_size); 3408 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3409 return (0); 3410 } 3411 3412 3413 static const char fully_assoc[] = "fully-associative?"; 3414 3415 /* 3416 * AMD style cache/tlb description 3417 * 3418 * Extended functions 5 and 6 directly describe properties of 3419 * tlbs and various cache levels. 3420 */ 3421 static void 3422 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3423 { 3424 switch (assoc) { 3425 case 0: /* reserved; ignore */ 3426 break; 3427 default: 3428 add_cache_prop(devi, label, assoc_str, assoc); 3429 break; 3430 case 0xff: 3431 add_cache_prop(devi, label, fully_assoc, 1); 3432 break; 3433 } 3434 } 3435 3436 static void 3437 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3438 { 3439 if (size == 0) 3440 return; 3441 add_cache_prop(devi, label, size_str, size); 3442 add_amd_assoc(devi, label, assoc); 3443 } 3444 3445 static void 3446 add_amd_cache(dev_info_t *devi, const char *label, 3447 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3448 { 3449 if (size == 0 || line_size == 0) 3450 return; 3451 add_amd_assoc(devi, label, assoc); 3452 /* 3453 * Most AMD parts have a sectored cache. Multiple cache lines are 3454 * associated with each tag. A sector consists of all cache lines 3455 * associated with a tag. For example, the AMD K6-III has a sector 3456 * size of 2 cache lines per tag. 
3457 */ 3458 if (lines_per_tag != 0) 3459 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3460 add_cache_prop(devi, label, line_str, line_size); 3461 add_cache_prop(devi, label, size_str, size * 1024); 3462 } 3463 3464 static void 3465 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3466 { 3467 switch (assoc) { 3468 case 0: /* off */ 3469 break; 3470 case 1: 3471 case 2: 3472 case 4: 3473 add_cache_prop(devi, label, assoc_str, assoc); 3474 break; 3475 case 6: 3476 add_cache_prop(devi, label, assoc_str, 8); 3477 break; 3478 case 8: 3479 add_cache_prop(devi, label, assoc_str, 16); 3480 break; 3481 case 0xf: 3482 add_cache_prop(devi, label, fully_assoc, 1); 3483 break; 3484 default: /* reserved; ignore */ 3485 break; 3486 } 3487 } 3488 3489 static void 3490 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3491 { 3492 if (size == 0 || assoc == 0) 3493 return; 3494 add_amd_l2_assoc(devi, label, assoc); 3495 add_cache_prop(devi, label, size_str, size); 3496 } 3497 3498 static void 3499 add_amd_l2_cache(dev_info_t *devi, const char *label, 3500 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3501 { 3502 if (size == 0 || assoc == 0 || line_size == 0) 3503 return; 3504 add_amd_l2_assoc(devi, label, assoc); 3505 if (lines_per_tag != 0) 3506 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3507 add_cache_prop(devi, label, line_str, line_size); 3508 add_cache_prop(devi, label, size_str, size * 1024); 3509 } 3510 3511 static void 3512 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3513 { 3514 struct cpuid_regs *cp; 3515 3516 if (cpi->cpi_xmaxeax < 0x80000005) 3517 return; 3518 cp = &cpi->cpi_extd[5]; 3519 3520 /* 3521 * 4M/2M L1 TLB configuration 3522 * 3523 * We report the size for 2M pages because AMD uses two 3524 * TLB entries for one 4M page. 
3525 */ 3526 add_amd_tlb(devi, "dtlb-2M", 3527 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3528 add_amd_tlb(devi, "itlb-2M", 3529 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3530 3531 /* 3532 * 4K L1 TLB configuration 3533 */ 3534 3535 switch (cpi->cpi_vendor) { 3536 uint_t nentries; 3537 case X86_VENDOR_TM: 3538 if (cpi->cpi_family >= 5) { 3539 /* 3540 * Crusoe processors have 256 TLB entries, but 3541 * cpuid data format constrains them to only 3542 * reporting 255 of them. 3543 */ 3544 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3545 nentries = 256; 3546 /* 3547 * Crusoe processors also have a unified TLB 3548 */ 3549 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3550 nentries); 3551 break; 3552 } 3553 /*FALLTHROUGH*/ 3554 default: 3555 add_amd_tlb(devi, itlb4k_str, 3556 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3557 add_amd_tlb(devi, dtlb4k_str, 3558 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3559 break; 3560 } 3561 3562 /* 3563 * data L1 cache configuration 3564 */ 3565 3566 add_amd_cache(devi, l1_dcache_str, 3567 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3568 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3569 3570 /* 3571 * code L1 cache configuration 3572 */ 3573 3574 add_amd_cache(devi, l1_icache_str, 3575 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3576 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3577 3578 if (cpi->cpi_xmaxeax < 0x80000006) 3579 return; 3580 cp = &cpi->cpi_extd[6]; 3581 3582 /* Check for a unified L2 TLB for large pages */ 3583 3584 if (BITX(cp->cp_eax, 31, 16) == 0) 3585 add_amd_l2_tlb(devi, "l2-tlb-2M", 3586 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3587 else { 3588 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3589 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3590 add_amd_l2_tlb(devi, "l2-itlb-2M", 3591 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3592 } 3593 3594 /* Check for a unified L2 TLB for 4K pages */ 3595 3596 if (BITX(cp->cp_ebx, 31, 16) 
== 0) { 3597 add_amd_l2_tlb(devi, "l2-tlb-4K", 3598 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3599 } else { 3600 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3601 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3602 add_amd_l2_tlb(devi, "l2-itlb-4K", 3603 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3604 } 3605 3606 add_amd_l2_cache(devi, l2_cache_str, 3607 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3608 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3609 } 3610 3611 /* 3612 * There are two basic ways that the x86 world describes it cache 3613 * and tlb architecture - Intel's way and AMD's way. 3614 * 3615 * Return which flavor of cache architecture we should use 3616 */ 3617 static int 3618 x86_which_cacheinfo(struct cpuid_info *cpi) 3619 { 3620 switch (cpi->cpi_vendor) { 3621 case X86_VENDOR_Intel: 3622 if (cpi->cpi_maxeax >= 2) 3623 return (X86_VENDOR_Intel); 3624 break; 3625 case X86_VENDOR_AMD: 3626 /* 3627 * The K5 model 1 was the first part from AMD that reported 3628 * cache sizes via extended cpuid functions. 3629 */ 3630 if (cpi->cpi_family > 5 || 3631 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3632 return (X86_VENDOR_AMD); 3633 break; 3634 case X86_VENDOR_TM: 3635 if (cpi->cpi_family >= 5) 3636 return (X86_VENDOR_AMD); 3637 /*FALLTHROUGH*/ 3638 default: 3639 /* 3640 * If they have extended CPU data for 0x80000005 3641 * then we assume they have AMD-format cache 3642 * information. 3643 * 3644 * If not, and the vendor happens to be Cyrix, 3645 * then try our-Cyrix specific handler. 3646 * 3647 * If we're not Cyrix, then assume we're using Intel's 3648 * table-driven format instead. 
3649 */ 3650 if (cpi->cpi_xmaxeax >= 0x80000005) 3651 return (X86_VENDOR_AMD); 3652 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3653 return (X86_VENDOR_Cyrix); 3654 else if (cpi->cpi_maxeax >= 2) 3655 return (X86_VENDOR_Intel); 3656 break; 3657 } 3658 return (-1); 3659 } 3660 3661 void 3662 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 3663 struct cpuid_info *cpi) 3664 { 3665 dev_info_t *cpu_devi; 3666 int create; 3667 3668 cpu_devi = (dev_info_t *)dip; 3669 3670 /* device_type */ 3671 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3672 "device_type", "cpu"); 3673 3674 /* reg */ 3675 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3676 "reg", cpu_id); 3677 3678 /* cpu-mhz, and clock-frequency */ 3679 if (cpu_freq > 0) { 3680 long long mul; 3681 3682 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3683 "cpu-mhz", cpu_freq); 3684 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3685 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3686 "clock-frequency", (int)mul); 3687 } 3688 3689 if ((x86_feature & X86_CPUID) == 0) { 3690 return; 3691 } 3692 3693 /* vendor-id */ 3694 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3695 "vendor-id", cpi->cpi_vendorstr); 3696 3697 if (cpi->cpi_maxeax == 0) { 3698 return; 3699 } 3700 3701 /* 3702 * family, model, and step 3703 */ 3704 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3705 "family", CPI_FAMILY(cpi)); 3706 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3707 "cpu-model", CPI_MODEL(cpi)); 3708 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3709 "stepping-id", CPI_STEP(cpi)); 3710 3711 /* type */ 3712 switch (cpi->cpi_vendor) { 3713 case X86_VENDOR_Intel: 3714 create = 1; 3715 break; 3716 default: 3717 create = 0; 3718 break; 3719 } 3720 if (create) 3721 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3722 "type", CPI_TYPE(cpi)); 3723 3724 /* ext-family */ 3725 switch (cpi->cpi_vendor) { 3726 case X86_VENDOR_Intel: 3727 case X86_VENDOR_AMD: 3728 create = 
cpi->cpi_family >= 0xf; 3729 break; 3730 default: 3731 create = 0; 3732 break; 3733 } 3734 if (create) 3735 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3736 "ext-family", CPI_FAMILY_XTD(cpi)); 3737 3738 /* ext-model */ 3739 switch (cpi->cpi_vendor) { 3740 case X86_VENDOR_Intel: 3741 create = IS_EXTENDED_MODEL_INTEL(cpi); 3742 break; 3743 case X86_VENDOR_AMD: 3744 create = CPI_FAMILY(cpi) == 0xf; 3745 break; 3746 default: 3747 create = 0; 3748 break; 3749 } 3750 if (create) 3751 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3752 "ext-model", CPI_MODEL_XTD(cpi)); 3753 3754 /* generation */ 3755 switch (cpi->cpi_vendor) { 3756 case X86_VENDOR_AMD: 3757 /* 3758 * AMD K5 model 1 was the first part to support this 3759 */ 3760 create = cpi->cpi_xmaxeax >= 0x80000001; 3761 break; 3762 default: 3763 create = 0; 3764 break; 3765 } 3766 if (create) 3767 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3768 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3769 3770 /* brand-id */ 3771 switch (cpi->cpi_vendor) { 3772 case X86_VENDOR_Intel: 3773 /* 3774 * brand id first appeared on Pentium III Xeon model 8, 3775 * and Celeron model 8 processors and Opteron 3776 */ 3777 create = cpi->cpi_family > 6 || 3778 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3779 break; 3780 case X86_VENDOR_AMD: 3781 create = cpi->cpi_family >= 0xf; 3782 break; 3783 default: 3784 create = 0; 3785 break; 3786 } 3787 if (create && cpi->cpi_brandid != 0) { 3788 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3789 "brand-id", cpi->cpi_brandid); 3790 } 3791 3792 /* chunks, and apic-id */ 3793 switch (cpi->cpi_vendor) { 3794 /* 3795 * first available on Pentium IV and Opteron (K8) 3796 */ 3797 case X86_VENDOR_Intel: 3798 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3799 break; 3800 case X86_VENDOR_AMD: 3801 create = cpi->cpi_family >= 0xf; 3802 break; 3803 default: 3804 create = 0; 3805 break; 3806 } 3807 if (create) { 3808 (void) ndi_prop_update_int(DDI_DEV_T_NONE, 
cpu_devi, 3809 "chunks", CPI_CHUNKS(cpi)); 3810 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3811 "apic-id", cpi->cpi_apicid); 3812 if (cpi->cpi_chipid >= 0) { 3813 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3814 "chip#", cpi->cpi_chipid); 3815 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3816 "clog#", cpi->cpi_clogid); 3817 } 3818 } 3819 3820 /* cpuid-features */ 3821 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3822 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3823 3824 3825 /* cpuid-features-ecx */ 3826 switch (cpi->cpi_vendor) { 3827 case X86_VENDOR_Intel: 3828 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3829 break; 3830 default: 3831 create = 0; 3832 break; 3833 } 3834 if (create) 3835 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3836 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3837 3838 /* ext-cpuid-features */ 3839 switch (cpi->cpi_vendor) { 3840 case X86_VENDOR_Intel: 3841 case X86_VENDOR_AMD: 3842 case X86_VENDOR_Cyrix: 3843 case X86_VENDOR_TM: 3844 case X86_VENDOR_Centaur: 3845 create = cpi->cpi_xmaxeax >= 0x80000001; 3846 break; 3847 default: 3848 create = 0; 3849 break; 3850 } 3851 if (create) { 3852 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3853 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3854 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3855 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3856 } 3857 3858 /* 3859 * Brand String first appeared in Intel Pentium IV, AMD K5 3860 * model 1, and Cyrix GXm. On earlier models we try and 3861 * simulate something similar .. so this string should always 3862 * same -something- about the processor, however lame. 
3863 */ 3864 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3865 "brand-string", cpi->cpi_brandstr); 3866 3867 /* 3868 * Finally, cache and tlb information 3869 */ 3870 switch (x86_which_cacheinfo(cpi)) { 3871 case X86_VENDOR_Intel: 3872 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3873 break; 3874 case X86_VENDOR_Cyrix: 3875 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3876 break; 3877 case X86_VENDOR_AMD: 3878 amd_cache_info(cpi, cpu_devi); 3879 break; 3880 default: 3881 break; 3882 } 3883 } 3884 3885 struct l2info { 3886 int *l2i_csz; 3887 int *l2i_lsz; 3888 int *l2i_assoc; 3889 int l2i_ret; 3890 }; 3891 3892 /* 3893 * A cacheinfo walker that fetches the size, line-size and associativity 3894 * of the L2 cache 3895 */ 3896 static int 3897 intel_l2cinfo(void *arg, const struct cachetab *ct) 3898 { 3899 struct l2info *l2i = arg; 3900 int *ip; 3901 3902 if (ct->ct_label != l2_cache_str && 3903 ct->ct_label != sl2_cache_str) 3904 return (0); /* not an L2 -- keep walking */ 3905 3906 if ((ip = l2i->l2i_csz) != NULL) 3907 *ip = ct->ct_size; 3908 if ((ip = l2i->l2i_lsz) != NULL) 3909 *ip = ct->ct_line_size; 3910 if ((ip = l2i->l2i_assoc) != NULL) 3911 *ip = ct->ct_assoc; 3912 l2i->l2i_ret = ct->ct_size; 3913 return (1); /* was an L2 -- terminate walk */ 3914 } 3915 3916 /* 3917 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3918 * 3919 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3920 * value is the associativity, the associativity for the L2 cache and 3921 * tlb is encoded in the following table. The 4 bit L2 value serves as 3922 * an index into the amd_afd[] array to determine the associativity. 3923 * -1 is undefined. 0 is fully associative. 
3924 */ 3925 3926 static int amd_afd[] = 3927 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3928 3929 static void 3930 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3931 { 3932 struct cpuid_regs *cp; 3933 uint_t size, assoc; 3934 int i; 3935 int *ip; 3936 3937 if (cpi->cpi_xmaxeax < 0x80000006) 3938 return; 3939 cp = &cpi->cpi_extd[6]; 3940 3941 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3942 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3943 uint_t cachesz = size * 1024; 3944 assoc = amd_afd[i]; 3945 3946 ASSERT(assoc != -1); 3947 3948 if ((ip = l2i->l2i_csz) != NULL) 3949 *ip = cachesz; 3950 if ((ip = l2i->l2i_lsz) != NULL) 3951 *ip = BITX(cp->cp_ecx, 7, 0); 3952 if ((ip = l2i->l2i_assoc) != NULL) 3953 *ip = assoc; 3954 l2i->l2i_ret = cachesz; 3955 } 3956 } 3957 3958 int 3959 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3960 { 3961 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3962 struct l2info __l2info, *l2i = &__l2info; 3963 3964 l2i->l2i_csz = csz; 3965 l2i->l2i_lsz = lsz; 3966 l2i->l2i_assoc = assoc; 3967 l2i->l2i_ret = -1; 3968 3969 switch (x86_which_cacheinfo(cpi)) { 3970 case X86_VENDOR_Intel: 3971 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3972 break; 3973 case X86_VENDOR_Cyrix: 3974 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3975 break; 3976 case X86_VENDOR_AMD: 3977 amd_l2cacheinfo(cpi, l2i); 3978 break; 3979 default: 3980 break; 3981 } 3982 return (l2i->l2i_ret); 3983 } 3984 3985 #if !defined(__xpv) 3986 3987 uint32_t * 3988 cpuid_mwait_alloc(cpu_t *cpu) 3989 { 3990 uint32_t *ret; 3991 size_t mwait_size; 3992 3993 ASSERT(cpuid_checkpass(CPU, 2)); 3994 3995 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3996 if (mwait_size == 0) 3997 return (NULL); 3998 3999 /* 4000 * kmem_alloc() returns cache line size aligned data for mwait_size 4001 * allocations. mwait_size is currently cache line sized. Neither 4002 * of these implementation details are guarantied to be true in the 4003 * future. 
4004 * 4005 * First try allocating mwait_size as kmem_alloc() currently returns 4006 * correctly aligned memory. If kmem_alloc() does not return 4007 * mwait_size aligned memory, then use mwait_size ROUNDUP. 4008 * 4009 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 4010 * decide to free this memory. 4011 */ 4012 ret = kmem_zalloc(mwait_size, KM_SLEEP); 4013 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 4014 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4015 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 4016 *ret = MWAIT_RUNNING; 4017 return (ret); 4018 } else { 4019 kmem_free(ret, mwait_size); 4020 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 4021 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4022 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 4023 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 4024 *ret = MWAIT_RUNNING; 4025 return (ret); 4026 } 4027 } 4028 4029 void 4030 cpuid_mwait_free(cpu_t *cpu) 4031 { 4032 if (cpu->cpu_m.mcpu_cpi == NULL) { 4033 return; 4034 } 4035 4036 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 4037 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 4038 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 4039 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 4040 } 4041 4042 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 4043 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 4044 } 4045 4046 void 4047 patch_tsc_read(int flag) 4048 { 4049 size_t cnt; 4050 4051 switch (flag) { 4052 case X86_NO_TSC: 4053 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 4054 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 4055 break; 4056 case X86_HAVE_TSCP: 4057 cnt = &_tscp_end - &_tscp_start; 4058 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 4059 break; 4060 case X86_TSC_MFENCE: 4061 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 4062 (void) memcpy((void *)tsc_read, 4063 (void *)&_tsc_mfence_start, cnt); 4064 break; 4065 case X86_TSC_LFENCE: 4066 cnt 
= &_tsc_lfence_end - &_tsc_lfence_start; 4067 (void) memcpy((void *)tsc_read, 4068 (void *)&_tsc_lfence_start, cnt); 4069 break; 4070 default: 4071 break; 4072 } 4073 } 4074 4075 int 4076 cpuid_deep_cstates_supported(void) 4077 { 4078 struct cpuid_info *cpi; 4079 struct cpuid_regs regs; 4080 4081 ASSERT(cpuid_checkpass(CPU, 1)); 4082 4083 cpi = CPU->cpu_m.mcpu_cpi; 4084 4085 if (!(x86_feature & X86_CPUID)) 4086 return (0); 4087 4088 switch (cpi->cpi_vendor) { 4089 case X86_VENDOR_Intel: 4090 if (cpi->cpi_xmaxeax < 0x80000007) 4091 return (0); 4092 4093 /* 4094 * TSC run at a constant rate in all ACPI C-states? 4095 */ 4096 regs.cp_eax = 0x80000007; 4097 (void) __cpuid_insn(®s); 4098 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 4099 4100 default: 4101 return (0); 4102 } 4103 } 4104 4105 #endif /* !__xpv */ 4106 4107 void 4108 post_startup_cpu_fixups(void) 4109 { 4110 #ifndef __xpv 4111 /* 4112 * Some AMD processors support C1E state. Entering this state will 4113 * cause the local APIC timer to stop, which we can't deal with at 4114 * this time. 4115 */ 4116 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 4117 on_trap_data_t otd; 4118 uint64_t reg; 4119 4120 if (!on_trap(&otd, OT_DATA_ACCESS)) { 4121 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 4122 /* Disable C1E state if it is enabled by BIOS */ 4123 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 4124 AMD_ACTONCMPHALT_MASK) { 4125 reg &= ~(AMD_ACTONCMPHALT_MASK << 4126 AMD_ACTONCMPHALT_SHIFT); 4127 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 4128 } 4129 } 4130 no_trap(); 4131 } 4132 #endif /* !__xpv */ 4133 } 4134 4135 /* 4136 * Starting with the Westmere processor the local 4137 * APIC timer will continue running in all C-states, 4138 * including the deepest C-states. 
4139 */ 4140 int 4141 cpuid_arat_supported(void) 4142 { 4143 struct cpuid_info *cpi; 4144 struct cpuid_regs regs; 4145 4146 ASSERT(cpuid_checkpass(CPU, 1)); 4147 ASSERT(x86_feature & X86_CPUID); 4148 4149 cpi = CPU->cpu_m.mcpu_cpi; 4150 4151 switch (cpi->cpi_vendor) { 4152 case X86_VENDOR_Intel: 4153 /* 4154 * Always-running Local APIC Timer is 4155 * indicated by CPUID.6.EAX[2]. 4156 */ 4157 if (cpi->cpi_maxeax >= 6) { 4158 regs.cp_eax = 6; 4159 (void) cpuid_insn(NULL, ®s); 4160 return (regs.cp_eax & CPUID_CSTATE_ARAT); 4161 } else { 4162 return (0); 4163 } 4164 default: 4165 return (0); 4166 } 4167 } 4168 4169 /* 4170 * Check support for Intel ENERGY_PERF_BIAS feature 4171 */ 4172 int 4173 cpuid_iepb_supported(struct cpu *cp) 4174 { 4175 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi; 4176 struct cpuid_regs regs; 4177 4178 ASSERT(cpuid_checkpass(cp, 1)); 4179 4180 if (!(x86_feature & X86_CPUID) || !(x86_feature & X86_MSR)) { 4181 return (0); 4182 } 4183 4184 /* 4185 * Intel ENERGY_PERF_BIAS MSR is indicated by 4186 * capability bit CPUID.6.ECX.3 4187 */ 4188 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6)) 4189 return (0); 4190 4191 regs.cp_eax = 0x6; 4192 (void) cpuid_insn(NULL, ®s); 4193 return (regs.cp_ecx & CPUID_EPB_SUPPORT); 4194 } 4195 4196 #if defined(__amd64) && !defined(__xpv) 4197 /* 4198 * Patch in versions of bcopy for high performance Intel Nhm processors 4199 * and later... 
4200 */ 4201 void 4202 patch_memops(uint_t vendor) 4203 { 4204 size_t cnt, i; 4205 caddr_t to, from; 4206 4207 if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) { 4208 cnt = &bcopy_patch_end - &bcopy_patch_start; 4209 to = &bcopy_ck_size; 4210 from = &bcopy_patch_start; 4211 for (i = 0; i < cnt; i++) { 4212 *to++ = *from++; 4213 } 4214 } 4215 } 4216 #endif /* __amd64 && !__xpv */ 4217 4218 /* 4219 * This function finds the number of bits to represent the number of cores per 4220 * chip and the number of strands per core for the Intel platforms. 4221 * It re-uses the x2APIC cpuid code of the cpuid_pass2(). 4222 */ 4223 void 4224 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits) 4225 { 4226 struct cpuid_regs regs; 4227 struct cpuid_regs *cp = ®s; 4228 4229 if (vendor != X86_VENDOR_Intel) { 4230 return; 4231 } 4232 4233 /* if the cpuid level is 0xB, extended topo is available. */ 4234 cp->cp_eax = 0; 4235 if (__cpuid_insn(cp) >= 0xB) { 4236 4237 cp->cp_eax = 0xB; 4238 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 4239 (void) __cpuid_insn(cp); 4240 4241 /* 4242 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 4243 * indicates that the extended topology enumeration leaf is 4244 * available. 4245 */ 4246 if (cp->cp_ebx) { 4247 uint_t coreid_shift = 0; 4248 uint_t chipid_shift = 0; 4249 uint_t i; 4250 uint_t level; 4251 4252 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 4253 cp->cp_eax = 0xB; 4254 cp->cp_ecx = i; 4255 4256 (void) __cpuid_insn(cp); 4257 level = CPI_CPU_LEVEL_TYPE(cp); 4258 4259 if (level == 1) { 4260 /* 4261 * Thread level processor topology 4262 * Number of bits shift right APIC ID 4263 * to get the coreid. 4264 */ 4265 coreid_shift = BITX(cp->cp_eax, 4, 0); 4266 } else if (level == 2) { 4267 /* 4268 * Core level processor topology 4269 * Number of bits shift right APIC ID 4270 * to get the chipid. 
4271 */ 4272 chipid_shift = BITX(cp->cp_eax, 4, 0); 4273 } 4274 } 4275 4276 if (coreid_shift > 0 && chipid_shift > coreid_shift) { 4277 *strand_nbits = coreid_shift; 4278 *core_nbits = chipid_shift - coreid_shift; 4279 } 4280 } 4281 } 4282 } 4283