1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 /* 30 * Portions Copyright 2009 Advanced Micro Devices, Inc. 31 */ 32 33 /* 34 * Various routines to handle identification 35 * and classification of x86 processors. 36 */ 37 38 #include <sys/types.h> 39 #include <sys/archsystm.h> 40 #include <sys/x86_archext.h> 41 #include <sys/kmem.h> 42 #include <sys/systm.h> 43 #include <sys/cmn_err.h> 44 #include <sys/sunddi.h> 45 #include <sys/sunndi.h> 46 #include <sys/cpuvar.h> 47 #include <sys/processor.h> 48 #include <sys/sysmacros.h> 49 #include <sys/pg.h> 50 #include <sys/fp.h> 51 #include <sys/controlregs.h> 52 #include <sys/auxv_386.h> 53 #include <sys/bitmap.h> 54 #include <sys/memnode.h> 55 #include <sys/pci_cfgspace.h> 56 57 #ifdef __xpv 58 #include <sys/hypervisor.h> 59 #else 60 #include <sys/ontrap.h> 61 #endif 62 63 /* 64 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 65 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 66 * them accordingly. For most modern processors, feature detection occurs here 67 * in pass 1. 68 * 69 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 70 * for the boot CPU and does the basic analysis that the early kernel needs. 71 * x86_feature is set based on the return value of cpuid_pass1() of the boot 72 * CPU. 73 * 74 * Pass 1 includes: 75 * 76 * o Determining vendor/model/family/stepping and setting x86_type and 77 * x86_vendor accordingly. 78 * o Processing the feature flags returned by the cpuid instruction while 79 * applying any workarounds or tricks for the specific processor. 80 * o Mapping the feature flags into Solaris feature bits (X86_*). 81 * o Processing extended feature flags if supported by the processor, 82 * again while applying specific processor knowledge. 83 * o Determining the CMT characteristics of the system. 84 * 85 * Pass 1 is done on non-boot CPUs during their initialization and the results 86 * are used only as a meager attempt at ensuring that all processors within the 87 * system support the same features. 88 * 89 * Pass 2 of cpuid feature analysis happens just at the beginning 90 * of startup(). It just copies in and corrects the remainder 91 * of the cpuid data we depend on: standard cpuid functions that we didn't 92 * need for pass1 feature analysis, and extended cpuid functions beyond the 93 * simple feature processing done in pass1. 94 * 95 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 96 * particular kernel memory allocation has been made available. It creates a 97 * readable brand string based on the data collected in the first two passes. 98 * 99 * Pass 4 of cpuid analysis is invoked after post_startup() when all 100 * the support infrastructure for various hardware features has been 101 * initialized. It determines which processor features will be reported 102 * to userland via the aux vector. 103 * 104 * All passes are executed on all CPUs, but only the boot CPU determines what 105 * features the kernel will use. 106 * 107 * Much of the worst junk in this file is for the support of processors 108 * that didn't really implement the cpuid instruction properly. 109 * 110 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 111 * the pass numbers. Accordingly, changes to the pass code may require changes 112 * to the accessor code. 113 */ 114 115 uint_t x86_feature = 0; 116 uint_t x86_vendor = X86_VENDOR_IntelClone; 117 uint_t x86_type = X86_TYPE_OTHER; 118 uint_t x86_clflush_size = 0; 119 120 uint_t pentiumpro_bug4046376; 121 uint_t pentiumpro_bug4064495; 122 123 uint_t enable486; 124 /* 125 * This is set to platform type Solaris is running on. 126 */ 127 static int platform_type = -1; 128 129 #if !defined(__xpv) 130 /* 131 * Variable to patch if hypervisor platform detection needs to be 132 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0). 133 */ 134 int enable_platform_detection = 1; 135 #endif 136 137 /* 138 * monitor/mwait info. 139 * 140 * size_actual and buf_actual are the real address and size allocated to get 141 * proper mwait_buf alignement. buf_actual and size_actual should be passed 142 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 143 * processor cache-line alignment, but this is not guarantied in the furture. 144 */ 145 struct mwait_info { 146 size_t mon_min; /* min size to avoid missed wakeups */ 147 size_t mon_max; /* size to avoid false wakeups */ 148 size_t size_actual; /* size actually allocated */ 149 void *buf_actual; /* memory actually allocated */ 150 uint32_t support; /* processor support of monitor/mwait */ 151 }; 152 153 /* 154 * These constants determine how many of the elements of the 155 * cpuid we cache in the cpuid_info data structure; the 156 * remaining elements are accessible via the cpuid instruction. 157 */ 158 159 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 160 #define NMAX_CPI_EXTD 0x1c /* eax = 0x80000000 .. 0x8000001b */ 161 162 /* 163 * Some terminology needs to be explained: 164 * - Socket: Something that can be plugged into a motherboard. 165 * - Package: Same as socket 166 * - Chip: Same as socket. Note that AMD's documentation uses term "chip" 167 * differently: there, chip is the same as processor node (below) 168 * - Processor node: Some AMD processors have more than one 169 * "subprocessor" embedded in a package. These subprocessors (nodes) 170 * are fully-functional processors themselves with cores, caches, 171 * memory controllers, PCI configuration spaces. They are connected 172 * inside the package with Hypertransport links. On single-node 173 * processors, processor node is equivalent to chip/socket/package. 174 */ 175 176 struct cpuid_info { 177 uint_t cpi_pass; /* last pass completed */ 178 /* 179 * standard function information 180 */ 181 uint_t cpi_maxeax; /* fn 0: %eax */ 182 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 183 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 184 185 uint_t cpi_family; /* fn 1: extended family */ 186 uint_t cpi_model; /* fn 1: extended model */ 187 uint_t cpi_step; /* fn 1: stepping */ 188 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */ 189 /* AMD: package/socket # */ 190 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 191 int cpi_clogid; /* fn 1: %ebx: thread # */ 192 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 193 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 194 uint_t cpi_ncache; /* fn 2: number of elements */ 195 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 196 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 197 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 198 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 199 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 200 /* 201 * extended function information 202 */ 203 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 204 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 205 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 206 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 207 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */ 208 209 id_t cpi_coreid; /* same coreid => strands share core */ 210 int cpi_pkgcoreid; /* core number within single package */ 211 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 212 /* Intel: fn 4: %eax[31-26] */ 213 /* 214 * supported feature information 215 */ 216 uint32_t cpi_support[5]; 217 #define STD_EDX_FEATURES 0 218 #define AMD_EDX_FEATURES 1 219 #define TM_EDX_FEATURES 2 220 #define STD_ECX_FEATURES 3 221 #define AMD_ECX_FEATURES 4 222 /* 223 * Synthesized information, where known. 224 */ 225 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 226 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 227 uint32_t cpi_socket; /* Chip package/socket type */ 228 229 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 230 uint32_t cpi_apicid; 231 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 232 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 233 /* Intel: 1 */ 234 }; 235 236 237 static struct cpuid_info cpuid_info0; 238 239 /* 240 * These bit fields are defined by the Intel Application Note AP-485 241 * "Intel Processor Identification and the CPUID Instruction" 242 */ 243 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 244 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 245 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 246 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 247 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 248 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 249 250 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 251 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 252 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 253 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 254 255 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 256 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 257 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 258 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 259 260 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 261 #define CPI_XMAXEAX_MAX 0x80000100 262 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 263 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 264 265 /* 266 * Function 4 (Deterministic Cache Parameters) macros 267 * Defined by Intel Application Note AP-485 268 */ 269 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 270 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 271 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 272 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 273 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 274 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 275 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 276 277 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 278 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 279 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 280 281 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 282 283 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 284 285 286 /* 287 * A couple of shorthand macros to identify "later" P6-family chips 288 * like the Pentium M and Core. First, the "older" P6-based stuff 289 * (loosely defined as "pre-Pentium-4"): 290 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 291 */ 292 293 #define IS_LEGACY_P6(cpi) ( \ 294 cpi->cpi_family == 6 && \ 295 (cpi->cpi_model == 1 || \ 296 cpi->cpi_model == 3 || \ 297 cpi->cpi_model == 5 || \ 298 cpi->cpi_model == 6 || \ 299 cpi->cpi_model == 7 || \ 300 cpi->cpi_model == 8 || \ 301 cpi->cpi_model == 0xA || \ 302 cpi->cpi_model == 0xB) \ 303 ) 304 305 /* A "new F6" is everything with family 6 that's not the above */ 306 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 307 308 /* Extended family/model support */ 309 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 310 cpi->cpi_family >= 0xf) 311 312 /* 313 * Info for monitor/mwait idle loop. 314 * 315 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 316 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 317 * 2006. 318 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 319 * Documentation Updates" #33633, Rev 2.05, December 2006. 320 */ 321 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 322 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 323 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 324 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 325 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 326 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 327 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 328 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 329 /* 330 * Number of sub-cstates for a given c-state. 331 */ 332 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 333 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 334 335 /* 336 * Functions we consune from cpuid_subr.c; don't publish these in a header 337 * file to try and keep people using the expected cpuid_* interfaces. 338 */ 339 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 340 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 341 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 342 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 343 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 344 345 /* 346 * Apply up various platform-dependent restrictions where the 347 * underlying platform restrictions mean the CPU can be marked 348 * as less capable than its cpuid instruction would imply. 349 */ 350 #if defined(__xpv) 351 static void 352 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 353 { 354 switch (eax) { 355 case 1: { 356 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 357 0 : CPUID_INTC_EDX_MCA; 358 cp->cp_edx &= 359 ~(mcamask | 360 CPUID_INTC_EDX_PSE | 361 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 362 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 363 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 364 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 365 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 366 break; 367 } 368 369 case 0x80000001: 370 cp->cp_edx &= 371 ~(CPUID_AMD_EDX_PSE | 372 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 373 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 374 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 375 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 376 CPUID_AMD_EDX_TSCP); 377 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 378 break; 379 default: 380 break; 381 } 382 383 switch (vendor) { 384 case X86_VENDOR_Intel: 385 switch (eax) { 386 case 4: 387 /* 388 * Zero out the (ncores-per-chip - 1) field 389 */ 390 cp->cp_eax &= 0x03fffffff; 391 break; 392 default: 393 break; 394 } 395 break; 396 case X86_VENDOR_AMD: 397 switch (eax) { 398 399 case 0x80000001: 400 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 401 break; 402 403 case 0x80000008: 404 /* 405 * Zero out the (ncores-per-chip - 1) field 406 */ 407 cp->cp_ecx &= 0xffffff00; 408 break; 409 default: 410 break; 411 } 412 break; 413 default: 414 break; 415 } 416 } 417 #else 418 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 419 #endif 420 421 /* 422 * Some undocumented ways of patching the results of the cpuid 423 * instruction to permit running Solaris 10 on future cpus that 424 * we don't currently support. Could be set to non-zero values 425 * via settings in eeprom. 426 */ 427 428 uint32_t cpuid_feature_ecx_include; 429 uint32_t cpuid_feature_ecx_exclude; 430 uint32_t cpuid_feature_edx_include; 431 uint32_t cpuid_feature_edx_exclude; 432 433 void 434 cpuid_alloc_space(cpu_t *cpu) 435 { 436 /* 437 * By convention, cpu0 is the boot cpu, which is set up 438 * before memory allocation is available. All other cpus get 439 * their cpuid_info struct allocated here. 440 */ 441 ASSERT(cpu->cpu_id != 0); 442 cpu->cpu_m.mcpu_cpi = 443 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 444 } 445 446 void 447 cpuid_free_space(cpu_t *cpu) 448 { 449 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 450 int i; 451 452 ASSERT(cpu->cpu_id != 0); 453 454 /* 455 * Free up any function 4 related dynamic storage 456 */ 457 for (i = 1; i < cpi->cpi_std_4_size; i++) 458 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 459 if (cpi->cpi_std_4_size > 0) 460 kmem_free(cpi->cpi_std_4, 461 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 462 463 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 464 } 465 466 #if !defined(__xpv) 467 468 static void 469 determine_platform() 470 { 471 struct cpuid_regs cp; 472 char *xen_str; 473 uint32_t xen_signature[4]; 474 475 platform_type = HW_NATIVE; 476 477 if (!enable_platform_detection) 478 return; 479 480 /* 481 * In a fully virtualized domain, Xen's pseudo-cpuid function 482 * 0x40000000 returns a string representing the Xen signature in 483 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 484 * function. 485 */ 486 cp.cp_eax = 0x40000000; 487 (void) __cpuid_insn(&cp); 488 xen_signature[0] = cp.cp_ebx; 489 xen_signature[1] = cp.cp_ecx; 490 xen_signature[2] = cp.cp_edx; 491 xen_signature[3] = 0; 492 xen_str = (char *)xen_signature; 493 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) { 494 platform_type = HW_XEN_HVM; 495 } else if (vmware_platform()) { /* running under vmware hypervisor? */ 496 platform_type = HW_VMWARE; 497 } 498 } 499 500 int 501 get_hwenv(void) 502 { 503 if (platform_type == -1) 504 determine_platform(); 505 506 return (platform_type); 507 } 508 509 int 510 is_controldom(void) 511 { 512 return (0); 513 } 514 515 #else 516 517 int 518 get_hwenv(void) 519 { 520 return (HW_XEN_PV); 521 } 522 523 int 524 is_controldom(void) 525 { 526 return (DOMAIN_IS_INITDOMAIN(xen_info)); 527 } 528 529 #endif /* __xpv */ 530 531 static void 532 cpuid_intel_getids(cpu_t *cpu, uint_t feature) 533 { 534 uint_t i; 535 uint_t chipid_shift = 0; 536 uint_t coreid_shift = 0; 537 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 538 539 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 540 chipid_shift++; 541 542 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 543 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 544 545 if (feature & X86_CMP) { 546 /* 547 * Multi-core (and possibly multi-threaded) 548 * processors. 549 */ 550 uint_t ncpu_per_core; 551 if (cpi->cpi_ncore_per_chip == 1) 552 ncpu_per_core = cpi->cpi_ncpu_per_chip; 553 else if (cpi->cpi_ncore_per_chip > 1) 554 ncpu_per_core = cpi->cpi_ncpu_per_chip / 555 cpi->cpi_ncore_per_chip; 556 /* 557 * 8bit APIC IDs on dual core Pentiums 558 * look like this: 559 * 560 * +-----------------------+------+------+ 561 * | Physical Package ID | MC | HT | 562 * +-----------------------+------+------+ 563 * <------- chipid --------> 564 * <------- coreid ---------------> 565 * <--- clogid --> 566 * <------> 567 * pkgcoreid 568 * 569 * Where the number of bits necessary to 570 * represent MC and HT fields together equals 571 * to the minimum number of bits necessary to 572 * store the value of cpi->cpi_ncpu_per_chip. 573 * Of those bits, the MC part uses the number 574 * of bits necessary to store the value of 575 * cpi->cpi_ncore_per_chip. 576 */ 577 for (i = 1; i < ncpu_per_core; i <<= 1) 578 coreid_shift++; 579 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 580 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 581 } else if (feature & X86_HTT) { 582 /* 583 * Single-core multi-threaded processors. 584 */ 585 cpi->cpi_coreid = cpi->cpi_chipid; 586 cpi->cpi_pkgcoreid = 0; 587 } 588 cpi->cpi_procnodeid = cpi->cpi_chipid; 589 } 590 591 static void 592 cpuid_amd_getids(cpu_t *cpu) 593 { 594 int i, first_half, coreidsz; 595 uint32_t nb_caps_reg; 596 uint_t node2_1; 597 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 598 599 /* 600 * AMD CMP chips currently have a single thread per core. 601 * 602 * Since no two cpus share a core we must assign a distinct coreid 603 * per cpu, and we do this by using the cpu_id. This scheme does not, 604 * however, guarantee that sibling cores of a chip will have sequential 605 * coreids starting at a multiple of the number of cores per chip - 606 * that is usually the case, but if the ACPI MADT table is presented 607 * in a different order then we need to perform a few more gymnastics 608 * for the pkgcoreid. 609 * 610 * All processors in the system have the same number of enabled 611 * cores. Cores within a processor are always numbered sequentially 612 * from 0 regardless of how many or which are disabled, and there 613 * is no way for operating system to discover the real core id when some 614 * are disabled. 615 */ 616 617 cpi->cpi_coreid = cpu->cpu_id; 618 619 if (cpi->cpi_xmaxeax >= 0x80000008) { 620 621 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 622 623 /* 624 * In AMD parlance chip is really a node while Solaris 625 * sees chip as equivalent to socket/package. 626 */ 627 cpi->cpi_ncore_per_chip = 628 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 629 if (coreidsz == 0) { 630 /* Use legacy method */ 631 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 632 coreidsz++; 633 if (coreidsz == 0) 634 coreidsz = 1; 635 } 636 } else { 637 /* Assume single-core part */ 638 cpi->cpi_ncore_per_chip = 1; 639 } 640 641 cpi->cpi_clogid = cpi->cpi_pkgcoreid = 642 cpi->cpi_apicid & ((1<<coreidsz) - 1); 643 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip; 644 645 /* Get nodeID */ 646 if (cpi->cpi_family == 0xf) { 647 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 648 cpi->cpi_chipid = cpi->cpi_procnodeid; 649 } else if (cpi->cpi_family == 0x10) { 650 /* 651 * See if we are a multi-node processor. 652 * All processors in the system have the same number of nodes 653 */ 654 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 655 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 656 /* Single-node */ 657 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 658 coreidsz); 659 cpi->cpi_chipid = cpi->cpi_procnodeid; 660 } else { 661 662 /* 663 * Multi-node revision D (2 nodes per package 664 * are supported) 665 */ 666 cpi->cpi_procnodes_per_pkg = 2; 667 668 first_half = (cpi->cpi_pkgcoreid <= 669 (cpi->cpi_ncore_per_chip/2 - 1)); 670 671 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 672 /* We are BSP */ 673 cpi->cpi_procnodeid = (first_half ? 0 : 1); 674 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 675 } else { 676 677 /* We are AP */ 678 /* NodeId[2:1] bits to use for reading F3xe8 */ 679 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 680 681 nb_caps_reg = 682 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 683 684 /* 685 * Check IntNodeNum bit (31:30, but bit 31 is 686 * always 0 on dual-node processors) 687 */ 688 if (BITX(nb_caps_reg, 30, 30) == 0) 689 cpi->cpi_procnodeid = node2_1 + 690 !first_half; 691 else 692 cpi->cpi_procnodeid = node2_1 + 693 first_half; 694 695 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 696 } 697 } 698 } else if (cpi->cpi_family >= 0x11) { 699 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 700 cpi->cpi_chipid = cpi->cpi_procnodeid; 701 } else { 702 cpi->cpi_procnodeid = 0; 703 cpi->cpi_chipid = cpi->cpi_procnodeid; 704 } 705 } 706 707 uint_t 708 cpuid_pass1(cpu_t *cpu) 709 { 710 uint32_t mask_ecx, mask_edx; 711 uint_t feature = X86_CPUID; 712 struct cpuid_info *cpi; 713 struct cpuid_regs *cp; 714 int xcpuid; 715 #if !defined(__xpv) 716 extern int idle_cpu_prefer_mwait; 717 #endif 718 719 720 #if !defined(__xpv) 721 determine_platform(); 722 #endif 723 /* 724 * Space statically allocated for cpu0, ensure pointer is set 725 */ 726 if (cpu->cpu_id == 0) 727 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 728 cpi = cpu->cpu_m.mcpu_cpi; 729 ASSERT(cpi != NULL); 730 cp = &cpi->cpi_std[0]; 731 cp->cp_eax = 0; 732 cpi->cpi_maxeax = __cpuid_insn(cp); 733 { 734 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 735 *iptr++ = cp->cp_ebx; 736 *iptr++ = cp->cp_edx; 737 *iptr++ = cp->cp_ecx; 738 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 739 } 740 741 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 742 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 743 744 /* 745 * Limit the range in case of weird hardware 746 */ 747 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 748 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 749 if (cpi->cpi_maxeax < 1) 750 goto pass1_done; 751 752 cp = &cpi->cpi_std[1]; 753 cp->cp_eax = 1; 754 (void) __cpuid_insn(cp); 755 756 /* 757 * Extract identifying constants for easy access. 758 */ 759 cpi->cpi_model = CPI_MODEL(cpi); 760 cpi->cpi_family = CPI_FAMILY(cpi); 761 762 if (cpi->cpi_family == 0xf) 763 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 764 765 /* 766 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 767 * Intel, and presumably everyone else, uses model == 0xf, as 768 * one would expect (max value means possible overflow). Sigh. 769 */ 770 771 switch (cpi->cpi_vendor) { 772 case X86_VENDOR_Intel: 773 if (IS_EXTENDED_MODEL_INTEL(cpi)) 774 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 775 break; 776 case X86_VENDOR_AMD: 777 if (CPI_FAMILY(cpi) == 0xf) 778 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 779 break; 780 default: 781 if (cpi->cpi_model == 0xf) 782 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 783 break; 784 } 785 786 cpi->cpi_step = CPI_STEP(cpi); 787 cpi->cpi_brandid = CPI_BRANDID(cpi); 788 789 /* 790 * *default* assumptions: 791 * - believe %edx feature word 792 * - ignore %ecx feature word 793 * - 32-bit virtual and physical addressing 794 */ 795 mask_edx = 0xffffffff; 796 mask_ecx = 0; 797 798 cpi->cpi_pabits = cpi->cpi_vabits = 32; 799 800 switch (cpi->cpi_vendor) { 801 case X86_VENDOR_Intel: 802 if (cpi->cpi_family == 5) 803 x86_type = X86_TYPE_P5; 804 else if (IS_LEGACY_P6(cpi)) { 805 x86_type = X86_TYPE_P6; 806 pentiumpro_bug4046376 = 1; 807 pentiumpro_bug4064495 = 1; 808 /* 809 * Clear the SEP bit when it was set erroneously 810 */ 811 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 812 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 813 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 814 x86_type = X86_TYPE_P4; 815 /* 816 * We don't currently depend on any of the %ecx 817 * features until Prescott, so we'll only check 818 * this from P4 onwards. We might want to revisit 819 * that idea later. 820 */ 821 mask_ecx = 0xffffffff; 822 } else if (cpi->cpi_family > 0xf) 823 mask_ecx = 0xffffffff; 824 /* 825 * We don't support MONITOR/MWAIT if leaf 5 is not available 826 * to obtain the monitor linesize. 827 */ 828 if (cpi->cpi_maxeax < 5) 829 mask_ecx &= ~CPUID_INTC_ECX_MON; 830 break; 831 case X86_VENDOR_IntelClone: 832 default: 833 break; 834 case X86_VENDOR_AMD: 835 #if defined(OPTERON_ERRATUM_108) 836 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 837 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 838 cpi->cpi_model = 0xc; 839 } else 840 #endif 841 if (cpi->cpi_family == 5) { 842 /* 843 * AMD K5 and K6 844 * 845 * These CPUs have an incomplete implementation 846 * of MCA/MCE which we mask away. 847 */ 848 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 849 850 /* 851 * Model 0 uses the wrong (APIC) bit 852 * to indicate PGE. Fix it here. 853 */ 854 if (cpi->cpi_model == 0) { 855 if (cp->cp_edx & 0x200) { 856 cp->cp_edx &= ~0x200; 857 cp->cp_edx |= CPUID_INTC_EDX_PGE; 858 } 859 } 860 861 /* 862 * Early models had problems w/ MMX; disable. 863 */ 864 if (cpi->cpi_model < 6) 865 mask_edx &= ~CPUID_INTC_EDX_MMX; 866 } 867 868 /* 869 * For newer families, SSE3 and CX16, at least, are valid; 870 * enable all 871 */ 872 if (cpi->cpi_family >= 0xf) 873 mask_ecx = 0xffffffff; 874 /* 875 * We don't support MONITOR/MWAIT if leaf 5 is not available 876 * to obtain the monitor linesize. 877 */ 878 if (cpi->cpi_maxeax < 5) 879 mask_ecx &= ~CPUID_INTC_ECX_MON; 880 881 #if !defined(__xpv) 882 /* 883 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 884 * processors. AMD does not intend MWAIT to be used in the cpu 885 * idle loop on current and future processors. 10h and future 886 * AMD processors use more power in MWAIT than HLT. 887 * Pre-family-10h Opterons do not have the MWAIT instruction. 888 */ 889 idle_cpu_prefer_mwait = 0; 890 #endif 891 892 break; 893 case X86_VENDOR_TM: 894 /* 895 * workaround the NT workaround in CMS 4.1 896 */ 897 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 898 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 899 cp->cp_edx |= CPUID_INTC_EDX_CX8; 900 break; 901 case X86_VENDOR_Centaur: 902 /* 903 * workaround the NT workarounds again 904 */ 905 if (cpi->cpi_family == 6) 906 cp->cp_edx |= CPUID_INTC_EDX_CX8; 907 break; 908 case X86_VENDOR_Cyrix: 909 /* 910 * We rely heavily on the probing in locore 911 * to actually figure out what parts, if any, 912 * of the Cyrix cpuid instruction to believe. 913 */ 914 switch (x86_type) { 915 case X86_TYPE_CYRIX_486: 916 mask_edx = 0; 917 break; 918 case X86_TYPE_CYRIX_6x86: 919 mask_edx = 0; 920 break; 921 case X86_TYPE_CYRIX_6x86L: 922 mask_edx = 923 CPUID_INTC_EDX_DE | 924 CPUID_INTC_EDX_CX8; 925 break; 926 case X86_TYPE_CYRIX_6x86MX: 927 mask_edx = 928 CPUID_INTC_EDX_DE | 929 CPUID_INTC_EDX_MSR | 930 CPUID_INTC_EDX_CX8 | 931 CPUID_INTC_EDX_PGE | 932 CPUID_INTC_EDX_CMOV | 933 CPUID_INTC_EDX_MMX; 934 break; 935 case X86_TYPE_CYRIX_GXm: 936 mask_edx = 937 CPUID_INTC_EDX_MSR | 938 CPUID_INTC_EDX_CX8 | 939 CPUID_INTC_EDX_CMOV | 940 CPUID_INTC_EDX_MMX; 941 break; 942 case X86_TYPE_CYRIX_MediaGX: 943 break; 944 case X86_TYPE_CYRIX_MII: 945 case X86_TYPE_VIA_CYRIX_III: 946 mask_edx = 947 CPUID_INTC_EDX_DE | 948 CPUID_INTC_EDX_TSC | 949 CPUID_INTC_EDX_MSR | 950 CPUID_INTC_EDX_CX8 | 951 CPUID_INTC_EDX_PGE | 952 CPUID_INTC_EDX_CMOV | 953 CPUID_INTC_EDX_MMX; 954 break; 955 default: 956 break; 957 } 958 break; 959 } 960 961 #if defined(__xpv) 962 /* 963 * Do not support MONITOR/MWAIT under a hypervisor 964 */ 965 mask_ecx &= ~CPUID_INTC_ECX_MON; 966 #endif /* __xpv */ 967 968 /* 969 * Now we've figured out the masks that determine 970 * which bits we choose to believe, apply the masks 971 * to the feature words, then map the kernel's view 972 * of these feature words into its feature word. 973 */ 974 cp->cp_edx &= mask_edx; 975 cp->cp_ecx &= mask_ecx; 976 977 /* 978 * apply any platform restrictions (we don't call this 979 * immediately after __cpuid_insn here, because we need the 980 * workarounds applied above first) 981 */ 982 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 983 984 /* 985 * fold in overrides from the "eeprom" mechanism 986 */ 987 cp->cp_edx |= cpuid_feature_edx_include; 988 cp->cp_edx &= ~cpuid_feature_edx_exclude; 989 990 cp->cp_ecx |= cpuid_feature_ecx_include; 991 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 992 993 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 994 feature |= X86_LARGEPAGE; 995 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 996 feature |= X86_TSC; 997 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 998 feature |= X86_MSR; 999 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 1000 feature |= X86_MTRR; 1001 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 1002 feature |= X86_PGE; 1003 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 1004 feature |= X86_CMOV; 1005 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 1006 feature |= X86_MMX; 1007 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 1008 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 1009 feature |= X86_MCA; 1010 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 1011 feature |= X86_PAE; 1012 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 1013 feature |= X86_CX8; 1014 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 1015 feature |= X86_CX16; 1016 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 1017 feature |= X86_PAT; 1018 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 1019 feature |= X86_SEP; 1020 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 1021 /* 1022 * In our implementation, fxsave/fxrstor 1023 * are prerequisites before we'll even 1024 * try and do SSE things. 1025 */ 1026 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 1027 feature |= X86_SSE; 1028 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 1029 feature |= X86_SSE2; 1030 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 1031 feature |= X86_SSE3; 1032 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1033 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 1034 feature |= X86_SSSE3; 1035 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 1036 feature |= X86_SSE4_1; 1037 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 1038 feature |= X86_SSE4_2; 1039 if (cp->cp_ecx & CPUID_INTC_ECX_AES) 1040 feature |= X86_AES; 1041 } 1042 } 1043 if (cp->cp_edx & CPUID_INTC_EDX_DE) 1044 feature |= X86_DE; 1045 #if !defined(__xpv) 1046 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 1047 1048 /* 1049 * We require the CLFLUSH instruction for erratum workaround 1050 * to use MONITOR/MWAIT. 1051 */ 1052 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1053 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 1054 feature |= X86_MWAIT; 1055 } else { 1056 extern int idle_cpu_assert_cflush_monitor; 1057 1058 /* 1059 * All processors we are aware of which have 1060 * MONITOR/MWAIT also have CLFLUSH. 1061 */ 1062 if (idle_cpu_assert_cflush_monitor) { 1063 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 1064 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 1065 } 1066 } 1067 } 1068 #endif /* __xpv */ 1069 1070 /* 1071 * Only need it first time, rest of the cpus would follow suite. 1072 * we only capture this for the bootcpu. 1073 */ 1074 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1075 feature |= X86_CLFSH; 1076 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 1077 } 1078 1079 if (feature & X86_PAE) 1080 cpi->cpi_pabits = 36; 1081 1082 /* 1083 * Hyperthreading configuration is slightly tricky on Intel 1084 * and pure clones, and even trickier on AMD. 1085 * 1086 * (AMD chose to set the HTT bit on their CMP processors, 1087 * even though they're not actually hyperthreaded. Thus it 1088 * takes a bit more work to figure out what's really going 1089 * on ... see the handling of the CMP_LGCY bit below) 1090 */ 1091 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 1092 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 1093 if (cpi->cpi_ncpu_per_chip > 1) 1094 feature |= X86_HTT; 1095 } else { 1096 cpi->cpi_ncpu_per_chip = 1; 1097 } 1098 1099 /* 1100 * Work on the "extended" feature information, doing 1101 * some basic initialization for cpuid_pass2() 1102 */ 1103 xcpuid = 0; 1104 switch (cpi->cpi_vendor) { 1105 case X86_VENDOR_Intel: 1106 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 1107 xcpuid++; 1108 break; 1109 case X86_VENDOR_AMD: 1110 if (cpi->cpi_family > 5 || 1111 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 1112 xcpuid++; 1113 break; 1114 case X86_VENDOR_Cyrix: 1115 /* 1116 * Only these Cyrix CPUs are -known- to support 1117 * extended cpuid operations. 1118 */ 1119 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1120 x86_type == X86_TYPE_CYRIX_GXm) 1121 xcpuid++; 1122 break; 1123 case X86_VENDOR_Centaur: 1124 case X86_VENDOR_TM: 1125 default: 1126 xcpuid++; 1127 break; 1128 } 1129 1130 if (xcpuid) { 1131 cp = &cpi->cpi_extd[0]; 1132 cp->cp_eax = 0x80000000; 1133 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1134 } 1135 1136 if (cpi->cpi_xmaxeax & 0x80000000) { 1137 1138 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1139 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1140 1141 switch (cpi->cpi_vendor) { 1142 case X86_VENDOR_Intel: 1143 case X86_VENDOR_AMD: 1144 if (cpi->cpi_xmaxeax < 0x80000001) 1145 break; 1146 cp = &cpi->cpi_extd[1]; 1147 cp->cp_eax = 0x80000001; 1148 (void) __cpuid_insn(cp); 1149 1150 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1151 cpi->cpi_family == 5 && 1152 cpi->cpi_model == 6 && 1153 cpi->cpi_step == 6) { 1154 /* 1155 * K6 model 6 uses bit 10 to indicate SYSC 1156 * Later models use bit 11. Fix it here. 1157 */ 1158 if (cp->cp_edx & 0x400) { 1159 cp->cp_edx &= ~0x400; 1160 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1161 } 1162 } 1163 1164 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1165 1166 /* 1167 * Compute the additions to the kernel's feature word. 1168 */ 1169 if (cp->cp_edx & CPUID_AMD_EDX_NX) 1170 feature |= X86_NX; 1171 1172 /* 1173 * Regardless whether or not we boot 64-bit, 1174 * we should have a way to identify whether 1175 * the CPU is capable of running 64-bit. 1176 */ 1177 if (cp->cp_edx & CPUID_AMD_EDX_LM) 1178 feature |= X86_64; 1179 1180 #if defined(__amd64) 1181 /* 1 GB large page - enable only for 64 bit kernel */ 1182 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 1183 feature |= X86_1GPG; 1184 #endif 1185 1186 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1187 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1188 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 1189 feature |= X86_SSE4A; 1190 1191 /* 1192 * If both the HTT and CMP_LGCY bits are set, 1193 * then we're not actually HyperThreaded. Read 1194 * "AMD CPUID Specification" for more details. 1195 */ 1196 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1197 (feature & X86_HTT) && 1198 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1199 feature &= ~X86_HTT; 1200 feature |= X86_CMP; 1201 } 1202 #if defined(__amd64) 1203 /* 1204 * It's really tricky to support syscall/sysret in 1205 * the i386 kernel; we rely on sysenter/sysexit 1206 * instead. In the amd64 kernel, things are -way- 1207 * better. 1208 */ 1209 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 1210 feature |= X86_ASYSC; 1211 1212 /* 1213 * While we're thinking about system calls, note 1214 * that AMD processors don't support sysenter 1215 * in long mode at all, so don't try to program them. 1216 */ 1217 if (x86_vendor == X86_VENDOR_AMD) 1218 feature &= ~X86_SEP; 1219 #endif 1220 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1221 feature |= X86_TSCP; 1222 break; 1223 default: 1224 break; 1225 } 1226 1227 /* 1228 * Get CPUID data about processor cores and hyperthreads. 1229 */ 1230 switch (cpi->cpi_vendor) { 1231 case X86_VENDOR_Intel: 1232 if (cpi->cpi_maxeax >= 4) { 1233 cp = &cpi->cpi_std[4]; 1234 cp->cp_eax = 4; 1235 cp->cp_ecx = 0; 1236 (void) __cpuid_insn(cp); 1237 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1238 } 1239 /*FALLTHROUGH*/ 1240 case X86_VENDOR_AMD: 1241 if (cpi->cpi_xmaxeax < 0x80000008) 1242 break; 1243 cp = &cpi->cpi_extd[8]; 1244 cp->cp_eax = 0x80000008; 1245 (void) __cpuid_insn(cp); 1246 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1247 1248 /* 1249 * Virtual and physical address limits from 1250 * cpuid override previously guessed values. 1251 */ 1252 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1253 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1254 break; 1255 default: 1256 break; 1257 } 1258 1259 /* 1260 * Derive the number of cores per chip 1261 */ 1262 switch (cpi->cpi_vendor) { 1263 case X86_VENDOR_Intel: 1264 if (cpi->cpi_maxeax < 4) { 1265 cpi->cpi_ncore_per_chip = 1; 1266 break; 1267 } else { 1268 cpi->cpi_ncore_per_chip = 1269 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1270 } 1271 break; 1272 case X86_VENDOR_AMD: 1273 if (cpi->cpi_xmaxeax < 0x80000008) { 1274 cpi->cpi_ncore_per_chip = 1; 1275 break; 1276 } else { 1277 /* 1278 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1279 * 1 less than the number of physical cores on 1280 * the chip. In family 0x10 this value can 1281 * be affected by "downcoring" - it reflects 1282 * 1 less than the number of cores actually 1283 * enabled on this node. 1284 */ 1285 cpi->cpi_ncore_per_chip = 1286 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1287 } 1288 break; 1289 default: 1290 cpi->cpi_ncore_per_chip = 1; 1291 break; 1292 } 1293 1294 /* 1295 * Get CPUID data about TSC Invariance in Deep C-State. 1296 */ 1297 switch (cpi->cpi_vendor) { 1298 case X86_VENDOR_Intel: 1299 if (cpi->cpi_maxeax >= 7) { 1300 cp = &cpi->cpi_extd[7]; 1301 cp->cp_eax = 0x80000007; 1302 cp->cp_ecx = 0; 1303 (void) __cpuid_insn(cp); 1304 } 1305 break; 1306 default: 1307 break; 1308 } 1309 } else { 1310 cpi->cpi_ncore_per_chip = 1; 1311 } 1312 1313 /* 1314 * If more than one core, then this processor is CMP. 1315 */ 1316 if (cpi->cpi_ncore_per_chip > 1) 1317 feature |= X86_CMP; 1318 1319 /* 1320 * If the number of cores is the same as the number 1321 * of CPUs, then we cannot have HyperThreading. 1322 */ 1323 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1324 feature &= ~X86_HTT; 1325 1326 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1327 cpi->cpi_procnodes_per_pkg = 1; 1328 1329 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1330 /* 1331 * Single-core single-threaded processors. 1332 */ 1333 cpi->cpi_chipid = -1; 1334 cpi->cpi_clogid = 0; 1335 cpi->cpi_coreid = cpu->cpu_id; 1336 cpi->cpi_pkgcoreid = 0; 1337 if (cpi->cpi_vendor == X86_VENDOR_AMD) 1338 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 1339 else 1340 cpi->cpi_procnodeid = cpi->cpi_chipid; 1341 } else if (cpi->cpi_ncpu_per_chip > 1) { 1342 if (cpi->cpi_vendor == X86_VENDOR_Intel) 1343 cpuid_intel_getids(cpu, feature); 1344 else if (cpi->cpi_vendor == X86_VENDOR_AMD) 1345 cpuid_amd_getids(cpu); 1346 else { 1347 /* 1348 * All other processors are currently 1349 * assumed to have single cores. 1350 */ 1351 cpi->cpi_coreid = cpi->cpi_chipid; 1352 cpi->cpi_pkgcoreid = 0; 1353 cpi->cpi_procnodeid = cpi->cpi_chipid; 1354 } 1355 } 1356 1357 /* 1358 * Synthesize chip "revision" and socket type 1359 */ 1360 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1361 cpi->cpi_model, cpi->cpi_step); 1362 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1363 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1364 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1365 cpi->cpi_model, cpi->cpi_step); 1366 1367 pass1_done: 1368 cpi->cpi_pass = 1; 1369 return (feature); 1370 } 1371 1372 /* 1373 * Make copies of the cpuid table entries we depend on, in 1374 * part for ease of parsing now, in part so that we have only 1375 * one place to correct any of it, in part for ease of 1376 * later export to userland, and in part so we can look at 1377 * this stuff in a crash dump. 1378 */ 1379 1380 /*ARGSUSED*/ 1381 void 1382 cpuid_pass2(cpu_t *cpu) 1383 { 1384 uint_t n, nmax; 1385 int i; 1386 struct cpuid_regs *cp; 1387 uint8_t *dp; 1388 uint32_t *iptr; 1389 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1390 1391 ASSERT(cpi->cpi_pass == 1); 1392 1393 if (cpi->cpi_maxeax < 1) 1394 goto pass2_done; 1395 1396 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1397 nmax = NMAX_CPI_STD; 1398 /* 1399 * (We already handled n == 0 and n == 1 in pass 1) 1400 */ 1401 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1402 cp->cp_eax = n; 1403 1404 /* 1405 * CPUID function 4 expects %ecx to be initialized 1406 * with an index which indicates which cache to return 1407 * information about. The OS is expected to call function 4 1408 * with %ecx set to 0, 1, 2, ... until it returns with 1409 * EAX[4:0] set to 0, which indicates there are no more 1410 * caches. 1411 * 1412 * Here, populate cpi_std[4] with the information returned by 1413 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1414 * when dynamic memory allocation becomes available. 1415 * 1416 * Note: we need to explicitly initialize %ecx here, since 1417 * function 4 may have been previously invoked. 1418 */ 1419 if (n == 4) 1420 cp->cp_ecx = 0; 1421 1422 (void) __cpuid_insn(cp); 1423 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1424 switch (n) { 1425 case 2: 1426 /* 1427 * "the lower 8 bits of the %eax register 1428 * contain a value that identifies the number 1429 * of times the cpuid [instruction] has to be 1430 * executed to obtain a complete image of the 1431 * processor's caching systems." 1432 * 1433 * How *do* they make this stuff up? 1434 */ 1435 cpi->cpi_ncache = sizeof (*cp) * 1436 BITX(cp->cp_eax, 7, 0); 1437 if (cpi->cpi_ncache == 0) 1438 break; 1439 cpi->cpi_ncache--; /* skip count byte */ 1440 1441 /* 1442 * Well, for now, rather than attempt to implement 1443 * this slightly dubious algorithm, we just look 1444 * at the first 15 .. 1445 */ 1446 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1447 cpi->cpi_ncache = sizeof (*cp) - 1; 1448 1449 dp = cpi->cpi_cacheinfo; 1450 if (BITX(cp->cp_eax, 31, 31) == 0) { 1451 uint8_t *p = (void *)&cp->cp_eax; 1452 for (i = 1; i < 4; i++) 1453 if (p[i] != 0) 1454 *dp++ = p[i]; 1455 } 1456 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1457 uint8_t *p = (void *)&cp->cp_ebx; 1458 for (i = 0; i < 4; i++) 1459 if (p[i] != 0) 1460 *dp++ = p[i]; 1461 } 1462 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1463 uint8_t *p = (void *)&cp->cp_ecx; 1464 for (i = 0; i < 4; i++) 1465 if (p[i] != 0) 1466 *dp++ = p[i]; 1467 } 1468 if (BITX(cp->cp_edx, 31, 31) == 0) { 1469 uint8_t *p = (void *)&cp->cp_edx; 1470 for (i = 0; i < 4; i++) 1471 if (p[i] != 0) 1472 *dp++ = p[i]; 1473 } 1474 break; 1475 1476 case 3: /* Processor serial number, if PSN supported */ 1477 break; 1478 1479 case 4: /* Deterministic cache parameters */ 1480 break; 1481 1482 case 5: /* Monitor/Mwait parameters */ 1483 { 1484 size_t mwait_size; 1485 1486 /* 1487 * check cpi_mwait.support which was set in cpuid_pass1 1488 */ 1489 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1490 break; 1491 1492 /* 1493 * Protect ourself from insane mwait line size. 1494 * Workaround for incomplete hardware emulator(s). 1495 */ 1496 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1497 if (mwait_size < sizeof (uint32_t) || 1498 !ISP2(mwait_size)) { 1499 #if DEBUG 1500 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1501 "size %ld", cpu->cpu_id, (long)mwait_size); 1502 #endif 1503 break; 1504 } 1505 1506 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1507 cpi->cpi_mwait.mon_max = mwait_size; 1508 if (MWAIT_EXTENSION(cpi)) { 1509 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1510 if (MWAIT_INT_ENABLE(cpi)) 1511 cpi->cpi_mwait.support |= 1512 MWAIT_ECX_INT_ENABLE; 1513 } 1514 break; 1515 } 1516 default: 1517 break; 1518 } 1519 } 1520 1521 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1522 struct cpuid_regs regs; 1523 1524 cp = ®s; 1525 cp->cp_eax = 0xB; 1526 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1527 1528 (void) __cpuid_insn(cp); 1529 1530 /* 1531 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1532 * indicates that the extended topology enumeration leaf is 1533 * available. 1534 */ 1535 if (cp->cp_ebx) { 1536 uint32_t x2apic_id; 1537 uint_t coreid_shift = 0; 1538 uint_t ncpu_per_core = 1; 1539 uint_t chipid_shift = 0; 1540 uint_t ncpu_per_chip = 1; 1541 uint_t i; 1542 uint_t level; 1543 1544 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1545 cp->cp_eax = 0xB; 1546 cp->cp_ecx = i; 1547 1548 (void) __cpuid_insn(cp); 1549 level = CPI_CPU_LEVEL_TYPE(cp); 1550 1551 if (level == 1) { 1552 x2apic_id = cp->cp_edx; 1553 coreid_shift = BITX(cp->cp_eax, 4, 0); 1554 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1555 } else if (level == 2) { 1556 x2apic_id = cp->cp_edx; 1557 chipid_shift = BITX(cp->cp_eax, 4, 0); 1558 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1559 } 1560 } 1561 1562 cpi->cpi_apicid = x2apic_id; 1563 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1564 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1565 ncpu_per_core; 1566 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1567 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1568 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1569 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1570 } 1571 1572 /* Make cp NULL so that we don't stumble on others */ 1573 cp = NULL; 1574 } 1575 1576 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1577 goto pass2_done; 1578 1579 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1580 nmax = NMAX_CPI_EXTD; 1581 /* 1582 * Copy the extended properties, fixing them as we go. 1583 * (We already handled n == 0 and n == 1 in pass 1) 1584 */ 1585 iptr = (void *)cpi->cpi_brandstr; 1586 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1587 cp->cp_eax = 0x80000000 + n; 1588 (void) __cpuid_insn(cp); 1589 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1590 switch (n) { 1591 case 2: 1592 case 3: 1593 case 4: 1594 /* 1595 * Extract the brand string 1596 */ 1597 *iptr++ = cp->cp_eax; 1598 *iptr++ = cp->cp_ebx; 1599 *iptr++ = cp->cp_ecx; 1600 *iptr++ = cp->cp_edx; 1601 break; 1602 case 5: 1603 switch (cpi->cpi_vendor) { 1604 case X86_VENDOR_AMD: 1605 /* 1606 * The Athlon and Duron were the first 1607 * parts to report the sizes of the 1608 * TLB for large pages. Before then, 1609 * we don't trust the data. 1610 */ 1611 if (cpi->cpi_family < 6 || 1612 (cpi->cpi_family == 6 && 1613 cpi->cpi_model < 1)) 1614 cp->cp_eax = 0; 1615 break; 1616 default: 1617 break; 1618 } 1619 break; 1620 case 6: 1621 switch (cpi->cpi_vendor) { 1622 case X86_VENDOR_AMD: 1623 /* 1624 * The Athlon and Duron were the first 1625 * AMD parts with L2 TLB's. 1626 * Before then, don't trust the data. 1627 */ 1628 if (cpi->cpi_family < 6 || 1629 cpi->cpi_family == 6 && 1630 cpi->cpi_model < 1) 1631 cp->cp_eax = cp->cp_ebx = 0; 1632 /* 1633 * AMD Duron rev A0 reports L2 1634 * cache size incorrectly as 1K 1635 * when it is really 64K 1636 */ 1637 if (cpi->cpi_family == 6 && 1638 cpi->cpi_model == 3 && 1639 cpi->cpi_step == 0) { 1640 cp->cp_ecx &= 0xffff; 1641 cp->cp_ecx |= 0x400000; 1642 } 1643 break; 1644 case X86_VENDOR_Cyrix: /* VIA C3 */ 1645 /* 1646 * VIA C3 processors are a bit messed 1647 * up w.r.t. encoding cache sizes in %ecx 1648 */ 1649 if (cpi->cpi_family != 6) 1650 break; 1651 /* 1652 * model 7 and 8 were incorrectly encoded 1653 * 1654 * xxx is model 8 really broken? 1655 */ 1656 if (cpi->cpi_model == 7 || 1657 cpi->cpi_model == 8) 1658 cp->cp_ecx = 1659 BITX(cp->cp_ecx, 31, 24) << 16 | 1660 BITX(cp->cp_ecx, 23, 16) << 12 | 1661 BITX(cp->cp_ecx, 15, 8) << 8 | 1662 BITX(cp->cp_ecx, 7, 0); 1663 /* 1664 * model 9 stepping 1 has wrong associativity 1665 */ 1666 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1667 cp->cp_ecx |= 8 << 12; 1668 break; 1669 case X86_VENDOR_Intel: 1670 /* 1671 * Extended L2 Cache features function. 1672 * First appeared on Prescott. 1673 */ 1674 default: 1675 break; 1676 } 1677 break; 1678 default: 1679 break; 1680 } 1681 } 1682 1683 pass2_done: 1684 cpi->cpi_pass = 2; 1685 } 1686 1687 static const char * 1688 intel_cpubrand(const struct cpuid_info *cpi) 1689 { 1690 int i; 1691 1692 if ((x86_feature & X86_CPUID) == 0 || 1693 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1694 return ("i486"); 1695 1696 switch (cpi->cpi_family) { 1697 case 5: 1698 return ("Intel Pentium(r)"); 1699 case 6: 1700 switch (cpi->cpi_model) { 1701 uint_t celeron, xeon; 1702 const struct cpuid_regs *cp; 1703 case 0: 1704 case 1: 1705 case 2: 1706 return ("Intel Pentium(r) Pro"); 1707 case 3: 1708 case 4: 1709 return ("Intel Pentium(r) II"); 1710 case 6: 1711 return ("Intel Celeron(r)"); 1712 case 5: 1713 case 7: 1714 celeron = xeon = 0; 1715 cp = &cpi->cpi_std[2]; /* cache info */ 1716 1717 for (i = 1; i < 4; i++) { 1718 uint_t tmp; 1719 1720 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1721 if (tmp == 0x40) 1722 celeron++; 1723 if (tmp >= 0x44 && tmp <= 0x45) 1724 xeon++; 1725 } 1726 1727 for (i = 0; i < 2; i++) { 1728 uint_t tmp; 1729 1730 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1731 if (tmp == 0x40) 1732 celeron++; 1733 else if (tmp >= 0x44 && tmp <= 0x45) 1734 xeon++; 1735 } 1736 1737 for (i = 0; i < 4; i++) { 1738 uint_t tmp; 1739 1740 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1741 if (tmp == 0x40) 1742 celeron++; 1743 else if (tmp >= 0x44 && tmp <= 0x45) 1744 xeon++; 1745 } 1746 1747 for (i = 0; i < 4; i++) { 1748 uint_t tmp; 1749 1750 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1751 if (tmp == 0x40) 1752 celeron++; 1753 else if (tmp >= 0x44 && tmp <= 0x45) 1754 xeon++; 1755 } 1756 1757 if (celeron) 1758 return ("Intel Celeron(r)"); 1759 if (xeon) 1760 return (cpi->cpi_model == 5 ? 1761 "Intel Pentium(r) II Xeon(tm)" : 1762 "Intel Pentium(r) III Xeon(tm)"); 1763 return (cpi->cpi_model == 5 ? 1764 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1765 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1766 default: 1767 break; 1768 } 1769 default: 1770 break; 1771 } 1772 1773 /* BrandID is present if the field is nonzero */ 1774 if (cpi->cpi_brandid != 0) { 1775 static const struct { 1776 uint_t bt_bid; 1777 const char *bt_str; 1778 } brand_tbl[] = { 1779 { 0x1, "Intel(r) Celeron(r)" }, 1780 { 0x2, "Intel(r) Pentium(r) III" }, 1781 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1782 { 0x4, "Intel(r) Pentium(r) III" }, 1783 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1784 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1785 { 0x8, "Intel(r) Pentium(r) 4" }, 1786 { 0x9, "Intel(r) Pentium(r) 4" }, 1787 { 0xa, "Intel(r) Celeron(r)" }, 1788 { 0xb, "Intel(r) Xeon(tm)" }, 1789 { 0xc, "Intel(r) Xeon(tm) MP" }, 1790 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1791 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1792 { 0x11, "Mobile Genuine Intel(r)" }, 1793 { 0x12, "Intel(r) Celeron(r) M" }, 1794 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1795 { 0x14, "Intel(r) Celeron(r)" }, 1796 { 0x15, "Mobile Genuine Intel(r)" }, 1797 { 0x16, "Intel(r) Pentium(r) M" }, 1798 { 0x17, "Mobile Intel(r) Celeron(r)" } 1799 }; 1800 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1801 uint_t sgn; 1802 1803 sgn = (cpi->cpi_family << 8) | 1804 (cpi->cpi_model << 4) | cpi->cpi_step; 1805 1806 for (i = 0; i < btblmax; i++) 1807 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1808 break; 1809 if (i < btblmax) { 1810 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1811 return ("Intel(r) Celeron(r)"); 1812 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1813 return ("Intel(r) Xeon(tm) MP"); 1814 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1815 return ("Intel(r) Xeon(tm)"); 1816 return (brand_tbl[i].bt_str); 1817 } 1818 } 1819 1820 return (NULL); 1821 } 1822 1823 static const char * 1824 amd_cpubrand(const struct cpuid_info *cpi) 1825 { 1826 if ((x86_feature & X86_CPUID) == 0 || 1827 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1828 return ("i486 compatible"); 1829 1830 switch (cpi->cpi_family) { 1831 case 5: 1832 switch (cpi->cpi_model) { 1833 case 0: 1834 case 1: 1835 case 2: 1836 case 3: 1837 case 4: 1838 case 5: 1839 return ("AMD-K5(r)"); 1840 case 6: 1841 case 7: 1842 return ("AMD-K6(r)"); 1843 case 8: 1844 return ("AMD-K6(r)-2"); 1845 case 9: 1846 return ("AMD-K6(r)-III"); 1847 default: 1848 return ("AMD (family 5)"); 1849 } 1850 case 6: 1851 switch (cpi->cpi_model) { 1852 case 1: 1853 return ("AMD-K7(tm)"); 1854 case 0: 1855 case 2: 1856 case 4: 1857 return ("AMD Athlon(tm)"); 1858 case 3: 1859 case 7: 1860 return ("AMD Duron(tm)"); 1861 case 6: 1862 case 8: 1863 case 10: 1864 /* 1865 * Use the L2 cache size to distinguish 1866 */ 1867 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1868 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1869 default: 1870 return ("AMD (family 6)"); 1871 } 1872 default: 1873 break; 1874 } 1875 1876 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1877 cpi->cpi_brandid != 0) { 1878 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1879 case 3: 1880 return ("AMD Opteron(tm) UP 1xx"); 1881 case 4: 1882 return ("AMD Opteron(tm) DP 2xx"); 1883 case 5: 1884 return ("AMD Opteron(tm) MP 8xx"); 1885 default: 1886 return ("AMD Opteron(tm)"); 1887 } 1888 } 1889 1890 return (NULL); 1891 } 1892 1893 static const char * 1894 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1895 { 1896 if ((x86_feature & X86_CPUID) == 0 || 1897 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1898 type == X86_TYPE_CYRIX_486) 1899 return ("i486 compatible"); 1900 1901 switch (type) { 1902 case X86_TYPE_CYRIX_6x86: 1903 return ("Cyrix 6x86"); 1904 case X86_TYPE_CYRIX_6x86L: 1905 return ("Cyrix 6x86L"); 1906 case X86_TYPE_CYRIX_6x86MX: 1907 return ("Cyrix 6x86MX"); 1908 case X86_TYPE_CYRIX_GXm: 1909 return ("Cyrix GXm"); 1910 case X86_TYPE_CYRIX_MediaGX: 1911 return ("Cyrix MediaGX"); 1912 case X86_TYPE_CYRIX_MII: 1913 return ("Cyrix M2"); 1914 case X86_TYPE_VIA_CYRIX_III: 1915 return ("VIA Cyrix M3"); 1916 default: 1917 /* 1918 * Have another wild guess .. 1919 */ 1920 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1921 return ("Cyrix 5x86"); 1922 else if (cpi->cpi_family == 5) { 1923 switch (cpi->cpi_model) { 1924 case 2: 1925 return ("Cyrix 6x86"); /* Cyrix M1 */ 1926 case 4: 1927 return ("Cyrix MediaGX"); 1928 default: 1929 break; 1930 } 1931 } else if (cpi->cpi_family == 6) { 1932 switch (cpi->cpi_model) { 1933 case 0: 1934 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1935 case 5: 1936 case 6: 1937 case 7: 1938 case 8: 1939 case 9: 1940 return ("VIA C3"); 1941 default: 1942 break; 1943 } 1944 } 1945 break; 1946 } 1947 return (NULL); 1948 } 1949 1950 /* 1951 * This only gets called in the case that the CPU extended 1952 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1953 * aren't available, or contain null bytes for some reason. 1954 */ 1955 static void 1956 fabricate_brandstr(struct cpuid_info *cpi) 1957 { 1958 const char *brand = NULL; 1959 1960 switch (cpi->cpi_vendor) { 1961 case X86_VENDOR_Intel: 1962 brand = intel_cpubrand(cpi); 1963 break; 1964 case X86_VENDOR_AMD: 1965 brand = amd_cpubrand(cpi); 1966 break; 1967 case X86_VENDOR_Cyrix: 1968 brand = cyrix_cpubrand(cpi, x86_type); 1969 break; 1970 case X86_VENDOR_NexGen: 1971 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1972 brand = "NexGen Nx586"; 1973 break; 1974 case X86_VENDOR_Centaur: 1975 if (cpi->cpi_family == 5) 1976 switch (cpi->cpi_model) { 1977 case 4: 1978 brand = "Centaur C6"; 1979 break; 1980 case 8: 1981 brand = "Centaur C2"; 1982 break; 1983 case 9: 1984 brand = "Centaur C3"; 1985 break; 1986 default: 1987 break; 1988 } 1989 break; 1990 case X86_VENDOR_Rise: 1991 if (cpi->cpi_family == 5 && 1992 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1993 brand = "Rise mP6"; 1994 break; 1995 case X86_VENDOR_SiS: 1996 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1997 brand = "SiS 55x"; 1998 break; 1999 case X86_VENDOR_TM: 2000 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 2001 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 2002 break; 2003 case X86_VENDOR_NSC: 2004 case X86_VENDOR_UMC: 2005 default: 2006 break; 2007 } 2008 if (brand) { 2009 (void) strcpy((char *)cpi->cpi_brandstr, brand); 2010 return; 2011 } 2012 2013 /* 2014 * If all else fails ... 2015 */ 2016 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 2017 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 2018 cpi->cpi_model, cpi->cpi_step); 2019 } 2020 2021 /* 2022 * This routine is called just after kernel memory allocation 2023 * becomes available on cpu0, and as part of mp_startup() on 2024 * the other cpus. 2025 * 2026 * Fixup the brand string, and collect any information from cpuid 2027 * that requires dynamicically allocated storage to represent. 2028 */ 2029 /*ARGSUSED*/ 2030 void 2031 cpuid_pass3(cpu_t *cpu) 2032 { 2033 int i, max, shft, level, size; 2034 struct cpuid_regs regs; 2035 struct cpuid_regs *cp; 2036 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2037 2038 ASSERT(cpi->cpi_pass == 2); 2039 2040 /* 2041 * Function 4: Deterministic cache parameters 2042 * 2043 * Take this opportunity to detect the number of threads 2044 * sharing the last level cache, and construct a corresponding 2045 * cache id. The respective cpuid_info members are initialized 2046 * to the default case of "no last level cache sharing". 2047 */ 2048 cpi->cpi_ncpu_shr_last_cache = 1; 2049 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 2050 2051 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 2052 2053 /* 2054 * Find the # of elements (size) returned by fn 4, and along 2055 * the way detect last level cache sharing details. 2056 */ 2057 bzero(®s, sizeof (regs)); 2058 cp = ®s; 2059 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 2060 cp->cp_eax = 4; 2061 cp->cp_ecx = i; 2062 2063 (void) __cpuid_insn(cp); 2064 2065 if (CPI_CACHE_TYPE(cp) == 0) 2066 break; 2067 level = CPI_CACHE_LVL(cp); 2068 if (level > max) { 2069 max = level; 2070 cpi->cpi_ncpu_shr_last_cache = 2071 CPI_NTHR_SHR_CACHE(cp) + 1; 2072 } 2073 } 2074 cpi->cpi_std_4_size = size = i; 2075 2076 /* 2077 * Allocate the cpi_std_4 array. The first element 2078 * references the regs for fn 4, %ecx == 0, which 2079 * cpuid_pass2() stashed in cpi->cpi_std[4]. 2080 */ 2081 if (size > 0) { 2082 cpi->cpi_std_4 = 2083 kmem_alloc(size * sizeof (cp), KM_SLEEP); 2084 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 2085 2086 /* 2087 * Allocate storage to hold the additional regs 2088 * for function 4, %ecx == 1 .. cpi_std_4_size. 2089 * 2090 * The regs for fn 4, %ecx == 0 has already 2091 * been allocated as indicated above. 2092 */ 2093 for (i = 1; i < size; i++) { 2094 cp = cpi->cpi_std_4[i] = 2095 kmem_zalloc(sizeof (regs), KM_SLEEP); 2096 cp->cp_eax = 4; 2097 cp->cp_ecx = i; 2098 2099 (void) __cpuid_insn(cp); 2100 } 2101 } 2102 /* 2103 * Determine the number of bits needed to represent 2104 * the number of CPUs sharing the last level cache. 2105 * 2106 * Shift off that number of bits from the APIC id to 2107 * derive the cache id. 2108 */ 2109 shft = 0; 2110 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 2111 shft++; 2112 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 2113 } 2114 2115 /* 2116 * Now fixup the brand string 2117 */ 2118 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 2119 fabricate_brandstr(cpi); 2120 } else { 2121 2122 /* 2123 * If we successfully extracted a brand string from the cpuid 2124 * instruction, clean it up by removing leading spaces and 2125 * similar junk. 2126 */ 2127 if (cpi->cpi_brandstr[0]) { 2128 size_t maxlen = sizeof (cpi->cpi_brandstr); 2129 char *src, *dst; 2130 2131 dst = src = (char *)cpi->cpi_brandstr; 2132 src[maxlen - 1] = '\0'; 2133 /* 2134 * strip leading spaces 2135 */ 2136 while (*src == ' ') 2137 src++; 2138 /* 2139 * Remove any 'Genuine' or "Authentic" prefixes 2140 */ 2141 if (strncmp(src, "Genuine ", 8) == 0) 2142 src += 8; 2143 if (strncmp(src, "Authentic ", 10) == 0) 2144 src += 10; 2145 2146 /* 2147 * Now do an in-place copy. 2148 * Map (R) to (r) and (TM) to (tm). 2149 * The era of teletypes is long gone, and there's 2150 * -really- no need to shout. 2151 */ 2152 while (*src != '\0') { 2153 if (src[0] == '(') { 2154 if (strncmp(src + 1, "R)", 2) == 0) { 2155 (void) strncpy(dst, "(r)", 3); 2156 src += 3; 2157 dst += 3; 2158 continue; 2159 } 2160 if (strncmp(src + 1, "TM)", 3) == 0) { 2161 (void) strncpy(dst, "(tm)", 4); 2162 src += 4; 2163 dst += 4; 2164 continue; 2165 } 2166 } 2167 *dst++ = *src++; 2168 } 2169 *dst = '\0'; 2170 2171 /* 2172 * Finally, remove any trailing spaces 2173 */ 2174 while (--dst > cpi->cpi_brandstr) 2175 if (*dst == ' ') 2176 *dst = '\0'; 2177 else 2178 break; 2179 } else 2180 fabricate_brandstr(cpi); 2181 } 2182 cpi->cpi_pass = 3; 2183 } 2184 2185 /* 2186 * This routine is called out of bind_hwcap() much later in the life 2187 * of the kernel (post_startup()). The job of this routine is to resolve 2188 * the hardware feature support and kernel support for those features into 2189 * what we're actually going to tell applications via the aux vector. 2190 */ 2191 uint_t 2192 cpuid_pass4(cpu_t *cpu) 2193 { 2194 struct cpuid_info *cpi; 2195 uint_t hwcap_flags = 0; 2196 2197 if (cpu == NULL) 2198 cpu = CPU; 2199 cpi = cpu->cpu_m.mcpu_cpi; 2200 2201 ASSERT(cpi->cpi_pass == 3); 2202 2203 if (cpi->cpi_maxeax >= 1) { 2204 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2205 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2206 2207 *edx = CPI_FEATURES_EDX(cpi); 2208 *ecx = CPI_FEATURES_ECX(cpi); 2209 2210 /* 2211 * [these require explicit kernel support] 2212 */ 2213 if ((x86_feature & X86_SEP) == 0) 2214 *edx &= ~CPUID_INTC_EDX_SEP; 2215 2216 if ((x86_feature & X86_SSE) == 0) 2217 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2218 if ((x86_feature & X86_SSE2) == 0) 2219 *edx &= ~CPUID_INTC_EDX_SSE2; 2220 2221 if ((x86_feature & X86_HTT) == 0) 2222 *edx &= ~CPUID_INTC_EDX_HTT; 2223 2224 if ((x86_feature & X86_SSE3) == 0) 2225 *ecx &= ~CPUID_INTC_ECX_SSE3; 2226 2227 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2228 if ((x86_feature & X86_SSSE3) == 0) 2229 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2230 if ((x86_feature & X86_SSE4_1) == 0) 2231 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2232 if ((x86_feature & X86_SSE4_2) == 0) 2233 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2234 if ((x86_feature & X86_AES) == 0) 2235 *ecx &= ~CPUID_INTC_ECX_AES; 2236 } 2237 2238 /* 2239 * [no explicit support required beyond x87 fp context] 2240 */ 2241 if (!fpu_exists) 2242 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2243 2244 /* 2245 * Now map the supported feature vector to things that we 2246 * think userland will care about. 2247 */ 2248 if (*edx & CPUID_INTC_EDX_SEP) 2249 hwcap_flags |= AV_386_SEP; 2250 if (*edx & CPUID_INTC_EDX_SSE) 2251 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2252 if (*edx & CPUID_INTC_EDX_SSE2) 2253 hwcap_flags |= AV_386_SSE2; 2254 if (*ecx & CPUID_INTC_ECX_SSE3) 2255 hwcap_flags |= AV_386_SSE3; 2256 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2257 if (*ecx & CPUID_INTC_ECX_SSSE3) 2258 hwcap_flags |= AV_386_SSSE3; 2259 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2260 hwcap_flags |= AV_386_SSE4_1; 2261 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2262 hwcap_flags |= AV_386_SSE4_2; 2263 if (*ecx & CPUID_INTC_ECX_MOVBE) 2264 hwcap_flags |= AV_386_MOVBE; 2265 if (*ecx & CPUID_INTC_ECX_AES) 2266 hwcap_flags |= AV_386_AES; 2267 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2268 hwcap_flags |= AV_386_PCLMULQDQ; 2269 } 2270 if (*ecx & CPUID_INTC_ECX_POPCNT) 2271 hwcap_flags |= AV_386_POPCNT; 2272 if (*edx & CPUID_INTC_EDX_FPU) 2273 hwcap_flags |= AV_386_FPU; 2274 if (*edx & CPUID_INTC_EDX_MMX) 2275 hwcap_flags |= AV_386_MMX; 2276 2277 if (*edx & CPUID_INTC_EDX_TSC) 2278 hwcap_flags |= AV_386_TSC; 2279 if (*edx & CPUID_INTC_EDX_CX8) 2280 hwcap_flags |= AV_386_CX8; 2281 if (*edx & CPUID_INTC_EDX_CMOV) 2282 hwcap_flags |= AV_386_CMOV; 2283 if (*ecx & CPUID_INTC_ECX_CX16) 2284 hwcap_flags |= AV_386_CX16; 2285 } 2286 2287 if (cpi->cpi_xmaxeax < 0x80000001) 2288 goto pass4_done; 2289 2290 switch (cpi->cpi_vendor) { 2291 struct cpuid_regs cp; 2292 uint32_t *edx, *ecx; 2293 2294 case X86_VENDOR_Intel: 2295 /* 2296 * Seems like Intel duplicated what we necessary 2297 * here to make the initial crop of 64-bit OS's work. 2298 * Hopefully, those are the only "extended" bits 2299 * they'll add. 2300 */ 2301 /*FALLTHROUGH*/ 2302 2303 case X86_VENDOR_AMD: 2304 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2305 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2306 2307 *edx = CPI_FEATURES_XTD_EDX(cpi); 2308 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2309 2310 /* 2311 * [these features require explicit kernel support] 2312 */ 2313 switch (cpi->cpi_vendor) { 2314 case X86_VENDOR_Intel: 2315 if ((x86_feature & X86_TSCP) == 0) 2316 *edx &= ~CPUID_AMD_EDX_TSCP; 2317 break; 2318 2319 case X86_VENDOR_AMD: 2320 if ((x86_feature & X86_TSCP) == 0) 2321 *edx &= ~CPUID_AMD_EDX_TSCP; 2322 if ((x86_feature & X86_SSE4A) == 0) 2323 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2324 break; 2325 2326 default: 2327 break; 2328 } 2329 2330 /* 2331 * [no explicit support required beyond 2332 * x87 fp context and exception handlers] 2333 */ 2334 if (!fpu_exists) 2335 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2336 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2337 2338 if ((x86_feature & X86_NX) == 0) 2339 *edx &= ~CPUID_AMD_EDX_NX; 2340 #if !defined(__amd64) 2341 *edx &= ~CPUID_AMD_EDX_LM; 2342 #endif 2343 /* 2344 * Now map the supported feature vector to 2345 * things that we think userland will care about. 2346 */ 2347 #if defined(__amd64) 2348 if (*edx & CPUID_AMD_EDX_SYSC) 2349 hwcap_flags |= AV_386_AMD_SYSC; 2350 #endif 2351 if (*edx & CPUID_AMD_EDX_MMXamd) 2352 hwcap_flags |= AV_386_AMD_MMX; 2353 if (*edx & CPUID_AMD_EDX_3DNow) 2354 hwcap_flags |= AV_386_AMD_3DNow; 2355 if (*edx & CPUID_AMD_EDX_3DNowx) 2356 hwcap_flags |= AV_386_AMD_3DNowx; 2357 2358 switch (cpi->cpi_vendor) { 2359 case X86_VENDOR_AMD: 2360 if (*edx & CPUID_AMD_EDX_TSCP) 2361 hwcap_flags |= AV_386_TSCP; 2362 if (*ecx & CPUID_AMD_ECX_AHF64) 2363 hwcap_flags |= AV_386_AHF; 2364 if (*ecx & CPUID_AMD_ECX_SSE4A) 2365 hwcap_flags |= AV_386_AMD_SSE4A; 2366 if (*ecx & CPUID_AMD_ECX_LZCNT) 2367 hwcap_flags |= AV_386_AMD_LZCNT; 2368 break; 2369 2370 case X86_VENDOR_Intel: 2371 if (*edx & CPUID_AMD_EDX_TSCP) 2372 hwcap_flags |= AV_386_TSCP; 2373 /* 2374 * Aarrgh. 2375 * Intel uses a different bit in the same word. 2376 */ 2377 if (*ecx & CPUID_INTC_ECX_AHF64) 2378 hwcap_flags |= AV_386_AHF; 2379 break; 2380 2381 default: 2382 break; 2383 } 2384 break; 2385 2386 case X86_VENDOR_TM: 2387 cp.cp_eax = 0x80860001; 2388 (void) __cpuid_insn(&cp); 2389 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2390 break; 2391 2392 default: 2393 break; 2394 } 2395 2396 pass4_done: 2397 cpi->cpi_pass = 4; 2398 return (hwcap_flags); 2399 } 2400 2401 2402 /* 2403 * Simulate the cpuid instruction using the data we previously 2404 * captured about this CPU. We try our best to return the truth 2405 * about the hardware, independently of kernel support. 2406 */ 2407 uint32_t 2408 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2409 { 2410 struct cpuid_info *cpi; 2411 struct cpuid_regs *xcp; 2412 2413 if (cpu == NULL) 2414 cpu = CPU; 2415 cpi = cpu->cpu_m.mcpu_cpi; 2416 2417 ASSERT(cpuid_checkpass(cpu, 3)); 2418 2419 /* 2420 * CPUID data is cached in two separate places: cpi_std for standard 2421 * CPUID functions, and cpi_extd for extended CPUID functions. 2422 */ 2423 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2424 xcp = &cpi->cpi_std[cp->cp_eax]; 2425 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2426 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2427 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2428 else 2429 /* 2430 * The caller is asking for data from an input parameter which 2431 * the kernel has not cached. In this case we go fetch from 2432 * the hardware and return the data directly to the user. 2433 */ 2434 return (__cpuid_insn(cp)); 2435 2436 cp->cp_eax = xcp->cp_eax; 2437 cp->cp_ebx = xcp->cp_ebx; 2438 cp->cp_ecx = xcp->cp_ecx; 2439 cp->cp_edx = xcp->cp_edx; 2440 return (cp->cp_eax); 2441 } 2442 2443 int 2444 cpuid_checkpass(cpu_t *cpu, int pass) 2445 { 2446 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2447 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2448 } 2449 2450 int 2451 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2452 { 2453 ASSERT(cpuid_checkpass(cpu, 3)); 2454 2455 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2456 } 2457 2458 int 2459 cpuid_is_cmt(cpu_t *cpu) 2460 { 2461 if (cpu == NULL) 2462 cpu = CPU; 2463 2464 ASSERT(cpuid_checkpass(cpu, 1)); 2465 2466 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2467 } 2468 2469 /* 2470 * AMD and Intel both implement the 64-bit variant of the syscall 2471 * instruction (syscallq), so if there's -any- support for syscall, 2472 * cpuid currently says "yes, we support this". 2473 * 2474 * However, Intel decided to -not- implement the 32-bit variant of the 2475 * syscall instruction, so we provide a predicate to allow our caller 2476 * to test that subtlety here. 2477 * 2478 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2479 * even in the case where the hardware would in fact support it. 2480 */ 2481 /*ARGSUSED*/ 2482 int 2483 cpuid_syscall32_insn(cpu_t *cpu) 2484 { 2485 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2486 2487 #if !defined(__xpv) 2488 if (cpu == NULL) 2489 cpu = CPU; 2490 2491 /*CSTYLED*/ 2492 { 2493 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2494 2495 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2496 cpi->cpi_xmaxeax >= 0x80000001 && 2497 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2498 return (1); 2499 } 2500 #endif 2501 return (0); 2502 } 2503 2504 int 2505 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2506 { 2507 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2508 2509 static const char fmt[] = 2510 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2511 static const char fmt_ht[] = 2512 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2513 2514 ASSERT(cpuid_checkpass(cpu, 1)); 2515 2516 if (cpuid_is_cmt(cpu)) 2517 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2518 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2519 cpi->cpi_family, cpi->cpi_model, 2520 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2521 return (snprintf(s, n, fmt, 2522 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2523 cpi->cpi_family, cpi->cpi_model, 2524 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2525 } 2526 2527 const char * 2528 cpuid_getvendorstr(cpu_t *cpu) 2529 { 2530 ASSERT(cpuid_checkpass(cpu, 1)); 2531 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2532 } 2533 2534 uint_t 2535 cpuid_getvendor(cpu_t *cpu) 2536 { 2537 ASSERT(cpuid_checkpass(cpu, 1)); 2538 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2539 } 2540 2541 uint_t 2542 cpuid_getfamily(cpu_t *cpu) 2543 { 2544 ASSERT(cpuid_checkpass(cpu, 1)); 2545 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2546 } 2547 2548 uint_t 2549 cpuid_getmodel(cpu_t *cpu) 2550 { 2551 ASSERT(cpuid_checkpass(cpu, 1)); 2552 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2553 } 2554 2555 uint_t 2556 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2557 { 2558 ASSERT(cpuid_checkpass(cpu, 1)); 2559 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2560 } 2561 2562 uint_t 2563 cpuid_get_ncore_per_chip(cpu_t *cpu) 2564 { 2565 ASSERT(cpuid_checkpass(cpu, 1)); 2566 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2567 } 2568 2569 uint_t 2570 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2571 { 2572 ASSERT(cpuid_checkpass(cpu, 2)); 2573 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2574 } 2575 2576 id_t 2577 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2578 { 2579 ASSERT(cpuid_checkpass(cpu, 2)); 2580 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2581 } 2582 2583 uint_t 2584 cpuid_getstep(cpu_t *cpu) 2585 { 2586 ASSERT(cpuid_checkpass(cpu, 1)); 2587 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2588 } 2589 2590 uint_t 2591 cpuid_getsig(struct cpu *cpu) 2592 { 2593 ASSERT(cpuid_checkpass(cpu, 1)); 2594 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2595 } 2596 2597 uint32_t 2598 cpuid_getchiprev(struct cpu *cpu) 2599 { 2600 ASSERT(cpuid_checkpass(cpu, 1)); 2601 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2602 } 2603 2604 const char * 2605 cpuid_getchiprevstr(struct cpu *cpu) 2606 { 2607 ASSERT(cpuid_checkpass(cpu, 1)); 2608 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2609 } 2610 2611 uint32_t 2612 cpuid_getsockettype(struct cpu *cpu) 2613 { 2614 ASSERT(cpuid_checkpass(cpu, 1)); 2615 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2616 } 2617 2618 const char * 2619 cpuid_getsocketstr(cpu_t *cpu) 2620 { 2621 static const char *socketstr = NULL; 2622 struct cpuid_info *cpi; 2623 2624 ASSERT(cpuid_checkpass(cpu, 1)); 2625 cpi = cpu->cpu_m.mcpu_cpi; 2626 2627 /* Assume that socket types are the same across the system */ 2628 if (socketstr == NULL) 2629 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 2630 cpi->cpi_model, cpi->cpi_step); 2631 2632 2633 return (socketstr); 2634 } 2635 2636 int 2637 cpuid_get_chipid(cpu_t *cpu) 2638 { 2639 ASSERT(cpuid_checkpass(cpu, 1)); 2640 2641 if (cpuid_is_cmt(cpu)) 2642 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2643 return (cpu->cpu_id); 2644 } 2645 2646 id_t 2647 cpuid_get_coreid(cpu_t *cpu) 2648 { 2649 ASSERT(cpuid_checkpass(cpu, 1)); 2650 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2651 } 2652 2653 int 2654 cpuid_get_pkgcoreid(cpu_t *cpu) 2655 { 2656 ASSERT(cpuid_checkpass(cpu, 1)); 2657 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2658 } 2659 2660 int 2661 cpuid_get_clogid(cpu_t *cpu) 2662 { 2663 ASSERT(cpuid_checkpass(cpu, 1)); 2664 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2665 } 2666 2667 int 2668 cpuid_get_cacheid(cpu_t *cpu) 2669 { 2670 ASSERT(cpuid_checkpass(cpu, 1)); 2671 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2672 } 2673 2674 uint_t 2675 cpuid_get_procnodeid(cpu_t *cpu) 2676 { 2677 ASSERT(cpuid_checkpass(cpu, 1)); 2678 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid); 2679 } 2680 2681 uint_t 2682 cpuid_get_procnodes_per_pkg(cpu_t *cpu) 2683 { 2684 ASSERT(cpuid_checkpass(cpu, 1)); 2685 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg); 2686 } 2687 2688 /*ARGSUSED*/ 2689 int 2690 cpuid_have_cr8access(cpu_t *cpu) 2691 { 2692 #if defined(__amd64) 2693 return (1); 2694 #else 2695 struct cpuid_info *cpi; 2696 2697 ASSERT(cpu != NULL); 2698 cpi = cpu->cpu_m.mcpu_cpi; 2699 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 2700 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 2701 return (1); 2702 return (0); 2703 #endif 2704 } 2705 2706 uint32_t 2707 cpuid_get_apicid(cpu_t *cpu) 2708 { 2709 ASSERT(cpuid_checkpass(cpu, 1)); 2710 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 2711 return (UINT32_MAX); 2712 } else { 2713 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 2714 } 2715 } 2716 2717 void 2718 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2719 { 2720 struct cpuid_info *cpi; 2721 2722 if (cpu == NULL) 2723 cpu = CPU; 2724 cpi = cpu->cpu_m.mcpu_cpi; 2725 2726 ASSERT(cpuid_checkpass(cpu, 1)); 2727 2728 if (pabits) 2729 *pabits = cpi->cpi_pabits; 2730 if (vabits) 2731 *vabits = cpi->cpi_vabits; 2732 } 2733 2734 /* 2735 * Returns the number of data TLB entries for a corresponding 2736 * pagesize. If it can't be computed, or isn't known, the 2737 * routine returns zero. If you ask about an architecturally 2738 * impossible pagesize, the routine will panic (so that the 2739 * hat implementor knows that things are inconsistent.) 2740 */ 2741 uint_t 2742 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2743 { 2744 struct cpuid_info *cpi; 2745 uint_t dtlb_nent = 0; 2746 2747 if (cpu == NULL) 2748 cpu = CPU; 2749 cpi = cpu->cpu_m.mcpu_cpi; 2750 2751 ASSERT(cpuid_checkpass(cpu, 1)); 2752 2753 /* 2754 * Check the L2 TLB info 2755 */ 2756 if (cpi->cpi_xmaxeax >= 0x80000006) { 2757 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2758 2759 switch (pagesize) { 2760 2761 case 4 * 1024: 2762 /* 2763 * All zero in the top 16 bits of the register 2764 * indicates a unified TLB. Size is in low 16 bits. 2765 */ 2766 if ((cp->cp_ebx & 0xffff0000) == 0) 2767 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2768 else 2769 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2770 break; 2771 2772 case 2 * 1024 * 1024: 2773 if ((cp->cp_eax & 0xffff0000) == 0) 2774 dtlb_nent = cp->cp_eax & 0x0000ffff; 2775 else 2776 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2777 break; 2778 2779 default: 2780 panic("unknown L2 pagesize"); 2781 /*NOTREACHED*/ 2782 } 2783 } 2784 2785 if (dtlb_nent != 0) 2786 return (dtlb_nent); 2787 2788 /* 2789 * No L2 TLB support for this size, try L1. 2790 */ 2791 if (cpi->cpi_xmaxeax >= 0x80000005) { 2792 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2793 2794 switch (pagesize) { 2795 case 4 * 1024: 2796 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2797 break; 2798 case 2 * 1024 * 1024: 2799 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2800 break; 2801 default: 2802 panic("unknown L1 d-TLB pagesize"); 2803 /*NOTREACHED*/ 2804 } 2805 } 2806 2807 return (dtlb_nent); 2808 } 2809 2810 /* 2811 * Return 0 if the erratum is not present or not applicable, positive 2812 * if it is, and negative if the status of the erratum is unknown. 2813 * 2814 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2815 * Processors" #25759, Rev 3.57, August 2005 2816 */ 2817 int 2818 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2819 { 2820 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2821 uint_t eax; 2822 2823 /* 2824 * Bail out if this CPU isn't an AMD CPU, or if it's 2825 * a legacy (32-bit) AMD CPU. 2826 */ 2827 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2828 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2829 cpi->cpi_family == 6) 2830 2831 return (0); 2832 2833 eax = cpi->cpi_std[1].cp_eax; 2834 2835 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2836 #define SH_B3(eax) (eax == 0xf51) 2837 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2838 2839 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2840 2841 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2842 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2843 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2844 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2845 2846 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2847 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2848 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2849 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2850 2851 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2852 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2853 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2854 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2855 #define BH_E4(eax) (eax == 0x20fb1) 2856 #define SH_E5(eax) (eax == 0x20f42) 2857 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2858 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2859 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2860 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2861 DH_E6(eax) || JH_E6(eax)) 2862 2863 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2864 #define DR_B0(eax) (eax == 0x100f20) 2865 #define DR_B1(eax) (eax == 0x100f21) 2866 #define DR_BA(eax) (eax == 0x100f2a) 2867 #define DR_B2(eax) (eax == 0x100f22) 2868 #define DR_B3(eax) (eax == 0x100f23) 2869 #define RB_C0(eax) (eax == 0x100f40) 2870 2871 switch (erratum) { 2872 case 1: 2873 return (cpi->cpi_family < 0x10); 2874 case 51: /* what does the asterisk mean? */ 2875 return (B(eax) || SH_C0(eax) || CG(eax)); 2876 case 52: 2877 return (B(eax)); 2878 case 57: 2879 return (cpi->cpi_family <= 0x11); 2880 case 58: 2881 return (B(eax)); 2882 case 60: 2883 return (cpi->cpi_family <= 0x11); 2884 case 61: 2885 case 62: 2886 case 63: 2887 case 64: 2888 case 65: 2889 case 66: 2890 case 68: 2891 case 69: 2892 case 70: 2893 case 71: 2894 return (B(eax)); 2895 case 72: 2896 return (SH_B0(eax)); 2897 case 74: 2898 return (B(eax)); 2899 case 75: 2900 return (cpi->cpi_family < 0x10); 2901 case 76: 2902 return (B(eax)); 2903 case 77: 2904 return (cpi->cpi_family <= 0x11); 2905 case 78: 2906 return (B(eax) || SH_C0(eax)); 2907 case 79: 2908 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2909 case 80: 2910 case 81: 2911 case 82: 2912 return (B(eax)); 2913 case 83: 2914 return (B(eax) || SH_C0(eax) || CG(eax)); 2915 case 85: 2916 return (cpi->cpi_family < 0x10); 2917 case 86: 2918 return (SH_C0(eax) || CG(eax)); 2919 case 88: 2920 #if !defined(__amd64) 2921 return (0); 2922 #else 2923 return (B(eax) || SH_C0(eax)); 2924 #endif 2925 case 89: 2926 return (cpi->cpi_family < 0x10); 2927 case 90: 2928 return (B(eax) || SH_C0(eax) || CG(eax)); 2929 case 91: 2930 case 92: 2931 return (B(eax) || SH_C0(eax)); 2932 case 93: 2933 return (SH_C0(eax)); 2934 case 94: 2935 return (B(eax) || SH_C0(eax) || CG(eax)); 2936 case 95: 2937 #if !defined(__amd64) 2938 return (0); 2939 #else 2940 return (B(eax) || SH_C0(eax)); 2941 #endif 2942 case 96: 2943 return (B(eax) || SH_C0(eax) || CG(eax)); 2944 case 97: 2945 case 98: 2946 return (SH_C0(eax) || CG(eax)); 2947 case 99: 2948 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2949 case 100: 2950 return (B(eax) || SH_C0(eax)); 2951 case 101: 2952 case 103: 2953 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2954 case 104: 2955 return (SH_C0(eax) || CG(eax) || D0(eax)); 2956 case 105: 2957 case 106: 2958 case 107: 2959 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2960 case 108: 2961 return (DH_CG(eax)); 2962 case 109: 2963 return (SH_C0(eax) || CG(eax) || D0(eax)); 2964 case 110: 2965 return (D0(eax) || EX(eax)); 2966 case 111: 2967 return (CG(eax)); 2968 case 112: 2969 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2970 case 113: 2971 return (eax == 0x20fc0); 2972 case 114: 2973 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2974 case 115: 2975 return (SH_E0(eax) || JH_E1(eax)); 2976 case 116: 2977 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2978 case 117: 2979 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2980 case 118: 2981 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2982 JH_E6(eax)); 2983 case 121: 2984 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2985 case 122: 2986 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2987 case 123: 2988 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2989 case 131: 2990 return (cpi->cpi_family < 0x10); 2991 case 6336786: 2992 /* 2993 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2994 * if this is a K8 family or newer processor 2995 */ 2996 if (CPI_FAMILY(cpi) == 0xf) { 2997 struct cpuid_regs regs; 2998 regs.cp_eax = 0x80000007; 2999 (void) __cpuid_insn(®s); 3000 return (!(regs.cp_edx & 0x100)); 3001 } 3002 return (0); 3003 case 6323525: 3004 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 3005 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 3006 3007 case 6671130: 3008 /* 3009 * check for processors (pre-Shanghai) that do not provide 3010 * optimal management of 1gb ptes in its tlb. 3011 */ 3012 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 3013 3014 case 298: 3015 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 3016 DR_B2(eax) || RB_C0(eax)); 3017 3018 default: 3019 return (-1); 3020 3021 } 3022 } 3023 3024 /* 3025 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 3026 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 3027 */ 3028 int 3029 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 3030 { 3031 struct cpuid_info *cpi; 3032 uint_t osvwid; 3033 static int osvwfeature = -1; 3034 uint64_t osvwlength; 3035 3036 3037 cpi = cpu->cpu_m.mcpu_cpi; 3038 3039 /* confirm OSVW supported */ 3040 if (osvwfeature == -1) { 3041 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 3042 } else { 3043 /* assert that osvw feature setting is consistent on all cpus */ 3044 ASSERT(osvwfeature == 3045 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 3046 } 3047 if (!osvwfeature) 3048 return (-1); 3049 3050 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 3051 3052 switch (erratum) { 3053 case 298: /* osvwid is 0 */ 3054 osvwid = 0; 3055 if (osvwlength <= (uint64_t)osvwid) { 3056 /* osvwid 0 is unknown */ 3057 return (-1); 3058 } 3059 3060 /* 3061 * Check the OSVW STATUS MSR to determine the state 3062 * of the erratum where: 3063 * 0 - fixed by HW 3064 * 1 - BIOS has applied the workaround when BIOS 3065 * workaround is available. (Or for other errata, 3066 * OS workaround is required.) 3067 * For a value of 1, caller will confirm that the 3068 * erratum 298 workaround has indeed been applied by BIOS. 3069 * 3070 * A 1 may be set in cpus that have a HW fix 3071 * in a mixed cpu system. Regarding erratum 298: 3072 * In a multiprocessor platform, the workaround above 3073 * should be applied to all processors regardless of 3074 * silicon revision when an affected processor is 3075 * present. 3076 */ 3077 3078 return (rdmsr(MSR_AMD_OSVW_STATUS + 3079 (osvwid / OSVW_ID_CNT_PER_MSR)) & 3080 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 3081 3082 default: 3083 return (-1); 3084 } 3085 } 3086 3087 static const char assoc_str[] = "associativity"; 3088 static const char line_str[] = "line-size"; 3089 static const char size_str[] = "size"; 3090 3091 static void 3092 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 3093 uint32_t val) 3094 { 3095 char buf[128]; 3096 3097 /* 3098 * ndi_prop_update_int() is used because it is desirable for 3099 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 3100 */ 3101 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 3102 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 3103 } 3104 3105 /* 3106 * Intel-style cache/tlb description 3107 * 3108 * Standard cpuid level 2 gives a randomly ordered 3109 * selection of tags that index into a table that describes 3110 * cache and tlb properties. 3111 */ 3112 3113 static const char l1_icache_str[] = "l1-icache"; 3114 static const char l1_dcache_str[] = "l1-dcache"; 3115 static const char l2_cache_str[] = "l2-cache"; 3116 static const char l3_cache_str[] = "l3-cache"; 3117 static const char itlb4k_str[] = "itlb-4K"; 3118 static const char dtlb4k_str[] = "dtlb-4K"; 3119 static const char itlb2M_str[] = "itlb-2M"; 3120 static const char itlb4M_str[] = "itlb-4M"; 3121 static const char dtlb4M_str[] = "dtlb-4M"; 3122 static const char dtlb24_str[] = "dtlb0-2M-4M"; 3123 static const char itlb424_str[] = "itlb-4K-2M-4M"; 3124 static const char itlb24_str[] = "itlb-2M-4M"; 3125 static const char dtlb44_str[] = "dtlb-4K-4M"; 3126 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 3127 static const char sl2_cache_str[] = "sectored-l2-cache"; 3128 static const char itrace_str[] = "itrace-cache"; 3129 static const char sl3_cache_str[] = "sectored-l3-cache"; 3130 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3131 3132 static const struct cachetab { 3133 uint8_t ct_code; 3134 uint8_t ct_assoc; 3135 uint16_t ct_line_size; 3136 size_t ct_size; 3137 const char *ct_label; 3138 } intel_ctab[] = { 3139 /* 3140 * maintain descending order! 3141 * 3142 * Codes ignored - Reason 3143 * ---------------------- 3144 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3145 * f0H/f1H - Currently we do not interpret prefetch size by design 3146 */ 3147 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3148 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3149 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3150 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3151 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3152 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3153 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3154 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3155 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3156 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3157 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3158 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3159 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3160 { 0xc0, 4, 0, 8, dtlb44_str }, 3161 { 0xba, 4, 0, 64, dtlb4k_str }, 3162 { 0xb4, 4, 0, 256, dtlb4k_str }, 3163 { 0xb3, 4, 0, 128, dtlb4k_str }, 3164 { 0xb2, 4, 0, 64, itlb4k_str }, 3165 { 0xb0, 4, 0, 128, itlb4k_str }, 3166 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3167 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3168 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3169 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3170 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3171 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3172 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3173 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3174 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3175 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3176 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3177 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3178 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3179 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3180 { 0x73, 8, 0, 64*1024, itrace_str}, 3181 { 0x72, 8, 0, 32*1024, itrace_str}, 3182 { 0x71, 8, 0, 16*1024, itrace_str}, 3183 { 0x70, 8, 0, 12*1024, itrace_str}, 3184 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3185 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3186 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3187 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3188 { 0x5d, 0, 0, 256, dtlb44_str}, 3189 { 0x5c, 0, 0, 128, dtlb44_str}, 3190 { 0x5b, 0, 0, 64, dtlb44_str}, 3191 { 0x5a, 4, 0, 32, dtlb24_str}, 3192 { 0x59, 0, 0, 16, dtlb4k_str}, 3193 { 0x57, 4, 0, 16, dtlb4k_str}, 3194 { 0x56, 4, 0, 16, dtlb4M_str}, 3195 { 0x55, 0, 0, 7, itlb24_str}, 3196 { 0x52, 0, 0, 256, itlb424_str}, 3197 { 0x51, 0, 0, 128, itlb424_str}, 3198 { 0x50, 0, 0, 64, itlb424_str}, 3199 { 0x4f, 0, 0, 32, itlb4k_str}, 3200 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3201 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3202 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3203 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3204 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3205 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3206 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3207 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3208 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3209 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3210 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3211 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3212 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3213 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3214 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3215 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3216 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3217 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3218 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3219 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3220 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3221 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3222 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3223 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3224 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3225 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3226 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3227 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3228 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3229 { 0x0b, 4, 0, 4, itlb4M_str}, 3230 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3231 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3232 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3233 { 0x05, 4, 0, 32, dtlb4M_str}, 3234 { 0x04, 4, 0, 8, dtlb4M_str}, 3235 { 0x03, 4, 0, 64, dtlb4k_str}, 3236 { 0x02, 4, 0, 2, itlb4M_str}, 3237 { 0x01, 4, 0, 32, itlb4k_str}, 3238 { 0 } 3239 }; 3240 3241 static const struct cachetab cyrix_ctab[] = { 3242 { 0x70, 4, 0, 32, "tlb-4K" }, 3243 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3244 { 0 } 3245 }; 3246 3247 /* 3248 * Search a cache table for a matching entry 3249 */ 3250 static const struct cachetab * 3251 find_cacheent(const struct cachetab *ct, uint_t code) 3252 { 3253 if (code != 0) { 3254 for (; ct->ct_code != 0; ct++) 3255 if (ct->ct_code <= code) 3256 break; 3257 if (ct->ct_code == code) 3258 return (ct); 3259 } 3260 return (NULL); 3261 } 3262 3263 /* 3264 * Populate cachetab entry with L2 or L3 cache-information using 3265 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3266 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3267 * information is found. 3268 */ 3269 static int 3270 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3271 { 3272 uint32_t level, i; 3273 int ret = 0; 3274 3275 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3276 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3277 3278 if (level == 2 || level == 3) { 3279 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3280 ct->ct_line_size = 3281 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3282 ct->ct_size = ct->ct_assoc * 3283 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3284 ct->ct_line_size * 3285 (cpi->cpi_std_4[i]->cp_ecx + 1); 3286 3287 if (level == 2) { 3288 ct->ct_label = l2_cache_str; 3289 } else if (level == 3) { 3290 ct->ct_label = l3_cache_str; 3291 } 3292 ret = 1; 3293 } 3294 } 3295 3296 return (ret); 3297 } 3298 3299 /* 3300 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3301 * The walk is terminated if the walker returns non-zero. 3302 */ 3303 static void 3304 intel_walk_cacheinfo(struct cpuid_info *cpi, 3305 void *arg, int (*func)(void *, const struct cachetab *)) 3306 { 3307 const struct cachetab *ct; 3308 struct cachetab des_49_ct, des_b1_ct; 3309 uint8_t *dp; 3310 int i; 3311 3312 if ((dp = cpi->cpi_cacheinfo) == NULL) 3313 return; 3314 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3315 /* 3316 * For overloaded descriptor 0x49 we use cpuid function 4 3317 * if supported by the current processor, to create 3318 * cache information. 3319 * For overloaded descriptor 0xb1 we use X86_PAE flag 3320 * to disambiguate the cache information. 3321 */ 3322 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3323 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3324 ct = &des_49_ct; 3325 } else if (*dp == 0xb1) { 3326 des_b1_ct.ct_code = 0xb1; 3327 des_b1_ct.ct_assoc = 4; 3328 des_b1_ct.ct_line_size = 0; 3329 if (x86_feature & X86_PAE) { 3330 des_b1_ct.ct_size = 8; 3331 des_b1_ct.ct_label = itlb2M_str; 3332 } else { 3333 des_b1_ct.ct_size = 4; 3334 des_b1_ct.ct_label = itlb4M_str; 3335 } 3336 ct = &des_b1_ct; 3337 } else { 3338 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3339 continue; 3340 } 3341 } 3342 3343 if (func(arg, ct) != 0) { 3344 break; 3345 } 3346 } 3347 } 3348 3349 /* 3350 * (Like the Intel one, except for Cyrix CPUs) 3351 */ 3352 static void 3353 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3354 void *arg, int (*func)(void *, const struct cachetab *)) 3355 { 3356 const struct cachetab *ct; 3357 uint8_t *dp; 3358 int i; 3359 3360 if ((dp = cpi->cpi_cacheinfo) == NULL) 3361 return; 3362 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3363 /* 3364 * Search Cyrix-specific descriptor table first .. 3365 */ 3366 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3367 if (func(arg, ct) != 0) 3368 break; 3369 continue; 3370 } 3371 /* 3372 * .. else fall back to the Intel one 3373 */ 3374 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3375 if (func(arg, ct) != 0) 3376 break; 3377 continue; 3378 } 3379 } 3380 } 3381 3382 /* 3383 * A cacheinfo walker that adds associativity, line-size, and size properties 3384 * to the devinfo node it is passed as an argument. 3385 */ 3386 static int 3387 add_cacheent_props(void *arg, const struct cachetab *ct) 3388 { 3389 dev_info_t *devi = arg; 3390 3391 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3392 if (ct->ct_line_size != 0) 3393 add_cache_prop(devi, ct->ct_label, line_str, 3394 ct->ct_line_size); 3395 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3396 return (0); 3397 } 3398 3399 3400 static const char fully_assoc[] = "fully-associative?"; 3401 3402 /* 3403 * AMD style cache/tlb description 3404 * 3405 * Extended functions 5 and 6 directly describe properties of 3406 * tlbs and various cache levels. 3407 */ 3408 static void 3409 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3410 { 3411 switch (assoc) { 3412 case 0: /* reserved; ignore */ 3413 break; 3414 default: 3415 add_cache_prop(devi, label, assoc_str, assoc); 3416 break; 3417 case 0xff: 3418 add_cache_prop(devi, label, fully_assoc, 1); 3419 break; 3420 } 3421 } 3422 3423 static void 3424 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3425 { 3426 if (size == 0) 3427 return; 3428 add_cache_prop(devi, label, size_str, size); 3429 add_amd_assoc(devi, label, assoc); 3430 } 3431 3432 static void 3433 add_amd_cache(dev_info_t *devi, const char *label, 3434 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3435 { 3436 if (size == 0 || line_size == 0) 3437 return; 3438 add_amd_assoc(devi, label, assoc); 3439 /* 3440 * Most AMD parts have a sectored cache. Multiple cache lines are 3441 * associated with each tag. A sector consists of all cache lines 3442 * associated with a tag. For example, the AMD K6-III has a sector 3443 * size of 2 cache lines per tag. 3444 */ 3445 if (lines_per_tag != 0) 3446 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3447 add_cache_prop(devi, label, line_str, line_size); 3448 add_cache_prop(devi, label, size_str, size * 1024); 3449 } 3450 3451 static void 3452 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3453 { 3454 switch (assoc) { 3455 case 0: /* off */ 3456 break; 3457 case 1: 3458 case 2: 3459 case 4: 3460 add_cache_prop(devi, label, assoc_str, assoc); 3461 break; 3462 case 6: 3463 add_cache_prop(devi, label, assoc_str, 8); 3464 break; 3465 case 8: 3466 add_cache_prop(devi, label, assoc_str, 16); 3467 break; 3468 case 0xf: 3469 add_cache_prop(devi, label, fully_assoc, 1); 3470 break; 3471 default: /* reserved; ignore */ 3472 break; 3473 } 3474 } 3475 3476 static void 3477 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3478 { 3479 if (size == 0 || assoc == 0) 3480 return; 3481 add_amd_l2_assoc(devi, label, assoc); 3482 add_cache_prop(devi, label, size_str, size); 3483 } 3484 3485 static void 3486 add_amd_l2_cache(dev_info_t *devi, const char *label, 3487 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3488 { 3489 if (size == 0 || assoc == 0 || line_size == 0) 3490 return; 3491 add_amd_l2_assoc(devi, label, assoc); 3492 if (lines_per_tag != 0) 3493 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3494 add_cache_prop(devi, label, line_str, line_size); 3495 add_cache_prop(devi, label, size_str, size * 1024); 3496 } 3497 3498 static void 3499 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3500 { 3501 struct cpuid_regs *cp; 3502 3503 if (cpi->cpi_xmaxeax < 0x80000005) 3504 return; 3505 cp = &cpi->cpi_extd[5]; 3506 3507 /* 3508 * 4M/2M L1 TLB configuration 3509 * 3510 * We report the size for 2M pages because AMD uses two 3511 * TLB entries for one 4M page. 3512 */ 3513 add_amd_tlb(devi, "dtlb-2M", 3514 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3515 add_amd_tlb(devi, "itlb-2M", 3516 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3517 3518 /* 3519 * 4K L1 TLB configuration 3520 */ 3521 3522 switch (cpi->cpi_vendor) { 3523 uint_t nentries; 3524 case X86_VENDOR_TM: 3525 if (cpi->cpi_family >= 5) { 3526 /* 3527 * Crusoe processors have 256 TLB entries, but 3528 * cpuid data format constrains them to only 3529 * reporting 255 of them. 3530 */ 3531 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3532 nentries = 256; 3533 /* 3534 * Crusoe processors also have a unified TLB 3535 */ 3536 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3537 nentries); 3538 break; 3539 } 3540 /*FALLTHROUGH*/ 3541 default: 3542 add_amd_tlb(devi, itlb4k_str, 3543 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3544 add_amd_tlb(devi, dtlb4k_str, 3545 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3546 break; 3547 } 3548 3549 /* 3550 * data L1 cache configuration 3551 */ 3552 3553 add_amd_cache(devi, l1_dcache_str, 3554 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3555 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3556 3557 /* 3558 * code L1 cache configuration 3559 */ 3560 3561 add_amd_cache(devi, l1_icache_str, 3562 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3563 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3564 3565 if (cpi->cpi_xmaxeax < 0x80000006) 3566 return; 3567 cp = &cpi->cpi_extd[6]; 3568 3569 /* Check for a unified L2 TLB for large pages */ 3570 3571 if (BITX(cp->cp_eax, 31, 16) == 0) 3572 add_amd_l2_tlb(devi, "l2-tlb-2M", 3573 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3574 else { 3575 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3576 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3577 add_amd_l2_tlb(devi, "l2-itlb-2M", 3578 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3579 } 3580 3581 /* Check for a unified L2 TLB for 4K pages */ 3582 3583 if (BITX(cp->cp_ebx, 31, 16) == 0) { 3584 add_amd_l2_tlb(devi, "l2-tlb-4K", 3585 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3586 } else { 3587 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3588 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3589 add_amd_l2_tlb(devi, "l2-itlb-4K", 3590 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3591 } 3592 3593 add_amd_l2_cache(devi, l2_cache_str, 3594 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3595 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3596 } 3597 3598 /* 3599 * There are two basic ways that the x86 world describes it cache 3600 * and tlb architecture - Intel's way and AMD's way. 3601 * 3602 * Return which flavor of cache architecture we should use 3603 */ 3604 static int 3605 x86_which_cacheinfo(struct cpuid_info *cpi) 3606 { 3607 switch (cpi->cpi_vendor) { 3608 case X86_VENDOR_Intel: 3609 if (cpi->cpi_maxeax >= 2) 3610 return (X86_VENDOR_Intel); 3611 break; 3612 case X86_VENDOR_AMD: 3613 /* 3614 * The K5 model 1 was the first part from AMD that reported 3615 * cache sizes via extended cpuid functions. 3616 */ 3617 if (cpi->cpi_family > 5 || 3618 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3619 return (X86_VENDOR_AMD); 3620 break; 3621 case X86_VENDOR_TM: 3622 if (cpi->cpi_family >= 5) 3623 return (X86_VENDOR_AMD); 3624 /*FALLTHROUGH*/ 3625 default: 3626 /* 3627 * If they have extended CPU data for 0x80000005 3628 * then we assume they have AMD-format cache 3629 * information. 3630 * 3631 * If not, and the vendor happens to be Cyrix, 3632 * then try our-Cyrix specific handler. 3633 * 3634 * If we're not Cyrix, then assume we're using Intel's 3635 * table-driven format instead. 3636 */ 3637 if (cpi->cpi_xmaxeax >= 0x80000005) 3638 return (X86_VENDOR_AMD); 3639 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3640 return (X86_VENDOR_Cyrix); 3641 else if (cpi->cpi_maxeax >= 2) 3642 return (X86_VENDOR_Intel); 3643 break; 3644 } 3645 return (-1); 3646 } 3647 3648 void 3649 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 3650 struct cpuid_info *cpi) 3651 { 3652 dev_info_t *cpu_devi; 3653 int create; 3654 3655 cpu_devi = (dev_info_t *)dip; 3656 3657 /* device_type */ 3658 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3659 "device_type", "cpu"); 3660 3661 /* reg */ 3662 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3663 "reg", cpu_id); 3664 3665 /* cpu-mhz, and clock-frequency */ 3666 if (cpu_freq > 0) { 3667 long long mul; 3668 3669 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3670 "cpu-mhz", cpu_freq); 3671 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3672 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3673 "clock-frequency", (int)mul); 3674 } 3675 3676 if ((x86_feature & X86_CPUID) == 0) { 3677 return; 3678 } 3679 3680 /* vendor-id */ 3681 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3682 "vendor-id", cpi->cpi_vendorstr); 3683 3684 if (cpi->cpi_maxeax == 0) { 3685 return; 3686 } 3687 3688 /* 3689 * family, model, and step 3690 */ 3691 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3692 "family", CPI_FAMILY(cpi)); 3693 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3694 "cpu-model", CPI_MODEL(cpi)); 3695 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3696 "stepping-id", CPI_STEP(cpi)); 3697 3698 /* type */ 3699 switch (cpi->cpi_vendor) { 3700 case X86_VENDOR_Intel: 3701 create = 1; 3702 break; 3703 default: 3704 create = 0; 3705 break; 3706 } 3707 if (create) 3708 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3709 "type", CPI_TYPE(cpi)); 3710 3711 /* ext-family */ 3712 switch (cpi->cpi_vendor) { 3713 case X86_VENDOR_Intel: 3714 case X86_VENDOR_AMD: 3715 create = cpi->cpi_family >= 0xf; 3716 break; 3717 default: 3718 create = 0; 3719 break; 3720 } 3721 if (create) 3722 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3723 "ext-family", CPI_FAMILY_XTD(cpi)); 3724 3725 /* ext-model */ 3726 switch (cpi->cpi_vendor) { 3727 case X86_VENDOR_Intel: 3728 create = IS_EXTENDED_MODEL_INTEL(cpi); 3729 break; 3730 case X86_VENDOR_AMD: 3731 create = CPI_FAMILY(cpi) == 0xf; 3732 break; 3733 default: 3734 create = 0; 3735 break; 3736 } 3737 if (create) 3738 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3739 "ext-model", CPI_MODEL_XTD(cpi)); 3740 3741 /* generation */ 3742 switch (cpi->cpi_vendor) { 3743 case X86_VENDOR_AMD: 3744 /* 3745 * AMD K5 model 1 was the first part to support this 3746 */ 3747 create = cpi->cpi_xmaxeax >= 0x80000001; 3748 break; 3749 default: 3750 create = 0; 3751 break; 3752 } 3753 if (create) 3754 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3755 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3756 3757 /* brand-id */ 3758 switch (cpi->cpi_vendor) { 3759 case X86_VENDOR_Intel: 3760 /* 3761 * brand id first appeared on Pentium III Xeon model 8, 3762 * and Celeron model 8 processors and Opteron 3763 */ 3764 create = cpi->cpi_family > 6 || 3765 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3766 break; 3767 case X86_VENDOR_AMD: 3768 create = cpi->cpi_family >= 0xf; 3769 break; 3770 default: 3771 create = 0; 3772 break; 3773 } 3774 if (create && cpi->cpi_brandid != 0) { 3775 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3776 "brand-id", cpi->cpi_brandid); 3777 } 3778 3779 /* chunks, and apic-id */ 3780 switch (cpi->cpi_vendor) { 3781 /* 3782 * first available on Pentium IV and Opteron (K8) 3783 */ 3784 case X86_VENDOR_Intel: 3785 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3786 break; 3787 case X86_VENDOR_AMD: 3788 create = cpi->cpi_family >= 0xf; 3789 break; 3790 default: 3791 create = 0; 3792 break; 3793 } 3794 if (create) { 3795 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3796 "chunks", CPI_CHUNKS(cpi)); 3797 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3798 "apic-id", cpi->cpi_apicid); 3799 if (cpi->cpi_chipid >= 0) { 3800 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3801 "chip#", cpi->cpi_chipid); 3802 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3803 "clog#", cpi->cpi_clogid); 3804 } 3805 } 3806 3807 /* cpuid-features */ 3808 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3809 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3810 3811 3812 /* cpuid-features-ecx */ 3813 switch (cpi->cpi_vendor) { 3814 case X86_VENDOR_Intel: 3815 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3816 break; 3817 default: 3818 create = 0; 3819 break; 3820 } 3821 if (create) 3822 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3823 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3824 3825 /* ext-cpuid-features */ 3826 switch (cpi->cpi_vendor) { 3827 case X86_VENDOR_Intel: 3828 case X86_VENDOR_AMD: 3829 case X86_VENDOR_Cyrix: 3830 case X86_VENDOR_TM: 3831 case X86_VENDOR_Centaur: 3832 create = cpi->cpi_xmaxeax >= 0x80000001; 3833 break; 3834 default: 3835 create = 0; 3836 break; 3837 } 3838 if (create) { 3839 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3840 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3841 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3842 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3843 } 3844 3845 /* 3846 * Brand String first appeared in Intel Pentium IV, AMD K5 3847 * model 1, and Cyrix GXm. On earlier models we try and 3848 * simulate something similar .. so this string should always 3849 * same -something- about the processor, however lame. 3850 */ 3851 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3852 "brand-string", cpi->cpi_brandstr); 3853 3854 /* 3855 * Finally, cache and tlb information 3856 */ 3857 switch (x86_which_cacheinfo(cpi)) { 3858 case X86_VENDOR_Intel: 3859 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3860 break; 3861 case X86_VENDOR_Cyrix: 3862 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3863 break; 3864 case X86_VENDOR_AMD: 3865 amd_cache_info(cpi, cpu_devi); 3866 break; 3867 default: 3868 break; 3869 } 3870 } 3871 3872 struct l2info { 3873 int *l2i_csz; 3874 int *l2i_lsz; 3875 int *l2i_assoc; 3876 int l2i_ret; 3877 }; 3878 3879 /* 3880 * A cacheinfo walker that fetches the size, line-size and associativity 3881 * of the L2 cache 3882 */ 3883 static int 3884 intel_l2cinfo(void *arg, const struct cachetab *ct) 3885 { 3886 struct l2info *l2i = arg; 3887 int *ip; 3888 3889 if (ct->ct_label != l2_cache_str && 3890 ct->ct_label != sl2_cache_str) 3891 return (0); /* not an L2 -- keep walking */ 3892 3893 if ((ip = l2i->l2i_csz) != NULL) 3894 *ip = ct->ct_size; 3895 if ((ip = l2i->l2i_lsz) != NULL) 3896 *ip = ct->ct_line_size; 3897 if ((ip = l2i->l2i_assoc) != NULL) 3898 *ip = ct->ct_assoc; 3899 l2i->l2i_ret = ct->ct_size; 3900 return (1); /* was an L2 -- terminate walk */ 3901 } 3902 3903 /* 3904 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3905 * 3906 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3907 * value is the associativity, the associativity for the L2 cache and 3908 * tlb is encoded in the following table. The 4 bit L2 value serves as 3909 * an index into the amd_afd[] array to determine the associativity. 3910 * -1 is undefined. 0 is fully associative. 3911 */ 3912 3913 static int amd_afd[] = 3914 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3915 3916 static void 3917 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3918 { 3919 struct cpuid_regs *cp; 3920 uint_t size, assoc; 3921 int i; 3922 int *ip; 3923 3924 if (cpi->cpi_xmaxeax < 0x80000006) 3925 return; 3926 cp = &cpi->cpi_extd[6]; 3927 3928 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3929 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3930 uint_t cachesz = size * 1024; 3931 assoc = amd_afd[i]; 3932 3933 ASSERT(assoc != -1); 3934 3935 if ((ip = l2i->l2i_csz) != NULL) 3936 *ip = cachesz; 3937 if ((ip = l2i->l2i_lsz) != NULL) 3938 *ip = BITX(cp->cp_ecx, 7, 0); 3939 if ((ip = l2i->l2i_assoc) != NULL) 3940 *ip = assoc; 3941 l2i->l2i_ret = cachesz; 3942 } 3943 } 3944 3945 int 3946 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3947 { 3948 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3949 struct l2info __l2info, *l2i = &__l2info; 3950 3951 l2i->l2i_csz = csz; 3952 l2i->l2i_lsz = lsz; 3953 l2i->l2i_assoc = assoc; 3954 l2i->l2i_ret = -1; 3955 3956 switch (x86_which_cacheinfo(cpi)) { 3957 case X86_VENDOR_Intel: 3958 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3959 break; 3960 case X86_VENDOR_Cyrix: 3961 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3962 break; 3963 case X86_VENDOR_AMD: 3964 amd_l2cacheinfo(cpi, l2i); 3965 break; 3966 default: 3967 break; 3968 } 3969 return (l2i->l2i_ret); 3970 } 3971 3972 #if !defined(__xpv) 3973 3974 uint32_t * 3975 cpuid_mwait_alloc(cpu_t *cpu) 3976 { 3977 uint32_t *ret; 3978 size_t mwait_size; 3979 3980 ASSERT(cpuid_checkpass(cpu, 2)); 3981 3982 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3983 if (mwait_size == 0) 3984 return (NULL); 3985 3986 /* 3987 * kmem_alloc() returns cache line size aligned data for mwait_size 3988 * allocations. mwait_size is currently cache line sized. Neither 3989 * of these implementation details are guarantied to be true in the 3990 * future. 3991 * 3992 * First try allocating mwait_size as kmem_alloc() currently returns 3993 * correctly aligned memory. If kmem_alloc() does not return 3994 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3995 * 3996 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3997 * decide to free this memory. 3998 */ 3999 ret = kmem_zalloc(mwait_size, KM_SLEEP); 4000 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 4001 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4002 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 4003 *ret = MWAIT_RUNNING; 4004 return (ret); 4005 } else { 4006 kmem_free(ret, mwait_size); 4007 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 4008 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4009 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 4010 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 4011 *ret = MWAIT_RUNNING; 4012 return (ret); 4013 } 4014 } 4015 4016 void 4017 cpuid_mwait_free(cpu_t *cpu) 4018 { 4019 ASSERT(cpuid_checkpass(cpu, 2)); 4020 4021 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 4022 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 4023 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 4024 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 4025 } 4026 4027 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 4028 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 4029 } 4030 4031 void 4032 patch_tsc_read(int flag) 4033 { 4034 size_t cnt; 4035 4036 switch (flag) { 4037 case X86_NO_TSC: 4038 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 4039 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 4040 break; 4041 case X86_HAVE_TSCP: 4042 cnt = &_tscp_end - &_tscp_start; 4043 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 4044 break; 4045 case X86_TSC_MFENCE: 4046 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 4047 (void) memcpy((void *)tsc_read, 4048 (void *)&_tsc_mfence_start, cnt); 4049 break; 4050 case X86_TSC_LFENCE: 4051 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 4052 (void) memcpy((void *)tsc_read, 4053 (void *)&_tsc_lfence_start, cnt); 4054 break; 4055 default: 4056 break; 4057 } 4058 } 4059 4060 int 4061 cpuid_deep_cstates_supported(void) 4062 { 4063 struct cpuid_info *cpi; 4064 struct cpuid_regs regs; 4065 4066 ASSERT(cpuid_checkpass(CPU, 1)); 4067 4068 cpi = CPU->cpu_m.mcpu_cpi; 4069 4070 if (!(x86_feature & X86_CPUID)) 4071 return (0); 4072 4073 switch (cpi->cpi_vendor) { 4074 case X86_VENDOR_Intel: 4075 if (cpi->cpi_xmaxeax < 0x80000007) 4076 return (0); 4077 4078 /* 4079 * TSC run at a constant rate in all ACPI C-states? 4080 */ 4081 regs.cp_eax = 0x80000007; 4082 (void) __cpuid_insn(®s); 4083 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 4084 4085 default: 4086 return (0); 4087 } 4088 } 4089 4090 #endif /* !__xpv */ 4091 4092 void 4093 post_startup_cpu_fixups(void) 4094 { 4095 #ifndef __xpv 4096 /* 4097 * Some AMD processors support C1E state. Entering this state will 4098 * cause the local APIC timer to stop, which we can't deal with at 4099 * this time. 4100 */ 4101 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 4102 on_trap_data_t otd; 4103 uint64_t reg; 4104 4105 if (!on_trap(&otd, OT_DATA_ACCESS)) { 4106 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 4107 /* Disable C1E state if it is enabled by BIOS */ 4108 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 4109 AMD_ACTONCMPHALT_MASK) { 4110 reg &= ~(AMD_ACTONCMPHALT_MASK << 4111 AMD_ACTONCMPHALT_SHIFT); 4112 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 4113 } 4114 } 4115 no_trap(); 4116 } 4117 #endif /* !__xpv */ 4118 } 4119 4120 /* 4121 * Starting with the Westmere processor the local 4122 * APIC timer will continue running in all C-states, 4123 * including the deepest C-states. 4124 */ 4125 int 4126 cpuid_arat_supported(void) 4127 { 4128 struct cpuid_info *cpi; 4129 struct cpuid_regs regs; 4130 4131 ASSERT(cpuid_checkpass(CPU, 1)); 4132 ASSERT(x86_feature & X86_CPUID); 4133 4134 cpi = CPU->cpu_m.mcpu_cpi; 4135 4136 switch (cpi->cpi_vendor) { 4137 case X86_VENDOR_Intel: 4138 /* 4139 * Always-running Local APIC Timer is 4140 * indicated by CPUID.6.EAX[2]. 4141 */ 4142 if (cpi->cpi_maxeax >= 6) { 4143 regs.cp_eax = 6; 4144 (void) cpuid_insn(NULL, ®s); 4145 return (regs.cp_eax & CPUID_CSTATE_ARAT); 4146 } else { 4147 return (0); 4148 } 4149 default: 4150 return (0); 4151 } 4152 } 4153 4154 /* 4155 * Check support for Intel ENERGY_PERF_BIAS feature 4156 */ 4157 int 4158 cpuid_iepb_supported(struct cpu *cp) 4159 { 4160 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi; 4161 struct cpuid_regs regs; 4162 4163 ASSERT(cpuid_checkpass(cp, 1)); 4164 4165 if (!(x86_feature & X86_CPUID) || !(x86_feature & X86_MSR)) { 4166 return (0); 4167 } 4168 4169 /* 4170 * Intel ENERGY_PERF_BIAS MSR is indicated by 4171 * capability bit CPUID.6.ECX.3 4172 */ 4173 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6)) 4174 return (0); 4175 4176 regs.cp_eax = 0x6; 4177 (void) cpuid_insn(NULL, ®s); 4178 return (regs.cp_ecx & CPUID_EPB_SUPPORT); 4179 } 4180 4181 #if defined(__amd64) && !defined(__xpv) 4182 /* 4183 * Patch in versions of bcopy for high performance Intel Nhm processors 4184 * and later... 4185 */ 4186 void 4187 patch_memops(uint_t vendor) 4188 { 4189 size_t cnt, i; 4190 caddr_t to, from; 4191 4192 if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) { 4193 cnt = &bcopy_patch_end - &bcopy_patch_start; 4194 to = &bcopy_ck_size; 4195 from = &bcopy_patch_start; 4196 for (i = 0; i < cnt; i++) { 4197 *to++ = *from++; 4198 } 4199 } 4200 } 4201 #endif /* __amd64 && !__xpv */ 4202