/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */
/*
 * Portions Copyright 2009 Advanced Micro Devices, Inc.
 */

/*
 * Various routines to handle identification
 * and classification of x86 processors.
 */

#include <sys/types.h>
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/cpuvar.h>
#include <sys/processor.h>
#include <sys/sysmacros.h>
#include <sys/pg.h>
#include <sys/fp.h>
#include <sys/controlregs.h>
#include <sys/auxv_386.h>
#include <sys/bitmap.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#else
#include <sys/ontrap.h>
#endif

/*
 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
 * them accordingly. For most modern processors, feature detection occurs here
 * in pass 1.
 *
 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
 * for the boot CPU and does the basic analysis that the early kernel needs.
 * x86_feature is set based on the return value of cpuid_pass1() of the boot
 * CPU.
 *
 * Pass 1 includes:
 *
 *	o Determining vendor/model/family/stepping and setting x86_type and
 *	  x86_vendor accordingly.
 *	o Processing the feature flags returned by the cpuid instruction while
 *	  applying any workarounds or tricks for the specific processor.
 *	o Mapping the feature flags into Solaris feature bits (X86_*).
 *	o Processing extended feature flags if supported by the processor,
 *	  again while applying specific processor knowledge.
 *	o Determining the CMT characteristics of the system.
 *
 * Pass 1 is done on non-boot CPUs during their initialization and the results
 * are used only as a meager attempt at ensuring that all processors within the
 * system support the same features.
 *
 * Pass 2 of cpuid feature analysis happens just at the beginning
 * of startup(). It just copies in and corrects the remainder
 * of the cpuid data we depend on: standard cpuid functions that we didn't
 * need for pass1 feature analysis, and extended cpuid functions beyond the
 * simple feature processing done in pass1.
 *
 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
 * particular kernel memory allocation has been made available. It creates a
 * readable brand string based on the data collected in the first two passes.
 *
 * Pass 4 of cpuid analysis is invoked after post_startup() when all
 * the support infrastructure for various hardware features has been
 * initialized. It determines which processor features will be reported
 * to userland via the aux vector.
 *
 * All passes are executed on all CPUs, but only the boot CPU determines what
 * features the kernel will use.
 *
 * Much of the worst junk in this file is for the support of processors
 * that didn't really implement the cpuid instruction properly.
 *
 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
 * the pass numbers. Accordingly, changes to the pass code may require changes
 * to the accessor code.
 */

uint_t x86_feature = 0;
uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;
uint_t x86_clflush_size = 0;

uint_t pentiumpro_bug4046376;
uint_t pentiumpro_bug4064495;

uint_t enable486;
/*
 * This is set to the platform type that Solaris is running on.
 */
static int platform_type = -1;

#if !defined(__xpv)
/*
 * Variable to patch if hypervisor platform detection needs to be
 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
 */
int enable_platform_detection = 1;
#endif

/*
 * monitor/mwait info.
 *
 * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment. buf_actual and size_actual should be passed
 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
 */
struct mwait_info {
	size_t		mon_min;	/* min size to avoid missed wakeups */
	size_t		mon_max;	/* size to avoid false wakeups */
	size_t		size_actual;	/* size actually allocated */
	void		*buf_actual;	/* memory actually allocated */
	uint32_t	support;	/* processor support of monitor/mwait */
};

/*
 * These constants determine how many of the elements of the
 * cpuid we cache in the cpuid_info data structure; the
 * remaining elements are accessible via the cpuid instruction.
 */

#define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
#define	NMAX_CPI_EXTD	0x1c		/* eax = 0x80000000 .. 0x8000001b */

/*
 * Some terminology needs to be explained:
 *  - Socket: Something that can be plugged into a motherboard.
 *  - Package: Same as socket
 *  - Chip: Same as socket. Note that AMD's documentation uses the term "chip"
 *    differently: there, chip is the same as processor node (below)
 *  - Processor node: Some AMD processors have more than one
 *    "subprocessor" embedded in a package. These subprocessors (nodes)
 *    are fully-functional processors themselves with cores, caches,
 *    memory controllers, and PCI configuration spaces. They are connected
 *    inside the package with Hypertransport links. On single-node
 *    processors, processor node is equivalent to chip/socket/package.
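 *
 * To make the distinction concrete (a purely hypothetical configuration):
 * one plugged-in AMD package may contain two processor nodes, each a
 * fully-functional processor in its own right; in the terms above that is
 * still a single chip/socket/package, but two processor nodes.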
173 */ 174 175 struct cpuid_info { 176 uint_t cpi_pass; /* last pass completed */ 177 /* 178 * standard function information 179 */ 180 uint_t cpi_maxeax; /* fn 0: %eax */ 181 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 182 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 183 184 uint_t cpi_family; /* fn 1: extended family */ 185 uint_t cpi_model; /* fn 1: extended model */ 186 uint_t cpi_step; /* fn 1: stepping */ 187 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */ 188 /* AMD: package/socket # */ 189 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 190 int cpi_clogid; /* fn 1: %ebx: thread # */ 191 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 192 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 193 uint_t cpi_ncache; /* fn 2: number of elements */ 194 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 195 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 196 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 197 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 198 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 199 /* 200 * extended function information 201 */ 202 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 203 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 204 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 205 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 206 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */ 207 208 id_t cpi_coreid; /* same coreid => strands share core */ 209 int cpi_pkgcoreid; /* core number within single package */ 210 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 211 /* Intel: fn 4: %eax[31-26] */ 212 /* 213 * supported feature information 214 */ 215 uint32_t cpi_support[5]; 216 #define STD_EDX_FEATURES 0 217 #define AMD_EDX_FEATURES 1 218 #define TM_EDX_FEATURES 2 219 #define STD_ECX_FEATURES 3 220 #define AMD_ECX_FEATURES 4 221 /* 222 * Synthesized information, where known. 
223 */ 224 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 225 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 226 uint32_t cpi_socket; /* Chip package/socket type */ 227 228 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 229 uint32_t cpi_apicid; 230 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 231 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 232 /* Intel: 1 */ 233 }; 234 235 236 static struct cpuid_info cpuid_info0; 237 238 /* 239 * These bit fields are defined by the Intel Application Note AP-485 240 * "Intel Processor Identification and the CPUID Instruction" 241 */ 242 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 243 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 244 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 245 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 246 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 247 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 248 249 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 250 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 251 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 252 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 253 254 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 255 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 256 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 257 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 258 259 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 260 #define CPI_XMAXEAX_MAX 0x80000100 261 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 262 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 263 264 /* 265 * Function 4 (Deterministic Cache Parameters) macros 266 * Defined by Intel Application Note AP-485 267 */ 268 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 269 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 270 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 271 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 272 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 273 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 274 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 275 276 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 277 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 278 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 279 280 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 281 282 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 283 284 285 /* 286 * A couple of shorthand macros to identify "later" P6-family chips 287 * like the Pentium M and Core. 
 * First, the "older" P6-based stuff
 * (loosely defined as "pre-Pentium-4"):
 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
 */

#define	IS_LEGACY_P6(cpi) (			\
	cpi->cpi_family == 6 &&			\
		(cpi->cpi_model == 1 ||		\
		cpi->cpi_model == 3 ||		\
		cpi->cpi_model == 5 ||		\
		cpi->cpi_model == 6 ||		\
		cpi->cpi_model == 7 ||		\
		cpi->cpi_model == 8 ||		\
		cpi->cpi_model == 0xA ||	\
		cpi->cpi_model == 0xB)		\
)

/* A "new F6" is everything with family 6 that's not the above */
#define	IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))

/* Extended family/model support */
#define	IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
	cpi->cpi_family >= 0xf)

/*
 * Info for monitor/mwait idle loop.
 *
 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
 * 2006.
 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
 * Documentation Updates" #33633, Rev 2.05, December 2006.
 */
#define	MWAIT_SUPPORT		(0x00000001)	/* mwait supported */
#define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
#define	MWAIT_ECX_INT_ENABLE	(0x00000004)	/* ecx 1 extension supported */
#define	MWAIT_SUPPORTED(cpi)	((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
#define	MWAIT_INT_ENABLE(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x2)
#define	MWAIT_EXTENSION(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x1)
#define	MWAIT_SIZE_MIN(cpi)	BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
#define	MWAIT_SIZE_MAX(cpi)	BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
/*
 * Number of sub-cstates for a given c-state.
 */
#define	MWAIT_NUM_SUBC_STATES(cpi, c_state)			\
	BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)

/*
 * Functions we consume from cpuid_subr.c; don't publish these in a header
 * file to try and keep people using the expected cpuid_* interfaces.
 */
extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
extern uint_t _cpuid_vendorstr_to_vendorcode(char *);

/*
 * Apply various platform-dependent restrictions where the
 * underlying platform restrictions mean the CPU can be marked
 * as less capable than its cpuid instruction would imply.
 */
#if defined(__xpv)
static void
platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
{
	switch (eax) {
	case 1: {
		uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
356 0 : CPUID_INTC_EDX_MCA; 357 cp->cp_edx &= 358 ~(mcamask | 359 CPUID_INTC_EDX_PSE | 360 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 361 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 362 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 363 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 364 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 365 break; 366 } 367 368 case 0x80000001: 369 cp->cp_edx &= 370 ~(CPUID_AMD_EDX_PSE | 371 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 372 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 373 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 374 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 375 CPUID_AMD_EDX_TSCP); 376 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 377 break; 378 default: 379 break; 380 } 381 382 switch (vendor) { 383 case X86_VENDOR_Intel: 384 switch (eax) { 385 case 4: 386 /* 387 * Zero out the (ncores-per-chip - 1) field 388 */ 389 cp->cp_eax &= 0x03fffffff; 390 break; 391 default: 392 break; 393 } 394 break; 395 case X86_VENDOR_AMD: 396 switch (eax) { 397 398 case 0x80000001: 399 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 400 break; 401 402 case 0x80000008: 403 /* 404 * Zero out the (ncores-per-chip - 1) field 405 */ 406 cp->cp_ecx &= 0xffffff00; 407 break; 408 default: 409 break; 410 } 411 break; 412 default: 413 break; 414 } 415 } 416 #else 417 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 418 #endif 419 420 /* 421 * Some undocumented ways of patching the results of the cpuid 422 * instruction to permit running Solaris 10 on future cpus that 423 * we don't currently support. Could be set to non-zero values 424 * via settings in eeprom. 425 */ 426 427 uint32_t cpuid_feature_ecx_include; 428 uint32_t cpuid_feature_ecx_exclude; 429 uint32_t cpuid_feature_edx_include; 430 uint32_t cpuid_feature_edx_exclude; 431 432 /* 433 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. 434 */ 435 void 436 cpuid_alloc_space(cpu_t *cpu) 437 { 438 /* 439 * By convention, cpu0 is the boot cpu, which is set up 440 * before memory allocation is available. All other cpus get 441 * their cpuid_info struct allocated here. 442 */ 443 ASSERT(cpu->cpu_id != 0); 444 ASSERT(cpu->cpu_m.mcpu_cpi == NULL); 445 cpu->cpu_m.mcpu_cpi = 446 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 447 } 448 449 void 450 cpuid_free_space(cpu_t *cpu) 451 { 452 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 453 int i; 454 455 ASSERT(cpi != NULL); 456 ASSERT(cpi != &cpuid_info0); 457 458 /* 459 * Free up any function 4 related dynamic storage 460 */ 461 for (i = 1; i < cpi->cpi_std_4_size; i++) 462 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 463 if (cpi->cpi_std_4_size > 0) 464 kmem_free(cpi->cpi_std_4, 465 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 466 467 kmem_free(cpi, sizeof (*cpi)); 468 cpu->cpu_m.mcpu_cpi = NULL; 469 } 470 471 #if !defined(__xpv) 472 473 static void 474 determine_platform() 475 { 476 struct cpuid_regs cp; 477 char *xen_str; 478 uint32_t xen_signature[4], base; 479 480 platform_type = HW_NATIVE; 481 482 if (!enable_platform_detection) 483 return; 484 485 /* 486 * In a fully virtualized domain, Xen's pseudo-cpuid function 487 * returns a string representing the Xen signature in %ebx, %ecx, 488 * and %edx. %eax contains the maximum supported cpuid function. 489 * We need at least a (base + 2) leaf value to do what we want 490 * to do. Try different base values, since the hypervisor might 491 * use a different one depending on whether hyper-v emulation 492 * is switched on by default or not. 
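	 *
	 * As a purely illustrative example (the values are hypothetical for
	 * any particular machine): with base = 0x40000000 a Xen HVM guest
	 * would typically return %ebx = 0x566e6558 ("XenV"),
	 * %ecx = 0x65584d4d ("MMXe") and %edx = 0x4d4d566e ("nVMM"), which
	 * concatenate to the "XenVMMXenVMM" signature tested below.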
493 */ 494 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 495 cp.cp_eax = base; 496 (void) __cpuid_insn(&cp); 497 xen_signature[0] = cp.cp_ebx; 498 xen_signature[1] = cp.cp_ecx; 499 xen_signature[2] = cp.cp_edx; 500 xen_signature[3] = 0; 501 xen_str = (char *)xen_signature; 502 if (strcmp("XenVMMXenVMM", xen_str) == 0 && 503 cp.cp_eax >= (base + 2)) { 504 platform_type = HW_XEN_HVM; 505 return; 506 } 507 } 508 509 if (vmware_platform()) /* running under vmware hypervisor? */ 510 platform_type = HW_VMWARE; 511 } 512 513 int 514 get_hwenv(void) 515 { 516 if (platform_type == -1) 517 determine_platform(); 518 519 return (platform_type); 520 } 521 522 int 523 is_controldom(void) 524 { 525 return (0); 526 } 527 528 #else 529 530 int 531 get_hwenv(void) 532 { 533 return (HW_XEN_PV); 534 } 535 536 int 537 is_controldom(void) 538 { 539 return (DOMAIN_IS_INITDOMAIN(xen_info)); 540 } 541 542 #endif /* __xpv */ 543 544 static void 545 cpuid_intel_getids(cpu_t *cpu, uint_t feature) 546 { 547 uint_t i; 548 uint_t chipid_shift = 0; 549 uint_t coreid_shift = 0; 550 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 551 552 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 553 chipid_shift++; 554 555 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 556 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 557 558 if (feature & X86_CMP) { 559 /* 560 * Multi-core (and possibly multi-threaded) 561 * processors. 562 */ 563 uint_t ncpu_per_core; 564 if (cpi->cpi_ncore_per_chip == 1) 565 ncpu_per_core = cpi->cpi_ncpu_per_chip; 566 else if (cpi->cpi_ncore_per_chip > 1) 567 ncpu_per_core = cpi->cpi_ncpu_per_chip / 568 cpi->cpi_ncore_per_chip; 569 /* 570 * 8bit APIC IDs on dual core Pentiums 571 * look like this: 572 * 573 * +-----------------------+------+------+ 574 * | Physical Package ID | MC | HT | 575 * +-----------------------+------+------+ 576 * <------- chipid --------> 577 * <------- coreid ---------------> 578 * <--- clogid --> 579 * <------> 580 * pkgcoreid 581 * 582 * Where the number of bits necessary to 583 * represent MC and HT fields together equals 584 * to the minimum number of bits necessary to 585 * store the value of cpi->cpi_ncpu_per_chip. 586 * Of those bits, the MC part uses the number 587 * of bits necessary to store the value of 588 * cpi->cpi_ncore_per_chip. 589 */ 590 for (i = 1; i < ncpu_per_core; i <<= 1) 591 coreid_shift++; 592 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 593 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 594 } else if (feature & X86_HTT) { 595 /* 596 * Single-core multi-threaded processors. 597 */ 598 cpi->cpi_coreid = cpi->cpi_chipid; 599 cpi->cpi_pkgcoreid = 0; 600 } 601 cpi->cpi_procnodeid = cpi->cpi_chipid; 602 } 603 604 static void 605 cpuid_amd_getids(cpu_t *cpu) 606 { 607 int i, first_half, coreidsz; 608 uint32_t nb_caps_reg; 609 uint_t node2_1; 610 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 611 612 /* 613 * AMD CMP chips currently have a single thread per core. 614 * 615 * Since no two cpus share a core we must assign a distinct coreid 616 * per cpu, and we do this by using the cpu_id. This scheme does not, 617 * however, guarantee that sibling cores of a chip will have sequential 618 * coreids starting at a multiple of the number of cores per chip - 619 * that is usually the case, but if the ACPI MADT table is presented 620 * in a different order then we need to perform a few more gymnastics 621 * for the pkgcoreid. 622 * 623 * All processors in the system have the same number of enabled 624 * cores. 
Cores within a processor are always numbered sequentially 625 * from 0 regardless of how many or which are disabled, and there 626 * is no way for operating system to discover the real core id when some 627 * are disabled. 628 */ 629 630 cpi->cpi_coreid = cpu->cpu_id; 631 632 if (cpi->cpi_xmaxeax >= 0x80000008) { 633 634 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 635 636 /* 637 * In AMD parlance chip is really a node while Solaris 638 * sees chip as equivalent to socket/package. 639 */ 640 cpi->cpi_ncore_per_chip = 641 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 642 if (coreidsz == 0) { 643 /* Use legacy method */ 644 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 645 coreidsz++; 646 if (coreidsz == 0) 647 coreidsz = 1; 648 } 649 } else { 650 /* Assume single-core part */ 651 cpi->cpi_ncore_per_chip = 1; 652 coreidsz = 1; 653 } 654 655 cpi->cpi_clogid = cpi->cpi_pkgcoreid = 656 cpi->cpi_apicid & ((1<<coreidsz) - 1); 657 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip; 658 659 /* Get nodeID */ 660 if (cpi->cpi_family == 0xf) { 661 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 662 cpi->cpi_chipid = cpi->cpi_procnodeid; 663 } else if (cpi->cpi_family == 0x10) { 664 /* 665 * See if we are a multi-node processor. 666 * All processors in the system have the same number of nodes 667 */ 668 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 669 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 670 /* Single-node */ 671 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 672 coreidsz); 673 cpi->cpi_chipid = cpi->cpi_procnodeid; 674 } else { 675 676 /* 677 * Multi-node revision D (2 nodes per package 678 * are supported) 679 */ 680 cpi->cpi_procnodes_per_pkg = 2; 681 682 first_half = (cpi->cpi_pkgcoreid <= 683 (cpi->cpi_ncore_per_chip/2 - 1)); 684 685 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 686 /* We are BSP */ 687 cpi->cpi_procnodeid = (first_half ? 
0 : 1); 688 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 689 } else { 690 691 /* We are AP */ 692 /* NodeId[2:1] bits to use for reading F3xe8 */ 693 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 694 695 nb_caps_reg = 696 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 697 698 /* 699 * Check IntNodeNum bit (31:30, but bit 31 is 700 * always 0 on dual-node processors) 701 */ 702 if (BITX(nb_caps_reg, 30, 30) == 0) 703 cpi->cpi_procnodeid = node2_1 + 704 !first_half; 705 else 706 cpi->cpi_procnodeid = node2_1 + 707 first_half; 708 709 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 710 } 711 } 712 } else if (cpi->cpi_family >= 0x11) { 713 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 714 cpi->cpi_chipid = cpi->cpi_procnodeid; 715 } else { 716 cpi->cpi_procnodeid = 0; 717 cpi->cpi_chipid = cpi->cpi_procnodeid; 718 } 719 } 720 721 uint_t 722 cpuid_pass1(cpu_t *cpu) 723 { 724 uint32_t mask_ecx, mask_edx; 725 uint_t feature = X86_CPUID; 726 struct cpuid_info *cpi; 727 struct cpuid_regs *cp; 728 int xcpuid; 729 #if !defined(__xpv) 730 extern int idle_cpu_prefer_mwait; 731 #endif 732 733 734 #if !defined(__xpv) 735 determine_platform(); 736 #endif 737 /* 738 * Space statically allocated for BSP, ensure pointer is set 739 */ 740 if (cpu->cpu_id == 0 && cpu->cpu_m.mcpu_cpi == NULL) 741 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 742 cpi = cpu->cpu_m.mcpu_cpi; 743 ASSERT(cpi != NULL); 744 cp = &cpi->cpi_std[0]; 745 cp->cp_eax = 0; 746 cpi->cpi_maxeax = __cpuid_insn(cp); 747 { 748 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 749 *iptr++ = cp->cp_ebx; 750 *iptr++ = cp->cp_edx; 751 *iptr++ = cp->cp_ecx; 752 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 753 } 754 755 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 756 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 757 758 /* 759 * Limit the range in case of weird hardware 760 */ 761 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 762 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 763 if (cpi->cpi_maxeax < 1) 764 goto pass1_done; 765 766 cp = &cpi->cpi_std[1]; 767 cp->cp_eax = 1; 768 (void) __cpuid_insn(cp); 769 770 /* 771 * Extract identifying constants for easy access. 772 */ 773 cpi->cpi_model = CPI_MODEL(cpi); 774 cpi->cpi_family = CPI_FAMILY(cpi); 775 776 if (cpi->cpi_family == 0xf) 777 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 778 779 /* 780 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 781 * Intel, and presumably everyone else, uses model == 0xf, as 782 * one would expect (max value means possible overflow). Sigh. 
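	 *
	 * A worked example of the arithmetic below (the register value is
	 * hypothetical): if fn 1 returned %eax = 0x000206c2, then base
	 * family = 0x6, extended model = 0x2, base model = 0xc and
	 * stepping = 0x2; on Intel this yields cpi_family = 6 and
	 * cpi_model = (0x2 << 4) + 0xc = 0x2c.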
783 */ 784 785 switch (cpi->cpi_vendor) { 786 case X86_VENDOR_Intel: 787 if (IS_EXTENDED_MODEL_INTEL(cpi)) 788 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 789 break; 790 case X86_VENDOR_AMD: 791 if (CPI_FAMILY(cpi) == 0xf) 792 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 793 break; 794 default: 795 if (cpi->cpi_model == 0xf) 796 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 797 break; 798 } 799 800 cpi->cpi_step = CPI_STEP(cpi); 801 cpi->cpi_brandid = CPI_BRANDID(cpi); 802 803 /* 804 * *default* assumptions: 805 * - believe %edx feature word 806 * - ignore %ecx feature word 807 * - 32-bit virtual and physical addressing 808 */ 809 mask_edx = 0xffffffff; 810 mask_ecx = 0; 811 812 cpi->cpi_pabits = cpi->cpi_vabits = 32; 813 814 switch (cpi->cpi_vendor) { 815 case X86_VENDOR_Intel: 816 if (cpi->cpi_family == 5) 817 x86_type = X86_TYPE_P5; 818 else if (IS_LEGACY_P6(cpi)) { 819 x86_type = X86_TYPE_P6; 820 pentiumpro_bug4046376 = 1; 821 pentiumpro_bug4064495 = 1; 822 /* 823 * Clear the SEP bit when it was set erroneously 824 */ 825 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 826 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 827 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 828 x86_type = X86_TYPE_P4; 829 /* 830 * We don't currently depend on any of the %ecx 831 * features until Prescott, so we'll only check 832 * this from P4 onwards. We might want to revisit 833 * that idea later. 834 */ 835 mask_ecx = 0xffffffff; 836 } else if (cpi->cpi_family > 0xf) 837 mask_ecx = 0xffffffff; 838 /* 839 * We don't support MONITOR/MWAIT if leaf 5 is not available 840 * to obtain the monitor linesize. 841 */ 842 if (cpi->cpi_maxeax < 5) 843 mask_ecx &= ~CPUID_INTC_ECX_MON; 844 break; 845 case X86_VENDOR_IntelClone: 846 default: 847 break; 848 case X86_VENDOR_AMD: 849 #if defined(OPTERON_ERRATUM_108) 850 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 851 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 852 cpi->cpi_model = 0xc; 853 } else 854 #endif 855 if (cpi->cpi_family == 5) { 856 /* 857 * AMD K5 and K6 858 * 859 * These CPUs have an incomplete implementation 860 * of MCA/MCE which we mask away. 861 */ 862 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 863 864 /* 865 * Model 0 uses the wrong (APIC) bit 866 * to indicate PGE. Fix it here. 867 */ 868 if (cpi->cpi_model == 0) { 869 if (cp->cp_edx & 0x200) { 870 cp->cp_edx &= ~0x200; 871 cp->cp_edx |= CPUID_INTC_EDX_PGE; 872 } 873 } 874 875 /* 876 * Early models had problems w/ MMX; disable. 877 */ 878 if (cpi->cpi_model < 6) 879 mask_edx &= ~CPUID_INTC_EDX_MMX; 880 } 881 882 /* 883 * For newer families, SSE3 and CX16, at least, are valid; 884 * enable all 885 */ 886 if (cpi->cpi_family >= 0xf) 887 mask_ecx = 0xffffffff; 888 /* 889 * We don't support MONITOR/MWAIT if leaf 5 is not available 890 * to obtain the monitor linesize. 891 */ 892 if (cpi->cpi_maxeax < 5) 893 mask_ecx &= ~CPUID_INTC_ECX_MON; 894 895 #if !defined(__xpv) 896 /* 897 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 898 * processors. AMD does not intend MWAIT to be used in the cpu 899 * idle loop on current and future processors. 10h and future 900 * AMD processors use more power in MWAIT than HLT. 901 * Pre-family-10h Opterons do not have the MWAIT instruction. 
902 */ 903 idle_cpu_prefer_mwait = 0; 904 #endif 905 906 break; 907 case X86_VENDOR_TM: 908 /* 909 * workaround the NT workaround in CMS 4.1 910 */ 911 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 912 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 913 cp->cp_edx |= CPUID_INTC_EDX_CX8; 914 break; 915 case X86_VENDOR_Centaur: 916 /* 917 * workaround the NT workarounds again 918 */ 919 if (cpi->cpi_family == 6) 920 cp->cp_edx |= CPUID_INTC_EDX_CX8; 921 break; 922 case X86_VENDOR_Cyrix: 923 /* 924 * We rely heavily on the probing in locore 925 * to actually figure out what parts, if any, 926 * of the Cyrix cpuid instruction to believe. 927 */ 928 switch (x86_type) { 929 case X86_TYPE_CYRIX_486: 930 mask_edx = 0; 931 break; 932 case X86_TYPE_CYRIX_6x86: 933 mask_edx = 0; 934 break; 935 case X86_TYPE_CYRIX_6x86L: 936 mask_edx = 937 CPUID_INTC_EDX_DE | 938 CPUID_INTC_EDX_CX8; 939 break; 940 case X86_TYPE_CYRIX_6x86MX: 941 mask_edx = 942 CPUID_INTC_EDX_DE | 943 CPUID_INTC_EDX_MSR | 944 CPUID_INTC_EDX_CX8 | 945 CPUID_INTC_EDX_PGE | 946 CPUID_INTC_EDX_CMOV | 947 CPUID_INTC_EDX_MMX; 948 break; 949 case X86_TYPE_CYRIX_GXm: 950 mask_edx = 951 CPUID_INTC_EDX_MSR | 952 CPUID_INTC_EDX_CX8 | 953 CPUID_INTC_EDX_CMOV | 954 CPUID_INTC_EDX_MMX; 955 break; 956 case X86_TYPE_CYRIX_MediaGX: 957 break; 958 case X86_TYPE_CYRIX_MII: 959 case X86_TYPE_VIA_CYRIX_III: 960 mask_edx = 961 CPUID_INTC_EDX_DE | 962 CPUID_INTC_EDX_TSC | 963 CPUID_INTC_EDX_MSR | 964 CPUID_INTC_EDX_CX8 | 965 CPUID_INTC_EDX_PGE | 966 CPUID_INTC_EDX_CMOV | 967 CPUID_INTC_EDX_MMX; 968 break; 969 default: 970 break; 971 } 972 break; 973 } 974 975 #if defined(__xpv) 976 /* 977 * Do not support MONITOR/MWAIT under a hypervisor 978 */ 979 mask_ecx &= ~CPUID_INTC_ECX_MON; 980 #endif /* __xpv */ 981 982 /* 983 * Now we've figured out the masks that determine 984 * which bits we choose to believe, apply the masks 985 * to the feature words, then map the kernel's view 986 * of these feature words into its feature word. 
	 */
	cp->cp_edx &= mask_edx;
	cp->cp_ecx &= mask_ecx;

	/*
	 * apply any platform restrictions (we don't call this
	 * immediately after __cpuid_insn here, because we need the
	 * workarounds applied above first)
	 */
	platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);

	/*
	 * fold in overrides from the "eeprom" mechanism
	 */
	cp->cp_edx |= cpuid_feature_edx_include;
	cp->cp_edx &= ~cpuid_feature_edx_exclude;

	cp->cp_ecx |= cpuid_feature_ecx_include;
	cp->cp_ecx &= ~cpuid_feature_ecx_exclude;

	if (cp->cp_edx & CPUID_INTC_EDX_PSE)
		feature |= X86_LARGEPAGE;
	if (cp->cp_edx & CPUID_INTC_EDX_TSC)
		feature |= X86_TSC;
	if (cp->cp_edx & CPUID_INTC_EDX_MSR)
		feature |= X86_MSR;
	if (cp->cp_edx & CPUID_INTC_EDX_MTRR)
		feature |= X86_MTRR;
	if (cp->cp_edx & CPUID_INTC_EDX_PGE)
		feature |= X86_PGE;
	if (cp->cp_edx & CPUID_INTC_EDX_CMOV)
		feature |= X86_CMOV;
	if (cp->cp_edx & CPUID_INTC_EDX_MMX)
		feature |= X86_MMX;
	if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
	    (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0)
		feature |= X86_MCA;
	if (cp->cp_edx & CPUID_INTC_EDX_PAE)
		feature |= X86_PAE;
	if (cp->cp_edx & CPUID_INTC_EDX_CX8)
		feature |= X86_CX8;
	if (cp->cp_ecx & CPUID_INTC_ECX_CX16)
		feature |= X86_CX16;
	if (cp->cp_edx & CPUID_INTC_EDX_PAT)
		feature |= X86_PAT;
	if (cp->cp_edx & CPUID_INTC_EDX_SEP)
		feature |= X86_SEP;
	if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
		/*
		 * In our implementation, fxsave/fxrstor
		 * are prerequisites before we'll even
		 * try and do SSE things.
		 */
		if (cp->cp_edx & CPUID_INTC_EDX_SSE)
			feature |= X86_SSE;
		if (cp->cp_edx & CPUID_INTC_EDX_SSE2)
			feature |= X86_SSE2;
		if (cp->cp_ecx & CPUID_INTC_ECX_SSE3)
			feature |= X86_SSE3;
		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
			if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3)
				feature |= X86_SSSE3;
			if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1)
				feature |= X86_SSE4_1;
			if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2)
				feature |= X86_SSE4_2;
			if (cp->cp_ecx & CPUID_INTC_ECX_AES)
				feature |= X86_AES;
		}
	}
	if (cp->cp_edx & CPUID_INTC_EDX_DE)
		feature |= X86_DE;
#if !defined(__xpv)
	if (cp->cp_ecx & CPUID_INTC_ECX_MON) {

		/*
		 * We require the CLFLUSH instruction for erratum workaround
		 * to use MONITOR/MWAIT.
		 */
		if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
			cpi->cpi_mwait.support |= MWAIT_SUPPORT;
			feature |= X86_MWAIT;
		} else {
			extern int idle_cpu_assert_cflush_monitor;

			/*
			 * All processors we are aware of which have
			 * MONITOR/MWAIT also have CLFLUSH.
			 */
			if (idle_cpu_assert_cflush_monitor) {
				ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
				    (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
			}
		}
	}
#endif /* __xpv */

	/*
	 * Only need this the first time; the rest of the cpus will follow
	 * suit. We only capture this for the boot cpu.
	 */
	if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
		feature |= X86_CLFSH;
		x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
	}

	if (feature & X86_PAE)
		cpi->cpi_pabits = 36;

	/*
	 * Hyperthreading configuration is slightly tricky on Intel
	 * and pure clones, and even trickier on AMD.
	 *
	 * (AMD chose to set the HTT bit on their CMP processors,
	 * even though they're not actually hyperthreaded.
Thus it 1102 * takes a bit more work to figure out what's really going 1103 * on ... see the handling of the CMP_LGCY bit below) 1104 */ 1105 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 1106 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 1107 if (cpi->cpi_ncpu_per_chip > 1) 1108 feature |= X86_HTT; 1109 } else { 1110 cpi->cpi_ncpu_per_chip = 1; 1111 } 1112 1113 /* 1114 * Work on the "extended" feature information, doing 1115 * some basic initialization for cpuid_pass2() 1116 */ 1117 xcpuid = 0; 1118 switch (cpi->cpi_vendor) { 1119 case X86_VENDOR_Intel: 1120 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 1121 xcpuid++; 1122 break; 1123 case X86_VENDOR_AMD: 1124 if (cpi->cpi_family > 5 || 1125 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 1126 xcpuid++; 1127 break; 1128 case X86_VENDOR_Cyrix: 1129 /* 1130 * Only these Cyrix CPUs are -known- to support 1131 * extended cpuid operations. 1132 */ 1133 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1134 x86_type == X86_TYPE_CYRIX_GXm) 1135 xcpuid++; 1136 break; 1137 case X86_VENDOR_Centaur: 1138 case X86_VENDOR_TM: 1139 default: 1140 xcpuid++; 1141 break; 1142 } 1143 1144 if (xcpuid) { 1145 cp = &cpi->cpi_extd[0]; 1146 cp->cp_eax = 0x80000000; 1147 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1148 } 1149 1150 if (cpi->cpi_xmaxeax & 0x80000000) { 1151 1152 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1153 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1154 1155 switch (cpi->cpi_vendor) { 1156 case X86_VENDOR_Intel: 1157 case X86_VENDOR_AMD: 1158 if (cpi->cpi_xmaxeax < 0x80000001) 1159 break; 1160 cp = &cpi->cpi_extd[1]; 1161 cp->cp_eax = 0x80000001; 1162 (void) __cpuid_insn(cp); 1163 1164 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1165 cpi->cpi_family == 5 && 1166 cpi->cpi_model == 6 && 1167 cpi->cpi_step == 6) { 1168 /* 1169 * K6 model 6 uses bit 10 to indicate SYSC 1170 * Later models use bit 11. Fix it here. 1171 */ 1172 if (cp->cp_edx & 0x400) { 1173 cp->cp_edx &= ~0x400; 1174 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1175 } 1176 } 1177 1178 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1179 1180 /* 1181 * Compute the additions to the kernel's feature word. 1182 */ 1183 if (cp->cp_edx & CPUID_AMD_EDX_NX) 1184 feature |= X86_NX; 1185 1186 /* 1187 * Regardless whether or not we boot 64-bit, 1188 * we should have a way to identify whether 1189 * the CPU is capable of running 64-bit. 1190 */ 1191 if (cp->cp_edx & CPUID_AMD_EDX_LM) 1192 feature |= X86_64; 1193 1194 #if defined(__amd64) 1195 /* 1 GB large page - enable only for 64 bit kernel */ 1196 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 1197 feature |= X86_1GPG; 1198 #endif 1199 1200 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1201 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1202 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 1203 feature |= X86_SSE4A; 1204 1205 /* 1206 * If both the HTT and CMP_LGCY bits are set, 1207 * then we're not actually HyperThreaded. Read 1208 * "AMD CPUID Specification" for more details. 1209 */ 1210 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1211 (feature & X86_HTT) && 1212 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1213 feature &= ~X86_HTT; 1214 feature |= X86_CMP; 1215 } 1216 #if defined(__amd64) 1217 /* 1218 * It's really tricky to support syscall/sysret in 1219 * the i386 kernel; we rely on sysenter/sysexit 1220 * instead. In the amd64 kernel, things are -way- 1221 * better. 
1222 */ 1223 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 1224 feature |= X86_ASYSC; 1225 1226 /* 1227 * While we're thinking about system calls, note 1228 * that AMD processors don't support sysenter 1229 * in long mode at all, so don't try to program them. 1230 */ 1231 if (x86_vendor == X86_VENDOR_AMD) 1232 feature &= ~X86_SEP; 1233 #endif 1234 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1235 feature |= X86_TSCP; 1236 break; 1237 default: 1238 break; 1239 } 1240 1241 /* 1242 * Get CPUID data about processor cores and hyperthreads. 1243 */ 1244 switch (cpi->cpi_vendor) { 1245 case X86_VENDOR_Intel: 1246 if (cpi->cpi_maxeax >= 4) { 1247 cp = &cpi->cpi_std[4]; 1248 cp->cp_eax = 4; 1249 cp->cp_ecx = 0; 1250 (void) __cpuid_insn(cp); 1251 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1252 } 1253 /*FALLTHROUGH*/ 1254 case X86_VENDOR_AMD: 1255 if (cpi->cpi_xmaxeax < 0x80000008) 1256 break; 1257 cp = &cpi->cpi_extd[8]; 1258 cp->cp_eax = 0x80000008; 1259 (void) __cpuid_insn(cp); 1260 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1261 1262 /* 1263 * Virtual and physical address limits from 1264 * cpuid override previously guessed values. 1265 */ 1266 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1267 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1268 break; 1269 default: 1270 break; 1271 } 1272 1273 /* 1274 * Derive the number of cores per chip 1275 */ 1276 switch (cpi->cpi_vendor) { 1277 case X86_VENDOR_Intel: 1278 if (cpi->cpi_maxeax < 4) { 1279 cpi->cpi_ncore_per_chip = 1; 1280 break; 1281 } else { 1282 cpi->cpi_ncore_per_chip = 1283 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1284 } 1285 break; 1286 case X86_VENDOR_AMD: 1287 if (cpi->cpi_xmaxeax < 0x80000008) { 1288 cpi->cpi_ncore_per_chip = 1; 1289 break; 1290 } else { 1291 /* 1292 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1293 * 1 less than the number of physical cores on 1294 * the chip. In family 0x10 this value can 1295 * be affected by "downcoring" - it reflects 1296 * 1 less than the number of cores actually 1297 * enabled on this node. 1298 */ 1299 cpi->cpi_ncore_per_chip = 1300 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1301 } 1302 break; 1303 default: 1304 cpi->cpi_ncore_per_chip = 1; 1305 break; 1306 } 1307 1308 /* 1309 * Get CPUID data about TSC Invariance in Deep C-State. 1310 */ 1311 switch (cpi->cpi_vendor) { 1312 case X86_VENDOR_Intel: 1313 if (cpi->cpi_maxeax >= 7) { 1314 cp = &cpi->cpi_extd[7]; 1315 cp->cp_eax = 0x80000007; 1316 cp->cp_ecx = 0; 1317 (void) __cpuid_insn(cp); 1318 } 1319 break; 1320 default: 1321 break; 1322 } 1323 } else { 1324 cpi->cpi_ncore_per_chip = 1; 1325 } 1326 1327 /* 1328 * If more than one core, then this processor is CMP. 1329 */ 1330 if (cpi->cpi_ncore_per_chip > 1) 1331 feature |= X86_CMP; 1332 1333 /* 1334 * If the number of cores is the same as the number 1335 * of CPUs, then we cannot have HyperThreading. 1336 */ 1337 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1338 feature &= ~X86_HTT; 1339 1340 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1341 cpi->cpi_procnodes_per_pkg = 1; 1342 1343 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1344 /* 1345 * Single-core single-threaded processors. 
1346 */ 1347 cpi->cpi_chipid = -1; 1348 cpi->cpi_clogid = 0; 1349 cpi->cpi_coreid = cpu->cpu_id; 1350 cpi->cpi_pkgcoreid = 0; 1351 if (cpi->cpi_vendor == X86_VENDOR_AMD) 1352 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 1353 else 1354 cpi->cpi_procnodeid = cpi->cpi_chipid; 1355 } else if (cpi->cpi_ncpu_per_chip > 1) { 1356 if (cpi->cpi_vendor == X86_VENDOR_Intel) 1357 cpuid_intel_getids(cpu, feature); 1358 else if (cpi->cpi_vendor == X86_VENDOR_AMD) 1359 cpuid_amd_getids(cpu); 1360 else { 1361 /* 1362 * All other processors are currently 1363 * assumed to have single cores. 1364 */ 1365 cpi->cpi_coreid = cpi->cpi_chipid; 1366 cpi->cpi_pkgcoreid = 0; 1367 cpi->cpi_procnodeid = cpi->cpi_chipid; 1368 } 1369 } 1370 1371 /* 1372 * Synthesize chip "revision" and socket type 1373 */ 1374 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1375 cpi->cpi_model, cpi->cpi_step); 1376 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1377 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1378 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1379 cpi->cpi_model, cpi->cpi_step); 1380 1381 pass1_done: 1382 cpi->cpi_pass = 1; 1383 return (feature); 1384 } 1385 1386 /* 1387 * Make copies of the cpuid table entries we depend on, in 1388 * part for ease of parsing now, in part so that we have only 1389 * one place to correct any of it, in part for ease of 1390 * later export to userland, and in part so we can look at 1391 * this stuff in a crash dump. 1392 */ 1393 1394 /*ARGSUSED*/ 1395 void 1396 cpuid_pass2(cpu_t *cpu) 1397 { 1398 uint_t n, nmax; 1399 int i; 1400 struct cpuid_regs *cp; 1401 uint8_t *dp; 1402 uint32_t *iptr; 1403 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1404 1405 ASSERT(cpi->cpi_pass == 1); 1406 1407 if (cpi->cpi_maxeax < 1) 1408 goto pass2_done; 1409 1410 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1411 nmax = NMAX_CPI_STD; 1412 /* 1413 * (We already handled n == 0 and n == 1 in pass 1) 1414 */ 1415 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1416 cp->cp_eax = n; 1417 1418 /* 1419 * CPUID function 4 expects %ecx to be initialized 1420 * with an index which indicates which cache to return 1421 * information about. The OS is expected to call function 4 1422 * with %ecx set to 0, 1, 2, ... until it returns with 1423 * EAX[4:0] set to 0, which indicates there are no more 1424 * caches. 1425 * 1426 * Here, populate cpi_std[4] with the information returned by 1427 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1428 * when dynamic memory allocation becomes available. 1429 * 1430 * Note: we need to explicitly initialize %ecx here, since 1431 * function 4 may have been previously invoked. 1432 */ 1433 if (n == 4) 1434 cp->cp_ecx = 0; 1435 1436 (void) __cpuid_insn(cp); 1437 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1438 switch (n) { 1439 case 2: 1440 /* 1441 * "the lower 8 bits of the %eax register 1442 * contain a value that identifies the number 1443 * of times the cpuid [instruction] has to be 1444 * executed to obtain a complete image of the 1445 * processor's caching systems." 1446 * 1447 * How *do* they make this stuff up? 1448 */ 1449 cpi->cpi_ncache = sizeof (*cp) * 1450 BITX(cp->cp_eax, 7, 0); 1451 if (cpi->cpi_ncache == 0) 1452 break; 1453 cpi->cpi_ncache--; /* skip count byte */ 1454 1455 /* 1456 * Well, for now, rather than attempt to implement 1457 * this slightly dubious algorithm, we just look 1458 * at the first 15 .. 
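			 *
			 * For illustration (a hypothetical fn 2 result):
			 * %eax = 0x05b0b101 would mean "execute fn 2 once"
			 * (low byte 0x01) and carry descriptors 0xb1, 0xb0
			 * and 0x05; the code below copies each nonzero
			 * descriptor byte of %eax, %ebx, %ecx and %edx into
			 * cpi_cacheinfo, skipping any register with bit 31
			 * set.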
1459 */ 1460 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1461 cpi->cpi_ncache = sizeof (*cp) - 1; 1462 1463 dp = cpi->cpi_cacheinfo; 1464 if (BITX(cp->cp_eax, 31, 31) == 0) { 1465 uint8_t *p = (void *)&cp->cp_eax; 1466 for (i = 1; i < 4; i++) 1467 if (p[i] != 0) 1468 *dp++ = p[i]; 1469 } 1470 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1471 uint8_t *p = (void *)&cp->cp_ebx; 1472 for (i = 0; i < 4; i++) 1473 if (p[i] != 0) 1474 *dp++ = p[i]; 1475 } 1476 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1477 uint8_t *p = (void *)&cp->cp_ecx; 1478 for (i = 0; i < 4; i++) 1479 if (p[i] != 0) 1480 *dp++ = p[i]; 1481 } 1482 if (BITX(cp->cp_edx, 31, 31) == 0) { 1483 uint8_t *p = (void *)&cp->cp_edx; 1484 for (i = 0; i < 4; i++) 1485 if (p[i] != 0) 1486 *dp++ = p[i]; 1487 } 1488 break; 1489 1490 case 3: /* Processor serial number, if PSN supported */ 1491 break; 1492 1493 case 4: /* Deterministic cache parameters */ 1494 break; 1495 1496 case 5: /* Monitor/Mwait parameters */ 1497 { 1498 size_t mwait_size; 1499 1500 /* 1501 * check cpi_mwait.support which was set in cpuid_pass1 1502 */ 1503 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1504 break; 1505 1506 /* 1507 * Protect ourself from insane mwait line size. 1508 * Workaround for incomplete hardware emulator(s). 1509 */ 1510 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1511 if (mwait_size < sizeof (uint32_t) || 1512 !ISP2(mwait_size)) { 1513 #if DEBUG 1514 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1515 "size %ld", cpu->cpu_id, (long)mwait_size); 1516 #endif 1517 break; 1518 } 1519 1520 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1521 cpi->cpi_mwait.mon_max = mwait_size; 1522 if (MWAIT_EXTENSION(cpi)) { 1523 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1524 if (MWAIT_INT_ENABLE(cpi)) 1525 cpi->cpi_mwait.support |= 1526 MWAIT_ECX_INT_ENABLE; 1527 } 1528 break; 1529 } 1530 default: 1531 break; 1532 } 1533 } 1534 1535 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1536 struct cpuid_regs regs; 1537 1538 cp = ®s; 1539 cp->cp_eax = 0xB; 1540 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1541 1542 (void) __cpuid_insn(cp); 1543 1544 /* 1545 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1546 * indicates that the extended topology enumeration leaf is 1547 * available. 
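		 *
		 * A sketch of the arithmetic done below, with hypothetical
		 * values: if the thread level (type 1) reports a shift of 1
		 * and the core level (type 2) reports a shift of 5, then an
		 * x2APIC id of 0x23 yields cpi_chipid = 0x23 >> 5 = 1,
		 * cpi_clogid = 0x23 & 0x1f = 3, cpi_coreid = 0x23 >> 1 = 0x11
		 * and cpi_pkgcoreid = 3 >> 1 = 1.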
1548 */ 1549 if (cp->cp_ebx) { 1550 uint32_t x2apic_id; 1551 uint_t coreid_shift = 0; 1552 uint_t ncpu_per_core = 1; 1553 uint_t chipid_shift = 0; 1554 uint_t ncpu_per_chip = 1; 1555 uint_t i; 1556 uint_t level; 1557 1558 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1559 cp->cp_eax = 0xB; 1560 cp->cp_ecx = i; 1561 1562 (void) __cpuid_insn(cp); 1563 level = CPI_CPU_LEVEL_TYPE(cp); 1564 1565 if (level == 1) { 1566 x2apic_id = cp->cp_edx; 1567 coreid_shift = BITX(cp->cp_eax, 4, 0); 1568 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1569 } else if (level == 2) { 1570 x2apic_id = cp->cp_edx; 1571 chipid_shift = BITX(cp->cp_eax, 4, 0); 1572 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1573 } 1574 } 1575 1576 cpi->cpi_apicid = x2apic_id; 1577 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1578 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1579 ncpu_per_core; 1580 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1581 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1582 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1583 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1584 } 1585 1586 /* Make cp NULL so that we don't stumble on others */ 1587 cp = NULL; 1588 } 1589 1590 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1591 goto pass2_done; 1592 1593 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1594 nmax = NMAX_CPI_EXTD; 1595 /* 1596 * Copy the extended properties, fixing them as we go. 1597 * (We already handled n == 0 and n == 1 in pass 1) 1598 */ 1599 iptr = (void *)cpi->cpi_brandstr; 1600 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1601 cp->cp_eax = 0x80000000 + n; 1602 (void) __cpuid_insn(cp); 1603 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1604 switch (n) { 1605 case 2: 1606 case 3: 1607 case 4: 1608 /* 1609 * Extract the brand string 1610 */ 1611 *iptr++ = cp->cp_eax; 1612 *iptr++ = cp->cp_ebx; 1613 *iptr++ = cp->cp_ecx; 1614 *iptr++ = cp->cp_edx; 1615 break; 1616 case 5: 1617 switch (cpi->cpi_vendor) { 1618 case X86_VENDOR_AMD: 1619 /* 1620 * The Athlon and Duron were the first 1621 * parts to report the sizes of the 1622 * TLB for large pages. Before then, 1623 * we don't trust the data. 1624 */ 1625 if (cpi->cpi_family < 6 || 1626 (cpi->cpi_family == 6 && 1627 cpi->cpi_model < 1)) 1628 cp->cp_eax = 0; 1629 break; 1630 default: 1631 break; 1632 } 1633 break; 1634 case 6: 1635 switch (cpi->cpi_vendor) { 1636 case X86_VENDOR_AMD: 1637 /* 1638 * The Athlon and Duron were the first 1639 * AMD parts with L2 TLB's. 1640 * Before then, don't trust the data. 1641 */ 1642 if (cpi->cpi_family < 6 || 1643 cpi->cpi_family == 6 && 1644 cpi->cpi_model < 1) 1645 cp->cp_eax = cp->cp_ebx = 0; 1646 /* 1647 * AMD Duron rev A0 reports L2 1648 * cache size incorrectly as 1K 1649 * when it is really 64K 1650 */ 1651 if (cpi->cpi_family == 6 && 1652 cpi->cpi_model == 3 && 1653 cpi->cpi_step == 0) { 1654 cp->cp_ecx &= 0xffff; 1655 cp->cp_ecx |= 0x400000; 1656 } 1657 break; 1658 case X86_VENDOR_Cyrix: /* VIA C3 */ 1659 /* 1660 * VIA C3 processors are a bit messed 1661 * up w.r.t. encoding cache sizes in %ecx 1662 */ 1663 if (cpi->cpi_family != 6) 1664 break; 1665 /* 1666 * model 7 and 8 were incorrectly encoded 1667 * 1668 * xxx is model 8 really broken? 
1669 */ 1670 if (cpi->cpi_model == 7 || 1671 cpi->cpi_model == 8) 1672 cp->cp_ecx = 1673 BITX(cp->cp_ecx, 31, 24) << 16 | 1674 BITX(cp->cp_ecx, 23, 16) << 12 | 1675 BITX(cp->cp_ecx, 15, 8) << 8 | 1676 BITX(cp->cp_ecx, 7, 0); 1677 /* 1678 * model 9 stepping 1 has wrong associativity 1679 */ 1680 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1681 cp->cp_ecx |= 8 << 12; 1682 break; 1683 case X86_VENDOR_Intel: 1684 /* 1685 * Extended L2 Cache features function. 1686 * First appeared on Prescott. 1687 */ 1688 default: 1689 break; 1690 } 1691 break; 1692 default: 1693 break; 1694 } 1695 } 1696 1697 pass2_done: 1698 cpi->cpi_pass = 2; 1699 } 1700 1701 static const char * 1702 intel_cpubrand(const struct cpuid_info *cpi) 1703 { 1704 int i; 1705 1706 if ((x86_feature & X86_CPUID) == 0 || 1707 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1708 return ("i486"); 1709 1710 switch (cpi->cpi_family) { 1711 case 5: 1712 return ("Intel Pentium(r)"); 1713 case 6: 1714 switch (cpi->cpi_model) { 1715 uint_t celeron, xeon; 1716 const struct cpuid_regs *cp; 1717 case 0: 1718 case 1: 1719 case 2: 1720 return ("Intel Pentium(r) Pro"); 1721 case 3: 1722 case 4: 1723 return ("Intel Pentium(r) II"); 1724 case 6: 1725 return ("Intel Celeron(r)"); 1726 case 5: 1727 case 7: 1728 celeron = xeon = 0; 1729 cp = &cpi->cpi_std[2]; /* cache info */ 1730 1731 for (i = 1; i < 4; i++) { 1732 uint_t tmp; 1733 1734 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1735 if (tmp == 0x40) 1736 celeron++; 1737 if (tmp >= 0x44 && tmp <= 0x45) 1738 xeon++; 1739 } 1740 1741 for (i = 0; i < 2; i++) { 1742 uint_t tmp; 1743 1744 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1745 if (tmp == 0x40) 1746 celeron++; 1747 else if (tmp >= 0x44 && tmp <= 0x45) 1748 xeon++; 1749 } 1750 1751 for (i = 0; i < 4; i++) { 1752 uint_t tmp; 1753 1754 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1755 if (tmp == 0x40) 1756 celeron++; 1757 else if (tmp >= 0x44 && tmp <= 0x45) 1758 xeon++; 1759 } 1760 1761 for (i = 0; i < 4; i++) { 1762 uint_t tmp; 1763 1764 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1765 if (tmp == 0x40) 1766 celeron++; 1767 else if (tmp >= 0x44 && tmp <= 0x45) 1768 xeon++; 1769 } 1770 1771 if (celeron) 1772 return ("Intel Celeron(r)"); 1773 if (xeon) 1774 return (cpi->cpi_model == 5 ? 1775 "Intel Pentium(r) II Xeon(tm)" : 1776 "Intel Pentium(r) III Xeon(tm)"); 1777 return (cpi->cpi_model == 5 ? 
1778 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1779 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1780 default: 1781 break; 1782 } 1783 default: 1784 break; 1785 } 1786 1787 /* BrandID is present if the field is nonzero */ 1788 if (cpi->cpi_brandid != 0) { 1789 static const struct { 1790 uint_t bt_bid; 1791 const char *bt_str; 1792 } brand_tbl[] = { 1793 { 0x1, "Intel(r) Celeron(r)" }, 1794 { 0x2, "Intel(r) Pentium(r) III" }, 1795 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1796 { 0x4, "Intel(r) Pentium(r) III" }, 1797 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1798 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1799 { 0x8, "Intel(r) Pentium(r) 4" }, 1800 { 0x9, "Intel(r) Pentium(r) 4" }, 1801 { 0xa, "Intel(r) Celeron(r)" }, 1802 { 0xb, "Intel(r) Xeon(tm)" }, 1803 { 0xc, "Intel(r) Xeon(tm) MP" }, 1804 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1805 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1806 { 0x11, "Mobile Genuine Intel(r)" }, 1807 { 0x12, "Intel(r) Celeron(r) M" }, 1808 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1809 { 0x14, "Intel(r) Celeron(r)" }, 1810 { 0x15, "Mobile Genuine Intel(r)" }, 1811 { 0x16, "Intel(r) Pentium(r) M" }, 1812 { 0x17, "Mobile Intel(r) Celeron(r)" } 1813 }; 1814 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1815 uint_t sgn; 1816 1817 sgn = (cpi->cpi_family << 8) | 1818 (cpi->cpi_model << 4) | cpi->cpi_step; 1819 1820 for (i = 0; i < btblmax; i++) 1821 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1822 break; 1823 if (i < btblmax) { 1824 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1825 return ("Intel(r) Celeron(r)"); 1826 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1827 return ("Intel(r) Xeon(tm) MP"); 1828 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1829 return ("Intel(r) Xeon(tm)"); 1830 return (brand_tbl[i].bt_str); 1831 } 1832 } 1833 1834 return (NULL); 1835 } 1836 1837 static const char * 1838 amd_cpubrand(const struct cpuid_info *cpi) 1839 { 1840 if ((x86_feature & X86_CPUID) == 0 || 1841 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1842 return ("i486 compatible"); 1843 1844 switch (cpi->cpi_family) { 1845 case 5: 1846 switch (cpi->cpi_model) { 1847 case 0: 1848 case 1: 1849 case 2: 1850 case 3: 1851 case 4: 1852 case 5: 1853 return ("AMD-K5(r)"); 1854 case 6: 1855 case 7: 1856 return ("AMD-K6(r)"); 1857 case 8: 1858 return ("AMD-K6(r)-2"); 1859 case 9: 1860 return ("AMD-K6(r)-III"); 1861 default: 1862 return ("AMD (family 5)"); 1863 } 1864 case 6: 1865 switch (cpi->cpi_model) { 1866 case 1: 1867 return ("AMD-K7(tm)"); 1868 case 0: 1869 case 2: 1870 case 4: 1871 return ("AMD Athlon(tm)"); 1872 case 3: 1873 case 7: 1874 return ("AMD Duron(tm)"); 1875 case 6: 1876 case 8: 1877 case 10: 1878 /* 1879 * Use the L2 cache size to distinguish 1880 */ 1881 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
1882 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1883 default: 1884 return ("AMD (family 6)"); 1885 } 1886 default: 1887 break; 1888 } 1889 1890 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1891 cpi->cpi_brandid != 0) { 1892 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1893 case 3: 1894 return ("AMD Opteron(tm) UP 1xx"); 1895 case 4: 1896 return ("AMD Opteron(tm) DP 2xx"); 1897 case 5: 1898 return ("AMD Opteron(tm) MP 8xx"); 1899 default: 1900 return ("AMD Opteron(tm)"); 1901 } 1902 } 1903 1904 return (NULL); 1905 } 1906 1907 static const char * 1908 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1909 { 1910 if ((x86_feature & X86_CPUID) == 0 || 1911 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1912 type == X86_TYPE_CYRIX_486) 1913 return ("i486 compatible"); 1914 1915 switch (type) { 1916 case X86_TYPE_CYRIX_6x86: 1917 return ("Cyrix 6x86"); 1918 case X86_TYPE_CYRIX_6x86L: 1919 return ("Cyrix 6x86L"); 1920 case X86_TYPE_CYRIX_6x86MX: 1921 return ("Cyrix 6x86MX"); 1922 case X86_TYPE_CYRIX_GXm: 1923 return ("Cyrix GXm"); 1924 case X86_TYPE_CYRIX_MediaGX: 1925 return ("Cyrix MediaGX"); 1926 case X86_TYPE_CYRIX_MII: 1927 return ("Cyrix M2"); 1928 case X86_TYPE_VIA_CYRIX_III: 1929 return ("VIA Cyrix M3"); 1930 default: 1931 /* 1932 * Have another wild guess .. 1933 */ 1934 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1935 return ("Cyrix 5x86"); 1936 else if (cpi->cpi_family == 5) { 1937 switch (cpi->cpi_model) { 1938 case 2: 1939 return ("Cyrix 6x86"); /* Cyrix M1 */ 1940 case 4: 1941 return ("Cyrix MediaGX"); 1942 default: 1943 break; 1944 } 1945 } else if (cpi->cpi_family == 6) { 1946 switch (cpi->cpi_model) { 1947 case 0: 1948 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1949 case 5: 1950 case 6: 1951 case 7: 1952 case 8: 1953 case 9: 1954 return ("VIA C3"); 1955 default: 1956 break; 1957 } 1958 } 1959 break; 1960 } 1961 return (NULL); 1962 } 1963 1964 /* 1965 * This only gets called in the case that the CPU extended 1966 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1967 * aren't available, or contain null bytes for some reason. 1968 */ 1969 static void 1970 fabricate_brandstr(struct cpuid_info *cpi) 1971 { 1972 const char *brand = NULL; 1973 1974 switch (cpi->cpi_vendor) { 1975 case X86_VENDOR_Intel: 1976 brand = intel_cpubrand(cpi); 1977 break; 1978 case X86_VENDOR_AMD: 1979 brand = amd_cpubrand(cpi); 1980 break; 1981 case X86_VENDOR_Cyrix: 1982 brand = cyrix_cpubrand(cpi, x86_type); 1983 break; 1984 case X86_VENDOR_NexGen: 1985 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1986 brand = "NexGen Nx586"; 1987 break; 1988 case X86_VENDOR_Centaur: 1989 if (cpi->cpi_family == 5) 1990 switch (cpi->cpi_model) { 1991 case 4: 1992 brand = "Centaur C6"; 1993 break; 1994 case 8: 1995 brand = "Centaur C2"; 1996 break; 1997 case 9: 1998 brand = "Centaur C3"; 1999 break; 2000 default: 2001 break; 2002 } 2003 break; 2004 case X86_VENDOR_Rise: 2005 if (cpi->cpi_family == 5 && 2006 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 2007 brand = "Rise mP6"; 2008 break; 2009 case X86_VENDOR_SiS: 2010 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2011 brand = "SiS 55x"; 2012 break; 2013 case X86_VENDOR_TM: 2014 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 2015 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 2016 break; 2017 case X86_VENDOR_NSC: 2018 case X86_VENDOR_UMC: 2019 default: 2020 break; 2021 } 2022 if (brand) { 2023 (void) strcpy((char *)cpi->cpi_brandstr, brand); 2024 return; 2025 } 2026 2027 /* 2028 * If all else fails ... 
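	 * (for example, a hypothetical unrecognized part with vendor string
	 * "AuthenticAMD", family 16, model 4 and stepping 2 would get the
	 * fabricated brand string "AuthenticAMD 16.4.2")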
2029 */
2030 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2031 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2032 cpi->cpi_model, cpi->cpi_step);
2033 }
2034
2035 /*
2036 * This routine is called just after kernel memory allocation
2037 * becomes available on cpu0, and as part of mp_startup() on
2038 * the other cpus.
2039 *
2040 * Fixup the brand string, and collect any information from cpuid
2041 * that requires dynamically allocated storage to represent.
2042 */
2043 /*ARGSUSED*/
2044 void
2045 cpuid_pass3(cpu_t *cpu)
2046 {
2047 int i, max, shft, level, size;
2048 struct cpuid_regs regs;
2049 struct cpuid_regs *cp;
2050 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2051
2052 ASSERT(cpi->cpi_pass == 2);
2053
2054 /*
2055 * Function 4: Deterministic cache parameters
2056 *
2057 * Take this opportunity to detect the number of threads
2058 * sharing the last level cache, and construct a corresponding
2059 * cache id. The respective cpuid_info members are initialized
2060 * to the default case of "no last level cache sharing".
2061 */
2062 cpi->cpi_ncpu_shr_last_cache = 1;
2063 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2064
2065 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2066
2067 /*
2068 * Find the # of elements (size) returned by fn 4, and along
2069 * the way detect last level cache sharing details.
2070 */
2071 bzero(&regs, sizeof (regs));
2072 cp = &regs;
2073 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2074 cp->cp_eax = 4;
2075 cp->cp_ecx = i;
2076
2077 (void) __cpuid_insn(cp);
2078
2079 if (CPI_CACHE_TYPE(cp) == 0)
2080 break;
2081 level = CPI_CACHE_LVL(cp);
2082 if (level > max) {
2083 max = level;
2084 cpi->cpi_ncpu_shr_last_cache =
2085 CPI_NTHR_SHR_CACHE(cp) + 1;
2086 }
2087 }
2088 cpi->cpi_std_4_size = size = i;
2089
2090 /*
2091 * Allocate the cpi_std_4 array. The first element
2092 * references the regs for fn 4, %ecx == 0, which
2093 * cpuid_pass2() stashed in cpi->cpi_std[4].
2094 */
2095 if (size > 0) {
2096 cpi->cpi_std_4 =
2097 kmem_alloc(size * sizeof (cp), KM_SLEEP);
2098 cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2099
2100 /*
2101 * Allocate storage to hold the additional regs
2102 * for function 4, %ecx == 1 .. cpi_std_4_size.
2103 *
2104 * The regs for fn 4, %ecx == 0 has already
2105 * been allocated as indicated above.
2106 */
2107 for (i = 1; i < size; i++) {
2108 cp = cpi->cpi_std_4[i] =
2109 kmem_zalloc(sizeof (regs), KM_SLEEP);
2110 cp->cp_eax = 4;
2111 cp->cp_ecx = i;
2112
2113 (void) __cpuid_insn(cp);
2114 }
2115 }
2116 /*
2117 * Determine the number of bits needed to represent
2118 * the number of CPUs sharing the last level cache.
2119 *
2120 * Shift off that number of bits from the APIC id to
2121 * derive the cache id.
2122 */
2123 shft = 0;
2124 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2125 shft++;
2126 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2127 }
2128
2129 /*
2130 * Now fixup the brand string
2131 */
2132 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2133 fabricate_brandstr(cpi);
2134 } else {
2135
2136 /*
2137 * If we successfully extracted a brand string from the cpuid
2138 * instruction, clean it up by removing leading spaces and
2139 * similar junk.
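 *
 * For example (illustrative), a raw string such as
 *
 *	"  Genuine Intel(R) Xeon(R) CPU   X5550  @ 2.67GHz"
 *
 * comes out of the cleanup below as
 *
 *	"Intel(r) Xeon(r) CPU   X5550  @ 2.67GHz"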
2140 */ 2141 if (cpi->cpi_brandstr[0]) { 2142 size_t maxlen = sizeof (cpi->cpi_brandstr); 2143 char *src, *dst; 2144 2145 dst = src = (char *)cpi->cpi_brandstr; 2146 src[maxlen - 1] = '\0'; 2147 /* 2148 * strip leading spaces 2149 */ 2150 while (*src == ' ') 2151 src++; 2152 /* 2153 * Remove any 'Genuine' or "Authentic" prefixes 2154 */ 2155 if (strncmp(src, "Genuine ", 8) == 0) 2156 src += 8; 2157 if (strncmp(src, "Authentic ", 10) == 0) 2158 src += 10; 2159 2160 /* 2161 * Now do an in-place copy. 2162 * Map (R) to (r) and (TM) to (tm). 2163 * The era of teletypes is long gone, and there's 2164 * -really- no need to shout. 2165 */ 2166 while (*src != '\0') { 2167 if (src[0] == '(') { 2168 if (strncmp(src + 1, "R)", 2) == 0) { 2169 (void) strncpy(dst, "(r)", 3); 2170 src += 3; 2171 dst += 3; 2172 continue; 2173 } 2174 if (strncmp(src + 1, "TM)", 3) == 0) { 2175 (void) strncpy(dst, "(tm)", 4); 2176 src += 4; 2177 dst += 4; 2178 continue; 2179 } 2180 } 2181 *dst++ = *src++; 2182 } 2183 *dst = '\0'; 2184 2185 /* 2186 * Finally, remove any trailing spaces 2187 */ 2188 while (--dst > cpi->cpi_brandstr) 2189 if (*dst == ' ') 2190 *dst = '\0'; 2191 else 2192 break; 2193 } else 2194 fabricate_brandstr(cpi); 2195 } 2196 cpi->cpi_pass = 3; 2197 } 2198 2199 /* 2200 * This routine is called out of bind_hwcap() much later in the life 2201 * of the kernel (post_startup()). The job of this routine is to resolve 2202 * the hardware feature support and kernel support for those features into 2203 * what we're actually going to tell applications via the aux vector. 2204 */ 2205 uint_t 2206 cpuid_pass4(cpu_t *cpu) 2207 { 2208 struct cpuid_info *cpi; 2209 uint_t hwcap_flags = 0; 2210 2211 if (cpu == NULL) 2212 cpu = CPU; 2213 cpi = cpu->cpu_m.mcpu_cpi; 2214 2215 ASSERT(cpi->cpi_pass == 3); 2216 2217 if (cpi->cpi_maxeax >= 1) { 2218 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2219 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2220 2221 *edx = CPI_FEATURES_EDX(cpi); 2222 *ecx = CPI_FEATURES_ECX(cpi); 2223 2224 /* 2225 * [these require explicit kernel support] 2226 */ 2227 if ((x86_feature & X86_SEP) == 0) 2228 *edx &= ~CPUID_INTC_EDX_SEP; 2229 2230 if ((x86_feature & X86_SSE) == 0) 2231 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2232 if ((x86_feature & X86_SSE2) == 0) 2233 *edx &= ~CPUID_INTC_EDX_SSE2; 2234 2235 if ((x86_feature & X86_HTT) == 0) 2236 *edx &= ~CPUID_INTC_EDX_HTT; 2237 2238 if ((x86_feature & X86_SSE3) == 0) 2239 *ecx &= ~CPUID_INTC_ECX_SSE3; 2240 2241 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2242 if ((x86_feature & X86_SSSE3) == 0) 2243 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2244 if ((x86_feature & X86_SSE4_1) == 0) 2245 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2246 if ((x86_feature & X86_SSE4_2) == 0) 2247 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2248 if ((x86_feature & X86_AES) == 0) 2249 *ecx &= ~CPUID_INTC_ECX_AES; 2250 } 2251 2252 /* 2253 * [no explicit support required beyond x87 fp context] 2254 */ 2255 if (!fpu_exists) 2256 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2257 2258 /* 2259 * Now map the supported feature vector to things that we 2260 * think userland will care about. 
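 *
 * The AV_386_* bits accumulated below are ultimately exported to
 * userland in the aux vector (AT_SUN_HWCAP), where a program can
 * test them with getisax(3C); a minimal sketch:
 *
 *	uint_t ui;
 *	(void) getisax(&ui, 1);
 *	if (ui & AV_386_SSE2)
 *		use_sse2_code_path();	(hypothetical function)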
2261 */ 2262 if (*edx & CPUID_INTC_EDX_SEP) 2263 hwcap_flags |= AV_386_SEP; 2264 if (*edx & CPUID_INTC_EDX_SSE) 2265 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2266 if (*edx & CPUID_INTC_EDX_SSE2) 2267 hwcap_flags |= AV_386_SSE2; 2268 if (*ecx & CPUID_INTC_ECX_SSE3) 2269 hwcap_flags |= AV_386_SSE3; 2270 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2271 if (*ecx & CPUID_INTC_ECX_SSSE3) 2272 hwcap_flags |= AV_386_SSSE3; 2273 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2274 hwcap_flags |= AV_386_SSE4_1; 2275 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2276 hwcap_flags |= AV_386_SSE4_2; 2277 if (*ecx & CPUID_INTC_ECX_MOVBE) 2278 hwcap_flags |= AV_386_MOVBE; 2279 if (*ecx & CPUID_INTC_ECX_AES) 2280 hwcap_flags |= AV_386_AES; 2281 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2282 hwcap_flags |= AV_386_PCLMULQDQ; 2283 } 2284 if (*ecx & CPUID_INTC_ECX_POPCNT) 2285 hwcap_flags |= AV_386_POPCNT; 2286 if (*edx & CPUID_INTC_EDX_FPU) 2287 hwcap_flags |= AV_386_FPU; 2288 if (*edx & CPUID_INTC_EDX_MMX) 2289 hwcap_flags |= AV_386_MMX; 2290 2291 if (*edx & CPUID_INTC_EDX_TSC) 2292 hwcap_flags |= AV_386_TSC; 2293 if (*edx & CPUID_INTC_EDX_CX8) 2294 hwcap_flags |= AV_386_CX8; 2295 if (*edx & CPUID_INTC_EDX_CMOV) 2296 hwcap_flags |= AV_386_CMOV; 2297 if (*ecx & CPUID_INTC_ECX_CX16) 2298 hwcap_flags |= AV_386_CX16; 2299 } 2300 2301 if (cpi->cpi_xmaxeax < 0x80000001) 2302 goto pass4_done; 2303 2304 switch (cpi->cpi_vendor) { 2305 struct cpuid_regs cp; 2306 uint32_t *edx, *ecx; 2307 2308 case X86_VENDOR_Intel: 2309 /* 2310 * Seems like Intel duplicated what we necessary 2311 * here to make the initial crop of 64-bit OS's work. 2312 * Hopefully, those are the only "extended" bits 2313 * they'll add. 2314 */ 2315 /*FALLTHROUGH*/ 2316 2317 case X86_VENDOR_AMD: 2318 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2319 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2320 2321 *edx = CPI_FEATURES_XTD_EDX(cpi); 2322 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2323 2324 /* 2325 * [these features require explicit kernel support] 2326 */ 2327 switch (cpi->cpi_vendor) { 2328 case X86_VENDOR_Intel: 2329 if ((x86_feature & X86_TSCP) == 0) 2330 *edx &= ~CPUID_AMD_EDX_TSCP; 2331 break; 2332 2333 case X86_VENDOR_AMD: 2334 if ((x86_feature & X86_TSCP) == 0) 2335 *edx &= ~CPUID_AMD_EDX_TSCP; 2336 if ((x86_feature & X86_SSE4A) == 0) 2337 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2338 break; 2339 2340 default: 2341 break; 2342 } 2343 2344 /* 2345 * [no explicit support required beyond 2346 * x87 fp context and exception handlers] 2347 */ 2348 if (!fpu_exists) 2349 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2350 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2351 2352 if ((x86_feature & X86_NX) == 0) 2353 *edx &= ~CPUID_AMD_EDX_NX; 2354 #if !defined(__amd64) 2355 *edx &= ~CPUID_AMD_EDX_LM; 2356 #endif 2357 /* 2358 * Now map the supported feature vector to 2359 * things that we think userland will care about. 
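 *
 * For example (illustrative), a 32-bit AMD K7 with 3DNow! would
 * typically pick up AV_386_AMD_MMX and AV_386_AMD_3DNow here, while
 * AV_386_AMD_SYSC is only advertised by 64-bit kernels (note the
 * #if defined(__amd64) just below).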
2360 */ 2361 #if defined(__amd64) 2362 if (*edx & CPUID_AMD_EDX_SYSC) 2363 hwcap_flags |= AV_386_AMD_SYSC; 2364 #endif 2365 if (*edx & CPUID_AMD_EDX_MMXamd) 2366 hwcap_flags |= AV_386_AMD_MMX; 2367 if (*edx & CPUID_AMD_EDX_3DNow) 2368 hwcap_flags |= AV_386_AMD_3DNow; 2369 if (*edx & CPUID_AMD_EDX_3DNowx) 2370 hwcap_flags |= AV_386_AMD_3DNowx; 2371 2372 switch (cpi->cpi_vendor) { 2373 case X86_VENDOR_AMD: 2374 if (*edx & CPUID_AMD_EDX_TSCP) 2375 hwcap_flags |= AV_386_TSCP; 2376 if (*ecx & CPUID_AMD_ECX_AHF64) 2377 hwcap_flags |= AV_386_AHF; 2378 if (*ecx & CPUID_AMD_ECX_SSE4A) 2379 hwcap_flags |= AV_386_AMD_SSE4A; 2380 if (*ecx & CPUID_AMD_ECX_LZCNT) 2381 hwcap_flags |= AV_386_AMD_LZCNT; 2382 break; 2383 2384 case X86_VENDOR_Intel: 2385 if (*edx & CPUID_AMD_EDX_TSCP) 2386 hwcap_flags |= AV_386_TSCP; 2387 /* 2388 * Aarrgh. 2389 * Intel uses a different bit in the same word. 2390 */ 2391 if (*ecx & CPUID_INTC_ECX_AHF64) 2392 hwcap_flags |= AV_386_AHF; 2393 break; 2394 2395 default: 2396 break; 2397 } 2398 break; 2399 2400 case X86_VENDOR_TM: 2401 cp.cp_eax = 0x80860001; 2402 (void) __cpuid_insn(&cp); 2403 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2404 break; 2405 2406 default: 2407 break; 2408 } 2409 2410 pass4_done: 2411 cpi->cpi_pass = 4; 2412 return (hwcap_flags); 2413 } 2414 2415 2416 /* 2417 * Simulate the cpuid instruction using the data we previously 2418 * captured about this CPU. We try our best to return the truth 2419 * about the hardware, independently of kernel support. 2420 */ 2421 uint32_t 2422 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2423 { 2424 struct cpuid_info *cpi; 2425 struct cpuid_regs *xcp; 2426 2427 if (cpu == NULL) 2428 cpu = CPU; 2429 cpi = cpu->cpu_m.mcpu_cpi; 2430 2431 ASSERT(cpuid_checkpass(cpu, 3)); 2432 2433 /* 2434 * CPUID data is cached in two separate places: cpi_std for standard 2435 * CPUID functions, and cpi_extd for extended CPUID functions. 2436 */ 2437 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2438 xcp = &cpi->cpi_std[cp->cp_eax]; 2439 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2440 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2441 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2442 else 2443 /* 2444 * The caller is asking for data from an input parameter which 2445 * the kernel has not cached. In this case we go fetch from 2446 * the hardware and return the data directly to the user. 2447 */ 2448 return (__cpuid_insn(cp)); 2449 2450 cp->cp_eax = xcp->cp_eax; 2451 cp->cp_ebx = xcp->cp_ebx; 2452 cp->cp_ecx = xcp->cp_ecx; 2453 cp->cp_edx = xcp->cp_edx; 2454 return (cp->cp_eax); 2455 } 2456 2457 int 2458 cpuid_checkpass(cpu_t *cpu, int pass) 2459 { 2460 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2461 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2462 } 2463 2464 int 2465 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2466 { 2467 ASSERT(cpuid_checkpass(cpu, 3)); 2468 2469 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2470 } 2471 2472 int 2473 cpuid_is_cmt(cpu_t *cpu) 2474 { 2475 if (cpu == NULL) 2476 cpu = CPU; 2477 2478 ASSERT(cpuid_checkpass(cpu, 1)); 2479 2480 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2481 } 2482 2483 /* 2484 * AMD and Intel both implement the 64-bit variant of the syscall 2485 * instruction (syscallq), so if there's -any- support for syscall, 2486 * cpuid currently says "yes, we support this". 
2487 * 2488 * However, Intel decided to -not- implement the 32-bit variant of the 2489 * syscall instruction, so we provide a predicate to allow our caller 2490 * to test that subtlety here. 2491 * 2492 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2493 * even in the case where the hardware would in fact support it. 2494 */ 2495 /*ARGSUSED*/ 2496 int 2497 cpuid_syscall32_insn(cpu_t *cpu) 2498 { 2499 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2500 2501 #if !defined(__xpv) 2502 if (cpu == NULL) 2503 cpu = CPU; 2504 2505 /*CSTYLED*/ 2506 { 2507 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2508 2509 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2510 cpi->cpi_xmaxeax >= 0x80000001 && 2511 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2512 return (1); 2513 } 2514 #endif 2515 return (0); 2516 } 2517 2518 int 2519 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2520 { 2521 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2522 2523 static const char fmt[] = 2524 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2525 static const char fmt_ht[] = 2526 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2527 2528 ASSERT(cpuid_checkpass(cpu, 1)); 2529 2530 if (cpuid_is_cmt(cpu)) 2531 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2532 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2533 cpi->cpi_family, cpi->cpi_model, 2534 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2535 return (snprintf(s, n, fmt, 2536 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2537 cpi->cpi_family, cpi->cpi_model, 2538 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2539 } 2540 2541 const char * 2542 cpuid_getvendorstr(cpu_t *cpu) 2543 { 2544 ASSERT(cpuid_checkpass(cpu, 1)); 2545 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2546 } 2547 2548 uint_t 2549 cpuid_getvendor(cpu_t *cpu) 2550 { 2551 ASSERT(cpuid_checkpass(cpu, 1)); 2552 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2553 } 2554 2555 uint_t 2556 cpuid_getfamily(cpu_t *cpu) 2557 { 2558 ASSERT(cpuid_checkpass(cpu, 1)); 2559 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2560 } 2561 2562 uint_t 2563 cpuid_getmodel(cpu_t *cpu) 2564 { 2565 ASSERT(cpuid_checkpass(cpu, 1)); 2566 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2567 } 2568 2569 uint_t 2570 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2571 { 2572 ASSERT(cpuid_checkpass(cpu, 1)); 2573 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2574 } 2575 2576 uint_t 2577 cpuid_get_ncore_per_chip(cpu_t *cpu) 2578 { 2579 ASSERT(cpuid_checkpass(cpu, 1)); 2580 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2581 } 2582 2583 uint_t 2584 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2585 { 2586 ASSERT(cpuid_checkpass(cpu, 2)); 2587 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2588 } 2589 2590 id_t 2591 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2592 { 2593 ASSERT(cpuid_checkpass(cpu, 2)); 2594 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2595 } 2596 2597 uint_t 2598 cpuid_getstep(cpu_t *cpu) 2599 { 2600 ASSERT(cpuid_checkpass(cpu, 1)); 2601 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2602 } 2603 2604 uint_t 2605 cpuid_getsig(struct cpu *cpu) 2606 { 2607 ASSERT(cpuid_checkpass(cpu, 1)); 2608 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2609 } 2610 2611 uint32_t 2612 cpuid_getchiprev(struct cpu *cpu) 2613 { 2614 ASSERT(cpuid_checkpass(cpu, 1)); 2615 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2616 } 2617 2618 const char * 2619 cpuid_getchiprevstr(struct cpu *cpu) 2620 { 2621 ASSERT(cpuid_checkpass(cpu, 1)); 2622 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2623 } 2624 2625 
uint32_t 2626 cpuid_getsockettype(struct cpu *cpu) 2627 { 2628 ASSERT(cpuid_checkpass(cpu, 1)); 2629 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2630 } 2631 2632 const char * 2633 cpuid_getsocketstr(cpu_t *cpu) 2634 { 2635 static const char *socketstr = NULL; 2636 struct cpuid_info *cpi; 2637 2638 ASSERT(cpuid_checkpass(cpu, 1)); 2639 cpi = cpu->cpu_m.mcpu_cpi; 2640 2641 /* Assume that socket types are the same across the system */ 2642 if (socketstr == NULL) 2643 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 2644 cpi->cpi_model, cpi->cpi_step); 2645 2646 2647 return (socketstr); 2648 } 2649 2650 int 2651 cpuid_get_chipid(cpu_t *cpu) 2652 { 2653 ASSERT(cpuid_checkpass(cpu, 1)); 2654 2655 if (cpuid_is_cmt(cpu)) 2656 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2657 return (cpu->cpu_id); 2658 } 2659 2660 id_t 2661 cpuid_get_coreid(cpu_t *cpu) 2662 { 2663 ASSERT(cpuid_checkpass(cpu, 1)); 2664 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2665 } 2666 2667 int 2668 cpuid_get_pkgcoreid(cpu_t *cpu) 2669 { 2670 ASSERT(cpuid_checkpass(cpu, 1)); 2671 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2672 } 2673 2674 int 2675 cpuid_get_clogid(cpu_t *cpu) 2676 { 2677 ASSERT(cpuid_checkpass(cpu, 1)); 2678 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2679 } 2680 2681 int 2682 cpuid_get_cacheid(cpu_t *cpu) 2683 { 2684 ASSERT(cpuid_checkpass(cpu, 1)); 2685 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2686 } 2687 2688 uint_t 2689 cpuid_get_procnodeid(cpu_t *cpu) 2690 { 2691 ASSERT(cpuid_checkpass(cpu, 1)); 2692 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid); 2693 } 2694 2695 uint_t 2696 cpuid_get_procnodes_per_pkg(cpu_t *cpu) 2697 { 2698 ASSERT(cpuid_checkpass(cpu, 1)); 2699 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg); 2700 } 2701 2702 /*ARGSUSED*/ 2703 int 2704 cpuid_have_cr8access(cpu_t *cpu) 2705 { 2706 #if defined(__amd64) 2707 return (1); 2708 #else 2709 struct cpuid_info *cpi; 2710 2711 ASSERT(cpu != NULL); 2712 cpi = cpu->cpu_m.mcpu_cpi; 2713 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 2714 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 2715 return (1); 2716 return (0); 2717 #endif 2718 } 2719 2720 uint32_t 2721 cpuid_get_apicid(cpu_t *cpu) 2722 { 2723 ASSERT(cpuid_checkpass(cpu, 1)); 2724 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 2725 return (UINT32_MAX); 2726 } else { 2727 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 2728 } 2729 } 2730 2731 void 2732 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2733 { 2734 struct cpuid_info *cpi; 2735 2736 if (cpu == NULL) 2737 cpu = CPU; 2738 cpi = cpu->cpu_m.mcpu_cpi; 2739 2740 ASSERT(cpuid_checkpass(cpu, 1)); 2741 2742 if (pabits) 2743 *pabits = cpi->cpi_pabits; 2744 if (vabits) 2745 *vabits = cpi->cpi_vabits; 2746 } 2747 2748 /* 2749 * Returns the number of data TLB entries for a corresponding 2750 * pagesize. If it can't be computed, or isn't known, the 2751 * routine returns zero. If you ask about an architecturally 2752 * impossible pagesize, the routine will panic (so that the 2753 * hat implementor knows that things are inconsistent.) 
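 *
 * A caller in the hat layer might use it roughly like this (sketch;
 * the fallback value is hypothetical):
 *
 *	uint_t nent = cpuid_get_dtlb_nent(CPU, MMU_PAGESIZE);
 *	if (nent == 0)
 *		nent = 64;	(assume a modest default)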
2754 */ 2755 uint_t 2756 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2757 { 2758 struct cpuid_info *cpi; 2759 uint_t dtlb_nent = 0; 2760 2761 if (cpu == NULL) 2762 cpu = CPU; 2763 cpi = cpu->cpu_m.mcpu_cpi; 2764 2765 ASSERT(cpuid_checkpass(cpu, 1)); 2766 2767 /* 2768 * Check the L2 TLB info 2769 */ 2770 if (cpi->cpi_xmaxeax >= 0x80000006) { 2771 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2772 2773 switch (pagesize) { 2774 2775 case 4 * 1024: 2776 /* 2777 * All zero in the top 16 bits of the register 2778 * indicates a unified TLB. Size is in low 16 bits. 2779 */ 2780 if ((cp->cp_ebx & 0xffff0000) == 0) 2781 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2782 else 2783 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2784 break; 2785 2786 case 2 * 1024 * 1024: 2787 if ((cp->cp_eax & 0xffff0000) == 0) 2788 dtlb_nent = cp->cp_eax & 0x0000ffff; 2789 else 2790 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2791 break; 2792 2793 default: 2794 panic("unknown L2 pagesize"); 2795 /*NOTREACHED*/ 2796 } 2797 } 2798 2799 if (dtlb_nent != 0) 2800 return (dtlb_nent); 2801 2802 /* 2803 * No L2 TLB support for this size, try L1. 2804 */ 2805 if (cpi->cpi_xmaxeax >= 0x80000005) { 2806 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2807 2808 switch (pagesize) { 2809 case 4 * 1024: 2810 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2811 break; 2812 case 2 * 1024 * 1024: 2813 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2814 break; 2815 default: 2816 panic("unknown L1 d-TLB pagesize"); 2817 /*NOTREACHED*/ 2818 } 2819 } 2820 2821 return (dtlb_nent); 2822 } 2823 2824 /* 2825 * Return 0 if the erratum is not present or not applicable, positive 2826 * if it is, and negative if the status of the erratum is unknown. 2827 * 2828 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2829 * Processors" #25759, Rev 3.57, August 2005 2830 */ 2831 int 2832 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2833 { 2834 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2835 uint_t eax; 2836 2837 /* 2838 * Bail out if this CPU isn't an AMD CPU, or if it's 2839 * a legacy (32-bit) AMD CPU. 
2840 */ 2841 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2842 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2843 cpi->cpi_family == 6) 2844 2845 return (0); 2846 2847 eax = cpi->cpi_std[1].cp_eax; 2848 2849 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2850 #define SH_B3(eax) (eax == 0xf51) 2851 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2852 2853 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2854 2855 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2856 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2857 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2858 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2859 2860 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2861 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2862 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2863 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2864 2865 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2866 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2867 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2868 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2869 #define BH_E4(eax) (eax == 0x20fb1) 2870 #define SH_E5(eax) (eax == 0x20f42) 2871 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2872 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2873 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2874 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2875 DH_E6(eax) || JH_E6(eax)) 2876 2877 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2878 #define DR_B0(eax) (eax == 0x100f20) 2879 #define DR_B1(eax) (eax == 0x100f21) 2880 #define DR_BA(eax) (eax == 0x100f2a) 2881 #define DR_B2(eax) (eax == 0x100f22) 2882 #define DR_B3(eax) (eax == 0x100f23) 2883 #define RB_C0(eax) (eax == 0x100f40) 2884 2885 switch (erratum) { 2886 case 1: 2887 return (cpi->cpi_family < 0x10); 2888 case 51: /* what does the asterisk mean? 
*/
2889 return (B(eax) || SH_C0(eax) || CG(eax));
2890 case 52:
2891 return (B(eax));
2892 case 57:
2893 return (cpi->cpi_family <= 0x11);
2894 case 58:
2895 return (B(eax));
2896 case 60:
2897 return (cpi->cpi_family <= 0x11);
2898 case 61:
2899 case 62:
2900 case 63:
2901 case 64:
2902 case 65:
2903 case 66:
2904 case 68:
2905 case 69:
2906 case 70:
2907 case 71:
2908 return (B(eax));
2909 case 72:
2910 return (SH_B0(eax));
2911 case 74:
2912 return (B(eax));
2913 case 75:
2914 return (cpi->cpi_family < 0x10);
2915 case 76:
2916 return (B(eax));
2917 case 77:
2918 return (cpi->cpi_family <= 0x11);
2919 case 78:
2920 return (B(eax) || SH_C0(eax));
2921 case 79:
2922 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
2923 case 80:
2924 case 81:
2925 case 82:
2926 return (B(eax));
2927 case 83:
2928 return (B(eax) || SH_C0(eax) || CG(eax));
2929 case 85:
2930 return (cpi->cpi_family < 0x10);
2931 case 86:
2932 return (SH_C0(eax) || CG(eax));
2933 case 88:
2934 #if !defined(__amd64)
2935 return (0);
2936 #else
2937 return (B(eax) || SH_C0(eax));
2938 #endif
2939 case 89:
2940 return (cpi->cpi_family < 0x10);
2941 case 90:
2942 return (B(eax) || SH_C0(eax) || CG(eax));
2943 case 91:
2944 case 92:
2945 return (B(eax) || SH_C0(eax));
2946 case 93:
2947 return (SH_C0(eax));
2948 case 94:
2949 return (B(eax) || SH_C0(eax) || CG(eax));
2950 case 95:
2951 #if !defined(__amd64)
2952 return (0);
2953 #else
2954 return (B(eax) || SH_C0(eax));
2955 #endif
2956 case 96:
2957 return (B(eax) || SH_C0(eax) || CG(eax));
2958 case 97:
2959 case 98:
2960 return (SH_C0(eax) || CG(eax));
2961 case 99:
2962 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
2963 case 100:
2964 return (B(eax) || SH_C0(eax));
2965 case 101:
2966 case 103:
2967 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
2968 case 104:
2969 return (SH_C0(eax) || CG(eax) || D0(eax));
2970 case 105:
2971 case 106:
2972 case 107:
2973 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
2974 case 108:
2975 return (DH_CG(eax));
2976 case 109:
2977 return (SH_C0(eax) || CG(eax) || D0(eax));
2978 case 110:
2979 return (D0(eax) || EX(eax));
2980 case 111:
2981 return (CG(eax));
2982 case 112:
2983 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
2984 case 113:
2985 return (eax == 0x20fc0);
2986 case 114:
2987 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
2988 case 115:
2989 return (SH_E0(eax) || JH_E1(eax));
2990 case 116:
2991 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
2992 case 117:
2993 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
2994 case 118:
2995 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
2996 JH_E6(eax));
2997 case 121:
2998 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
2999 case 122:
3000 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3001 case 123:
3002 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3003 case 131:
3004 return (cpi->cpi_family < 0x10);
3005 case 6336786:
3006 /*
3007 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3008 * if this is a K8 family or newer processor
3009 */
3010 if (CPI_FAMILY(cpi) == 0xf) {
3011 struct cpuid_regs regs;
3012 regs.cp_eax = 0x80000007;
3013 (void) __cpuid_insn(&regs);
3014 return (!(regs.cp_edx & 0x100));
3015 }
3016 return (0);
3017 case 6323525:
3018 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3019 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3020
3021 case 6671130:
3022 /*
3023 * check for processors (pre-Shanghai) that do not provide
3024 * optimal management of 1gb ptes in
its tlb. 3025 */ 3026 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 3027 3028 case 298: 3029 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 3030 DR_B2(eax) || RB_C0(eax)); 3031 3032 default: 3033 return (-1); 3034 3035 } 3036 } 3037 3038 /* 3039 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 3040 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 3041 */ 3042 int 3043 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 3044 { 3045 struct cpuid_info *cpi; 3046 uint_t osvwid; 3047 static int osvwfeature = -1; 3048 uint64_t osvwlength; 3049 3050 3051 cpi = cpu->cpu_m.mcpu_cpi; 3052 3053 /* confirm OSVW supported */ 3054 if (osvwfeature == -1) { 3055 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 3056 } else { 3057 /* assert that osvw feature setting is consistent on all cpus */ 3058 ASSERT(osvwfeature == 3059 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 3060 } 3061 if (!osvwfeature) 3062 return (-1); 3063 3064 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 3065 3066 switch (erratum) { 3067 case 298: /* osvwid is 0 */ 3068 osvwid = 0; 3069 if (osvwlength <= (uint64_t)osvwid) { 3070 /* osvwid 0 is unknown */ 3071 return (-1); 3072 } 3073 3074 /* 3075 * Check the OSVW STATUS MSR to determine the state 3076 * of the erratum where: 3077 * 0 - fixed by HW 3078 * 1 - BIOS has applied the workaround when BIOS 3079 * workaround is available. (Or for other errata, 3080 * OS workaround is required.) 3081 * For a value of 1, caller will confirm that the 3082 * erratum 298 workaround has indeed been applied by BIOS. 3083 * 3084 * A 1 may be set in cpus that have a HW fix 3085 * in a mixed cpu system. Regarding erratum 298: 3086 * In a multiprocessor platform, the workaround above 3087 * should be applied to all processors regardless of 3088 * silicon revision when an affected processor is 3089 * present. 3090 */ 3091 3092 return (rdmsr(MSR_AMD_OSVW_STATUS + 3093 (osvwid / OSVW_ID_CNT_PER_MSR)) & 3094 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 3095 3096 default: 3097 return (-1); 3098 } 3099 } 3100 3101 static const char assoc_str[] = "associativity"; 3102 static const char line_str[] = "line-size"; 3103 static const char size_str[] = "size"; 3104 3105 static void 3106 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 3107 uint32_t val) 3108 { 3109 char buf[128]; 3110 3111 /* 3112 * ndi_prop_update_int() is used because it is desirable for 3113 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 3114 */ 3115 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 3116 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 3117 } 3118 3119 /* 3120 * Intel-style cache/tlb description 3121 * 3122 * Standard cpuid level 2 gives a randomly ordered 3123 * selection of tags that index into a table that describes 3124 * cache and tlb properties. 
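 *
 * For example, fetching the leaf directly looks roughly like
 *
 *	struct cpuid_regs r = { 0 };
 *	r.cp_eax = 2;
 *	(void) __cpuid_insn(&r);
 *
 * after which each valid descriptor byte in %eax..%edx is looked up
 * in intel_ctab[] below; descriptor 0x2c, for instance, denotes a
 * 32K, 8-way, 64 byte line l1-dcache.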
3125 */ 3126 3127 static const char l1_icache_str[] = "l1-icache"; 3128 static const char l1_dcache_str[] = "l1-dcache"; 3129 static const char l2_cache_str[] = "l2-cache"; 3130 static const char l3_cache_str[] = "l3-cache"; 3131 static const char itlb4k_str[] = "itlb-4K"; 3132 static const char dtlb4k_str[] = "dtlb-4K"; 3133 static const char itlb2M_str[] = "itlb-2M"; 3134 static const char itlb4M_str[] = "itlb-4M"; 3135 static const char dtlb4M_str[] = "dtlb-4M"; 3136 static const char dtlb24_str[] = "dtlb0-2M-4M"; 3137 static const char itlb424_str[] = "itlb-4K-2M-4M"; 3138 static const char itlb24_str[] = "itlb-2M-4M"; 3139 static const char dtlb44_str[] = "dtlb-4K-4M"; 3140 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 3141 static const char sl2_cache_str[] = "sectored-l2-cache"; 3142 static const char itrace_str[] = "itrace-cache"; 3143 static const char sl3_cache_str[] = "sectored-l3-cache"; 3144 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3145 3146 static const struct cachetab { 3147 uint8_t ct_code; 3148 uint8_t ct_assoc; 3149 uint16_t ct_line_size; 3150 size_t ct_size; 3151 const char *ct_label; 3152 } intel_ctab[] = { 3153 /* 3154 * maintain descending order! 3155 * 3156 * Codes ignored - Reason 3157 * ---------------------- 3158 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3159 * f0H/f1H - Currently we do not interpret prefetch size by design 3160 */ 3161 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3162 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3163 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3164 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3165 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3166 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3167 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3168 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3169 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3170 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3171 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3172 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3173 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3174 { 0xc0, 4, 0, 8, dtlb44_str }, 3175 { 0xba, 4, 0, 64, dtlb4k_str }, 3176 { 0xb4, 4, 0, 256, dtlb4k_str }, 3177 { 0xb3, 4, 0, 128, dtlb4k_str }, 3178 { 0xb2, 4, 0, 64, itlb4k_str }, 3179 { 0xb0, 4, 0, 128, itlb4k_str }, 3180 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3181 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3182 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3183 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3184 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3185 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3186 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3187 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3188 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3189 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3190 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3191 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3192 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3193 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3194 { 0x73, 8, 0, 64*1024, itrace_str}, 3195 { 0x72, 8, 0, 32*1024, itrace_str}, 3196 { 0x71, 8, 0, 16*1024, itrace_str}, 3197 { 0x70, 8, 0, 12*1024, itrace_str}, 3198 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3199 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3200 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3201 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3202 { 0x5d, 0, 0, 256, dtlb44_str}, 3203 { 0x5c, 0, 0, 128, dtlb44_str}, 3204 { 0x5b, 0, 0, 64, dtlb44_str}, 3205 { 0x5a, 4, 0, 32, dtlb24_str}, 3206 { 0x59, 0, 0, 16, dtlb4k_str}, 3207 { 0x57, 4, 0, 16, dtlb4k_str}, 3208 { 0x56, 4, 0, 16, dtlb4M_str}, 3209 { 0x55, 0, 0, 
7, itlb24_str}, 3210 { 0x52, 0, 0, 256, itlb424_str}, 3211 { 0x51, 0, 0, 128, itlb424_str}, 3212 { 0x50, 0, 0, 64, itlb424_str}, 3213 { 0x4f, 0, 0, 32, itlb4k_str}, 3214 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3215 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3216 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3217 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3218 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3219 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3220 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3221 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3222 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3223 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3224 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3225 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3226 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3227 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3228 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3229 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3230 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3231 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3232 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3233 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3234 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3235 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3236 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3237 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3238 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3239 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3240 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3241 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3242 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3243 { 0x0b, 4, 0, 4, itlb4M_str}, 3244 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3245 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3246 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3247 { 0x05, 4, 0, 32, dtlb4M_str}, 3248 { 0x04, 4, 0, 8, dtlb4M_str}, 3249 { 0x03, 4, 0, 64, dtlb4k_str}, 3250 { 0x02, 4, 0, 2, itlb4M_str}, 3251 { 0x01, 4, 0, 32, itlb4k_str}, 3252 { 0 } 3253 }; 3254 3255 static const struct cachetab cyrix_ctab[] = { 3256 { 0x70, 4, 0, 32, "tlb-4K" }, 3257 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3258 { 0 } 3259 }; 3260 3261 /* 3262 * Search a cache table for a matching entry 3263 */ 3264 static const struct cachetab * 3265 find_cacheent(const struct cachetab *ct, uint_t code) 3266 { 3267 if (code != 0) { 3268 for (; ct->ct_code != 0; ct++) 3269 if (ct->ct_code <= code) 3270 break; 3271 if (ct->ct_code == code) 3272 return (ct); 3273 } 3274 return (NULL); 3275 } 3276 3277 /* 3278 * Populate cachetab entry with L2 or L3 cache-information using 3279 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3280 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3281 * information is found. 
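 *
 * The size computed below follows the usual leaf 4 formula
 *
 *	size = ways * partitions * line_size * sets
 *
 * where ways, partitions and line_size are the %ebx fields plus one
 * and sets is %ecx plus one; e.g. 8 ways * 1 partition * 64 bytes *
 * 512 sets gives a 256KB cache.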
3282 */ 3283 static int 3284 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3285 { 3286 uint32_t level, i; 3287 int ret = 0; 3288 3289 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3290 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3291 3292 if (level == 2 || level == 3) { 3293 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3294 ct->ct_line_size = 3295 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3296 ct->ct_size = ct->ct_assoc * 3297 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3298 ct->ct_line_size * 3299 (cpi->cpi_std_4[i]->cp_ecx + 1); 3300 3301 if (level == 2) { 3302 ct->ct_label = l2_cache_str; 3303 } else if (level == 3) { 3304 ct->ct_label = l3_cache_str; 3305 } 3306 ret = 1; 3307 } 3308 } 3309 3310 return (ret); 3311 } 3312 3313 /* 3314 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3315 * The walk is terminated if the walker returns non-zero. 3316 */ 3317 static void 3318 intel_walk_cacheinfo(struct cpuid_info *cpi, 3319 void *arg, int (*func)(void *, const struct cachetab *)) 3320 { 3321 const struct cachetab *ct; 3322 struct cachetab des_49_ct, des_b1_ct; 3323 uint8_t *dp; 3324 int i; 3325 3326 if ((dp = cpi->cpi_cacheinfo) == NULL) 3327 return; 3328 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3329 /* 3330 * For overloaded descriptor 0x49 we use cpuid function 4 3331 * if supported by the current processor, to create 3332 * cache information. 3333 * For overloaded descriptor 0xb1 we use X86_PAE flag 3334 * to disambiguate the cache information. 3335 */ 3336 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3337 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3338 ct = &des_49_ct; 3339 } else if (*dp == 0xb1) { 3340 des_b1_ct.ct_code = 0xb1; 3341 des_b1_ct.ct_assoc = 4; 3342 des_b1_ct.ct_line_size = 0; 3343 if (x86_feature & X86_PAE) { 3344 des_b1_ct.ct_size = 8; 3345 des_b1_ct.ct_label = itlb2M_str; 3346 } else { 3347 des_b1_ct.ct_size = 4; 3348 des_b1_ct.ct_label = itlb4M_str; 3349 } 3350 ct = &des_b1_ct; 3351 } else { 3352 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3353 continue; 3354 } 3355 } 3356 3357 if (func(arg, ct) != 0) { 3358 break; 3359 } 3360 } 3361 } 3362 3363 /* 3364 * (Like the Intel one, except for Cyrix CPUs) 3365 */ 3366 static void 3367 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3368 void *arg, int (*func)(void *, const struct cachetab *)) 3369 { 3370 const struct cachetab *ct; 3371 uint8_t *dp; 3372 int i; 3373 3374 if ((dp = cpi->cpi_cacheinfo) == NULL) 3375 return; 3376 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3377 /* 3378 * Search Cyrix-specific descriptor table first .. 3379 */ 3380 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3381 if (func(arg, ct) != 0) 3382 break; 3383 continue; 3384 } 3385 /* 3386 * .. else fall back to the Intel one 3387 */ 3388 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3389 if (func(arg, ct) != 0) 3390 break; 3391 continue; 3392 } 3393 } 3394 } 3395 3396 /* 3397 * A cacheinfo walker that adds associativity, line-size, and size properties 3398 * to the devinfo node it is passed as an argument. 
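 *
 * For example (illustrative), a 256KB 8-way L2 with 64 byte lines
 * shows up on the cpu node as
 *
 *	l2-cache-associativity=8
 *	l2-cache-line-size=64
 *	l2-cache-size=262144
 *
 * which is visible with prtconf -v.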
3399 */ 3400 static int 3401 add_cacheent_props(void *arg, const struct cachetab *ct) 3402 { 3403 dev_info_t *devi = arg; 3404 3405 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3406 if (ct->ct_line_size != 0) 3407 add_cache_prop(devi, ct->ct_label, line_str, 3408 ct->ct_line_size); 3409 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3410 return (0); 3411 } 3412 3413 3414 static const char fully_assoc[] = "fully-associative?"; 3415 3416 /* 3417 * AMD style cache/tlb description 3418 * 3419 * Extended functions 5 and 6 directly describe properties of 3420 * tlbs and various cache levels. 3421 */ 3422 static void 3423 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3424 { 3425 switch (assoc) { 3426 case 0: /* reserved; ignore */ 3427 break; 3428 default: 3429 add_cache_prop(devi, label, assoc_str, assoc); 3430 break; 3431 case 0xff: 3432 add_cache_prop(devi, label, fully_assoc, 1); 3433 break; 3434 } 3435 } 3436 3437 static void 3438 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3439 { 3440 if (size == 0) 3441 return; 3442 add_cache_prop(devi, label, size_str, size); 3443 add_amd_assoc(devi, label, assoc); 3444 } 3445 3446 static void 3447 add_amd_cache(dev_info_t *devi, const char *label, 3448 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3449 { 3450 if (size == 0 || line_size == 0) 3451 return; 3452 add_amd_assoc(devi, label, assoc); 3453 /* 3454 * Most AMD parts have a sectored cache. Multiple cache lines are 3455 * associated with each tag. A sector consists of all cache lines 3456 * associated with a tag. For example, the AMD K6-III has a sector 3457 * size of 2 cache lines per tag. 3458 */ 3459 if (lines_per_tag != 0) 3460 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3461 add_cache_prop(devi, label, line_str, line_size); 3462 add_cache_prop(devi, label, size_str, size * 1024); 3463 } 3464 3465 static void 3466 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3467 { 3468 switch (assoc) { 3469 case 0: /* off */ 3470 break; 3471 case 1: 3472 case 2: 3473 case 4: 3474 add_cache_prop(devi, label, assoc_str, assoc); 3475 break; 3476 case 6: 3477 add_cache_prop(devi, label, assoc_str, 8); 3478 break; 3479 case 8: 3480 add_cache_prop(devi, label, assoc_str, 16); 3481 break; 3482 case 0xf: 3483 add_cache_prop(devi, label, fully_assoc, 1); 3484 break; 3485 default: /* reserved; ignore */ 3486 break; 3487 } 3488 } 3489 3490 static void 3491 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3492 { 3493 if (size == 0 || assoc == 0) 3494 return; 3495 add_amd_l2_assoc(devi, label, assoc); 3496 add_cache_prop(devi, label, size_str, size); 3497 } 3498 3499 static void 3500 add_amd_l2_cache(dev_info_t *devi, const char *label, 3501 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3502 { 3503 if (size == 0 || assoc == 0 || line_size == 0) 3504 return; 3505 add_amd_l2_assoc(devi, label, assoc); 3506 if (lines_per_tag != 0) 3507 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3508 add_cache_prop(devi, label, line_str, line_size); 3509 add_cache_prop(devi, label, size_str, size * 1024); 3510 } 3511 3512 static void 3513 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3514 { 3515 struct cpuid_regs *cp; 3516 3517 if (cpi->cpi_xmaxeax < 0x80000005) 3518 return; 3519 cp = &cpi->cpi_extd[5]; 3520 3521 /* 3522 * 4M/2M L1 TLB configuration 3523 * 3524 * We report the size for 2M pages because AMD uses two 3525 * TLB 
entries for one 4M page. 3526 */ 3527 add_amd_tlb(devi, "dtlb-2M", 3528 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3529 add_amd_tlb(devi, "itlb-2M", 3530 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3531 3532 /* 3533 * 4K L1 TLB configuration 3534 */ 3535 3536 switch (cpi->cpi_vendor) { 3537 uint_t nentries; 3538 case X86_VENDOR_TM: 3539 if (cpi->cpi_family >= 5) { 3540 /* 3541 * Crusoe processors have 256 TLB entries, but 3542 * cpuid data format constrains them to only 3543 * reporting 255 of them. 3544 */ 3545 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3546 nentries = 256; 3547 /* 3548 * Crusoe processors also have a unified TLB 3549 */ 3550 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3551 nentries); 3552 break; 3553 } 3554 /*FALLTHROUGH*/ 3555 default: 3556 add_amd_tlb(devi, itlb4k_str, 3557 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3558 add_amd_tlb(devi, dtlb4k_str, 3559 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3560 break; 3561 } 3562 3563 /* 3564 * data L1 cache configuration 3565 */ 3566 3567 add_amd_cache(devi, l1_dcache_str, 3568 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3569 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3570 3571 /* 3572 * code L1 cache configuration 3573 */ 3574 3575 add_amd_cache(devi, l1_icache_str, 3576 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3577 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3578 3579 if (cpi->cpi_xmaxeax < 0x80000006) 3580 return; 3581 cp = &cpi->cpi_extd[6]; 3582 3583 /* Check for a unified L2 TLB for large pages */ 3584 3585 if (BITX(cp->cp_eax, 31, 16) == 0) 3586 add_amd_l2_tlb(devi, "l2-tlb-2M", 3587 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3588 else { 3589 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3590 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3591 add_amd_l2_tlb(devi, "l2-itlb-2M", 3592 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3593 } 3594 3595 /* Check for a unified L2 TLB for 4K pages */ 3596 3597 if (BITX(cp->cp_ebx, 31, 16) == 0) { 3598 add_amd_l2_tlb(devi, "l2-tlb-4K", 3599 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3600 } else { 3601 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3602 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3603 add_amd_l2_tlb(devi, "l2-itlb-4K", 3604 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3605 } 3606 3607 add_amd_l2_cache(devi, l2_cache_str, 3608 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3609 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3610 } 3611 3612 /* 3613 * There are two basic ways that the x86 world describes it cache 3614 * and tlb architecture - Intel's way and AMD's way. 3615 * 3616 * Return which flavor of cache architecture we should use 3617 */ 3618 static int 3619 x86_which_cacheinfo(struct cpuid_info *cpi) 3620 { 3621 switch (cpi->cpi_vendor) { 3622 case X86_VENDOR_Intel: 3623 if (cpi->cpi_maxeax >= 2) 3624 return (X86_VENDOR_Intel); 3625 break; 3626 case X86_VENDOR_AMD: 3627 /* 3628 * The K5 model 1 was the first part from AMD that reported 3629 * cache sizes via extended cpuid functions. 3630 */ 3631 if (cpi->cpi_family > 5 || 3632 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3633 return (X86_VENDOR_AMD); 3634 break; 3635 case X86_VENDOR_TM: 3636 if (cpi->cpi_family >= 5) 3637 return (X86_VENDOR_AMD); 3638 /*FALLTHROUGH*/ 3639 default: 3640 /* 3641 * If they have extended CPU data for 0x80000005 3642 * then we assume they have AMD-format cache 3643 * information. 
3644 * 3645 * If not, and the vendor happens to be Cyrix, 3646 * then try our-Cyrix specific handler. 3647 * 3648 * If we're not Cyrix, then assume we're using Intel's 3649 * table-driven format instead. 3650 */ 3651 if (cpi->cpi_xmaxeax >= 0x80000005) 3652 return (X86_VENDOR_AMD); 3653 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3654 return (X86_VENDOR_Cyrix); 3655 else if (cpi->cpi_maxeax >= 2) 3656 return (X86_VENDOR_Intel); 3657 break; 3658 } 3659 return (-1); 3660 } 3661 3662 void 3663 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 3664 struct cpuid_info *cpi) 3665 { 3666 dev_info_t *cpu_devi; 3667 int create; 3668 3669 cpu_devi = (dev_info_t *)dip; 3670 3671 /* device_type */ 3672 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3673 "device_type", "cpu"); 3674 3675 /* reg */ 3676 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3677 "reg", cpu_id); 3678 3679 /* cpu-mhz, and clock-frequency */ 3680 if (cpu_freq > 0) { 3681 long long mul; 3682 3683 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3684 "cpu-mhz", cpu_freq); 3685 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3686 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3687 "clock-frequency", (int)mul); 3688 } 3689 3690 if ((x86_feature & X86_CPUID) == 0) { 3691 return; 3692 } 3693 3694 /* vendor-id */ 3695 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3696 "vendor-id", cpi->cpi_vendorstr); 3697 3698 if (cpi->cpi_maxeax == 0) { 3699 return; 3700 } 3701 3702 /* 3703 * family, model, and step 3704 */ 3705 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3706 "family", CPI_FAMILY(cpi)); 3707 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3708 "cpu-model", CPI_MODEL(cpi)); 3709 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3710 "stepping-id", CPI_STEP(cpi)); 3711 3712 /* type */ 3713 switch (cpi->cpi_vendor) { 3714 case X86_VENDOR_Intel: 3715 create = 1; 3716 break; 3717 default: 3718 create = 0; 3719 break; 3720 } 3721 if (create) 3722 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3723 "type", CPI_TYPE(cpi)); 3724 3725 /* ext-family */ 3726 switch (cpi->cpi_vendor) { 3727 case X86_VENDOR_Intel: 3728 case X86_VENDOR_AMD: 3729 create = cpi->cpi_family >= 0xf; 3730 break; 3731 default: 3732 create = 0; 3733 break; 3734 } 3735 if (create) 3736 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3737 "ext-family", CPI_FAMILY_XTD(cpi)); 3738 3739 /* ext-model */ 3740 switch (cpi->cpi_vendor) { 3741 case X86_VENDOR_Intel: 3742 create = IS_EXTENDED_MODEL_INTEL(cpi); 3743 break; 3744 case X86_VENDOR_AMD: 3745 create = CPI_FAMILY(cpi) == 0xf; 3746 break; 3747 default: 3748 create = 0; 3749 break; 3750 } 3751 if (create) 3752 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3753 "ext-model", CPI_MODEL_XTD(cpi)); 3754 3755 /* generation */ 3756 switch (cpi->cpi_vendor) { 3757 case X86_VENDOR_AMD: 3758 /* 3759 * AMD K5 model 1 was the first part to support this 3760 */ 3761 create = cpi->cpi_xmaxeax >= 0x80000001; 3762 break; 3763 default: 3764 create = 0; 3765 break; 3766 } 3767 if (create) 3768 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3769 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3770 3771 /* brand-id */ 3772 switch (cpi->cpi_vendor) { 3773 case X86_VENDOR_Intel: 3774 /* 3775 * brand id first appeared on Pentium III Xeon model 8, 3776 * and Celeron model 8 processors and Opteron 3777 */ 3778 create = cpi->cpi_family > 6 || 3779 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3780 break; 3781 case X86_VENDOR_AMD: 3782 create = cpi->cpi_family >= 
0xf;
3783 break;
3784 default:
3785 create = 0;
3786 break;
3787 }
3788 if (create && cpi->cpi_brandid != 0) {
3789 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3790 "brand-id", cpi->cpi_brandid);
3791 }
3792
3793 /* chunks, and apic-id */
3794 switch (cpi->cpi_vendor) {
3795 /*
3796 * first available on Pentium IV and Opteron (K8)
3797 */
3798 case X86_VENDOR_Intel:
3799 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
3800 break;
3801 case X86_VENDOR_AMD:
3802 create = cpi->cpi_family >= 0xf;
3803 break;
3804 default:
3805 create = 0;
3806 break;
3807 }
3808 if (create) {
3809 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3810 "chunks", CPI_CHUNKS(cpi));
3811 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3812 "apic-id", cpi->cpi_apicid);
3813 if (cpi->cpi_chipid >= 0) {
3814 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3815 "chip#", cpi->cpi_chipid);
3816 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3817 "clog#", cpi->cpi_clogid);
3818 }
3819 }
3820
3821 /* cpuid-features */
3822 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3823 "cpuid-features", CPI_FEATURES_EDX(cpi));
3824
3825
3826 /* cpuid-features-ecx */
3827 switch (cpi->cpi_vendor) {
3828 case X86_VENDOR_Intel:
3829 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
3830 break;
3831 default:
3832 create = 0;
3833 break;
3834 }
3835 if (create)
3836 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3837 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
3838
3839 /* ext-cpuid-features */
3840 switch (cpi->cpi_vendor) {
3841 case X86_VENDOR_Intel:
3842 case X86_VENDOR_AMD:
3843 case X86_VENDOR_Cyrix:
3844 case X86_VENDOR_TM:
3845 case X86_VENDOR_Centaur:
3846 create = cpi->cpi_xmaxeax >= 0x80000001;
3847 break;
3848 default:
3849 create = 0;
3850 break;
3851 }
3852 if (create) {
3853 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3854 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
3855 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3856 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
3857 }
3858
3859 /*
3860 * Brand String first appeared in Intel Pentium IV, AMD K5
3861 * model 1, and Cyrix GXm. On earlier models we try and
3862 * simulate something similar .. so this string should always
3863 * say -something- about the processor, however lame.
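 *
 * A typical (illustrative) result is a property such as
 *
 *	brand-string="Intel(r) Xeon(r) CPU           X5550  @ 2.67GHz"
 *
 * or, on an old part with no brand data, a fabricated string in the
 * "GenuineIntel 6.8.1" style from fabricate_brandstr().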
3864 */ 3865 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3866 "brand-string", cpi->cpi_brandstr); 3867 3868 /* 3869 * Finally, cache and tlb information 3870 */ 3871 switch (x86_which_cacheinfo(cpi)) { 3872 case X86_VENDOR_Intel: 3873 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3874 break; 3875 case X86_VENDOR_Cyrix: 3876 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3877 break; 3878 case X86_VENDOR_AMD: 3879 amd_cache_info(cpi, cpu_devi); 3880 break; 3881 default: 3882 break; 3883 } 3884 } 3885 3886 struct l2info { 3887 int *l2i_csz; 3888 int *l2i_lsz; 3889 int *l2i_assoc; 3890 int l2i_ret; 3891 }; 3892 3893 /* 3894 * A cacheinfo walker that fetches the size, line-size and associativity 3895 * of the L2 cache 3896 */ 3897 static int 3898 intel_l2cinfo(void *arg, const struct cachetab *ct) 3899 { 3900 struct l2info *l2i = arg; 3901 int *ip; 3902 3903 if (ct->ct_label != l2_cache_str && 3904 ct->ct_label != sl2_cache_str) 3905 return (0); /* not an L2 -- keep walking */ 3906 3907 if ((ip = l2i->l2i_csz) != NULL) 3908 *ip = ct->ct_size; 3909 if ((ip = l2i->l2i_lsz) != NULL) 3910 *ip = ct->ct_line_size; 3911 if ((ip = l2i->l2i_assoc) != NULL) 3912 *ip = ct->ct_assoc; 3913 l2i->l2i_ret = ct->ct_size; 3914 return (1); /* was an L2 -- terminate walk */ 3915 } 3916 3917 /* 3918 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3919 * 3920 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3921 * value is the associativity, the associativity for the L2 cache and 3922 * tlb is encoded in the following table. The 4 bit L2 value serves as 3923 * an index into the amd_afd[] array to determine the associativity. 3924 * -1 is undefined. 0 is fully associative. 3925 */ 3926 3927 static int amd_afd[] = 3928 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3929 3930 static void 3931 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3932 { 3933 struct cpuid_regs *cp; 3934 uint_t size, assoc; 3935 int i; 3936 int *ip; 3937 3938 if (cpi->cpi_xmaxeax < 0x80000006) 3939 return; 3940 cp = &cpi->cpi_extd[6]; 3941 3942 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3943 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3944 uint_t cachesz = size * 1024; 3945 assoc = amd_afd[i]; 3946 3947 ASSERT(assoc != -1); 3948 3949 if ((ip = l2i->l2i_csz) != NULL) 3950 *ip = cachesz; 3951 if ((ip = l2i->l2i_lsz) != NULL) 3952 *ip = BITX(cp->cp_ecx, 7, 0); 3953 if ((ip = l2i->l2i_assoc) != NULL) 3954 *ip = assoc; 3955 l2i->l2i_ret = cachesz; 3956 } 3957 } 3958 3959 int 3960 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3961 { 3962 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3963 struct l2info __l2info, *l2i = &__l2info; 3964 3965 l2i->l2i_csz = csz; 3966 l2i->l2i_lsz = lsz; 3967 l2i->l2i_assoc = assoc; 3968 l2i->l2i_ret = -1; 3969 3970 switch (x86_which_cacheinfo(cpi)) { 3971 case X86_VENDOR_Intel: 3972 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3973 break; 3974 case X86_VENDOR_Cyrix: 3975 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3976 break; 3977 case X86_VENDOR_AMD: 3978 amd_l2cacheinfo(cpi, l2i); 3979 break; 3980 default: 3981 break; 3982 } 3983 return (l2i->l2i_ret); 3984 } 3985 3986 #if !defined(__xpv) 3987 3988 uint32_t * 3989 cpuid_mwait_alloc(cpu_t *cpu) 3990 { 3991 uint32_t *ret; 3992 size_t mwait_size; 3993 3994 ASSERT(cpuid_checkpass(CPU, 2)); 3995 3996 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3997 if (mwait_size == 0) 3998 return (NULL); 3999 4000 /* 4001 * kmem_alloc() returns cache line size aligned data for 
mwait_size
4002 * allocations. mwait_size is currently cache line sized. Neither
4003 * of these implementation details is guaranteed to be true in the
4004 * future.
4005 *
4006 * First try allocating mwait_size as kmem_alloc() currently returns
4007 * correctly aligned memory. If kmem_alloc() does not return
4008 * mwait_size aligned memory, then allocate mwait_size * 2 and round up.
4009 *
4010 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4011 * decide to free this memory.
4012 */
4013 ret = kmem_zalloc(mwait_size, KM_SLEEP);
4014 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4015 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4016 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4017 *ret = MWAIT_RUNNING;
4018 return (ret);
4019 } else {
4020 kmem_free(ret, mwait_size);
4021 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4022 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4023 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4024 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4025 *ret = MWAIT_RUNNING;
4026 return (ret);
4027 }
4028 }
4029
4030 void
4031 cpuid_mwait_free(cpu_t *cpu)
4032 {
4033 if (cpu->cpu_m.mcpu_cpi == NULL) {
4034 return;
4035 }
4036
4037 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4038 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4039 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4040 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4041 }
4042
4043 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4044 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4045 }
4046
4047 void
4048 patch_tsc_read(int flag)
4049 {
4050 size_t cnt;
4051
4052 switch (flag) {
4053 case X86_NO_TSC:
4054 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4055 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4056 break;
4057 case X86_HAVE_TSCP:
4058 cnt = &_tscp_end - &_tscp_start;
4059 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4060 break;
4061 case X86_TSC_MFENCE:
4062 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4063 (void) memcpy((void *)tsc_read,
4064 (void *)&_tsc_mfence_start, cnt);
4065 break;
4066 case X86_TSC_LFENCE:
4067 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4068 (void) memcpy((void *)tsc_read,
4069 (void *)&_tsc_lfence_start, cnt);
4070 break;
4071 default:
4072 break;
4073 }
4074 }
4075
4076 int
4077 cpuid_deep_cstates_supported(void)
4078 {
4079 struct cpuid_info *cpi;
4080 struct cpuid_regs regs;
4081
4082 ASSERT(cpuid_checkpass(CPU, 1));
4083
4084 cpi = CPU->cpu_m.mcpu_cpi;
4085
4086 if (!(x86_feature & X86_CPUID))
4087 return (0);
4088
4089 switch (cpi->cpi_vendor) {
4090 case X86_VENDOR_Intel:
4091 if (cpi->cpi_xmaxeax < 0x80000007)
4092 return (0);
4093
4094 /*
4095 * Does the TSC run at a constant rate in all ACPI C-states?
4096 */
4097 regs.cp_eax = 0x80000007;
4098 (void) __cpuid_insn(&regs);
4099 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4100
4101 default:
4102 return (0);
4103 }
4104 }
4105
4106 #endif /* !__xpv */
4107
4108 void
4109 post_startup_cpu_fixups(void)
4110 {
4111 #ifndef __xpv
4112 /*
4113 * Some AMD processors support C1E state. Entering this state will
4114 * cause the local APIC timer to stop, which we can't deal with at
4115 * this time.
4108 void
4109 post_startup_cpu_fixups(void)
4110 {
4111 #ifndef __xpv
4112     /*
4113      * Some AMD processors support C1E state. Entering this state will
4114      * cause the local APIC timer to stop, which we can't deal with at
4115      * this time.
4116      */
4117     if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4118         on_trap_data_t otd;
4119         uint64_t reg;
4120
4121         if (!on_trap(&otd, OT_DATA_ACCESS)) {
4122             reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4123             /* Disable C1E state if it is enabled by BIOS */
4124             if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4125                 AMD_ACTONCMPHALT_MASK) {
4126                 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4127                     AMD_ACTONCMPHALT_SHIFT);
4128                 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4129             }
4130         }
4131         no_trap();
4132     }
4133 #endif /* !__xpv */
4134 }
4135
4136 /*
4137  * Starting with the Westmere processor the local
4138  * APIC timer will continue running in all C-states,
4139  * including the deepest C-states.
4140  */
4141 int
4142 cpuid_arat_supported(void)
4143 {
4144     struct cpuid_info *cpi;
4145     struct cpuid_regs regs;
4146
4147     ASSERT(cpuid_checkpass(CPU, 1));
4148     ASSERT(x86_feature & X86_CPUID);
4149
4150     cpi = CPU->cpu_m.mcpu_cpi;
4151
4152     switch (cpi->cpi_vendor) {
4153     case X86_VENDOR_Intel:
4154         /*
4155          * An always-running local APIC timer is
4156          * indicated by CPUID.6.EAX[2].
4157          */
4158         if (cpi->cpi_maxeax >= 6) {
4159             regs.cp_eax = 6;
4160             (void) cpuid_insn(NULL, &regs);
4161             return (regs.cp_eax & CPUID_CSTATE_ARAT);
4162         } else {
4163             return (0);
4164         }
4165     default:
4166         return (0);
4167     }
4168 }
4169
4170 /*
4171  * Check support for the Intel ENERGY_PERF_BIAS feature
4172  */
4173 int
4174 cpuid_iepb_supported(struct cpu *cp)
4175 {
4176     struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4177     struct cpuid_regs regs;
4178
4179     ASSERT(cpuid_checkpass(cp, 1));
4180
4181     if (!(x86_feature & X86_CPUID) || !(x86_feature & X86_MSR)) {
4182         return (0);
4183     }
4184
4185     /*
4186      * The Intel ENERGY_PERF_BIAS MSR is indicated by
4187      * capability bit CPUID.6.ECX[3].
4188      */
4189     if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4190         return (0);
4191
4192     regs.cp_eax = 0x6;
4193     (void) cpuid_insn(NULL, &regs);
4194     return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4195 }
4196
4197 #if defined(__amd64) && !defined(__xpv)
4198 /*
4199  * Patch in versions of bcopy for high performance Intel Nehalem
4200  * processors and later...
4201  */
4202 void
4203 patch_memops(uint_t vendor)
4204 {
4205     size_t cnt, i;
4206     caddr_t to, from;
4207
4208     if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) {
4209         cnt = &bcopy_patch_end - &bcopy_patch_start;
4210         to = &bcopy_ck_size;
4211         from = &bcopy_patch_start;
4212         for (i = 0; i < cnt; i++) {
4213             *to++ = *from++;
4214         }
4215     }
4216 }
4217 #endif /* __amd64 && !__xpv */
4218
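/*
 * Illustrative sketch only (not part of the original file): a power-management
 * consumer might use cpuid_iepb_supported() above to gate writes to the
 * IA32_ENERGY_PERF_BIAS MSR (0x1b0), whose low four bits hold the policy hint
 * (0 = maximum performance ... 15 = maximum energy saving).  The MSR constant
 * name and the helper below are assumptions for illustration.
 */
#ifdef notdef
#define EXAMPLE_MSR_IA32_ENERGY_PERF_BIAS   0x1b0

static void
example_set_energy_perf_bias(struct cpu *cp, uint64_t hint)
{
    if (!cpuid_iepb_supported(cp))
        return;

    /* Keep only the architecturally defined hint field, bits 3:0. */
    wrmsr(EXAMPLE_MSR_IA32_ENERGY_PERF_BIAS, hint & 0xf);
}
#endif /* notdef */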
4219 /*
4220  * This function finds the number of bits to represent the number of cores per
4221  * chip and the number of strands per core for Intel platforms.
4222  * It reuses the x2APIC cpuid code from cpuid_pass2().
4223  */
4224 void
4225 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4226 {
4227     struct cpuid_regs regs;
4228     struct cpuid_regs *cp = &regs;
4229
4230     if (vendor != X86_VENDOR_Intel) {
4231         return;
4232     }
4233
4234     /* If the maximum cpuid level is at least 0xB, extended topo is available. */
4235     cp->cp_eax = 0;
4236     if (__cpuid_insn(cp) >= 0xB) {
4237
4238         cp->cp_eax = 0xB;
4239         cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4240         (void) __cpuid_insn(cp);
4241
4242         /*
4243          * Check that CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4244          * indicates that the extended topology enumeration leaf is
4245          * available.
4246          */
4247         if (cp->cp_ebx) {
4248             uint_t coreid_shift = 0;
4249             uint_t chipid_shift = 0;
4250             uint_t i;
4251             uint_t level;
4252
4253             for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4254                 cp->cp_eax = 0xB;
4255                 cp->cp_ecx = i;
4256
4257                 (void) __cpuid_insn(cp);
4258                 level = CPI_CPU_LEVEL_TYPE(cp);
4259
4260                 if (level == 1) {
4261                     /*
4262                      * Thread level processor topology.
4263                      * Number of bits to shift the APIC ID
4264                      * right to get the coreid.
4265                      */
4266                     coreid_shift = BITX(cp->cp_eax, 4, 0);
4267                 } else if (level == 2) {
4268                     /*
4269                      * Core level processor topology.
4270                      * Number of bits to shift the APIC ID
4271                      * right to get the chipid.
4272                      */
4273                     chipid_shift = BITX(cp->cp_eax, 4, 0);
4274                 }
4275             }
4276
4277             if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4278                 *strand_nbits = coreid_shift;
4279                 *core_nbits = chipid_shift - coreid_shift;
4280             }
4281         }
4282     }
4283 }
4284
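/*
 * Illustrative sketch only (not part of the original file): the shift widths
 * computed by cpuid_get_ext_topo() describe how an x2APIC ID is laid out as
 * chip | core | strand bit fields.  The helper below, and the apicid value it
 * takes, are assumptions for illustration.
 */
#ifdef notdef
static void
example_decode_apicid(uint32_t apicid, uint_t core_nbits, uint_t strand_nbits,
    uint_t *chipid, uint_t *coreid, uint_t *strandid)
{
    /* The low strand_nbits bits identify the strand (hardware thread). */
    *strandid = apicid & ((1U << strand_nbits) - 1);
    /* The next core_nbits bits identify the core within the chip. */
    *coreid = (apicid >> strand_nbits) & ((1U << core_nbits) - 1);
    /* Everything above the core/strand fields identifies the chip. */
    *chipid = apicid >> (strand_nbits + core_nbits);
}
#endif /* notdef */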