1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 /* 30 * Portions Copyright 2009 Advanced Micro Devices, Inc. 31 */ 32 33 /* 34 * Various routines to handle identification 35 * and classification of x86 processors. 36 */ 37 38 #include <sys/types.h> 39 #include <sys/archsystm.h> 40 #include <sys/x86_archext.h> 41 #include <sys/kmem.h> 42 #include <sys/systm.h> 43 #include <sys/cmn_err.h> 44 #include <sys/sunddi.h> 45 #include <sys/sunndi.h> 46 #include <sys/cpuvar.h> 47 #include <sys/processor.h> 48 #include <sys/sysmacros.h> 49 #include <sys/pg.h> 50 #include <sys/fp.h> 51 #include <sys/controlregs.h> 52 #include <sys/auxv_386.h> 53 #include <sys/bitmap.h> 54 #include <sys/memnode.h> 55 #include <sys/pci_cfgspace.h> 56 57 #ifdef __xpv 58 #include <sys/hypervisor.h> 59 #else 60 #include <sys/ontrap.h> 61 #endif 62 63 /* 64 * Pass 0 of cpuid feature analysis happens in locore. 
It contains special code 65 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 66 * them accordingly. For most modern processors, feature detection occurs here 67 * in pass 1. 68 * 69 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 70 * for the boot CPU and does the basic analysis that the early kernel needs. 71 * x86_feature is set based on the return value of cpuid_pass1() of the boot 72 * CPU. 73 * 74 * Pass 1 includes: 75 * 76 * o Determining vendor/model/family/stepping and setting x86_type and 77 * x86_vendor accordingly. 78 * o Processing the feature flags returned by the cpuid instruction while 79 * applying any workarounds or tricks for the specific processor. 80 * o Mapping the feature flags into Solaris feature bits (X86_*). 81 * o Processing extended feature flags if supported by the processor, 82 * again while applying specific processor knowledge. 83 * o Determining the CMT characteristics of the system. 84 * 85 * Pass 1 is done on non-boot CPUs during their initialization and the results 86 * are used only as a meager attempt at ensuring that all processors within the 87 * system support the same features. 88 * 89 * Pass 2 of cpuid feature analysis happens just at the beginning 90 * of startup(). It just copies in and corrects the remainder 91 * of the cpuid data we depend on: standard cpuid functions that we didn't 92 * need for pass1 feature analysis, and extended cpuid functions beyond the 93 * simple feature processing done in pass1. 94 * 95 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 96 * particular kernel memory allocation has been made available. It creates a 97 * readable brand string based on the data collected in the first two passes. 98 * 99 * Pass 4 of cpuid analysis is invoked after post_startup() when all 100 * the support infrastructure for various hardware features has been 101 * initialized. 
It determines which processor features will be reported 102 * to userland via the aux vector. 103 * 104 * All passes are executed on all CPUs, but only the boot CPU determines what 105 * features the kernel will use. 106 * 107 * Much of the worst junk in this file is for the support of processors 108 * that didn't really implement the cpuid instruction properly. 109 * 110 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 111 * the pass numbers. Accordingly, changes to the pass code may require changes 112 * to the accessor code. 113 */ 114 115 uint_t x86_feature = 0; 116 uint_t x86_vendor = X86_VENDOR_IntelClone; 117 uint_t x86_type = X86_TYPE_OTHER; 118 uint_t x86_clflush_size = 0; 119 120 uint_t pentiumpro_bug4046376; 121 uint_t pentiumpro_bug4064495; 122 123 uint_t enable486; 124 /* 125 * This is set to platform type Solaris is running on. 126 */ 127 static int platform_type = -1; 128 129 #if !defined(__xpv) 130 /* 131 * Variable to patch if hypervisor platform detection needs to be 132 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0). 133 */ 134 int enable_platform_detection = 1; 135 #endif 136 137 /* 138 * monitor/mwait info. 139 * 140 * size_actual and buf_actual are the real address and size allocated to get 141 * proper mwait_buf alignement. buf_actual and size_actual should be passed 142 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 143 * processor cache-line alignment, but this is not guarantied in the furture. 
144 */ 145 struct mwait_info { 146 size_t mon_min; /* min size to avoid missed wakeups */ 147 size_t mon_max; /* size to avoid false wakeups */ 148 size_t size_actual; /* size actually allocated */ 149 void *buf_actual; /* memory actually allocated */ 150 uint32_t support; /* processor support of monitor/mwait */ 151 }; 152 153 /* 154 * These constants determine how many of the elements of the 155 * cpuid we cache in the cpuid_info data structure; the 156 * remaining elements are accessible via the cpuid instruction. 157 */ 158 159 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 160 #define NMAX_CPI_EXTD 0x1c /* eax = 0x80000000 .. 0x8000001b */ 161 162 /* 163 * Some terminology needs to be explained: 164 * - Socket: Something that can be plugged into a motherboard. 165 * - Package: Same as socket 166 * - Chip: Same as socket. Note that AMD's documentation uses term "chip" 167 * differently: there, chip is the same as processor node (below) 168 * - Processor node: Some AMD processors have more than one 169 * "subprocessor" embedded in a package. These subprocessors (nodes) 170 * are fully-functional processors themselves with cores, caches, 171 * memory controllers, PCI configuration spaces. They are connected 172 * inside the package with Hypertransport links. On single-node 173 * processors, processor node is equivalent to chip/socket/package. 
174 */ 175 176 struct cpuid_info { 177 uint_t cpi_pass; /* last pass completed */ 178 /* 179 * standard function information 180 */ 181 uint_t cpi_maxeax; /* fn 0: %eax */ 182 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 183 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 184 185 uint_t cpi_family; /* fn 1: extended family */ 186 uint_t cpi_model; /* fn 1: extended model */ 187 uint_t cpi_step; /* fn 1: stepping */ 188 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */ 189 /* AMD: package/socket # */ 190 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 191 int cpi_clogid; /* fn 1: %ebx: thread # */ 192 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 193 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 194 uint_t cpi_ncache; /* fn 2: number of elements */ 195 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 196 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 197 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 198 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 199 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 200 /* 201 * extended function information 202 */ 203 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 204 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 205 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 206 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 207 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */ 208 209 id_t cpi_coreid; /* same coreid => strands share core */ 210 int cpi_pkgcoreid; /* core number within single package */ 211 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 212 /* Intel: fn 4: %eax[31-26] */ 213 /* 214 * supported feature information 215 */ 216 uint32_t cpi_support[5]; 217 #define STD_EDX_FEATURES 0 218 #define AMD_EDX_FEATURES 1 219 #define TM_EDX_FEATURES 2 220 #define STD_ECX_FEATURES 3 221 #define AMD_ECX_FEATURES 4 222 /* 223 * Synthesized information, where known. 
224 */ 225 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 226 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 227 uint32_t cpi_socket; /* Chip package/socket type */ 228 229 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 230 uint32_t cpi_apicid; 231 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 232 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 233 /* Intel: 1 */ 234 }; 235 236 237 static struct cpuid_info cpuid_info0; 238 239 /* 240 * These bit fields are defined by the Intel Application Note AP-485 241 * "Intel Processor Identification and the CPUID Instruction" 242 */ 243 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 244 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 245 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 246 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 247 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 248 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 249 250 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 251 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 252 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 253 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 254 255 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 256 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 257 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 258 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 259 260 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 261 #define CPI_XMAXEAX_MAX 0x80000100 262 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 263 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 264 265 /* 266 * Function 4 (Deterministic Cache Parameters) macros 267 * Defined by Intel Application Note AP-485 268 */ 269 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 
270 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 271 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 272 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 273 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 274 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 275 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 276 277 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 278 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 279 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 280 281 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 282 283 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 284 285 286 /* 287 * A couple of shorthand macros to identify "later" P6-family chips 288 * like the Pentium M and Core. First, the "older" P6-based stuff 289 * (loosely defined as "pre-Pentium-4"): 290 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 291 */ 292 293 #define IS_LEGACY_P6(cpi) ( \ 294 cpi->cpi_family == 6 && \ 295 (cpi->cpi_model == 1 || \ 296 cpi->cpi_model == 3 || \ 297 cpi->cpi_model == 5 || \ 298 cpi->cpi_model == 6 || \ 299 cpi->cpi_model == 7 || \ 300 cpi->cpi_model == 8 || \ 301 cpi->cpi_model == 0xA || \ 302 cpi->cpi_model == 0xB) \ 303 ) 304 305 /* A "new F6" is everything with family 6 that's not the above */ 306 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 307 308 /* Extended family/model support */ 309 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 310 cpi->cpi_family >= 0xf) 311 312 /* 313 * Info for monitor/mwait idle loop. 314 * 315 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 316 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 317 * 2006. 318 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 319 * Documentation Updates" #33633, Rev 2.05, December 2006. 
320 */ 321 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 322 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 323 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 324 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 325 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 326 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 327 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 328 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 329 /* 330 * Number of sub-cstates for a given c-state. 331 */ 332 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 333 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 334 335 /* 336 * Functions we consune from cpuid_subr.c; don't publish these in a header 337 * file to try and keep people using the expected cpuid_* interfaces. 338 */ 339 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 340 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 341 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 342 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 343 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 344 345 /* 346 * Apply up various platform-dependent restrictions where the 347 * underlying platform restrictions mean the CPU can be marked 348 * as less capable than its cpuid instruction would imply. 349 */ 350 #if defined(__xpv) 351 static void 352 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 353 { 354 switch (eax) { 355 case 1: { 356 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 
357 0 : CPUID_INTC_EDX_MCA; 358 cp->cp_edx &= 359 ~(mcamask | 360 CPUID_INTC_EDX_PSE | 361 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 362 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 363 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 364 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 365 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 366 break; 367 } 368 369 case 0x80000001: 370 cp->cp_edx &= 371 ~(CPUID_AMD_EDX_PSE | 372 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 373 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 374 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 375 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 376 CPUID_AMD_EDX_TSCP); 377 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 378 break; 379 default: 380 break; 381 } 382 383 switch (vendor) { 384 case X86_VENDOR_Intel: 385 switch (eax) { 386 case 4: 387 /* 388 * Zero out the (ncores-per-chip - 1) field 389 */ 390 cp->cp_eax &= 0x03fffffff; 391 break; 392 default: 393 break; 394 } 395 break; 396 case X86_VENDOR_AMD: 397 switch (eax) { 398 399 case 0x80000001: 400 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 401 break; 402 403 case 0x80000008: 404 /* 405 * Zero out the (ncores-per-chip - 1) field 406 */ 407 cp->cp_ecx &= 0xffffff00; 408 break; 409 default: 410 break; 411 } 412 break; 413 default: 414 break; 415 } 416 } 417 #else 418 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 419 #endif 420 421 /* 422 * Some undocumented ways of patching the results of the cpuid 423 * instruction to permit running Solaris 10 on future cpus that 424 * we don't currently support. Could be set to non-zero values 425 * via settings in eeprom. 426 */ 427 428 uint32_t cpuid_feature_ecx_include; 429 uint32_t cpuid_feature_ecx_exclude; 430 uint32_t cpuid_feature_edx_include; 431 uint32_t cpuid_feature_edx_exclude; 432 433 /* 434 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. 
435 */ 436 void 437 cpuid_alloc_space(cpu_t *cpu) 438 { 439 /* 440 * By convention, cpu0 is the boot cpu, which is set up 441 * before memory allocation is available. All other cpus get 442 * their cpuid_info struct allocated here. 443 */ 444 ASSERT(cpu->cpu_id != 0); 445 ASSERT(cpu->cpu_m.mcpu_cpi == NULL); 446 cpu->cpu_m.mcpu_cpi = 447 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 448 } 449 450 void 451 cpuid_free_space(cpu_t *cpu) 452 { 453 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 454 int i; 455 456 ASSERT(cpi != NULL); 457 ASSERT(cpi != &cpuid_info0); 458 459 /* 460 * Free up any function 4 related dynamic storage 461 */ 462 for (i = 1; i < cpi->cpi_std_4_size; i++) 463 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 464 if (cpi->cpi_std_4_size > 0) 465 kmem_free(cpi->cpi_std_4, 466 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 467 468 kmem_free(cpi, sizeof (*cpi)); 469 cpu->cpu_m.mcpu_cpi = NULL; 470 } 471 472 #if !defined(__xpv) 473 474 static void 475 determine_platform() 476 { 477 struct cpuid_regs cp; 478 char *xen_str; 479 uint32_t xen_signature[4]; 480 481 platform_type = HW_NATIVE; 482 483 if (!enable_platform_detection) 484 return; 485 486 /* 487 * In a fully virtualized domain, Xen's pseudo-cpuid function 488 * 0x40000000 returns a string representing the Xen signature in 489 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 490 * function. 491 */ 492 cp.cp_eax = 0x40000000; 493 (void) __cpuid_insn(&cp); 494 xen_signature[0] = cp.cp_ebx; 495 xen_signature[1] = cp.cp_ecx; 496 xen_signature[2] = cp.cp_edx; 497 xen_signature[3] = 0; 498 xen_str = (char *)xen_signature; 499 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) { 500 platform_type = HW_XEN_HVM; 501 } else if (vmware_platform()) { /* running under vmware hypervisor? 
*/ 502 platform_type = HW_VMWARE; 503 } 504 } 505 506 int 507 get_hwenv(void) 508 { 509 if (platform_type == -1) 510 determine_platform(); 511 512 return (platform_type); 513 } 514 515 int 516 is_controldom(void) 517 { 518 return (0); 519 } 520 521 #else 522 523 int 524 get_hwenv(void) 525 { 526 return (HW_XEN_PV); 527 } 528 529 int 530 is_controldom(void) 531 { 532 return (DOMAIN_IS_INITDOMAIN(xen_info)); 533 } 534 535 #endif /* __xpv */ 536 537 static void 538 cpuid_intel_getids(cpu_t *cpu, uint_t feature) 539 { 540 uint_t i; 541 uint_t chipid_shift = 0; 542 uint_t coreid_shift = 0; 543 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 544 545 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 546 chipid_shift++; 547 548 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 549 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 550 551 if (feature & X86_CMP) { 552 /* 553 * Multi-core (and possibly multi-threaded) 554 * processors. 555 */ 556 uint_t ncpu_per_core; 557 if (cpi->cpi_ncore_per_chip == 1) 558 ncpu_per_core = cpi->cpi_ncpu_per_chip; 559 else if (cpi->cpi_ncore_per_chip > 1) 560 ncpu_per_core = cpi->cpi_ncpu_per_chip / 561 cpi->cpi_ncore_per_chip; 562 /* 563 * 8bit APIC IDs on dual core Pentiums 564 * look like this: 565 * 566 * +-----------------------+------+------+ 567 * | Physical Package ID | MC | HT | 568 * +-----------------------+------+------+ 569 * <------- chipid --------> 570 * <------- coreid ---------------> 571 * <--- clogid --> 572 * <------> 573 * pkgcoreid 574 * 575 * Where the number of bits necessary to 576 * represent MC and HT fields together equals 577 * to the minimum number of bits necessary to 578 * store the value of cpi->cpi_ncpu_per_chip. 579 * Of those bits, the MC part uses the number 580 * of bits necessary to store the value of 581 * cpi->cpi_ncore_per_chip. 
582 */ 583 for (i = 1; i < ncpu_per_core; i <<= 1) 584 coreid_shift++; 585 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 586 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 587 } else if (feature & X86_HTT) { 588 /* 589 * Single-core multi-threaded processors. 590 */ 591 cpi->cpi_coreid = cpi->cpi_chipid; 592 cpi->cpi_pkgcoreid = 0; 593 } 594 cpi->cpi_procnodeid = cpi->cpi_chipid; 595 } 596 597 static void 598 cpuid_amd_getids(cpu_t *cpu) 599 { 600 int i, first_half, coreidsz; 601 uint32_t nb_caps_reg; 602 uint_t node2_1; 603 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 604 605 /* 606 * AMD CMP chips currently have a single thread per core. 607 * 608 * Since no two cpus share a core we must assign a distinct coreid 609 * per cpu, and we do this by using the cpu_id. This scheme does not, 610 * however, guarantee that sibling cores of a chip will have sequential 611 * coreids starting at a multiple of the number of cores per chip - 612 * that is usually the case, but if the ACPI MADT table is presented 613 * in a different order then we need to perform a few more gymnastics 614 * for the pkgcoreid. 615 * 616 * All processors in the system have the same number of enabled 617 * cores. Cores within a processor are always numbered sequentially 618 * from 0 regardless of how many or which are disabled, and there 619 * is no way for operating system to discover the real core id when some 620 * are disabled. 621 */ 622 623 cpi->cpi_coreid = cpu->cpu_id; 624 625 if (cpi->cpi_xmaxeax >= 0x80000008) { 626 627 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 628 629 /* 630 * In AMD parlance chip is really a node while Solaris 631 * sees chip as equivalent to socket/package. 
632 */ 633 cpi->cpi_ncore_per_chip = 634 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 635 if (coreidsz == 0) { 636 /* Use legacy method */ 637 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 638 coreidsz++; 639 if (coreidsz == 0) 640 coreidsz = 1; 641 } 642 } else { 643 /* Assume single-core part */ 644 cpi->cpi_ncore_per_chip = 1; 645 } 646 647 cpi->cpi_clogid = cpi->cpi_pkgcoreid = 648 cpi->cpi_apicid & ((1<<coreidsz) - 1); 649 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip; 650 651 /* Get nodeID */ 652 if (cpi->cpi_family == 0xf) { 653 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 654 cpi->cpi_chipid = cpi->cpi_procnodeid; 655 } else if (cpi->cpi_family == 0x10) { 656 /* 657 * See if we are a multi-node processor. 658 * All processors in the system have the same number of nodes 659 */ 660 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 661 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 662 /* Single-node */ 663 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 664 coreidsz); 665 cpi->cpi_chipid = cpi->cpi_procnodeid; 666 } else { 667 668 /* 669 * Multi-node revision D (2 nodes per package 670 * are supported) 671 */ 672 cpi->cpi_procnodes_per_pkg = 2; 673 674 first_half = (cpi->cpi_pkgcoreid <= 675 (cpi->cpi_ncore_per_chip/2 - 1)); 676 677 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 678 /* We are BSP */ 679 cpi->cpi_procnodeid = (first_half ? 
0 : 1); 680 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 681 } else { 682 683 /* We are AP */ 684 /* NodeId[2:1] bits to use for reading F3xe8 */ 685 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 686 687 nb_caps_reg = 688 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 689 690 /* 691 * Check IntNodeNum bit (31:30, but bit 31 is 692 * always 0 on dual-node processors) 693 */ 694 if (BITX(nb_caps_reg, 30, 30) == 0) 695 cpi->cpi_procnodeid = node2_1 + 696 !first_half; 697 else 698 cpi->cpi_procnodeid = node2_1 + 699 first_half; 700 701 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 702 } 703 } 704 } else if (cpi->cpi_family >= 0x11) { 705 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 706 cpi->cpi_chipid = cpi->cpi_procnodeid; 707 } else { 708 cpi->cpi_procnodeid = 0; 709 cpi->cpi_chipid = cpi->cpi_procnodeid; 710 } 711 } 712 713 uint_t 714 cpuid_pass1(cpu_t *cpu) 715 { 716 uint32_t mask_ecx, mask_edx; 717 uint_t feature = X86_CPUID; 718 struct cpuid_info *cpi; 719 struct cpuid_regs *cp; 720 int xcpuid; 721 #if !defined(__xpv) 722 extern int idle_cpu_prefer_mwait; 723 #endif 724 725 726 #if !defined(__xpv) 727 determine_platform(); 728 #endif 729 /* 730 * Space statically allocated for BSP, ensure pointer is set 731 */ 732 if (cpu->cpu_id == 0 && cpu->cpu_m.mcpu_cpi == NULL) 733 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 734 cpi = cpu->cpu_m.mcpu_cpi; 735 ASSERT(cpi != NULL); 736 cp = &cpi->cpi_std[0]; 737 cp->cp_eax = 0; 738 cpi->cpi_maxeax = __cpuid_insn(cp); 739 { 740 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 741 *iptr++ = cp->cp_ebx; 742 *iptr++ = cp->cp_edx; 743 *iptr++ = cp->cp_ecx; 744 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 745 } 746 747 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 748 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 749 750 /* 751 * Limit the range in case of weird hardware 752 */ 753 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 754 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 755 if (cpi->cpi_maxeax < 1) 756 goto pass1_done; 757 
758 cp = &cpi->cpi_std[1]; 759 cp->cp_eax = 1; 760 (void) __cpuid_insn(cp); 761 762 /* 763 * Extract identifying constants for easy access. 764 */ 765 cpi->cpi_model = CPI_MODEL(cpi); 766 cpi->cpi_family = CPI_FAMILY(cpi); 767 768 if (cpi->cpi_family == 0xf) 769 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 770 771 /* 772 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 773 * Intel, and presumably everyone else, uses model == 0xf, as 774 * one would expect (max value means possible overflow). Sigh. 775 */ 776 777 switch (cpi->cpi_vendor) { 778 case X86_VENDOR_Intel: 779 if (IS_EXTENDED_MODEL_INTEL(cpi)) 780 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 781 break; 782 case X86_VENDOR_AMD: 783 if (CPI_FAMILY(cpi) == 0xf) 784 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 785 break; 786 default: 787 if (cpi->cpi_model == 0xf) 788 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 789 break; 790 } 791 792 cpi->cpi_step = CPI_STEP(cpi); 793 cpi->cpi_brandid = CPI_BRANDID(cpi); 794 795 /* 796 * *default* assumptions: 797 * - believe %edx feature word 798 * - ignore %ecx feature word 799 * - 32-bit virtual and physical addressing 800 */ 801 mask_edx = 0xffffffff; 802 mask_ecx = 0; 803 804 cpi->cpi_pabits = cpi->cpi_vabits = 32; 805 806 switch (cpi->cpi_vendor) { 807 case X86_VENDOR_Intel: 808 if (cpi->cpi_family == 5) 809 x86_type = X86_TYPE_P5; 810 else if (IS_LEGACY_P6(cpi)) { 811 x86_type = X86_TYPE_P6; 812 pentiumpro_bug4046376 = 1; 813 pentiumpro_bug4064495 = 1; 814 /* 815 * Clear the SEP bit when it was set erroneously 816 */ 817 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 818 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 819 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 820 x86_type = X86_TYPE_P4; 821 /* 822 * We don't currently depend on any of the %ecx 823 * features until Prescott, so we'll only check 824 * this from P4 onwards. We might want to revisit 825 * that idea later. 
826 */ 827 mask_ecx = 0xffffffff; 828 } else if (cpi->cpi_family > 0xf) 829 mask_ecx = 0xffffffff; 830 /* 831 * We don't support MONITOR/MWAIT if leaf 5 is not available 832 * to obtain the monitor linesize. 833 */ 834 if (cpi->cpi_maxeax < 5) 835 mask_ecx &= ~CPUID_INTC_ECX_MON; 836 break; 837 case X86_VENDOR_IntelClone: 838 default: 839 break; 840 case X86_VENDOR_AMD: 841 #if defined(OPTERON_ERRATUM_108) 842 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 843 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 844 cpi->cpi_model = 0xc; 845 } else 846 #endif 847 if (cpi->cpi_family == 5) { 848 /* 849 * AMD K5 and K6 850 * 851 * These CPUs have an incomplete implementation 852 * of MCA/MCE which we mask away. 853 */ 854 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 855 856 /* 857 * Model 0 uses the wrong (APIC) bit 858 * to indicate PGE. Fix it here. 859 */ 860 if (cpi->cpi_model == 0) { 861 if (cp->cp_edx & 0x200) { 862 cp->cp_edx &= ~0x200; 863 cp->cp_edx |= CPUID_INTC_EDX_PGE; 864 } 865 } 866 867 /* 868 * Early models had problems w/ MMX; disable. 869 */ 870 if (cpi->cpi_model < 6) 871 mask_edx &= ~CPUID_INTC_EDX_MMX; 872 } 873 874 /* 875 * For newer families, SSE3 and CX16, at least, are valid; 876 * enable all 877 */ 878 if (cpi->cpi_family >= 0xf) 879 mask_ecx = 0xffffffff; 880 /* 881 * We don't support MONITOR/MWAIT if leaf 5 is not available 882 * to obtain the monitor linesize. 883 */ 884 if (cpi->cpi_maxeax < 5) 885 mask_ecx &= ~CPUID_INTC_ECX_MON; 886 887 #if !defined(__xpv) 888 /* 889 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 890 * processors. AMD does not intend MWAIT to be used in the cpu 891 * idle loop on current and future processors. 10h and future 892 * AMD processors use more power in MWAIT than HLT. 893 * Pre-family-10h Opterons do not have the MWAIT instruction. 
894 */ 895 idle_cpu_prefer_mwait = 0; 896 #endif 897 898 break; 899 case X86_VENDOR_TM: 900 /* 901 * workaround the NT workaround in CMS 4.1 902 */ 903 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 904 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 905 cp->cp_edx |= CPUID_INTC_EDX_CX8; 906 break; 907 case X86_VENDOR_Centaur: 908 /* 909 * workaround the NT workarounds again 910 */ 911 if (cpi->cpi_family == 6) 912 cp->cp_edx |= CPUID_INTC_EDX_CX8; 913 break; 914 case X86_VENDOR_Cyrix: 915 /* 916 * We rely heavily on the probing in locore 917 * to actually figure out what parts, if any, 918 * of the Cyrix cpuid instruction to believe. 919 */ 920 switch (x86_type) { 921 case X86_TYPE_CYRIX_486: 922 mask_edx = 0; 923 break; 924 case X86_TYPE_CYRIX_6x86: 925 mask_edx = 0; 926 break; 927 case X86_TYPE_CYRIX_6x86L: 928 mask_edx = 929 CPUID_INTC_EDX_DE | 930 CPUID_INTC_EDX_CX8; 931 break; 932 case X86_TYPE_CYRIX_6x86MX: 933 mask_edx = 934 CPUID_INTC_EDX_DE | 935 CPUID_INTC_EDX_MSR | 936 CPUID_INTC_EDX_CX8 | 937 CPUID_INTC_EDX_PGE | 938 CPUID_INTC_EDX_CMOV | 939 CPUID_INTC_EDX_MMX; 940 break; 941 case X86_TYPE_CYRIX_GXm: 942 mask_edx = 943 CPUID_INTC_EDX_MSR | 944 CPUID_INTC_EDX_CX8 | 945 CPUID_INTC_EDX_CMOV | 946 CPUID_INTC_EDX_MMX; 947 break; 948 case X86_TYPE_CYRIX_MediaGX: 949 break; 950 case X86_TYPE_CYRIX_MII: 951 case X86_TYPE_VIA_CYRIX_III: 952 mask_edx = 953 CPUID_INTC_EDX_DE | 954 CPUID_INTC_EDX_TSC | 955 CPUID_INTC_EDX_MSR | 956 CPUID_INTC_EDX_CX8 | 957 CPUID_INTC_EDX_PGE | 958 CPUID_INTC_EDX_CMOV | 959 CPUID_INTC_EDX_MMX; 960 break; 961 default: 962 break; 963 } 964 break; 965 } 966 967 #if defined(__xpv) 968 /* 969 * Do not support MONITOR/MWAIT under a hypervisor 970 */ 971 mask_ecx &= ~CPUID_INTC_ECX_MON; 972 #endif /* __xpv */ 973 974 /* 975 * Now we've figured out the masks that determine 976 * which bits we choose to believe, apply the masks 977 * to the feature words, then map the kernel's view 978 * of these feature words into its feature word. 
979 */ 980 cp->cp_edx &= mask_edx; 981 cp->cp_ecx &= mask_ecx; 982 983 /* 984 * apply any platform restrictions (we don't call this 985 * immediately after __cpuid_insn here, because we need the 986 * workarounds applied above first) 987 */ 988 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 989 990 /* 991 * fold in overrides from the "eeprom" mechanism 992 */ 993 cp->cp_edx |= cpuid_feature_edx_include; 994 cp->cp_edx &= ~cpuid_feature_edx_exclude; 995 996 cp->cp_ecx |= cpuid_feature_ecx_include; 997 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 998 999 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 1000 feature |= X86_LARGEPAGE; 1001 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 1002 feature |= X86_TSC; 1003 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 1004 feature |= X86_MSR; 1005 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 1006 feature |= X86_MTRR; 1007 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 1008 feature |= X86_PGE; 1009 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 1010 feature |= X86_CMOV; 1011 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 1012 feature |= X86_MMX; 1013 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 1014 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 1015 feature |= X86_MCA; 1016 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 1017 feature |= X86_PAE; 1018 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 1019 feature |= X86_CX8; 1020 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 1021 feature |= X86_CX16; 1022 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 1023 feature |= X86_PAT; 1024 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 1025 feature |= X86_SEP; 1026 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 1027 /* 1028 * In our implementation, fxsave/fxrstor 1029 * are prerequisites before we'll even 1030 * try and do SSE things. 
1031 */ 1032 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 1033 feature |= X86_SSE; 1034 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 1035 feature |= X86_SSE2; 1036 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 1037 feature |= X86_SSE3; 1038 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1039 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 1040 feature |= X86_SSSE3; 1041 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 1042 feature |= X86_SSE4_1; 1043 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 1044 feature |= X86_SSE4_2; 1045 if (cp->cp_ecx & CPUID_INTC_ECX_AES) 1046 feature |= X86_AES; 1047 } 1048 } 1049 if (cp->cp_edx & CPUID_INTC_EDX_DE) 1050 feature |= X86_DE; 1051 #if !defined(__xpv) 1052 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 1053 1054 /* 1055 * We require the CLFLUSH instruction for erratum workaround 1056 * to use MONITOR/MWAIT. 1057 */ 1058 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1059 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 1060 feature |= X86_MWAIT; 1061 } else { 1062 extern int idle_cpu_assert_cflush_monitor; 1063 1064 /* 1065 * All processors we are aware of which have 1066 * MONITOR/MWAIT also have CLFLUSH. 1067 */ 1068 if (idle_cpu_assert_cflush_monitor) { 1069 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 1070 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 1071 } 1072 } 1073 } 1074 #endif /* __xpv */ 1075 1076 /* 1077 * Only need it first time, rest of the cpus would follow suite. 1078 * we only capture this for the bootcpu. 1079 */ 1080 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1081 feature |= X86_CLFSH; 1082 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 1083 } 1084 1085 if (feature & X86_PAE) 1086 cpi->cpi_pabits = 36; 1087 1088 /* 1089 * Hyperthreading configuration is slightly tricky on Intel 1090 * and pure clones, and even trickier on AMD. 1091 * 1092 * (AMD chose to set the HTT bit on their CMP processors, 1093 * even though they're not actually hyperthreaded. Thus it 1094 * takes a bit more work to figure out what's really going 1095 * on ... 
see the handling of the CMP_LGCY bit below) 1096 */ 1097 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 1098 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 1099 if (cpi->cpi_ncpu_per_chip > 1) 1100 feature |= X86_HTT; 1101 } else { 1102 cpi->cpi_ncpu_per_chip = 1; 1103 } 1104 1105 /* 1106 * Work on the "extended" feature information, doing 1107 * some basic initialization for cpuid_pass2() 1108 */ 1109 xcpuid = 0; 1110 switch (cpi->cpi_vendor) { 1111 case X86_VENDOR_Intel: 1112 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 1113 xcpuid++; 1114 break; 1115 case X86_VENDOR_AMD: 1116 if (cpi->cpi_family > 5 || 1117 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 1118 xcpuid++; 1119 break; 1120 case X86_VENDOR_Cyrix: 1121 /* 1122 * Only these Cyrix CPUs are -known- to support 1123 * extended cpuid operations. 1124 */ 1125 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1126 x86_type == X86_TYPE_CYRIX_GXm) 1127 xcpuid++; 1128 break; 1129 case X86_VENDOR_Centaur: 1130 case X86_VENDOR_TM: 1131 default: 1132 xcpuid++; 1133 break; 1134 } 1135 1136 if (xcpuid) { 1137 cp = &cpi->cpi_extd[0]; 1138 cp->cp_eax = 0x80000000; 1139 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1140 } 1141 1142 if (cpi->cpi_xmaxeax & 0x80000000) { 1143 1144 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1145 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1146 1147 switch (cpi->cpi_vendor) { 1148 case X86_VENDOR_Intel: 1149 case X86_VENDOR_AMD: 1150 if (cpi->cpi_xmaxeax < 0x80000001) 1151 break; 1152 cp = &cpi->cpi_extd[1]; 1153 cp->cp_eax = 0x80000001; 1154 (void) __cpuid_insn(cp); 1155 1156 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1157 cpi->cpi_family == 5 && 1158 cpi->cpi_model == 6 && 1159 cpi->cpi_step == 6) { 1160 /* 1161 * K6 model 6 uses bit 10 to indicate SYSC 1162 * Later models use bit 11. Fix it here. 
1163 */ 1164 if (cp->cp_edx & 0x400) { 1165 cp->cp_edx &= ~0x400; 1166 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1167 } 1168 } 1169 1170 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1171 1172 /* 1173 * Compute the additions to the kernel's feature word. 1174 */ 1175 if (cp->cp_edx & CPUID_AMD_EDX_NX) 1176 feature |= X86_NX; 1177 1178 /* 1179 * Regardless whether or not we boot 64-bit, 1180 * we should have a way to identify whether 1181 * the CPU is capable of running 64-bit. 1182 */ 1183 if (cp->cp_edx & CPUID_AMD_EDX_LM) 1184 feature |= X86_64; 1185 1186 #if defined(__amd64) 1187 /* 1 GB large page - enable only for 64 bit kernel */ 1188 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 1189 feature |= X86_1GPG; 1190 #endif 1191 1192 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1193 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1194 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 1195 feature |= X86_SSE4A; 1196 1197 /* 1198 * If both the HTT and CMP_LGCY bits are set, 1199 * then we're not actually HyperThreaded. Read 1200 * "AMD CPUID Specification" for more details. 1201 */ 1202 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1203 (feature & X86_HTT) && 1204 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1205 feature &= ~X86_HTT; 1206 feature |= X86_CMP; 1207 } 1208 #if defined(__amd64) 1209 /* 1210 * It's really tricky to support syscall/sysret in 1211 * the i386 kernel; we rely on sysenter/sysexit 1212 * instead. In the amd64 kernel, things are -way- 1213 * better. 1214 */ 1215 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 1216 feature |= X86_ASYSC; 1217 1218 /* 1219 * While we're thinking about system calls, note 1220 * that AMD processors don't support sysenter 1221 * in long mode at all, so don't try to program them. 1222 */ 1223 if (x86_vendor == X86_VENDOR_AMD) 1224 feature &= ~X86_SEP; 1225 #endif 1226 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1227 feature |= X86_TSCP; 1228 break; 1229 default: 1230 break; 1231 } 1232 1233 /* 1234 * Get CPUID data about processor cores and hyperthreads. 
1235 */ 1236 switch (cpi->cpi_vendor) { 1237 case X86_VENDOR_Intel: 1238 if (cpi->cpi_maxeax >= 4) { 1239 cp = &cpi->cpi_std[4]; 1240 cp->cp_eax = 4; 1241 cp->cp_ecx = 0; 1242 (void) __cpuid_insn(cp); 1243 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1244 } 1245 /*FALLTHROUGH*/ 1246 case X86_VENDOR_AMD: 1247 if (cpi->cpi_xmaxeax < 0x80000008) 1248 break; 1249 cp = &cpi->cpi_extd[8]; 1250 cp->cp_eax = 0x80000008; 1251 (void) __cpuid_insn(cp); 1252 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1253 1254 /* 1255 * Virtual and physical address limits from 1256 * cpuid override previously guessed values. 1257 */ 1258 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1259 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1260 break; 1261 default: 1262 break; 1263 } 1264 1265 /* 1266 * Derive the number of cores per chip 1267 */ 1268 switch (cpi->cpi_vendor) { 1269 case X86_VENDOR_Intel: 1270 if (cpi->cpi_maxeax < 4) { 1271 cpi->cpi_ncore_per_chip = 1; 1272 break; 1273 } else { 1274 cpi->cpi_ncore_per_chip = 1275 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1276 } 1277 break; 1278 case X86_VENDOR_AMD: 1279 if (cpi->cpi_xmaxeax < 0x80000008) { 1280 cpi->cpi_ncore_per_chip = 1; 1281 break; 1282 } else { 1283 /* 1284 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1285 * 1 less than the number of physical cores on 1286 * the chip. In family 0x10 this value can 1287 * be affected by "downcoring" - it reflects 1288 * 1 less than the number of cores actually 1289 * enabled on this node. 1290 */ 1291 cpi->cpi_ncore_per_chip = 1292 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1293 } 1294 break; 1295 default: 1296 cpi->cpi_ncore_per_chip = 1; 1297 break; 1298 } 1299 1300 /* 1301 * Get CPUID data about TSC Invariance in Deep C-State. 
1302 */ 1303 switch (cpi->cpi_vendor) { 1304 case X86_VENDOR_Intel: 1305 if (cpi->cpi_maxeax >= 7) { 1306 cp = &cpi->cpi_extd[7]; 1307 cp->cp_eax = 0x80000007; 1308 cp->cp_ecx = 0; 1309 (void) __cpuid_insn(cp); 1310 } 1311 break; 1312 default: 1313 break; 1314 } 1315 } else { 1316 cpi->cpi_ncore_per_chip = 1; 1317 } 1318 1319 /* 1320 * If more than one core, then this processor is CMP. 1321 */ 1322 if (cpi->cpi_ncore_per_chip > 1) 1323 feature |= X86_CMP; 1324 1325 /* 1326 * If the number of cores is the same as the number 1327 * of CPUs, then we cannot have HyperThreading. 1328 */ 1329 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1330 feature &= ~X86_HTT; 1331 1332 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1333 cpi->cpi_procnodes_per_pkg = 1; 1334 1335 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1336 /* 1337 * Single-core single-threaded processors. 1338 */ 1339 cpi->cpi_chipid = -1; 1340 cpi->cpi_clogid = 0; 1341 cpi->cpi_coreid = cpu->cpu_id; 1342 cpi->cpi_pkgcoreid = 0; 1343 if (cpi->cpi_vendor == X86_VENDOR_AMD) 1344 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 1345 else 1346 cpi->cpi_procnodeid = cpi->cpi_chipid; 1347 } else if (cpi->cpi_ncpu_per_chip > 1) { 1348 if (cpi->cpi_vendor == X86_VENDOR_Intel) 1349 cpuid_intel_getids(cpu, feature); 1350 else if (cpi->cpi_vendor == X86_VENDOR_AMD) 1351 cpuid_amd_getids(cpu); 1352 else { 1353 /* 1354 * All other processors are currently 1355 * assumed to have single cores. 
1356 */ 1357 cpi->cpi_coreid = cpi->cpi_chipid; 1358 cpi->cpi_pkgcoreid = 0; 1359 cpi->cpi_procnodeid = cpi->cpi_chipid; 1360 } 1361 } 1362 1363 /* 1364 * Synthesize chip "revision" and socket type 1365 */ 1366 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1367 cpi->cpi_model, cpi->cpi_step); 1368 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1369 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1370 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1371 cpi->cpi_model, cpi->cpi_step); 1372 1373 pass1_done: 1374 cpi->cpi_pass = 1; 1375 return (feature); 1376 } 1377 1378 /* 1379 * Make copies of the cpuid table entries we depend on, in 1380 * part for ease of parsing now, in part so that we have only 1381 * one place to correct any of it, in part for ease of 1382 * later export to userland, and in part so we can look at 1383 * this stuff in a crash dump. 1384 */ 1385 1386 /*ARGSUSED*/ 1387 void 1388 cpuid_pass2(cpu_t *cpu) 1389 { 1390 uint_t n, nmax; 1391 int i; 1392 struct cpuid_regs *cp; 1393 uint8_t *dp; 1394 uint32_t *iptr; 1395 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1396 1397 ASSERT(cpi->cpi_pass == 1); 1398 1399 if (cpi->cpi_maxeax < 1) 1400 goto pass2_done; 1401 1402 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1403 nmax = NMAX_CPI_STD; 1404 /* 1405 * (We already handled n == 0 and n == 1 in pass 1) 1406 */ 1407 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1408 cp->cp_eax = n; 1409 1410 /* 1411 * CPUID function 4 expects %ecx to be initialized 1412 * with an index which indicates which cache to return 1413 * information about. The OS is expected to call function 4 1414 * with %ecx set to 0, 1, 2, ... until it returns with 1415 * EAX[4:0] set to 0, which indicates there are no more 1416 * caches. 1417 * 1418 * Here, populate cpi_std[4] with the information returned by 1419 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1420 * when dynamic memory allocation becomes available. 
1421 * 1422 * Note: we need to explicitly initialize %ecx here, since 1423 * function 4 may have been previously invoked. 1424 */ 1425 if (n == 4) 1426 cp->cp_ecx = 0; 1427 1428 (void) __cpuid_insn(cp); 1429 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1430 switch (n) { 1431 case 2: 1432 /* 1433 * "the lower 8 bits of the %eax register 1434 * contain a value that identifies the number 1435 * of times the cpuid [instruction] has to be 1436 * executed to obtain a complete image of the 1437 * processor's caching systems." 1438 * 1439 * How *do* they make this stuff up? 1440 */ 1441 cpi->cpi_ncache = sizeof (*cp) * 1442 BITX(cp->cp_eax, 7, 0); 1443 if (cpi->cpi_ncache == 0) 1444 break; 1445 cpi->cpi_ncache--; /* skip count byte */ 1446 1447 /* 1448 * Well, for now, rather than attempt to implement 1449 * this slightly dubious algorithm, we just look 1450 * at the first 15 .. 1451 */ 1452 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1453 cpi->cpi_ncache = sizeof (*cp) - 1; 1454 1455 dp = cpi->cpi_cacheinfo; 1456 if (BITX(cp->cp_eax, 31, 31) == 0) { 1457 uint8_t *p = (void *)&cp->cp_eax; 1458 for (i = 1; i < 4; i++) 1459 if (p[i] != 0) 1460 *dp++ = p[i]; 1461 } 1462 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1463 uint8_t *p = (void *)&cp->cp_ebx; 1464 for (i = 0; i < 4; i++) 1465 if (p[i] != 0) 1466 *dp++ = p[i]; 1467 } 1468 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1469 uint8_t *p = (void *)&cp->cp_ecx; 1470 for (i = 0; i < 4; i++) 1471 if (p[i] != 0) 1472 *dp++ = p[i]; 1473 } 1474 if (BITX(cp->cp_edx, 31, 31) == 0) { 1475 uint8_t *p = (void *)&cp->cp_edx; 1476 for (i = 0; i < 4; i++) 1477 if (p[i] != 0) 1478 *dp++ = p[i]; 1479 } 1480 break; 1481 1482 case 3: /* Processor serial number, if PSN supported */ 1483 break; 1484 1485 case 4: /* Deterministic cache parameters */ 1486 break; 1487 1488 case 5: /* Monitor/Mwait parameters */ 1489 { 1490 size_t mwait_size; 1491 1492 /* 1493 * check cpi_mwait.support which was set in cpuid_pass1 1494 */ 1495 if (!(cpi->cpi_mwait.support & 
MWAIT_SUPPORT)) 1496 break; 1497 1498 /* 1499 * Protect ourself from insane mwait line size. 1500 * Workaround for incomplete hardware emulator(s). 1501 */ 1502 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1503 if (mwait_size < sizeof (uint32_t) || 1504 !ISP2(mwait_size)) { 1505 #if DEBUG 1506 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1507 "size %ld", cpu->cpu_id, (long)mwait_size); 1508 #endif 1509 break; 1510 } 1511 1512 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1513 cpi->cpi_mwait.mon_max = mwait_size; 1514 if (MWAIT_EXTENSION(cpi)) { 1515 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1516 if (MWAIT_INT_ENABLE(cpi)) 1517 cpi->cpi_mwait.support |= 1518 MWAIT_ECX_INT_ENABLE; 1519 } 1520 break; 1521 } 1522 default: 1523 break; 1524 } 1525 } 1526 1527 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1528 struct cpuid_regs regs; 1529 1530 cp = ®s; 1531 cp->cp_eax = 0xB; 1532 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1533 1534 (void) __cpuid_insn(cp); 1535 1536 /* 1537 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1538 * indicates that the extended topology enumeration leaf is 1539 * available. 
1540 */ 1541 if (cp->cp_ebx) { 1542 uint32_t x2apic_id; 1543 uint_t coreid_shift = 0; 1544 uint_t ncpu_per_core = 1; 1545 uint_t chipid_shift = 0; 1546 uint_t ncpu_per_chip = 1; 1547 uint_t i; 1548 uint_t level; 1549 1550 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1551 cp->cp_eax = 0xB; 1552 cp->cp_ecx = i; 1553 1554 (void) __cpuid_insn(cp); 1555 level = CPI_CPU_LEVEL_TYPE(cp); 1556 1557 if (level == 1) { 1558 x2apic_id = cp->cp_edx; 1559 coreid_shift = BITX(cp->cp_eax, 4, 0); 1560 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1561 } else if (level == 2) { 1562 x2apic_id = cp->cp_edx; 1563 chipid_shift = BITX(cp->cp_eax, 4, 0); 1564 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1565 } 1566 } 1567 1568 cpi->cpi_apicid = x2apic_id; 1569 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1570 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1571 ncpu_per_core; 1572 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1573 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1574 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1575 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1576 } 1577 1578 /* Make cp NULL so that we don't stumble on others */ 1579 cp = NULL; 1580 } 1581 1582 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1583 goto pass2_done; 1584 1585 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1586 nmax = NMAX_CPI_EXTD; 1587 /* 1588 * Copy the extended properties, fixing them as we go. 
1589 * (We already handled n == 0 and n == 1 in pass 1) 1590 */ 1591 iptr = (void *)cpi->cpi_brandstr; 1592 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1593 cp->cp_eax = 0x80000000 + n; 1594 (void) __cpuid_insn(cp); 1595 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1596 switch (n) { 1597 case 2: 1598 case 3: 1599 case 4: 1600 /* 1601 * Extract the brand string 1602 */ 1603 *iptr++ = cp->cp_eax; 1604 *iptr++ = cp->cp_ebx; 1605 *iptr++ = cp->cp_ecx; 1606 *iptr++ = cp->cp_edx; 1607 break; 1608 case 5: 1609 switch (cpi->cpi_vendor) { 1610 case X86_VENDOR_AMD: 1611 /* 1612 * The Athlon and Duron were the first 1613 * parts to report the sizes of the 1614 * TLB for large pages. Before then, 1615 * we don't trust the data. 1616 */ 1617 if (cpi->cpi_family < 6 || 1618 (cpi->cpi_family == 6 && 1619 cpi->cpi_model < 1)) 1620 cp->cp_eax = 0; 1621 break; 1622 default: 1623 break; 1624 } 1625 break; 1626 case 6: 1627 switch (cpi->cpi_vendor) { 1628 case X86_VENDOR_AMD: 1629 /* 1630 * The Athlon and Duron were the first 1631 * AMD parts with L2 TLB's. 1632 * Before then, don't trust the data. 1633 */ 1634 if (cpi->cpi_family < 6 || 1635 cpi->cpi_family == 6 && 1636 cpi->cpi_model < 1) 1637 cp->cp_eax = cp->cp_ebx = 0; 1638 /* 1639 * AMD Duron rev A0 reports L2 1640 * cache size incorrectly as 1K 1641 * when it is really 64K 1642 */ 1643 if (cpi->cpi_family == 6 && 1644 cpi->cpi_model == 3 && 1645 cpi->cpi_step == 0) { 1646 cp->cp_ecx &= 0xffff; 1647 cp->cp_ecx |= 0x400000; 1648 } 1649 break; 1650 case X86_VENDOR_Cyrix: /* VIA C3 */ 1651 /* 1652 * VIA C3 processors are a bit messed 1653 * up w.r.t. encoding cache sizes in %ecx 1654 */ 1655 if (cpi->cpi_family != 6) 1656 break; 1657 /* 1658 * model 7 and 8 were incorrectly encoded 1659 * 1660 * xxx is model 8 really broken? 
1661 */ 1662 if (cpi->cpi_model == 7 || 1663 cpi->cpi_model == 8) 1664 cp->cp_ecx = 1665 BITX(cp->cp_ecx, 31, 24) << 16 | 1666 BITX(cp->cp_ecx, 23, 16) << 12 | 1667 BITX(cp->cp_ecx, 15, 8) << 8 | 1668 BITX(cp->cp_ecx, 7, 0); 1669 /* 1670 * model 9 stepping 1 has wrong associativity 1671 */ 1672 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1673 cp->cp_ecx |= 8 << 12; 1674 break; 1675 case X86_VENDOR_Intel: 1676 /* 1677 * Extended L2 Cache features function. 1678 * First appeared on Prescott. 1679 */ 1680 default: 1681 break; 1682 } 1683 break; 1684 default: 1685 break; 1686 } 1687 } 1688 1689 pass2_done: 1690 cpi->cpi_pass = 2; 1691 } 1692 1693 static const char * 1694 intel_cpubrand(const struct cpuid_info *cpi) 1695 { 1696 int i; 1697 1698 if ((x86_feature & X86_CPUID) == 0 || 1699 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1700 return ("i486"); 1701 1702 switch (cpi->cpi_family) { 1703 case 5: 1704 return ("Intel Pentium(r)"); 1705 case 6: 1706 switch (cpi->cpi_model) { 1707 uint_t celeron, xeon; 1708 const struct cpuid_regs *cp; 1709 case 0: 1710 case 1: 1711 case 2: 1712 return ("Intel Pentium(r) Pro"); 1713 case 3: 1714 case 4: 1715 return ("Intel Pentium(r) II"); 1716 case 6: 1717 return ("Intel Celeron(r)"); 1718 case 5: 1719 case 7: 1720 celeron = xeon = 0; 1721 cp = &cpi->cpi_std[2]; /* cache info */ 1722 1723 for (i = 1; i < 4; i++) { 1724 uint_t tmp; 1725 1726 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1727 if (tmp == 0x40) 1728 celeron++; 1729 if (tmp >= 0x44 && tmp <= 0x45) 1730 xeon++; 1731 } 1732 1733 for (i = 0; i < 2; i++) { 1734 uint_t tmp; 1735 1736 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1737 if (tmp == 0x40) 1738 celeron++; 1739 else if (tmp >= 0x44 && tmp <= 0x45) 1740 xeon++; 1741 } 1742 1743 for (i = 0; i < 4; i++) { 1744 uint_t tmp; 1745 1746 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1747 if (tmp == 0x40) 1748 celeron++; 1749 else if (tmp >= 0x44 && tmp <= 0x45) 1750 xeon++; 1751 } 1752 1753 for (i = 0; i < 4; i++) { 1754 uint_t tmp; 1755 1756 tmp 
= (cp->cp_edx >> (8 * i)) & 0xff; 1757 if (tmp == 0x40) 1758 celeron++; 1759 else if (tmp >= 0x44 && tmp <= 0x45) 1760 xeon++; 1761 } 1762 1763 if (celeron) 1764 return ("Intel Celeron(r)"); 1765 if (xeon) 1766 return (cpi->cpi_model == 5 ? 1767 "Intel Pentium(r) II Xeon(tm)" : 1768 "Intel Pentium(r) III Xeon(tm)"); 1769 return (cpi->cpi_model == 5 ? 1770 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1771 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1772 default: 1773 break; 1774 } 1775 default: 1776 break; 1777 } 1778 1779 /* BrandID is present if the field is nonzero */ 1780 if (cpi->cpi_brandid != 0) { 1781 static const struct { 1782 uint_t bt_bid; 1783 const char *bt_str; 1784 } brand_tbl[] = { 1785 { 0x1, "Intel(r) Celeron(r)" }, 1786 { 0x2, "Intel(r) Pentium(r) III" }, 1787 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1788 { 0x4, "Intel(r) Pentium(r) III" }, 1789 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1790 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1791 { 0x8, "Intel(r) Pentium(r) 4" }, 1792 { 0x9, "Intel(r) Pentium(r) 4" }, 1793 { 0xa, "Intel(r) Celeron(r)" }, 1794 { 0xb, "Intel(r) Xeon(tm)" }, 1795 { 0xc, "Intel(r) Xeon(tm) MP" }, 1796 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1797 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1798 { 0x11, "Mobile Genuine Intel(r)" }, 1799 { 0x12, "Intel(r) Celeron(r) M" }, 1800 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1801 { 0x14, "Intel(r) Celeron(r)" }, 1802 { 0x15, "Mobile Genuine Intel(r)" }, 1803 { 0x16, "Intel(r) Pentium(r) M" }, 1804 { 0x17, "Mobile Intel(r) Celeron(r)" } 1805 }; 1806 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1807 uint_t sgn; 1808 1809 sgn = (cpi->cpi_family << 8) | 1810 (cpi->cpi_model << 4) | cpi->cpi_step; 1811 1812 for (i = 0; i < btblmax; i++) 1813 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1814 break; 1815 if (i < btblmax) { 1816 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1817 return ("Intel(r) Celeron(r)"); 1818 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1819 return 
("Intel(r) Xeon(tm) MP"); 1820 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1821 return ("Intel(r) Xeon(tm)"); 1822 return (brand_tbl[i].bt_str); 1823 } 1824 } 1825 1826 return (NULL); 1827 } 1828 1829 static const char * 1830 amd_cpubrand(const struct cpuid_info *cpi) 1831 { 1832 if ((x86_feature & X86_CPUID) == 0 || 1833 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1834 return ("i486 compatible"); 1835 1836 switch (cpi->cpi_family) { 1837 case 5: 1838 switch (cpi->cpi_model) { 1839 case 0: 1840 case 1: 1841 case 2: 1842 case 3: 1843 case 4: 1844 case 5: 1845 return ("AMD-K5(r)"); 1846 case 6: 1847 case 7: 1848 return ("AMD-K6(r)"); 1849 case 8: 1850 return ("AMD-K6(r)-2"); 1851 case 9: 1852 return ("AMD-K6(r)-III"); 1853 default: 1854 return ("AMD (family 5)"); 1855 } 1856 case 6: 1857 switch (cpi->cpi_model) { 1858 case 1: 1859 return ("AMD-K7(tm)"); 1860 case 0: 1861 case 2: 1862 case 4: 1863 return ("AMD Athlon(tm)"); 1864 case 3: 1865 case 7: 1866 return ("AMD Duron(tm)"); 1867 case 6: 1868 case 8: 1869 case 10: 1870 /* 1871 * Use the L2 cache size to distinguish 1872 */ 1873 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
1874 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1875 default: 1876 return ("AMD (family 6)"); 1877 } 1878 default: 1879 break; 1880 } 1881 1882 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1883 cpi->cpi_brandid != 0) { 1884 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1885 case 3: 1886 return ("AMD Opteron(tm) UP 1xx"); 1887 case 4: 1888 return ("AMD Opteron(tm) DP 2xx"); 1889 case 5: 1890 return ("AMD Opteron(tm) MP 8xx"); 1891 default: 1892 return ("AMD Opteron(tm)"); 1893 } 1894 } 1895 1896 return (NULL); 1897 } 1898 1899 static const char * 1900 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1901 { 1902 if ((x86_feature & X86_CPUID) == 0 || 1903 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1904 type == X86_TYPE_CYRIX_486) 1905 return ("i486 compatible"); 1906 1907 switch (type) { 1908 case X86_TYPE_CYRIX_6x86: 1909 return ("Cyrix 6x86"); 1910 case X86_TYPE_CYRIX_6x86L: 1911 return ("Cyrix 6x86L"); 1912 case X86_TYPE_CYRIX_6x86MX: 1913 return ("Cyrix 6x86MX"); 1914 case X86_TYPE_CYRIX_GXm: 1915 return ("Cyrix GXm"); 1916 case X86_TYPE_CYRIX_MediaGX: 1917 return ("Cyrix MediaGX"); 1918 case X86_TYPE_CYRIX_MII: 1919 return ("Cyrix M2"); 1920 case X86_TYPE_VIA_CYRIX_III: 1921 return ("VIA Cyrix M3"); 1922 default: 1923 /* 1924 * Have another wild guess .. 1925 */ 1926 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1927 return ("Cyrix 5x86"); 1928 else if (cpi->cpi_family == 5) { 1929 switch (cpi->cpi_model) { 1930 case 2: 1931 return ("Cyrix 6x86"); /* Cyrix M1 */ 1932 case 4: 1933 return ("Cyrix MediaGX"); 1934 default: 1935 break; 1936 } 1937 } else if (cpi->cpi_family == 6) { 1938 switch (cpi->cpi_model) { 1939 case 0: 1940 return ("Cyrix 6x86MX"); /* Cyrix M2? 
*/ 1941 case 5: 1942 case 6: 1943 case 7: 1944 case 8: 1945 case 9: 1946 return ("VIA C3"); 1947 default: 1948 break; 1949 } 1950 } 1951 break; 1952 } 1953 return (NULL); 1954 } 1955 1956 /* 1957 * This only gets called in the case that the CPU extended 1958 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1959 * aren't available, or contain null bytes for some reason. 1960 */ 1961 static void 1962 fabricate_brandstr(struct cpuid_info *cpi) 1963 { 1964 const char *brand = NULL; 1965 1966 switch (cpi->cpi_vendor) { 1967 case X86_VENDOR_Intel: 1968 brand = intel_cpubrand(cpi); 1969 break; 1970 case X86_VENDOR_AMD: 1971 brand = amd_cpubrand(cpi); 1972 break; 1973 case X86_VENDOR_Cyrix: 1974 brand = cyrix_cpubrand(cpi, x86_type); 1975 break; 1976 case X86_VENDOR_NexGen: 1977 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1978 brand = "NexGen Nx586"; 1979 break; 1980 case X86_VENDOR_Centaur: 1981 if (cpi->cpi_family == 5) 1982 switch (cpi->cpi_model) { 1983 case 4: 1984 brand = "Centaur C6"; 1985 break; 1986 case 8: 1987 brand = "Centaur C2"; 1988 break; 1989 case 9: 1990 brand = "Centaur C3"; 1991 break; 1992 default: 1993 break; 1994 } 1995 break; 1996 case X86_VENDOR_Rise: 1997 if (cpi->cpi_family == 5 && 1998 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1999 brand = "Rise mP6"; 2000 break; 2001 case X86_VENDOR_SiS: 2002 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2003 brand = "SiS 55x"; 2004 break; 2005 case X86_VENDOR_TM: 2006 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 2007 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 2008 break; 2009 case X86_VENDOR_NSC: 2010 case X86_VENDOR_UMC: 2011 default: 2012 break; 2013 } 2014 if (brand) { 2015 (void) strcpy((char *)cpi->cpi_brandstr, brand); 2016 return; 2017 } 2018 2019 /* 2020 * If all else fails ... 
2021 */ 2022 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 2023 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 2024 cpi->cpi_model, cpi->cpi_step); 2025 } 2026 2027 /* 2028 * This routine is called just after kernel memory allocation 2029 * becomes available on cpu0, and as part of mp_startup() on 2030 * the other cpus. 2031 * 2032 * Fixup the brand string, and collect any information from cpuid 2033 * that requires dynamicically allocated storage to represent. 2034 */ 2035 /*ARGSUSED*/ 2036 void 2037 cpuid_pass3(cpu_t *cpu) 2038 { 2039 int i, max, shft, level, size; 2040 struct cpuid_regs regs; 2041 struct cpuid_regs *cp; 2042 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2043 2044 ASSERT(cpi->cpi_pass == 2); 2045 2046 /* 2047 * Function 4: Deterministic cache parameters 2048 * 2049 * Take this opportunity to detect the number of threads 2050 * sharing the last level cache, and construct a corresponding 2051 * cache id. The respective cpuid_info members are initialized 2052 * to the default case of "no last level cache sharing". 2053 */ 2054 cpi->cpi_ncpu_shr_last_cache = 1; 2055 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 2056 2057 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 2058 2059 /* 2060 * Find the # of elements (size) returned by fn 4, and along 2061 * the way detect last level cache sharing details. 2062 */ 2063 bzero(®s, sizeof (regs)); 2064 cp = ®s; 2065 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 2066 cp->cp_eax = 4; 2067 cp->cp_ecx = i; 2068 2069 (void) __cpuid_insn(cp); 2070 2071 if (CPI_CACHE_TYPE(cp) == 0) 2072 break; 2073 level = CPI_CACHE_LVL(cp); 2074 if (level > max) { 2075 max = level; 2076 cpi->cpi_ncpu_shr_last_cache = 2077 CPI_NTHR_SHR_CACHE(cp) + 1; 2078 } 2079 } 2080 cpi->cpi_std_4_size = size = i; 2081 2082 /* 2083 * Allocate the cpi_std_4 array. The first element 2084 * references the regs for fn 4, %ecx == 0, which 2085 * cpuid_pass2() stashed in cpi->cpi_std[4]. 
2086 */ 2087 if (size > 0) { 2088 cpi->cpi_std_4 = 2089 kmem_alloc(size * sizeof (cp), KM_SLEEP); 2090 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 2091 2092 /* 2093 * Allocate storage to hold the additional regs 2094 * for function 4, %ecx == 1 .. cpi_std_4_size. 2095 * 2096 * The regs for fn 4, %ecx == 0 has already 2097 * been allocated as indicated above. 2098 */ 2099 for (i = 1; i < size; i++) { 2100 cp = cpi->cpi_std_4[i] = 2101 kmem_zalloc(sizeof (regs), KM_SLEEP); 2102 cp->cp_eax = 4; 2103 cp->cp_ecx = i; 2104 2105 (void) __cpuid_insn(cp); 2106 } 2107 } 2108 /* 2109 * Determine the number of bits needed to represent 2110 * the number of CPUs sharing the last level cache. 2111 * 2112 * Shift off that number of bits from the APIC id to 2113 * derive the cache id. 2114 */ 2115 shft = 0; 2116 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 2117 shft++; 2118 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 2119 } 2120 2121 /* 2122 * Now fixup the brand string 2123 */ 2124 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 2125 fabricate_brandstr(cpi); 2126 } else { 2127 2128 /* 2129 * If we successfully extracted a brand string from the cpuid 2130 * instruction, clean it up by removing leading spaces and 2131 * similar junk. 2132 */ 2133 if (cpi->cpi_brandstr[0]) { 2134 size_t maxlen = sizeof (cpi->cpi_brandstr); 2135 char *src, *dst; 2136 2137 dst = src = (char *)cpi->cpi_brandstr; 2138 src[maxlen - 1] = '\0'; 2139 /* 2140 * strip leading spaces 2141 */ 2142 while (*src == ' ') 2143 src++; 2144 /* 2145 * Remove any 'Genuine' or "Authentic" prefixes 2146 */ 2147 if (strncmp(src, "Genuine ", 8) == 0) 2148 src += 8; 2149 if (strncmp(src, "Authentic ", 10) == 0) 2150 src += 10; 2151 2152 /* 2153 * Now do an in-place copy. 2154 * Map (R) to (r) and (TM) to (tm). 2155 * The era of teletypes is long gone, and there's 2156 * -really- no need to shout. 
*/
			while (*src != '\0') {
				if (src[0] == '(') {
					if (strncmp(src + 1, "R)", 2) == 0) {
						(void) strncpy(dst, "(r)", 3);
						src += 3;
						dst += 3;
						continue;
					}
					if (strncmp(src + 1, "TM)", 3) == 0) {
						(void) strncpy(dst, "(tm)", 4);
						src += 4;
						dst += 4;
						continue;
					}
				}
				*dst++ = *src++;
			}
			*dst = '\0';

			/*
			 * Finally, remove any trailing spaces
			 */
			while (--dst > cpi->cpi_brandstr)
				if (*dst == ' ')
					*dst = '\0';
				else
					break;
		} else
			fabricate_brandstr(cpi);
	}
	cpi->cpi_pass = 3;
}

/*
 * This routine is called out of bind_hwcap() much later in the life
 * of the kernel (post_startup()).  The job of this routine is to resolve
 * the hardware feature support and kernel support for those features into
 * what we're actually going to tell applications via the aux vector.
 *
 * A NULL 'cpu' means the current CPU.  The filtered feature words are
 * stored in cpi_support[], cpi_pass is advanced to 4, and the accumulated
 * AV_386_* flags are returned.
 */
uint_t
cpuid_pass4(cpu_t *cpu)
{
	struct cpuid_info *cpi;
	uint_t hwcap_flags = 0;

	if (cpu == NULL)
		cpu = CPU;
	cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpi->cpi_pass == 3);

	if (cpi->cpi_maxeax >= 1) {
		uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
		uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];

		/* start from the raw feature words, then strip bits below */
		*edx = CPI_FEATURES_EDX(cpi);
		*ecx = CPI_FEATURES_ECX(cpi);

		/*
		 * [these require explicit kernel support]
		 */
		if ((x86_feature & X86_SEP) == 0)
			*edx &= ~CPUID_INTC_EDX_SEP;

		if ((x86_feature & X86_SSE) == 0)
			*edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
		if ((x86_feature & X86_SSE2) == 0)
			*edx &= ~CPUID_INTC_EDX_SSE2;

		if ((x86_feature & X86_HTT) == 0)
			*edx &= ~CPUID_INTC_EDX_HTT;

		if ((x86_feature & X86_SSE3) == 0)
			*ecx &= ~CPUID_INTC_ECX_SSE3;

		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
			if ((x86_feature & X86_SSSE3) == 0)
				*ecx &= ~CPUID_INTC_ECX_SSSE3;
			if ((x86_feature & X86_SSE4_1) == 0)
				*ecx &= ~CPUID_INTC_ECX_SSE4_1;
			if ((x86_feature & X86_SSE4_2) == 0)
				*ecx &= ~CPUID_INTC_ECX_SSE4_2;
			if ((x86_feature & X86_AES) == 0)
				*ecx &= ~CPUID_INTC_ECX_AES;
		}

		/*
		 * [no explicit support required beyond x87 fp context]
		 */
		if (!fpu_exists)
			*edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);

		/*
		 * Now map the supported feature vector to things that we
		 * think userland will care about.
		 */
		if (*edx & CPUID_INTC_EDX_SEP)
			hwcap_flags |= AV_386_SEP;
		if (*edx & CPUID_INTC_EDX_SSE)
			hwcap_flags |= AV_386_FXSR | AV_386_SSE;
		if (*edx & CPUID_INTC_EDX_SSE2)
			hwcap_flags |= AV_386_SSE2;
		if (*ecx & CPUID_INTC_ECX_SSE3)
			hwcap_flags |= AV_386_SSE3;
		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
			if (*ecx & CPUID_INTC_ECX_SSSE3)
				hwcap_flags |= AV_386_SSSE3;
			if (*ecx & CPUID_INTC_ECX_SSE4_1)
				hwcap_flags |= AV_386_SSE4_1;
			if (*ecx & CPUID_INTC_ECX_SSE4_2)
				hwcap_flags |= AV_386_SSE4_2;
			if (*ecx & CPUID_INTC_ECX_MOVBE)
				hwcap_flags |= AV_386_MOVBE;
			if (*ecx & CPUID_INTC_ECX_AES)
				hwcap_flags |= AV_386_AES;
			if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
				hwcap_flags |= AV_386_PCLMULQDQ;
		}
		if (*ecx & CPUID_INTC_ECX_POPCNT)
			hwcap_flags |= AV_386_POPCNT;
		if (*edx & CPUID_INTC_EDX_FPU)
			hwcap_flags |= AV_386_FPU;
		if (*edx & CPUID_INTC_EDX_MMX)
			hwcap_flags |= AV_386_MMX;

		if (*edx & CPUID_INTC_EDX_TSC)
			hwcap_flags |= AV_386_TSC;
		if (*edx & CPUID_INTC_EDX_CX8)
			hwcap_flags |= AV_386_CX8;
		if (*edx & CPUID_INTC_EDX_CMOV)
			hwcap_flags |= AV_386_CMOV;
		if (*ecx & CPUID_INTC_ECX_CX16)
			hwcap_flags |= AV_386_CX16;
	}

	/* no extended feature leaf: nothing more to resolve */
	if (cpi->cpi_xmaxeax < 0x80000001)
		goto pass4_done;

	switch (cpi->cpi_vendor) {
		struct cpuid_regs cp;
		uint32_t *edx, *ecx;

	case X86_VENDOR_Intel:
		/*
		 * Seems like Intel duplicated what was necessary
		 * here to make the initial crop of 64-bit OS's work.
		 * Hopefully, those are the only "extended" bits
		 * they'll add.
		 */
		/*FALLTHROUGH*/

	case X86_VENDOR_AMD:
		edx = &cpi->cpi_support[AMD_EDX_FEATURES];
		ecx = &cpi->cpi_support[AMD_ECX_FEATURES];

		*edx = CPI_FEATURES_XTD_EDX(cpi);
		*ecx = CPI_FEATURES_XTD_ECX(cpi);

		/*
		 * [these features require explicit kernel support]
		 */
		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
			if ((x86_feature & X86_TSCP) == 0)
				*edx &= ~CPUID_AMD_EDX_TSCP;
			break;

		case X86_VENDOR_AMD:
			if ((x86_feature & X86_TSCP) == 0)
				*edx &= ~CPUID_AMD_EDX_TSCP;
			if ((x86_feature & X86_SSE4A) == 0)
				*ecx &= ~CPUID_AMD_ECX_SSE4A;
			break;

		default:
			break;
		}

		/*
		 * [no explicit support required beyond
		 * x87 fp context and exception handlers]
		 */
		if (!fpu_exists)
			*edx &= ~(CPUID_AMD_EDX_MMXamd |
			    CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);

		if ((x86_feature & X86_NX) == 0)
			*edx &= ~CPUID_AMD_EDX_NX;
#if !defined(__amd64)
		*edx &= ~CPUID_AMD_EDX_LM;
#endif
		/*
		 * Now map the supported feature vector to
		 * things that we think userland will care about.
		 */
#if defined(__amd64)
		if (*edx & CPUID_AMD_EDX_SYSC)
			hwcap_flags |= AV_386_AMD_SYSC;
#endif
		if (*edx & CPUID_AMD_EDX_MMXamd)
			hwcap_flags |= AV_386_AMD_MMX;
		if (*edx & CPUID_AMD_EDX_3DNow)
			hwcap_flags |= AV_386_AMD_3DNow;
		if (*edx & CPUID_AMD_EDX_3DNowx)
			hwcap_flags |= AV_386_AMD_3DNowx;

		switch (cpi->cpi_vendor) {
		case X86_VENDOR_AMD:
			if (*edx & CPUID_AMD_EDX_TSCP)
				hwcap_flags |= AV_386_TSCP;
			if (*ecx & CPUID_AMD_ECX_AHF64)
				hwcap_flags |= AV_386_AHF;
			if (*ecx & CPUID_AMD_ECX_SSE4A)
				hwcap_flags |= AV_386_AMD_SSE4A;
			if (*ecx & CPUID_AMD_ECX_LZCNT)
				hwcap_flags |= AV_386_AMD_LZCNT;
			break;

		case X86_VENDOR_Intel:
			if (*edx & CPUID_AMD_EDX_TSCP)
				hwcap_flags |= AV_386_TSCP;
			/*
			 * Aarrgh.
			 * Intel uses a different bit in the same word.
			 */
			if (*ecx & CPUID_INTC_ECX_AHF64)
				hwcap_flags |= AV_386_AHF;
			break;

		default:
			break;
		}
		break;

	case X86_VENDOR_TM:
		/* fetch leaf 0x80860001 and keep only its %edx word */
		cp.cp_eax = 0x80860001;
		(void) __cpuid_insn(&cp);
		cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
		break;

	default:
		break;
	}

pass4_done:
	cpi->cpi_pass = 4;
	return (hwcap_flags);
}


/*
 * Simulate the cpuid instruction using the data we previously
 * captured about this CPU.  We try our best to return the truth
 * about the hardware, independently of kernel support.
 *
 * On entry cp->cp_eax selects the function; on return all four registers
 * in *cp are filled in and the resulting %eax value is returned.  A NULL
 * 'cpu' means the current CPU.
 */
uint32_t
cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
{
	struct cpuid_info *cpi;
	struct cpuid_regs *xcp;

	if (cpu == NULL)
		cpu = CPU;
	cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpuid_checkpass(cpu, 3));

	/*
	 * CPUID data is cached in two separate places: cpi_std for standard
	 * CPUID functions, and cpi_extd for extended CPUID functions.
	 */
	if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
		xcp = &cpi->cpi_std[cp->cp_eax];
	else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
	    cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
		xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
	else
		/*
		 * The caller is asking for data from an input parameter which
		 * the kernel has not cached.  In this case we go fetch from
		 * the hardware and return the data directly to the user.
		 */
		return (__cpuid_insn(cp));

	cp->cp_eax = xcp->cp_eax;
	cp->cp_ebx = xcp->cp_ebx;
	cp->cp_ecx = xcp->cp_ecx;
	cp->cp_edx = xcp->cp_edx;
	return (cp->cp_eax);
}

/*
 * Returns non-zero when 'cpu' and its cpuid_info are both non-NULL and the
 * recorded cpi_pass is at least 'pass'.
 */
int
cpuid_checkpass(cpu_t *cpu, int pass)
{
	return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
	    cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
}

/*
 * Copy the cached brand string into 's' (buffer size 'n') via snprintf()
 * and return snprintf()'s result.  Requires pass 3.
 */
int
cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
{
	ASSERT(cpuid_checkpass(cpu, 3));

	return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
}

/*
 * Returns non-zero when cpi_chipid is non-negative.  A NULL 'cpu' means
 * the current CPU.
 */
int
cpuid_is_cmt(cpu_t *cpu)
{
	if (cpu == NULL)
		cpu = CPU;

	ASSERT(cpuid_checkpass(cpu, 1));

	return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
}

/*
 * AMD and Intel both implement the 64-bit variant of the syscall
 * instruction (syscallq), so if there's -any- support for syscall,
 * cpuid currently says "yes, we support this".
 *
 * However, Intel decided to -not- implement the 32-bit variant of the
 * syscall instruction, so we provide a predicate to allow our caller
 * to test that subtlety here.
 *
 * XXPV	Currently, 32-bit syscall instructions don't work via the hypervisor,
 *	even in the case where the hardware would in fact support it.
 */
/*ARGSUSED*/
int
cpuid_syscall32_insn(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));

#if !defined(__xpv)
	if (cpu == NULL)
		cpu = CPU;

	/*CSTYLED*/
	{
		struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

		/* only AMD parts advertising SYSC report 32-bit syscall */
		if (cpi->cpi_vendor == X86_VENDOR_AMD &&
		    cpi->cpi_xmaxeax >= 0x80000001 &&
		    (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
			return (1);
	}
#endif
	return (0);
}

/*
 * Format a one-line identification string for 'cpu' into 's' (buffer size
 * 'n') and return snprintf()'s result.  The chipid is included only when
 * cpuid_is_cmt() is true for this CPU.
 */
int
cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
{
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

	static const char fmt[] =
	    "x86 (%s %X family %d model %d step %d clock %d MHz)";
	static const char fmt_ht[] =
	    "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";

	ASSERT(cpuid_checkpass(cpu, 1));

	if (cpuid_is_cmt(cpu))
		return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
		    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
		    cpi->cpi_family, cpi->cpi_model,
		    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
	return (snprintf(s, n, fmt,
	    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
	    cpi->cpi_family, cpi->cpi_model,
	    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
}

/*
 * The routines below are simple accessors for fields of the cpuid_info
 * attached to 'cpu'.  Unless noted otherwise they do not accept a NULL
 * 'cpu' and only assert that the required pass has completed.
 */

/* Returns cpi_vendorstr (pass 1). */
const char *
cpuid_getvendorstr(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
}

/* Returns cpi_vendor (pass 1). */
uint_t
cpuid_getvendor(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
}

/* Returns cpi_family (pass 1). */
uint_t
cpuid_getfamily(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_family);
}

/* Returns cpi_model (pass 1). */
uint_t
cpuid_getmodel(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_model);
}

/* Returns cpi_ncpu_per_chip (pass 1). */
uint_t
cpuid_get_ncpu_per_chip(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
}

/* Returns cpi_ncore_per_chip (pass 1). */
uint_t
cpuid_get_ncore_per_chip(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
}

/* Returns cpi_ncpu_shr_last_cache (pass 2). */
uint_t
cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 2));
	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
}

/* Returns cpi_last_lvl_cacheid (pass 2). */
id_t
cpuid_get_last_lvl_cacheid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 2));
	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
}

/* Returns cpi_step (pass 1). */
uint_t
cpuid_getstep(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_step);
}

/* Returns the cached %eax of standard cpuid function 1 (pass 1). */
uint_t
cpuid_getsig(struct cpu *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
}

/* Returns cpi_chiprev (pass 1). */
uint32_t
cpuid_getchiprev(struct cpu *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
}

/* Returns cpi_chiprevstr (pass 1). */
const char *
cpuid_getchiprevstr(struct cpu *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
}

/* Returns cpi_socket (pass 1). */
uint32_t
cpuid_getsockettype(struct cpu *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_socket);
}

/*
 * Returns the socket name string.  The string is looked up once, from the
 * first CPU this is called for, and cached in a function-local static;
 * every later call returns that same pointer regardless of 'cpu'.
 */
const char *
cpuid_getsocketstr(cpu_t *cpu)
{
	static const char *socketstr = NULL;
	struct cpuid_info *cpi;

	ASSERT(cpuid_checkpass(cpu, 1));
	cpi = cpu->cpu_m.mcpu_cpi;

	/* Assume that socket types are the same across the system */
	if (socketstr == NULL)
		socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
		    cpi->cpi_model, cpi->cpi_step);


	return (socketstr);
}

/*
 * Returns cpi_chipid when cpuid_is_cmt() is true for 'cpu', otherwise the
 * CPU's own cpu_id.
 */
int
cpuid_get_chipid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));

	if (cpuid_is_cmt(cpu))
		return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
	return (cpu->cpu_id);
}

/* Returns cpi_coreid (pass 1). */
id_t
cpuid_get_coreid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
}

/* Returns cpi_pkgcoreid (pass 1). */
int
cpuid_get_pkgcoreid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
}

/* Returns cpi_clogid (pass 1). */
int
cpuid_get_clogid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
}

/*
 * Returns cpi_last_lvl_cacheid.
 * NOTE(review): this reads the same field as cpuid_get_last_lvl_cacheid()
 * but asserts pass 1 where that routine asserts pass 2 - confirm which
 * pass actually populates the field.
 */
int
cpuid_get_cacheid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
}

/* Returns cpi_procnodeid (pass 1). */
uint_t
cpuid_get_procnodeid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
}

/* Returns cpi_procnodes_per_pkg (pass 1). */
uint_t
cpuid_get_procnodes_per_pkg(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
}

/*
 * Returns 1 unconditionally when built for __amd64.  Otherwise returns 1
 * only for AMD processors whose extended %ecx feature word has the CR8D
 * bit set, and 0 for everything else.
 *
 * NOTE(review): the 32-bit path gates a read of the extended feature word
 * on cpi_maxeax >= 1 rather than on cpi_xmaxeax - confirm that is intended.
 */
/*ARGSUSED*/
int
cpuid_have_cr8access(cpu_t *cpu)
{
#if defined(__amd64)
	return (1);
#else
	struct cpuid_info *cpi;

	ASSERT(cpu != NULL);
	cpi = cpu->cpu_m.mcpu_cpi;
	if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
	    (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
		return (1);
	return (0);
#endif
}

/*
 * Returns cpi_apicid, or UINT32_MAX when the processor does not implement
 * standard cpuid function 1 (cpi_maxeax < 1).
 */
uint32_t
cpuid_get_apicid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
		return (UINT32_MAX);
	} else {
		return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
	}
}

/*
 * Store cpi_pabits in *pabits and cpi_vabits in *vabits; either pointer
 * may be NULL to skip that value.  A NULL 'cpu' means the current CPU.
 */
void
cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
{
	struct cpuid_info *cpi;

	if (cpu == NULL)
		cpu = CPU;
	cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpuid_checkpass(cpu, 1));

	if (pabits)
		*pabits = cpi->cpi_pabits;
	if (vabits)
		*vabits = cpi->cpi_vabits;
}

/*
 * Returns the number of data TLB entries for a corresponding
 * pagesize.
If it can't be computed, or isn't known, the 2743 * routine returns zero. If you ask about an architecturally 2744 * impossible pagesize, the routine will panic (so that the 2745 * hat implementor knows that things are inconsistent.) 2746 */ 2747 uint_t 2748 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2749 { 2750 struct cpuid_info *cpi; 2751 uint_t dtlb_nent = 0; 2752 2753 if (cpu == NULL) 2754 cpu = CPU; 2755 cpi = cpu->cpu_m.mcpu_cpi; 2756 2757 ASSERT(cpuid_checkpass(cpu, 1)); 2758 2759 /* 2760 * Check the L2 TLB info 2761 */ 2762 if (cpi->cpi_xmaxeax >= 0x80000006) { 2763 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2764 2765 switch (pagesize) { 2766 2767 case 4 * 1024: 2768 /* 2769 * All zero in the top 16 bits of the register 2770 * indicates a unified TLB. Size is in low 16 bits. 2771 */ 2772 if ((cp->cp_ebx & 0xffff0000) == 0) 2773 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2774 else 2775 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2776 break; 2777 2778 case 2 * 1024 * 1024: 2779 if ((cp->cp_eax & 0xffff0000) == 0) 2780 dtlb_nent = cp->cp_eax & 0x0000ffff; 2781 else 2782 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2783 break; 2784 2785 default: 2786 panic("unknown L2 pagesize"); 2787 /*NOTREACHED*/ 2788 } 2789 } 2790 2791 if (dtlb_nent != 0) 2792 return (dtlb_nent); 2793 2794 /* 2795 * No L2 TLB support for this size, try L1. 2796 */ 2797 if (cpi->cpi_xmaxeax >= 0x80000005) { 2798 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2799 2800 switch (pagesize) { 2801 case 4 * 1024: 2802 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2803 break; 2804 case 2 * 1024 * 1024: 2805 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2806 break; 2807 default: 2808 panic("unknown L1 d-TLB pagesize"); 2809 /*NOTREACHED*/ 2810 } 2811 } 2812 2813 return (dtlb_nent); 2814 } 2815 2816 /* 2817 * Return 0 if the erratum is not present or not applicable, positive 2818 * if it is, and negative if the status of the erratum is unknown. 
2819 * 2820 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2821 * Processors" #25759, Rev 3.57, August 2005 2822 */ 2823 int 2824 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2825 { 2826 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2827 uint_t eax; 2828 2829 /* 2830 * Bail out if this CPU isn't an AMD CPU, or if it's 2831 * a legacy (32-bit) AMD CPU. 2832 */ 2833 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2834 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2835 cpi->cpi_family == 6) 2836 2837 return (0); 2838 2839 eax = cpi->cpi_std[1].cp_eax; 2840 2841 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2842 #define SH_B3(eax) (eax == 0xf51) 2843 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2844 2845 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2846 2847 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2848 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2849 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2850 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2851 2852 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2853 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2854 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2855 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2856 2857 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2858 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2859 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2860 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2861 #define BH_E4(eax) (eax == 0x20fb1) 2862 #define SH_E5(eax) (eax == 0x20f42) 2863 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2864 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2865 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2866 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2867 DH_E6(eax) || JH_E6(eax)) 2868 2869 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 
0x100f02) 2870 #define DR_B0(eax) (eax == 0x100f20) 2871 #define DR_B1(eax) (eax == 0x100f21) 2872 #define DR_BA(eax) (eax == 0x100f2a) 2873 #define DR_B2(eax) (eax == 0x100f22) 2874 #define DR_B3(eax) (eax == 0x100f23) 2875 #define RB_C0(eax) (eax == 0x100f40) 2876 2877 switch (erratum) { 2878 case 1: 2879 return (cpi->cpi_family < 0x10); 2880 case 51: /* what does the asterisk mean? */ 2881 return (B(eax) || SH_C0(eax) || CG(eax)); 2882 case 52: 2883 return (B(eax)); 2884 case 57: 2885 return (cpi->cpi_family <= 0x11); 2886 case 58: 2887 return (B(eax)); 2888 case 60: 2889 return (cpi->cpi_family <= 0x11); 2890 case 61: 2891 case 62: 2892 case 63: 2893 case 64: 2894 case 65: 2895 case 66: 2896 case 68: 2897 case 69: 2898 case 70: 2899 case 71: 2900 return (B(eax)); 2901 case 72: 2902 return (SH_B0(eax)); 2903 case 74: 2904 return (B(eax)); 2905 case 75: 2906 return (cpi->cpi_family < 0x10); 2907 case 76: 2908 return (B(eax)); 2909 case 77: 2910 return (cpi->cpi_family <= 0x11); 2911 case 78: 2912 return (B(eax) || SH_C0(eax)); 2913 case 79: 2914 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2915 case 80: 2916 case 81: 2917 case 82: 2918 return (B(eax)); 2919 case 83: 2920 return (B(eax) || SH_C0(eax) || CG(eax)); 2921 case 85: 2922 return (cpi->cpi_family < 0x10); 2923 case 86: 2924 return (SH_C0(eax) || CG(eax)); 2925 case 88: 2926 #if !defined(__amd64) 2927 return (0); 2928 #else 2929 return (B(eax) || SH_C0(eax)); 2930 #endif 2931 case 89: 2932 return (cpi->cpi_family < 0x10); 2933 case 90: 2934 return (B(eax) || SH_C0(eax) || CG(eax)); 2935 case 91: 2936 case 92: 2937 return (B(eax) || SH_C0(eax)); 2938 case 93: 2939 return (SH_C0(eax)); 2940 case 94: 2941 return (B(eax) || SH_C0(eax) || CG(eax)); 2942 case 95: 2943 #if !defined(__amd64) 2944 return (0); 2945 #else 2946 return (B(eax) || SH_C0(eax)); 2947 #endif 2948 case 96: 2949 return (B(eax) || SH_C0(eax) || CG(eax)); 2950 case 97: 2951 case 98: 2952 return (SH_C0(eax) || CG(eax)); 2953 
case 99: 2954 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2955 case 100: 2956 return (B(eax) || SH_C0(eax)); 2957 case 101: 2958 case 103: 2959 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2960 case 104: 2961 return (SH_C0(eax) || CG(eax) || D0(eax)); 2962 case 105: 2963 case 106: 2964 case 107: 2965 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2966 case 108: 2967 return (DH_CG(eax)); 2968 case 109: 2969 return (SH_C0(eax) || CG(eax) || D0(eax)); 2970 case 110: 2971 return (D0(eax) || EX(eax)); 2972 case 111: 2973 return (CG(eax)); 2974 case 112: 2975 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2976 case 113: 2977 return (eax == 0x20fc0); 2978 case 114: 2979 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2980 case 115: 2981 return (SH_E0(eax) || JH_E1(eax)); 2982 case 116: 2983 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2984 case 117: 2985 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2986 case 118: 2987 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2988 JH_E6(eax)); 2989 case 121: 2990 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2991 case 122: 2992 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2993 case 123: 2994 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2995 case 131: 2996 return (cpi->cpi_family < 0x10); 2997 case 6336786: 2998 /* 2999 * Test for AdvPowerMgmtInfo.TscPStateInvariant 3000 * if this is a K8 family or newer processor 3001 */ 3002 if (CPI_FAMILY(cpi) == 0xf) { 3003 struct cpuid_regs regs; 3004 regs.cp_eax = 0x80000007; 3005 (void) __cpuid_insn(®s); 3006 return (!(regs.cp_edx & 0x100)); 3007 } 3008 return (0); 3009 case 6323525: 3010 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 3011 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 3012 3013 case 6671130: 3014 /* 3015 * check for processors (pre-Shanghai) that do not provide 3016 * optimal management of 1gb ptes in its tlb. 
3017 */ 3018 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 3019 3020 case 298: 3021 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 3022 DR_B2(eax) || RB_C0(eax)); 3023 3024 default: 3025 return (-1); 3026 3027 } 3028 } 3029 3030 /* 3031 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 3032 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 3033 */ 3034 int 3035 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 3036 { 3037 struct cpuid_info *cpi; 3038 uint_t osvwid; 3039 static int osvwfeature = -1; 3040 uint64_t osvwlength; 3041 3042 3043 cpi = cpu->cpu_m.mcpu_cpi; 3044 3045 /* confirm OSVW supported */ 3046 if (osvwfeature == -1) { 3047 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 3048 } else { 3049 /* assert that osvw feature setting is consistent on all cpus */ 3050 ASSERT(osvwfeature == 3051 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 3052 } 3053 if (!osvwfeature) 3054 return (-1); 3055 3056 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 3057 3058 switch (erratum) { 3059 case 298: /* osvwid is 0 */ 3060 osvwid = 0; 3061 if (osvwlength <= (uint64_t)osvwid) { 3062 /* osvwid 0 is unknown */ 3063 return (-1); 3064 } 3065 3066 /* 3067 * Check the OSVW STATUS MSR to determine the state 3068 * of the erratum where: 3069 * 0 - fixed by HW 3070 * 1 - BIOS has applied the workaround when BIOS 3071 * workaround is available. (Or for other errata, 3072 * OS workaround is required.) 3073 * For a value of 1, caller will confirm that the 3074 * erratum 298 workaround has indeed been applied by BIOS. 3075 * 3076 * A 1 may be set in cpus that have a HW fix 3077 * in a mixed cpu system. Regarding erratum 298: 3078 * In a multiprocessor platform, the workaround above 3079 * should be applied to all processors regardless of 3080 * silicon revision when an affected processor is 3081 * present. 
3082 */ 3083 3084 return (rdmsr(MSR_AMD_OSVW_STATUS + 3085 (osvwid / OSVW_ID_CNT_PER_MSR)) & 3086 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 3087 3088 default: 3089 return (-1); 3090 } 3091 } 3092 3093 static const char assoc_str[] = "associativity"; 3094 static const char line_str[] = "line-size"; 3095 static const char size_str[] = "size"; 3096 3097 static void 3098 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 3099 uint32_t val) 3100 { 3101 char buf[128]; 3102 3103 /* 3104 * ndi_prop_update_int() is used because it is desirable for 3105 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 3106 */ 3107 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 3108 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 3109 } 3110 3111 /* 3112 * Intel-style cache/tlb description 3113 * 3114 * Standard cpuid level 2 gives a randomly ordered 3115 * selection of tags that index into a table that describes 3116 * cache and tlb properties. 3117 */ 3118 3119 static const char l1_icache_str[] = "l1-icache"; 3120 static const char l1_dcache_str[] = "l1-dcache"; 3121 static const char l2_cache_str[] = "l2-cache"; 3122 static const char l3_cache_str[] = "l3-cache"; 3123 static const char itlb4k_str[] = "itlb-4K"; 3124 static const char dtlb4k_str[] = "dtlb-4K"; 3125 static const char itlb2M_str[] = "itlb-2M"; 3126 static const char itlb4M_str[] = "itlb-4M"; 3127 static const char dtlb4M_str[] = "dtlb-4M"; 3128 static const char dtlb24_str[] = "dtlb0-2M-4M"; 3129 static const char itlb424_str[] = "itlb-4K-2M-4M"; 3130 static const char itlb24_str[] = "itlb-2M-4M"; 3131 static const char dtlb44_str[] = "dtlb-4K-4M"; 3132 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 3133 static const char sl2_cache_str[] = "sectored-l2-cache"; 3134 static const char itrace_str[] = "itrace-cache"; 3135 static const char sl3_cache_str[] = "sectored-l3-cache"; 3136 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3137 3138 static 
const struct cachetab { 3139 uint8_t ct_code; 3140 uint8_t ct_assoc; 3141 uint16_t ct_line_size; 3142 size_t ct_size; 3143 const char *ct_label; 3144 } intel_ctab[] = { 3145 /* 3146 * maintain descending order! 3147 * 3148 * Codes ignored - Reason 3149 * ---------------------- 3150 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3151 * f0H/f1H - Currently we do not interpret prefetch size by design 3152 */ 3153 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3154 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3155 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3156 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3157 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3158 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3159 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3160 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3161 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3162 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3163 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3164 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3165 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3166 { 0xc0, 4, 0, 8, dtlb44_str }, 3167 { 0xba, 4, 0, 64, dtlb4k_str }, 3168 { 0xb4, 4, 0, 256, dtlb4k_str }, 3169 { 0xb3, 4, 0, 128, dtlb4k_str }, 3170 { 0xb2, 4, 0, 64, itlb4k_str }, 3171 { 0xb0, 4, 0, 128, itlb4k_str }, 3172 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3173 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3174 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3175 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3176 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3177 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3178 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3179 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3180 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3181 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3182 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3183 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3184 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3185 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3186 { 0x73, 8, 0, 64*1024, itrace_str}, 3187 { 0x72, 8, 0, 32*1024, 
itrace_str}, 3188 { 0x71, 8, 0, 16*1024, itrace_str}, 3189 { 0x70, 8, 0, 12*1024, itrace_str}, 3190 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3191 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3192 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3193 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3194 { 0x5d, 0, 0, 256, dtlb44_str}, 3195 { 0x5c, 0, 0, 128, dtlb44_str}, 3196 { 0x5b, 0, 0, 64, dtlb44_str}, 3197 { 0x5a, 4, 0, 32, dtlb24_str}, 3198 { 0x59, 0, 0, 16, dtlb4k_str}, 3199 { 0x57, 4, 0, 16, dtlb4k_str}, 3200 { 0x56, 4, 0, 16, dtlb4M_str}, 3201 { 0x55, 0, 0, 7, itlb24_str}, 3202 { 0x52, 0, 0, 256, itlb424_str}, 3203 { 0x51, 0, 0, 128, itlb424_str}, 3204 { 0x50, 0, 0, 64, itlb424_str}, 3205 { 0x4f, 0, 0, 32, itlb4k_str}, 3206 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3207 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3208 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3209 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3210 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3211 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3212 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3213 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3214 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3215 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3216 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3217 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3218 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3219 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3220 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3221 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3222 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3223 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3224 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3225 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3226 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3227 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3228 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3229 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3230 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3231 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3232 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3233 { 0x0d, 4, 
32, 16*1024, l1_dcache_str}, 3234 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3235 { 0x0b, 4, 0, 4, itlb4M_str}, 3236 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3237 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3238 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3239 { 0x05, 4, 0, 32, dtlb4M_str}, 3240 { 0x04, 4, 0, 8, dtlb4M_str}, 3241 { 0x03, 4, 0, 64, dtlb4k_str}, 3242 { 0x02, 4, 0, 2, itlb4M_str}, 3243 { 0x01, 4, 0, 32, itlb4k_str}, 3244 { 0 } 3245 }; 3246 3247 static const struct cachetab cyrix_ctab[] = { 3248 { 0x70, 4, 0, 32, "tlb-4K" }, 3249 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3250 { 0 } 3251 }; 3252 3253 /* 3254 * Search a cache table for a matching entry 3255 */ 3256 static const struct cachetab * 3257 find_cacheent(const struct cachetab *ct, uint_t code) 3258 { 3259 if (code != 0) { 3260 for (; ct->ct_code != 0; ct++) 3261 if (ct->ct_code <= code) 3262 break; 3263 if (ct->ct_code == code) 3264 return (ct); 3265 } 3266 return (NULL); 3267 } 3268 3269 /* 3270 * Populate cachetab entry with L2 or L3 cache-information using 3271 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3272 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3273 * information is found. 
3274 */ 3275 static int 3276 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3277 { 3278 uint32_t level, i; 3279 int ret = 0; 3280 3281 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3282 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3283 3284 if (level == 2 || level == 3) { 3285 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3286 ct->ct_line_size = 3287 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3288 ct->ct_size = ct->ct_assoc * 3289 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3290 ct->ct_line_size * 3291 (cpi->cpi_std_4[i]->cp_ecx + 1); 3292 3293 if (level == 2) { 3294 ct->ct_label = l2_cache_str; 3295 } else if (level == 3) { 3296 ct->ct_label = l3_cache_str; 3297 } 3298 ret = 1; 3299 } 3300 } 3301 3302 return (ret); 3303 } 3304 3305 /* 3306 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3307 * The walk is terminated if the walker returns non-zero. 3308 */ 3309 static void 3310 intel_walk_cacheinfo(struct cpuid_info *cpi, 3311 void *arg, int (*func)(void *, const struct cachetab *)) 3312 { 3313 const struct cachetab *ct; 3314 struct cachetab des_49_ct, des_b1_ct; 3315 uint8_t *dp; 3316 int i; 3317 3318 if ((dp = cpi->cpi_cacheinfo) == NULL) 3319 return; 3320 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3321 /* 3322 * For overloaded descriptor 0x49 we use cpuid function 4 3323 * if supported by the current processor, to create 3324 * cache information. 3325 * For overloaded descriptor 0xb1 we use X86_PAE flag 3326 * to disambiguate the cache information. 
3327 */ 3328 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3329 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3330 ct = &des_49_ct; 3331 } else if (*dp == 0xb1) { 3332 des_b1_ct.ct_code = 0xb1; 3333 des_b1_ct.ct_assoc = 4; 3334 des_b1_ct.ct_line_size = 0; 3335 if (x86_feature & X86_PAE) { 3336 des_b1_ct.ct_size = 8; 3337 des_b1_ct.ct_label = itlb2M_str; 3338 } else { 3339 des_b1_ct.ct_size = 4; 3340 des_b1_ct.ct_label = itlb4M_str; 3341 } 3342 ct = &des_b1_ct; 3343 } else { 3344 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3345 continue; 3346 } 3347 } 3348 3349 if (func(arg, ct) != 0) { 3350 break; 3351 } 3352 } 3353 } 3354 3355 /* 3356 * (Like the Intel one, except for Cyrix CPUs) 3357 */ 3358 static void 3359 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3360 void *arg, int (*func)(void *, const struct cachetab *)) 3361 { 3362 const struct cachetab *ct; 3363 uint8_t *dp; 3364 int i; 3365 3366 if ((dp = cpi->cpi_cacheinfo) == NULL) 3367 return; 3368 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3369 /* 3370 * Search Cyrix-specific descriptor table first .. 3371 */ 3372 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3373 if (func(arg, ct) != 0) 3374 break; 3375 continue; 3376 } 3377 /* 3378 * .. else fall back to the Intel one 3379 */ 3380 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3381 if (func(arg, ct) != 0) 3382 break; 3383 continue; 3384 } 3385 } 3386 } 3387 3388 /* 3389 * A cacheinfo walker that adds associativity, line-size, and size properties 3390 * to the devinfo node it is passed as an argument. 
3391 */ 3392 static int 3393 add_cacheent_props(void *arg, const struct cachetab *ct) 3394 { 3395 dev_info_t *devi = arg; 3396 3397 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3398 if (ct->ct_line_size != 0) 3399 add_cache_prop(devi, ct->ct_label, line_str, 3400 ct->ct_line_size); 3401 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3402 return (0); 3403 } 3404 3405 3406 static const char fully_assoc[] = "fully-associative?"; 3407 3408 /* 3409 * AMD style cache/tlb description 3410 * 3411 * Extended functions 5 and 6 directly describe properties of 3412 * tlbs and various cache levels. 3413 */ 3414 static void 3415 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3416 { 3417 switch (assoc) { 3418 case 0: /* reserved; ignore */ 3419 break; 3420 default: 3421 add_cache_prop(devi, label, assoc_str, assoc); 3422 break; 3423 case 0xff: 3424 add_cache_prop(devi, label, fully_assoc, 1); 3425 break; 3426 } 3427 } 3428 3429 static void 3430 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3431 { 3432 if (size == 0) 3433 return; 3434 add_cache_prop(devi, label, size_str, size); 3435 add_amd_assoc(devi, label, assoc); 3436 } 3437 3438 static void 3439 add_amd_cache(dev_info_t *devi, const char *label, 3440 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3441 { 3442 if (size == 0 || line_size == 0) 3443 return; 3444 add_amd_assoc(devi, label, assoc); 3445 /* 3446 * Most AMD parts have a sectored cache. Multiple cache lines are 3447 * associated with each tag. A sector consists of all cache lines 3448 * associated with a tag. For example, the AMD K6-III has a sector 3449 * size of 2 cache lines per tag. 
3450 */ 3451 if (lines_per_tag != 0) 3452 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3453 add_cache_prop(devi, label, line_str, line_size); 3454 add_cache_prop(devi, label, size_str, size * 1024); 3455 } 3456 3457 static void 3458 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3459 { 3460 switch (assoc) { 3461 case 0: /* off */ 3462 break; 3463 case 1: 3464 case 2: 3465 case 4: 3466 add_cache_prop(devi, label, assoc_str, assoc); 3467 break; 3468 case 6: 3469 add_cache_prop(devi, label, assoc_str, 8); 3470 break; 3471 case 8: 3472 add_cache_prop(devi, label, assoc_str, 16); 3473 break; 3474 case 0xf: 3475 add_cache_prop(devi, label, fully_assoc, 1); 3476 break; 3477 default: /* reserved; ignore */ 3478 break; 3479 } 3480 } 3481 3482 static void 3483 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3484 { 3485 if (size == 0 || assoc == 0) 3486 return; 3487 add_amd_l2_assoc(devi, label, assoc); 3488 add_cache_prop(devi, label, size_str, size); 3489 } 3490 3491 static void 3492 add_amd_l2_cache(dev_info_t *devi, const char *label, 3493 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3494 { 3495 if (size == 0 || assoc == 0 || line_size == 0) 3496 return; 3497 add_amd_l2_assoc(devi, label, assoc); 3498 if (lines_per_tag != 0) 3499 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3500 add_cache_prop(devi, label, line_str, line_size); 3501 add_cache_prop(devi, label, size_str, size * 1024); 3502 } 3503 3504 static void 3505 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3506 { 3507 struct cpuid_regs *cp; 3508 3509 if (cpi->cpi_xmaxeax < 0x80000005) 3510 return; 3511 cp = &cpi->cpi_extd[5]; 3512 3513 /* 3514 * 4M/2M L1 TLB configuration 3515 * 3516 * We report the size for 2M pages because AMD uses two 3517 * TLB entries for one 4M page. 
3518 */ 3519 add_amd_tlb(devi, "dtlb-2M", 3520 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3521 add_amd_tlb(devi, "itlb-2M", 3522 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3523 3524 /* 3525 * 4K L1 TLB configuration 3526 */ 3527 3528 switch (cpi->cpi_vendor) { 3529 uint_t nentries; 3530 case X86_VENDOR_TM: 3531 if (cpi->cpi_family >= 5) { 3532 /* 3533 * Crusoe processors have 256 TLB entries, but 3534 * cpuid data format constrains them to only 3535 * reporting 255 of them. 3536 */ 3537 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3538 nentries = 256; 3539 /* 3540 * Crusoe processors also have a unified TLB 3541 */ 3542 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3543 nentries); 3544 break; 3545 } 3546 /*FALLTHROUGH*/ 3547 default: 3548 add_amd_tlb(devi, itlb4k_str, 3549 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3550 add_amd_tlb(devi, dtlb4k_str, 3551 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3552 break; 3553 } 3554 3555 /* 3556 * data L1 cache configuration 3557 */ 3558 3559 add_amd_cache(devi, l1_dcache_str, 3560 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3561 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3562 3563 /* 3564 * code L1 cache configuration 3565 */ 3566 3567 add_amd_cache(devi, l1_icache_str, 3568 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3569 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3570 3571 if (cpi->cpi_xmaxeax < 0x80000006) 3572 return; 3573 cp = &cpi->cpi_extd[6]; 3574 3575 /* Check for a unified L2 TLB for large pages */ 3576 3577 if (BITX(cp->cp_eax, 31, 16) == 0) 3578 add_amd_l2_tlb(devi, "l2-tlb-2M", 3579 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3580 else { 3581 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3582 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3583 add_amd_l2_tlb(devi, "l2-itlb-2M", 3584 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3585 } 3586 3587 /* Check for a unified L2 TLB for 4K pages */ 3588 3589 if (BITX(cp->cp_ebx, 31, 16) 
== 0) { 3590 add_amd_l2_tlb(devi, "l2-tlb-4K", 3591 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3592 } else { 3593 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3594 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3595 add_amd_l2_tlb(devi, "l2-itlb-4K", 3596 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3597 } 3598 3599 add_amd_l2_cache(devi, l2_cache_str, 3600 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3601 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3602 } 3603 3604 /* 3605 * There are two basic ways that the x86 world describes it cache 3606 * and tlb architecture - Intel's way and AMD's way. 3607 * 3608 * Return which flavor of cache architecture we should use 3609 */ 3610 static int 3611 x86_which_cacheinfo(struct cpuid_info *cpi) 3612 { 3613 switch (cpi->cpi_vendor) { 3614 case X86_VENDOR_Intel: 3615 if (cpi->cpi_maxeax >= 2) 3616 return (X86_VENDOR_Intel); 3617 break; 3618 case X86_VENDOR_AMD: 3619 /* 3620 * The K5 model 1 was the first part from AMD that reported 3621 * cache sizes via extended cpuid functions. 3622 */ 3623 if (cpi->cpi_family > 5 || 3624 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3625 return (X86_VENDOR_AMD); 3626 break; 3627 case X86_VENDOR_TM: 3628 if (cpi->cpi_family >= 5) 3629 return (X86_VENDOR_AMD); 3630 /*FALLTHROUGH*/ 3631 default: 3632 /* 3633 * If they have extended CPU data for 0x80000005 3634 * then we assume they have AMD-format cache 3635 * information. 3636 * 3637 * If not, and the vendor happens to be Cyrix, 3638 * then try our-Cyrix specific handler. 3639 * 3640 * If we're not Cyrix, then assume we're using Intel's 3641 * table-driven format instead. 
3642 */ 3643 if (cpi->cpi_xmaxeax >= 0x80000005) 3644 return (X86_VENDOR_AMD); 3645 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3646 return (X86_VENDOR_Cyrix); 3647 else if (cpi->cpi_maxeax >= 2) 3648 return (X86_VENDOR_Intel); 3649 break; 3650 } 3651 return (-1); 3652 } 3653 3654 void 3655 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 3656 struct cpuid_info *cpi) 3657 { 3658 dev_info_t *cpu_devi; 3659 int create; 3660 3661 cpu_devi = (dev_info_t *)dip; 3662 3663 /* device_type */ 3664 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3665 "device_type", "cpu"); 3666 3667 /* reg */ 3668 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3669 "reg", cpu_id); 3670 3671 /* cpu-mhz, and clock-frequency */ 3672 if (cpu_freq > 0) { 3673 long long mul; 3674 3675 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3676 "cpu-mhz", cpu_freq); 3677 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3678 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3679 "clock-frequency", (int)mul); 3680 } 3681 3682 if ((x86_feature & X86_CPUID) == 0) { 3683 return; 3684 } 3685 3686 /* vendor-id */ 3687 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3688 "vendor-id", cpi->cpi_vendorstr); 3689 3690 if (cpi->cpi_maxeax == 0) { 3691 return; 3692 } 3693 3694 /* 3695 * family, model, and step 3696 */ 3697 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3698 "family", CPI_FAMILY(cpi)); 3699 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3700 "cpu-model", CPI_MODEL(cpi)); 3701 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3702 "stepping-id", CPI_STEP(cpi)); 3703 3704 /* type */ 3705 switch (cpi->cpi_vendor) { 3706 case X86_VENDOR_Intel: 3707 create = 1; 3708 break; 3709 default: 3710 create = 0; 3711 break; 3712 } 3713 if (create) 3714 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3715 "type", CPI_TYPE(cpi)); 3716 3717 /* ext-family */ 3718 switch (cpi->cpi_vendor) { 3719 case X86_VENDOR_Intel: 3720 case X86_VENDOR_AMD: 3721 create = 
cpi->cpi_family >= 0xf; 3722 break; 3723 default: 3724 create = 0; 3725 break; 3726 } 3727 if (create) 3728 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3729 "ext-family", CPI_FAMILY_XTD(cpi)); 3730 3731 /* ext-model */ 3732 switch (cpi->cpi_vendor) { 3733 case X86_VENDOR_Intel: 3734 create = IS_EXTENDED_MODEL_INTEL(cpi); 3735 break; 3736 case X86_VENDOR_AMD: 3737 create = CPI_FAMILY(cpi) == 0xf; 3738 break; 3739 default: 3740 create = 0; 3741 break; 3742 } 3743 if (create) 3744 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3745 "ext-model", CPI_MODEL_XTD(cpi)); 3746 3747 /* generation */ 3748 switch (cpi->cpi_vendor) { 3749 case X86_VENDOR_AMD: 3750 /* 3751 * AMD K5 model 1 was the first part to support this 3752 */ 3753 create = cpi->cpi_xmaxeax >= 0x80000001; 3754 break; 3755 default: 3756 create = 0; 3757 break; 3758 } 3759 if (create) 3760 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3761 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3762 3763 /* brand-id */ 3764 switch (cpi->cpi_vendor) { 3765 case X86_VENDOR_Intel: 3766 /* 3767 * brand id first appeared on Pentium III Xeon model 8, 3768 * and Celeron model 8 processors and Opteron 3769 */ 3770 create = cpi->cpi_family > 6 || 3771 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3772 break; 3773 case X86_VENDOR_AMD: 3774 create = cpi->cpi_family >= 0xf; 3775 break; 3776 default: 3777 create = 0; 3778 break; 3779 } 3780 if (create && cpi->cpi_brandid != 0) { 3781 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3782 "brand-id", cpi->cpi_brandid); 3783 } 3784 3785 /* chunks, and apic-id */ 3786 switch (cpi->cpi_vendor) { 3787 /* 3788 * first available on Pentium IV and Opteron (K8) 3789 */ 3790 case X86_VENDOR_Intel: 3791 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3792 break; 3793 case X86_VENDOR_AMD: 3794 create = cpi->cpi_family >= 0xf; 3795 break; 3796 default: 3797 create = 0; 3798 break; 3799 } 3800 if (create) { 3801 (void) ndi_prop_update_int(DDI_DEV_T_NONE, 
cpu_devi, 3802 "chunks", CPI_CHUNKS(cpi)); 3803 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3804 "apic-id", cpi->cpi_apicid); 3805 if (cpi->cpi_chipid >= 0) { 3806 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3807 "chip#", cpi->cpi_chipid); 3808 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3809 "clog#", cpi->cpi_clogid); 3810 } 3811 } 3812 3813 /* cpuid-features */ 3814 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3815 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3816 3817 3818 /* cpuid-features-ecx */ 3819 switch (cpi->cpi_vendor) { 3820 case X86_VENDOR_Intel: 3821 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3822 break; 3823 default: 3824 create = 0; 3825 break; 3826 } 3827 if (create) 3828 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3829 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3830 3831 /* ext-cpuid-features */ 3832 switch (cpi->cpi_vendor) { 3833 case X86_VENDOR_Intel: 3834 case X86_VENDOR_AMD: 3835 case X86_VENDOR_Cyrix: 3836 case X86_VENDOR_TM: 3837 case X86_VENDOR_Centaur: 3838 create = cpi->cpi_xmaxeax >= 0x80000001; 3839 break; 3840 default: 3841 create = 0; 3842 break; 3843 } 3844 if (create) { 3845 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3846 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3847 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3848 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3849 } 3850 3851 /* 3852 * Brand String first appeared in Intel Pentium IV, AMD K5 3853 * model 1, and Cyrix GXm. On earlier models we try and 3854 * simulate something similar .. so this string should always 3855 * same -something- about the processor, however lame. 
3856 */ 3857 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3858 "brand-string", cpi->cpi_brandstr); 3859 3860 /* 3861 * Finally, cache and tlb information 3862 */ 3863 switch (x86_which_cacheinfo(cpi)) { 3864 case X86_VENDOR_Intel: 3865 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3866 break; 3867 case X86_VENDOR_Cyrix: 3868 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3869 break; 3870 case X86_VENDOR_AMD: 3871 amd_cache_info(cpi, cpu_devi); 3872 break; 3873 default: 3874 break; 3875 } 3876 } 3877 3878 struct l2info { 3879 int *l2i_csz; 3880 int *l2i_lsz; 3881 int *l2i_assoc; 3882 int l2i_ret; 3883 }; 3884 3885 /* 3886 * A cacheinfo walker that fetches the size, line-size and associativity 3887 * of the L2 cache 3888 */ 3889 static int 3890 intel_l2cinfo(void *arg, const struct cachetab *ct) 3891 { 3892 struct l2info *l2i = arg; 3893 int *ip; 3894 3895 if (ct->ct_label != l2_cache_str && 3896 ct->ct_label != sl2_cache_str) 3897 return (0); /* not an L2 -- keep walking */ 3898 3899 if ((ip = l2i->l2i_csz) != NULL) 3900 *ip = ct->ct_size; 3901 if ((ip = l2i->l2i_lsz) != NULL) 3902 *ip = ct->ct_line_size; 3903 if ((ip = l2i->l2i_assoc) != NULL) 3904 *ip = ct->ct_assoc; 3905 l2i->l2i_ret = ct->ct_size; 3906 return (1); /* was an L2 -- terminate walk */ 3907 } 3908 3909 /* 3910 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3911 * 3912 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3913 * value is the associativity, the associativity for the L2 cache and 3914 * tlb is encoded in the following table. The 4 bit L2 value serves as 3915 * an index into the amd_afd[] array to determine the associativity. 3916 * -1 is undefined. 0 is fully associative. 
3917 */ 3918 3919 static int amd_afd[] = 3920 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3921 3922 static void 3923 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3924 { 3925 struct cpuid_regs *cp; 3926 uint_t size, assoc; 3927 int i; 3928 int *ip; 3929 3930 if (cpi->cpi_xmaxeax < 0x80000006) 3931 return; 3932 cp = &cpi->cpi_extd[6]; 3933 3934 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3935 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3936 uint_t cachesz = size * 1024; 3937 assoc = amd_afd[i]; 3938 3939 ASSERT(assoc != -1); 3940 3941 if ((ip = l2i->l2i_csz) != NULL) 3942 *ip = cachesz; 3943 if ((ip = l2i->l2i_lsz) != NULL) 3944 *ip = BITX(cp->cp_ecx, 7, 0); 3945 if ((ip = l2i->l2i_assoc) != NULL) 3946 *ip = assoc; 3947 l2i->l2i_ret = cachesz; 3948 } 3949 } 3950 3951 int 3952 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3953 { 3954 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3955 struct l2info __l2info, *l2i = &__l2info; 3956 3957 l2i->l2i_csz = csz; 3958 l2i->l2i_lsz = lsz; 3959 l2i->l2i_assoc = assoc; 3960 l2i->l2i_ret = -1; 3961 3962 switch (x86_which_cacheinfo(cpi)) { 3963 case X86_VENDOR_Intel: 3964 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3965 break; 3966 case X86_VENDOR_Cyrix: 3967 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3968 break; 3969 case X86_VENDOR_AMD: 3970 amd_l2cacheinfo(cpi, l2i); 3971 break; 3972 default: 3973 break; 3974 } 3975 return (l2i->l2i_ret); 3976 } 3977 3978 #if !defined(__xpv) 3979 3980 uint32_t * 3981 cpuid_mwait_alloc(cpu_t *cpu) 3982 { 3983 uint32_t *ret; 3984 size_t mwait_size; 3985 3986 ASSERT(cpuid_checkpass(CPU, 2)); 3987 3988 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3989 if (mwait_size == 0) 3990 return (NULL); 3991 3992 /* 3993 * kmem_alloc() returns cache line size aligned data for mwait_size 3994 * allocations. mwait_size is currently cache line sized. Neither 3995 * of these implementation details are guarantied to be true in the 3996 * future. 
3997 * 3998 * First try allocating mwait_size as kmem_alloc() currently returns 3999 * correctly aligned memory. If kmem_alloc() does not return 4000 * mwait_size aligned memory, then use mwait_size ROUNDUP. 4001 * 4002 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 4003 * decide to free this memory. 4004 */ 4005 ret = kmem_zalloc(mwait_size, KM_SLEEP); 4006 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 4007 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4008 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 4009 *ret = MWAIT_RUNNING; 4010 return (ret); 4011 } else { 4012 kmem_free(ret, mwait_size); 4013 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 4014 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4015 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 4016 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 4017 *ret = MWAIT_RUNNING; 4018 return (ret); 4019 } 4020 } 4021 4022 void 4023 cpuid_mwait_free(cpu_t *cpu) 4024 { 4025 if (cpu->cpu_m.mcpu_cpi == NULL) { 4026 return; 4027 } 4028 4029 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 4030 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 4031 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 4032 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 4033 } 4034 4035 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 4036 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 4037 } 4038 4039 void 4040 patch_tsc_read(int flag) 4041 { 4042 size_t cnt; 4043 4044 switch (flag) { 4045 case X86_NO_TSC: 4046 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 4047 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 4048 break; 4049 case X86_HAVE_TSCP: 4050 cnt = &_tscp_end - &_tscp_start; 4051 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 4052 break; 4053 case X86_TSC_MFENCE: 4054 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 4055 (void) memcpy((void *)tsc_read, 4056 (void *)&_tsc_mfence_start, cnt); 4057 break; 4058 case X86_TSC_LFENCE: 4059 cnt 
= &_tsc_lfence_end - &_tsc_lfence_start; 4060 (void) memcpy((void *)tsc_read, 4061 (void *)&_tsc_lfence_start, cnt); 4062 break; 4063 default: 4064 break; 4065 } 4066 } 4067 4068 int 4069 cpuid_deep_cstates_supported(void) 4070 { 4071 struct cpuid_info *cpi; 4072 struct cpuid_regs regs; 4073 4074 ASSERT(cpuid_checkpass(CPU, 1)); 4075 4076 cpi = CPU->cpu_m.mcpu_cpi; 4077 4078 if (!(x86_feature & X86_CPUID)) 4079 return (0); 4080 4081 switch (cpi->cpi_vendor) { 4082 case X86_VENDOR_Intel: 4083 if (cpi->cpi_xmaxeax < 0x80000007) 4084 return (0); 4085 4086 /* 4087 * TSC run at a constant rate in all ACPI C-states? 4088 */ 4089 regs.cp_eax = 0x80000007; 4090 (void) __cpuid_insn(®s); 4091 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 4092 4093 default: 4094 return (0); 4095 } 4096 } 4097 4098 #endif /* !__xpv */ 4099 4100 void 4101 post_startup_cpu_fixups(void) 4102 { 4103 #ifndef __xpv 4104 /* 4105 * Some AMD processors support C1E state. Entering this state will 4106 * cause the local APIC timer to stop, which we can't deal with at 4107 * this time. 4108 */ 4109 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 4110 on_trap_data_t otd; 4111 uint64_t reg; 4112 4113 if (!on_trap(&otd, OT_DATA_ACCESS)) { 4114 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 4115 /* Disable C1E state if it is enabled by BIOS */ 4116 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 4117 AMD_ACTONCMPHALT_MASK) { 4118 reg &= ~(AMD_ACTONCMPHALT_MASK << 4119 AMD_ACTONCMPHALT_SHIFT); 4120 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 4121 } 4122 } 4123 no_trap(); 4124 } 4125 #endif /* !__xpv */ 4126 } 4127 4128 /* 4129 * Starting with the Westmere processor the local 4130 * APIC timer will continue running in all C-states, 4131 * including the deepest C-states. 
4132 */ 4133 int 4134 cpuid_arat_supported(void) 4135 { 4136 struct cpuid_info *cpi; 4137 struct cpuid_regs regs; 4138 4139 ASSERT(cpuid_checkpass(CPU, 1)); 4140 ASSERT(x86_feature & X86_CPUID); 4141 4142 cpi = CPU->cpu_m.mcpu_cpi; 4143 4144 switch (cpi->cpi_vendor) { 4145 case X86_VENDOR_Intel: 4146 /* 4147 * Always-running Local APIC Timer is 4148 * indicated by CPUID.6.EAX[2]. 4149 */ 4150 if (cpi->cpi_maxeax >= 6) { 4151 regs.cp_eax = 6; 4152 (void) cpuid_insn(NULL, ®s); 4153 return (regs.cp_eax & CPUID_CSTATE_ARAT); 4154 } else { 4155 return (0); 4156 } 4157 default: 4158 return (0); 4159 } 4160 } 4161 4162 /* 4163 * Check support for Intel ENERGY_PERF_BIAS feature 4164 */ 4165 int 4166 cpuid_iepb_supported(struct cpu *cp) 4167 { 4168 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi; 4169 struct cpuid_regs regs; 4170 4171 ASSERT(cpuid_checkpass(cp, 1)); 4172 4173 if (!(x86_feature & X86_CPUID) || !(x86_feature & X86_MSR)) { 4174 return (0); 4175 } 4176 4177 /* 4178 * Intel ENERGY_PERF_BIAS MSR is indicated by 4179 * capability bit CPUID.6.ECX.3 4180 */ 4181 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6)) 4182 return (0); 4183 4184 regs.cp_eax = 0x6; 4185 (void) cpuid_insn(NULL, ®s); 4186 return (regs.cp_ecx & CPUID_EPB_SUPPORT); 4187 } 4188 4189 #if defined(__amd64) && !defined(__xpv) 4190 /* 4191 * Patch in versions of bcopy for high performance Intel Nhm processors 4192 * and later... 4193 */ 4194 void 4195 patch_memops(uint_t vendor) 4196 { 4197 size_t cnt, i; 4198 caddr_t to, from; 4199 4200 if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) { 4201 cnt = &bcopy_patch_end - &bcopy_patch_start; 4202 to = &bcopy_ck_size; 4203 from = &bcopy_patch_start; 4204 for (i = 0; i < cnt; i++) { 4205 *to++ = *from++; 4206 } 4207 } 4208 } 4209 #endif /* __amd64 && !__xpv */ 4210