1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 by Delphix. All rights reserved. 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net> 26 */ 27 /* 28 * Copyright (c) 2010, Intel Corporation. 29 * All rights reserved. 30 */ 31 /* 32 * Portions Copyright 2009 Advanced Micro Devices, Inc. 33 */ 34 /* 35 * Copyright 2016 Joyent, Inc. 36 */ 37 /* 38 * Various routines to handle identification 39 * and classification of x86 processors. 40 */ 41 42 #include <sys/types.h> 43 #include <sys/archsystm.h> 44 #include <sys/x86_archext.h> 45 #include <sys/kmem.h> 46 #include <sys/systm.h> 47 #include <sys/cmn_err.h> 48 #include <sys/sunddi.h> 49 #include <sys/sunndi.h> 50 #include <sys/cpuvar.h> 51 #include <sys/processor.h> 52 #include <sys/sysmacros.h> 53 #include <sys/pg.h> 54 #include <sys/fp.h> 55 #include <sys/controlregs.h> 56 #include <sys/bitmap.h> 57 #include <sys/auxv_386.h> 58 #include <sys/memnode.h> 59 #include <sys/pci_cfgspace.h> 60 #include <sys/comm_page.h> 61 #include <sys/tsc.h> 62 63 #ifdef __xpv 64 #include <sys/hypervisor.h> 65 #else 66 #include <sys/ontrap.h> 67 #endif 68 69 /* 70 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 71 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 72 * them accordingly. For most modern processors, feature detection occurs here 73 * in pass 1. 74 * 75 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 76 * for the boot CPU and does the basic analysis that the early kernel needs. 77 * x86_featureset is set based on the return value of cpuid_pass1() of the boot 78 * CPU. 79 * 80 * Pass 1 includes: 81 * 82 * o Determining vendor/model/family/stepping and setting x86_type and 83 * x86_vendor accordingly. 84 * o Processing the feature flags returned by the cpuid instruction while 85 * applying any workarounds or tricks for the specific processor. 86 * o Mapping the feature flags into Solaris feature bits (X86_*). 87 * o Processing extended feature flags if supported by the processor, 88 * again while applying specific processor knowledge. 89 * o Determining the CMT characteristics of the system. 90 * 91 * Pass 1 is done on non-boot CPUs during their initialization and the results 92 * are used only as a meager attempt at ensuring that all processors within the 93 * system support the same features. 94 * 95 * Pass 2 of cpuid feature analysis happens just at the beginning 96 * of startup(). 
It just copies in and corrects the remainder 97 * of the cpuid data we depend on: standard cpuid functions that we didn't 98 * need for pass1 feature analysis, and extended cpuid functions beyond the 99 * simple feature processing done in pass1. 100 * 101 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 102 * particular kernel memory allocation has been made available. It creates a 103 * readable brand string based on the data collected in the first two passes. 104 * 105 * Pass 4 of cpuid analysis is invoked after post_startup() when all 106 * the support infrastructure for various hardware features has been 107 * initialized. It determines which processor features will be reported 108 * to userland via the aux vector. 109 * 110 * All passes are executed on all CPUs, but only the boot CPU determines what 111 * features the kernel will use. 112 * 113 * Much of the worst junk in this file is for the support of processors 114 * that didn't really implement the cpuid instruction properly. 115 * 116 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 117 * the pass numbers. Accordingly, changes to the pass code may require changes 118 * to the accessor code. 119 */ 120 121 uint_t x86_vendor = X86_VENDOR_IntelClone; 122 uint_t x86_type = X86_TYPE_OTHER; 123 uint_t x86_clflush_size = 0; 124 125 uint_t pentiumpro_bug4046376; 126 127 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)]; 128 129 static char *x86_feature_names[NUM_X86_FEATURES] = { 130 "lgpg", 131 "tsc", 132 "msr", 133 "mtrr", 134 "pge", 135 "de", 136 "cmov", 137 "mmx", 138 "mca", 139 "pae", 140 "cv8", 141 "pat", 142 "sep", 143 "sse", 144 "sse2", 145 "htt", 146 "asysc", 147 "nx", 148 "sse3", 149 "cx16", 150 "cmp", 151 "tscp", 152 "mwait", 153 "sse4a", 154 "cpuid", 155 "ssse3", 156 "sse4_1", 157 "sse4_2", 158 "1gpg", 159 "clfsh", 160 "64", 161 "aes", 162 "pclmulqdq", 163 "xsave", 164 "avx", 165 "vmx", 166 "svm", 167 "topoext", 168 "f16c", 169 "rdrand", 170 "x2apic", 171 "avx2", 172 "bmi1", 173 "bmi2", 174 "fma", 175 "smep" 176 }; 177 178 boolean_t 179 is_x86_feature(void *featureset, uint_t feature) 180 { 181 ASSERT(feature < NUM_X86_FEATURES); 182 return (BT_TEST((ulong_t *)featureset, feature)); 183 } 184 185 void 186 add_x86_feature(void *featureset, uint_t feature) 187 { 188 ASSERT(feature < NUM_X86_FEATURES); 189 BT_SET((ulong_t *)featureset, feature); 190 } 191 192 void 193 remove_x86_feature(void *featureset, uint_t feature) 194 { 195 ASSERT(feature < NUM_X86_FEATURES); 196 BT_CLEAR((ulong_t *)featureset, feature); 197 } 198 199 boolean_t 200 compare_x86_featureset(void *setA, void *setB) 201 { 202 /* 203 * We assume that the unused bits of the bitmap are always zero. 204 */ 205 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) { 206 return (B_TRUE); 207 } else { 208 return (B_FALSE); 209 } 210 } 211 212 void 213 print_x86_featureset(void *featureset) 214 { 215 uint_t i; 216 217 for (i = 0; i < NUM_X86_FEATURES; i++) { 218 if (is_x86_feature(featureset, i)) { 219 cmn_err(CE_CONT, "?x86_feature: %s\n", 220 x86_feature_names[i]); 221 } 222 } 223 } 224 225 static size_t xsave_state_size = 0; 226 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE); 227 boolean_t xsave_force_disable = B_FALSE; 228 229 /* 230 * This is set to platform type we are running on. 231 */ 232 static int platform_type = -1; 233 234 #if !defined(__xpv) 235 /* 236 * Variable to patch if hypervisor platform detection needs to be 237 * disabled (e.g. 
 * platform_type will always be HW_NATIVE if this is 0).
 */
int enable_platform_detection = 1;
#endif

/*
 * monitor/mwait info.
 *
 * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
 */
struct mwait_info {
	size_t		mon_min;	/* min size to avoid missed wakeups */
	size_t		mon_max;	/* size to avoid false wakeups */
	size_t		size_actual;	/* size actually allocated */
	void		*buf_actual;	/* memory actually allocated */
	uint32_t	support;	/* processor support of monitor/mwait */
};

/*
 * xsave/xrestor info.
 *
 * This structure contains HW feature bits and the size of the xsave save
 * area.  Note: the kernel will use the maximum size required for all hardware
 * features.  It is not optimized for potential memory savings if features at
 * the end of the save area are not enabled.
 */
struct xsave_info {
	uint32_t	xsav_hw_features_low;	/* Supported HW features */
	uint32_t	xsav_hw_features_high;	/* Supported HW features */
	size_t		xsav_max_size;	/* max size save area for HW features */
	size_t		ymm_size;	/* AVX: size of ymm save area */
	size_t		ymm_offset;	/* AVX: offset for ymm save area */
};


/*
 * These constants determine how many of the elements of the
 * cpuid we cache in the cpuid_info data structure; the
 * remaining elements are accessible via the cpuid instruction.
 */

#define	NMAX_CPI_STD	8		/* eax = 0 .. 7 */
#define	NMAX_CPI_EXTD	0x1f		/* eax = 0x80000000 .. 0x8000001e */

/*
 * Some terminology needs to be explained:
 * - Socket: Something that can be plugged into a motherboard.
 * - Package: Same as socket.
 * - Chip: Same as socket.  Note that AMD's documentation uses the term
 *   "chip" differently: there, chip is the same as processor node (below).
 * - Processor node: Some AMD processors have more than one
 *   "subprocessor" embedded in a package.  These subprocessors (nodes)
 *   are fully-functional processors themselves with cores, caches,
 *   memory controllers, and PCI configuration spaces.  They are connected
 *   inside the package with HyperTransport links.  On single-node
 *   processors, the processor node is equivalent to chip/socket/package.
 * - Compute Unit: Some AMD processors pair cores in "compute units" that
 *   share the FPU and the I$ and L2 caches.
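 *
 * Purely as an illustration of how those terms map onto the fields kept in
 * struct cpuid_info below (a hypothetical part, not a claim about any
 * specific product): an AMD family 0x15 package with two processor nodes,
 * each holding four compute units of two cores, would be described as
 *
 *	1 socket/package/chip	cpi_chipid
 *	2 processor nodes	cpi_procnodeid, cpi_procnodes_per_pkg == 2
 *	8 compute units		cpi_compunitid, cpi_cores_per_compunit == 2
 *	16 cores		cpi_coreid, cpi_pkgcoreid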
 */

struct cpuid_info {
	uint_t cpi_pass;		/* last pass completed */
	/*
	 * standard function information
	 */
	uint_t cpi_maxeax;		/* fn 0: %eax */
	char cpi_vendorstr[13];		/* fn 0: %ebx:%ecx:%edx */
	uint_t cpi_vendor;		/* enum of cpi_vendorstr */

	uint_t cpi_family;		/* fn 1: extended family */
	uint_t cpi_model;		/* fn 1: extended model */
	uint_t cpi_step;		/* fn 1: stepping */
	chipid_t cpi_chipid;		/* fn 1: %ebx:  Intel: chip # */
					/*		AMD: package/socket # */
	uint_t cpi_brandid;		/* fn 1: %ebx: brand ID */
	int cpi_clogid;			/* fn 1: %ebx: thread # */
	uint_t cpi_ncpu_per_chip;	/* fn 1: %ebx: logical cpu count */
	uint8_t cpi_cacheinfo[16];	/* fn 2: intel-style cache desc */
	uint_t cpi_ncache;		/* fn 2: number of elements */
	uint_t cpi_ncpu_shr_last_cache;	/* fn 4: %eax: ncpus sharing cache */
	id_t cpi_last_lvl_cacheid;	/* fn 4: %eax: derived cache id */
	uint_t cpi_std_4_size;		/* fn 4: number of fn 4 elements */
	struct cpuid_regs **cpi_std_4;	/* fn 4: %ecx == 0 .. fn4_size */
	struct cpuid_regs cpi_std[NMAX_CPI_STD];	/* 0 .. 7 */
	/*
	 * extended function information
	 */
	uint_t cpi_xmaxeax;		/* fn 0x80000000: %eax */
	char cpi_brandstr[49];		/* fn 0x8000000[234] */
	uint8_t cpi_pabits;		/* fn 0x80000008: %eax */
	uint8_t cpi_vabits;		/* fn 0x80000008: %eax */
	struct cpuid_regs cpi_extd[NMAX_CPI_EXTD];	/* 0x800000XX */

	id_t cpi_coreid;		/* same coreid => strands share core */
	int cpi_pkgcoreid;		/* core number within single package */
	uint_t cpi_ncore_per_chip;	/* AMD: fn 0x80000008: %ecx[7-0] */
					/* Intel: fn 4: %eax[31-26] */
	/*
	 * supported feature information
	 */
	uint32_t cpi_support[6];
#define	STD_EDX_FEATURES	0
#define	AMD_EDX_FEATURES	1
#define	TM_EDX_FEATURES		2
#define	STD_ECX_FEATURES	3
#define	AMD_ECX_FEATURES	4
#define	STD_EBX_FEATURES	5
	/*
	 * Synthesized information, where known.
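	 * (The chiprev and socket fields are synthesized near the end of
	 * cpuid_pass1() via the cpuid_subr.c helpers declared further
	 * below, e.g.
	 *
	 *	cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor,
	 *	    cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
	 *
	 * so they are only meaningful once pass 1 has completed.)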
349 */ 350 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 351 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 352 uint32_t cpi_socket; /* Chip package/socket type */ 353 354 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 355 uint32_t cpi_apicid; 356 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 357 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 358 /* Intel: 1 */ 359 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */ 360 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */ 361 362 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */ 363 }; 364 365 366 static struct cpuid_info cpuid_info0; 367 368 /* 369 * These bit fields are defined by the Intel Application Note AP-485 370 * "Intel Processor Identification and the CPUID Instruction" 371 */ 372 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 373 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 374 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 375 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 376 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 377 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 378 379 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 380 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 381 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 382 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 383 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx) 384 385 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 386 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 387 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 388 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 389 390 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 391 #define CPI_XMAXEAX_MAX 0x80000100 392 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 393 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 394 395 /* 396 * Function 4 (Deterministic Cache Parameters) macros 397 * Defined by Intel Application Note AP-485 398 */ 399 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 400 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 401 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 402 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 403 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 404 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 405 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 406 407 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 408 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 409 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 410 411 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 412 413 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 414 415 416 /* 417 * A couple of shorthand macros to identify "later" P6-family chips 418 * like the Pentium M and Core. 
First, the "older" P6-based stuff
 * (loosely defined as "pre-Pentium-4"):
 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
 */

#define	IS_LEGACY_P6(cpi) (			\
	cpi->cpi_family == 6 &&			\
		(cpi->cpi_model == 1 ||		\
		cpi->cpi_model == 3 ||		\
		cpi->cpi_model == 5 ||		\
		cpi->cpi_model == 6 ||		\
		cpi->cpi_model == 7 ||		\
		cpi->cpi_model == 8 ||		\
		cpi->cpi_model == 0xA ||	\
		cpi->cpi_model == 0xB)		\
)

/* A "new F6" is everything with family 6 that's not the above */
#define	IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))

/* Extended family/model support */
#define	IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
	cpi->cpi_family >= 0xf)

/*
 * Info for monitor/mwait idle loop.
 *
 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
 * 2006.
 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
 * Documentation Updates" #33633, Rev 2.05, December 2006.
 */
#define	MWAIT_SUPPORT		(0x00000001)	/* mwait supported */
#define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
#define	MWAIT_ECX_INT_ENABLE	(0x00000004)	/* ecx 1 extension supported */
#define	MWAIT_SUPPORTED(cpi)	((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
#define	MWAIT_INT_ENABLE(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x2)
#define	MWAIT_EXTENSION(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x1)
#define	MWAIT_SIZE_MIN(cpi)	BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
#define	MWAIT_SIZE_MAX(cpi)	BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
/*
 * Number of sub-cstates for a given c-state.
 */
#define	MWAIT_NUM_SUBC_STATES(cpi, c_state)			\
	BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)

/*
 * XSAVE leaf 0xD enumeration
 */
#define	CPUID_LEAFD_2_YMM_OFFSET	576
#define	CPUID_LEAFD_2_YMM_SIZE		256

/*
 * Functions we consume from cpuid_subr.c; don't publish these in a header
 * file to try and keep people using the expected cpuid_* interfaces.
 */
extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
extern uint_t _cpuid_vendorstr_to_vendorcode(char *);

/*
 * Apply various platform-dependent restrictions where the
 * underlying platform restrictions mean the CPU can be marked
 * as less capable than its cpuid instruction would imply.
 */
#if defined(__xpv)
static void
platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
{
	switch (eax) {
	case 1: {
		uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
493 0 : CPUID_INTC_EDX_MCA; 494 cp->cp_edx &= 495 ~(mcamask | 496 CPUID_INTC_EDX_PSE | 497 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 498 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 499 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 500 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 501 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 502 break; 503 } 504 505 case 0x80000001: 506 cp->cp_edx &= 507 ~(CPUID_AMD_EDX_PSE | 508 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 509 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 510 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 511 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 512 CPUID_AMD_EDX_TSCP); 513 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 514 break; 515 default: 516 break; 517 } 518 519 switch (vendor) { 520 case X86_VENDOR_Intel: 521 switch (eax) { 522 case 4: 523 /* 524 * Zero out the (ncores-per-chip - 1) field 525 */ 526 cp->cp_eax &= 0x03fffffff; 527 break; 528 default: 529 break; 530 } 531 break; 532 case X86_VENDOR_AMD: 533 switch (eax) { 534 535 case 0x80000001: 536 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 537 break; 538 539 case 0x80000008: 540 /* 541 * Zero out the (ncores-per-chip - 1) field 542 */ 543 cp->cp_ecx &= 0xffffff00; 544 break; 545 default: 546 break; 547 } 548 break; 549 default: 550 break; 551 } 552 } 553 #else 554 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 555 #endif 556 557 /* 558 * Some undocumented ways of patching the results of the cpuid 559 * instruction to permit running Solaris 10 on future cpus that 560 * we don't currently support. Could be set to non-zero values 561 * via settings in eeprom. 562 */ 563 564 uint32_t cpuid_feature_ecx_include; 565 uint32_t cpuid_feature_ecx_exclude; 566 uint32_t cpuid_feature_edx_include; 567 uint32_t cpuid_feature_edx_exclude; 568 569 /* 570 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. 571 */ 572 void 573 cpuid_alloc_space(cpu_t *cpu) 574 { 575 /* 576 * By convention, cpu0 is the boot cpu, which is set up 577 * before memory allocation is available. All other cpus get 578 * their cpuid_info struct allocated here. 579 */ 580 ASSERT(cpu->cpu_id != 0); 581 ASSERT(cpu->cpu_m.mcpu_cpi == NULL); 582 cpu->cpu_m.mcpu_cpi = 583 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 584 } 585 586 void 587 cpuid_free_space(cpu_t *cpu) 588 { 589 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 590 int i; 591 592 ASSERT(cpi != NULL); 593 ASSERT(cpi != &cpuid_info0); 594 595 /* 596 * Free up any function 4 related dynamic storage 597 */ 598 for (i = 1; i < cpi->cpi_std_4_size; i++) 599 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 600 if (cpi->cpi_std_4_size > 0) 601 kmem_free(cpi->cpi_std_4, 602 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 603 604 kmem_free(cpi, sizeof (*cpi)); 605 cpu->cpu_m.mcpu_cpi = NULL; 606 } 607 608 #if !defined(__xpv) 609 /* 610 * Determine the type of the underlying platform. This is used to customize 611 * initialization of various subsystems (e.g. TSC). determine_platform() must 612 * only ever be called once to prevent two processors from seeing different 613 * values of platform_type. Must be called before cpuid_pass1(), the earliest 614 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv). 
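 *
 * A rough sketch of the intended consumption (illustrative only; per the
 * notes above the single determine_platform() call happens early in boot,
 * and the HW_* values act as flag bits, as the Xen HVM handling below
 * implies):
 *
 *	determine_platform();
 *	...
 *	if (get_hwenv() & HW_XEN_HVM)
 *		... select hypervisor-aware TSC handling ...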
615 */ 616 void 617 determine_platform(void) 618 { 619 struct cpuid_regs cp; 620 uint32_t base; 621 uint32_t regs[4]; 622 char *hvstr = (char *)regs; 623 624 ASSERT(platform_type == -1); 625 626 platform_type = HW_NATIVE; 627 628 if (!enable_platform_detection) 629 return; 630 631 /* 632 * If Hypervisor CPUID bit is set, try to determine hypervisor 633 * vendor signature, and set platform type accordingly. 634 * 635 * References: 636 * http://lkml.org/lkml/2008/10/1/246 637 * http://kb.vmware.com/kb/1009458 638 */ 639 cp.cp_eax = 0x1; 640 (void) __cpuid_insn(&cp); 641 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) { 642 cp.cp_eax = 0x40000000; 643 (void) __cpuid_insn(&cp); 644 regs[0] = cp.cp_ebx; 645 regs[1] = cp.cp_ecx; 646 regs[2] = cp.cp_edx; 647 regs[3] = 0; 648 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) { 649 platform_type = HW_XEN_HVM; 650 return; 651 } 652 if (strcmp(hvstr, HVSIG_VMWARE) == 0) { 653 platform_type = HW_VMWARE; 654 return; 655 } 656 if (strcmp(hvstr, HVSIG_KVM) == 0) { 657 platform_type = HW_KVM; 658 return; 659 } 660 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0) 661 platform_type = HW_MICROSOFT; 662 } else { 663 /* 664 * Check older VMware hardware versions. VMware hypervisor is 665 * detected by performing an IN operation to VMware hypervisor 666 * port and checking that value returned in %ebx is VMware 667 * hypervisor magic value. 668 * 669 * References: http://kb.vmware.com/kb/1009458 670 */ 671 vmware_port(VMWARE_HVCMD_GETVERSION, regs); 672 if (regs[1] == VMWARE_HVMAGIC) { 673 platform_type = HW_VMWARE; 674 return; 675 } 676 } 677 678 /* 679 * Check Xen hypervisor. In a fully virtualized domain, 680 * Xen's pseudo-cpuid function returns a string representing the 681 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum 682 * supported cpuid function. We need at least a (base + 2) leaf value 683 * to do what we want to do. Try different base values, since the 684 * hypervisor might use a different one depending on whether Hyper-V 685 * emulation is switched on by default or not. 686 */ 687 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 688 cp.cp_eax = base; 689 (void) __cpuid_insn(&cp); 690 regs[0] = cp.cp_ebx; 691 regs[1] = cp.cp_ecx; 692 regs[2] = cp.cp_edx; 693 regs[3] = 0; 694 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 && 695 cp.cp_eax >= (base + 2)) { 696 platform_type &= ~HW_NATIVE; 697 platform_type |= HW_XEN_HVM; 698 return; 699 } 700 } 701 } 702 703 int 704 get_hwenv(void) 705 { 706 ASSERT(platform_type != -1); 707 return (platform_type); 708 } 709 710 int 711 is_controldom(void) 712 { 713 return (0); 714 } 715 716 #else 717 718 int 719 get_hwenv(void) 720 { 721 return (HW_XEN_PV); 722 } 723 724 int 725 is_controldom(void) 726 { 727 return (DOMAIN_IS_INITDOMAIN(xen_info)); 728 } 729 730 #endif /* __xpv */ 731 732 static void 733 cpuid_intel_getids(cpu_t *cpu, void *feature) 734 { 735 uint_t i; 736 uint_t chipid_shift = 0; 737 uint_t coreid_shift = 0; 738 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 739 740 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 741 chipid_shift++; 742 743 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 744 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 745 746 if (is_x86_feature(feature, X86FSET_CMP)) { 747 /* 748 * Multi-core (and possibly multi-threaded) 749 * processors. 
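		 *
		 * As a worked example of the arithmetic below (illustrative
		 * only): a dual-core part with HyperThreading reports
		 * cpi_ncpu_per_chip == 4 and cpi_ncore_per_chip == 2, giving
		 * ncpu_per_core == 2, chipid_shift == 2 and coreid_shift == 1.
		 * An APIC ID of 6 (binary 0110) then decomposes into
		 * chipid 1, clogid 2, coreid 3 and pkgcoreid 1.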
750 */ 751 uint_t ncpu_per_core; 752 if (cpi->cpi_ncore_per_chip == 1) 753 ncpu_per_core = cpi->cpi_ncpu_per_chip; 754 else if (cpi->cpi_ncore_per_chip > 1) 755 ncpu_per_core = cpi->cpi_ncpu_per_chip / 756 cpi->cpi_ncore_per_chip; 757 /* 758 * 8bit APIC IDs on dual core Pentiums 759 * look like this: 760 * 761 * +-----------------------+------+------+ 762 * | Physical Package ID | MC | HT | 763 * +-----------------------+------+------+ 764 * <------- chipid --------> 765 * <------- coreid ---------------> 766 * <--- clogid --> 767 * <------> 768 * pkgcoreid 769 * 770 * Where the number of bits necessary to 771 * represent MC and HT fields together equals 772 * to the minimum number of bits necessary to 773 * store the value of cpi->cpi_ncpu_per_chip. 774 * Of those bits, the MC part uses the number 775 * of bits necessary to store the value of 776 * cpi->cpi_ncore_per_chip. 777 */ 778 for (i = 1; i < ncpu_per_core; i <<= 1) 779 coreid_shift++; 780 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 781 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 782 } else if (is_x86_feature(feature, X86FSET_HTT)) { 783 /* 784 * Single-core multi-threaded processors. 785 */ 786 cpi->cpi_coreid = cpi->cpi_chipid; 787 cpi->cpi_pkgcoreid = 0; 788 } 789 cpi->cpi_procnodeid = cpi->cpi_chipid; 790 cpi->cpi_compunitid = cpi->cpi_coreid; 791 } 792 793 static void 794 cpuid_amd_getids(cpu_t *cpu) 795 { 796 int i, first_half, coreidsz; 797 uint32_t nb_caps_reg; 798 uint_t node2_1; 799 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 800 struct cpuid_regs *cp; 801 802 /* 803 * AMD CMP chips currently have a single thread per core. 804 * 805 * Since no two cpus share a core we must assign a distinct coreid 806 * per cpu, and we do this by using the cpu_id. This scheme does not, 807 * however, guarantee that sibling cores of a chip will have sequential 808 * coreids starting at a multiple of the number of cores per chip - 809 * that is usually the case, but if the ACPI MADT table is presented 810 * in a different order then we need to perform a few more gymnastics 811 * for the pkgcoreid. 812 * 813 * All processors in the system have the same number of enabled 814 * cores. Cores within a processor are always numbered sequentially 815 * from 0 regardless of how many or which are disabled, and there 816 * is no way for operating system to discover the real core id when some 817 * are disabled. 818 * 819 * In family 0x15, the cores come in pairs called compute units. They 820 * share I$ and L2 caches and the FPU. Enumeration of this feature is 821 * simplified by the new topology extensions CPUID leaf, indicated by 822 * the X86 feature X86FSET_TOPOEXT. 823 */ 824 825 cpi->cpi_coreid = cpu->cpu_id; 826 cpi->cpi_compunitid = cpu->cpu_id; 827 828 if (cpi->cpi_xmaxeax >= 0x80000008) { 829 830 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 831 832 /* 833 * In AMD parlance chip is really a node while Solaris 834 * sees chip as equivalent to socket/package. 
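		 *
		 * For illustration (a hypothetical part): a single-node
		 * 16-core processor reports NC (%ecx[7:0]) == 15 and
		 * ApicIdCoreIdSize (%ecx[15:12]) == 4 in leaf 0x80000008,
		 * so cpi_ncore_per_chip below becomes 16, coreidsz is 4,
		 * and the low four APIC ID bits select the core within
		 * the package.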
835 */ 836 cpi->cpi_ncore_per_chip = 837 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 838 if (coreidsz == 0) { 839 /* Use legacy method */ 840 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 841 coreidsz++; 842 if (coreidsz == 0) 843 coreidsz = 1; 844 } 845 } else { 846 /* Assume single-core part */ 847 cpi->cpi_ncore_per_chip = 1; 848 coreidsz = 1; 849 } 850 851 cpi->cpi_clogid = cpi->cpi_pkgcoreid = 852 cpi->cpi_apicid & ((1<<coreidsz) - 1); 853 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip; 854 855 /* Get node ID, compute unit ID */ 856 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 857 cpi->cpi_xmaxeax >= 0x8000001e) { 858 cp = &cpi->cpi_extd[0x1e]; 859 cp->cp_eax = 0x8000001e; 860 (void) __cpuid_insn(cp); 861 862 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1; 863 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0); 864 cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1; 865 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) 866 + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit) 867 * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg); 868 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) { 869 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 870 } else if (cpi->cpi_family == 0x10) { 871 /* 872 * See if we are a multi-node processor. 873 * All processors in the system have the same number of nodes 874 */ 875 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 876 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 877 /* Single-node */ 878 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 879 coreidsz); 880 } else { 881 882 /* 883 * Multi-node revision D (2 nodes per package 884 * are supported) 885 */ 886 cpi->cpi_procnodes_per_pkg = 2; 887 888 first_half = (cpi->cpi_pkgcoreid <= 889 (cpi->cpi_ncore_per_chip/2 - 1)); 890 891 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 892 /* We are BSP */ 893 cpi->cpi_procnodeid = (first_half ? 0 : 1); 894 } else { 895 896 /* We are AP */ 897 /* NodeId[2:1] bits to use for reading F3xe8 */ 898 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 899 900 nb_caps_reg = 901 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 902 903 /* 904 * Check IntNodeNum bit (31:30, but bit 31 is 905 * always 0 on dual-node processors) 906 */ 907 if (BITX(nb_caps_reg, 30, 30) == 0) 908 cpi->cpi_procnodeid = node2_1 + 909 !first_half; 910 else 911 cpi->cpi_procnodeid = node2_1 + 912 first_half; 913 } 914 } 915 } else { 916 cpi->cpi_procnodeid = 0; 917 } 918 919 cpi->cpi_chipid = 920 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg; 921 } 922 923 /* 924 * Setup XFeature_Enabled_Mask register. Required by xsave feature. 
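 *
 * A minimal sketch of the expected call sequence (the caller lives elsewhere
 * in the boot path; shown here only to make the ASSERT below concrete):
 *
 *	if (is_x86_feature(x86_featureset, X86FSET_XSAVE))
 *		setup_xfem();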
925 */ 926 void 927 setup_xfem(void) 928 { 929 uint64_t flags = XFEATURE_LEGACY_FP; 930 931 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); 932 933 if (is_x86_feature(x86_featureset, X86FSET_SSE)) 934 flags |= XFEATURE_SSE; 935 936 if (is_x86_feature(x86_featureset, X86FSET_AVX)) 937 flags |= XFEATURE_AVX; 938 939 set_xcr(XFEATURE_ENABLED_MASK, flags); 940 941 xsave_bv_all = flags; 942 } 943 944 void 945 cpuid_pass1(cpu_t *cpu, uchar_t *featureset) 946 { 947 uint32_t mask_ecx, mask_edx; 948 struct cpuid_info *cpi; 949 struct cpuid_regs *cp; 950 int xcpuid; 951 #if !defined(__xpv) 952 extern int idle_cpu_prefer_mwait; 953 #endif 954 955 /* 956 * Space statically allocated for BSP, ensure pointer is set 957 */ 958 if (cpu->cpu_id == 0) { 959 if (cpu->cpu_m.mcpu_cpi == NULL) 960 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 961 } 962 963 add_x86_feature(featureset, X86FSET_CPUID); 964 965 cpi = cpu->cpu_m.mcpu_cpi; 966 ASSERT(cpi != NULL); 967 cp = &cpi->cpi_std[0]; 968 cp->cp_eax = 0; 969 cpi->cpi_maxeax = __cpuid_insn(cp); 970 { 971 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 972 *iptr++ = cp->cp_ebx; 973 *iptr++ = cp->cp_edx; 974 *iptr++ = cp->cp_ecx; 975 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 976 } 977 978 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 979 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 980 981 /* 982 * Limit the range in case of weird hardware 983 */ 984 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 985 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 986 if (cpi->cpi_maxeax < 1) 987 goto pass1_done; 988 989 cp = &cpi->cpi_std[1]; 990 cp->cp_eax = 1; 991 (void) __cpuid_insn(cp); 992 993 /* 994 * Extract identifying constants for easy access. 995 */ 996 cpi->cpi_model = CPI_MODEL(cpi); 997 cpi->cpi_family = CPI_FAMILY(cpi); 998 999 if (cpi->cpi_family == 0xf) 1000 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 1001 1002 /* 1003 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 1004 * Intel, and presumably everyone else, uses model == 0xf, as 1005 * one would expect (max value means possible overflow). Sigh. 1006 */ 1007 1008 switch (cpi->cpi_vendor) { 1009 case X86_VENDOR_Intel: 1010 if (IS_EXTENDED_MODEL_INTEL(cpi)) 1011 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 1012 break; 1013 case X86_VENDOR_AMD: 1014 if (CPI_FAMILY(cpi) == 0xf) 1015 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 1016 break; 1017 default: 1018 if (cpi->cpi_model == 0xf) 1019 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 1020 break; 1021 } 1022 1023 cpi->cpi_step = CPI_STEP(cpi); 1024 cpi->cpi_brandid = CPI_BRANDID(cpi); 1025 1026 /* 1027 * *default* assumptions: 1028 * - believe %edx feature word 1029 * - ignore %ecx feature word 1030 * - 32-bit virtual and physical addressing 1031 */ 1032 mask_edx = 0xffffffff; 1033 mask_ecx = 0; 1034 1035 cpi->cpi_pabits = cpi->cpi_vabits = 32; 1036 1037 switch (cpi->cpi_vendor) { 1038 case X86_VENDOR_Intel: 1039 if (cpi->cpi_family == 5) 1040 x86_type = X86_TYPE_P5; 1041 else if (IS_LEGACY_P6(cpi)) { 1042 x86_type = X86_TYPE_P6; 1043 pentiumpro_bug4046376 = 1; 1044 /* 1045 * Clear the SEP bit when it was set erroneously 1046 */ 1047 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 1048 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 1049 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 1050 x86_type = X86_TYPE_P4; 1051 /* 1052 * We don't currently depend on any of the %ecx 1053 * features until Prescott, so we'll only check 1054 * this from P4 onwards. We might want to revisit 1055 * that idea later. 
1056 */ 1057 mask_ecx = 0xffffffff; 1058 } else if (cpi->cpi_family > 0xf) 1059 mask_ecx = 0xffffffff; 1060 /* 1061 * We don't support MONITOR/MWAIT if leaf 5 is not available 1062 * to obtain the monitor linesize. 1063 */ 1064 if (cpi->cpi_maxeax < 5) 1065 mask_ecx &= ~CPUID_INTC_ECX_MON; 1066 break; 1067 case X86_VENDOR_IntelClone: 1068 default: 1069 break; 1070 case X86_VENDOR_AMD: 1071 #if defined(OPTERON_ERRATUM_108) 1072 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 1073 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 1074 cpi->cpi_model = 0xc; 1075 } else 1076 #endif 1077 if (cpi->cpi_family == 5) { 1078 /* 1079 * AMD K5 and K6 1080 * 1081 * These CPUs have an incomplete implementation 1082 * of MCA/MCE which we mask away. 1083 */ 1084 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 1085 1086 /* 1087 * Model 0 uses the wrong (APIC) bit 1088 * to indicate PGE. Fix it here. 1089 */ 1090 if (cpi->cpi_model == 0) { 1091 if (cp->cp_edx & 0x200) { 1092 cp->cp_edx &= ~0x200; 1093 cp->cp_edx |= CPUID_INTC_EDX_PGE; 1094 } 1095 } 1096 1097 /* 1098 * Early models had problems w/ MMX; disable. 1099 */ 1100 if (cpi->cpi_model < 6) 1101 mask_edx &= ~CPUID_INTC_EDX_MMX; 1102 } 1103 1104 /* 1105 * For newer families, SSE3 and CX16, at least, are valid; 1106 * enable all 1107 */ 1108 if (cpi->cpi_family >= 0xf) 1109 mask_ecx = 0xffffffff; 1110 /* 1111 * We don't support MONITOR/MWAIT if leaf 5 is not available 1112 * to obtain the monitor linesize. 1113 */ 1114 if (cpi->cpi_maxeax < 5) 1115 mask_ecx &= ~CPUID_INTC_ECX_MON; 1116 1117 #if !defined(__xpv) 1118 /* 1119 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 1120 * processors. AMD does not intend MWAIT to be used in the cpu 1121 * idle loop on current and future processors. 10h and future 1122 * AMD processors use more power in MWAIT than HLT. 1123 * Pre-family-10h Opterons do not have the MWAIT instruction. 1124 */ 1125 idle_cpu_prefer_mwait = 0; 1126 #endif 1127 1128 break; 1129 case X86_VENDOR_TM: 1130 /* 1131 * workaround the NT workaround in CMS 4.1 1132 */ 1133 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 1134 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 1135 cp->cp_edx |= CPUID_INTC_EDX_CX8; 1136 break; 1137 case X86_VENDOR_Centaur: 1138 /* 1139 * workaround the NT workarounds again 1140 */ 1141 if (cpi->cpi_family == 6) 1142 cp->cp_edx |= CPUID_INTC_EDX_CX8; 1143 break; 1144 case X86_VENDOR_Cyrix: 1145 /* 1146 * We rely heavily on the probing in locore 1147 * to actually figure out what parts, if any, 1148 * of the Cyrix cpuid instruction to believe. 
1149 */ 1150 switch (x86_type) { 1151 case X86_TYPE_CYRIX_486: 1152 mask_edx = 0; 1153 break; 1154 case X86_TYPE_CYRIX_6x86: 1155 mask_edx = 0; 1156 break; 1157 case X86_TYPE_CYRIX_6x86L: 1158 mask_edx = 1159 CPUID_INTC_EDX_DE | 1160 CPUID_INTC_EDX_CX8; 1161 break; 1162 case X86_TYPE_CYRIX_6x86MX: 1163 mask_edx = 1164 CPUID_INTC_EDX_DE | 1165 CPUID_INTC_EDX_MSR | 1166 CPUID_INTC_EDX_CX8 | 1167 CPUID_INTC_EDX_PGE | 1168 CPUID_INTC_EDX_CMOV | 1169 CPUID_INTC_EDX_MMX; 1170 break; 1171 case X86_TYPE_CYRIX_GXm: 1172 mask_edx = 1173 CPUID_INTC_EDX_MSR | 1174 CPUID_INTC_EDX_CX8 | 1175 CPUID_INTC_EDX_CMOV | 1176 CPUID_INTC_EDX_MMX; 1177 break; 1178 case X86_TYPE_CYRIX_MediaGX: 1179 break; 1180 case X86_TYPE_CYRIX_MII: 1181 case X86_TYPE_VIA_CYRIX_III: 1182 mask_edx = 1183 CPUID_INTC_EDX_DE | 1184 CPUID_INTC_EDX_TSC | 1185 CPUID_INTC_EDX_MSR | 1186 CPUID_INTC_EDX_CX8 | 1187 CPUID_INTC_EDX_PGE | 1188 CPUID_INTC_EDX_CMOV | 1189 CPUID_INTC_EDX_MMX; 1190 break; 1191 default: 1192 break; 1193 } 1194 break; 1195 } 1196 1197 #if defined(__xpv) 1198 /* 1199 * Do not support MONITOR/MWAIT under a hypervisor 1200 */ 1201 mask_ecx &= ~CPUID_INTC_ECX_MON; 1202 /* 1203 * Do not support XSAVE under a hypervisor for now 1204 */ 1205 xsave_force_disable = B_TRUE; 1206 1207 #endif /* __xpv */ 1208 1209 if (xsave_force_disable) { 1210 mask_ecx &= ~CPUID_INTC_ECX_XSAVE; 1211 mask_ecx &= ~CPUID_INTC_ECX_AVX; 1212 mask_ecx &= ~CPUID_INTC_ECX_F16C; 1213 mask_ecx &= ~CPUID_INTC_ECX_FMA; 1214 } 1215 1216 /* 1217 * Now we've figured out the masks that determine 1218 * which bits we choose to believe, apply the masks 1219 * to the feature words, then map the kernel's view 1220 * of these feature words into its feature word. 1221 */ 1222 cp->cp_edx &= mask_edx; 1223 cp->cp_ecx &= mask_ecx; 1224 1225 /* 1226 * apply any platform restrictions (we don't call this 1227 * immediately after __cpuid_insn here, because we need the 1228 * workarounds applied above first) 1229 */ 1230 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 1231 1232 /* 1233 * In addition to ecx and edx, Intel is storing a bunch of instruction 1234 * set extensions in leaf 7's ebx. 1235 */ 1236 if (cpi->cpi_vendor == X86_VENDOR_Intel && cpi->cpi_maxeax >= 7) { 1237 struct cpuid_regs *ecp; 1238 ecp = &cpi->cpi_std[7]; 1239 ecp->cp_eax = 7; 1240 ecp->cp_ecx = 0; 1241 (void) __cpuid_insn(ecp); 1242 /* 1243 * If XSAVE has been disabled, just ignore all of the AVX 1244 * dependent flags here. 
1245 */ 1246 if (xsave_force_disable) { 1247 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 1248 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 1249 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 1250 } 1251 1252 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP) 1253 add_x86_feature(featureset, X86FSET_SMEP); 1254 } 1255 1256 /* 1257 * fold in overrides from the "eeprom" mechanism 1258 */ 1259 cp->cp_edx |= cpuid_feature_edx_include; 1260 cp->cp_edx &= ~cpuid_feature_edx_exclude; 1261 1262 cp->cp_ecx |= cpuid_feature_ecx_include; 1263 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 1264 1265 if (cp->cp_edx & CPUID_INTC_EDX_PSE) { 1266 add_x86_feature(featureset, X86FSET_LARGEPAGE); 1267 } 1268 if (cp->cp_edx & CPUID_INTC_EDX_TSC) { 1269 add_x86_feature(featureset, X86FSET_TSC); 1270 } 1271 if (cp->cp_edx & CPUID_INTC_EDX_MSR) { 1272 add_x86_feature(featureset, X86FSET_MSR); 1273 } 1274 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) { 1275 add_x86_feature(featureset, X86FSET_MTRR); 1276 } 1277 if (cp->cp_edx & CPUID_INTC_EDX_PGE) { 1278 add_x86_feature(featureset, X86FSET_PGE); 1279 } 1280 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) { 1281 add_x86_feature(featureset, X86FSET_CMOV); 1282 } 1283 if (cp->cp_edx & CPUID_INTC_EDX_MMX) { 1284 add_x86_feature(featureset, X86FSET_MMX); 1285 } 1286 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 1287 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) { 1288 add_x86_feature(featureset, X86FSET_MCA); 1289 } 1290 if (cp->cp_edx & CPUID_INTC_EDX_PAE) { 1291 add_x86_feature(featureset, X86FSET_PAE); 1292 } 1293 if (cp->cp_edx & CPUID_INTC_EDX_CX8) { 1294 add_x86_feature(featureset, X86FSET_CX8); 1295 } 1296 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) { 1297 add_x86_feature(featureset, X86FSET_CX16); 1298 } 1299 if (cp->cp_edx & CPUID_INTC_EDX_PAT) { 1300 add_x86_feature(featureset, X86FSET_PAT); 1301 } 1302 if (cp->cp_edx & CPUID_INTC_EDX_SEP) { 1303 add_x86_feature(featureset, X86FSET_SEP); 1304 } 1305 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 1306 /* 1307 * In our implementation, fxsave/fxrstor 1308 * are prerequisites before we'll even 1309 * try and do SSE things. 1310 */ 1311 if (cp->cp_edx & CPUID_INTC_EDX_SSE) { 1312 add_x86_feature(featureset, X86FSET_SSE); 1313 } 1314 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) { 1315 add_x86_feature(featureset, X86FSET_SSE2); 1316 } 1317 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) { 1318 add_x86_feature(featureset, X86FSET_SSE3); 1319 } 1320 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) { 1321 add_x86_feature(featureset, X86FSET_SSSE3); 1322 } 1323 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) { 1324 add_x86_feature(featureset, X86FSET_SSE4_1); 1325 } 1326 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) { 1327 add_x86_feature(featureset, X86FSET_SSE4_2); 1328 } 1329 if (cp->cp_ecx & CPUID_INTC_ECX_AES) { 1330 add_x86_feature(featureset, X86FSET_AES); 1331 } 1332 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) { 1333 add_x86_feature(featureset, X86FSET_PCLMULQDQ); 1334 } 1335 1336 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) { 1337 add_x86_feature(featureset, X86FSET_XSAVE); 1338 1339 /* We only test AVX when there is XSAVE */ 1340 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) { 1341 add_x86_feature(featureset, 1342 X86FSET_AVX); 1343 1344 /* 1345 * Intel says we can't check these without also 1346 * checking AVX. 
1347 */ 1348 if (cp->cp_ecx & CPUID_INTC_ECX_F16C) 1349 add_x86_feature(featureset, 1350 X86FSET_F16C); 1351 1352 if (cp->cp_ecx & CPUID_INTC_ECX_FMA) 1353 add_x86_feature(featureset, 1354 X86FSET_FMA); 1355 1356 if (cpi->cpi_std[7].cp_ebx & 1357 CPUID_INTC_EBX_7_0_BMI1) 1358 add_x86_feature(featureset, 1359 X86FSET_BMI1); 1360 1361 if (cpi->cpi_std[7].cp_ebx & 1362 CPUID_INTC_EBX_7_0_BMI2) 1363 add_x86_feature(featureset, 1364 X86FSET_BMI2); 1365 1366 if (cpi->cpi_std[7].cp_ebx & 1367 CPUID_INTC_EBX_7_0_AVX2) 1368 add_x86_feature(featureset, 1369 X86FSET_AVX2); 1370 } 1371 } 1372 } 1373 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) { 1374 add_x86_feature(featureset, X86FSET_X2APIC); 1375 } 1376 if (cp->cp_edx & CPUID_INTC_EDX_DE) { 1377 add_x86_feature(featureset, X86FSET_DE); 1378 } 1379 #if !defined(__xpv) 1380 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 1381 1382 /* 1383 * We require the CLFLUSH instruction for erratum workaround 1384 * to use MONITOR/MWAIT. 1385 */ 1386 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1387 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 1388 add_x86_feature(featureset, X86FSET_MWAIT); 1389 } else { 1390 extern int idle_cpu_assert_cflush_monitor; 1391 1392 /* 1393 * All processors we are aware of which have 1394 * MONITOR/MWAIT also have CLFLUSH. 1395 */ 1396 if (idle_cpu_assert_cflush_monitor) { 1397 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 1398 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 1399 } 1400 } 1401 } 1402 #endif /* __xpv */ 1403 1404 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) { 1405 add_x86_feature(featureset, X86FSET_VMX); 1406 } 1407 1408 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND) 1409 add_x86_feature(featureset, X86FSET_RDRAND); 1410 1411 /* 1412 * Only need it first time, rest of the cpus would follow suit. 1413 * we only capture this for the bootcpu. 1414 */ 1415 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1416 add_x86_feature(featureset, X86FSET_CLFSH); 1417 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 1418 } 1419 if (is_x86_feature(featureset, X86FSET_PAE)) 1420 cpi->cpi_pabits = 36; 1421 1422 /* 1423 * Hyperthreading configuration is slightly tricky on Intel 1424 * and pure clones, and even trickier on AMD. 1425 * 1426 * (AMD chose to set the HTT bit on their CMP processors, 1427 * even though they're not actually hyperthreaded. Thus it 1428 * takes a bit more work to figure out what's really going 1429 * on ... see the handling of the CMP_LGCY bit below) 1430 */ 1431 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 1432 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 1433 if (cpi->cpi_ncpu_per_chip > 1) 1434 add_x86_feature(featureset, X86FSET_HTT); 1435 } else { 1436 cpi->cpi_ncpu_per_chip = 1; 1437 } 1438 1439 /* 1440 * Work on the "extended" feature information, doing 1441 * some basic initialization for cpuid_pass2() 1442 */ 1443 xcpuid = 0; 1444 switch (cpi->cpi_vendor) { 1445 case X86_VENDOR_Intel: 1446 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 1447 xcpuid++; 1448 break; 1449 case X86_VENDOR_AMD: 1450 if (cpi->cpi_family > 5 || 1451 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 1452 xcpuid++; 1453 break; 1454 case X86_VENDOR_Cyrix: 1455 /* 1456 * Only these Cyrix CPUs are -known- to support 1457 * extended cpuid operations. 
1458 */ 1459 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1460 x86_type == X86_TYPE_CYRIX_GXm) 1461 xcpuid++; 1462 break; 1463 case X86_VENDOR_Centaur: 1464 case X86_VENDOR_TM: 1465 default: 1466 xcpuid++; 1467 break; 1468 } 1469 1470 if (xcpuid) { 1471 cp = &cpi->cpi_extd[0]; 1472 cp->cp_eax = 0x80000000; 1473 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1474 } 1475 1476 if (cpi->cpi_xmaxeax & 0x80000000) { 1477 1478 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1479 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1480 1481 switch (cpi->cpi_vendor) { 1482 case X86_VENDOR_Intel: 1483 case X86_VENDOR_AMD: 1484 if (cpi->cpi_xmaxeax < 0x80000001) 1485 break; 1486 cp = &cpi->cpi_extd[1]; 1487 cp->cp_eax = 0x80000001; 1488 (void) __cpuid_insn(cp); 1489 1490 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1491 cpi->cpi_family == 5 && 1492 cpi->cpi_model == 6 && 1493 cpi->cpi_step == 6) { 1494 /* 1495 * K6 model 6 uses bit 10 to indicate SYSC 1496 * Later models use bit 11. Fix it here. 1497 */ 1498 if (cp->cp_edx & 0x400) { 1499 cp->cp_edx &= ~0x400; 1500 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1501 } 1502 } 1503 1504 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1505 1506 /* 1507 * Compute the additions to the kernel's feature word. 1508 */ 1509 if (cp->cp_edx & CPUID_AMD_EDX_NX) { 1510 add_x86_feature(featureset, X86FSET_NX); 1511 } 1512 1513 /* 1514 * Regardless whether or not we boot 64-bit, 1515 * we should have a way to identify whether 1516 * the CPU is capable of running 64-bit. 1517 */ 1518 if (cp->cp_edx & CPUID_AMD_EDX_LM) { 1519 add_x86_feature(featureset, X86FSET_64); 1520 } 1521 1522 #if defined(__amd64) 1523 /* 1 GB large page - enable only for 64 bit kernel */ 1524 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) { 1525 add_x86_feature(featureset, X86FSET_1GPG); 1526 } 1527 #endif 1528 1529 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1530 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1531 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) { 1532 add_x86_feature(featureset, X86FSET_SSE4A); 1533 } 1534 1535 /* 1536 * If both the HTT and CMP_LGCY bits are set, 1537 * then we're not actually HyperThreaded. Read 1538 * "AMD CPUID Specification" for more details. 1539 */ 1540 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1541 is_x86_feature(featureset, X86FSET_HTT) && 1542 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1543 remove_x86_feature(featureset, X86FSET_HTT); 1544 add_x86_feature(featureset, X86FSET_CMP); 1545 } 1546 #if defined(__amd64) 1547 /* 1548 * It's really tricky to support syscall/sysret in 1549 * the i386 kernel; we rely on sysenter/sysexit 1550 * instead. In the amd64 kernel, things are -way- 1551 * better. 1552 */ 1553 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) { 1554 add_x86_feature(featureset, X86FSET_ASYSC); 1555 } 1556 1557 /* 1558 * While we're thinking about system calls, note 1559 * that AMD processors don't support sysenter 1560 * in long mode at all, so don't try to program them. 1561 */ 1562 if (x86_vendor == X86_VENDOR_AMD) { 1563 remove_x86_feature(featureset, X86FSET_SEP); 1564 } 1565 #endif 1566 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) { 1567 add_x86_feature(featureset, X86FSET_TSCP); 1568 } 1569 1570 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) { 1571 add_x86_feature(featureset, X86FSET_SVM); 1572 } 1573 1574 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) { 1575 add_x86_feature(featureset, X86FSET_TOPOEXT); 1576 } 1577 break; 1578 default: 1579 break; 1580 } 1581 1582 /* 1583 * Get CPUID data about processor cores and hyperthreads. 
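		 *
		 * (The switch below only fetches the raw leaves; the counts
		 * are derived afterwards.  For example, the Intel path later
		 * treats leaf 4 %eax[31:26] + 1 as the number of cores per
		 * chip, so a raw field value of 3 yields
		 * cpi_ncore_per_chip == 4.)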
1584 */ 1585 switch (cpi->cpi_vendor) { 1586 case X86_VENDOR_Intel: 1587 if (cpi->cpi_maxeax >= 4) { 1588 cp = &cpi->cpi_std[4]; 1589 cp->cp_eax = 4; 1590 cp->cp_ecx = 0; 1591 (void) __cpuid_insn(cp); 1592 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1593 } 1594 /*FALLTHROUGH*/ 1595 case X86_VENDOR_AMD: 1596 if (cpi->cpi_xmaxeax < 0x80000008) 1597 break; 1598 cp = &cpi->cpi_extd[8]; 1599 cp->cp_eax = 0x80000008; 1600 (void) __cpuid_insn(cp); 1601 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1602 1603 /* 1604 * Virtual and physical address limits from 1605 * cpuid override previously guessed values. 1606 */ 1607 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1608 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1609 break; 1610 default: 1611 break; 1612 } 1613 1614 /* 1615 * Derive the number of cores per chip 1616 */ 1617 switch (cpi->cpi_vendor) { 1618 case X86_VENDOR_Intel: 1619 if (cpi->cpi_maxeax < 4) { 1620 cpi->cpi_ncore_per_chip = 1; 1621 break; 1622 } else { 1623 cpi->cpi_ncore_per_chip = 1624 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1625 } 1626 break; 1627 case X86_VENDOR_AMD: 1628 if (cpi->cpi_xmaxeax < 0x80000008) { 1629 cpi->cpi_ncore_per_chip = 1; 1630 break; 1631 } else { 1632 /* 1633 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1634 * 1 less than the number of physical cores on 1635 * the chip. In family 0x10 this value can 1636 * be affected by "downcoring" - it reflects 1637 * 1 less than the number of cores actually 1638 * enabled on this node. 1639 */ 1640 cpi->cpi_ncore_per_chip = 1641 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1642 } 1643 break; 1644 default: 1645 cpi->cpi_ncore_per_chip = 1; 1646 break; 1647 } 1648 1649 /* 1650 * Get CPUID data about TSC Invariance in Deep C-State. 1651 */ 1652 switch (cpi->cpi_vendor) { 1653 case X86_VENDOR_Intel: 1654 if (cpi->cpi_maxeax >= 7) { 1655 cp = &cpi->cpi_extd[7]; 1656 cp->cp_eax = 0x80000007; 1657 cp->cp_ecx = 0; 1658 (void) __cpuid_insn(cp); 1659 } 1660 break; 1661 default: 1662 break; 1663 } 1664 } else { 1665 cpi->cpi_ncore_per_chip = 1; 1666 } 1667 1668 /* 1669 * If more than one core, then this processor is CMP. 1670 */ 1671 if (cpi->cpi_ncore_per_chip > 1) { 1672 add_x86_feature(featureset, X86FSET_CMP); 1673 } 1674 1675 /* 1676 * If the number of cores is the same as the number 1677 * of CPUs, then we cannot have HyperThreading. 1678 */ 1679 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) { 1680 remove_x86_feature(featureset, X86FSET_HTT); 1681 } 1682 1683 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1684 cpi->cpi_procnodes_per_pkg = 1; 1685 cpi->cpi_cores_per_compunit = 1; 1686 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE && 1687 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) { 1688 /* 1689 * Single-core single-threaded processors. 1690 */ 1691 cpi->cpi_chipid = -1; 1692 cpi->cpi_clogid = 0; 1693 cpi->cpi_coreid = cpu->cpu_id; 1694 cpi->cpi_pkgcoreid = 0; 1695 if (cpi->cpi_vendor == X86_VENDOR_AMD) 1696 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 1697 else 1698 cpi->cpi_procnodeid = cpi->cpi_chipid; 1699 } else if (cpi->cpi_ncpu_per_chip > 1) { 1700 if (cpi->cpi_vendor == X86_VENDOR_Intel) 1701 cpuid_intel_getids(cpu, featureset); 1702 else if (cpi->cpi_vendor == X86_VENDOR_AMD) 1703 cpuid_amd_getids(cpu); 1704 else { 1705 /* 1706 * All other processors are currently 1707 * assumed to have single cores. 
1708 */ 1709 cpi->cpi_coreid = cpi->cpi_chipid; 1710 cpi->cpi_pkgcoreid = 0; 1711 cpi->cpi_procnodeid = cpi->cpi_chipid; 1712 cpi->cpi_compunitid = cpi->cpi_chipid; 1713 } 1714 } 1715 1716 /* 1717 * Synthesize chip "revision" and socket type 1718 */ 1719 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1720 cpi->cpi_model, cpi->cpi_step); 1721 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1722 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1723 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1724 cpi->cpi_model, cpi->cpi_step); 1725 1726 pass1_done: 1727 cpi->cpi_pass = 1; 1728 } 1729 1730 /* 1731 * Make copies of the cpuid table entries we depend on, in 1732 * part for ease of parsing now, in part so that we have only 1733 * one place to correct any of it, in part for ease of 1734 * later export to userland, and in part so we can look at 1735 * this stuff in a crash dump. 1736 */ 1737 1738 /*ARGSUSED*/ 1739 void 1740 cpuid_pass2(cpu_t *cpu) 1741 { 1742 uint_t n, nmax; 1743 int i; 1744 struct cpuid_regs *cp; 1745 uint8_t *dp; 1746 uint32_t *iptr; 1747 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1748 1749 ASSERT(cpi->cpi_pass == 1); 1750 1751 if (cpi->cpi_maxeax < 1) 1752 goto pass2_done; 1753 1754 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1755 nmax = NMAX_CPI_STD; 1756 /* 1757 * (We already handled n == 0 and n == 1 in pass 1) 1758 */ 1759 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1760 cp->cp_eax = n; 1761 1762 /* 1763 * CPUID function 4 expects %ecx to be initialized 1764 * with an index which indicates which cache to return 1765 * information about. The OS is expected to call function 4 1766 * with %ecx set to 0, 1, 2, ... until it returns with 1767 * EAX[4:0] set to 0, which indicates there are no more 1768 * caches. 1769 * 1770 * Here, populate cpi_std[4] with the information returned by 1771 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1772 * when dynamic memory allocation becomes available. 1773 * 1774 * Note: we need to explicitly initialize %ecx here, since 1775 * function 4 may have been previously invoked. 1776 */ 1777 if (n == 4) 1778 cp->cp_ecx = 0; 1779 1780 (void) __cpuid_insn(cp); 1781 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1782 switch (n) { 1783 case 2: 1784 /* 1785 * "the lower 8 bits of the %eax register 1786 * contain a value that identifies the number 1787 * of times the cpuid [instruction] has to be 1788 * executed to obtain a complete image of the 1789 * processor's caching systems." 1790 * 1791 * How *do* they make this stuff up? 1792 */ 1793 cpi->cpi_ncache = sizeof (*cp) * 1794 BITX(cp->cp_eax, 7, 0); 1795 if (cpi->cpi_ncache == 0) 1796 break; 1797 cpi->cpi_ncache--; /* skip count byte */ 1798 1799 /* 1800 * Well, for now, rather than attempt to implement 1801 * this slightly dubious algorithm, we just look 1802 * at the first 15 .. 
1803 */ 1804 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1805 cpi->cpi_ncache = sizeof (*cp) - 1; 1806 1807 dp = cpi->cpi_cacheinfo; 1808 if (BITX(cp->cp_eax, 31, 31) == 0) { 1809 uint8_t *p = (void *)&cp->cp_eax; 1810 for (i = 1; i < 4; i++) 1811 if (p[i] != 0) 1812 *dp++ = p[i]; 1813 } 1814 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1815 uint8_t *p = (void *)&cp->cp_ebx; 1816 for (i = 0; i < 4; i++) 1817 if (p[i] != 0) 1818 *dp++ = p[i]; 1819 } 1820 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1821 uint8_t *p = (void *)&cp->cp_ecx; 1822 for (i = 0; i < 4; i++) 1823 if (p[i] != 0) 1824 *dp++ = p[i]; 1825 } 1826 if (BITX(cp->cp_edx, 31, 31) == 0) { 1827 uint8_t *p = (void *)&cp->cp_edx; 1828 for (i = 0; i < 4; i++) 1829 if (p[i] != 0) 1830 *dp++ = p[i]; 1831 } 1832 break; 1833 1834 case 3: /* Processor serial number, if PSN supported */ 1835 break; 1836 1837 case 4: /* Deterministic cache parameters */ 1838 break; 1839 1840 case 5: /* Monitor/Mwait parameters */ 1841 { 1842 size_t mwait_size; 1843 1844 /* 1845 * check cpi_mwait.support which was set in cpuid_pass1 1846 */ 1847 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1848 break; 1849 1850 /* 1851 * Protect ourself from insane mwait line size. 1852 * Workaround for incomplete hardware emulator(s). 1853 */ 1854 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1855 if (mwait_size < sizeof (uint32_t) || 1856 !ISP2(mwait_size)) { 1857 #if DEBUG 1858 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1859 "size %ld", cpu->cpu_id, (long)mwait_size); 1860 #endif 1861 break; 1862 } 1863 1864 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1865 cpi->cpi_mwait.mon_max = mwait_size; 1866 if (MWAIT_EXTENSION(cpi)) { 1867 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1868 if (MWAIT_INT_ENABLE(cpi)) 1869 cpi->cpi_mwait.support |= 1870 MWAIT_ECX_INT_ENABLE; 1871 } 1872 break; 1873 } 1874 default: 1875 break; 1876 } 1877 } 1878 1879 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1880 struct cpuid_regs regs; 1881 1882 cp = ®s; 1883 cp->cp_eax = 0xB; 1884 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1885 1886 (void) __cpuid_insn(cp); 1887 1888 /* 1889 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1890 * indicates that the extended topology enumeration leaf is 1891 * available. 
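		 *
		 * As a worked example of the decomposition done below: with
		 * coreid_shift == 1 (two logical CPUs per core) and
		 * chipid_shift == 4 (sixteen logical CPUs per package), an
		 * x2APIC ID of 0x1b yields
		 *
		 *	cpi_chipid    == 0x1b >> 4  == 1
		 *	cpi_clogid    == 0x1b & 0xf == 0xb
		 *	cpi_coreid    == 0x1b >> 1  == 0xd
		 *	cpi_pkgcoreid == 0xb >> 1   == 5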
1892 */ 1893 if (cp->cp_ebx) { 1894 uint32_t x2apic_id; 1895 uint_t coreid_shift = 0; 1896 uint_t ncpu_per_core = 1; 1897 uint_t chipid_shift = 0; 1898 uint_t ncpu_per_chip = 1; 1899 uint_t i; 1900 uint_t level; 1901 1902 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1903 cp->cp_eax = 0xB; 1904 cp->cp_ecx = i; 1905 1906 (void) __cpuid_insn(cp); 1907 level = CPI_CPU_LEVEL_TYPE(cp); 1908 1909 if (level == 1) { 1910 x2apic_id = cp->cp_edx; 1911 coreid_shift = BITX(cp->cp_eax, 4, 0); 1912 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1913 } else if (level == 2) { 1914 x2apic_id = cp->cp_edx; 1915 chipid_shift = BITX(cp->cp_eax, 4, 0); 1916 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1917 } 1918 } 1919 1920 cpi->cpi_apicid = x2apic_id; 1921 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1922 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1923 ncpu_per_core; 1924 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1925 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1926 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1927 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1928 } 1929 1930 /* Make cp NULL so that we don't stumble on others */ 1931 cp = NULL; 1932 } 1933 1934 /* 1935 * XSAVE enumeration 1936 */ 1937 if (cpi->cpi_maxeax >= 0xD) { 1938 struct cpuid_regs regs; 1939 boolean_t cpuid_d_valid = B_TRUE; 1940 1941 cp = ®s; 1942 cp->cp_eax = 0xD; 1943 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1944 1945 (void) __cpuid_insn(cp); 1946 1947 /* 1948 * Sanity checks for debug 1949 */ 1950 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 || 1951 (cp->cp_eax & XFEATURE_SSE) == 0) { 1952 cpuid_d_valid = B_FALSE; 1953 } 1954 1955 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax; 1956 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx; 1957 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx; 1958 1959 /* 1960 * If the hw supports AVX, get the size and offset in the save 1961 * area for the ymm state. 1962 */ 1963 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) { 1964 cp->cp_eax = 0xD; 1965 cp->cp_ecx = 2; 1966 cp->cp_edx = cp->cp_ebx = 0; 1967 1968 (void) __cpuid_insn(cp); 1969 1970 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET || 1971 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) { 1972 cpuid_d_valid = B_FALSE; 1973 } 1974 1975 cpi->cpi_xsave.ymm_size = cp->cp_eax; 1976 cpi->cpi_xsave.ymm_offset = cp->cp_ebx; 1977 } 1978 1979 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { 1980 xsave_state_size = 0; 1981 } else if (cpuid_d_valid) { 1982 xsave_state_size = cpi->cpi_xsave.xsav_max_size; 1983 } else { 1984 /* Broken CPUID 0xD, probably in HVM */ 1985 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid " 1986 "value: hw_low = %d, hw_high = %d, xsave_size = %d" 1987 ", ymm_size = %d, ymm_offset = %d\n", 1988 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low, 1989 cpi->cpi_xsave.xsav_hw_features_high, 1990 (int)cpi->cpi_xsave.xsav_max_size, 1991 (int)cpi->cpi_xsave.ymm_size, 1992 (int)cpi->cpi_xsave.ymm_offset); 1993 1994 if (xsave_state_size != 0) { 1995 /* 1996 * This must be a non-boot CPU. We cannot 1997 * continue, because boot cpu has already 1998 * enabled XSAVE. 1999 */ 2000 ASSERT(cpu->cpu_id != 0); 2001 cmn_err(CE_PANIC, "cpu%d: we have already " 2002 "enabled XSAVE on boot cpu, cannot " 2003 "continue.", cpu->cpu_id); 2004 } else { 2005 /* 2006 * If we reached here on the boot CPU, it's also 2007 * almost certain that we'll reach here on the 2008 * non-boot CPUs. When we're here on a boot CPU 2009 * we should disable the feature, on a non-boot 2010 * CPU we need to confirm that we have. 
2011 */ 2012 if (cpu->cpu_id == 0) { 2013 remove_x86_feature(x86_featureset, 2014 X86FSET_XSAVE); 2015 remove_x86_feature(x86_featureset, 2016 X86FSET_AVX); 2017 remove_x86_feature(x86_featureset, 2018 X86FSET_F16C); 2019 remove_x86_feature(x86_featureset, 2020 X86FSET_BMI1); 2021 remove_x86_feature(x86_featureset, 2022 X86FSET_BMI2); 2023 remove_x86_feature(x86_featureset, 2024 X86FSET_FMA); 2025 remove_x86_feature(x86_featureset, 2026 X86FSET_AVX2); 2027 CPI_FEATURES_ECX(cpi) &= 2028 ~CPUID_INTC_ECX_XSAVE; 2029 CPI_FEATURES_ECX(cpi) &= 2030 ~CPUID_INTC_ECX_AVX; 2031 CPI_FEATURES_ECX(cpi) &= 2032 ~CPUID_INTC_ECX_F16C; 2033 CPI_FEATURES_ECX(cpi) &= 2034 ~CPUID_INTC_ECX_FMA; 2035 CPI_FEATURES_7_0_EBX(cpi) &= 2036 ~CPUID_INTC_EBX_7_0_BMI1; 2037 CPI_FEATURES_7_0_EBX(cpi) &= 2038 ~CPUID_INTC_EBX_7_0_BMI2; 2039 CPI_FEATURES_7_0_EBX(cpi) &= 2040 ~CPUID_INTC_EBX_7_0_AVX2; 2041 xsave_force_disable = B_TRUE; 2042 } else { 2043 VERIFY(is_x86_feature(x86_featureset, 2044 X86FSET_XSAVE) == B_FALSE); 2045 } 2046 } 2047 } 2048 } 2049 2050 2051 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 2052 goto pass2_done; 2053 2054 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 2055 nmax = NMAX_CPI_EXTD; 2056 /* 2057 * Copy the extended properties, fixing them as we go. 2058 * (We already handled n == 0 and n == 1 in pass 1) 2059 */ 2060 iptr = (void *)cpi->cpi_brandstr; 2061 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 2062 cp->cp_eax = 0x80000000 + n; 2063 (void) __cpuid_insn(cp); 2064 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 2065 switch (n) { 2066 case 2: 2067 case 3: 2068 case 4: 2069 /* 2070 * Extract the brand string 2071 */ 2072 *iptr++ = cp->cp_eax; 2073 *iptr++ = cp->cp_ebx; 2074 *iptr++ = cp->cp_ecx; 2075 *iptr++ = cp->cp_edx; 2076 break; 2077 case 5: 2078 switch (cpi->cpi_vendor) { 2079 case X86_VENDOR_AMD: 2080 /* 2081 * The Athlon and Duron were the first 2082 * parts to report the sizes of the 2083 * TLB for large pages. Before then, 2084 * we don't trust the data. 2085 */ 2086 if (cpi->cpi_family < 6 || 2087 (cpi->cpi_family == 6 && 2088 cpi->cpi_model < 1)) 2089 cp->cp_eax = 0; 2090 break; 2091 default: 2092 break; 2093 } 2094 break; 2095 case 6: 2096 switch (cpi->cpi_vendor) { 2097 case X86_VENDOR_AMD: 2098 /* 2099 * The Athlon and Duron were the first 2100 * AMD parts with L2 TLB's. 2101 * Before then, don't trust the data. 2102 */ 2103 if (cpi->cpi_family < 6 || 2104 cpi->cpi_family == 6 && 2105 cpi->cpi_model < 1) 2106 cp->cp_eax = cp->cp_ebx = 0; 2107 /* 2108 * AMD Duron rev A0 reports L2 2109 * cache size incorrectly as 1K 2110 * when it is really 64K 2111 */ 2112 if (cpi->cpi_family == 6 && 2113 cpi->cpi_model == 3 && 2114 cpi->cpi_step == 0) { 2115 cp->cp_ecx &= 0xffff; 2116 cp->cp_ecx |= 0x400000; 2117 } 2118 break; 2119 case X86_VENDOR_Cyrix: /* VIA C3 */ 2120 /* 2121 * VIA C3 processors are a bit messed 2122 * up w.r.t. encoding cache sizes in %ecx 2123 */ 2124 if (cpi->cpi_family != 6) 2125 break; 2126 /* 2127 * model 7 and 8 were incorrectly encoded 2128 * 2129 * xxx is model 8 really broken? 
2130 */ 2131 if (cpi->cpi_model == 7 || 2132 cpi->cpi_model == 8) 2133 cp->cp_ecx = 2134 BITX(cp->cp_ecx, 31, 24) << 16 | 2135 BITX(cp->cp_ecx, 23, 16) << 12 | 2136 BITX(cp->cp_ecx, 15, 8) << 8 | 2137 BITX(cp->cp_ecx, 7, 0); 2138 /* 2139 * model 9 stepping 1 has wrong associativity 2140 */ 2141 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 2142 cp->cp_ecx |= 8 << 12; 2143 break; 2144 case X86_VENDOR_Intel: 2145 /* 2146 * Extended L2 Cache features function. 2147 * First appeared on Prescott. 2148 */ 2149 default: 2150 break; 2151 } 2152 break; 2153 default: 2154 break; 2155 } 2156 } 2157 2158 pass2_done: 2159 cpi->cpi_pass = 2; 2160 } 2161 2162 static const char * 2163 intel_cpubrand(const struct cpuid_info *cpi) 2164 { 2165 int i; 2166 2167 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2168 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 2169 return ("i486"); 2170 2171 switch (cpi->cpi_family) { 2172 case 5: 2173 return ("Intel Pentium(r)"); 2174 case 6: 2175 switch (cpi->cpi_model) { 2176 uint_t celeron, xeon; 2177 const struct cpuid_regs *cp; 2178 case 0: 2179 case 1: 2180 case 2: 2181 return ("Intel Pentium(r) Pro"); 2182 case 3: 2183 case 4: 2184 return ("Intel Pentium(r) II"); 2185 case 6: 2186 return ("Intel Celeron(r)"); 2187 case 5: 2188 case 7: 2189 celeron = xeon = 0; 2190 cp = &cpi->cpi_std[2]; /* cache info */ 2191 2192 for (i = 1; i < 4; i++) { 2193 uint_t tmp; 2194 2195 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 2196 if (tmp == 0x40) 2197 celeron++; 2198 if (tmp >= 0x44 && tmp <= 0x45) 2199 xeon++; 2200 } 2201 2202 for (i = 0; i < 2; i++) { 2203 uint_t tmp; 2204 2205 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 2206 if (tmp == 0x40) 2207 celeron++; 2208 else if (tmp >= 0x44 && tmp <= 0x45) 2209 xeon++; 2210 } 2211 2212 for (i = 0; i < 4; i++) { 2213 uint_t tmp; 2214 2215 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 2216 if (tmp == 0x40) 2217 celeron++; 2218 else if (tmp >= 0x44 && tmp <= 0x45) 2219 xeon++; 2220 } 2221 2222 for (i = 0; i < 4; i++) { 2223 uint_t tmp; 2224 2225 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 2226 if (tmp == 0x40) 2227 celeron++; 2228 else if (tmp >= 0x44 && tmp <= 0x45) 2229 xeon++; 2230 } 2231 2232 if (celeron) 2233 return ("Intel Celeron(r)"); 2234 if (xeon) 2235 return (cpi->cpi_model == 5 ? 2236 "Intel Pentium(r) II Xeon(tm)" : 2237 "Intel Pentium(r) III Xeon(tm)"); 2238 return (cpi->cpi_model == 5 ? 
2239 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 2240 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 2241 default: 2242 break; 2243 } 2244 default: 2245 break; 2246 } 2247 2248 /* BrandID is present if the field is nonzero */ 2249 if (cpi->cpi_brandid != 0) { 2250 static const struct { 2251 uint_t bt_bid; 2252 const char *bt_str; 2253 } brand_tbl[] = { 2254 { 0x1, "Intel(r) Celeron(r)" }, 2255 { 0x2, "Intel(r) Pentium(r) III" }, 2256 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 2257 { 0x4, "Intel(r) Pentium(r) III" }, 2258 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 2259 { 0x7, "Mobile Intel(r) Celeron(r)" }, 2260 { 0x8, "Intel(r) Pentium(r) 4" }, 2261 { 0x9, "Intel(r) Pentium(r) 4" }, 2262 { 0xa, "Intel(r) Celeron(r)" }, 2263 { 0xb, "Intel(r) Xeon(tm)" }, 2264 { 0xc, "Intel(r) Xeon(tm) MP" }, 2265 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 2266 { 0xf, "Mobile Intel(r) Celeron(r)" }, 2267 { 0x11, "Mobile Genuine Intel(r)" }, 2268 { 0x12, "Intel(r) Celeron(r) M" }, 2269 { 0x13, "Mobile Intel(r) Celeron(r)" }, 2270 { 0x14, "Intel(r) Celeron(r)" }, 2271 { 0x15, "Mobile Genuine Intel(r)" }, 2272 { 0x16, "Intel(r) Pentium(r) M" }, 2273 { 0x17, "Mobile Intel(r) Celeron(r)" } 2274 }; 2275 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 2276 uint_t sgn; 2277 2278 sgn = (cpi->cpi_family << 8) | 2279 (cpi->cpi_model << 4) | cpi->cpi_step; 2280 2281 for (i = 0; i < btblmax; i++) 2282 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 2283 break; 2284 if (i < btblmax) { 2285 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 2286 return ("Intel(r) Celeron(r)"); 2287 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 2288 return ("Intel(r) Xeon(tm) MP"); 2289 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 2290 return ("Intel(r) Xeon(tm)"); 2291 return (brand_tbl[i].bt_str); 2292 } 2293 } 2294 2295 return (NULL); 2296 } 2297 2298 static const char * 2299 amd_cpubrand(const struct cpuid_info *cpi) 2300 { 2301 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2302 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 2303 return ("i486 compatible"); 2304 2305 switch (cpi->cpi_family) { 2306 case 5: 2307 switch (cpi->cpi_model) { 2308 case 0: 2309 case 1: 2310 case 2: 2311 case 3: 2312 case 4: 2313 case 5: 2314 return ("AMD-K5(r)"); 2315 case 6: 2316 case 7: 2317 return ("AMD-K6(r)"); 2318 case 8: 2319 return ("AMD-K6(r)-2"); 2320 case 9: 2321 return ("AMD-K6(r)-III"); 2322 default: 2323 return ("AMD (family 5)"); 2324 } 2325 case 6: 2326 switch (cpi->cpi_model) { 2327 case 1: 2328 return ("AMD-K7(tm)"); 2329 case 0: 2330 case 2: 2331 case 4: 2332 return ("AMD Athlon(tm)"); 2333 case 3: 2334 case 7: 2335 return ("AMD Duron(tm)"); 2336 case 6: 2337 case 8: 2338 case 10: 2339 /* 2340 * Use the L2 cache size to distinguish 2341 */ 2342 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
2343 "AMD Athlon(tm)" : "AMD Duron(tm)"); 2344 default: 2345 return ("AMD (family 6)"); 2346 } 2347 default: 2348 break; 2349 } 2350 2351 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 2352 cpi->cpi_brandid != 0) { 2353 switch (BITX(cpi->cpi_brandid, 7, 5)) { 2354 case 3: 2355 return ("AMD Opteron(tm) UP 1xx"); 2356 case 4: 2357 return ("AMD Opteron(tm) DP 2xx"); 2358 case 5: 2359 return ("AMD Opteron(tm) MP 8xx"); 2360 default: 2361 return ("AMD Opteron(tm)"); 2362 } 2363 } 2364 2365 return (NULL); 2366 } 2367 2368 static const char * 2369 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 2370 { 2371 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2372 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 2373 type == X86_TYPE_CYRIX_486) 2374 return ("i486 compatible"); 2375 2376 switch (type) { 2377 case X86_TYPE_CYRIX_6x86: 2378 return ("Cyrix 6x86"); 2379 case X86_TYPE_CYRIX_6x86L: 2380 return ("Cyrix 6x86L"); 2381 case X86_TYPE_CYRIX_6x86MX: 2382 return ("Cyrix 6x86MX"); 2383 case X86_TYPE_CYRIX_GXm: 2384 return ("Cyrix GXm"); 2385 case X86_TYPE_CYRIX_MediaGX: 2386 return ("Cyrix MediaGX"); 2387 case X86_TYPE_CYRIX_MII: 2388 return ("Cyrix M2"); 2389 case X86_TYPE_VIA_CYRIX_III: 2390 return ("VIA Cyrix M3"); 2391 default: 2392 /* 2393 * Have another wild guess .. 2394 */ 2395 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 2396 return ("Cyrix 5x86"); 2397 else if (cpi->cpi_family == 5) { 2398 switch (cpi->cpi_model) { 2399 case 2: 2400 return ("Cyrix 6x86"); /* Cyrix M1 */ 2401 case 4: 2402 return ("Cyrix MediaGX"); 2403 default: 2404 break; 2405 } 2406 } else if (cpi->cpi_family == 6) { 2407 switch (cpi->cpi_model) { 2408 case 0: 2409 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 2410 case 5: 2411 case 6: 2412 case 7: 2413 case 8: 2414 case 9: 2415 return ("VIA C3"); 2416 default: 2417 break; 2418 } 2419 } 2420 break; 2421 } 2422 return (NULL); 2423 } 2424 2425 /* 2426 * This only gets called in the case that the CPU extended 2427 * feature brand string (0x80000002, 0x80000003, 0x80000004) 2428 * aren't available, or contain null bytes for some reason. 2429 */ 2430 static void 2431 fabricate_brandstr(struct cpuid_info *cpi) 2432 { 2433 const char *brand = NULL; 2434 2435 switch (cpi->cpi_vendor) { 2436 case X86_VENDOR_Intel: 2437 brand = intel_cpubrand(cpi); 2438 break; 2439 case X86_VENDOR_AMD: 2440 brand = amd_cpubrand(cpi); 2441 break; 2442 case X86_VENDOR_Cyrix: 2443 brand = cyrix_cpubrand(cpi, x86_type); 2444 break; 2445 case X86_VENDOR_NexGen: 2446 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2447 brand = "NexGen Nx586"; 2448 break; 2449 case X86_VENDOR_Centaur: 2450 if (cpi->cpi_family == 5) 2451 switch (cpi->cpi_model) { 2452 case 4: 2453 brand = "Centaur C6"; 2454 break; 2455 case 8: 2456 brand = "Centaur C2"; 2457 break; 2458 case 9: 2459 brand = "Centaur C3"; 2460 break; 2461 default: 2462 break; 2463 } 2464 break; 2465 case X86_VENDOR_Rise: 2466 if (cpi->cpi_family == 5 && 2467 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 2468 brand = "Rise mP6"; 2469 break; 2470 case X86_VENDOR_SiS: 2471 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2472 brand = "SiS 55x"; 2473 break; 2474 case X86_VENDOR_TM: 2475 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 2476 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 2477 break; 2478 case X86_VENDOR_NSC: 2479 case X86_VENDOR_UMC: 2480 default: 2481 break; 2482 } 2483 if (brand) { 2484 (void) strcpy((char *)cpi->cpi_brandstr, brand); 2485 return; 2486 } 2487 2488 /* 2489 * If all else fails ... 
2490 */ 2491 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 2492 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 2493 cpi->cpi_model, cpi->cpi_step); 2494 } 2495 2496 /* 2497 * This routine is called just after kernel memory allocation 2498 * becomes available on cpu0, and as part of mp_startup() on 2499 * the other cpus. 2500 * 2501 * Fixup the brand string, and collect any information from cpuid 2502 * that requires dynamically allocated storage to represent. 2503 */ 2504 /*ARGSUSED*/ 2505 void 2506 cpuid_pass3(cpu_t *cpu) 2507 { 2508 int i, max, shft, level, size; 2509 struct cpuid_regs regs; 2510 struct cpuid_regs *cp; 2511 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2512 2513 ASSERT(cpi->cpi_pass == 2); 2514 2515 /* 2516 * Function 4: Deterministic cache parameters 2517 * 2518 * Take this opportunity to detect the number of threads 2519 * sharing the last level cache, and construct a corresponding 2520 * cache id. The respective cpuid_info members are initialized 2521 * to the default case of "no last level cache sharing". 2522 */ 2523 cpi->cpi_ncpu_shr_last_cache = 1; 2524 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 2525 2526 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 2527 2528 /* 2529 * Find the # of elements (size) returned by fn 4, and along 2530 * the way detect last level cache sharing details. 2531 */ 2532 bzero(&regs, sizeof (regs)); 2533 cp = &regs; 2534 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 2535 cp->cp_eax = 4; 2536 cp->cp_ecx = i; 2537 2538 (void) __cpuid_insn(cp); 2539 2540 if (CPI_CACHE_TYPE(cp) == 0) 2541 break; 2542 level = CPI_CACHE_LVL(cp); 2543 if (level > max) { 2544 max = level; 2545 cpi->cpi_ncpu_shr_last_cache = 2546 CPI_NTHR_SHR_CACHE(cp) + 1; 2547 } 2548 } 2549 cpi->cpi_std_4_size = size = i; 2550 2551 /* 2552 * Allocate the cpi_std_4 array. The first element 2553 * references the regs for fn 4, %ecx == 0, which 2554 * cpuid_pass2() stashed in cpi->cpi_std[4]. 2555 */ 2556 if (size > 0) { 2557 cpi->cpi_std_4 = 2558 kmem_alloc(size * sizeof (cp), KM_SLEEP); 2559 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 2560 2561 /* 2562 * Allocate storage to hold the additional regs 2563 * for function 4, %ecx == 1 .. cpi_std_4_size. 2564 * 2565 * The regs for fn 4, %ecx == 0 have already 2566 * been allocated as indicated above. 2567 */ 2568 for (i = 1; i < size; i++) { 2569 cp = cpi->cpi_std_4[i] = 2570 kmem_zalloc(sizeof (regs), KM_SLEEP); 2571 cp->cp_eax = 4; 2572 cp->cp_ecx = i; 2573 2574 (void) __cpuid_insn(cp); 2575 } 2576 } 2577 /* 2578 * Determine the number of bits needed to represent 2579 * the number of CPUs sharing the last level cache. 2580 * 2581 * Shift off that number of bits from the APIC id to 2582 * derive the cache id. 2583 */ 2584 shft = 0; 2585 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 2586 shft++; 2587 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 2588 } 2589 2590 /* 2591 * Now fixup the brand string 2592 */ 2593 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 2594 fabricate_brandstr(cpi); 2595 } else { 2596 2597 /* 2598 * If we successfully extracted a brand string from the cpuid 2599 * instruction, clean it up by removing leading spaces and 2600 * similar junk.
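 * (For instance, a hypothetical raw string "  Genuine Intel(R) CPU  "
 * would come out of the cleanup below as "Intel(r) CPU".)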
2601 */ 2602 if (cpi->cpi_brandstr[0]) { 2603 size_t maxlen = sizeof (cpi->cpi_brandstr); 2604 char *src, *dst; 2605 2606 dst = src = (char *)cpi->cpi_brandstr; 2607 src[maxlen - 1] = '\0'; 2608 /* 2609 * strip leading spaces 2610 */ 2611 while (*src == ' ') 2612 src++; 2613 /* 2614 * Remove any 'Genuine' or "Authentic" prefixes 2615 */ 2616 if (strncmp(src, "Genuine ", 8) == 0) 2617 src += 8; 2618 if (strncmp(src, "Authentic ", 10) == 0) 2619 src += 10; 2620 2621 /* 2622 * Now do an in-place copy. 2623 * Map (R) to (r) and (TM) to (tm). 2624 * The era of teletypes is long gone, and there's 2625 * -really- no need to shout. 2626 */ 2627 while (*src != '\0') { 2628 if (src[0] == '(') { 2629 if (strncmp(src + 1, "R)", 2) == 0) { 2630 (void) strncpy(dst, "(r)", 3); 2631 src += 3; 2632 dst += 3; 2633 continue; 2634 } 2635 if (strncmp(src + 1, "TM)", 3) == 0) { 2636 (void) strncpy(dst, "(tm)", 4); 2637 src += 4; 2638 dst += 4; 2639 continue; 2640 } 2641 } 2642 *dst++ = *src++; 2643 } 2644 *dst = '\0'; 2645 2646 /* 2647 * Finally, remove any trailing spaces 2648 */ 2649 while (--dst > cpi->cpi_brandstr) 2650 if (*dst == ' ') 2651 *dst = '\0'; 2652 else 2653 break; 2654 } else 2655 fabricate_brandstr(cpi); 2656 } 2657 cpi->cpi_pass = 3; 2658 } 2659 2660 /* 2661 * This routine is called out of bind_hwcap() much later in the life 2662 * of the kernel (post_startup()). The job of this routine is to resolve 2663 * the hardware feature support and kernel support for those features into 2664 * what we're actually going to tell applications via the aux vector. 2665 */ 2666 void 2667 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out) 2668 { 2669 struct cpuid_info *cpi; 2670 uint_t hwcap_flags = 0, hwcap_flags_2 = 0; 2671 2672 if (cpu == NULL) 2673 cpu = CPU; 2674 cpi = cpu->cpu_m.mcpu_cpi; 2675 2676 ASSERT(cpi->cpi_pass == 3); 2677 2678 if (cpi->cpi_maxeax >= 1) { 2679 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2680 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2681 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES]; 2682 2683 *edx = CPI_FEATURES_EDX(cpi); 2684 *ecx = CPI_FEATURES_ECX(cpi); 2685 *ebx = CPI_FEATURES_7_0_EBX(cpi); 2686 2687 /* 2688 * [these require explicit kernel support] 2689 */ 2690 if (!is_x86_feature(x86_featureset, X86FSET_SEP)) 2691 *edx &= ~CPUID_INTC_EDX_SEP; 2692 2693 if (!is_x86_feature(x86_featureset, X86FSET_SSE)) 2694 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2695 if (!is_x86_feature(x86_featureset, X86FSET_SSE2)) 2696 *edx &= ~CPUID_INTC_EDX_SSE2; 2697 2698 if (!is_x86_feature(x86_featureset, X86FSET_HTT)) 2699 *edx &= ~CPUID_INTC_EDX_HTT; 2700 2701 if (!is_x86_feature(x86_featureset, X86FSET_SSE3)) 2702 *ecx &= ~CPUID_INTC_ECX_SSE3; 2703 2704 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3)) 2705 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2706 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1)) 2707 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2708 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2)) 2709 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2710 if (!is_x86_feature(x86_featureset, X86FSET_AES)) 2711 *ecx &= ~CPUID_INTC_ECX_AES; 2712 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ)) 2713 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ; 2714 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) 2715 *ecx &= ~(CPUID_INTC_ECX_XSAVE | 2716 CPUID_INTC_ECX_OSXSAVE); 2717 if (!is_x86_feature(x86_featureset, X86FSET_AVX)) 2718 *ecx &= ~CPUID_INTC_ECX_AVX; 2719 if (!is_x86_feature(x86_featureset, X86FSET_F16C)) 2720 *ecx &= ~CPUID_INTC_ECX_F16C; 2721 if (!is_x86_feature(x86_featureset, X86FSET_FMA)) 
2722 *ecx &= ~CPUID_INTC_ECX_FMA; 2723 if (!is_x86_feature(x86_featureset, X86FSET_BMI1)) 2724 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 2725 if (!is_x86_feature(x86_featureset, X86FSET_BMI2)) 2726 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 2727 if (!is_x86_feature(x86_featureset, X86FSET_AVX2)) 2728 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 2729 2730 /* 2731 * [no explicit support required beyond x87 fp context] 2732 */ 2733 if (!fpu_exists) 2734 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2735 2736 /* 2737 * Now map the supported feature vector to things that we 2738 * think userland will care about. 2739 */ 2740 if (*edx & CPUID_INTC_EDX_SEP) 2741 hwcap_flags |= AV_386_SEP; 2742 if (*edx & CPUID_INTC_EDX_SSE) 2743 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2744 if (*edx & CPUID_INTC_EDX_SSE2) 2745 hwcap_flags |= AV_386_SSE2; 2746 if (*ecx & CPUID_INTC_ECX_SSE3) 2747 hwcap_flags |= AV_386_SSE3; 2748 if (*ecx & CPUID_INTC_ECX_SSSE3) 2749 hwcap_flags |= AV_386_SSSE3; 2750 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2751 hwcap_flags |= AV_386_SSE4_1; 2752 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2753 hwcap_flags |= AV_386_SSE4_2; 2754 if (*ecx & CPUID_INTC_ECX_MOVBE) 2755 hwcap_flags |= AV_386_MOVBE; 2756 if (*ecx & CPUID_INTC_ECX_AES) 2757 hwcap_flags |= AV_386_AES; 2758 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2759 hwcap_flags |= AV_386_PCLMULQDQ; 2760 if ((*ecx & CPUID_INTC_ECX_XSAVE) && 2761 (*ecx & CPUID_INTC_ECX_OSXSAVE)) { 2762 hwcap_flags |= AV_386_XSAVE; 2763 2764 if (*ecx & CPUID_INTC_ECX_AVX) { 2765 hwcap_flags |= AV_386_AVX; 2766 if (*ecx & CPUID_INTC_ECX_F16C) 2767 hwcap_flags_2 |= AV_386_2_F16C; 2768 if (*ecx & CPUID_INTC_ECX_FMA) 2769 hwcap_flags_2 |= AV_386_2_FMA; 2770 if (*ebx & CPUID_INTC_EBX_7_0_BMI1) 2771 hwcap_flags_2 |= AV_386_2_BMI1; 2772 if (*ebx & CPUID_INTC_EBX_7_0_BMI2) 2773 hwcap_flags_2 |= AV_386_2_BMI2; 2774 if (*ebx & CPUID_INTC_EBX_7_0_AVX2) 2775 hwcap_flags_2 |= AV_386_2_AVX2; 2776 } 2777 } 2778 if (*ecx & CPUID_INTC_ECX_VMX) 2779 hwcap_flags |= AV_386_VMX; 2780 if (*ecx & CPUID_INTC_ECX_POPCNT) 2781 hwcap_flags |= AV_386_POPCNT; 2782 if (*edx & CPUID_INTC_EDX_FPU) 2783 hwcap_flags |= AV_386_FPU; 2784 if (*edx & CPUID_INTC_EDX_MMX) 2785 hwcap_flags |= AV_386_MMX; 2786 2787 if (*edx & CPUID_INTC_EDX_TSC) 2788 hwcap_flags |= AV_386_TSC; 2789 if (*edx & CPUID_INTC_EDX_CX8) 2790 hwcap_flags |= AV_386_CX8; 2791 if (*edx & CPUID_INTC_EDX_CMOV) 2792 hwcap_flags |= AV_386_CMOV; 2793 if (*ecx & CPUID_INTC_ECX_CX16) 2794 hwcap_flags |= AV_386_CX16; 2795 2796 if (*ecx & CPUID_INTC_ECX_RDRAND) 2797 hwcap_flags_2 |= AV_386_2_RDRAND; 2798 } 2799 2800 if (cpi->cpi_xmaxeax < 0x80000001) 2801 goto pass4_done; 2802 2803 switch (cpi->cpi_vendor) { 2804 struct cpuid_regs cp; 2805 uint32_t *edx, *ecx; 2806 2807 case X86_VENDOR_Intel: 2808 /* 2809 * Seems like Intel duplicated what we necessary 2810 * here to make the initial crop of 64-bit OS's work. 2811 * Hopefully, those are the only "extended" bits 2812 * they'll add. 
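 * (Hence the fall through to the AMD handling below.)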
2813 */ 2814 /*FALLTHROUGH*/ 2815 2816 case X86_VENDOR_AMD: 2817 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2818 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2819 2820 *edx = CPI_FEATURES_XTD_EDX(cpi); 2821 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2822 2823 /* 2824 * [these features require explicit kernel support] 2825 */ 2826 switch (cpi->cpi_vendor) { 2827 case X86_VENDOR_Intel: 2828 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 2829 *edx &= ~CPUID_AMD_EDX_TSCP; 2830 break; 2831 2832 case X86_VENDOR_AMD: 2833 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 2834 *edx &= ~CPUID_AMD_EDX_TSCP; 2835 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A)) 2836 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2837 break; 2838 2839 default: 2840 break; 2841 } 2842 2843 /* 2844 * [no explicit support required beyond 2845 * x87 fp context and exception handlers] 2846 */ 2847 if (!fpu_exists) 2848 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2849 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2850 2851 if (!is_x86_feature(x86_featureset, X86FSET_NX)) 2852 *edx &= ~CPUID_AMD_EDX_NX; 2853 #if !defined(__amd64) 2854 *edx &= ~CPUID_AMD_EDX_LM; 2855 #endif 2856 /* 2857 * Now map the supported feature vector to 2858 * things that we think userland will care about. 2859 */ 2860 #if defined(__amd64) 2861 if (*edx & CPUID_AMD_EDX_SYSC) 2862 hwcap_flags |= AV_386_AMD_SYSC; 2863 #endif 2864 if (*edx & CPUID_AMD_EDX_MMXamd) 2865 hwcap_flags |= AV_386_AMD_MMX; 2866 if (*edx & CPUID_AMD_EDX_3DNow) 2867 hwcap_flags |= AV_386_AMD_3DNow; 2868 if (*edx & CPUID_AMD_EDX_3DNowx) 2869 hwcap_flags |= AV_386_AMD_3DNowx; 2870 if (*ecx & CPUID_AMD_ECX_SVM) 2871 hwcap_flags |= AV_386_AMD_SVM; 2872 2873 switch (cpi->cpi_vendor) { 2874 case X86_VENDOR_AMD: 2875 if (*edx & CPUID_AMD_EDX_TSCP) 2876 hwcap_flags |= AV_386_TSCP; 2877 if (*ecx & CPUID_AMD_ECX_AHF64) 2878 hwcap_flags |= AV_386_AHF; 2879 if (*ecx & CPUID_AMD_ECX_SSE4A) 2880 hwcap_flags |= AV_386_AMD_SSE4A; 2881 if (*ecx & CPUID_AMD_ECX_LZCNT) 2882 hwcap_flags |= AV_386_AMD_LZCNT; 2883 break; 2884 2885 case X86_VENDOR_Intel: 2886 if (*edx & CPUID_AMD_EDX_TSCP) 2887 hwcap_flags |= AV_386_TSCP; 2888 /* 2889 * Aarrgh. 2890 * Intel uses a different bit in the same word. 2891 */ 2892 if (*ecx & CPUID_INTC_ECX_AHF64) 2893 hwcap_flags |= AV_386_AHF; 2894 break; 2895 2896 default: 2897 break; 2898 } 2899 break; 2900 2901 case X86_VENDOR_TM: 2902 cp.cp_eax = 0x80860001; 2903 (void) __cpuid_insn(&cp); 2904 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2905 break; 2906 2907 default: 2908 break; 2909 } 2910 2911 pass4_done: 2912 cpi->cpi_pass = 4; 2913 if (hwcap_out != NULL) { 2914 hwcap_out[0] = hwcap_flags; 2915 hwcap_out[1] = hwcap_flags_2; 2916 } 2917 } 2918 2919 2920 /* 2921 * Simulate the cpuid instruction using the data we previously 2922 * captured about this CPU. We try our best to return the truth 2923 * about the hardware, independently of kernel support. 2924 */ 2925 uint32_t 2926 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2927 { 2928 struct cpuid_info *cpi; 2929 struct cpuid_regs *xcp; 2930 2931 if (cpu == NULL) 2932 cpu = CPU; 2933 cpi = cpu->cpu_m.mcpu_cpi; 2934 2935 ASSERT(cpuid_checkpass(cpu, 3)); 2936 2937 /* 2938 * CPUID data is cached in two separate places: cpi_std for standard 2939 * CPUID functions, and cpi_extd for extended CPUID functions. 
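 * For example, a request with cp_eax == 0x80000002 (assuming that leaf
 * is within cpi_xmaxeax) is served from cpi_extd[2]; an uncached leaf
 * falls through to the hardware below.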
2940 */ 2941 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2942 xcp = &cpi->cpi_std[cp->cp_eax]; 2943 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2944 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2945 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2946 else 2947 /* 2948 * The caller is asking for data from an input parameter which 2949 * the kernel has not cached. In this case we go fetch from 2950 * the hardware and return the data directly to the user. 2951 */ 2952 return (__cpuid_insn(cp)); 2953 2954 cp->cp_eax = xcp->cp_eax; 2955 cp->cp_ebx = xcp->cp_ebx; 2956 cp->cp_ecx = xcp->cp_ecx; 2957 cp->cp_edx = xcp->cp_edx; 2958 return (cp->cp_eax); 2959 } 2960 2961 int 2962 cpuid_checkpass(cpu_t *cpu, int pass) 2963 { 2964 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2965 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2966 } 2967 2968 int 2969 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2970 { 2971 ASSERT(cpuid_checkpass(cpu, 3)); 2972 2973 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2974 } 2975 2976 int 2977 cpuid_is_cmt(cpu_t *cpu) 2978 { 2979 if (cpu == NULL) 2980 cpu = CPU; 2981 2982 ASSERT(cpuid_checkpass(cpu, 1)); 2983 2984 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2985 } 2986 2987 /* 2988 * AMD and Intel both implement the 64-bit variant of the syscall 2989 * instruction (syscallq), so if there's -any- support for syscall, 2990 * cpuid currently says "yes, we support this". 2991 * 2992 * However, Intel decided to -not- implement the 32-bit variant of the 2993 * syscall instruction, so we provide a predicate to allow our caller 2994 * to test that subtlety here. 2995 * 2996 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2997 * even in the case where the hardware would in fact support it. 2998 */ 2999 /*ARGSUSED*/ 3000 int 3001 cpuid_syscall32_insn(cpu_t *cpu) 3002 { 3003 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 3004 3005 #if !defined(__xpv) 3006 if (cpu == NULL) 3007 cpu = CPU; 3008 3009 /*CSTYLED*/ 3010 { 3011 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3012 3013 if (cpi->cpi_vendor == X86_VENDOR_AMD && 3014 cpi->cpi_xmaxeax >= 0x80000001 && 3015 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 3016 return (1); 3017 } 3018 #endif 3019 return (0); 3020 } 3021 3022 int 3023 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 3024 { 3025 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3026 3027 static const char fmt[] = 3028 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 3029 static const char fmt_ht[] = 3030 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 3031 3032 ASSERT(cpuid_checkpass(cpu, 1)); 3033 3034 if (cpuid_is_cmt(cpu)) 3035 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 3036 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 3037 cpi->cpi_family, cpi->cpi_model, 3038 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 3039 return (snprintf(s, n, fmt, 3040 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 3041 cpi->cpi_family, cpi->cpi_model, 3042 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 3043 } 3044 3045 const char * 3046 cpuid_getvendorstr(cpu_t *cpu) 3047 { 3048 ASSERT(cpuid_checkpass(cpu, 1)); 3049 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 3050 } 3051 3052 uint_t 3053 cpuid_getvendor(cpu_t *cpu) 3054 { 3055 ASSERT(cpuid_checkpass(cpu, 1)); 3056 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 3057 } 3058 3059 uint_t 3060 cpuid_getfamily(cpu_t *cpu) 3061 { 3062 ASSERT(cpuid_checkpass(cpu, 1)); 3063 return (cpu->cpu_m.mcpu_cpi->cpi_family); 3064 } 3065 3066 uint_t 3067 cpuid_getmodel(cpu_t *cpu) 3068 { 3069 ASSERT(cpuid_checkpass(cpu, 1)); 3070 return (cpu->cpu_m.mcpu_cpi->cpi_model); 3071 } 3072 3073 uint_t 3074 cpuid_get_ncpu_per_chip(cpu_t *cpu) 3075 { 3076 ASSERT(cpuid_checkpass(cpu, 1)); 3077 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 3078 } 3079 3080 uint_t 3081 cpuid_get_ncore_per_chip(cpu_t *cpu) 3082 { 3083 ASSERT(cpuid_checkpass(cpu, 1)); 3084 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 3085 } 3086 3087 uint_t 3088 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 3089 { 3090 ASSERT(cpuid_checkpass(cpu, 2)); 3091 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 3092 } 3093 3094 id_t 3095 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 3096 { 3097 ASSERT(cpuid_checkpass(cpu, 2)); 3098 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 3099 } 3100 3101 uint_t 3102 cpuid_getstep(cpu_t *cpu) 3103 { 3104 ASSERT(cpuid_checkpass(cpu, 1)); 3105 return (cpu->cpu_m.mcpu_cpi->cpi_step); 3106 } 3107 3108 uint_t 3109 cpuid_getsig(struct cpu *cpu) 3110 { 3111 ASSERT(cpuid_checkpass(cpu, 1)); 3112 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 3113 } 3114 3115 uint32_t 3116 cpuid_getchiprev(struct cpu *cpu) 3117 { 3118 ASSERT(cpuid_checkpass(cpu, 1)); 3119 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 3120 } 3121 3122 const char * 3123 cpuid_getchiprevstr(struct cpu *cpu) 3124 { 3125 ASSERT(cpuid_checkpass(cpu, 1)); 3126 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 3127 } 3128 3129 uint32_t 3130 cpuid_getsockettype(struct cpu *cpu) 3131 { 3132 ASSERT(cpuid_checkpass(cpu, 1)); 3133 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 3134 } 3135 3136 const char * 3137 cpuid_getsocketstr(cpu_t *cpu) 3138 { 3139 static const char *socketstr = NULL; 3140 struct cpuid_info *cpi; 3141 3142 ASSERT(cpuid_checkpass(cpu, 1)); 3143 cpi = cpu->cpu_m.mcpu_cpi; 3144 3145 /* Assume that socket types are the same across the system */ 3146 if (socketstr == NULL) 3147 socketstr 
= _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 3148 cpi->cpi_model, cpi->cpi_step); 3149 3150 3151 return (socketstr); 3152 } 3153 3154 int 3155 cpuid_get_chipid(cpu_t *cpu) 3156 { 3157 ASSERT(cpuid_checkpass(cpu, 1)); 3158 3159 if (cpuid_is_cmt(cpu)) 3160 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 3161 return (cpu->cpu_id); 3162 } 3163 3164 id_t 3165 cpuid_get_coreid(cpu_t *cpu) 3166 { 3167 ASSERT(cpuid_checkpass(cpu, 1)); 3168 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 3169 } 3170 3171 int 3172 cpuid_get_pkgcoreid(cpu_t *cpu) 3173 { 3174 ASSERT(cpuid_checkpass(cpu, 1)); 3175 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 3176 } 3177 3178 int 3179 cpuid_get_clogid(cpu_t *cpu) 3180 { 3181 ASSERT(cpuid_checkpass(cpu, 1)); 3182 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 3183 } 3184 3185 int 3186 cpuid_get_cacheid(cpu_t *cpu) 3187 { 3188 ASSERT(cpuid_checkpass(cpu, 1)); 3189 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 3190 } 3191 3192 uint_t 3193 cpuid_get_procnodeid(cpu_t *cpu) 3194 { 3195 ASSERT(cpuid_checkpass(cpu, 1)); 3196 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid); 3197 } 3198 3199 uint_t 3200 cpuid_get_procnodes_per_pkg(cpu_t *cpu) 3201 { 3202 ASSERT(cpuid_checkpass(cpu, 1)); 3203 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg); 3204 } 3205 3206 uint_t 3207 cpuid_get_compunitid(cpu_t *cpu) 3208 { 3209 ASSERT(cpuid_checkpass(cpu, 1)); 3210 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid); 3211 } 3212 3213 uint_t 3214 cpuid_get_cores_per_compunit(cpu_t *cpu) 3215 { 3216 ASSERT(cpuid_checkpass(cpu, 1)); 3217 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit); 3218 } 3219 3220 /*ARGSUSED*/ 3221 int 3222 cpuid_have_cr8access(cpu_t *cpu) 3223 { 3224 #if defined(__amd64) 3225 return (1); 3226 #else 3227 struct cpuid_info *cpi; 3228 3229 ASSERT(cpu != NULL); 3230 cpi = cpu->cpu_m.mcpu_cpi; 3231 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 3232 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 3233 return (1); 3234 return (0); 3235 #endif 3236 } 3237 3238 uint32_t 3239 cpuid_get_apicid(cpu_t *cpu) 3240 { 3241 ASSERT(cpuid_checkpass(cpu, 1)); 3242 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 3243 return (UINT32_MAX); 3244 } else { 3245 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 3246 } 3247 } 3248 3249 void 3250 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 3251 { 3252 struct cpuid_info *cpi; 3253 3254 if (cpu == NULL) 3255 cpu = CPU; 3256 cpi = cpu->cpu_m.mcpu_cpi; 3257 3258 ASSERT(cpuid_checkpass(cpu, 1)); 3259 3260 if (pabits) 3261 *pabits = cpi->cpi_pabits; 3262 if (vabits) 3263 *vabits = cpi->cpi_vabits; 3264 } 3265 3266 /* 3267 * Returns the number of data TLB entries for a corresponding 3268 * pagesize. If it can't be computed, or isn't known, the 3269 * routine returns zero. If you ask about an architecturally 3270 * impossible pagesize, the routine will panic (so that the 3271 * hat implementor knows that things are inconsistent.) 3272 */ 3273 uint_t 3274 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 3275 { 3276 struct cpuid_info *cpi; 3277 uint_t dtlb_nent = 0; 3278 3279 if (cpu == NULL) 3280 cpu = CPU; 3281 cpi = cpu->cpu_m.mcpu_cpi; 3282 3283 ASSERT(cpuid_checkpass(cpu, 1)); 3284 3285 /* 3286 * Check the L2 TLB info 3287 */ 3288 if (cpi->cpi_xmaxeax >= 0x80000006) { 3289 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 3290 3291 switch (pagesize) { 3292 3293 case 4 * 1024: 3294 /* 3295 * All zero in the top 16 bits of the register 3296 * indicates a unified TLB. Size is in low 16 bits. 
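 * (Hypothetical values: %ebx == 0x00000100 would describe a unified
 * TLB with 256 entries, while %ebx == 0x10801080 would be a split TLB
 * whose d-TLB entry count is taken from bits 27:16, i.e. 0x080 == 128
 * entries.)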
3297 */ 3298 if ((cp->cp_ebx & 0xffff0000) == 0) 3299 dtlb_nent = cp->cp_ebx & 0x0000ffff; 3300 else 3301 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 3302 break; 3303 3304 case 2 * 1024 * 1024: 3305 if ((cp->cp_eax & 0xffff0000) == 0) 3306 dtlb_nent = cp->cp_eax & 0x0000ffff; 3307 else 3308 dtlb_nent = BITX(cp->cp_eax, 27, 16); 3309 break; 3310 3311 default: 3312 panic("unknown L2 pagesize"); 3313 /*NOTREACHED*/ 3314 } 3315 } 3316 3317 if (dtlb_nent != 0) 3318 return (dtlb_nent); 3319 3320 /* 3321 * No L2 TLB support for this size, try L1. 3322 */ 3323 if (cpi->cpi_xmaxeax >= 0x80000005) { 3324 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 3325 3326 switch (pagesize) { 3327 case 4 * 1024: 3328 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 3329 break; 3330 case 2 * 1024 * 1024: 3331 dtlb_nent = BITX(cp->cp_eax, 23, 16); 3332 break; 3333 default: 3334 panic("unknown L1 d-TLB pagesize"); 3335 /*NOTREACHED*/ 3336 } 3337 } 3338 3339 return (dtlb_nent); 3340 } 3341 3342 /* 3343 * Return 0 if the erratum is not present or not applicable, positive 3344 * if it is, and negative if the status of the erratum is unknown. 3345 * 3346 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 3347 * Processors" #25759, Rev 3.57, August 2005 3348 */ 3349 int 3350 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 3351 { 3352 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3353 uint_t eax; 3354 3355 /* 3356 * Bail out if this CPU isn't an AMD CPU, or if it's 3357 * a legacy (32-bit) AMD CPU. 3358 */ 3359 if (cpi->cpi_vendor != X86_VENDOR_AMD || 3360 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 3361 cpi->cpi_family == 6) 3362 3363 return (0); 3364 3365 eax = cpi->cpi_std[1].cp_eax; 3366 3367 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 3368 #define SH_B3(eax) (eax == 0xf51) 3369 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 3370 3371 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 3372 3373 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 3374 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 3375 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 3376 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 3377 3378 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 3379 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 3380 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 3381 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 3382 3383 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 3384 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 3385 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 3386 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 3387 #define BH_E4(eax) (eax == 0x20fb1) 3388 #define SH_E5(eax) (eax == 0x20f42) 3389 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 3390 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 3391 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 3392 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 3393 DH_E6(eax) || JH_E6(eax)) 3394 3395 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 3396 #define DR_B0(eax) (eax == 0x100f20) 3397 #define DR_B1(eax) (eax == 0x100f21) 3398 #define DR_BA(eax) (eax == 0x100f2a) 3399 #define DR_B2(eax) (eax == 0x100f22) 3400 #define DR_B3(eax) (eax == 0x100f23) 3401 #define RB_C0(eax) (eax == 0x100f40) 3402 3403 switch (erratum) { 3404 case 1: 3405 return (cpi->cpi_family < 0x10); 3406 case 51: /* what does the asterisk mean? 
*/ 3407 return (B(eax) || SH_C0(eax) || CG(eax)); 3408 case 52: 3409 return (B(eax)); 3410 case 57: 3411 return (cpi->cpi_family <= 0x11); 3412 case 58: 3413 return (B(eax)); 3414 case 60: 3415 return (cpi->cpi_family <= 0x11); 3416 case 61: 3417 case 62: 3418 case 63: 3419 case 64: 3420 case 65: 3421 case 66: 3422 case 68: 3423 case 69: 3424 case 70: 3425 case 71: 3426 return (B(eax)); 3427 case 72: 3428 return (SH_B0(eax)); 3429 case 74: 3430 return (B(eax)); 3431 case 75: 3432 return (cpi->cpi_family < 0x10); 3433 case 76: 3434 return (B(eax)); 3435 case 77: 3436 return (cpi->cpi_family <= 0x11); 3437 case 78: 3438 return (B(eax) || SH_C0(eax)); 3439 case 79: 3440 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 3441 case 80: 3442 case 81: 3443 case 82: 3444 return (B(eax)); 3445 case 83: 3446 return (B(eax) || SH_C0(eax) || CG(eax)); 3447 case 85: 3448 return (cpi->cpi_family < 0x10); 3449 case 86: 3450 return (SH_C0(eax) || CG(eax)); 3451 case 88: 3452 #if !defined(__amd64) 3453 return (0); 3454 #else 3455 return (B(eax) || SH_C0(eax)); 3456 #endif 3457 case 89: 3458 return (cpi->cpi_family < 0x10); 3459 case 90: 3460 return (B(eax) || SH_C0(eax) || CG(eax)); 3461 case 91: 3462 case 92: 3463 return (B(eax) || SH_C0(eax)); 3464 case 93: 3465 return (SH_C0(eax)); 3466 case 94: 3467 return (B(eax) || SH_C0(eax) || CG(eax)); 3468 case 95: 3469 #if !defined(__amd64) 3470 return (0); 3471 #else 3472 return (B(eax) || SH_C0(eax)); 3473 #endif 3474 case 96: 3475 return (B(eax) || SH_C0(eax) || CG(eax)); 3476 case 97: 3477 case 98: 3478 return (SH_C0(eax) || CG(eax)); 3479 case 99: 3480 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3481 case 100: 3482 return (B(eax) || SH_C0(eax)); 3483 case 101: 3484 case 103: 3485 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3486 case 104: 3487 return (SH_C0(eax) || CG(eax) || D0(eax)); 3488 case 105: 3489 case 106: 3490 case 107: 3491 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3492 case 108: 3493 return (DH_CG(eax)); 3494 case 109: 3495 return (SH_C0(eax) || CG(eax) || D0(eax)); 3496 case 110: 3497 return (D0(eax) || EX(eax)); 3498 case 111: 3499 return (CG(eax)); 3500 case 112: 3501 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 3502 case 113: 3503 return (eax == 0x20fc0); 3504 case 114: 3505 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 3506 case 115: 3507 return (SH_E0(eax) || JH_E1(eax)); 3508 case 116: 3509 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 3510 case 117: 3511 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3512 case 118: 3513 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 3514 JH_E6(eax)); 3515 case 121: 3516 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 3517 case 122: 3518 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 3519 case 123: 3520 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 3521 case 131: 3522 return (cpi->cpi_family < 0x10); 3523 case 6336786: 3524 /* 3525 * Test for AdvPowerMgmtInfo.TscPStateInvariant 3526 * if this is a K8 family or newer processor 3527 */ 3528 if (CPI_FAMILY(cpi) == 0xf) { 3529 struct cpuid_regs regs; 3530 regs.cp_eax = 0x80000007; 3531 (void) __cpuid_insn(&regs); 3532 return (!(regs.cp_edx & 0x100)); 3533 } 3534 return (0); 3535 case 6323525: 3536 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 3537 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 3538 3539 case 6671130: 3540 /* 3541 * check for processors (pre-Shanghai) that do not provide 3542 * optimal management of 1gb ptes in
its tlb. 3543 */ 3544 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 3545 3546 case 298: 3547 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 3548 DR_B2(eax) || RB_C0(eax)); 3549 3550 case 721: 3551 #if defined(__amd64) 3552 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12); 3553 #else 3554 return (0); 3555 #endif 3556 3557 default: 3558 return (-1); 3559 3560 } 3561 } 3562 3563 /* 3564 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 3565 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 3566 */ 3567 int 3568 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 3569 { 3570 struct cpuid_info *cpi; 3571 uint_t osvwid; 3572 static int osvwfeature = -1; 3573 uint64_t osvwlength; 3574 3575 3576 cpi = cpu->cpu_m.mcpu_cpi; 3577 3578 /* confirm OSVW supported */ 3579 if (osvwfeature == -1) { 3580 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 3581 } else { 3582 /* assert that osvw feature setting is consistent on all cpus */ 3583 ASSERT(osvwfeature == 3584 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 3585 } 3586 if (!osvwfeature) 3587 return (-1); 3588 3589 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 3590 3591 switch (erratum) { 3592 case 298: /* osvwid is 0 */ 3593 osvwid = 0; 3594 if (osvwlength <= (uint64_t)osvwid) { 3595 /* osvwid 0 is unknown */ 3596 return (-1); 3597 } 3598 3599 /* 3600 * Check the OSVW STATUS MSR to determine the state 3601 * of the erratum where: 3602 * 0 - fixed by HW 3603 * 1 - BIOS has applied the workaround when BIOS 3604 * workaround is available. (Or for other errata, 3605 * OS workaround is required.) 3606 * For a value of 1, caller will confirm that the 3607 * erratum 298 workaround has indeed been applied by BIOS. 3608 * 3609 * A 1 may be set in cpus that have a HW fix 3610 * in a mixed cpu system. Regarding erratum 298: 3611 * In a multiprocessor platform, the workaround above 3612 * should be applied to all processors regardless of 3613 * silicon revision when an affected processor is 3614 * present. 3615 */ 3616 3617 return (rdmsr(MSR_AMD_OSVW_STATUS + 3618 (osvwid / OSVW_ID_CNT_PER_MSR)) & 3619 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 3620 3621 default: 3622 return (-1); 3623 } 3624 } 3625 3626 static const char assoc_str[] = "associativity"; 3627 static const char line_str[] = "line-size"; 3628 static const char size_str[] = "size"; 3629 3630 static void 3631 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 3632 uint32_t val) 3633 { 3634 char buf[128]; 3635 3636 /* 3637 * ndi_prop_update_int() is used because it is desirable for 3638 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 3639 */ 3640 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 3641 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 3642 } 3643 3644 /* 3645 * Intel-style cache/tlb description 3646 * 3647 * Standard cpuid level 2 gives a randomly ordered 3648 * selection of tags that index into a table that describes 3649 * cache and tlb properties. 
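 * For example, descriptor 0x2c in the table below denotes an 8-way,
 * 32KB L1 d-cache with 64-byte lines, and descriptor 0x03 a 4-way,
 * 64-entry 4K d-TLB.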
3650 */ 3651 3652 static const char l1_icache_str[] = "l1-icache"; 3653 static const char l1_dcache_str[] = "l1-dcache"; 3654 static const char l2_cache_str[] = "l2-cache"; 3655 static const char l3_cache_str[] = "l3-cache"; 3656 static const char itlb4k_str[] = "itlb-4K"; 3657 static const char dtlb4k_str[] = "dtlb-4K"; 3658 static const char itlb2M_str[] = "itlb-2M"; 3659 static const char itlb4M_str[] = "itlb-4M"; 3660 static const char dtlb4M_str[] = "dtlb-4M"; 3661 static const char dtlb24_str[] = "dtlb0-2M-4M"; 3662 static const char itlb424_str[] = "itlb-4K-2M-4M"; 3663 static const char itlb24_str[] = "itlb-2M-4M"; 3664 static const char dtlb44_str[] = "dtlb-4K-4M"; 3665 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 3666 static const char sl2_cache_str[] = "sectored-l2-cache"; 3667 static const char itrace_str[] = "itrace-cache"; 3668 static const char sl3_cache_str[] = "sectored-l3-cache"; 3669 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3670 3671 static const struct cachetab { 3672 uint8_t ct_code; 3673 uint8_t ct_assoc; 3674 uint16_t ct_line_size; 3675 size_t ct_size; 3676 const char *ct_label; 3677 } intel_ctab[] = { 3678 /* 3679 * maintain descending order! 3680 * 3681 * Codes ignored - Reason 3682 * ---------------------- 3683 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3684 * f0H/f1H - Currently we do not interpret prefetch size by design 3685 */ 3686 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3687 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3688 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3689 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3690 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3691 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3692 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3693 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3694 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3695 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3696 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3697 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3698 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3699 { 0xc0, 4, 0, 8, dtlb44_str }, 3700 { 0xba, 4, 0, 64, dtlb4k_str }, 3701 { 0xb4, 4, 0, 256, dtlb4k_str }, 3702 { 0xb3, 4, 0, 128, dtlb4k_str }, 3703 { 0xb2, 4, 0, 64, itlb4k_str }, 3704 { 0xb0, 4, 0, 128, itlb4k_str }, 3705 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3706 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3707 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3708 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3709 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3710 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3711 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3712 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3713 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3714 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3715 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3716 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3717 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3718 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3719 { 0x73, 8, 0, 64*1024, itrace_str}, 3720 { 0x72, 8, 0, 32*1024, itrace_str}, 3721 { 0x71, 8, 0, 16*1024, itrace_str}, 3722 { 0x70, 8, 0, 12*1024, itrace_str}, 3723 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3724 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3725 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3726 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3727 { 0x5d, 0, 0, 256, dtlb44_str}, 3728 { 0x5c, 0, 0, 128, dtlb44_str}, 3729 { 0x5b, 0, 0, 64, dtlb44_str}, 3730 { 0x5a, 4, 0, 32, dtlb24_str}, 3731 { 0x59, 0, 0, 16, dtlb4k_str}, 3732 { 0x57, 4, 0, 16, dtlb4k_str}, 3733 { 0x56, 4, 0, 16, dtlb4M_str}, 3734 { 0x55, 0, 0, 
7, itlb24_str}, 3735 { 0x52, 0, 0, 256, itlb424_str}, 3736 { 0x51, 0, 0, 128, itlb424_str}, 3737 { 0x50, 0, 0, 64, itlb424_str}, 3738 { 0x4f, 0, 0, 32, itlb4k_str}, 3739 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3740 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3741 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3742 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3743 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3744 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3745 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3746 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3747 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3748 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3749 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3750 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3751 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3752 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3753 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3754 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3755 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3756 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3757 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3758 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3759 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3760 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3761 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3762 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3763 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3764 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3765 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3766 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3767 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3768 { 0x0b, 4, 0, 4, itlb4M_str}, 3769 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3770 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3771 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3772 { 0x05, 4, 0, 32, dtlb4M_str}, 3773 { 0x04, 4, 0, 8, dtlb4M_str}, 3774 { 0x03, 4, 0, 64, dtlb4k_str}, 3775 { 0x02, 4, 0, 2, itlb4M_str}, 3776 { 0x01, 4, 0, 32, itlb4k_str}, 3777 { 0 } 3778 }; 3779 3780 static const struct cachetab cyrix_ctab[] = { 3781 { 0x70, 4, 0, 32, "tlb-4K" }, 3782 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3783 { 0 } 3784 }; 3785 3786 /* 3787 * Search a cache table for a matching entry 3788 */ 3789 static const struct cachetab * 3790 find_cacheent(const struct cachetab *ct, uint_t code) 3791 { 3792 if (code != 0) { 3793 for (; ct->ct_code != 0; ct++) 3794 if (ct->ct_code <= code) 3795 break; 3796 if (ct->ct_code == code) 3797 return (ct); 3798 } 3799 return (NULL); 3800 } 3801 3802 /* 3803 * Populate cachetab entry with L2 or L3 cache-information using 3804 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3805 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3806 * information is found. 
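 * (Worked example with assumed leaf-4 values: 16 ways, 1 partition,
 * 64-byte lines and 4096 sets (%ecx + 1) gives
 * ct_size = 16 * 1 * 64 * 4096 = 4MB.)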
3807 */ 3808 static int 3809 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3810 { 3811 uint32_t level, i; 3812 int ret = 0; 3813 3814 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3815 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3816 3817 if (level == 2 || level == 3) { 3818 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3819 ct->ct_line_size = 3820 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3821 ct->ct_size = ct->ct_assoc * 3822 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3823 ct->ct_line_size * 3824 (cpi->cpi_std_4[i]->cp_ecx + 1); 3825 3826 if (level == 2) { 3827 ct->ct_label = l2_cache_str; 3828 } else if (level == 3) { 3829 ct->ct_label = l3_cache_str; 3830 } 3831 ret = 1; 3832 } 3833 } 3834 3835 return (ret); 3836 } 3837 3838 /* 3839 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3840 * The walk is terminated if the walker returns non-zero. 3841 */ 3842 static void 3843 intel_walk_cacheinfo(struct cpuid_info *cpi, 3844 void *arg, int (*func)(void *, const struct cachetab *)) 3845 { 3846 const struct cachetab *ct; 3847 struct cachetab des_49_ct, des_b1_ct; 3848 uint8_t *dp; 3849 int i; 3850 3851 if ((dp = cpi->cpi_cacheinfo) == NULL) 3852 return; 3853 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3854 /* 3855 * For overloaded descriptor 0x49 we use cpuid function 4 3856 * if supported by the current processor, to create 3857 * cache information. 3858 * For overloaded descriptor 0xb1 we use X86_PAE flag 3859 * to disambiguate the cache information. 3860 */ 3861 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3862 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3863 ct = &des_49_ct; 3864 } else if (*dp == 0xb1) { 3865 des_b1_ct.ct_code = 0xb1; 3866 des_b1_ct.ct_assoc = 4; 3867 des_b1_ct.ct_line_size = 0; 3868 if (is_x86_feature(x86_featureset, X86FSET_PAE)) { 3869 des_b1_ct.ct_size = 8; 3870 des_b1_ct.ct_label = itlb2M_str; 3871 } else { 3872 des_b1_ct.ct_size = 4; 3873 des_b1_ct.ct_label = itlb4M_str; 3874 } 3875 ct = &des_b1_ct; 3876 } else { 3877 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3878 continue; 3879 } 3880 } 3881 3882 if (func(arg, ct) != 0) { 3883 break; 3884 } 3885 } 3886 } 3887 3888 /* 3889 * (Like the Intel one, except for Cyrix CPUs) 3890 */ 3891 static void 3892 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3893 void *arg, int (*func)(void *, const struct cachetab *)) 3894 { 3895 const struct cachetab *ct; 3896 uint8_t *dp; 3897 int i; 3898 3899 if ((dp = cpi->cpi_cacheinfo) == NULL) 3900 return; 3901 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3902 /* 3903 * Search Cyrix-specific descriptor table first .. 3904 */ 3905 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3906 if (func(arg, ct) != 0) 3907 break; 3908 continue; 3909 } 3910 /* 3911 * .. else fall back to the Intel one 3912 */ 3913 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3914 if (func(arg, ct) != 0) 3915 break; 3916 continue; 3917 } 3918 } 3919 } 3920 3921 /* 3922 * A cacheinfo walker that adds associativity, line-size, and size properties 3923 * to the devinfo node it is passed as an argument. 
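 * (E.g. an l2-cache entry yields the properties "l2-cache-associativity",
 * "l2-cache-line-size" and "l2-cache-size" via add_cache_prop().)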
3924 */ 3925 static int 3926 add_cacheent_props(void *arg, const struct cachetab *ct) 3927 { 3928 dev_info_t *devi = arg; 3929 3930 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3931 if (ct->ct_line_size != 0) 3932 add_cache_prop(devi, ct->ct_label, line_str, 3933 ct->ct_line_size); 3934 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3935 return (0); 3936 } 3937 3938 3939 static const char fully_assoc[] = "fully-associative?"; 3940 3941 /* 3942 * AMD style cache/tlb description 3943 * 3944 * Extended functions 5 and 6 directly describe properties of 3945 * tlbs and various cache levels. 3946 */ 3947 static void 3948 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3949 { 3950 switch (assoc) { 3951 case 0: /* reserved; ignore */ 3952 break; 3953 default: 3954 add_cache_prop(devi, label, assoc_str, assoc); 3955 break; 3956 case 0xff: 3957 add_cache_prop(devi, label, fully_assoc, 1); 3958 break; 3959 } 3960 } 3961 3962 static void 3963 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3964 { 3965 if (size == 0) 3966 return; 3967 add_cache_prop(devi, label, size_str, size); 3968 add_amd_assoc(devi, label, assoc); 3969 } 3970 3971 static void 3972 add_amd_cache(dev_info_t *devi, const char *label, 3973 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3974 { 3975 if (size == 0 || line_size == 0) 3976 return; 3977 add_amd_assoc(devi, label, assoc); 3978 /* 3979 * Most AMD parts have a sectored cache. Multiple cache lines are 3980 * associated with each tag. A sector consists of all cache lines 3981 * associated with a tag. For example, the AMD K6-III has a sector 3982 * size of 2 cache lines per tag. 3983 */ 3984 if (lines_per_tag != 0) 3985 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3986 add_cache_prop(devi, label, line_str, line_size); 3987 add_cache_prop(devi, label, size_str, size * 1024); 3988 } 3989 3990 static void 3991 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3992 { 3993 switch (assoc) { 3994 case 0: /* off */ 3995 break; 3996 case 1: 3997 case 2: 3998 case 4: 3999 add_cache_prop(devi, label, assoc_str, assoc); 4000 break; 4001 case 6: 4002 add_cache_prop(devi, label, assoc_str, 8); 4003 break; 4004 case 8: 4005 add_cache_prop(devi, label, assoc_str, 16); 4006 break; 4007 case 0xf: 4008 add_cache_prop(devi, label, fully_assoc, 1); 4009 break; 4010 default: /* reserved; ignore */ 4011 break; 4012 } 4013 } 4014 4015 static void 4016 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 4017 { 4018 if (size == 0 || assoc == 0) 4019 return; 4020 add_amd_l2_assoc(devi, label, assoc); 4021 add_cache_prop(devi, label, size_str, size); 4022 } 4023 4024 static void 4025 add_amd_l2_cache(dev_info_t *devi, const char *label, 4026 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 4027 { 4028 if (size == 0 || assoc == 0 || line_size == 0) 4029 return; 4030 add_amd_l2_assoc(devi, label, assoc); 4031 if (lines_per_tag != 0) 4032 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 4033 add_cache_prop(devi, label, line_str, line_size); 4034 add_cache_prop(devi, label, size_str, size * 1024); 4035 } 4036 4037 static void 4038 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 4039 { 4040 struct cpuid_regs *cp; 4041 4042 if (cpi->cpi_xmaxeax < 0x80000005) 4043 return; 4044 cp = &cpi->cpi_extd[5]; 4045 4046 /* 4047 * 4M/2M L1 TLB configuration 4048 * 4049 * We report the size for 2M pages because AMD uses two 4050 * TLB 
entries for one 4M page. 4051 */ 4052 add_amd_tlb(devi, "dtlb-2M", 4053 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 4054 add_amd_tlb(devi, "itlb-2M", 4055 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 4056 4057 /* 4058 * 4K L1 TLB configuration 4059 */ 4060 4061 switch (cpi->cpi_vendor) { 4062 uint_t nentries; 4063 case X86_VENDOR_TM: 4064 if (cpi->cpi_family >= 5) { 4065 /* 4066 * Crusoe processors have 256 TLB entries, but 4067 * cpuid data format constrains them to only 4068 * reporting 255 of them. 4069 */ 4070 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 4071 nentries = 256; 4072 /* 4073 * Crusoe processors also have a unified TLB 4074 */ 4075 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 4076 nentries); 4077 break; 4078 } 4079 /*FALLTHROUGH*/ 4080 default: 4081 add_amd_tlb(devi, itlb4k_str, 4082 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 4083 add_amd_tlb(devi, dtlb4k_str, 4084 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 4085 break; 4086 } 4087 4088 /* 4089 * data L1 cache configuration 4090 */ 4091 4092 add_amd_cache(devi, l1_dcache_str, 4093 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 4094 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 4095 4096 /* 4097 * code L1 cache configuration 4098 */ 4099 4100 add_amd_cache(devi, l1_icache_str, 4101 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 4102 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 4103 4104 if (cpi->cpi_xmaxeax < 0x80000006) 4105 return; 4106 cp = &cpi->cpi_extd[6]; 4107 4108 /* Check for a unified L2 TLB for large pages */ 4109 4110 if (BITX(cp->cp_eax, 31, 16) == 0) 4111 add_amd_l2_tlb(devi, "l2-tlb-2M", 4112 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4113 else { 4114 add_amd_l2_tlb(devi, "l2-dtlb-2M", 4115 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 4116 add_amd_l2_tlb(devi, "l2-itlb-2M", 4117 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4118 } 4119 4120 /* Check for a unified L2 TLB for 4K pages */ 4121 4122 if (BITX(cp->cp_ebx, 31, 16) == 0) { 4123 add_amd_l2_tlb(devi, "l2-tlb-4K", 4124 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4125 } else { 4126 add_amd_l2_tlb(devi, "l2-dtlb-4K", 4127 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 4128 add_amd_l2_tlb(devi, "l2-itlb-4K", 4129 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4130 } 4131 4132 add_amd_l2_cache(devi, l2_cache_str, 4133 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 4134 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 4135 } 4136 4137 /* 4138 * There are two basic ways that the x86 world describes it cache 4139 * and tlb architecture - Intel's way and AMD's way. 4140 * 4141 * Return which flavor of cache architecture we should use 4142 */ 4143 static int 4144 x86_which_cacheinfo(struct cpuid_info *cpi) 4145 { 4146 switch (cpi->cpi_vendor) { 4147 case X86_VENDOR_Intel: 4148 if (cpi->cpi_maxeax >= 2) 4149 return (X86_VENDOR_Intel); 4150 break; 4151 case X86_VENDOR_AMD: 4152 /* 4153 * The K5 model 1 was the first part from AMD that reported 4154 * cache sizes via extended cpuid functions. 4155 */ 4156 if (cpi->cpi_family > 5 || 4157 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 4158 return (X86_VENDOR_AMD); 4159 break; 4160 case X86_VENDOR_TM: 4161 if (cpi->cpi_family >= 5) 4162 return (X86_VENDOR_AMD); 4163 /*FALLTHROUGH*/ 4164 default: 4165 /* 4166 * If they have extended CPU data for 0x80000005 4167 * then we assume they have AMD-format cache 4168 * information. 
4169 * 4170 * If not, and the vendor happens to be Cyrix, 4171 * then try our Cyrix-specific handler. 4172 * 4173 * If we're not Cyrix, then assume we're using Intel's 4174 * table-driven format instead. 4175 */ 4176 if (cpi->cpi_xmaxeax >= 0x80000005) 4177 return (X86_VENDOR_AMD); 4178 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 4179 return (X86_VENDOR_Cyrix); 4180 else if (cpi->cpi_maxeax >= 2) 4181 return (X86_VENDOR_Intel); 4182 break; 4183 } 4184 return (-1); 4185 } 4186 4187 void 4188 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 4189 struct cpuid_info *cpi) 4190 { 4191 dev_info_t *cpu_devi; 4192 int create; 4193 4194 cpu_devi = (dev_info_t *)dip; 4195 4196 /* device_type */ 4197 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 4198 "device_type", "cpu"); 4199 4200 /* reg */ 4201 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4202 "reg", cpu_id); 4203 4204 /* cpu-mhz, and clock-frequency */ 4205 if (cpu_freq > 0) { 4206 long long mul; 4207 4208 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4209 "cpu-mhz", cpu_freq); 4210 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 4211 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4212 "clock-frequency", (int)mul); 4213 } 4214 4215 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) { 4216 return; 4217 } 4218 4219 /* vendor-id */ 4220 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 4221 "vendor-id", cpi->cpi_vendorstr); 4222 4223 if (cpi->cpi_maxeax == 0) { 4224 return; 4225 } 4226 4227 /* 4228 * family, model, and step 4229 */ 4230 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4231 "family", CPI_FAMILY(cpi)); 4232 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4233 "cpu-model", CPI_MODEL(cpi)); 4234 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4235 "stepping-id", CPI_STEP(cpi)); 4236 4237 /* type */ 4238 switch (cpi->cpi_vendor) { 4239 case X86_VENDOR_Intel: 4240 create = 1; 4241 break; 4242 default: 4243 create = 0; 4244 break; 4245 } 4246 if (create) 4247 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4248 "type", CPI_TYPE(cpi)); 4249 4250 /* ext-family */ 4251 switch (cpi->cpi_vendor) { 4252 case X86_VENDOR_Intel: 4253 case X86_VENDOR_AMD: 4254 create = cpi->cpi_family >= 0xf; 4255 break; 4256 default: 4257 create = 0; 4258 break; 4259 } 4260 if (create) 4261 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4262 "ext-family", CPI_FAMILY_XTD(cpi)); 4263 4264 /* ext-model */ 4265 switch (cpi->cpi_vendor) { 4266 case X86_VENDOR_Intel: 4267 create = IS_EXTENDED_MODEL_INTEL(cpi); 4268 break; 4269 case X86_VENDOR_AMD: 4270 create = CPI_FAMILY(cpi) == 0xf; 4271 break; 4272 default: 4273 create = 0; 4274 break; 4275 } 4276 if (create) 4277 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4278 "ext-model", CPI_MODEL_XTD(cpi)); 4279 4280 /* generation */ 4281 switch (cpi->cpi_vendor) { 4282 case X86_VENDOR_AMD: 4283 /* 4284 * AMD K5 model 1 was the first part to support this 4285 */ 4286 create = cpi->cpi_xmaxeax >= 0x80000001; 4287 break; 4288 default: 4289 create = 0; 4290 break; 4291 } 4292 if (create) 4293 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4294 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 4295 4296 /* brand-id */ 4297 switch (cpi->cpi_vendor) { 4298 case X86_VENDOR_Intel: 4299 /* 4300 * brand id first appeared on Pentium III Xeon model 8, 4301 * and Celeron model 8 processors and Opteron 4302 */ 4303 create = cpi->cpi_family > 6 || 4304 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 4305 break; 4306 case X86_VENDOR_AMD: 4307 create =
cpi->cpi_family >= 0xf; 4308 break; 4309 default: 4310 create = 0; 4311 break; 4312 } 4313 if (create && cpi->cpi_brandid != 0) { 4314 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4315 "brand-id", cpi->cpi_brandid); 4316 } 4317 4318 /* chunks, and apic-id */ 4319 switch (cpi->cpi_vendor) { 4320 /* 4321 * first available on Pentium IV and Opteron (K8) 4322 */ 4323 case X86_VENDOR_Intel: 4324 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 4325 break; 4326 case X86_VENDOR_AMD: 4327 create = cpi->cpi_family >= 0xf; 4328 break; 4329 default: 4330 create = 0; 4331 break; 4332 } 4333 if (create) { 4334 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4335 "chunks", CPI_CHUNKS(cpi)); 4336 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4337 "apic-id", cpi->cpi_apicid); 4338 if (cpi->cpi_chipid >= 0) { 4339 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4340 "chip#", cpi->cpi_chipid); 4341 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4342 "clog#", cpi->cpi_clogid); 4343 } 4344 } 4345 4346 /* cpuid-features */ 4347 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4348 "cpuid-features", CPI_FEATURES_EDX(cpi)); 4349 4350 4351 /* cpuid-features-ecx */ 4352 switch (cpi->cpi_vendor) { 4353 case X86_VENDOR_Intel: 4354 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 4355 break; 4356 case X86_VENDOR_AMD: 4357 create = cpi->cpi_family >= 0xf; 4358 break; 4359 default: 4360 create = 0; 4361 break; 4362 } 4363 if (create) 4364 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4365 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 4366 4367 /* ext-cpuid-features */ 4368 switch (cpi->cpi_vendor) { 4369 case X86_VENDOR_Intel: 4370 case X86_VENDOR_AMD: 4371 case X86_VENDOR_Cyrix: 4372 case X86_VENDOR_TM: 4373 case X86_VENDOR_Centaur: 4374 create = cpi->cpi_xmaxeax >= 0x80000001; 4375 break; 4376 default: 4377 create = 0; 4378 break; 4379 } 4380 if (create) { 4381 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4382 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 4383 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4384 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 4385 } 4386 4387 /* 4388 * Brand String first appeared in Intel Pentium IV, AMD K5 4389 * model 1, and Cyrix GXm. On earlier models we try to 4390 * simulate something similar .. so this string should always 4391 * say -something- about the processor, however lame.
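 *
 * (For instance, a modern part returns something like
 * "Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz" directly from cpuid,
 * while the simulated strings for older parts are much terser
 * vendor/model tags; either way the result lands in the
 * "brand-string" property set just below.  The sample string is
 * illustrative only, not taken from any particular machine.)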
4392 */ 4393 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 4394 "brand-string", cpi->cpi_brandstr); 4395 4396 /* 4397 * Finally, cache and tlb information 4398 */ 4399 switch (x86_which_cacheinfo(cpi)) { 4400 case X86_VENDOR_Intel: 4401 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 4402 break; 4403 case X86_VENDOR_Cyrix: 4404 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 4405 break; 4406 case X86_VENDOR_AMD: 4407 amd_cache_info(cpi, cpu_devi); 4408 break; 4409 default: 4410 break; 4411 } 4412 } 4413 4414 struct l2info { 4415 int *l2i_csz; 4416 int *l2i_lsz; 4417 int *l2i_assoc; 4418 int l2i_ret; 4419 }; 4420 4421 /* 4422 * A cacheinfo walker that fetches the size, line-size and associativity 4423 * of the L2 cache 4424 */ 4425 static int 4426 intel_l2cinfo(void *arg, const struct cachetab *ct) 4427 { 4428 struct l2info *l2i = arg; 4429 int *ip; 4430 4431 if (ct->ct_label != l2_cache_str && 4432 ct->ct_label != sl2_cache_str) 4433 return (0); /* not an L2 -- keep walking */ 4434 4435 if ((ip = l2i->l2i_csz) != NULL) 4436 *ip = ct->ct_size; 4437 if ((ip = l2i->l2i_lsz) != NULL) 4438 *ip = ct->ct_line_size; 4439 if ((ip = l2i->l2i_assoc) != NULL) 4440 *ip = ct->ct_assoc; 4441 l2i->l2i_ret = ct->ct_size; 4442 return (1); /* was an L2 -- terminate walk */ 4443 } 4444 4445 /* 4446 * AMD L2/L3 Cache and TLB Associativity Field Definition: 4447 * 4448 * Unlike the associativity for the L1 cache and tlb where the 8 bit 4449 * value is the associativity, the associativity for the L2 cache and 4450 * tlb is encoded in the following table. The 4 bit L2 value serves as 4451 * an index into the amd_afd[] array to determine the associativity. 4452 * -1 is undefined. 0 is fully associative. 4453 */ 4454 4455 static int amd_afd[] = 4456 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 4457 4458 static void 4459 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 4460 { 4461 struct cpuid_regs *cp; 4462 uint_t size, assoc; 4463 int i; 4464 int *ip; 4465 4466 if (cpi->cpi_xmaxeax < 0x80000006) 4467 return; 4468 cp = &cpi->cpi_extd[6]; 4469 4470 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 4471 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 4472 uint_t cachesz = size * 1024; 4473 assoc = amd_afd[i]; 4474 4475 ASSERT(assoc != -1); 4476 4477 if ((ip = l2i->l2i_csz) != NULL) 4478 *ip = cachesz; 4479 if ((ip = l2i->l2i_lsz) != NULL) 4480 *ip = BITX(cp->cp_ecx, 7, 0); 4481 if ((ip = l2i->l2i_assoc) != NULL) 4482 *ip = assoc; 4483 l2i->l2i_ret = cachesz; 4484 } 4485 } 4486 4487 int 4488 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 4489 { 4490 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 4491 struct l2info __l2info, *l2i = &__l2info; 4492 4493 l2i->l2i_csz = csz; 4494 l2i->l2i_lsz = lsz; 4495 l2i->l2i_assoc = assoc; 4496 l2i->l2i_ret = -1; 4497 4498 switch (x86_which_cacheinfo(cpi)) { 4499 case X86_VENDOR_Intel: 4500 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 4501 break; 4502 case X86_VENDOR_Cyrix: 4503 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 4504 break; 4505 case X86_VENDOR_AMD: 4506 amd_l2cacheinfo(cpi, l2i); 4507 break; 4508 default: 4509 break; 4510 } 4511 return (l2i->l2i_ret); 4512 } 4513 4514 #if !defined(__xpv) 4515 4516 uint32_t * 4517 cpuid_mwait_alloc(cpu_t *cpu) 4518 { 4519 uint32_t *ret; 4520 size_t mwait_size; 4521 4522 ASSERT(cpuid_checkpass(CPU, 2)); 4523 4524 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 4525 if (mwait_size == 0) 4526 return (NULL); 4527 4528 /* 4529 * kmem_alloc() returns cache line size aligned data for 
mwait_size 4530 * allocations. mwait_size is currently cache line sized. Neither 4531 * of these implementation details is guaranteed to be true in the 4532 * future. 4533 * 4534 * First try allocating mwait_size as kmem_alloc() currently returns 4535 * correctly aligned memory. If kmem_alloc() does not return 4536 * mwait_size aligned memory, then allocate mwait_size * 2 and P2ROUNDUP the pointer. 4537 * 4538 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 4539 * decide to free this memory. 4540 */ 4541 ret = kmem_zalloc(mwait_size, KM_SLEEP); 4542 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 4543 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4544 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 4545 *ret = MWAIT_RUNNING; 4546 return (ret); 4547 } else { 4548 kmem_free(ret, mwait_size); 4549 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 4550 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4551 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 4552 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 4553 *ret = MWAIT_RUNNING; 4554 return (ret); 4555 } 4556 } 4557 4558 void 4559 cpuid_mwait_free(cpu_t *cpu) 4560 { 4561 if (cpu->cpu_m.mcpu_cpi == NULL) { 4562 return; 4563 } 4564 4565 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 4566 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 4567 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 4568 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 4569 } 4570 4571 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 4572 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 4573 } 4574 4575 void 4576 patch_tsc_read(int flag) 4577 { 4578 size_t cnt; 4579 4580 switch (flag) { 4581 case TSC_NONE: 4582 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 4583 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 4584 break; 4585 case TSC_RDTSC_MFENCE: 4586 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 4587 (void) memcpy((void *)tsc_read, 4588 (void *)&_tsc_mfence_start, cnt); 4589 break; 4590 case TSC_RDTSC_LFENCE: 4591 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 4592 (void) memcpy((void *)tsc_read, 4593 (void *)&_tsc_lfence_start, cnt); 4594 break; 4595 case TSC_TSCP: 4596 cnt = &_tscp_end - &_tscp_start; 4597 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 4598 break; 4599 default: 4600 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */ 4601 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag); 4602 break; 4603 } 4604 tsc_type = flag; 4605 } 4606 4607 int 4608 cpuid_deep_cstates_supported(void) 4609 { 4610 struct cpuid_info *cpi; 4611 struct cpuid_regs regs; 4612 4613 ASSERT(cpuid_checkpass(CPU, 1)); 4614 4615 cpi = CPU->cpu_m.mcpu_cpi; 4616 4617 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) 4618 return (0); 4619 4620 switch (cpi->cpi_vendor) { 4621 case X86_VENDOR_Intel: 4622 if (cpi->cpi_xmaxeax < 0x80000007) 4623 return (0); 4624 4625 /* 4626 * Does the TSC run at a constant rate in all ACPI C-states? 4627 */ 4628 regs.cp_eax = 0x80000007; 4629 (void) __cpuid_insn(&regs); 4630 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 4631 4632 default: 4633 return (0); 4634 } 4635 } 4636 4637 #endif /* !__xpv */ 4638 4639 void 4640 post_startup_cpu_fixups(void) 4641 { 4642 #ifndef __xpv 4643 /* 4644 * Some AMD processors support C1E state. Entering this state will 4645 * cause the local APIC timer to stop, which we can't deal with at 4646 * this time.
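 *
 * (Two informal notes on the fix-up below: the MSR access is wrapped
 * in on_trap(OT_DATA_ACCESS) so that a fault from a part lacking the
 * MSR is absorbed rather than taken as a panic, and the
 * AMD_ACTONCMPHALT field that gets cleared covers the C1E- and
 * SMI-on-compare-halt enables described in AMD's BKDG for the
 * affected families, leaving the core to fall back to a plain C1
 * halt.)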
4647 */ 4648 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 4649 on_trap_data_t otd; 4650 uint64_t reg; 4651 4652 if (!on_trap(&otd, OT_DATA_ACCESS)) { 4653 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 4654 /* Disable C1E state if it is enabled by BIOS */ 4655 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 4656 AMD_ACTONCMPHALT_MASK) { 4657 reg &= ~(AMD_ACTONCMPHALT_MASK << 4658 AMD_ACTONCMPHALT_SHIFT); 4659 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 4660 } 4661 } 4662 no_trap(); 4663 } 4664 #endif /* !__xpv */ 4665 } 4666 4667 /* 4668 * Set up the necessary registers to enable the XSAVE feature on this processor. 4669 * This function needs to be called early enough, so that no xsave/xrstor 4670 * ops will execute on the processor before the MSRs are properly set up. 4671 * 4672 * Current implementation has the following assumption: 4673 * - cpuid_pass1() is done, so that X86 features are known. 4674 * - fpu_probe() is done, so that fp_save_mech is chosen. 4675 */ 4676 void 4677 xsave_setup_msr(cpu_t *cpu) 4678 { 4679 ASSERT(fp_save_mech == FP_XSAVE); 4680 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); 4681 4682 /* Enable OSXSAVE in CR4. */ 4683 setcr4(getcr4() | CR4_OSXSAVE); 4684 /* 4685 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report 4686 * correct value. 4687 */ 4688 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE; 4689 setup_xfem(); 4690 } 4691 4692 /* 4693 * Starting with the Westmere processor, the local 4694 * APIC timer will continue running in all C-states, 4695 * including the deepest C-states. 4696 */ 4697 int 4698 cpuid_arat_supported(void) 4699 { 4700 struct cpuid_info *cpi; 4701 struct cpuid_regs regs; 4702 4703 ASSERT(cpuid_checkpass(CPU, 1)); 4704 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID)); 4705 4706 cpi = CPU->cpu_m.mcpu_cpi; 4707 4708 switch (cpi->cpi_vendor) { 4709 case X86_VENDOR_Intel: 4710 /* 4711 * Always-running Local APIC Timer is 4712 * indicated by CPUID.6.EAX[2]. 4713 */ 4714 if (cpi->cpi_maxeax >= 6) { 4715 regs.cp_eax = 6; 4716 (void) cpuid_insn(NULL, &regs); 4717 return (regs.cp_eax & CPUID_CSTATE_ARAT); 4718 } else { 4719 return (0); 4720 } 4721 default: 4722 return (0); 4723 } 4724 } 4725 4726 /* 4727 * Check support for Intel ENERGY_PERF_BIAS feature 4728 */ 4729 int 4730 cpuid_iepb_supported(struct cpu *cp) 4731 { 4732 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi; 4733 struct cpuid_regs regs; 4734 4735 ASSERT(cpuid_checkpass(cp, 1)); 4736 4737 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) || 4738 !(is_x86_feature(x86_featureset, X86FSET_MSR))) { 4739 return (0); 4740 } 4741 4742 /* 4743 * Intel ENERGY_PERF_BIAS MSR is indicated by 4744 * capability bit CPUID.6.ECX.3 4745 */ 4746 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6)) 4747 return (0); 4748 4749 regs.cp_eax = 0x6; 4750 (void) cpuid_insn(NULL, &regs); 4751 return (regs.cp_ecx & CPUID_EPB_SUPPORT); 4752 } 4753 4754 /* 4755 * Check support for TSC deadline timer 4756 * 4757 * The TSC deadline timer provides a superior software programming 4758 * model over the local APIC timer, one that eliminates "time drifts". 4759 * Instead of specifying a relative time, software specifies an 4760 * absolute time as the target at which the processor should 4761 * generate a timer event.
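 *
 * (Schematically, once CPUID.01H:ECX[24] advertises the feature and
 * the local APIC LVT timer entry is switched to TSC-deadline mode,
 * the timer is armed with an absolute count, e.g.
 *
 *	wrmsr(<IA32_TSC_DEADLINE, msr 0x6e0>, tsc_read() + delta);
 *
 * and fires once the TSC passes that value; writing zero disarms it.
 * The MSR is written generically above rather than via any
 * particular illumos #define.)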
4762 */ 4763 int 4764 cpuid_deadline_tsc_supported(void) 4765 { 4766 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi; 4767 struct cpuid_regs regs; 4768 4769 ASSERT(cpuid_checkpass(CPU, 1)); 4770 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID)); 4771 4772 switch (cpi->cpi_vendor) { 4773 case X86_VENDOR_Intel: 4774 if (cpi->cpi_maxeax >= 1) { 4775 regs.cp_eax = 1; 4776 (void) cpuid_insn(NULL, &regs); 4777 return (regs.cp_ecx & CPUID_DEADLINE_TSC); 4778 } else { 4779 return (0); 4780 } 4781 default: 4782 return (0); 4783 } 4784 } 4785 4786 #if defined(__amd64) && !defined(__xpv) 4787 /* 4788 * Patch in versions of bcopy for high-performance Intel Nehalem (Nhm) 4789 * processors and later... 4790 */ 4791 void 4792 patch_memops(uint_t vendor) 4793 { 4794 size_t cnt, i; 4795 caddr_t to, from; 4796 4797 if ((vendor == X86_VENDOR_Intel) && 4798 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) { 4799 cnt = &bcopy_patch_end - &bcopy_patch_start; 4800 to = &bcopy_ck_size; 4801 from = &bcopy_patch_start; 4802 for (i = 0; i < cnt; i++) { 4803 *to++ = *from++; 4804 } 4805 } 4806 } 4807 #endif /* __amd64 && !__xpv */ 4808 4809 /* 4810 * This function finds the number of bits to represent the number of cores per 4811 * chip and the number of strands per core on Intel platforms. 4812 * It re-uses the x2APIC cpuid code from cpuid_pass2(). 4813 */ 4814 void 4815 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits) 4816 { 4817 struct cpuid_regs regs; 4818 struct cpuid_regs *cp = &regs; 4819 4820 if (vendor != X86_VENDOR_Intel) { 4821 return; 4822 } 4823 4824 /* If the cpuid level is at least 0xB, the extended topology leaf is available. */ 4825 cp->cp_eax = 0; 4826 if (__cpuid_insn(cp) >= 0xB) { 4827 4828 cp->cp_eax = 0xB; 4829 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 4830 (void) __cpuid_insn(cp); 4831 4832 /* 4833 * Check that CPUID.EAX=0BH, ECX=0H returns a non-zero EBX, which 4834 * indicates that the extended topology enumeration leaf is 4835 * available. 4836 */ 4837 if (cp->cp_ebx) { 4838 uint_t coreid_shift = 0; 4839 uint_t chipid_shift = 0; 4840 uint_t i; 4841 uint_t level; 4842 4843 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 4844 cp->cp_eax = 0xB; 4845 cp->cp_ecx = i; 4846 4847 (void) __cpuid_insn(cp); 4848 level = CPI_CPU_LEVEL_TYPE(cp); 4849 4850 if (level == 1) { 4851 /* 4852 * Thread level processor topology: 4853 * number of bits to shift the APIC ID 4854 * right to get the coreid. 4855 */ 4856 coreid_shift = BITX(cp->cp_eax, 4, 0); 4857 } else if (level == 2) { 4858 /* 4859 * Core level processor topology: 4860 * number of bits to shift the APIC ID 4861 * right to get the chipid. 4862 */ 4863 chipid_shift = BITX(cp->cp_eax, 4, 0); 4864 } 4865 } 4866 4867 if (coreid_shift > 0 && chipid_shift > coreid_shift) { 4868 *strand_nbits = coreid_shift; 4869 *core_nbits = chipid_shift - coreid_shift; 4870 } 4871 } 4872 } 4873 } 4874
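
/*
 * A minimal, illustrative sketch (not referenced anywhere in the kernel)
 * of how the shift counts produced by cpuid_get_ext_topo() decompose an
 * x2APIC ID into chip, core and strand numbers.  The function name and
 * the message format are invented for the example.
 */
static void
example_decompose_x2apic_id(uint32_t x2apic_id, uint_t core_nbits,
    uint_t strand_nbits)
{
	/* low strand_nbits bits identify the strand within the core */
	uint_t strand = x2apic_id & ((1U << strand_nbits) - 1);
	/* next core_nbits bits identify the core within the chip */
	uint_t core = (x2apic_id >> strand_nbits) & ((1U << core_nbits) - 1);
	/* the remaining high bits identify the chip (package) */
	uint_t chip = x2apic_id >> (core_nbits + strand_nbits);

	cmn_err(CE_CONT, "?x2apic id 0x%x: chip %u core %u strand %u\n",
	    x2apic_id, chip, core, strand);
}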