1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 by Delphix. All rights reserved. 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net> 26 */ 27 /* 28 * Copyright (c) 2010, Intel Corporation. 29 * All rights reserved. 30 */ 31 /* 32 * Portions Copyright 2009 Advanced Micro Devices, Inc. 33 */ 34 /* 35 * Copyright (c) 2015, Joyent, Inc. All rights reserved. 36 */ 37 /* 38 * Various routines to handle identification 39 * and classification of x86 processors. 40 */ 41 42 #include <sys/types.h> 43 #include <sys/archsystm.h> 44 #include <sys/x86_archext.h> 45 #include <sys/kmem.h> 46 #include <sys/systm.h> 47 #include <sys/cmn_err.h> 48 #include <sys/sunddi.h> 49 #include <sys/sunndi.h> 50 #include <sys/cpuvar.h> 51 #include <sys/processor.h> 52 #include <sys/sysmacros.h> 53 #include <sys/pg.h> 54 #include <sys/fp.h> 55 #include <sys/controlregs.h> 56 #include <sys/bitmap.h> 57 #include <sys/auxv_386.h> 58 #include <sys/memnode.h> 59 #include <sys/pci_cfgspace.h> 60 61 #ifdef __xpv 62 #include <sys/hypervisor.h> 63 #else 64 #include <sys/ontrap.h> 65 #endif 66 67 /* 68 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 69 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 70 * them accordingly. For most modern processors, feature detection occurs here 71 * in pass 1. 72 * 73 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 74 * for the boot CPU and does the basic analysis that the early kernel needs. 75 * x86_featureset is set based on the return value of cpuid_pass1() of the boot 76 * CPU. 77 * 78 * Pass 1 includes: 79 * 80 * o Determining vendor/model/family/stepping and setting x86_type and 81 * x86_vendor accordingly. 82 * o Processing the feature flags returned by the cpuid instruction while 83 * applying any workarounds or tricks for the specific processor. 84 * o Mapping the feature flags into Solaris feature bits (X86_*). 85 * o Processing extended feature flags if supported by the processor, 86 * again while applying specific processor knowledge. 87 * o Determining the CMT characteristics of the system. 88 * 89 * Pass 1 is done on non-boot CPUs during their initialization and the results 90 * are used only as a meager attempt at ensuring that all processors within the 91 * system support the same features. 92 * 93 * Pass 2 of cpuid feature analysis happens just at the beginning 94 * of startup(). 
It just copies in and corrects the remainder 95 * of the cpuid data we depend on: standard cpuid functions that we didn't 96 * need for pass1 feature analysis, and extended cpuid functions beyond the 97 * simple feature processing done in pass1. 98 * 99 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 100 * particular kernel memory allocation has been made available. It creates a 101 * readable brand string based on the data collected in the first two passes. 102 * 103 * Pass 4 of cpuid analysis is invoked after post_startup() when all 104 * the support infrastructure for various hardware features has been 105 * initialized. It determines which processor features will be reported 106 * to userland via the aux vector. 107 * 108 * All passes are executed on all CPUs, but only the boot CPU determines what 109 * features the kernel will use. 110 * 111 * Much of the worst junk in this file is for the support of processors 112 * that didn't really implement the cpuid instruction properly. 113 * 114 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 115 * the pass numbers. Accordingly, changes to the pass code may require changes 116 * to the accessor code. 117 */ 118 119 uint_t x86_vendor = X86_VENDOR_IntelClone; 120 uint_t x86_type = X86_TYPE_OTHER; 121 uint_t x86_clflush_size = 0; 122 123 uint_t pentiumpro_bug4046376; 124 125 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)]; 126 127 static char *x86_feature_names[NUM_X86_FEATURES] = { 128 "lgpg", 129 "tsc", 130 "msr", 131 "mtrr", 132 "pge", 133 "de", 134 "cmov", 135 "mmx", 136 "mca", 137 "pae", 138 "cv8", 139 "pat", 140 "sep", 141 "sse", 142 "sse2", 143 "htt", 144 "asysc", 145 "nx", 146 "sse3", 147 "cx16", 148 "cmp", 149 "tscp", 150 "mwait", 151 "sse4a", 152 "cpuid", 153 "ssse3", 154 "sse4_1", 155 "sse4_2", 156 "1gpg", 157 "clfsh", 158 "64", 159 "aes", 160 "pclmulqdq", 161 "xsave", 162 "avx", 163 "vmx", 164 "svm", 165 "topoext", 166 "f16c", 167 "rdrand", 168 "x2apic", 169 "avx2", 170 "bmi1", 171 "bmi2", 172 "fma" 173 }; 174 175 boolean_t 176 is_x86_feature(void *featureset, uint_t feature) 177 { 178 ASSERT(feature < NUM_X86_FEATURES); 179 return (BT_TEST((ulong_t *)featureset, feature)); 180 } 181 182 void 183 add_x86_feature(void *featureset, uint_t feature) 184 { 185 ASSERT(feature < NUM_X86_FEATURES); 186 BT_SET((ulong_t *)featureset, feature); 187 } 188 189 void 190 remove_x86_feature(void *featureset, uint_t feature) 191 { 192 ASSERT(feature < NUM_X86_FEATURES); 193 BT_CLEAR((ulong_t *)featureset, feature); 194 } 195 196 boolean_t 197 compare_x86_featureset(void *setA, void *setB) 198 { 199 /* 200 * We assume that the unused bits of the bitmap are always zero. 201 */ 202 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) { 203 return (B_TRUE); 204 } else { 205 return (B_FALSE); 206 } 207 } 208 209 void 210 print_x86_featureset(void *featureset) 211 { 212 uint_t i; 213 214 for (i = 0; i < NUM_X86_FEATURES; i++) { 215 if (is_x86_feature(featureset, i)) { 216 cmn_err(CE_CONT, "?x86_feature: %s\n", 217 x86_feature_names[i]); 218 } 219 } 220 } 221 222 static size_t xsave_state_size = 0; 223 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE); 224 boolean_t xsave_force_disable = B_FALSE; 225 226 /* 227 * This is set to platform type we are running on. 228 */ 229 static int platform_type = -1; 230 231 #if !defined(__xpv) 232 /* 233 * Variable to patch if hypervisor platform detection needs to be 234 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0). 
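 *
 * For example (one illustrative way to patch it, not the only one), adding
 *	set enable_platform_detection = 0
 * to /etc/system, or clearing the variable with the kernel debugger at
 * boot, should leave platform_type at HW_NATIVE regardless of any
 * hypervisor signature.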
 */
int enable_platform_detection = 1;
#endif

/*
 * monitor/mwait info.
 *
 * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
 */
struct mwait_info {
	size_t		mon_min;	/* min size to avoid missed wakeups */
	size_t		mon_max;	/* size to avoid false wakeups */
	size_t		size_actual;	/* size actually allocated */
	void		*buf_actual;	/* memory actually allocated */
	uint32_t	support;	/* processor support of monitor/mwait */
};

/*
 * xsave/xrestor info.
 *
 * This structure contains HW feature bits and the size of the xsave save area.
 * Note: the kernel will use the maximum size required for all hardware
 * features.  It is not optimized for potential memory savings if features at
 * the end of the save area are not enabled.
 */
struct xsave_info {
	uint32_t	xsav_hw_features_low;	/* Supported HW features */
	uint32_t	xsav_hw_features_high;	/* Supported HW features */
	size_t		xsav_max_size;	/* max size save area for HW features */
	size_t		ymm_size;	/* AVX: size of ymm save area */
	size_t		ymm_offset;	/* AVX: offset for ymm save area */
};


/*
 * These constants determine how many of the elements of the
 * cpuid we cache in the cpuid_info data structure; the
 * remaining elements are accessible via the cpuid instruction.
 */

#define	NMAX_CPI_STD	8		/* eax = 0 .. 7 */
#define	NMAX_CPI_EXTD	0x1f		/* eax = 0x80000000 .. 0x8000001e */

/*
 * Some terminology needs to be explained:
 *  - Socket: Something that can be plugged into a motherboard.
 *  - Package: Same as socket
 *  - Chip: Same as socket.  Note that AMD's documentation uses term "chip"
 *    differently: there, chip is the same as processor node (below)
 *  - Processor node: Some AMD processors have more than one
 *    "subprocessor" embedded in a package.  These subprocessors (nodes)
 *    are fully-functional processors themselves with cores, caches,
 *    memory controllers, PCI configuration spaces.  They are connected
 *    inside the package with Hypertransport links.  On single-node
 *    processors, processor node is equivalent to chip/socket/package.
 *  - Compute Unit: Some AMD processors pair cores in "compute units" that
 *    share the FPU and the I$ and L2 caches.
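 *
 * As a worked (hypothetical) example of how these terms combine: a package
 * built from two processor nodes, each containing four compute units with
 * two cores per compute unit, is seen by the kernel as a single
 * chip/socket/package with cpi_procnodes_per_pkg = 2,
 * cpi_cores_per_compunit = 2 and sixteen cores in total.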
295 */ 296 297 struct cpuid_info { 298 uint_t cpi_pass; /* last pass completed */ 299 /* 300 * standard function information 301 */ 302 uint_t cpi_maxeax; /* fn 0: %eax */ 303 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 304 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 305 306 uint_t cpi_family; /* fn 1: extended family */ 307 uint_t cpi_model; /* fn 1: extended model */ 308 uint_t cpi_step; /* fn 1: stepping */ 309 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */ 310 /* AMD: package/socket # */ 311 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 312 int cpi_clogid; /* fn 1: %ebx: thread # */ 313 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 314 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 315 uint_t cpi_ncache; /* fn 2: number of elements */ 316 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 317 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 318 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 319 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 320 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */ 321 /* 322 * extended function information 323 */ 324 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 325 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 326 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 327 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 328 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */ 329 330 id_t cpi_coreid; /* same coreid => strands share core */ 331 int cpi_pkgcoreid; /* core number within single package */ 332 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 333 /* Intel: fn 4: %eax[31-26] */ 334 /* 335 * supported feature information 336 */ 337 uint32_t cpi_support[6]; 338 #define STD_EDX_FEATURES 0 339 #define AMD_EDX_FEATURES 1 340 #define TM_EDX_FEATURES 2 341 #define STD_ECX_FEATURES 3 342 #define AMD_ECX_FEATURES 4 343 #define STD_EBX_FEATURES 5 344 /* 345 * Synthesized information, where known. 
346 */ 347 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 348 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 349 uint32_t cpi_socket; /* Chip package/socket type */ 350 351 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 352 uint32_t cpi_apicid; 353 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 354 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 355 /* Intel: 1 */ 356 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */ 357 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */ 358 359 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */ 360 }; 361 362 363 static struct cpuid_info cpuid_info0; 364 365 /* 366 * These bit fields are defined by the Intel Application Note AP-485 367 * "Intel Processor Identification and the CPUID Instruction" 368 */ 369 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 370 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 371 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 372 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 373 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 374 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 375 376 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 377 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 378 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 379 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 380 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx) 381 382 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 383 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 384 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 385 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 386 387 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 388 #define CPI_XMAXEAX_MAX 0x80000100 389 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 390 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 391 392 /* 393 * Function 4 (Deterministic Cache Parameters) macros 394 * Defined by Intel Application Note AP-485 395 */ 396 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 397 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 398 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 399 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 400 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 401 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 402 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 403 404 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 405 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 406 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 407 408 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 409 410 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 411 412 413 /* 414 * A couple of shorthand macros to identify "later" P6-family chips 415 * like the Pentium M and Core. 
First, the "older" P6-based stuff
 * (loosely defined as "pre-Pentium-4"):
 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
 */

#define	IS_LEGACY_P6(cpi) (			\
	cpi->cpi_family == 6 &&			\
		(cpi->cpi_model == 1 ||		\
		cpi->cpi_model == 3 ||		\
		cpi->cpi_model == 5 ||		\
		cpi->cpi_model == 6 ||		\
		cpi->cpi_model == 7 ||		\
		cpi->cpi_model == 8 ||		\
		cpi->cpi_model == 0xA ||	\
		cpi->cpi_model == 0xB)		\
)

/* A "new F6" is everything with family 6 that's not the above */
#define	IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))

/* Extended family/model support */
#define	IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
	cpi->cpi_family >= 0xf)

/*
 * Info for monitor/mwait idle loop.
 *
 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
 * 2006.
 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
 * Documentation Updates" #33633, Rev 2.05, December 2006.
 */
#define	MWAIT_SUPPORT		(0x00000001)	/* mwait supported */
#define	MWAIT_EXTENSIONS	(0x00000002)	/* extensions supported */
#define	MWAIT_ECX_INT_ENABLE	(0x00000004)	/* ecx 1 extension supported */
#define	MWAIT_SUPPORTED(cpi)	((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
#define	MWAIT_INT_ENABLE(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x2)
#define	MWAIT_EXTENSION(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x1)
#define	MWAIT_SIZE_MIN(cpi)	BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
#define	MWAIT_SIZE_MAX(cpi)	BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
/*
 * Number of sub-cstates for a given c-state.
 */
#define	MWAIT_NUM_SUBC_STATES(cpi, c_state)			\
	BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)

/*
 * XSAVE leaf 0xD enumeration
 */
#define	CPUID_LEAFD_2_YMM_OFFSET	576
#define	CPUID_LEAFD_2_YMM_SIZE		256

/*
 * Functions we consume from cpuid_subr.c; don't publish these in a header
 * file to try and keep people using the expected cpuid_* interfaces.
 */
extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
extern uint_t _cpuid_vendorstr_to_vendorcode(char *);

/*
 * Apply various platform-dependent restrictions where the
 * underlying platform restrictions mean the CPU can be marked
 * as less capable than its cpuid instruction would imply.
 */
#if defined(__xpv)
static void
platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
{
	switch (eax) {
	case 1: {
		uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
490 0 : CPUID_INTC_EDX_MCA; 491 cp->cp_edx &= 492 ~(mcamask | 493 CPUID_INTC_EDX_PSE | 494 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 495 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 496 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 497 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 498 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 499 break; 500 } 501 502 case 0x80000001: 503 cp->cp_edx &= 504 ~(CPUID_AMD_EDX_PSE | 505 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 506 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 507 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 508 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 509 CPUID_AMD_EDX_TSCP); 510 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 511 break; 512 default: 513 break; 514 } 515 516 switch (vendor) { 517 case X86_VENDOR_Intel: 518 switch (eax) { 519 case 4: 520 /* 521 * Zero out the (ncores-per-chip - 1) field 522 */ 523 cp->cp_eax &= 0x03fffffff; 524 break; 525 default: 526 break; 527 } 528 break; 529 case X86_VENDOR_AMD: 530 switch (eax) { 531 532 case 0x80000001: 533 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 534 break; 535 536 case 0x80000008: 537 /* 538 * Zero out the (ncores-per-chip - 1) field 539 */ 540 cp->cp_ecx &= 0xffffff00; 541 break; 542 default: 543 break; 544 } 545 break; 546 default: 547 break; 548 } 549 } 550 #else 551 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 552 #endif 553 554 /* 555 * Some undocumented ways of patching the results of the cpuid 556 * instruction to permit running Solaris 10 on future cpus that 557 * we don't currently support. Could be set to non-zero values 558 * via settings in eeprom. 559 */ 560 561 uint32_t cpuid_feature_ecx_include; 562 uint32_t cpuid_feature_ecx_exclude; 563 uint32_t cpuid_feature_edx_include; 564 uint32_t cpuid_feature_edx_exclude; 565 566 /* 567 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. 568 */ 569 void 570 cpuid_alloc_space(cpu_t *cpu) 571 { 572 /* 573 * By convention, cpu0 is the boot cpu, which is set up 574 * before memory allocation is available. All other cpus get 575 * their cpuid_info struct allocated here. 576 */ 577 ASSERT(cpu->cpu_id != 0); 578 ASSERT(cpu->cpu_m.mcpu_cpi == NULL); 579 cpu->cpu_m.mcpu_cpi = 580 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 581 } 582 583 void 584 cpuid_free_space(cpu_t *cpu) 585 { 586 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 587 int i; 588 589 ASSERT(cpi != NULL); 590 ASSERT(cpi != &cpuid_info0); 591 592 /* 593 * Free up any function 4 related dynamic storage 594 */ 595 for (i = 1; i < cpi->cpi_std_4_size; i++) 596 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 597 if (cpi->cpi_std_4_size > 0) 598 kmem_free(cpi->cpi_std_4, 599 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 600 601 kmem_free(cpi, sizeof (*cpi)); 602 cpu->cpu_m.mcpu_cpi = NULL; 603 } 604 605 #if !defined(__xpv) 606 /* 607 * Determine the type of the underlying platform. This is used to customize 608 * initialization of various subsystems (e.g. TSC). determine_platform() must 609 * only ever be called once to prevent two processors from seeing different 610 * values of platform_type. Must be called before cpuid_pass1(), the earliest 611 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv). 
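 *
 * In other words, the expected early-boot ordering is (a sketch only; the
 * actual call sites live in the early boot code, e.g. mlsetup() for the
 * boot CPU, and the names below are illustrative):
 *
 *	determine_platform();
 *	cpuid_pass1(cpu, x86_featureset);
 *	...
 *	(void) get_hwenv();	(only valid once determine_platform() has run)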
612 */ 613 void 614 determine_platform(void) 615 { 616 struct cpuid_regs cp; 617 uint32_t base; 618 uint32_t regs[4]; 619 char *hvstr = (char *)regs; 620 621 ASSERT(platform_type == -1); 622 623 platform_type = HW_NATIVE; 624 625 if (!enable_platform_detection) 626 return; 627 628 /* 629 * If Hypervisor CPUID bit is set, try to determine hypervisor 630 * vendor signature, and set platform type accordingly. 631 * 632 * References: 633 * http://lkml.org/lkml/2008/10/1/246 634 * http://kb.vmware.com/kb/1009458 635 */ 636 cp.cp_eax = 0x1; 637 (void) __cpuid_insn(&cp); 638 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) { 639 cp.cp_eax = 0x40000000; 640 (void) __cpuid_insn(&cp); 641 regs[0] = cp.cp_ebx; 642 regs[1] = cp.cp_ecx; 643 regs[2] = cp.cp_edx; 644 regs[3] = 0; 645 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) { 646 platform_type = HW_XEN_HVM; 647 return; 648 } 649 if (strcmp(hvstr, HVSIG_VMWARE) == 0) { 650 platform_type = HW_VMWARE; 651 return; 652 } 653 if (strcmp(hvstr, HVSIG_KVM) == 0) { 654 platform_type = HW_KVM; 655 return; 656 } 657 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0) 658 platform_type = HW_MICROSOFT; 659 } else { 660 /* 661 * Check older VMware hardware versions. VMware hypervisor is 662 * detected by performing an IN operation to VMware hypervisor 663 * port and checking that value returned in %ebx is VMware 664 * hypervisor magic value. 665 * 666 * References: http://kb.vmware.com/kb/1009458 667 */ 668 vmware_port(VMWARE_HVCMD_GETVERSION, regs); 669 if (regs[1] == VMWARE_HVMAGIC) { 670 platform_type = HW_VMWARE; 671 return; 672 } 673 } 674 675 /* 676 * Check Xen hypervisor. In a fully virtualized domain, 677 * Xen's pseudo-cpuid function returns a string representing the 678 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum 679 * supported cpuid function. We need at least a (base + 2) leaf value 680 * to do what we want to do. Try different base values, since the 681 * hypervisor might use a different one depending on whether Hyper-V 682 * emulation is switched on by default or not. 683 */ 684 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 685 cp.cp_eax = base; 686 (void) __cpuid_insn(&cp); 687 regs[0] = cp.cp_ebx; 688 regs[1] = cp.cp_ecx; 689 regs[2] = cp.cp_edx; 690 regs[3] = 0; 691 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 && 692 cp.cp_eax >= (base + 2)) { 693 platform_type &= ~HW_NATIVE; 694 platform_type |= HW_XEN_HVM; 695 return; 696 } 697 } 698 } 699 700 int 701 get_hwenv(void) 702 { 703 ASSERT(platform_type != -1); 704 return (platform_type); 705 } 706 707 int 708 is_controldom(void) 709 { 710 return (0); 711 } 712 713 #else 714 715 int 716 get_hwenv(void) 717 { 718 return (HW_XEN_PV); 719 } 720 721 int 722 is_controldom(void) 723 { 724 return (DOMAIN_IS_INITDOMAIN(xen_info)); 725 } 726 727 #endif /* __xpv */ 728 729 static void 730 cpuid_intel_getids(cpu_t *cpu, void *feature) 731 { 732 uint_t i; 733 uint_t chipid_shift = 0; 734 uint_t coreid_shift = 0; 735 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 736 737 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 738 chipid_shift++; 739 740 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 741 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 742 743 if (is_x86_feature(feature, X86FSET_CMP)) { 744 /* 745 * Multi-core (and possibly multi-threaded) 746 * processors. 
747 */ 748 uint_t ncpu_per_core; 749 if (cpi->cpi_ncore_per_chip == 1) 750 ncpu_per_core = cpi->cpi_ncpu_per_chip; 751 else if (cpi->cpi_ncore_per_chip > 1) 752 ncpu_per_core = cpi->cpi_ncpu_per_chip / 753 cpi->cpi_ncore_per_chip; 754 /* 755 * 8bit APIC IDs on dual core Pentiums 756 * look like this: 757 * 758 * +-----------------------+------+------+ 759 * | Physical Package ID | MC | HT | 760 * +-----------------------+------+------+ 761 * <------- chipid --------> 762 * <------- coreid ---------------> 763 * <--- clogid --> 764 * <------> 765 * pkgcoreid 766 * 767 * Where the number of bits necessary to 768 * represent MC and HT fields together equals 769 * to the minimum number of bits necessary to 770 * store the value of cpi->cpi_ncpu_per_chip. 771 * Of those bits, the MC part uses the number 772 * of bits necessary to store the value of 773 * cpi->cpi_ncore_per_chip. 774 */ 775 for (i = 1; i < ncpu_per_core; i <<= 1) 776 coreid_shift++; 777 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 778 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 779 } else if (is_x86_feature(feature, X86FSET_HTT)) { 780 /* 781 * Single-core multi-threaded processors. 782 */ 783 cpi->cpi_coreid = cpi->cpi_chipid; 784 cpi->cpi_pkgcoreid = 0; 785 } 786 cpi->cpi_procnodeid = cpi->cpi_chipid; 787 cpi->cpi_compunitid = cpi->cpi_coreid; 788 } 789 790 static void 791 cpuid_amd_getids(cpu_t *cpu) 792 { 793 int i, first_half, coreidsz; 794 uint32_t nb_caps_reg; 795 uint_t node2_1; 796 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 797 struct cpuid_regs *cp; 798 799 /* 800 * AMD CMP chips currently have a single thread per core. 801 * 802 * Since no two cpus share a core we must assign a distinct coreid 803 * per cpu, and we do this by using the cpu_id. This scheme does not, 804 * however, guarantee that sibling cores of a chip will have sequential 805 * coreids starting at a multiple of the number of cores per chip - 806 * that is usually the case, but if the ACPI MADT table is presented 807 * in a different order then we need to perform a few more gymnastics 808 * for the pkgcoreid. 809 * 810 * All processors in the system have the same number of enabled 811 * cores. Cores within a processor are always numbered sequentially 812 * from 0 regardless of how many or which are disabled, and there 813 * is no way for operating system to discover the real core id when some 814 * are disabled. 815 * 816 * In family 0x15, the cores come in pairs called compute units. They 817 * share I$ and L2 caches and the FPU. Enumeration of this feature is 818 * simplified by the new topology extensions CPUID leaf, indicated by 819 * the X86 feature X86FSET_TOPOEXT. 820 */ 821 822 cpi->cpi_coreid = cpu->cpu_id; 823 cpi->cpi_compunitid = cpu->cpu_id; 824 825 if (cpi->cpi_xmaxeax >= 0x80000008) { 826 827 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 828 829 /* 830 * In AMD parlance chip is really a node while Solaris 831 * sees chip as equivalent to socket/package. 
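		 *
		 * As an illustrative example (made-up numbers): a part
		 * reporting CoreIdSize 4 in fn 0x80000008 %ecx[15:12] uses
		 * the low 4 APIC ID bits for the core number, so for an
		 * APIC ID of 0x12 the code below derives cpi_pkgcoreid =
		 * cpi_clogid = 0x12 & 0xf = 2, and on families that take
		 * the non-topoext path the node id comes from the APIC ID
		 * bits above that mask (0x12 >> 4 = 1).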
832 */ 833 cpi->cpi_ncore_per_chip = 834 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 835 if (coreidsz == 0) { 836 /* Use legacy method */ 837 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 838 coreidsz++; 839 if (coreidsz == 0) 840 coreidsz = 1; 841 } 842 } else { 843 /* Assume single-core part */ 844 cpi->cpi_ncore_per_chip = 1; 845 coreidsz = 1; 846 } 847 848 cpi->cpi_clogid = cpi->cpi_pkgcoreid = 849 cpi->cpi_apicid & ((1<<coreidsz) - 1); 850 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip; 851 852 /* Get node ID, compute unit ID */ 853 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 854 cpi->cpi_xmaxeax >= 0x8000001e) { 855 cp = &cpi->cpi_extd[0x1e]; 856 cp->cp_eax = 0x8000001e; 857 (void) __cpuid_insn(cp); 858 859 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1; 860 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0); 861 cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1; 862 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) 863 + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit) 864 * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg); 865 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) { 866 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 867 } else if (cpi->cpi_family == 0x10) { 868 /* 869 * See if we are a multi-node processor. 870 * All processors in the system have the same number of nodes 871 */ 872 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 873 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 874 /* Single-node */ 875 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 876 coreidsz); 877 } else { 878 879 /* 880 * Multi-node revision D (2 nodes per package 881 * are supported) 882 */ 883 cpi->cpi_procnodes_per_pkg = 2; 884 885 first_half = (cpi->cpi_pkgcoreid <= 886 (cpi->cpi_ncore_per_chip/2 - 1)); 887 888 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 889 /* We are BSP */ 890 cpi->cpi_procnodeid = (first_half ? 0 : 1); 891 } else { 892 893 /* We are AP */ 894 /* NodeId[2:1] bits to use for reading F3xe8 */ 895 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 896 897 nb_caps_reg = 898 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 899 900 /* 901 * Check IntNodeNum bit (31:30, but bit 31 is 902 * always 0 on dual-node processors) 903 */ 904 if (BITX(nb_caps_reg, 30, 30) == 0) 905 cpi->cpi_procnodeid = node2_1 + 906 !first_half; 907 else 908 cpi->cpi_procnodeid = node2_1 + 909 first_half; 910 } 911 } 912 } else { 913 cpi->cpi_procnodeid = 0; 914 } 915 916 cpi->cpi_chipid = 917 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg; 918 } 919 920 /* 921 * Setup XFeature_Enabled_Mask register. Required by xsave feature. 
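 *
 * For example, on a CPU where the feature tests below find XSAVE, SSE and
 * AVX all present, XCR0 (XFEATURE_ENABLED_MASK) ends up programmed with
 * XFEATURE_LEGACY_FP | XFEATURE_SSE | XFEATURE_AVX (0x7 with the usual bit
 * assignments), i.e. x87, XMM and YMM state are all enabled for
 * xsave/xrstor, and xsave_bv_all records the same mask for later consumers.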
922 */ 923 void 924 setup_xfem(void) 925 { 926 uint64_t flags = XFEATURE_LEGACY_FP; 927 928 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); 929 930 if (is_x86_feature(x86_featureset, X86FSET_SSE)) 931 flags |= XFEATURE_SSE; 932 933 if (is_x86_feature(x86_featureset, X86FSET_AVX)) 934 flags |= XFEATURE_AVX; 935 936 set_xcr(XFEATURE_ENABLED_MASK, flags); 937 938 xsave_bv_all = flags; 939 } 940 941 void 942 cpuid_pass1(cpu_t *cpu, uchar_t *featureset) 943 { 944 uint32_t mask_ecx, mask_edx; 945 struct cpuid_info *cpi; 946 struct cpuid_regs *cp; 947 int xcpuid; 948 #if !defined(__xpv) 949 extern int idle_cpu_prefer_mwait; 950 #endif 951 952 /* 953 * Space statically allocated for BSP, ensure pointer is set 954 */ 955 if (cpu->cpu_id == 0) { 956 if (cpu->cpu_m.mcpu_cpi == NULL) 957 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 958 } 959 960 add_x86_feature(featureset, X86FSET_CPUID); 961 962 cpi = cpu->cpu_m.mcpu_cpi; 963 ASSERT(cpi != NULL); 964 cp = &cpi->cpi_std[0]; 965 cp->cp_eax = 0; 966 cpi->cpi_maxeax = __cpuid_insn(cp); 967 { 968 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 969 *iptr++ = cp->cp_ebx; 970 *iptr++ = cp->cp_edx; 971 *iptr++ = cp->cp_ecx; 972 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 973 } 974 975 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 976 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 977 978 /* 979 * Limit the range in case of weird hardware 980 */ 981 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 982 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 983 if (cpi->cpi_maxeax < 1) 984 goto pass1_done; 985 986 cp = &cpi->cpi_std[1]; 987 cp->cp_eax = 1; 988 (void) __cpuid_insn(cp); 989 990 /* 991 * Extract identifying constants for easy access. 992 */ 993 cpi->cpi_model = CPI_MODEL(cpi); 994 cpi->cpi_family = CPI_FAMILY(cpi); 995 996 if (cpi->cpi_family == 0xf) 997 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 998 999 /* 1000 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 1001 * Intel, and presumably everyone else, uses model == 0xf, as 1002 * one would expect (max value means possible overflow). Sigh. 1003 */ 1004 1005 switch (cpi->cpi_vendor) { 1006 case X86_VENDOR_Intel: 1007 if (IS_EXTENDED_MODEL_INTEL(cpi)) 1008 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 1009 break; 1010 case X86_VENDOR_AMD: 1011 if (CPI_FAMILY(cpi) == 0xf) 1012 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 1013 break; 1014 default: 1015 if (cpi->cpi_model == 0xf) 1016 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 1017 break; 1018 } 1019 1020 cpi->cpi_step = CPI_STEP(cpi); 1021 cpi->cpi_brandid = CPI_BRANDID(cpi); 1022 1023 /* 1024 * *default* assumptions: 1025 * - believe %edx feature word 1026 * - ignore %ecx feature word 1027 * - 32-bit virtual and physical addressing 1028 */ 1029 mask_edx = 0xffffffff; 1030 mask_ecx = 0; 1031 1032 cpi->cpi_pabits = cpi->cpi_vabits = 32; 1033 1034 switch (cpi->cpi_vendor) { 1035 case X86_VENDOR_Intel: 1036 if (cpi->cpi_family == 5) 1037 x86_type = X86_TYPE_P5; 1038 else if (IS_LEGACY_P6(cpi)) { 1039 x86_type = X86_TYPE_P6; 1040 pentiumpro_bug4046376 = 1; 1041 /* 1042 * Clear the SEP bit when it was set erroneously 1043 */ 1044 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 1045 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 1046 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 1047 x86_type = X86_TYPE_P4; 1048 /* 1049 * We don't currently depend on any of the %ecx 1050 * features until Prescott, so we'll only check 1051 * this from P4 onwards. We might want to revisit 1052 * that idea later. 
1053 */ 1054 mask_ecx = 0xffffffff; 1055 } else if (cpi->cpi_family > 0xf) 1056 mask_ecx = 0xffffffff; 1057 /* 1058 * We don't support MONITOR/MWAIT if leaf 5 is not available 1059 * to obtain the monitor linesize. 1060 */ 1061 if (cpi->cpi_maxeax < 5) 1062 mask_ecx &= ~CPUID_INTC_ECX_MON; 1063 break; 1064 case X86_VENDOR_IntelClone: 1065 default: 1066 break; 1067 case X86_VENDOR_AMD: 1068 #if defined(OPTERON_ERRATUM_108) 1069 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 1070 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 1071 cpi->cpi_model = 0xc; 1072 } else 1073 #endif 1074 if (cpi->cpi_family == 5) { 1075 /* 1076 * AMD K5 and K6 1077 * 1078 * These CPUs have an incomplete implementation 1079 * of MCA/MCE which we mask away. 1080 */ 1081 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 1082 1083 /* 1084 * Model 0 uses the wrong (APIC) bit 1085 * to indicate PGE. Fix it here. 1086 */ 1087 if (cpi->cpi_model == 0) { 1088 if (cp->cp_edx & 0x200) { 1089 cp->cp_edx &= ~0x200; 1090 cp->cp_edx |= CPUID_INTC_EDX_PGE; 1091 } 1092 } 1093 1094 /* 1095 * Early models had problems w/ MMX; disable. 1096 */ 1097 if (cpi->cpi_model < 6) 1098 mask_edx &= ~CPUID_INTC_EDX_MMX; 1099 } 1100 1101 /* 1102 * For newer families, SSE3 and CX16, at least, are valid; 1103 * enable all 1104 */ 1105 if (cpi->cpi_family >= 0xf) 1106 mask_ecx = 0xffffffff; 1107 /* 1108 * We don't support MONITOR/MWAIT if leaf 5 is not available 1109 * to obtain the monitor linesize. 1110 */ 1111 if (cpi->cpi_maxeax < 5) 1112 mask_ecx &= ~CPUID_INTC_ECX_MON; 1113 1114 #if !defined(__xpv) 1115 /* 1116 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 1117 * processors. AMD does not intend MWAIT to be used in the cpu 1118 * idle loop on current and future processors. 10h and future 1119 * AMD processors use more power in MWAIT than HLT. 1120 * Pre-family-10h Opterons do not have the MWAIT instruction. 1121 */ 1122 idle_cpu_prefer_mwait = 0; 1123 #endif 1124 1125 break; 1126 case X86_VENDOR_TM: 1127 /* 1128 * workaround the NT workaround in CMS 4.1 1129 */ 1130 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 1131 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 1132 cp->cp_edx |= CPUID_INTC_EDX_CX8; 1133 break; 1134 case X86_VENDOR_Centaur: 1135 /* 1136 * workaround the NT workarounds again 1137 */ 1138 if (cpi->cpi_family == 6) 1139 cp->cp_edx |= CPUID_INTC_EDX_CX8; 1140 break; 1141 case X86_VENDOR_Cyrix: 1142 /* 1143 * We rely heavily on the probing in locore 1144 * to actually figure out what parts, if any, 1145 * of the Cyrix cpuid instruction to believe. 
1146 */ 1147 switch (x86_type) { 1148 case X86_TYPE_CYRIX_486: 1149 mask_edx = 0; 1150 break; 1151 case X86_TYPE_CYRIX_6x86: 1152 mask_edx = 0; 1153 break; 1154 case X86_TYPE_CYRIX_6x86L: 1155 mask_edx = 1156 CPUID_INTC_EDX_DE | 1157 CPUID_INTC_EDX_CX8; 1158 break; 1159 case X86_TYPE_CYRIX_6x86MX: 1160 mask_edx = 1161 CPUID_INTC_EDX_DE | 1162 CPUID_INTC_EDX_MSR | 1163 CPUID_INTC_EDX_CX8 | 1164 CPUID_INTC_EDX_PGE | 1165 CPUID_INTC_EDX_CMOV | 1166 CPUID_INTC_EDX_MMX; 1167 break; 1168 case X86_TYPE_CYRIX_GXm: 1169 mask_edx = 1170 CPUID_INTC_EDX_MSR | 1171 CPUID_INTC_EDX_CX8 | 1172 CPUID_INTC_EDX_CMOV | 1173 CPUID_INTC_EDX_MMX; 1174 break; 1175 case X86_TYPE_CYRIX_MediaGX: 1176 break; 1177 case X86_TYPE_CYRIX_MII: 1178 case X86_TYPE_VIA_CYRIX_III: 1179 mask_edx = 1180 CPUID_INTC_EDX_DE | 1181 CPUID_INTC_EDX_TSC | 1182 CPUID_INTC_EDX_MSR | 1183 CPUID_INTC_EDX_CX8 | 1184 CPUID_INTC_EDX_PGE | 1185 CPUID_INTC_EDX_CMOV | 1186 CPUID_INTC_EDX_MMX; 1187 break; 1188 default: 1189 break; 1190 } 1191 break; 1192 } 1193 1194 #if defined(__xpv) 1195 /* 1196 * Do not support MONITOR/MWAIT under a hypervisor 1197 */ 1198 mask_ecx &= ~CPUID_INTC_ECX_MON; 1199 /* 1200 * Do not support XSAVE under a hypervisor for now 1201 */ 1202 xsave_force_disable = B_TRUE; 1203 1204 #endif /* __xpv */ 1205 1206 if (xsave_force_disable) { 1207 mask_ecx &= ~CPUID_INTC_ECX_XSAVE; 1208 mask_ecx &= ~CPUID_INTC_ECX_AVX; 1209 mask_ecx &= ~CPUID_INTC_ECX_F16C; 1210 mask_ecx &= ~CPUID_INTC_ECX_FMA; 1211 } 1212 1213 /* 1214 * Now we've figured out the masks that determine 1215 * which bits we choose to believe, apply the masks 1216 * to the feature words, then map the kernel's view 1217 * of these feature words into its feature word. 1218 */ 1219 cp->cp_edx &= mask_edx; 1220 cp->cp_ecx &= mask_ecx; 1221 1222 /* 1223 * apply any platform restrictions (we don't call this 1224 * immediately after __cpuid_insn here, because we need the 1225 * workarounds applied above first) 1226 */ 1227 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 1228 1229 /* 1230 * In addition to ecx and edx, Intel is storing a bunch of instruction 1231 * set extensions in leaf 7's ebx. 1232 */ 1233 if (cpi->cpi_vendor == X86_VENDOR_Intel && cpi->cpi_maxeax >= 7) { 1234 struct cpuid_regs *ecp; 1235 ecp = &cpi->cpi_std[7]; 1236 ecp->cp_eax = 7; 1237 ecp->cp_ecx = 0; 1238 (void) __cpuid_insn(ecp); 1239 /* 1240 * If XSAVE has been disabled, just ignore all of the AVX 1241 * dependent flags here. 
1242 */ 1243 if (xsave_force_disable) { 1244 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 1245 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 1246 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 1247 } 1248 } 1249 1250 /* 1251 * fold in overrides from the "eeprom" mechanism 1252 */ 1253 cp->cp_edx |= cpuid_feature_edx_include; 1254 cp->cp_edx &= ~cpuid_feature_edx_exclude; 1255 1256 cp->cp_ecx |= cpuid_feature_ecx_include; 1257 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 1258 1259 if (cp->cp_edx & CPUID_INTC_EDX_PSE) { 1260 add_x86_feature(featureset, X86FSET_LARGEPAGE); 1261 } 1262 if (cp->cp_edx & CPUID_INTC_EDX_TSC) { 1263 add_x86_feature(featureset, X86FSET_TSC); 1264 } 1265 if (cp->cp_edx & CPUID_INTC_EDX_MSR) { 1266 add_x86_feature(featureset, X86FSET_MSR); 1267 } 1268 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) { 1269 add_x86_feature(featureset, X86FSET_MTRR); 1270 } 1271 if (cp->cp_edx & CPUID_INTC_EDX_PGE) { 1272 add_x86_feature(featureset, X86FSET_PGE); 1273 } 1274 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) { 1275 add_x86_feature(featureset, X86FSET_CMOV); 1276 } 1277 if (cp->cp_edx & CPUID_INTC_EDX_MMX) { 1278 add_x86_feature(featureset, X86FSET_MMX); 1279 } 1280 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 1281 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) { 1282 add_x86_feature(featureset, X86FSET_MCA); 1283 } 1284 if (cp->cp_edx & CPUID_INTC_EDX_PAE) { 1285 add_x86_feature(featureset, X86FSET_PAE); 1286 } 1287 if (cp->cp_edx & CPUID_INTC_EDX_CX8) { 1288 add_x86_feature(featureset, X86FSET_CX8); 1289 } 1290 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) { 1291 add_x86_feature(featureset, X86FSET_CX16); 1292 } 1293 if (cp->cp_edx & CPUID_INTC_EDX_PAT) { 1294 add_x86_feature(featureset, X86FSET_PAT); 1295 } 1296 if (cp->cp_edx & CPUID_INTC_EDX_SEP) { 1297 add_x86_feature(featureset, X86FSET_SEP); 1298 } 1299 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 1300 /* 1301 * In our implementation, fxsave/fxrstor 1302 * are prerequisites before we'll even 1303 * try and do SSE things. 1304 */ 1305 if (cp->cp_edx & CPUID_INTC_EDX_SSE) { 1306 add_x86_feature(featureset, X86FSET_SSE); 1307 } 1308 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) { 1309 add_x86_feature(featureset, X86FSET_SSE2); 1310 } 1311 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) { 1312 add_x86_feature(featureset, X86FSET_SSE3); 1313 } 1314 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) { 1315 add_x86_feature(featureset, X86FSET_SSSE3); 1316 } 1317 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) { 1318 add_x86_feature(featureset, X86FSET_SSE4_1); 1319 } 1320 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) { 1321 add_x86_feature(featureset, X86FSET_SSE4_2); 1322 } 1323 if (cp->cp_ecx & CPUID_INTC_ECX_AES) { 1324 add_x86_feature(featureset, X86FSET_AES); 1325 } 1326 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) { 1327 add_x86_feature(featureset, X86FSET_PCLMULQDQ); 1328 } 1329 1330 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) { 1331 add_x86_feature(featureset, X86FSET_XSAVE); 1332 1333 /* We only test AVX when there is XSAVE */ 1334 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) { 1335 add_x86_feature(featureset, 1336 X86FSET_AVX); 1337 1338 /* 1339 * Intel says we can't check these without also 1340 * checking AVX. 
1341 */ 1342 if (cp->cp_ecx & CPUID_INTC_ECX_F16C) 1343 add_x86_feature(featureset, 1344 X86FSET_F16C); 1345 1346 if (cp->cp_ecx & CPUID_INTC_ECX_FMA) 1347 add_x86_feature(featureset, 1348 X86FSET_FMA); 1349 1350 if (cpi->cpi_std[7].cp_ebx & 1351 CPUID_INTC_EBX_7_0_BMI1) 1352 add_x86_feature(featureset, 1353 X86FSET_BMI1); 1354 1355 if (cpi->cpi_std[7].cp_ebx & 1356 CPUID_INTC_EBX_7_0_BMI2) 1357 add_x86_feature(featureset, 1358 X86FSET_BMI2); 1359 1360 if (cpi->cpi_std[7].cp_ebx & 1361 CPUID_INTC_EBX_7_0_AVX2) 1362 add_x86_feature(featureset, 1363 X86FSET_AVX2); 1364 } 1365 } 1366 } 1367 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) { 1368 add_x86_feature(featureset, X86FSET_X2APIC); 1369 } 1370 if (cp->cp_edx & CPUID_INTC_EDX_DE) { 1371 add_x86_feature(featureset, X86FSET_DE); 1372 } 1373 #if !defined(__xpv) 1374 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 1375 1376 /* 1377 * We require the CLFLUSH instruction for erratum workaround 1378 * to use MONITOR/MWAIT. 1379 */ 1380 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1381 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 1382 add_x86_feature(featureset, X86FSET_MWAIT); 1383 } else { 1384 extern int idle_cpu_assert_cflush_monitor; 1385 1386 /* 1387 * All processors we are aware of which have 1388 * MONITOR/MWAIT also have CLFLUSH. 1389 */ 1390 if (idle_cpu_assert_cflush_monitor) { 1391 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 1392 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 1393 } 1394 } 1395 } 1396 #endif /* __xpv */ 1397 1398 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) { 1399 add_x86_feature(featureset, X86FSET_VMX); 1400 } 1401 1402 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND) 1403 add_x86_feature(featureset, X86FSET_RDRAND); 1404 1405 /* 1406 * Only need it first time, rest of the cpus would follow suit. 1407 * we only capture this for the bootcpu. 1408 */ 1409 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1410 add_x86_feature(featureset, X86FSET_CLFSH); 1411 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 1412 } 1413 if (is_x86_feature(featureset, X86FSET_PAE)) 1414 cpi->cpi_pabits = 36; 1415 1416 /* 1417 * Hyperthreading configuration is slightly tricky on Intel 1418 * and pure clones, and even trickier on AMD. 1419 * 1420 * (AMD chose to set the HTT bit on their CMP processors, 1421 * even though they're not actually hyperthreaded. Thus it 1422 * takes a bit more work to figure out what's really going 1423 * on ... see the handling of the CMP_LGCY bit below) 1424 */ 1425 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 1426 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 1427 if (cpi->cpi_ncpu_per_chip > 1) 1428 add_x86_feature(featureset, X86FSET_HTT); 1429 } else { 1430 cpi->cpi_ncpu_per_chip = 1; 1431 } 1432 1433 /* 1434 * Work on the "extended" feature information, doing 1435 * some basic initialization for cpuid_pass2() 1436 */ 1437 xcpuid = 0; 1438 switch (cpi->cpi_vendor) { 1439 case X86_VENDOR_Intel: 1440 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 1441 xcpuid++; 1442 break; 1443 case X86_VENDOR_AMD: 1444 if (cpi->cpi_family > 5 || 1445 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 1446 xcpuid++; 1447 break; 1448 case X86_VENDOR_Cyrix: 1449 /* 1450 * Only these Cyrix CPUs are -known- to support 1451 * extended cpuid operations. 
1452 */ 1453 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1454 x86_type == X86_TYPE_CYRIX_GXm) 1455 xcpuid++; 1456 break; 1457 case X86_VENDOR_Centaur: 1458 case X86_VENDOR_TM: 1459 default: 1460 xcpuid++; 1461 break; 1462 } 1463 1464 if (xcpuid) { 1465 cp = &cpi->cpi_extd[0]; 1466 cp->cp_eax = 0x80000000; 1467 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1468 } 1469 1470 if (cpi->cpi_xmaxeax & 0x80000000) { 1471 1472 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1473 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1474 1475 switch (cpi->cpi_vendor) { 1476 case X86_VENDOR_Intel: 1477 case X86_VENDOR_AMD: 1478 if (cpi->cpi_xmaxeax < 0x80000001) 1479 break; 1480 cp = &cpi->cpi_extd[1]; 1481 cp->cp_eax = 0x80000001; 1482 (void) __cpuid_insn(cp); 1483 1484 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1485 cpi->cpi_family == 5 && 1486 cpi->cpi_model == 6 && 1487 cpi->cpi_step == 6) { 1488 /* 1489 * K6 model 6 uses bit 10 to indicate SYSC 1490 * Later models use bit 11. Fix it here. 1491 */ 1492 if (cp->cp_edx & 0x400) { 1493 cp->cp_edx &= ~0x400; 1494 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1495 } 1496 } 1497 1498 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1499 1500 /* 1501 * Compute the additions to the kernel's feature word. 1502 */ 1503 if (cp->cp_edx & CPUID_AMD_EDX_NX) { 1504 add_x86_feature(featureset, X86FSET_NX); 1505 } 1506 1507 /* 1508 * Regardless whether or not we boot 64-bit, 1509 * we should have a way to identify whether 1510 * the CPU is capable of running 64-bit. 1511 */ 1512 if (cp->cp_edx & CPUID_AMD_EDX_LM) { 1513 add_x86_feature(featureset, X86FSET_64); 1514 } 1515 1516 #if defined(__amd64) 1517 /* 1 GB large page - enable only for 64 bit kernel */ 1518 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) { 1519 add_x86_feature(featureset, X86FSET_1GPG); 1520 } 1521 #endif 1522 1523 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1524 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1525 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) { 1526 add_x86_feature(featureset, X86FSET_SSE4A); 1527 } 1528 1529 /* 1530 * If both the HTT and CMP_LGCY bits are set, 1531 * then we're not actually HyperThreaded. Read 1532 * "AMD CPUID Specification" for more details. 1533 */ 1534 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1535 is_x86_feature(featureset, X86FSET_HTT) && 1536 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1537 remove_x86_feature(featureset, X86FSET_HTT); 1538 add_x86_feature(featureset, X86FSET_CMP); 1539 } 1540 #if defined(__amd64) 1541 /* 1542 * It's really tricky to support syscall/sysret in 1543 * the i386 kernel; we rely on sysenter/sysexit 1544 * instead. In the amd64 kernel, things are -way- 1545 * better. 1546 */ 1547 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) { 1548 add_x86_feature(featureset, X86FSET_ASYSC); 1549 } 1550 1551 /* 1552 * While we're thinking about system calls, note 1553 * that AMD processors don't support sysenter 1554 * in long mode at all, so don't try to program them. 1555 */ 1556 if (x86_vendor == X86_VENDOR_AMD) { 1557 remove_x86_feature(featureset, X86FSET_SEP); 1558 } 1559 #endif 1560 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) { 1561 add_x86_feature(featureset, X86FSET_TSCP); 1562 } 1563 1564 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) { 1565 add_x86_feature(featureset, X86FSET_SVM); 1566 } 1567 1568 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) { 1569 add_x86_feature(featureset, X86FSET_TOPOEXT); 1570 } 1571 break; 1572 default: 1573 break; 1574 } 1575 1576 /* 1577 * Get CPUID data about processor cores and hyperthreads. 
1578 */ 1579 switch (cpi->cpi_vendor) { 1580 case X86_VENDOR_Intel: 1581 if (cpi->cpi_maxeax >= 4) { 1582 cp = &cpi->cpi_std[4]; 1583 cp->cp_eax = 4; 1584 cp->cp_ecx = 0; 1585 (void) __cpuid_insn(cp); 1586 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1587 } 1588 /*FALLTHROUGH*/ 1589 case X86_VENDOR_AMD: 1590 if (cpi->cpi_xmaxeax < 0x80000008) 1591 break; 1592 cp = &cpi->cpi_extd[8]; 1593 cp->cp_eax = 0x80000008; 1594 (void) __cpuid_insn(cp); 1595 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1596 1597 /* 1598 * Virtual and physical address limits from 1599 * cpuid override previously guessed values. 1600 */ 1601 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1602 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1603 break; 1604 default: 1605 break; 1606 } 1607 1608 /* 1609 * Derive the number of cores per chip 1610 */ 1611 switch (cpi->cpi_vendor) { 1612 case X86_VENDOR_Intel: 1613 if (cpi->cpi_maxeax < 4) { 1614 cpi->cpi_ncore_per_chip = 1; 1615 break; 1616 } else { 1617 cpi->cpi_ncore_per_chip = 1618 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1619 } 1620 break; 1621 case X86_VENDOR_AMD: 1622 if (cpi->cpi_xmaxeax < 0x80000008) { 1623 cpi->cpi_ncore_per_chip = 1; 1624 break; 1625 } else { 1626 /* 1627 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1628 * 1 less than the number of physical cores on 1629 * the chip. In family 0x10 this value can 1630 * be affected by "downcoring" - it reflects 1631 * 1 less than the number of cores actually 1632 * enabled on this node. 1633 */ 1634 cpi->cpi_ncore_per_chip = 1635 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1636 } 1637 break; 1638 default: 1639 cpi->cpi_ncore_per_chip = 1; 1640 break; 1641 } 1642 1643 /* 1644 * Get CPUID data about TSC Invariance in Deep C-State. 1645 */ 1646 switch (cpi->cpi_vendor) { 1647 case X86_VENDOR_Intel: 1648 if (cpi->cpi_maxeax >= 7) { 1649 cp = &cpi->cpi_extd[7]; 1650 cp->cp_eax = 0x80000007; 1651 cp->cp_ecx = 0; 1652 (void) __cpuid_insn(cp); 1653 } 1654 break; 1655 default: 1656 break; 1657 } 1658 } else { 1659 cpi->cpi_ncore_per_chip = 1; 1660 } 1661 1662 /* 1663 * If more than one core, then this processor is CMP. 1664 */ 1665 if (cpi->cpi_ncore_per_chip > 1) { 1666 add_x86_feature(featureset, X86FSET_CMP); 1667 } 1668 1669 /* 1670 * If the number of cores is the same as the number 1671 * of CPUs, then we cannot have HyperThreading. 1672 */ 1673 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) { 1674 remove_x86_feature(featureset, X86FSET_HTT); 1675 } 1676 1677 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1678 cpi->cpi_procnodes_per_pkg = 1; 1679 cpi->cpi_cores_per_compunit = 1; 1680 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE && 1681 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) { 1682 /* 1683 * Single-core single-threaded processors. 1684 */ 1685 cpi->cpi_chipid = -1; 1686 cpi->cpi_clogid = 0; 1687 cpi->cpi_coreid = cpu->cpu_id; 1688 cpi->cpi_pkgcoreid = 0; 1689 if (cpi->cpi_vendor == X86_VENDOR_AMD) 1690 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 1691 else 1692 cpi->cpi_procnodeid = cpi->cpi_chipid; 1693 } else if (cpi->cpi_ncpu_per_chip > 1) { 1694 if (cpi->cpi_vendor == X86_VENDOR_Intel) 1695 cpuid_intel_getids(cpu, featureset); 1696 else if (cpi->cpi_vendor == X86_VENDOR_AMD) 1697 cpuid_amd_getids(cpu); 1698 else { 1699 /* 1700 * All other processors are currently 1701 * assumed to have single cores. 
1702 */ 1703 cpi->cpi_coreid = cpi->cpi_chipid; 1704 cpi->cpi_pkgcoreid = 0; 1705 cpi->cpi_procnodeid = cpi->cpi_chipid; 1706 cpi->cpi_compunitid = cpi->cpi_chipid; 1707 } 1708 } 1709 1710 /* 1711 * Synthesize chip "revision" and socket type 1712 */ 1713 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1714 cpi->cpi_model, cpi->cpi_step); 1715 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1716 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1717 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1718 cpi->cpi_model, cpi->cpi_step); 1719 1720 pass1_done: 1721 cpi->cpi_pass = 1; 1722 } 1723 1724 /* 1725 * Make copies of the cpuid table entries we depend on, in 1726 * part for ease of parsing now, in part so that we have only 1727 * one place to correct any of it, in part for ease of 1728 * later export to userland, and in part so we can look at 1729 * this stuff in a crash dump. 1730 */ 1731 1732 /*ARGSUSED*/ 1733 void 1734 cpuid_pass2(cpu_t *cpu) 1735 { 1736 uint_t n, nmax; 1737 int i; 1738 struct cpuid_regs *cp; 1739 uint8_t *dp; 1740 uint32_t *iptr; 1741 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1742 1743 ASSERT(cpi->cpi_pass == 1); 1744 1745 if (cpi->cpi_maxeax < 1) 1746 goto pass2_done; 1747 1748 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1749 nmax = NMAX_CPI_STD; 1750 /* 1751 * (We already handled n == 0 and n == 1 in pass 1) 1752 */ 1753 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1754 cp->cp_eax = n; 1755 1756 /* 1757 * CPUID function 4 expects %ecx to be initialized 1758 * with an index which indicates which cache to return 1759 * information about. The OS is expected to call function 4 1760 * with %ecx set to 0, 1, 2, ... until it returns with 1761 * EAX[4:0] set to 0, which indicates there are no more 1762 * caches. 1763 * 1764 * Here, populate cpi_std[4] with the information returned by 1765 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1766 * when dynamic memory allocation becomes available. 1767 * 1768 * Note: we need to explicitly initialize %ecx here, since 1769 * function 4 may have been previously invoked. 1770 */ 1771 if (n == 4) 1772 cp->cp_ecx = 0; 1773 1774 (void) __cpuid_insn(cp); 1775 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1776 switch (n) { 1777 case 2: 1778 /* 1779 * "the lower 8 bits of the %eax register 1780 * contain a value that identifies the number 1781 * of times the cpuid [instruction] has to be 1782 * executed to obtain a complete image of the 1783 * processor's caching systems." 1784 * 1785 * How *do* they make this stuff up? 1786 */ 1787 cpi->cpi_ncache = sizeof (*cp) * 1788 BITX(cp->cp_eax, 7, 0); 1789 if (cpi->cpi_ncache == 0) 1790 break; 1791 cpi->cpi_ncache--; /* skip count byte */ 1792 1793 /* 1794 * Well, for now, rather than attempt to implement 1795 * this slightly dubious algorithm, we just look 1796 * at the first 15 .. 
			 */
			if (cpi->cpi_ncache > (sizeof (*cp) - 1))
				cpi->cpi_ncache = sizeof (*cp) - 1;

			dp = cpi->cpi_cacheinfo;
			if (BITX(cp->cp_eax, 31, 31) == 0) {
				uint8_t *p = (void *)&cp->cp_eax;
				for (i = 1; i < 4; i++)
					if (p[i] != 0)
						*dp++ = p[i];
			}
			if (BITX(cp->cp_ebx, 31, 31) == 0) {
				uint8_t *p = (void *)&cp->cp_ebx;
				for (i = 0; i < 4; i++)
					if (p[i] != 0)
						*dp++ = p[i];
			}
			if (BITX(cp->cp_ecx, 31, 31) == 0) {
				uint8_t *p = (void *)&cp->cp_ecx;
				for (i = 0; i < 4; i++)
					if (p[i] != 0)
						*dp++ = p[i];
			}
			if (BITX(cp->cp_edx, 31, 31) == 0) {
				uint8_t *p = (void *)&cp->cp_edx;
				for (i = 0; i < 4; i++)
					if (p[i] != 0)
						*dp++ = p[i];
			}
			break;

		case 3:	/* Processor serial number, if PSN supported */
			break;

		case 4:	/* Deterministic cache parameters */
			break;

		case 5:	/* Monitor/Mwait parameters */
		{
			size_t mwait_size;

			/*
			 * check cpi_mwait.support which was set in cpuid_pass1
			 */
			if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
				break;

			/*
			 * Protect ourselves from insane mwait line size.
			 * Workaround for incomplete hardware emulator(s).
			 */
			mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
			if (mwait_size < sizeof (uint32_t) ||
			    !ISP2(mwait_size)) {
#if DEBUG
				cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
				    "size %ld", cpu->cpu_id, (long)mwait_size);
#endif
				break;
			}

			cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
			cpi->cpi_mwait.mon_max = mwait_size;
			if (MWAIT_EXTENSION(cpi)) {
				cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
				if (MWAIT_INT_ENABLE(cpi))
					cpi->cpi_mwait.support |=
					    MWAIT_ECX_INT_ENABLE;
			}
			break;
		}
		default:
			break;
		}
	}

	if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
		struct cpuid_regs regs;

		cp = &regs;
		cp->cp_eax = 0xB;
		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;

		(void) __cpuid_insn(cp);

		/*
		 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
		 * indicates that the extended topology enumeration leaf is
		 * available.
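		 *
		 * Each sub-leaf reports a level type in %ecx[15:8] (1 = SMT,
		 * 2 = core) and, in %eax[4:0], the number of low-order
		 * x2APIC ID bits that distinguish objects below that level.
		 * For example (illustrative numbers only): a part reporting
		 * a shift of 1 at the SMT level and 5 at the core level has
		 * 2 logical CPUs per core and up to 32 logical CPUs per chip,
		 * so an x2APIC ID of 0x23 decodes to chipid 1, clogid 0x3,
		 * coreid 0x11 and pkgcoreid 0x1 in the code below.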
		 */
		if (cp->cp_ebx) {
			uint32_t x2apic_id;
			uint_t coreid_shift = 0;
			uint_t ncpu_per_core = 1;
			uint_t chipid_shift = 0;
			uint_t ncpu_per_chip = 1;
			uint_t i;
			uint_t level;

			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
				cp->cp_eax = 0xB;
				cp->cp_ecx = i;

				(void) __cpuid_insn(cp);
				level = CPI_CPU_LEVEL_TYPE(cp);

				if (level == 1) {
					x2apic_id = cp->cp_edx;
					coreid_shift = BITX(cp->cp_eax, 4, 0);
					ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
				} else if (level == 2) {
					x2apic_id = cp->cp_edx;
					chipid_shift = BITX(cp->cp_eax, 4, 0);
					ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
				}
			}

			cpi->cpi_apicid = x2apic_id;
			cpi->cpi_ncpu_per_chip = ncpu_per_chip;
			cpi->cpi_ncore_per_chip = ncpu_per_chip /
			    ncpu_per_core;
			cpi->cpi_chipid = x2apic_id >> chipid_shift;
			cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
			cpi->cpi_coreid = x2apic_id >> coreid_shift;
			cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
		}

		/* Make cp NULL so that we don't stumble on others */
		cp = NULL;
	}

	/*
	 * XSAVE enumeration
	 */
	if (cpi->cpi_maxeax >= 0xD) {
		struct cpuid_regs regs;
		boolean_t cpuid_d_valid = B_TRUE;

		cp = &regs;
		cp->cp_eax = 0xD;
		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;

		(void) __cpuid_insn(cp);

		/*
		 * Sanity checks for debug
		 */
		if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
		    (cp->cp_eax & XFEATURE_SSE) == 0) {
			cpuid_d_valid = B_FALSE;
		}

		cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
		cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
		cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;

		/*
		 * If the hw supports AVX, get the size and offset in the save
		 * area for the ymm state.
		 */
		if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
			cp->cp_eax = 0xD;
			cp->cp_ecx = 2;
			cp->cp_edx = cp->cp_ebx = 0;

			(void) __cpuid_insn(cp);

			if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
			    cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
				cpuid_d_valid = B_FALSE;
			}

			cpi->cpi_xsave.ymm_size = cp->cp_eax;
			cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
		}

		if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
			xsave_state_size = 0;
		} else if (cpuid_d_valid) {
			xsave_state_size = cpi->cpi_xsave.xsav_max_size;
		} else {
			/* Broken CPUID 0xD, probably in HVM */
			cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
			    "value: hw_low = %d, hw_high = %d, xsave_size = %d"
			    ", ymm_size = %d, ymm_offset = %d\n",
			    cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
			    cpi->cpi_xsave.xsav_hw_features_high,
			    (int)cpi->cpi_xsave.xsav_max_size,
			    (int)cpi->cpi_xsave.ymm_size,
			    (int)cpi->cpi_xsave.ymm_offset);

			if (xsave_state_size != 0) {
				/*
				 * This must be a non-boot CPU. We cannot
				 * continue, because boot cpu has already
				 * enabled XSAVE.
				 */
				ASSERT(cpu->cpu_id != 0);
				cmn_err(CE_PANIC, "cpu%d: we have already "
				    "enabled XSAVE on boot cpu, cannot "
				    "continue.", cpu->cpu_id);
			} else {
				/*
				 * If we reached here on the boot CPU, it's also
				 * almost certain that we'll reach here on the
				 * non-boot CPUs. When we're here on a boot CPU
				 * we should disable the feature, on a non-boot
				 * CPU we need to confirm that we have.
2005 */ 2006 if (cpu->cpu_id == 0) { 2007 remove_x86_feature(x86_featureset, 2008 X86FSET_XSAVE); 2009 remove_x86_feature(x86_featureset, 2010 X86FSET_AVX); 2011 remove_x86_feature(x86_featureset, 2012 X86FSET_F16C); 2013 remove_x86_feature(x86_featureset, 2014 X86FSET_BMI1); 2015 remove_x86_feature(x86_featureset, 2016 X86FSET_BMI2); 2017 remove_x86_feature(x86_featureset, 2018 X86FSET_FMA); 2019 remove_x86_feature(x86_featureset, 2020 X86FSET_AVX2); 2021 CPI_FEATURES_ECX(cpi) &= 2022 ~CPUID_INTC_ECX_XSAVE; 2023 CPI_FEATURES_ECX(cpi) &= 2024 ~CPUID_INTC_ECX_AVX; 2025 CPI_FEATURES_ECX(cpi) &= 2026 ~CPUID_INTC_ECX_F16C; 2027 CPI_FEATURES_ECX(cpi) &= 2028 ~CPUID_INTC_ECX_FMA; 2029 CPI_FEATURES_7_0_EBX(cpi) &= 2030 ~CPUID_INTC_EBX_7_0_BMI1; 2031 CPI_FEATURES_7_0_EBX(cpi) &= 2032 ~CPUID_INTC_EBX_7_0_BMI2; 2033 CPI_FEATURES_7_0_EBX(cpi) &= 2034 ~CPUID_INTC_EBX_7_0_AVX2; 2035 xsave_force_disable = B_TRUE; 2036 } else { 2037 VERIFY(is_x86_feature(x86_featureset, 2038 X86FSET_XSAVE) == B_FALSE); 2039 } 2040 } 2041 } 2042 } 2043 2044 2045 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 2046 goto pass2_done; 2047 2048 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 2049 nmax = NMAX_CPI_EXTD; 2050 /* 2051 * Copy the extended properties, fixing them as we go. 2052 * (We already handled n == 0 and n == 1 in pass 1) 2053 */ 2054 iptr = (void *)cpi->cpi_brandstr; 2055 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 2056 cp->cp_eax = 0x80000000 + n; 2057 (void) __cpuid_insn(cp); 2058 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 2059 switch (n) { 2060 case 2: 2061 case 3: 2062 case 4: 2063 /* 2064 * Extract the brand string 2065 */ 2066 *iptr++ = cp->cp_eax; 2067 *iptr++ = cp->cp_ebx; 2068 *iptr++ = cp->cp_ecx; 2069 *iptr++ = cp->cp_edx; 2070 break; 2071 case 5: 2072 switch (cpi->cpi_vendor) { 2073 case X86_VENDOR_AMD: 2074 /* 2075 * The Athlon and Duron were the first 2076 * parts to report the sizes of the 2077 * TLB for large pages. Before then, 2078 * we don't trust the data. 2079 */ 2080 if (cpi->cpi_family < 6 || 2081 (cpi->cpi_family == 6 && 2082 cpi->cpi_model < 1)) 2083 cp->cp_eax = 0; 2084 break; 2085 default: 2086 break; 2087 } 2088 break; 2089 case 6: 2090 switch (cpi->cpi_vendor) { 2091 case X86_VENDOR_AMD: 2092 /* 2093 * The Athlon and Duron were the first 2094 * AMD parts with L2 TLB's. 2095 * Before then, don't trust the data. 2096 */ 2097 if (cpi->cpi_family < 6 || 2098 cpi->cpi_family == 6 && 2099 cpi->cpi_model < 1) 2100 cp->cp_eax = cp->cp_ebx = 0; 2101 /* 2102 * AMD Duron rev A0 reports L2 2103 * cache size incorrectly as 1K 2104 * when it is really 64K 2105 */ 2106 if (cpi->cpi_family == 6 && 2107 cpi->cpi_model == 3 && 2108 cpi->cpi_step == 0) { 2109 cp->cp_ecx &= 0xffff; 2110 cp->cp_ecx |= 0x400000; 2111 } 2112 break; 2113 case X86_VENDOR_Cyrix: /* VIA C3 */ 2114 /* 2115 * VIA C3 processors are a bit messed 2116 * up w.r.t. encoding cache sizes in %ecx 2117 */ 2118 if (cpi->cpi_family != 6) 2119 break; 2120 /* 2121 * model 7 and 8 were incorrectly encoded 2122 * 2123 * xxx is model 8 really broken? 
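 *
 * (The repack below appears intended to move the byte-wide fields
 * these models report in %ecx into the AMD-style bit positions that
 * the extended-function cache decoding elsewhere in this file
 * expects: size in the high half, then associativity, lines per tag
 * and line size.)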
2124 */ 2125 if (cpi->cpi_model == 7 || 2126 cpi->cpi_model == 8) 2127 cp->cp_ecx = 2128 BITX(cp->cp_ecx, 31, 24) << 16 | 2129 BITX(cp->cp_ecx, 23, 16) << 12 | 2130 BITX(cp->cp_ecx, 15, 8) << 8 | 2131 BITX(cp->cp_ecx, 7, 0); 2132 /* 2133 * model 9 stepping 1 has wrong associativity 2134 */ 2135 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 2136 cp->cp_ecx |= 8 << 12; 2137 break; 2138 case X86_VENDOR_Intel: 2139 /* 2140 * Extended L2 Cache features function. 2141 * First appeared on Prescott. 2142 */ 2143 default: 2144 break; 2145 } 2146 break; 2147 default: 2148 break; 2149 } 2150 } 2151 2152 pass2_done: 2153 cpi->cpi_pass = 2; 2154 } 2155 2156 static const char * 2157 intel_cpubrand(const struct cpuid_info *cpi) 2158 { 2159 int i; 2160 2161 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2162 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 2163 return ("i486"); 2164 2165 switch (cpi->cpi_family) { 2166 case 5: 2167 return ("Intel Pentium(r)"); 2168 case 6: 2169 switch (cpi->cpi_model) { 2170 uint_t celeron, xeon; 2171 const struct cpuid_regs *cp; 2172 case 0: 2173 case 1: 2174 case 2: 2175 return ("Intel Pentium(r) Pro"); 2176 case 3: 2177 case 4: 2178 return ("Intel Pentium(r) II"); 2179 case 6: 2180 return ("Intel Celeron(r)"); 2181 case 5: 2182 case 7: 2183 celeron = xeon = 0; 2184 cp = &cpi->cpi_std[2]; /* cache info */ 2185 2186 for (i = 1; i < 4; i++) { 2187 uint_t tmp; 2188 2189 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 2190 if (tmp == 0x40) 2191 celeron++; 2192 if (tmp >= 0x44 && tmp <= 0x45) 2193 xeon++; 2194 } 2195 2196 for (i = 0; i < 2; i++) { 2197 uint_t tmp; 2198 2199 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 2200 if (tmp == 0x40) 2201 celeron++; 2202 else if (tmp >= 0x44 && tmp <= 0x45) 2203 xeon++; 2204 } 2205 2206 for (i = 0; i < 4; i++) { 2207 uint_t tmp; 2208 2209 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 2210 if (tmp == 0x40) 2211 celeron++; 2212 else if (tmp >= 0x44 && tmp <= 0x45) 2213 xeon++; 2214 } 2215 2216 for (i = 0; i < 4; i++) { 2217 uint_t tmp; 2218 2219 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 2220 if (tmp == 0x40) 2221 celeron++; 2222 else if (tmp >= 0x44 && tmp <= 0x45) 2223 xeon++; 2224 } 2225 2226 if (celeron) 2227 return ("Intel Celeron(r)"); 2228 if (xeon) 2229 return (cpi->cpi_model == 5 ? 2230 "Intel Pentium(r) II Xeon(tm)" : 2231 "Intel Pentium(r) III Xeon(tm)"); 2232 return (cpi->cpi_model == 5 ? 
2233 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 2234 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 2235 default: 2236 break; 2237 } 2238 default: 2239 break; 2240 } 2241 2242 /* BrandID is present if the field is nonzero */ 2243 if (cpi->cpi_brandid != 0) { 2244 static const struct { 2245 uint_t bt_bid; 2246 const char *bt_str; 2247 } brand_tbl[] = { 2248 { 0x1, "Intel(r) Celeron(r)" }, 2249 { 0x2, "Intel(r) Pentium(r) III" }, 2250 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 2251 { 0x4, "Intel(r) Pentium(r) III" }, 2252 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 2253 { 0x7, "Mobile Intel(r) Celeron(r)" }, 2254 { 0x8, "Intel(r) Pentium(r) 4" }, 2255 { 0x9, "Intel(r) Pentium(r) 4" }, 2256 { 0xa, "Intel(r) Celeron(r)" }, 2257 { 0xb, "Intel(r) Xeon(tm)" }, 2258 { 0xc, "Intel(r) Xeon(tm) MP" }, 2259 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 2260 { 0xf, "Mobile Intel(r) Celeron(r)" }, 2261 { 0x11, "Mobile Genuine Intel(r)" }, 2262 { 0x12, "Intel(r) Celeron(r) M" }, 2263 { 0x13, "Mobile Intel(r) Celeron(r)" }, 2264 { 0x14, "Intel(r) Celeron(r)" }, 2265 { 0x15, "Mobile Genuine Intel(r)" }, 2266 { 0x16, "Intel(r) Pentium(r) M" }, 2267 { 0x17, "Mobile Intel(r) Celeron(r)" } 2268 }; 2269 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 2270 uint_t sgn; 2271 2272 sgn = (cpi->cpi_family << 8) | 2273 (cpi->cpi_model << 4) | cpi->cpi_step; 2274 2275 for (i = 0; i < btblmax; i++) 2276 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 2277 break; 2278 if (i < btblmax) { 2279 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 2280 return ("Intel(r) Celeron(r)"); 2281 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 2282 return ("Intel(r) Xeon(tm) MP"); 2283 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 2284 return ("Intel(r) Xeon(tm)"); 2285 return (brand_tbl[i].bt_str); 2286 } 2287 } 2288 2289 return (NULL); 2290 } 2291 2292 static const char * 2293 amd_cpubrand(const struct cpuid_info *cpi) 2294 { 2295 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2296 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 2297 return ("i486 compatible"); 2298 2299 switch (cpi->cpi_family) { 2300 case 5: 2301 switch (cpi->cpi_model) { 2302 case 0: 2303 case 1: 2304 case 2: 2305 case 3: 2306 case 4: 2307 case 5: 2308 return ("AMD-K5(r)"); 2309 case 6: 2310 case 7: 2311 return ("AMD-K6(r)"); 2312 case 8: 2313 return ("AMD-K6(r)-2"); 2314 case 9: 2315 return ("AMD-K6(r)-III"); 2316 default: 2317 return ("AMD (family 5)"); 2318 } 2319 case 6: 2320 switch (cpi->cpi_model) { 2321 case 1: 2322 return ("AMD-K7(tm)"); 2323 case 0: 2324 case 2: 2325 case 4: 2326 return ("AMD Athlon(tm)"); 2327 case 3: 2328 case 7: 2329 return ("AMD Duron(tm)"); 2330 case 6: 2331 case 8: 2332 case 10: 2333 /* 2334 * Use the L2 cache size to distinguish 2335 */ 2336 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
2337 "AMD Athlon(tm)" : "AMD Duron(tm)"); 2338 default: 2339 return ("AMD (family 6)"); 2340 } 2341 default: 2342 break; 2343 } 2344 2345 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 2346 cpi->cpi_brandid != 0) { 2347 switch (BITX(cpi->cpi_brandid, 7, 5)) { 2348 case 3: 2349 return ("AMD Opteron(tm) UP 1xx"); 2350 case 4: 2351 return ("AMD Opteron(tm) DP 2xx"); 2352 case 5: 2353 return ("AMD Opteron(tm) MP 8xx"); 2354 default: 2355 return ("AMD Opteron(tm)"); 2356 } 2357 } 2358 2359 return (NULL); 2360 } 2361 2362 static const char * 2363 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 2364 { 2365 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2366 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 2367 type == X86_TYPE_CYRIX_486) 2368 return ("i486 compatible"); 2369 2370 switch (type) { 2371 case X86_TYPE_CYRIX_6x86: 2372 return ("Cyrix 6x86"); 2373 case X86_TYPE_CYRIX_6x86L: 2374 return ("Cyrix 6x86L"); 2375 case X86_TYPE_CYRIX_6x86MX: 2376 return ("Cyrix 6x86MX"); 2377 case X86_TYPE_CYRIX_GXm: 2378 return ("Cyrix GXm"); 2379 case X86_TYPE_CYRIX_MediaGX: 2380 return ("Cyrix MediaGX"); 2381 case X86_TYPE_CYRIX_MII: 2382 return ("Cyrix M2"); 2383 case X86_TYPE_VIA_CYRIX_III: 2384 return ("VIA Cyrix M3"); 2385 default: 2386 /* 2387 * Have another wild guess .. 2388 */ 2389 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 2390 return ("Cyrix 5x86"); 2391 else if (cpi->cpi_family == 5) { 2392 switch (cpi->cpi_model) { 2393 case 2: 2394 return ("Cyrix 6x86"); /* Cyrix M1 */ 2395 case 4: 2396 return ("Cyrix MediaGX"); 2397 default: 2398 break; 2399 } 2400 } else if (cpi->cpi_family == 6) { 2401 switch (cpi->cpi_model) { 2402 case 0: 2403 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 2404 case 5: 2405 case 6: 2406 case 7: 2407 case 8: 2408 case 9: 2409 return ("VIA C3"); 2410 default: 2411 break; 2412 } 2413 } 2414 break; 2415 } 2416 return (NULL); 2417 } 2418 2419 /* 2420 * This only gets called in the case that the CPU extended 2421 * feature brand string (0x80000002, 0x80000003, 0x80000004) 2422 * aren't available, or contain null bytes for some reason. 2423 */ 2424 static void 2425 fabricate_brandstr(struct cpuid_info *cpi) 2426 { 2427 const char *brand = NULL; 2428 2429 switch (cpi->cpi_vendor) { 2430 case X86_VENDOR_Intel: 2431 brand = intel_cpubrand(cpi); 2432 break; 2433 case X86_VENDOR_AMD: 2434 brand = amd_cpubrand(cpi); 2435 break; 2436 case X86_VENDOR_Cyrix: 2437 brand = cyrix_cpubrand(cpi, x86_type); 2438 break; 2439 case X86_VENDOR_NexGen: 2440 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2441 brand = "NexGen Nx586"; 2442 break; 2443 case X86_VENDOR_Centaur: 2444 if (cpi->cpi_family == 5) 2445 switch (cpi->cpi_model) { 2446 case 4: 2447 brand = "Centaur C6"; 2448 break; 2449 case 8: 2450 brand = "Centaur C2"; 2451 break; 2452 case 9: 2453 brand = "Centaur C3"; 2454 break; 2455 default: 2456 break; 2457 } 2458 break; 2459 case X86_VENDOR_Rise: 2460 if (cpi->cpi_family == 5 && 2461 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 2462 brand = "Rise mP6"; 2463 break; 2464 case X86_VENDOR_SiS: 2465 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2466 brand = "SiS 55x"; 2467 break; 2468 case X86_VENDOR_TM: 2469 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 2470 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 2471 break; 2472 case X86_VENDOR_NSC: 2473 case X86_VENDOR_UMC: 2474 default: 2475 break; 2476 } 2477 if (brand) { 2478 (void) strcpy((char *)cpi->cpi_brandstr, brand); 2479 return; 2480 } 2481 2482 /* 2483 * If all else fails ... 
2484 */ 2485 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 2486 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 2487 cpi->cpi_model, cpi->cpi_step); 2488 } 2489 2490 /* 2491 * This routine is called just after kernel memory allocation 2492 * becomes available on cpu0, and as part of mp_startup() on 2493 * the other cpus. 2494 * 2495 * Fixup the brand string, and collect any information from cpuid 2496 * that requires dynamically allocated storage to represent. 2497 */ 2498 /*ARGSUSED*/ 2499 void 2500 cpuid_pass3(cpu_t *cpu) 2501 { 2502 int i, max, shft, level, size; 2503 struct cpuid_regs regs; 2504 struct cpuid_regs *cp; 2505 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2506 2507 ASSERT(cpi->cpi_pass == 2); 2508 2509 /* 2510 * Function 4: Deterministic cache parameters 2511 * 2512 * Take this opportunity to detect the number of threads 2513 * sharing the last level cache, and construct a corresponding 2514 * cache id. The respective cpuid_info members are initialized 2515 * to the default case of "no last level cache sharing". 2516 */ 2517 cpi->cpi_ncpu_shr_last_cache = 1; 2518 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 2519 2520 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 2521 2522 /* 2523 * Find the # of elements (size) returned by fn 4, and along 2524 * the way detect last level cache sharing details. 2525 */ 2526 bzero(&regs, sizeof (regs)); 2527 cp = &regs; 2528 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 2529 cp->cp_eax = 4; 2530 cp->cp_ecx = i; 2531 2532 (void) __cpuid_insn(cp); 2533 2534 if (CPI_CACHE_TYPE(cp) == 0) 2535 break; 2536 level = CPI_CACHE_LVL(cp); 2537 if (level > max) { 2538 max = level; 2539 cpi->cpi_ncpu_shr_last_cache = 2540 CPI_NTHR_SHR_CACHE(cp) + 1; 2541 } 2542 } 2543 cpi->cpi_std_4_size = size = i; 2544 2545 /* 2546 * Allocate the cpi_std_4 array. The first element 2547 * references the regs for fn 4, %ecx == 0, which 2548 * cpuid_pass2() stashed in cpi->cpi_std[4]. 2549 */ 2550 if (size > 0) { 2551 cpi->cpi_std_4 = 2552 kmem_alloc(size * sizeof (cp), KM_SLEEP); 2553 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 2554 2555 /* 2556 * Allocate storage to hold the additional regs 2557 * for function 4, %ecx == 1 .. cpi_std_4_size. 2558 * 2559 * The regs for fn 4, %ecx == 0 has already 2560 * been allocated as indicated above. 2561 */ 2562 for (i = 1; i < size; i++) { 2563 cp = cpi->cpi_std_4[i] = 2564 kmem_zalloc(sizeof (regs), KM_SLEEP); 2565 cp->cp_eax = 4; 2566 cp->cp_ecx = i; 2567 2568 (void) __cpuid_insn(cp); 2569 } 2570 } 2571 /* 2572 * Determine the number of bits needed to represent 2573 * the number of CPUs sharing the last level cache. 2574 * 2575 * Shift off that number of bits from the APIC id to 2576 * derive the cache id. 2577 */ 2578 shft = 0; 2579 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 2580 shft++; 2581 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 2582 } 2583 2584 /* 2585 * Now fixup the brand string 2586 */ 2587 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 2588 fabricate_brandstr(cpi); 2589 } else { 2590 2591 /* 2592 * If we successfully extracted a brand string from the cpuid 2593 * instruction, clean it up by removing leading spaces and 2594 * similar junk.
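 * As a purely illustrative example, the cleanup below would turn a
 * string like "  Genuine Intel(R) Xeon(TM) CPU  " into
 * "Intel(r) Xeon(tm) CPU": leading blanks and any Genuine/Authentic
 * prefix are stripped, (R) and (TM) are lower-cased, and trailing
 * blanks are removed.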
2595 */ 2596 if (cpi->cpi_brandstr[0]) { 2597 size_t maxlen = sizeof (cpi->cpi_brandstr); 2598 char *src, *dst; 2599 2600 dst = src = (char *)cpi->cpi_brandstr; 2601 src[maxlen - 1] = '\0'; 2602 /* 2603 * strip leading spaces 2604 */ 2605 while (*src == ' ') 2606 src++; 2607 /* 2608 * Remove any 'Genuine' or "Authentic" prefixes 2609 */ 2610 if (strncmp(src, "Genuine ", 8) == 0) 2611 src += 8; 2612 if (strncmp(src, "Authentic ", 10) == 0) 2613 src += 10; 2614 2615 /* 2616 * Now do an in-place copy. 2617 * Map (R) to (r) and (TM) to (tm). 2618 * The era of teletypes is long gone, and there's 2619 * -really- no need to shout. 2620 */ 2621 while (*src != '\0') { 2622 if (src[0] == '(') { 2623 if (strncmp(src + 1, "R)", 2) == 0) { 2624 (void) strncpy(dst, "(r)", 3); 2625 src += 3; 2626 dst += 3; 2627 continue; 2628 } 2629 if (strncmp(src + 1, "TM)", 3) == 0) { 2630 (void) strncpy(dst, "(tm)", 4); 2631 src += 4; 2632 dst += 4; 2633 continue; 2634 } 2635 } 2636 *dst++ = *src++; 2637 } 2638 *dst = '\0'; 2639 2640 /* 2641 * Finally, remove any trailing spaces 2642 */ 2643 while (--dst > cpi->cpi_brandstr) 2644 if (*dst == ' ') 2645 *dst = '\0'; 2646 else 2647 break; 2648 } else 2649 fabricate_brandstr(cpi); 2650 } 2651 cpi->cpi_pass = 3; 2652 } 2653 2654 /* 2655 * This routine is called out of bind_hwcap() much later in the life 2656 * of the kernel (post_startup()). The job of this routine is to resolve 2657 * the hardware feature support and kernel support for those features into 2658 * what we're actually going to tell applications via the aux vector. 2659 */ 2660 void 2661 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out) 2662 { 2663 struct cpuid_info *cpi; 2664 uint_t hwcap_flags = 0, hwcap_flags_2 = 0; 2665 2666 if (cpu == NULL) 2667 cpu = CPU; 2668 cpi = cpu->cpu_m.mcpu_cpi; 2669 2670 ASSERT(cpi->cpi_pass == 3); 2671 2672 if (cpi->cpi_maxeax >= 1) { 2673 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2674 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2675 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES]; 2676 2677 *edx = CPI_FEATURES_EDX(cpi); 2678 *ecx = CPI_FEATURES_ECX(cpi); 2679 *ebx = CPI_FEATURES_7_0_EBX(cpi); 2680 2681 /* 2682 * [these require explicit kernel support] 2683 */ 2684 if (!is_x86_feature(x86_featureset, X86FSET_SEP)) 2685 *edx &= ~CPUID_INTC_EDX_SEP; 2686 2687 if (!is_x86_feature(x86_featureset, X86FSET_SSE)) 2688 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2689 if (!is_x86_feature(x86_featureset, X86FSET_SSE2)) 2690 *edx &= ~CPUID_INTC_EDX_SSE2; 2691 2692 if (!is_x86_feature(x86_featureset, X86FSET_HTT)) 2693 *edx &= ~CPUID_INTC_EDX_HTT; 2694 2695 if (!is_x86_feature(x86_featureset, X86FSET_SSE3)) 2696 *ecx &= ~CPUID_INTC_ECX_SSE3; 2697 2698 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3)) 2699 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2700 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1)) 2701 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2702 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2)) 2703 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2704 if (!is_x86_feature(x86_featureset, X86FSET_AES)) 2705 *ecx &= ~CPUID_INTC_ECX_AES; 2706 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ)) 2707 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ; 2708 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) 2709 *ecx &= ~(CPUID_INTC_ECX_XSAVE | 2710 CPUID_INTC_ECX_OSXSAVE); 2711 if (!is_x86_feature(x86_featureset, X86FSET_AVX)) 2712 *ecx &= ~CPUID_INTC_ECX_AVX; 2713 if (!is_x86_feature(x86_featureset, X86FSET_F16C)) 2714 *ecx &= ~CPUID_INTC_ECX_F16C; 2715 if (!is_x86_feature(x86_featureset, X86FSET_FMA)) 
2716 *ecx &= ~CPUID_INTC_ECX_FMA; 2717 if (!is_x86_feature(x86_featureset, X86FSET_BMI1)) 2718 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 2719 if (!is_x86_feature(x86_featureset, X86FSET_BMI2)) 2720 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 2721 if (!is_x86_feature(x86_featureset, X86FSET_AVX2)) 2722 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 2723 2724 /* 2725 * [no explicit support required beyond x87 fp context] 2726 */ 2727 if (!fpu_exists) 2728 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2729 2730 /* 2731 * Now map the supported feature vector to things that we 2732 * think userland will care about. 2733 */ 2734 if (*edx & CPUID_INTC_EDX_SEP) 2735 hwcap_flags |= AV_386_SEP; 2736 if (*edx & CPUID_INTC_EDX_SSE) 2737 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2738 if (*edx & CPUID_INTC_EDX_SSE2) 2739 hwcap_flags |= AV_386_SSE2; 2740 if (*ecx & CPUID_INTC_ECX_SSE3) 2741 hwcap_flags |= AV_386_SSE3; 2742 if (*ecx & CPUID_INTC_ECX_SSSE3) 2743 hwcap_flags |= AV_386_SSSE3; 2744 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2745 hwcap_flags |= AV_386_SSE4_1; 2746 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2747 hwcap_flags |= AV_386_SSE4_2; 2748 if (*ecx & CPUID_INTC_ECX_MOVBE) 2749 hwcap_flags |= AV_386_MOVBE; 2750 if (*ecx & CPUID_INTC_ECX_AES) 2751 hwcap_flags |= AV_386_AES; 2752 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2753 hwcap_flags |= AV_386_PCLMULQDQ; 2754 if ((*ecx & CPUID_INTC_ECX_XSAVE) && 2755 (*ecx & CPUID_INTC_ECX_OSXSAVE)) { 2756 hwcap_flags |= AV_386_XSAVE; 2757 2758 if (*ecx & CPUID_INTC_ECX_AVX) { 2759 hwcap_flags |= AV_386_AVX; 2760 if (*ecx & CPUID_INTC_ECX_F16C) 2761 hwcap_flags_2 |= AV_386_2_F16C; 2762 if (*ecx & CPUID_INTC_ECX_FMA) 2763 hwcap_flags_2 |= AV_386_2_FMA; 2764 if (*ebx & CPUID_INTC_EBX_7_0_BMI1) 2765 hwcap_flags_2 |= AV_386_2_BMI1; 2766 if (*ebx & CPUID_INTC_EBX_7_0_BMI2) 2767 hwcap_flags_2 |= AV_386_2_BMI2; 2768 if (*ebx & CPUID_INTC_EBX_7_0_AVX2) 2769 hwcap_flags_2 |= AV_386_2_AVX2; 2770 } 2771 } 2772 if (*ecx & CPUID_INTC_ECX_VMX) 2773 hwcap_flags |= AV_386_VMX; 2774 if (*ecx & CPUID_INTC_ECX_POPCNT) 2775 hwcap_flags |= AV_386_POPCNT; 2776 if (*edx & CPUID_INTC_EDX_FPU) 2777 hwcap_flags |= AV_386_FPU; 2778 if (*edx & CPUID_INTC_EDX_MMX) 2779 hwcap_flags |= AV_386_MMX; 2780 2781 if (*edx & CPUID_INTC_EDX_TSC) 2782 hwcap_flags |= AV_386_TSC; 2783 if (*edx & CPUID_INTC_EDX_CX8) 2784 hwcap_flags |= AV_386_CX8; 2785 if (*edx & CPUID_INTC_EDX_CMOV) 2786 hwcap_flags |= AV_386_CMOV; 2787 if (*ecx & CPUID_INTC_ECX_CX16) 2788 hwcap_flags |= AV_386_CX16; 2789 2790 if (*ecx & CPUID_INTC_ECX_RDRAND) 2791 hwcap_flags_2 |= AV_386_2_RDRAND; 2792 } 2793 2794 if (cpi->cpi_xmaxeax < 0x80000001) 2795 goto pass4_done; 2796 2797 switch (cpi->cpi_vendor) { 2798 struct cpuid_regs cp; 2799 uint32_t *edx, *ecx; 2800 2801 case X86_VENDOR_Intel: 2802 /* 2803 * Seems like Intel duplicated what was necessary 2804 * here to make the initial crop of 64-bit OSes work. 2805 * Hopefully, those are the only "extended" bits 2806 * they'll add.
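 * (Because the layout matches, the Intel case simply falls through
 * to the AMD handling below and shares the same masking and hwcap
 * mapping; the genuinely vendor-specific bits are handled in the
 * inner vendor switches.)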
2807 */ 2808 /*FALLTHROUGH*/ 2809 2810 case X86_VENDOR_AMD: 2811 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2812 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2813 2814 *edx = CPI_FEATURES_XTD_EDX(cpi); 2815 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2816 2817 /* 2818 * [these features require explicit kernel support] 2819 */ 2820 switch (cpi->cpi_vendor) { 2821 case X86_VENDOR_Intel: 2822 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 2823 *edx &= ~CPUID_AMD_EDX_TSCP; 2824 break; 2825 2826 case X86_VENDOR_AMD: 2827 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 2828 *edx &= ~CPUID_AMD_EDX_TSCP; 2829 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A)) 2830 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2831 break; 2832 2833 default: 2834 break; 2835 } 2836 2837 /* 2838 * [no explicit support required beyond 2839 * x87 fp context and exception handlers] 2840 */ 2841 if (!fpu_exists) 2842 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2843 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2844 2845 if (!is_x86_feature(x86_featureset, X86FSET_NX)) 2846 *edx &= ~CPUID_AMD_EDX_NX; 2847 #if !defined(__amd64) 2848 *edx &= ~CPUID_AMD_EDX_LM; 2849 #endif 2850 /* 2851 * Now map the supported feature vector to 2852 * things that we think userland will care about. 2853 */ 2854 #if defined(__amd64) 2855 if (*edx & CPUID_AMD_EDX_SYSC) 2856 hwcap_flags |= AV_386_AMD_SYSC; 2857 #endif 2858 if (*edx & CPUID_AMD_EDX_MMXamd) 2859 hwcap_flags |= AV_386_AMD_MMX; 2860 if (*edx & CPUID_AMD_EDX_3DNow) 2861 hwcap_flags |= AV_386_AMD_3DNow; 2862 if (*edx & CPUID_AMD_EDX_3DNowx) 2863 hwcap_flags |= AV_386_AMD_3DNowx; 2864 if (*ecx & CPUID_AMD_ECX_SVM) 2865 hwcap_flags |= AV_386_AMD_SVM; 2866 2867 switch (cpi->cpi_vendor) { 2868 case X86_VENDOR_AMD: 2869 if (*edx & CPUID_AMD_EDX_TSCP) 2870 hwcap_flags |= AV_386_TSCP; 2871 if (*ecx & CPUID_AMD_ECX_AHF64) 2872 hwcap_flags |= AV_386_AHF; 2873 if (*ecx & CPUID_AMD_ECX_SSE4A) 2874 hwcap_flags |= AV_386_AMD_SSE4A; 2875 if (*ecx & CPUID_AMD_ECX_LZCNT) 2876 hwcap_flags |= AV_386_AMD_LZCNT; 2877 break; 2878 2879 case X86_VENDOR_Intel: 2880 if (*edx & CPUID_AMD_EDX_TSCP) 2881 hwcap_flags |= AV_386_TSCP; 2882 /* 2883 * Aarrgh. 2884 * Intel uses a different bit in the same word. 2885 */ 2886 if (*ecx & CPUID_INTC_ECX_AHF64) 2887 hwcap_flags |= AV_386_AHF; 2888 break; 2889 2890 default: 2891 break; 2892 } 2893 break; 2894 2895 case X86_VENDOR_TM: 2896 cp.cp_eax = 0x80860001; 2897 (void) __cpuid_insn(&cp); 2898 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2899 break; 2900 2901 default: 2902 break; 2903 } 2904 2905 pass4_done: 2906 cpi->cpi_pass = 4; 2907 if (hwcap_out != NULL) { 2908 hwcap_out[0] = hwcap_flags; 2909 hwcap_out[1] = hwcap_flags_2; 2910 } 2911 } 2912 2913 2914 /* 2915 * Simulate the cpuid instruction using the data we previously 2916 * captured about this CPU. We try our best to return the truth 2917 * about the hardware, independently of kernel support. 2918 */ 2919 uint32_t 2920 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2921 { 2922 struct cpuid_info *cpi; 2923 struct cpuid_regs *xcp; 2924 2925 if (cpu == NULL) 2926 cpu = CPU; 2927 cpi = cpu->cpu_m.mcpu_cpi; 2928 2929 ASSERT(cpuid_checkpass(cpu, 3)); 2930 2931 /* 2932 * CPUID data is cached in two separate places: cpi_std for standard 2933 * CPUID functions, and cpi_extd for extended CPUID functions. 
2934 */ 2935 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2936 xcp = &cpi->cpi_std[cp->cp_eax]; 2937 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2938 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2939 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2940 else 2941 /* 2942 * The caller is asking for data from an input parameter which 2943 * the kernel has not cached. In this case we go fetch from 2944 * the hardware and return the data directly to the user. 2945 */ 2946 return (__cpuid_insn(cp)); 2947 2948 cp->cp_eax = xcp->cp_eax; 2949 cp->cp_ebx = xcp->cp_ebx; 2950 cp->cp_ecx = xcp->cp_ecx; 2951 cp->cp_edx = xcp->cp_edx; 2952 return (cp->cp_eax); 2953 } 2954 2955 int 2956 cpuid_checkpass(cpu_t *cpu, int pass) 2957 { 2958 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2959 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2960 } 2961 2962 int 2963 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2964 { 2965 ASSERT(cpuid_checkpass(cpu, 3)); 2966 2967 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2968 } 2969 2970 int 2971 cpuid_is_cmt(cpu_t *cpu) 2972 { 2973 if (cpu == NULL) 2974 cpu = CPU; 2975 2976 ASSERT(cpuid_checkpass(cpu, 1)); 2977 2978 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2979 } 2980 2981 /* 2982 * AMD and Intel both implement the 64-bit variant of the syscall 2983 * instruction (syscallq), so if there's -any- support for syscall, 2984 * cpuid currently says "yes, we support this". 2985 * 2986 * However, Intel decided to -not- implement the 32-bit variant of the 2987 * syscall instruction, so we provide a predicate to allow our caller 2988 * to test that subtlety here. 2989 * 2990 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2991 * even in the case where the hardware would in fact support it. 2992 */ 2993 /*ARGSUSED*/ 2994 int 2995 cpuid_syscall32_insn(cpu_t *cpu) 2996 { 2997 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 2998 2999 #if !defined(__xpv) 3000 if (cpu == NULL) 3001 cpu = CPU; 3002 3003 /*CSTYLED*/ 3004 { 3005 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3006 3007 if (cpi->cpi_vendor == X86_VENDOR_AMD && 3008 cpi->cpi_xmaxeax >= 0x80000001 && 3009 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 3010 return (1); 3011 } 3012 #endif 3013 return (0); 3014 } 3015 3016 int 3017 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 3018 { 3019 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3020 3021 static const char fmt[] = 3022 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 3023 static const char fmt_ht[] = 3024 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 3025 3026 ASSERT(cpuid_checkpass(cpu, 1)); 3027 3028 if (cpuid_is_cmt(cpu)) 3029 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 3030 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 3031 cpi->cpi_family, cpi->cpi_model, 3032 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 3033 return (snprintf(s, n, fmt, 3034 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 3035 cpi->cpi_family, cpi->cpi_model, 3036 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 3037 } 3038 3039 const char * 3040 cpuid_getvendorstr(cpu_t *cpu) 3041 { 3042 ASSERT(cpuid_checkpass(cpu, 1)); 3043 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 3044 } 3045 3046 uint_t 3047 cpuid_getvendor(cpu_t *cpu) 3048 { 3049 ASSERT(cpuid_checkpass(cpu, 1)); 3050 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 3051 } 3052 3053 uint_t 3054 cpuid_getfamily(cpu_t *cpu) 3055 { 3056 ASSERT(cpuid_checkpass(cpu, 1)); 3057 return (cpu->cpu_m.mcpu_cpi->cpi_family); 3058 } 3059 3060 uint_t 3061 cpuid_getmodel(cpu_t *cpu) 3062 { 3063 ASSERT(cpuid_checkpass(cpu, 1)); 3064 return (cpu->cpu_m.mcpu_cpi->cpi_model); 3065 } 3066 3067 uint_t 3068 cpuid_get_ncpu_per_chip(cpu_t *cpu) 3069 { 3070 ASSERT(cpuid_checkpass(cpu, 1)); 3071 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 3072 } 3073 3074 uint_t 3075 cpuid_get_ncore_per_chip(cpu_t *cpu) 3076 { 3077 ASSERT(cpuid_checkpass(cpu, 1)); 3078 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 3079 } 3080 3081 uint_t 3082 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 3083 { 3084 ASSERT(cpuid_checkpass(cpu, 2)); 3085 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 3086 } 3087 3088 id_t 3089 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 3090 { 3091 ASSERT(cpuid_checkpass(cpu, 2)); 3092 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 3093 } 3094 3095 uint_t 3096 cpuid_getstep(cpu_t *cpu) 3097 { 3098 ASSERT(cpuid_checkpass(cpu, 1)); 3099 return (cpu->cpu_m.mcpu_cpi->cpi_step); 3100 } 3101 3102 uint_t 3103 cpuid_getsig(struct cpu *cpu) 3104 { 3105 ASSERT(cpuid_checkpass(cpu, 1)); 3106 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 3107 } 3108 3109 uint32_t 3110 cpuid_getchiprev(struct cpu *cpu) 3111 { 3112 ASSERT(cpuid_checkpass(cpu, 1)); 3113 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 3114 } 3115 3116 const char * 3117 cpuid_getchiprevstr(struct cpu *cpu) 3118 { 3119 ASSERT(cpuid_checkpass(cpu, 1)); 3120 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 3121 } 3122 3123 uint32_t 3124 cpuid_getsockettype(struct cpu *cpu) 3125 { 3126 ASSERT(cpuid_checkpass(cpu, 1)); 3127 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 3128 } 3129 3130 const char * 3131 cpuid_getsocketstr(cpu_t *cpu) 3132 { 3133 static const char *socketstr = NULL; 3134 struct cpuid_info *cpi; 3135 3136 ASSERT(cpuid_checkpass(cpu, 1)); 3137 cpi = cpu->cpu_m.mcpu_cpi; 3138 3139 /* Assume that socket types are the same across the system */ 3140 if (socketstr == NULL) 3141 socketstr 
= _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 3142 cpi->cpi_model, cpi->cpi_step); 3143 3144 3145 return (socketstr); 3146 } 3147 3148 int 3149 cpuid_get_chipid(cpu_t *cpu) 3150 { 3151 ASSERT(cpuid_checkpass(cpu, 1)); 3152 3153 if (cpuid_is_cmt(cpu)) 3154 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 3155 return (cpu->cpu_id); 3156 } 3157 3158 id_t 3159 cpuid_get_coreid(cpu_t *cpu) 3160 { 3161 ASSERT(cpuid_checkpass(cpu, 1)); 3162 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 3163 } 3164 3165 int 3166 cpuid_get_pkgcoreid(cpu_t *cpu) 3167 { 3168 ASSERT(cpuid_checkpass(cpu, 1)); 3169 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 3170 } 3171 3172 int 3173 cpuid_get_clogid(cpu_t *cpu) 3174 { 3175 ASSERT(cpuid_checkpass(cpu, 1)); 3176 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 3177 } 3178 3179 int 3180 cpuid_get_cacheid(cpu_t *cpu) 3181 { 3182 ASSERT(cpuid_checkpass(cpu, 1)); 3183 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 3184 } 3185 3186 uint_t 3187 cpuid_get_procnodeid(cpu_t *cpu) 3188 { 3189 ASSERT(cpuid_checkpass(cpu, 1)); 3190 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid); 3191 } 3192 3193 uint_t 3194 cpuid_get_procnodes_per_pkg(cpu_t *cpu) 3195 { 3196 ASSERT(cpuid_checkpass(cpu, 1)); 3197 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg); 3198 } 3199 3200 uint_t 3201 cpuid_get_compunitid(cpu_t *cpu) 3202 { 3203 ASSERT(cpuid_checkpass(cpu, 1)); 3204 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid); 3205 } 3206 3207 uint_t 3208 cpuid_get_cores_per_compunit(cpu_t *cpu) 3209 { 3210 ASSERT(cpuid_checkpass(cpu, 1)); 3211 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit); 3212 } 3213 3214 /*ARGSUSED*/ 3215 int 3216 cpuid_have_cr8access(cpu_t *cpu) 3217 { 3218 #if defined(__amd64) 3219 return (1); 3220 #else 3221 struct cpuid_info *cpi; 3222 3223 ASSERT(cpu != NULL); 3224 cpi = cpu->cpu_m.mcpu_cpi; 3225 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 3226 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 3227 return (1); 3228 return (0); 3229 #endif 3230 } 3231 3232 uint32_t 3233 cpuid_get_apicid(cpu_t *cpu) 3234 { 3235 ASSERT(cpuid_checkpass(cpu, 1)); 3236 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 3237 return (UINT32_MAX); 3238 } else { 3239 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 3240 } 3241 } 3242 3243 void 3244 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 3245 { 3246 struct cpuid_info *cpi; 3247 3248 if (cpu == NULL) 3249 cpu = CPU; 3250 cpi = cpu->cpu_m.mcpu_cpi; 3251 3252 ASSERT(cpuid_checkpass(cpu, 1)); 3253 3254 if (pabits) 3255 *pabits = cpi->cpi_pabits; 3256 if (vabits) 3257 *vabits = cpi->cpi_vabits; 3258 } 3259 3260 /* 3261 * Returns the number of data TLB entries for a corresponding 3262 * pagesize. If it can't be computed, or isn't known, the 3263 * routine returns zero. If you ask about an architecturally 3264 * impossible pagesize, the routine will panic (so that the 3265 * hat implementor knows that things are inconsistent.) 3266 */ 3267 uint_t 3268 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 3269 { 3270 struct cpuid_info *cpi; 3271 uint_t dtlb_nent = 0; 3272 3273 if (cpu == NULL) 3274 cpu = CPU; 3275 cpi = cpu->cpu_m.mcpu_cpi; 3276 3277 ASSERT(cpuid_checkpass(cpu, 1)); 3278 3279 /* 3280 * Check the L2 TLB info 3281 */ 3282 if (cpi->cpi_xmaxeax >= 0x80000006) { 3283 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 3284 3285 switch (pagesize) { 3286 3287 case 4 * 1024: 3288 /* 3289 * All zero in the top 16 bits of the register 3290 * indicates a unified TLB. Size is in low 16 bits. 
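 * The 2M/4M case below uses the same encoding, just read from
 * %eax instead of %ebx.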
3291 */ 3292 if ((cp->cp_ebx & 0xffff0000) == 0) 3293 dtlb_nent = cp->cp_ebx & 0x0000ffff; 3294 else 3295 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 3296 break; 3297 3298 case 2 * 1024 * 1024: 3299 if ((cp->cp_eax & 0xffff0000) == 0) 3300 dtlb_nent = cp->cp_eax & 0x0000ffff; 3301 else 3302 dtlb_nent = BITX(cp->cp_eax, 27, 16); 3303 break; 3304 3305 default: 3306 panic("unknown L2 pagesize"); 3307 /*NOTREACHED*/ 3308 } 3309 } 3310 3311 if (dtlb_nent != 0) 3312 return (dtlb_nent); 3313 3314 /* 3315 * No L2 TLB support for this size, try L1. 3316 */ 3317 if (cpi->cpi_xmaxeax >= 0x80000005) { 3318 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 3319 3320 switch (pagesize) { 3321 case 4 * 1024: 3322 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 3323 break; 3324 case 2 * 1024 * 1024: 3325 dtlb_nent = BITX(cp->cp_eax, 23, 16); 3326 break; 3327 default: 3328 panic("unknown L1 d-TLB pagesize"); 3329 /*NOTREACHED*/ 3330 } 3331 } 3332 3333 return (dtlb_nent); 3334 } 3335 3336 /* 3337 * Return 0 if the erratum is not present or not applicable, positive 3338 * if it is, and negative if the status of the erratum is unknown. 3339 * 3340 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 3341 * Processors" #25759, Rev 3.57, August 2005 3342 */ 3343 int 3344 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 3345 { 3346 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3347 uint_t eax; 3348 3349 /* 3350 * Bail out if this CPU isn't an AMD CPU, or if it's 3351 * a legacy (32-bit) AMD CPU. 3352 */ 3353 if (cpi->cpi_vendor != X86_VENDOR_AMD || 3354 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 3355 cpi->cpi_family == 6) 3356 3357 return (0); 3358 3359 eax = cpi->cpi_std[1].cp_eax; 3360 3361 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 3362 #define SH_B3(eax) (eax == 0xf51) 3363 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 3364 3365 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 3366 3367 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 3368 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 3369 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 3370 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 3371 3372 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 3373 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 3374 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 3375 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 3376 3377 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 3378 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 3379 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 3380 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 3381 #define BH_E4(eax) (eax == 0x20fb1) 3382 #define SH_E5(eax) (eax == 0x20f42) 3383 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 3384 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 3385 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 3386 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 3387 DH_E6(eax) || JH_E6(eax)) 3388 3389 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 3390 #define DR_B0(eax) (eax == 0x100f20) 3391 #define DR_B1(eax) (eax == 0x100f21) 3392 #define DR_BA(eax) (eax == 0x100f2a) 3393 #define DR_B2(eax) (eax == 0x100f22) 3394 #define DR_B3(eax) (eax == 0x100f23) 3395 #define RB_C0(eax) (eax == 0x100f40) 3396 3397 switch (erratum) { 3398 case 1: 3399 return (cpi->cpi_family < 0x10); 3400 case 51: /* what does the asterisk mean? 
*/ 3401 return (B(eax) || SH_C0(eax) || CG(eax)); 3402 case 52: 3403 return (B(eax)); 3404 case 57: 3405 return (cpi->cpi_family <= 0x11); 3406 case 58: 3407 return (B(eax)); 3408 case 60: 3409 return (cpi->cpi_family <= 0x11); 3410 case 61: 3411 case 62: 3412 case 63: 3413 case 64: 3414 case 65: 3415 case 66: 3416 case 68: 3417 case 69: 3418 case 70: 3419 case 71: 3420 return (B(eax)); 3421 case 72: 3422 return (SH_B0(eax)); 3423 case 74: 3424 return (B(eax)); 3425 case 75: 3426 return (cpi->cpi_family < 0x10); 3427 case 76: 3428 return (B(eax)); 3429 case 77: 3430 return (cpi->cpi_family <= 0x11); 3431 case 78: 3432 return (B(eax) || SH_C0(eax)); 3433 case 79: 3434 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 3435 case 80: 3436 case 81: 3437 case 82: 3438 return (B(eax)); 3439 case 83: 3440 return (B(eax) || SH_C0(eax) || CG(eax)); 3441 case 85: 3442 return (cpi->cpi_family < 0x10); 3443 case 86: 3444 return (SH_C0(eax) || CG(eax)); 3445 case 88: 3446 #if !defined(__amd64) 3447 return (0); 3448 #else 3449 return (B(eax) || SH_C0(eax)); 3450 #endif 3451 case 89: 3452 return (cpi->cpi_family < 0x10); 3453 case 90: 3454 return (B(eax) || SH_C0(eax) || CG(eax)); 3455 case 91: 3456 case 92: 3457 return (B(eax) || SH_C0(eax)); 3458 case 93: 3459 return (SH_C0(eax)); 3460 case 94: 3461 return (B(eax) || SH_C0(eax) || CG(eax)); 3462 case 95: 3463 #if !defined(__amd64) 3464 return (0); 3465 #else 3466 return (B(eax) || SH_C0(eax)); 3467 #endif 3468 case 96: 3469 return (B(eax) || SH_C0(eax) || CG(eax)); 3470 case 97: 3471 case 98: 3472 return (SH_C0(eax) || CG(eax)); 3473 case 99: 3474 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3475 case 100: 3476 return (B(eax) || SH_C0(eax)); 3477 case 101: 3478 case 103: 3479 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3480 case 104: 3481 return (SH_C0(eax) || CG(eax) || D0(eax)); 3482 case 105: 3483 case 106: 3484 case 107: 3485 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3486 case 108: 3487 return (DH_CG(eax)); 3488 case 109: 3489 return (SH_C0(eax) || CG(eax) || D0(eax)); 3490 case 110: 3491 return (D0(eax) || EX(eax)); 3492 case 111: 3493 return (CG(eax)); 3494 case 112: 3495 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 3496 case 113: 3497 return (eax == 0x20fc0); 3498 case 114: 3499 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 3500 case 115: 3501 return (SH_E0(eax) || JH_E1(eax)); 3502 case 116: 3503 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 3504 case 117: 3505 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3506 case 118: 3507 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 3508 JH_E6(eax)); 3509 case 121: 3510 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 3511 case 122: 3512 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 3513 case 123: 3514 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 3515 case 131: 3516 return (cpi->cpi_family < 0x10); 3517 case 6336786: 3518 /* 3519 * Test for AdvPowerMgmtInfo.TscPStateInvariant 3520 * if this is a K8 family or newer processor 3521 */ 3522 if (CPI_FAMILY(cpi) == 0xf) { 3523 struct cpuid_regs regs; 3524 regs.cp_eax = 0x80000007; 3525 (void) __cpuid_insn(&regs); 3526 return (!(regs.cp_edx & 0x100)); 3527 } 3528 return (0); 3529 case 6323525: 3530 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 3531 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 3532 3533 case 6671130: 3534 /* 3535 * check for processors (pre-Shanghai) that do not provide 3536 * optimal management of 1gb ptes in
its tlb. 3537 */ 3538 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 3539 3540 case 298: 3541 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 3542 DR_B2(eax) || RB_C0(eax)); 3543 3544 case 721: 3545 #if defined(__amd64) 3546 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12); 3547 #else 3548 return (0); 3549 #endif 3550 3551 default: 3552 return (-1); 3553 3554 } 3555 } 3556 3557 /* 3558 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 3559 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 3560 */ 3561 int 3562 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 3563 { 3564 struct cpuid_info *cpi; 3565 uint_t osvwid; 3566 static int osvwfeature = -1; 3567 uint64_t osvwlength; 3568 3569 3570 cpi = cpu->cpu_m.mcpu_cpi; 3571 3572 /* confirm OSVW supported */ 3573 if (osvwfeature == -1) { 3574 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 3575 } else { 3576 /* assert that osvw feature setting is consistent on all cpus */ 3577 ASSERT(osvwfeature == 3578 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 3579 } 3580 if (!osvwfeature) 3581 return (-1); 3582 3583 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 3584 3585 switch (erratum) { 3586 case 298: /* osvwid is 0 */ 3587 osvwid = 0; 3588 if (osvwlength <= (uint64_t)osvwid) { 3589 /* osvwid 0 is unknown */ 3590 return (-1); 3591 } 3592 3593 /* 3594 * Check the OSVW STATUS MSR to determine the state 3595 * of the erratum where: 3596 * 0 - fixed by HW 3597 * 1 - BIOS has applied the workaround when BIOS 3598 * workaround is available. (Or for other errata, 3599 * OS workaround is required.) 3600 * For a value of 1, caller will confirm that the 3601 * erratum 298 workaround has indeed been applied by BIOS. 3602 * 3603 * A 1 may be set in cpus that have a HW fix 3604 * in a mixed cpu system. Regarding erratum 298: 3605 * In a multiprocessor platform, the workaround above 3606 * should be applied to all processors regardless of 3607 * silicon revision when an affected processor is 3608 * present. 3609 */ 3610 3611 return (rdmsr(MSR_AMD_OSVW_STATUS + 3612 (osvwid / OSVW_ID_CNT_PER_MSR)) & 3613 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 3614 3615 default: 3616 return (-1); 3617 } 3618 } 3619 3620 static const char assoc_str[] = "associativity"; 3621 static const char line_str[] = "line-size"; 3622 static const char size_str[] = "size"; 3623 3624 static void 3625 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 3626 uint32_t val) 3627 { 3628 char buf[128]; 3629 3630 /* 3631 * ndi_prop_update_int() is used because it is desirable for 3632 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 3633 */ 3634 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 3635 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 3636 } 3637 3638 /* 3639 * Intel-style cache/tlb description 3640 * 3641 * Standard cpuid level 2 gives a randomly ordered 3642 * selection of tags that index into a table that describes 3643 * cache and tlb properties. 
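 *
 * Each descriptor byte harvested in cpuid_pass2() is looked up in
 * intel_ctab[] below via find_cacheent(), whose early-exit scan is
 * the reason the table must be kept in descending ct_code order.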
3644 */ 3645 3646 static const char l1_icache_str[] = "l1-icache"; 3647 static const char l1_dcache_str[] = "l1-dcache"; 3648 static const char l2_cache_str[] = "l2-cache"; 3649 static const char l3_cache_str[] = "l3-cache"; 3650 static const char itlb4k_str[] = "itlb-4K"; 3651 static const char dtlb4k_str[] = "dtlb-4K"; 3652 static const char itlb2M_str[] = "itlb-2M"; 3653 static const char itlb4M_str[] = "itlb-4M"; 3654 static const char dtlb4M_str[] = "dtlb-4M"; 3655 static const char dtlb24_str[] = "dtlb0-2M-4M"; 3656 static const char itlb424_str[] = "itlb-4K-2M-4M"; 3657 static const char itlb24_str[] = "itlb-2M-4M"; 3658 static const char dtlb44_str[] = "dtlb-4K-4M"; 3659 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 3660 static const char sl2_cache_str[] = "sectored-l2-cache"; 3661 static const char itrace_str[] = "itrace-cache"; 3662 static const char sl3_cache_str[] = "sectored-l3-cache"; 3663 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3664 3665 static const struct cachetab { 3666 uint8_t ct_code; 3667 uint8_t ct_assoc; 3668 uint16_t ct_line_size; 3669 size_t ct_size; 3670 const char *ct_label; 3671 } intel_ctab[] = { 3672 /* 3673 * maintain descending order! 3674 * 3675 * Codes ignored - Reason 3676 * ---------------------- 3677 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3678 * f0H/f1H - Currently we do not interpret prefetch size by design 3679 */ 3680 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3681 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3682 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3683 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3684 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3685 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3686 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3687 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3688 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3689 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3690 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3691 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3692 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3693 { 0xc0, 4, 0, 8, dtlb44_str }, 3694 { 0xba, 4, 0, 64, dtlb4k_str }, 3695 { 0xb4, 4, 0, 256, dtlb4k_str }, 3696 { 0xb3, 4, 0, 128, dtlb4k_str }, 3697 { 0xb2, 4, 0, 64, itlb4k_str }, 3698 { 0xb0, 4, 0, 128, itlb4k_str }, 3699 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3700 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3701 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3702 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3703 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3704 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3705 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3706 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3707 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3708 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3709 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3710 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3711 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3712 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3713 { 0x73, 8, 0, 64*1024, itrace_str}, 3714 { 0x72, 8, 0, 32*1024, itrace_str}, 3715 { 0x71, 8, 0, 16*1024, itrace_str}, 3716 { 0x70, 8, 0, 12*1024, itrace_str}, 3717 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3718 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3719 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3720 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3721 { 0x5d, 0, 0, 256, dtlb44_str}, 3722 { 0x5c, 0, 0, 128, dtlb44_str}, 3723 { 0x5b, 0, 0, 64, dtlb44_str}, 3724 { 0x5a, 4, 0, 32, dtlb24_str}, 3725 { 0x59, 0, 0, 16, dtlb4k_str}, 3726 { 0x57, 4, 0, 16, dtlb4k_str}, 3727 { 0x56, 4, 0, 16, dtlb4M_str}, 3728 { 0x55, 0, 0, 
7, itlb24_str}, 3729 { 0x52, 0, 0, 256, itlb424_str}, 3730 { 0x51, 0, 0, 128, itlb424_str}, 3731 { 0x50, 0, 0, 64, itlb424_str}, 3732 { 0x4f, 0, 0, 32, itlb4k_str}, 3733 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3734 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3735 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3736 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3737 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3738 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3739 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3740 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3741 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3742 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3743 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3744 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3745 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3746 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3747 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3748 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3749 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3750 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3751 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3752 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3753 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3754 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3755 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3756 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3757 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3758 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3759 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3760 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3761 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3762 { 0x0b, 4, 0, 4, itlb4M_str}, 3763 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3764 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3765 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3766 { 0x05, 4, 0, 32, dtlb4M_str}, 3767 { 0x04, 4, 0, 8, dtlb4M_str}, 3768 { 0x03, 4, 0, 64, dtlb4k_str}, 3769 { 0x02, 4, 0, 2, itlb4M_str}, 3770 { 0x01, 4, 0, 32, itlb4k_str}, 3771 { 0 } 3772 }; 3773 3774 static const struct cachetab cyrix_ctab[] = { 3775 { 0x70, 4, 0, 32, "tlb-4K" }, 3776 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3777 { 0 } 3778 }; 3779 3780 /* 3781 * Search a cache table for a matching entry 3782 */ 3783 static const struct cachetab * 3784 find_cacheent(const struct cachetab *ct, uint_t code) 3785 { 3786 if (code != 0) { 3787 for (; ct->ct_code != 0; ct++) 3788 if (ct->ct_code <= code) 3789 break; 3790 if (ct->ct_code == code) 3791 return (ct); 3792 } 3793 return (NULL); 3794 } 3795 3796 /* 3797 * Populate cachetab entry with L2 or L3 cache-information using 3798 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3799 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3800 * information is found. 
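 * The size computed below is the usual leaf-4 product of
 * ways * partitions * line size * sets; each field is reported by
 * cpuid as (value - 1), hence the "+ 1"s.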
3801 */ 3802 static int 3803 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3804 { 3805 uint32_t level, i; 3806 int ret = 0; 3807 3808 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3809 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3810 3811 if (level == 2 || level == 3) { 3812 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3813 ct->ct_line_size = 3814 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3815 ct->ct_size = ct->ct_assoc * 3816 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3817 ct->ct_line_size * 3818 (cpi->cpi_std_4[i]->cp_ecx + 1); 3819 3820 if (level == 2) { 3821 ct->ct_label = l2_cache_str; 3822 } else if (level == 3) { 3823 ct->ct_label = l3_cache_str; 3824 } 3825 ret = 1; 3826 } 3827 } 3828 3829 return (ret); 3830 } 3831 3832 /* 3833 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3834 * The walk is terminated if the walker returns non-zero. 3835 */ 3836 static void 3837 intel_walk_cacheinfo(struct cpuid_info *cpi, 3838 void *arg, int (*func)(void *, const struct cachetab *)) 3839 { 3840 const struct cachetab *ct; 3841 struct cachetab des_49_ct, des_b1_ct; 3842 uint8_t *dp; 3843 int i; 3844 3845 if ((dp = cpi->cpi_cacheinfo) == NULL) 3846 return; 3847 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3848 /* 3849 * For overloaded descriptor 0x49 we use cpuid function 4 3850 * if supported by the current processor, to create 3851 * cache information. 3852 * For overloaded descriptor 0xb1 we use X86_PAE flag 3853 * to disambiguate the cache information. 3854 */ 3855 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3856 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3857 ct = &des_49_ct; 3858 } else if (*dp == 0xb1) { 3859 des_b1_ct.ct_code = 0xb1; 3860 des_b1_ct.ct_assoc = 4; 3861 des_b1_ct.ct_line_size = 0; 3862 if (is_x86_feature(x86_featureset, X86FSET_PAE)) { 3863 des_b1_ct.ct_size = 8; 3864 des_b1_ct.ct_label = itlb2M_str; 3865 } else { 3866 des_b1_ct.ct_size = 4; 3867 des_b1_ct.ct_label = itlb4M_str; 3868 } 3869 ct = &des_b1_ct; 3870 } else { 3871 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3872 continue; 3873 } 3874 } 3875 3876 if (func(arg, ct) != 0) { 3877 break; 3878 } 3879 } 3880 } 3881 3882 /* 3883 * (Like the Intel one, except for Cyrix CPUs) 3884 */ 3885 static void 3886 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3887 void *arg, int (*func)(void *, const struct cachetab *)) 3888 { 3889 const struct cachetab *ct; 3890 uint8_t *dp; 3891 int i; 3892 3893 if ((dp = cpi->cpi_cacheinfo) == NULL) 3894 return; 3895 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3896 /* 3897 * Search Cyrix-specific descriptor table first .. 3898 */ 3899 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3900 if (func(arg, ct) != 0) 3901 break; 3902 continue; 3903 } 3904 /* 3905 * .. else fall back to the Intel one 3906 */ 3907 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3908 if (func(arg, ct) != 0) 3909 break; 3910 continue; 3911 } 3912 } 3913 } 3914 3915 /* 3916 * A cacheinfo walker that adds associativity, line-size, and size properties 3917 * to the devinfo node it is passed as an argument. 
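 * (The resulting property names combine the cache label with the
 * property type, e.g. "l2-cache-associativity", "l2-cache-line-size"
 * and "l2-cache-size".)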
3918 */ 3919 static int 3920 add_cacheent_props(void *arg, const struct cachetab *ct) 3921 { 3922 dev_info_t *devi = arg; 3923 3924 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3925 if (ct->ct_line_size != 0) 3926 add_cache_prop(devi, ct->ct_label, line_str, 3927 ct->ct_line_size); 3928 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3929 return (0); 3930 } 3931 3932 3933 static const char fully_assoc[] = "fully-associative?"; 3934 3935 /* 3936 * AMD style cache/tlb description 3937 * 3938 * Extended functions 5 and 6 directly describe properties of 3939 * tlbs and various cache levels. 3940 */ 3941 static void 3942 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3943 { 3944 switch (assoc) { 3945 case 0: /* reserved; ignore */ 3946 break; 3947 default: 3948 add_cache_prop(devi, label, assoc_str, assoc); 3949 break; 3950 case 0xff: 3951 add_cache_prop(devi, label, fully_assoc, 1); 3952 break; 3953 } 3954 } 3955 3956 static void 3957 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3958 { 3959 if (size == 0) 3960 return; 3961 add_cache_prop(devi, label, size_str, size); 3962 add_amd_assoc(devi, label, assoc); 3963 } 3964 3965 static void 3966 add_amd_cache(dev_info_t *devi, const char *label, 3967 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3968 { 3969 if (size == 0 || line_size == 0) 3970 return; 3971 add_amd_assoc(devi, label, assoc); 3972 /* 3973 * Most AMD parts have a sectored cache. Multiple cache lines are 3974 * associated with each tag. A sector consists of all cache lines 3975 * associated with a tag. For example, the AMD K6-III has a sector 3976 * size of 2 cache lines per tag. 3977 */ 3978 if (lines_per_tag != 0) 3979 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3980 add_cache_prop(devi, label, line_str, line_size); 3981 add_cache_prop(devi, label, size_str, size * 1024); 3982 } 3983 3984 static void 3985 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3986 { 3987 switch (assoc) { 3988 case 0: /* off */ 3989 break; 3990 case 1: 3991 case 2: 3992 case 4: 3993 add_cache_prop(devi, label, assoc_str, assoc); 3994 break; 3995 case 6: 3996 add_cache_prop(devi, label, assoc_str, 8); 3997 break; 3998 case 8: 3999 add_cache_prop(devi, label, assoc_str, 16); 4000 break; 4001 case 0xf: 4002 add_cache_prop(devi, label, fully_assoc, 1); 4003 break; 4004 default: /* reserved; ignore */ 4005 break; 4006 } 4007 } 4008 4009 static void 4010 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 4011 { 4012 if (size == 0 || assoc == 0) 4013 return; 4014 add_amd_l2_assoc(devi, label, assoc); 4015 add_cache_prop(devi, label, size_str, size); 4016 } 4017 4018 static void 4019 add_amd_l2_cache(dev_info_t *devi, const char *label, 4020 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 4021 { 4022 if (size == 0 || assoc == 0 || line_size == 0) 4023 return; 4024 add_amd_l2_assoc(devi, label, assoc); 4025 if (lines_per_tag != 0) 4026 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 4027 add_cache_prop(devi, label, line_str, line_size); 4028 add_cache_prop(devi, label, size_str, size * 1024); 4029 } 4030 4031 static void 4032 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 4033 { 4034 struct cpuid_regs *cp; 4035 4036 if (cpi->cpi_xmaxeax < 0x80000005) 4037 return; 4038 cp = &cpi->cpi_extd[5]; 4039 4040 /* 4041 * 4M/2M L1 TLB configuration 4042 * 4043 * We report the size for 2M pages because AMD uses two 4044 * TLB 
entries for one 4M page. 4045 */ 4046 add_amd_tlb(devi, "dtlb-2M", 4047 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 4048 add_amd_tlb(devi, "itlb-2M", 4049 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 4050 4051 /* 4052 * 4K L1 TLB configuration 4053 */ 4054 4055 switch (cpi->cpi_vendor) { 4056 uint_t nentries; 4057 case X86_VENDOR_TM: 4058 if (cpi->cpi_family >= 5) { 4059 /* 4060 * Crusoe processors have 256 TLB entries, but 4061 * cpuid data format constrains them to only 4062 * reporting 255 of them. 4063 */ 4064 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 4065 nentries = 256; 4066 /* 4067 * Crusoe processors also have a unified TLB 4068 */ 4069 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 4070 nentries); 4071 break; 4072 } 4073 /*FALLTHROUGH*/ 4074 default: 4075 add_amd_tlb(devi, itlb4k_str, 4076 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 4077 add_amd_tlb(devi, dtlb4k_str, 4078 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 4079 break; 4080 } 4081 4082 /* 4083 * data L1 cache configuration 4084 */ 4085 4086 add_amd_cache(devi, l1_dcache_str, 4087 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 4088 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 4089 4090 /* 4091 * code L1 cache configuration 4092 */ 4093 4094 add_amd_cache(devi, l1_icache_str, 4095 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 4096 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 4097 4098 if (cpi->cpi_xmaxeax < 0x80000006) 4099 return; 4100 cp = &cpi->cpi_extd[6]; 4101 4102 /* Check for a unified L2 TLB for large pages */ 4103 4104 if (BITX(cp->cp_eax, 31, 16) == 0) 4105 add_amd_l2_tlb(devi, "l2-tlb-2M", 4106 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4107 else { 4108 add_amd_l2_tlb(devi, "l2-dtlb-2M", 4109 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 4110 add_amd_l2_tlb(devi, "l2-itlb-2M", 4111 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4112 } 4113 4114 /* Check for a unified L2 TLB for 4K pages */ 4115 4116 if (BITX(cp->cp_ebx, 31, 16) == 0) { 4117 add_amd_l2_tlb(devi, "l2-tlb-4K", 4118 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4119 } else { 4120 add_amd_l2_tlb(devi, "l2-dtlb-4K", 4121 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 4122 add_amd_l2_tlb(devi, "l2-itlb-4K", 4123 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4124 } 4125 4126 add_amd_l2_cache(devi, l2_cache_str, 4127 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 4128 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 4129 } 4130 4131 /* 4132 * There are two basic ways that the x86 world describes it cache 4133 * and tlb architecture - Intel's way and AMD's way. 4134 * 4135 * Return which flavor of cache architecture we should use 4136 */ 4137 static int 4138 x86_which_cacheinfo(struct cpuid_info *cpi) 4139 { 4140 switch (cpi->cpi_vendor) { 4141 case X86_VENDOR_Intel: 4142 if (cpi->cpi_maxeax >= 2) 4143 return (X86_VENDOR_Intel); 4144 break; 4145 case X86_VENDOR_AMD: 4146 /* 4147 * The K5 model 1 was the first part from AMD that reported 4148 * cache sizes via extended cpuid functions. 4149 */ 4150 if (cpi->cpi_family > 5 || 4151 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 4152 return (X86_VENDOR_AMD); 4153 break; 4154 case X86_VENDOR_TM: 4155 if (cpi->cpi_family >= 5) 4156 return (X86_VENDOR_AMD); 4157 /*FALLTHROUGH*/ 4158 default: 4159 /* 4160 * If they have extended CPU data for 0x80000005 4161 * then we assume they have AMD-format cache 4162 * information. 
4163 * 4164 * If not, and the vendor happens to be Cyrix, 4165 * then try our-Cyrix specific handler. 4166 * 4167 * If we're not Cyrix, then assume we're using Intel's 4168 * table-driven format instead. 4169 */ 4170 if (cpi->cpi_xmaxeax >= 0x80000005) 4171 return (X86_VENDOR_AMD); 4172 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 4173 return (X86_VENDOR_Cyrix); 4174 else if (cpi->cpi_maxeax >= 2) 4175 return (X86_VENDOR_Intel); 4176 break; 4177 } 4178 return (-1); 4179 } 4180 4181 void 4182 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 4183 struct cpuid_info *cpi) 4184 { 4185 dev_info_t *cpu_devi; 4186 int create; 4187 4188 cpu_devi = (dev_info_t *)dip; 4189 4190 /* device_type */ 4191 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 4192 "device_type", "cpu"); 4193 4194 /* reg */ 4195 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4196 "reg", cpu_id); 4197 4198 /* cpu-mhz, and clock-frequency */ 4199 if (cpu_freq > 0) { 4200 long long mul; 4201 4202 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4203 "cpu-mhz", cpu_freq); 4204 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 4205 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4206 "clock-frequency", (int)mul); 4207 } 4208 4209 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) { 4210 return; 4211 } 4212 4213 /* vendor-id */ 4214 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 4215 "vendor-id", cpi->cpi_vendorstr); 4216 4217 if (cpi->cpi_maxeax == 0) { 4218 return; 4219 } 4220 4221 /* 4222 * family, model, and step 4223 */ 4224 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4225 "family", CPI_FAMILY(cpi)); 4226 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4227 "cpu-model", CPI_MODEL(cpi)); 4228 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4229 "stepping-id", CPI_STEP(cpi)); 4230 4231 /* type */ 4232 switch (cpi->cpi_vendor) { 4233 case X86_VENDOR_Intel: 4234 create = 1; 4235 break; 4236 default: 4237 create = 0; 4238 break; 4239 } 4240 if (create) 4241 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4242 "type", CPI_TYPE(cpi)); 4243 4244 /* ext-family */ 4245 switch (cpi->cpi_vendor) { 4246 case X86_VENDOR_Intel: 4247 case X86_VENDOR_AMD: 4248 create = cpi->cpi_family >= 0xf; 4249 break; 4250 default: 4251 create = 0; 4252 break; 4253 } 4254 if (create) 4255 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4256 "ext-family", CPI_FAMILY_XTD(cpi)); 4257 4258 /* ext-model */ 4259 switch (cpi->cpi_vendor) { 4260 case X86_VENDOR_Intel: 4261 create = IS_EXTENDED_MODEL_INTEL(cpi); 4262 break; 4263 case X86_VENDOR_AMD: 4264 create = CPI_FAMILY(cpi) == 0xf; 4265 break; 4266 default: 4267 create = 0; 4268 break; 4269 } 4270 if (create) 4271 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4272 "ext-model", CPI_MODEL_XTD(cpi)); 4273 4274 /* generation */ 4275 switch (cpi->cpi_vendor) { 4276 case X86_VENDOR_AMD: 4277 /* 4278 * AMD K5 model 1 was the first part to support this 4279 */ 4280 create = cpi->cpi_xmaxeax >= 0x80000001; 4281 break; 4282 default: 4283 create = 0; 4284 break; 4285 } 4286 if (create) 4287 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4288 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 4289 4290 /* brand-id */ 4291 switch (cpi->cpi_vendor) { 4292 case X86_VENDOR_Intel: 4293 /* 4294 * brand id first appeared on Pentium III Xeon model 8, 4295 * and Celeron model 8 processors and Opteron 4296 */ 4297 create = cpi->cpi_family > 6 || 4298 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 4299 break; 4300 case X86_VENDOR_AMD: 4301 create = 
cpi->cpi_family >= 0xf; 4302 break; 4303 default: 4304 create = 0; 4305 break; 4306 } 4307 if (create && cpi->cpi_brandid != 0) { 4308 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4309 "brand-id", cpi->cpi_brandid); 4310 } 4311 4312 /* chunks, and apic-id */ 4313 switch (cpi->cpi_vendor) { 4314 /* 4315 * first available on Pentium IV and Opteron (K8) 4316 */ 4317 case X86_VENDOR_Intel: 4318 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 4319 break; 4320 case X86_VENDOR_AMD: 4321 create = cpi->cpi_family >= 0xf; 4322 break; 4323 default: 4324 create = 0; 4325 break; 4326 } 4327 if (create) { 4328 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4329 "chunks", CPI_CHUNKS(cpi)); 4330 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4331 "apic-id", cpi->cpi_apicid); 4332 if (cpi->cpi_chipid >= 0) { 4333 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4334 "chip#", cpi->cpi_chipid); 4335 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4336 "clog#", cpi->cpi_clogid); 4337 } 4338 } 4339 4340 /* cpuid-features */ 4341 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4342 "cpuid-features", CPI_FEATURES_EDX(cpi)); 4343 4344 4345 /* cpuid-features-ecx */ 4346 switch (cpi->cpi_vendor) { 4347 case X86_VENDOR_Intel: 4348 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 4349 break; 4350 case X86_VENDOR_AMD: 4351 create = cpi->cpi_family >= 0xf; 4352 break; 4353 default: 4354 create = 0; 4355 break; 4356 } 4357 if (create) 4358 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4359 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 4360 4361 /* ext-cpuid-features */ 4362 switch (cpi->cpi_vendor) { 4363 case X86_VENDOR_Intel: 4364 case X86_VENDOR_AMD: 4365 case X86_VENDOR_Cyrix: 4366 case X86_VENDOR_TM: 4367 case X86_VENDOR_Centaur: 4368 create = cpi->cpi_xmaxeax >= 0x80000001; 4369 break; 4370 default: 4371 create = 0; 4372 break; 4373 } 4374 if (create) { 4375 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4376 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 4377 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 4378 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 4379 } 4380 4381 /* 4382 * Brand String first appeared in Intel Pentium IV, AMD K5 4383 * model 1, and Cyrix GXm. On earlier models we try to 4384 * simulate something similar .. so this string should always 4385 * say -something- about the processor, however lame.
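 * (When the processor supports them, the string itself is read from the extended cpuid functions 0x80000002 through 0x80000004 in pass 3; otherwise one is synthesized from the vendor, family and model data.)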
4386 */ 4387 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 4388 "brand-string", cpi->cpi_brandstr); 4389 4390 /* 4391 * Finally, cache and tlb information 4392 */ 4393 switch (x86_which_cacheinfo(cpi)) { 4394 case X86_VENDOR_Intel: 4395 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 4396 break; 4397 case X86_VENDOR_Cyrix: 4398 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 4399 break; 4400 case X86_VENDOR_AMD: 4401 amd_cache_info(cpi, cpu_devi); 4402 break; 4403 default: 4404 break; 4405 } 4406 } 4407 4408 struct l2info { 4409 int *l2i_csz; 4410 int *l2i_lsz; 4411 int *l2i_assoc; 4412 int l2i_ret; 4413 }; 4414 4415 /* 4416 * A cacheinfo walker that fetches the size, line-size and associativity 4417 * of the L2 cache 4418 */ 4419 static int 4420 intel_l2cinfo(void *arg, const struct cachetab *ct) 4421 { 4422 struct l2info *l2i = arg; 4423 int *ip; 4424 4425 if (ct->ct_label != l2_cache_str && 4426 ct->ct_label != sl2_cache_str) 4427 return (0); /* not an L2 -- keep walking */ 4428 4429 if ((ip = l2i->l2i_csz) != NULL) 4430 *ip = ct->ct_size; 4431 if ((ip = l2i->l2i_lsz) != NULL) 4432 *ip = ct->ct_line_size; 4433 if ((ip = l2i->l2i_assoc) != NULL) 4434 *ip = ct->ct_assoc; 4435 l2i->l2i_ret = ct->ct_size; 4436 return (1); /* was an L2 -- terminate walk */ 4437 } 4438 4439 /* 4440 * AMD L2/L3 Cache and TLB Associativity Field Definition: 4441 * 4442 * Unlike the associativity for the L1 cache and tlb where the 8 bit 4443 * value is the associativity, the associativity for the L2 cache and 4444 * tlb is encoded in the following table. The 4 bit L2 value serves as 4445 * an index into the amd_afd[] array to determine the associativity. 4446 * -1 is undefined. 0 is fully associative. 4447 */ 4448 4449 static int amd_afd[] = 4450 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 4451 4452 static void 4453 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 4454 { 4455 struct cpuid_regs *cp; 4456 uint_t size, assoc; 4457 int i; 4458 int *ip; 4459 4460 if (cpi->cpi_xmaxeax < 0x80000006) 4461 return; 4462 cp = &cpi->cpi_extd[6]; 4463 4464 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 4465 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 4466 uint_t cachesz = size * 1024; 4467 assoc = amd_afd[i]; 4468 4469 ASSERT(assoc != -1); 4470 4471 if ((ip = l2i->l2i_csz) != NULL) 4472 *ip = cachesz; 4473 if ((ip = l2i->l2i_lsz) != NULL) 4474 *ip = BITX(cp->cp_ecx, 7, 0); 4475 if ((ip = l2i->l2i_assoc) != NULL) 4476 *ip = assoc; 4477 l2i->l2i_ret = cachesz; 4478 } 4479 } 4480 4481 int 4482 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 4483 { 4484 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 4485 struct l2info __l2info, *l2i = &__l2info; 4486 4487 l2i->l2i_csz = csz; 4488 l2i->l2i_lsz = lsz; 4489 l2i->l2i_assoc = assoc; 4490 l2i->l2i_ret = -1; 4491 4492 switch (x86_which_cacheinfo(cpi)) { 4493 case X86_VENDOR_Intel: 4494 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 4495 break; 4496 case X86_VENDOR_Cyrix: 4497 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 4498 break; 4499 case X86_VENDOR_AMD: 4500 amd_l2cacheinfo(cpi, l2i); 4501 break; 4502 default: 4503 break; 4504 } 4505 return (l2i->l2i_ret); 4506 } 4507 4508 #if !defined(__xpv) 4509 4510 uint32_t * 4511 cpuid_mwait_alloc(cpu_t *cpu) 4512 { 4513 uint32_t *ret; 4514 size_t mwait_size; 4515 4516 ASSERT(cpuid_checkpass(CPU, 2)); 4517 4518 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 4519 if (mwait_size == 0) 4520 return (NULL); 4521 4522 /* 4523 * kmem_alloc() returns cache line size aligned data for 
mwait_size 4524 * allocations. mwait_size is currently cache line sized. Neither 4525 * of these implementation details is guaranteed to be true in the 4526 * future. 4527 * 4528 * First try allocating mwait_size as kmem_alloc() currently returns 4529 * correctly aligned memory. If kmem_alloc() does not return 4530 * mwait_size aligned memory, then allocate mwait_size * 2 and round up. 4531 * 4532 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 4533 * decide to free this memory. 4534 */ 4535 ret = kmem_zalloc(mwait_size, KM_SLEEP); 4536 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 4537 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4538 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 4539 *ret = MWAIT_RUNNING; 4540 return (ret); 4541 } else { 4542 kmem_free(ret, mwait_size); 4543 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 4544 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 4545 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 4546 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 4547 *ret = MWAIT_RUNNING; 4548 return (ret); 4549 } 4550 } 4551 4552 void 4553 cpuid_mwait_free(cpu_t *cpu) 4554 { 4555 if (cpu->cpu_m.mcpu_cpi == NULL) { 4556 return; 4557 } 4558 4559 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 4560 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 4561 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 4562 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 4563 } 4564 4565 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 4566 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 4567 } 4568 4569 void 4570 patch_tsc_read(int flag) 4571 { 4572 size_t cnt; 4573 4574 switch (flag) { 4575 case X86_NO_TSC: 4576 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 4577 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 4578 break; 4579 case X86_HAVE_TSCP: 4580 cnt = &_tscp_end - &_tscp_start; 4581 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 4582 break; 4583 case X86_TSC_MFENCE: 4584 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 4585 (void) memcpy((void *)tsc_read, 4586 (void *)&_tsc_mfence_start, cnt); 4587 break; 4588 case X86_TSC_LFENCE: 4589 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 4590 (void) memcpy((void *)tsc_read, 4591 (void *)&_tsc_lfence_start, cnt); 4592 break; 4593 default: 4594 break; 4595 } 4596 } 4597 4598 int 4599 cpuid_deep_cstates_supported(void) 4600 { 4601 struct cpuid_info *cpi; 4602 struct cpuid_regs regs; 4603 4604 ASSERT(cpuid_checkpass(CPU, 1)); 4605 4606 cpi = CPU->cpu_m.mcpu_cpi; 4607 4608 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) 4609 return (0); 4610 4611 switch (cpi->cpi_vendor) { 4612 case X86_VENDOR_Intel: 4613 if (cpi->cpi_xmaxeax < 0x80000007) 4614 return (0); 4615 4616 /* 4617 * Does the TSC run at a constant rate in all ACPI C-states? 4618 */ 4619 regs.cp_eax = 0x80000007; 4620 (void) __cpuid_insn(&regs); 4621 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 4622 4623 default: 4624 return (0); 4625 } 4626 } 4627 4628 #endif /* !__xpv */ 4629 4630 void 4631 post_startup_cpu_fixups(void) 4632 { 4633 #ifndef __xpv 4634 /* 4635 * Some AMD processors support C1E state. Entering this state will 4636 * cause the local APIC timer to stop, which we can't deal with at 4637 * this time.
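 * (C1E is typically enabled by the BIOS through the interrupt-pending/compare-halt MSR; the code below clears the ACTONCMPHALT bits in MSR_AMD_INT_PENDING_CMP_HALT so that the processor no longer enters C1E and the APIC timer keeps running.)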
4638 */ 4639 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 4640 on_trap_data_t otd; 4641 uint64_t reg; 4642 4643 if (!on_trap(&otd, OT_DATA_ACCESS)) { 4644 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 4645 /* Disable C1E state if it is enabled by BIOS */ 4646 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 4647 AMD_ACTONCMPHALT_MASK) { 4648 reg &= ~(AMD_ACTONCMPHALT_MASK << 4649 AMD_ACTONCMPHALT_SHIFT); 4650 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 4651 } 4652 } 4653 no_trap(); 4654 } 4655 #endif /* !__xpv */ 4656 } 4657 4658 /* 4659 * Set up the necessary registers to enable the XSAVE feature on this processor. 4660 * This function needs to be called early enough, so that no xsave/xrstor 4661 * ops will execute on the processor before the MSRs are properly set up. 4662 * 4663 * The current implementation has the following assumptions: 4664 * - cpuid_pass1() is done, so that X86 features are known. 4665 * - fpu_probe() is done, so that fp_save_mech is chosen. 4666 */ 4667 void 4668 xsave_setup_msr(cpu_t *cpu) 4669 { 4670 ASSERT(fp_save_mech == FP_XSAVE); 4671 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); 4672 4673 /* Enable OSXSAVE in CR4. */ 4674 setcr4(getcr4() | CR4_OSXSAVE); 4675 /* 4676 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report 4677 * the correct value. 4678 */ 4679 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE; 4680 setup_xfem(); 4681 } 4682 4683 /* 4684 * Starting with the Westmere processor, the local 4685 * APIC timer continues running in all C-states, 4686 * including the deepest C-states. 4687 */ 4688 int 4689 cpuid_arat_supported(void) 4690 { 4691 struct cpuid_info *cpi; 4692 struct cpuid_regs regs; 4693 4694 ASSERT(cpuid_checkpass(CPU, 1)); 4695 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID)); 4696 4697 cpi = CPU->cpu_m.mcpu_cpi; 4698 4699 switch (cpi->cpi_vendor) { 4700 case X86_VENDOR_Intel: 4701 /* 4702 * Always-running Local APIC Timer is 4703 * indicated by CPUID.6.EAX[2]. 4704 */ 4705 if (cpi->cpi_maxeax >= 6) { 4706 regs.cp_eax = 6; 4707 (void) cpuid_insn(NULL, &regs); 4708 return (regs.cp_eax & CPUID_CSTATE_ARAT); 4709 } else { 4710 return (0); 4711 } 4712 default: 4713 return (0); 4714 } 4715 } 4716 4717 /* 4718 * Check support for Intel ENERGY_PERF_BIAS feature 4719 */ 4720 int 4721 cpuid_iepb_supported(struct cpu *cp) 4722 { 4723 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi; 4724 struct cpuid_regs regs; 4725 4726 ASSERT(cpuid_checkpass(cp, 1)); 4727 4728 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) || 4729 !(is_x86_feature(x86_featureset, X86FSET_MSR))) { 4730 return (0); 4731 } 4732 4733 /* 4734 * Intel ENERGY_PERF_BIAS MSR is indicated by 4735 * capability bit CPUID.6.ECX.3. 4736 */ 4737 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6)) 4738 return (0); 4739 4740 regs.cp_eax = 0x6; 4741 (void) cpuid_insn(NULL, &regs); 4742 return (regs.cp_ecx & CPUID_EPB_SUPPORT); 4743 } 4744 4745 /* 4746 * Check support for TSC deadline timer 4747 * 4748 * The TSC deadline timer provides a superior software programming 4749 * model over the local APIC timer that eliminates "time drifts". 4750 * Instead of specifying a relative time, software specifies an 4751 * absolute time as the target at which the processor should 4752 * generate a timer event.
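 * (The deadline is armed by writing the target TSC value to the IA32_TSC_DEADLINE MSR; the timer fires once, when the TSC reaches that value, and support is advertised by the CPUID bit tested below.)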
4753 */ 4754 int 4755 cpuid_deadline_tsc_supported(void) 4756 { 4757 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi; 4758 struct cpuid_regs regs; 4759 4760 ASSERT(cpuid_checkpass(CPU, 1)); 4761 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID)); 4762 4763 switch (cpi->cpi_vendor) { 4764 case X86_VENDOR_Intel: 4765 if (cpi->cpi_maxeax >= 1) { 4766 regs.cp_eax = 1; 4767 (void) cpuid_insn(NULL, &regs); 4768 return (regs.cp_ecx & CPUID_DEADLINE_TSC); 4769 } else { 4770 return (0); 4771 } 4772 default: 4773 return (0); 4774 } 4775 } 4776 4777 #if defined(__amd64) && !defined(__xpv) 4778 /* 4779 * Patch in versions of bcopy for high-performance Intel Nhm processors 4780 * and later... 4781 */ 4782 void 4783 patch_memops(uint_t vendor) 4784 { 4785 size_t cnt, i; 4786 caddr_t to, from; 4787 4788 if ((vendor == X86_VENDOR_Intel) && 4789 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) { 4790 cnt = &bcopy_patch_end - &bcopy_patch_start; 4791 to = &bcopy_ck_size; 4792 from = &bcopy_patch_start; 4793 for (i = 0; i < cnt; i++) { 4794 *to++ = *from++; 4795 } 4796 } 4797 } 4798 #endif /* __amd64 && !__xpv */ 4799 4800 /* 4801 * This function finds the number of bits to represent the number of cores per 4802 * chip and the number of strands per core for Intel platforms. 4803 * It reuses the x2APIC cpuid code from cpuid_pass2(). 4804 */ 4805 void 4806 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits) 4807 { 4808 struct cpuid_regs regs; 4809 struct cpuid_regs *cp = &regs; 4810 4811 if (vendor != X86_VENDOR_Intel) { 4812 return; 4813 } 4814 4815 /* if the maximum cpuid level is at least 0xB, extended topo is available. */ 4816 cp->cp_eax = 0; 4817 if (__cpuid_insn(cp) >= 0xB) { 4818 4819 cp->cp_eax = 0xB; 4820 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 4821 (void) __cpuid_insn(cp); 4822 4823 /* 4824 * Check that CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 4825 * indicates that the extended topology enumeration leaf is 4826 * available. 4827 */ 4828 if (cp->cp_ebx) { 4829 uint_t coreid_shift = 0; 4830 uint_t chipid_shift = 0; 4831 uint_t i; 4832 uint_t level; 4833 4834 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 4835 cp->cp_eax = 0xB; 4836 cp->cp_ecx = i; 4837 4838 (void) __cpuid_insn(cp); 4839 level = CPI_CPU_LEVEL_TYPE(cp); 4840 4841 if (level == 1) { 4842 /* 4843 * Thread level processor topology 4844 * Number of bits to shift the APIC ID 4845 * right to get the coreid. 4846 */ 4847 coreid_shift = BITX(cp->cp_eax, 4, 0); 4848 } else if (level == 2) { 4849 /* 4850 * Core level processor topology 4851 * Number of bits to shift the APIC ID 4852 * right to get the chipid. 4853 */ 4854 chipid_shift = BITX(cp->cp_eax, 4, 0); 4855 } 4856 } 4857 4858 if (coreid_shift > 0 && chipid_shift > coreid_shift) { 4859 *strand_nbits = coreid_shift; 4860 *core_nbits = chipid_shift - coreid_shift; 4861 } 4862 } 4863 } 4864 } 4865
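/*
 * Illustrative sketch, not part of the build: given the bit widths computed
 * above, an x2APIC-style APIC ID decomposes as [ chip | core | strand ],
 * with strand_nbits low-order bits selecting the strand and the next
 * core_nbits bits selecting the core. For example, with coreid_shift = 1
 * and chipid_shift = 4 (so strand_nbits = 1, core_nbits = 3), APIC ID 0x1b
 * decodes to chip 1, core 5, strand 1. The helper below is a hypothetical
 * consumer of those widths, not an existing kernel interface.
 */
#if 0
static void
example_decode_apicid(uint32_t apicid, uint_t core_nbits, uint_t strand_nbits,
    uint_t *chipid, uint_t *coreid, uint_t *strandid)
{
	/* the low strand_nbits bits select the strand within a core */
	*strandid = apicid & ((1U << strand_nbits) - 1);
	/* the next core_nbits bits select the core within a chip */
	*coreid = (apicid >> strand_nbits) & ((1U << core_nbits) - 1);
	/* the remaining high bits identify the chip (package) */
	*chipid = apicid >> (core_nbits + strand_nbits);
}
#endif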