1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 by Delphix. All rights reserved. 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net> 26 */ 27 /* 28 * Copyright (c) 2010, Intel Corporation. 29 * All rights reserved. 30 */ 31 /* 32 * Portions Copyright 2009 Advanced Micro Devices, Inc. 33 */ 34 /* 35 * Copyright (c) 2015, Joyent, Inc. All rights reserved. 36 */ 37 /* 38 * Various routines to handle identification 39 * and classification of x86 processors. 40 */ 41 42 #include <sys/types.h> 43 #include <sys/archsystm.h> 44 #include <sys/x86_archext.h> 45 #include <sys/kmem.h> 46 #include <sys/systm.h> 47 #include <sys/cmn_err.h> 48 #include <sys/sunddi.h> 49 #include <sys/sunndi.h> 50 #include <sys/cpuvar.h> 51 #include <sys/processor.h> 52 #include <sys/sysmacros.h> 53 #include <sys/pg.h> 54 #include <sys/fp.h> 55 #include <sys/controlregs.h> 56 #include <sys/bitmap.h> 57 #include <sys/auxv_386.h> 58 #include <sys/memnode.h> 59 #include <sys/pci_cfgspace.h> 60 61 #ifdef __xpv 62 #include <sys/hypervisor.h> 63 #else 64 #include <sys/ontrap.h> 65 #endif 66 67 /* 68 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 69 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 70 * them accordingly. For most modern processors, feature detection occurs here 71 * in pass 1. 72 * 73 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 74 * for the boot CPU and does the basic analysis that the early kernel needs. 75 * x86_featureset is set based on the return value of cpuid_pass1() of the boot 76 * CPU. 77 * 78 * Pass 1 includes: 79 * 80 * o Determining vendor/model/family/stepping and setting x86_type and 81 * x86_vendor accordingly. 82 * o Processing the feature flags returned by the cpuid instruction while 83 * applying any workarounds or tricks for the specific processor. 84 * o Mapping the feature flags into Solaris feature bits (X86_*). 85 * o Processing extended feature flags if supported by the processor, 86 * again while applying specific processor knowledge. 87 * o Determining the CMT characteristics of the system. 88 * 89 * Pass 1 is done on non-boot CPUs during their initialization and the results 90 * are used only as a meager attempt at ensuring that all processors within the 91 * system support the same features. 92 * 93 * Pass 2 of cpuid feature analysis happens just at the beginning 94 * of startup(). 
It just copies in and corrects the remainder 95 * of the cpuid data we depend on: standard cpuid functions that we didn't 96 * need for pass1 feature analysis, and extended cpuid functions beyond the 97 * simple feature processing done in pass1. 98 * 99 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 100 * particular kernel memory allocation has been made available. It creates a 101 * readable brand string based on the data collected in the first two passes. 102 * 103 * Pass 4 of cpuid analysis is invoked after post_startup() when all 104 * the support infrastructure for various hardware features has been 105 * initialized. It determines which processor features will be reported 106 * to userland via the aux vector. 107 * 108 * All passes are executed on all CPUs, but only the boot CPU determines what 109 * features the kernel will use. 110 * 111 * Much of the worst junk in this file is for the support of processors 112 * that didn't really implement the cpuid instruction properly. 113 * 114 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 115 * the pass numbers. Accordingly, changes to the pass code may require changes 116 * to the accessor code. 117 */ 118 119 uint_t x86_vendor = X86_VENDOR_IntelClone; 120 uint_t x86_type = X86_TYPE_OTHER; 121 uint_t x86_clflush_size = 0; 122 123 uint_t pentiumpro_bug4046376; 124 125 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)]; 126 127 static char *x86_feature_names[NUM_X86_FEATURES] = { 128 "lgpg", 129 "tsc", 130 "msr", 131 "mtrr", 132 "pge", 133 "de", 134 "cmov", 135 "mmx", 136 "mca", 137 "pae", 138 "cv8", 139 "pat", 140 "sep", 141 "sse", 142 "sse2", 143 "htt", 144 "asysc", 145 "nx", 146 "sse3", 147 "cx16", 148 "cmp", 149 "tscp", 150 "mwait", 151 "sse4a", 152 "cpuid", 153 "ssse3", 154 "sse4_1", 155 "sse4_2", 156 "1gpg", 157 "clfsh", 158 "64", 159 "aes", 160 "pclmulqdq", 161 "xsave", 162 "avx", 163 "vmx", 164 "svm", 165 "topoext", 166 "f16c", 167 "rdrand", 168 "x2apic", 169 "avx2", 170 "bmi1", 171 "bmi2", 172 "fma", 173 "smep" 174 }; 175 176 boolean_t 177 is_x86_feature(void *featureset, uint_t feature) 178 { 179 ASSERT(feature < NUM_X86_FEATURES); 180 return (BT_TEST((ulong_t *)featureset, feature)); 181 } 182 183 void 184 add_x86_feature(void *featureset, uint_t feature) 185 { 186 ASSERT(feature < NUM_X86_FEATURES); 187 BT_SET((ulong_t *)featureset, feature); 188 } 189 190 void 191 remove_x86_feature(void *featureset, uint_t feature) 192 { 193 ASSERT(feature < NUM_X86_FEATURES); 194 BT_CLEAR((ulong_t *)featureset, feature); 195 } 196 197 boolean_t 198 compare_x86_featureset(void *setA, void *setB) 199 { 200 /* 201 * We assume that the unused bits of the bitmap are always zero. 202 */ 203 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) { 204 return (B_TRUE); 205 } else { 206 return (B_FALSE); 207 } 208 } 209 210 void 211 print_x86_featureset(void *featureset) 212 { 213 uint_t i; 214 215 for (i = 0; i < NUM_X86_FEATURES; i++) { 216 if (is_x86_feature(featureset, i)) { 217 cmn_err(CE_CONT, "?x86_feature: %s\n", 218 x86_feature_names[i]); 219 } 220 } 221 } 222 223 static size_t xsave_state_size = 0; 224 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE); 225 boolean_t xsave_force_disable = B_FALSE; 226 227 /* 228 * This is set to platform type we are running on. 229 */ 230 static int platform_type = -1; 231 232 #if !defined(__xpv) 233 /* 234 * Variable to patch if hypervisor platform detection needs to be 235 * disabled (e.g. 
platform_type will always be HW_NATIVE if this is 0). 236 */ 237 int enable_platform_detection = 1; 238 #endif 239 240 /* 241 * monitor/mwait info. 242 * 243 * size_actual and buf_actual are the real address and size allocated to get 244 * proper mwait_buf alignment. buf_actual and size_actual should be passed 245 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 246 * processor cache-line alignment, but this is not guaranteed in the future. 247 */ 248 struct mwait_info { 249 size_t mon_min; /* min size to avoid missed wakeups */ 250 size_t mon_max; /* size to avoid false wakeups */ 251 size_t size_actual; /* size actually allocated */ 252 void *buf_actual; /* memory actually allocated */ 253 uint32_t support; /* processor support of monitor/mwait */ 254 }; 255 256 /* 257 * xsave/xrestor info. 258 * 259 * This structure contains HW feature bits and size of the xsave save area. 260 * Note: the kernel will use the maximum size required for all hardware 261 * features. It is not optimized for potential memory savings if features at 262 * the end of the save area are not enabled. 263 */ 264 struct xsave_info { 265 uint32_t xsav_hw_features_low; /* Supported HW features */ 266 uint32_t xsav_hw_features_high; /* Supported HW features */ 267 size_t xsav_max_size; /* max size save area for HW features */ 268 size_t ymm_size; /* AVX: size of ymm save area */ 269 size_t ymm_offset; /* AVX: offset for ymm save area */ 270 }; 271 272 273 /* 274 * These constants determine how many of the elements of the 275 * cpuid we cache in the cpuid_info data structure; the 276 * remaining elements are accessible via the cpuid instruction. 277 */ 278 279 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */ 280 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */ 281 282 /* 283 * Some terminology needs to be explained: 284 * - Socket: Something that can be plugged into a motherboard. 285 * - Package: Same as socket 286 * - Chip: Same as socket. Note that AMD's documentation uses the term "chip" 287 * differently: there, chip is the same as processor node (below) 288 * - Processor node: Some AMD processors have more than one 289 * "subprocessor" embedded in a package. These subprocessors (nodes) 290 * are fully-functional processors themselves with cores, caches, 291 * memory controllers, PCI configuration spaces. They are connected 292 * inside the package with Hypertransport links. On single-node 293 * processors, processor node is equivalent to chip/socket/package. 294 * - Compute Unit: Some AMD processors pair cores in "compute units" that 295 * share the FPU and the I$ and L2 caches.
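 *
 * As a purely illustrative example of how these terms map onto the
 * identifiers kept in struct cpuid_info below: a hypothetical family 0x15
 * package built from two processor nodes, with the cores of each node
 * grouped into two-core compute units, would be described with
 *
 *	cpi_procnodes_per_pkg  = 2	(nodes in the socket/package)
 *	cpi_cores_per_compunit = 2	(cores sharing FPU, I$ and L2)
 *
 * while cpi_chipid identifies the package, cpi_procnodeid the node, and
 * cpi_compunitid the compute unit that a given cpu belongs to.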
296 */ 297 298 struct cpuid_info { 299 uint_t cpi_pass; /* last pass completed */ 300 /* 301 * standard function information 302 */ 303 uint_t cpi_maxeax; /* fn 0: %eax */ 304 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 305 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 306 307 uint_t cpi_family; /* fn 1: extended family */ 308 uint_t cpi_model; /* fn 1: extended model */ 309 uint_t cpi_step; /* fn 1: stepping */ 310 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */ 311 /* AMD: package/socket # */ 312 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 313 int cpi_clogid; /* fn 1: %ebx: thread # */ 314 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 315 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 316 uint_t cpi_ncache; /* fn 2: number of elements */ 317 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 318 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 319 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 320 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 321 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */ 322 /* 323 * extended function information 324 */ 325 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 326 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 327 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 328 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 329 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */ 330 331 id_t cpi_coreid; /* same coreid => strands share core */ 332 int cpi_pkgcoreid; /* core number within single package */ 333 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 334 /* Intel: fn 4: %eax[31-26] */ 335 /* 336 * supported feature information 337 */ 338 uint32_t cpi_support[6]; 339 #define STD_EDX_FEATURES 0 340 #define AMD_EDX_FEATURES 1 341 #define TM_EDX_FEATURES 2 342 #define STD_ECX_FEATURES 3 343 #define AMD_ECX_FEATURES 4 344 #define STD_EBX_FEATURES 5 345 /* 346 * Synthesized information, where known. 
347 */ 348 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 349 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 350 uint32_t cpi_socket; /* Chip package/socket type */ 351 352 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 353 uint32_t cpi_apicid; 354 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 355 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 356 /* Intel: 1 */ 357 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */ 358 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */ 359 360 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */ 361 }; 362 363 364 static struct cpuid_info cpuid_info0; 365 366 /* 367 * These bit fields are defined by the Intel Application Note AP-485 368 * "Intel Processor Identification and the CPUID Instruction" 369 */ 370 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 371 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 372 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 373 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 374 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 375 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 376 377 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 378 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 379 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 380 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 381 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx) 382 383 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 384 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 385 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 386 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 387 388 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 389 #define CPI_XMAXEAX_MAX 0x80000100 390 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 391 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 392 393 /* 394 * Function 4 (Deterministic Cache Parameters) macros 395 * Defined by Intel Application Note AP-485 396 */ 397 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 398 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 399 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 400 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 401 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 402 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 403 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 404 405 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 406 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 407 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 408 409 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 410 411 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 412 413 414 /* 415 * A couple of shorthand macros to identify "later" P6-family chips 416 * like the Pentium M and Core. 
First, the "older" P6-based stuff 417 * (loosely defined as "pre-Pentium-4"): 418 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 419 */ 420 421 #define IS_LEGACY_P6(cpi) ( \ 422 cpi->cpi_family == 6 && \ 423 (cpi->cpi_model == 1 || \ 424 cpi->cpi_model == 3 || \ 425 cpi->cpi_model == 5 || \ 426 cpi->cpi_model == 6 || \ 427 cpi->cpi_model == 7 || \ 428 cpi->cpi_model == 8 || \ 429 cpi->cpi_model == 0xA || \ 430 cpi->cpi_model == 0xB) \ 431 ) 432 433 /* A "new F6" is everything with family 6 that's not the above */ 434 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 435 436 /* Extended family/model support */ 437 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 438 cpi->cpi_family >= 0xf) 439 440 /* 441 * Info for monitor/mwait idle loop. 442 * 443 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 444 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 445 * 2006. 446 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 447 * Documentation Updates" #33633, Rev 2.05, December 2006. 448 */ 449 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 450 #define MWAIT_EXTENSIONS (0x00000002) /* extension supported */ 451 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 452 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 453 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 454 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 455 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 456 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 457 /* 458 * Number of sub-cstates for a given c-state. 459 */ 460 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 461 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 462 463 /* 464 * XSAVE leaf 0xD enumeration 465 */ 466 #define CPUID_LEAFD_2_YMM_OFFSET 576 467 #define CPUID_LEAFD_2_YMM_SIZE 256 468 469 /* 470 * Functions we consume from cpuid_subr.c; don't publish these in a header 471 * file to try and keep people using the expected cpuid_* interfaces. 472 */ 473 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 474 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 475 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 476 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 477 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 478 479 /* 480 * Apply various platform-dependent restrictions where the 481 * underlying platform restrictions mean the CPU can be marked 482 * as less capable than its cpuid instruction would imply. 483 */ 484 #if defined(__xpv) 485 static void 486 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 487 { 488 switch (eax) { 489 case 1: { 490 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
491 0 : CPUID_INTC_EDX_MCA; 492 cp->cp_edx &= 493 ~(mcamask | 494 CPUID_INTC_EDX_PSE | 495 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 496 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 497 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 498 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 499 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 500 break; 501 } 502 503 case 0x80000001: 504 cp->cp_edx &= 505 ~(CPUID_AMD_EDX_PSE | 506 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 507 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 508 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 509 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 510 CPUID_AMD_EDX_TSCP); 511 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 512 break; 513 default: 514 break; 515 } 516 517 switch (vendor) { 518 case X86_VENDOR_Intel: 519 switch (eax) { 520 case 4: 521 /* 522 * Zero out the (ncores-per-chip - 1) field 523 */ 524 cp->cp_eax &= 0x03fffffff; 525 break; 526 default: 527 break; 528 } 529 break; 530 case X86_VENDOR_AMD: 531 switch (eax) { 532 533 case 0x80000001: 534 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 535 break; 536 537 case 0x80000008: 538 /* 539 * Zero out the (ncores-per-chip - 1) field 540 */ 541 cp->cp_ecx &= 0xffffff00; 542 break; 543 default: 544 break; 545 } 546 break; 547 default: 548 break; 549 } 550 } 551 #else 552 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 553 #endif 554 555 /* 556 * Some undocumented ways of patching the results of the cpuid 557 * instruction to permit running Solaris 10 on future cpus that 558 * we don't currently support. Could be set to non-zero values 559 * via settings in eeprom. 560 */ 561 562 uint32_t cpuid_feature_ecx_include; 563 uint32_t cpuid_feature_ecx_exclude; 564 uint32_t cpuid_feature_edx_include; 565 uint32_t cpuid_feature_edx_exclude; 566 567 /* 568 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. 569 */ 570 void 571 cpuid_alloc_space(cpu_t *cpu) 572 { 573 /* 574 * By convention, cpu0 is the boot cpu, which is set up 575 * before memory allocation is available. All other cpus get 576 * their cpuid_info struct allocated here. 577 */ 578 ASSERT(cpu->cpu_id != 0); 579 ASSERT(cpu->cpu_m.mcpu_cpi == NULL); 580 cpu->cpu_m.mcpu_cpi = 581 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 582 } 583 584 void 585 cpuid_free_space(cpu_t *cpu) 586 { 587 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 588 int i; 589 590 ASSERT(cpi != NULL); 591 ASSERT(cpi != &cpuid_info0); 592 593 /* 594 * Free up any function 4 related dynamic storage 595 */ 596 for (i = 1; i < cpi->cpi_std_4_size; i++) 597 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 598 if (cpi->cpi_std_4_size > 0) 599 kmem_free(cpi->cpi_std_4, 600 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 601 602 kmem_free(cpi, sizeof (*cpi)); 603 cpu->cpu_m.mcpu_cpi = NULL; 604 } 605 606 #if !defined(__xpv) 607 /* 608 * Determine the type of the underlying platform. This is used to customize 609 * initialization of various subsystems (e.g. TSC). determine_platform() must 610 * only ever be called once to prevent two processors from seeing different 611 * values of platform_type. Must be called before cpuid_pass1(), the earliest 612 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv). 
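 *
 * For reference, the detection below relies on the convention that leaf
 * 0x40000000 returns the maximum hypervisor leaf in %eax and a 12-byte
 * vendor signature packed into %ebx:%ecx:%edx. For instance, a KVM guest
 * typically sees
 *
 *	%ebx = 0x4b4d564b	("KVMK")
 *	%ecx = 0x564b4d56	("VMKV")
 *	%edx = 0x0000004d	("M")
 *
 * which reassembles into the string "KVMKVMKVM" that is compared against
 * HVSIG_KVM; the other hypervisors use the same layout with their own
 * signature strings.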
613 */ 614 void 615 determine_platform(void) 616 { 617 struct cpuid_regs cp; 618 uint32_t base; 619 uint32_t regs[4]; 620 char *hvstr = (char *)regs; 621 622 ASSERT(platform_type == -1); 623 624 platform_type = HW_NATIVE; 625 626 if (!enable_platform_detection) 627 return; 628 629 /* 630 * If Hypervisor CPUID bit is set, try to determine hypervisor 631 * vendor signature, and set platform type accordingly. 632 * 633 * References: 634 * http://lkml.org/lkml/2008/10/1/246 635 * http://kb.vmware.com/kb/1009458 636 */ 637 cp.cp_eax = 0x1; 638 (void) __cpuid_insn(&cp); 639 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) { 640 cp.cp_eax = 0x40000000; 641 (void) __cpuid_insn(&cp); 642 regs[0] = cp.cp_ebx; 643 regs[1] = cp.cp_ecx; 644 regs[2] = cp.cp_edx; 645 regs[3] = 0; 646 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) { 647 platform_type = HW_XEN_HVM; 648 return; 649 } 650 if (strcmp(hvstr, HVSIG_VMWARE) == 0) { 651 platform_type = HW_VMWARE; 652 return; 653 } 654 if (strcmp(hvstr, HVSIG_KVM) == 0) { 655 platform_type = HW_KVM; 656 return; 657 } 658 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0) 659 platform_type = HW_MICROSOFT; 660 } else { 661 /* 662 * Check older VMware hardware versions. VMware hypervisor is 663 * detected by performing an IN operation to VMware hypervisor 664 * port and checking that value returned in %ebx is VMware 665 * hypervisor magic value. 666 * 667 * References: http://kb.vmware.com/kb/1009458 668 */ 669 vmware_port(VMWARE_HVCMD_GETVERSION, regs); 670 if (regs[1] == VMWARE_HVMAGIC) { 671 platform_type = HW_VMWARE; 672 return; 673 } 674 } 675 676 /* 677 * Check Xen hypervisor. In a fully virtualized domain, 678 * Xen's pseudo-cpuid function returns a string representing the 679 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum 680 * supported cpuid function. We need at least a (base + 2) leaf value 681 * to do what we want to do. Try different base values, since the 682 * hypervisor might use a different one depending on whether Hyper-V 683 * emulation is switched on by default or not. 684 */ 685 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 686 cp.cp_eax = base; 687 (void) __cpuid_insn(&cp); 688 regs[0] = cp.cp_ebx; 689 regs[1] = cp.cp_ecx; 690 regs[2] = cp.cp_edx; 691 regs[3] = 0; 692 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 && 693 cp.cp_eax >= (base + 2)) { 694 platform_type &= ~HW_NATIVE; 695 platform_type |= HW_XEN_HVM; 696 return; 697 } 698 } 699 } 700 701 int 702 get_hwenv(void) 703 { 704 ASSERT(platform_type != -1); 705 return (platform_type); 706 } 707 708 int 709 is_controldom(void) 710 { 711 return (0); 712 } 713 714 #else 715 716 int 717 get_hwenv(void) 718 { 719 return (HW_XEN_PV); 720 } 721 722 int 723 is_controldom(void) 724 { 725 return (DOMAIN_IS_INITDOMAIN(xen_info)); 726 } 727 728 #endif /* __xpv */ 729 730 static void 731 cpuid_intel_getids(cpu_t *cpu, void *feature) 732 { 733 uint_t i; 734 uint_t chipid_shift = 0; 735 uint_t coreid_shift = 0; 736 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 737 738 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 739 chipid_shift++; 740 741 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 742 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 743 744 if (is_x86_feature(feature, X86FSET_CMP)) { 745 /* 746 * Multi-core (and possibly multi-threaded) 747 * processors. 
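 *
 * A small worked example (values are hypothetical, for illustration
 * only): a package with 2 cores and 2 threads per core reports
 * cpi_ncpu_per_chip = 4, so chipid_shift above is 2; with
 * cpi_ncore_per_chip = 2, ncpu_per_core below is 2 and coreid_shift
 * becomes 1. An apicid of 0x6 (binary 0110) then decomposes as
 *
 *	cpi_chipid    = 0x6 >> 2 = 1
 *	cpi_clogid    = 0x6 & 0x3 = 2
 *	cpi_coreid    = 0x6 >> 1 = 3
 *	cpi_pkgcoreid = 0x2 >> 1 = 1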
748 */ 749 uint_t ncpu_per_core; 750 if (cpi->cpi_ncore_per_chip == 1) 751 ncpu_per_core = cpi->cpi_ncpu_per_chip; 752 else if (cpi->cpi_ncore_per_chip > 1) 753 ncpu_per_core = cpi->cpi_ncpu_per_chip / 754 cpi->cpi_ncore_per_chip; 755 /* 756 * 8-bit APIC IDs on dual-core Pentiums 757 * look like this: 758 * 759 * +-----------------------+------+------+ 760 * | Physical Package ID | MC | HT | 761 * +-----------------------+------+------+ 762 * <------- chipid --------> 763 * <------- coreid ---------------> 764 * <--- clogid --> 765 * <------> 766 * pkgcoreid 767 * 768 * Where the number of bits necessary to 769 * represent MC and HT fields together equals 770 * the minimum number of bits necessary to 771 * store the value of cpi->cpi_ncpu_per_chip. 772 * Of those bits, the MC part uses the number 773 * of bits necessary to store the value of 774 * cpi->cpi_ncore_per_chip. 775 */ 776 for (i = 1; i < ncpu_per_core; i <<= 1) 777 coreid_shift++; 778 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 779 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 780 } else if (is_x86_feature(feature, X86FSET_HTT)) { 781 /* 782 * Single-core multi-threaded processors. 783 */ 784 cpi->cpi_coreid = cpi->cpi_chipid; 785 cpi->cpi_pkgcoreid = 0; 786 } 787 cpi->cpi_procnodeid = cpi->cpi_chipid; 788 cpi->cpi_compunitid = cpi->cpi_coreid; 789 } 790 791 static void 792 cpuid_amd_getids(cpu_t *cpu) 793 { 794 int i, first_half, coreidsz; 795 uint32_t nb_caps_reg; 796 uint_t node2_1; 797 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 798 struct cpuid_regs *cp; 799 800 /* 801 * AMD CMP chips currently have a single thread per core. 802 * 803 * Since no two cpus share a core we must assign a distinct coreid 804 * per cpu, and we do this by using the cpu_id. This scheme does not, 805 * however, guarantee that sibling cores of a chip will have sequential 806 * coreids starting at a multiple of the number of cores per chip - 807 * that is usually the case, but if the ACPI MADT table is presented 808 * in a different order then we need to perform a few more gymnastics 809 * for the pkgcoreid. 810 * 811 * All processors in the system have the same number of enabled 812 * cores. Cores within a processor are always numbered sequentially 813 * from 0 regardless of how many or which are disabled, and there 814 * is no way for the operating system to discover the real core id when some 815 * are disabled. 816 * 817 * In family 0x15, the cores come in pairs called compute units. They 818 * share I$ and L2 caches and the FPU. Enumeration of this feature is 819 * simplified by the new topology extensions CPUID leaf, indicated by 820 * the X86 feature X86FSET_TOPOEXT. 821 */ 822 823 cpi->cpi_coreid = cpu->cpu_id; 824 cpi->cpi_compunitid = cpu->cpu_id; 825 826 if (cpi->cpi_xmaxeax >= 0x80000008) { 827 828 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 829 830 /* 831 * In AMD parlance chip is really a node while Solaris 832 * sees chip as equivalent to socket/package.
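 *
 * Illustrative example (hypothetical values): on a two-node package
 * with cpi_procnodes_per_pkg = 2, the two nodes might carry
 * cpi_procnodeid values 4 and 5; both then map to the same
 * cpi_chipid = procnodeid / procnodes_per_pkg = 2 at the bottom of
 * this function. Likewise, an ApicIdCoreIdSize (coreidsz) of 4 means
 * the low four apicid bits become cpi_clogid/cpi_pkgcoreid below.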
833 */ 834 cpi->cpi_ncore_per_chip = 835 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 836 if (coreidsz == 0) { 837 /* Use legacy method */ 838 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 839 coreidsz++; 840 if (coreidsz == 0) 841 coreidsz = 1; 842 } 843 } else { 844 /* Assume single-core part */ 845 cpi->cpi_ncore_per_chip = 1; 846 coreidsz = 1; 847 } 848 849 cpi->cpi_clogid = cpi->cpi_pkgcoreid = 850 cpi->cpi_apicid & ((1<<coreidsz) - 1); 851 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip; 852 853 /* Get node ID, compute unit ID */ 854 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 855 cpi->cpi_xmaxeax >= 0x8000001e) { 856 cp = &cpi->cpi_extd[0x1e]; 857 cp->cp_eax = 0x8000001e; 858 (void) __cpuid_insn(cp); 859 860 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1; 861 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0); 862 cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1; 863 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) 864 + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit) 865 * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg); 866 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) { 867 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 868 } else if (cpi->cpi_family == 0x10) { 869 /* 870 * See if we are a multi-node processor. 871 * All processors in the system have the same number of nodes 872 */ 873 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 874 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 875 /* Single-node */ 876 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 877 coreidsz); 878 } else { 879 880 /* 881 * Multi-node revision D (2 nodes per package 882 * are supported) 883 */ 884 cpi->cpi_procnodes_per_pkg = 2; 885 886 first_half = (cpi->cpi_pkgcoreid <= 887 (cpi->cpi_ncore_per_chip/2 - 1)); 888 889 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 890 /* We are BSP */ 891 cpi->cpi_procnodeid = (first_half ? 0 : 1); 892 } else { 893 894 /* We are AP */ 895 /* NodeId[2:1] bits to use for reading F3xe8 */ 896 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 897 898 nb_caps_reg = 899 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 900 901 /* 902 * Check IntNodeNum bit (31:30, but bit 31 is 903 * always 0 on dual-node processors) 904 */ 905 if (BITX(nb_caps_reg, 30, 30) == 0) 906 cpi->cpi_procnodeid = node2_1 + 907 !first_half; 908 else 909 cpi->cpi_procnodeid = node2_1 + 910 first_half; 911 } 912 } 913 } else { 914 cpi->cpi_procnodeid = 0; 915 } 916 917 cpi->cpi_chipid = 918 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg; 919 } 920 921 /* 922 * Setup XFeature_Enabled_Mask register. Required by xsave feature. 
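 *
 * Architecturally, XCR0 bit 0 covers the legacy x87 state, bit 1 the
 * SSE (XMM) state and bit 2 the AVX (YMM) state, so on an AVX-capable
 * cpu the mask written below works out to
 *
 *	XFEATURE_LEGACY_FP | XFEATURE_SSE | XFEATURE_AVX == 0x7
 *
 * (a worked example only; the value actually used always follows the
 * feature set computed in cpuid_pass1()).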
923 */ 924 void 925 setup_xfem(void) 926 { 927 uint64_t flags = XFEATURE_LEGACY_FP; 928 929 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); 930 931 if (is_x86_feature(x86_featureset, X86FSET_SSE)) 932 flags |= XFEATURE_SSE; 933 934 if (is_x86_feature(x86_featureset, X86FSET_AVX)) 935 flags |= XFEATURE_AVX; 936 937 set_xcr(XFEATURE_ENABLED_MASK, flags); 938 939 xsave_bv_all = flags; 940 } 941 942 void 943 cpuid_pass1(cpu_t *cpu, uchar_t *featureset) 944 { 945 uint32_t mask_ecx, mask_edx; 946 struct cpuid_info *cpi; 947 struct cpuid_regs *cp; 948 int xcpuid; 949 #if !defined(__xpv) 950 extern int idle_cpu_prefer_mwait; 951 #endif 952 953 /* 954 * Space statically allocated for BSP, ensure pointer is set 955 */ 956 if (cpu->cpu_id == 0) { 957 if (cpu->cpu_m.mcpu_cpi == NULL) 958 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 959 } 960 961 add_x86_feature(featureset, X86FSET_CPUID); 962 963 cpi = cpu->cpu_m.mcpu_cpi; 964 ASSERT(cpi != NULL); 965 cp = &cpi->cpi_std[0]; 966 cp->cp_eax = 0; 967 cpi->cpi_maxeax = __cpuid_insn(cp); 968 { 969 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 970 *iptr++ = cp->cp_ebx; 971 *iptr++ = cp->cp_edx; 972 *iptr++ = cp->cp_ecx; 973 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 974 } 975 976 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 977 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 978 979 /* 980 * Limit the range in case of weird hardware 981 */ 982 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 983 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 984 if (cpi->cpi_maxeax < 1) 985 goto pass1_done; 986 987 cp = &cpi->cpi_std[1]; 988 cp->cp_eax = 1; 989 (void) __cpuid_insn(cp); 990 991 /* 992 * Extract identifying constants for easy access. 993 */ 994 cpi->cpi_model = CPI_MODEL(cpi); 995 cpi->cpi_family = CPI_FAMILY(cpi); 996 997 if (cpi->cpi_family == 0xf) 998 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 999 1000 /* 1001 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 1002 * Intel, and presumably everyone else, uses model == 0xf, as 1003 * one would expect (max value means possible overflow). Sigh. 1004 */ 1005 1006 switch (cpi->cpi_vendor) { 1007 case X86_VENDOR_Intel: 1008 if (IS_EXTENDED_MODEL_INTEL(cpi)) 1009 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 1010 break; 1011 case X86_VENDOR_AMD: 1012 if (CPI_FAMILY(cpi) == 0xf) 1013 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 1014 break; 1015 default: 1016 if (cpi->cpi_model == 0xf) 1017 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 1018 break; 1019 } 1020 1021 cpi->cpi_step = CPI_STEP(cpi); 1022 cpi->cpi_brandid = CPI_BRANDID(cpi); 1023 1024 /* 1025 * *default* assumptions: 1026 * - believe %edx feature word 1027 * - ignore %ecx feature word 1028 * - 32-bit virtual and physical addressing 1029 */ 1030 mask_edx = 0xffffffff; 1031 mask_ecx = 0; 1032 1033 cpi->cpi_pabits = cpi->cpi_vabits = 32; 1034 1035 switch (cpi->cpi_vendor) { 1036 case X86_VENDOR_Intel: 1037 if (cpi->cpi_family == 5) 1038 x86_type = X86_TYPE_P5; 1039 else if (IS_LEGACY_P6(cpi)) { 1040 x86_type = X86_TYPE_P6; 1041 pentiumpro_bug4046376 = 1; 1042 /* 1043 * Clear the SEP bit when it was set erroneously 1044 */ 1045 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 1046 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 1047 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 1048 x86_type = X86_TYPE_P4; 1049 /* 1050 * We don't currently depend on any of the %ecx 1051 * features until Prescott, so we'll only check 1052 * this from P4 onwards. We might want to revisit 1053 * that idea later. 
1054 */ 1055 mask_ecx = 0xffffffff; 1056 } else if (cpi->cpi_family > 0xf) 1057 mask_ecx = 0xffffffff; 1058 /* 1059 * We don't support MONITOR/MWAIT if leaf 5 is not available 1060 * to obtain the monitor linesize. 1061 */ 1062 if (cpi->cpi_maxeax < 5) 1063 mask_ecx &= ~CPUID_INTC_ECX_MON; 1064 break; 1065 case X86_VENDOR_IntelClone: 1066 default: 1067 break; 1068 case X86_VENDOR_AMD: 1069 #if defined(OPTERON_ERRATUM_108) 1070 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 1071 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 1072 cpi->cpi_model = 0xc; 1073 } else 1074 #endif 1075 if (cpi->cpi_family == 5) { 1076 /* 1077 * AMD K5 and K6 1078 * 1079 * These CPUs have an incomplete implementation 1080 * of MCA/MCE which we mask away. 1081 */ 1082 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 1083 1084 /* 1085 * Model 0 uses the wrong (APIC) bit 1086 * to indicate PGE. Fix it here. 1087 */ 1088 if (cpi->cpi_model == 0) { 1089 if (cp->cp_edx & 0x200) { 1090 cp->cp_edx &= ~0x200; 1091 cp->cp_edx |= CPUID_INTC_EDX_PGE; 1092 } 1093 } 1094 1095 /* 1096 * Early models had problems w/ MMX; disable. 1097 */ 1098 if (cpi->cpi_model < 6) 1099 mask_edx &= ~CPUID_INTC_EDX_MMX; 1100 } 1101 1102 /* 1103 * For newer families, SSE3 and CX16, at least, are valid; 1104 * enable all 1105 */ 1106 if (cpi->cpi_family >= 0xf) 1107 mask_ecx = 0xffffffff; 1108 /* 1109 * We don't support MONITOR/MWAIT if leaf 5 is not available 1110 * to obtain the monitor linesize. 1111 */ 1112 if (cpi->cpi_maxeax < 5) 1113 mask_ecx &= ~CPUID_INTC_ECX_MON; 1114 1115 #if !defined(__xpv) 1116 /* 1117 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 1118 * processors. AMD does not intend MWAIT to be used in the cpu 1119 * idle loop on current and future processors. 10h and future 1120 * AMD processors use more power in MWAIT than HLT. 1121 * Pre-family-10h Opterons do not have the MWAIT instruction. 1122 */ 1123 idle_cpu_prefer_mwait = 0; 1124 #endif 1125 1126 break; 1127 case X86_VENDOR_TM: 1128 /* 1129 * workaround the NT workaround in CMS 4.1 1130 */ 1131 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 1132 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 1133 cp->cp_edx |= CPUID_INTC_EDX_CX8; 1134 break; 1135 case X86_VENDOR_Centaur: 1136 /* 1137 * workaround the NT workarounds again 1138 */ 1139 if (cpi->cpi_family == 6) 1140 cp->cp_edx |= CPUID_INTC_EDX_CX8; 1141 break; 1142 case X86_VENDOR_Cyrix: 1143 /* 1144 * We rely heavily on the probing in locore 1145 * to actually figure out what parts, if any, 1146 * of the Cyrix cpuid instruction to believe. 
1147 */ 1148 switch (x86_type) { 1149 case X86_TYPE_CYRIX_486: 1150 mask_edx = 0; 1151 break; 1152 case X86_TYPE_CYRIX_6x86: 1153 mask_edx = 0; 1154 break; 1155 case X86_TYPE_CYRIX_6x86L: 1156 mask_edx = 1157 CPUID_INTC_EDX_DE | 1158 CPUID_INTC_EDX_CX8; 1159 break; 1160 case X86_TYPE_CYRIX_6x86MX: 1161 mask_edx = 1162 CPUID_INTC_EDX_DE | 1163 CPUID_INTC_EDX_MSR | 1164 CPUID_INTC_EDX_CX8 | 1165 CPUID_INTC_EDX_PGE | 1166 CPUID_INTC_EDX_CMOV | 1167 CPUID_INTC_EDX_MMX; 1168 break; 1169 case X86_TYPE_CYRIX_GXm: 1170 mask_edx = 1171 CPUID_INTC_EDX_MSR | 1172 CPUID_INTC_EDX_CX8 | 1173 CPUID_INTC_EDX_CMOV | 1174 CPUID_INTC_EDX_MMX; 1175 break; 1176 case X86_TYPE_CYRIX_MediaGX: 1177 break; 1178 case X86_TYPE_CYRIX_MII: 1179 case X86_TYPE_VIA_CYRIX_III: 1180 mask_edx = 1181 CPUID_INTC_EDX_DE | 1182 CPUID_INTC_EDX_TSC | 1183 CPUID_INTC_EDX_MSR | 1184 CPUID_INTC_EDX_CX8 | 1185 CPUID_INTC_EDX_PGE | 1186 CPUID_INTC_EDX_CMOV | 1187 CPUID_INTC_EDX_MMX; 1188 break; 1189 default: 1190 break; 1191 } 1192 break; 1193 } 1194 1195 #if defined(__xpv) 1196 /* 1197 * Do not support MONITOR/MWAIT under a hypervisor 1198 */ 1199 mask_ecx &= ~CPUID_INTC_ECX_MON; 1200 /* 1201 * Do not support XSAVE under a hypervisor for now 1202 */ 1203 xsave_force_disable = B_TRUE; 1204 1205 #endif /* __xpv */ 1206 1207 if (xsave_force_disable) { 1208 mask_ecx &= ~CPUID_INTC_ECX_XSAVE; 1209 mask_ecx &= ~CPUID_INTC_ECX_AVX; 1210 mask_ecx &= ~CPUID_INTC_ECX_F16C; 1211 mask_ecx &= ~CPUID_INTC_ECX_FMA; 1212 } 1213 1214 /* 1215 * Now we've figured out the masks that determine 1216 * which bits we choose to believe, apply the masks 1217 * to the feature words, then map the kernel's view 1218 * of these feature words into its feature word. 1219 */ 1220 cp->cp_edx &= mask_edx; 1221 cp->cp_ecx &= mask_ecx; 1222 1223 /* 1224 * apply any platform restrictions (we don't call this 1225 * immediately after __cpuid_insn here, because we need the 1226 * workarounds applied above first) 1227 */ 1228 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 1229 1230 /* 1231 * In addition to ecx and edx, Intel is storing a bunch of instruction 1232 * set extensions in leaf 7's ebx. 1233 */ 1234 if (cpi->cpi_vendor == X86_VENDOR_Intel && cpi->cpi_maxeax >= 7) { 1235 struct cpuid_regs *ecp; 1236 ecp = &cpi->cpi_std[7]; 1237 ecp->cp_eax = 7; 1238 ecp->cp_ecx = 0; 1239 (void) __cpuid_insn(ecp); 1240 /* 1241 * If XSAVE has been disabled, just ignore all of the AVX 1242 * dependent flags here. 
1243 */ 1244 if (xsave_force_disable) { 1245 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 1246 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 1247 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 1248 } 1249 1250 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP) 1251 add_x86_feature(featureset, X86FSET_SMEP); 1252 } 1253 1254 /* 1255 * fold in overrides from the "eeprom" mechanism 1256 */ 1257 cp->cp_edx |= cpuid_feature_edx_include; 1258 cp->cp_edx &= ~cpuid_feature_edx_exclude; 1259 1260 cp->cp_ecx |= cpuid_feature_ecx_include; 1261 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 1262 1263 if (cp->cp_edx & CPUID_INTC_EDX_PSE) { 1264 add_x86_feature(featureset, X86FSET_LARGEPAGE); 1265 } 1266 if (cp->cp_edx & CPUID_INTC_EDX_TSC) { 1267 add_x86_feature(featureset, X86FSET_TSC); 1268 } 1269 if (cp->cp_edx & CPUID_INTC_EDX_MSR) { 1270 add_x86_feature(featureset, X86FSET_MSR); 1271 } 1272 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) { 1273 add_x86_feature(featureset, X86FSET_MTRR); 1274 } 1275 if (cp->cp_edx & CPUID_INTC_EDX_PGE) { 1276 add_x86_feature(featureset, X86FSET_PGE); 1277 } 1278 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) { 1279 add_x86_feature(featureset, X86FSET_CMOV); 1280 } 1281 if (cp->cp_edx & CPUID_INTC_EDX_MMX) { 1282 add_x86_feature(featureset, X86FSET_MMX); 1283 } 1284 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 1285 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) { 1286 add_x86_feature(featureset, X86FSET_MCA); 1287 } 1288 if (cp->cp_edx & CPUID_INTC_EDX_PAE) { 1289 add_x86_feature(featureset, X86FSET_PAE); 1290 } 1291 if (cp->cp_edx & CPUID_INTC_EDX_CX8) { 1292 add_x86_feature(featureset, X86FSET_CX8); 1293 } 1294 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) { 1295 add_x86_feature(featureset, X86FSET_CX16); 1296 } 1297 if (cp->cp_edx & CPUID_INTC_EDX_PAT) { 1298 add_x86_feature(featureset, X86FSET_PAT); 1299 } 1300 if (cp->cp_edx & CPUID_INTC_EDX_SEP) { 1301 add_x86_feature(featureset, X86FSET_SEP); 1302 } 1303 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 1304 /* 1305 * In our implementation, fxsave/fxrstor 1306 * are prerequisites before we'll even 1307 * try and do SSE things. 1308 */ 1309 if (cp->cp_edx & CPUID_INTC_EDX_SSE) { 1310 add_x86_feature(featureset, X86FSET_SSE); 1311 } 1312 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) { 1313 add_x86_feature(featureset, X86FSET_SSE2); 1314 } 1315 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) { 1316 add_x86_feature(featureset, X86FSET_SSE3); 1317 } 1318 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) { 1319 add_x86_feature(featureset, X86FSET_SSSE3); 1320 } 1321 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) { 1322 add_x86_feature(featureset, X86FSET_SSE4_1); 1323 } 1324 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) { 1325 add_x86_feature(featureset, X86FSET_SSE4_2); 1326 } 1327 if (cp->cp_ecx & CPUID_INTC_ECX_AES) { 1328 add_x86_feature(featureset, X86FSET_AES); 1329 } 1330 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) { 1331 add_x86_feature(featureset, X86FSET_PCLMULQDQ); 1332 } 1333 1334 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) { 1335 add_x86_feature(featureset, X86FSET_XSAVE); 1336 1337 /* We only test AVX when there is XSAVE */ 1338 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) { 1339 add_x86_feature(featureset, 1340 X86FSET_AVX); 1341 1342 /* 1343 * Intel says we can't check these without also 1344 * checking AVX. 
1345 */ 1346 if (cp->cp_ecx & CPUID_INTC_ECX_F16C) 1347 add_x86_feature(featureset, 1348 X86FSET_F16C); 1349 1350 if (cp->cp_ecx & CPUID_INTC_ECX_FMA) 1351 add_x86_feature(featureset, 1352 X86FSET_FMA); 1353 1354 if (cpi->cpi_std[7].cp_ebx & 1355 CPUID_INTC_EBX_7_0_BMI1) 1356 add_x86_feature(featureset, 1357 X86FSET_BMI1); 1358 1359 if (cpi->cpi_std[7].cp_ebx & 1360 CPUID_INTC_EBX_7_0_BMI2) 1361 add_x86_feature(featureset, 1362 X86FSET_BMI2); 1363 1364 if (cpi->cpi_std[7].cp_ebx & 1365 CPUID_INTC_EBX_7_0_AVX2) 1366 add_x86_feature(featureset, 1367 X86FSET_AVX2); 1368 } 1369 } 1370 } 1371 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) { 1372 add_x86_feature(featureset, X86FSET_X2APIC); 1373 } 1374 if (cp->cp_edx & CPUID_INTC_EDX_DE) { 1375 add_x86_feature(featureset, X86FSET_DE); 1376 } 1377 #if !defined(__xpv) 1378 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 1379 1380 /* 1381 * We require the CLFLUSH instruction for erratum workaround 1382 * to use MONITOR/MWAIT. 1383 */ 1384 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1385 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 1386 add_x86_feature(featureset, X86FSET_MWAIT); 1387 } else { 1388 extern int idle_cpu_assert_cflush_monitor; 1389 1390 /* 1391 * All processors we are aware of which have 1392 * MONITOR/MWAIT also have CLFLUSH. 1393 */ 1394 if (idle_cpu_assert_cflush_monitor) { 1395 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 1396 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 1397 } 1398 } 1399 } 1400 #endif /* __xpv */ 1401 1402 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) { 1403 add_x86_feature(featureset, X86FSET_VMX); 1404 } 1405 1406 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND) 1407 add_x86_feature(featureset, X86FSET_RDRAND); 1408 1409 /* 1410 * Only need it first time, rest of the cpus would follow suit. 1411 * we only capture this for the bootcpu. 1412 */ 1413 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1414 add_x86_feature(featureset, X86FSET_CLFSH); 1415 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 1416 } 1417 if (is_x86_feature(featureset, X86FSET_PAE)) 1418 cpi->cpi_pabits = 36; 1419 1420 /* 1421 * Hyperthreading configuration is slightly tricky on Intel 1422 * and pure clones, and even trickier on AMD. 1423 * 1424 * (AMD chose to set the HTT bit on their CMP processors, 1425 * even though they're not actually hyperthreaded. Thus it 1426 * takes a bit more work to figure out what's really going 1427 * on ... see the handling of the CMP_LGCY bit below) 1428 */ 1429 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 1430 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 1431 if (cpi->cpi_ncpu_per_chip > 1) 1432 add_x86_feature(featureset, X86FSET_HTT); 1433 } else { 1434 cpi->cpi_ncpu_per_chip = 1; 1435 } 1436 1437 /* 1438 * Work on the "extended" feature information, doing 1439 * some basic initialization for cpuid_pass2() 1440 */ 1441 xcpuid = 0; 1442 switch (cpi->cpi_vendor) { 1443 case X86_VENDOR_Intel: 1444 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 1445 xcpuid++; 1446 break; 1447 case X86_VENDOR_AMD: 1448 if (cpi->cpi_family > 5 || 1449 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 1450 xcpuid++; 1451 break; 1452 case X86_VENDOR_Cyrix: 1453 /* 1454 * Only these Cyrix CPUs are -known- to support 1455 * extended cpuid operations. 
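 *
 * (A general note on the probe that follows: a part implementing the
 * extended range answers leaf 0x80000000 with its highest extended
 * leaf, e.g. 0x8000001e, so bit 31 of cpi_xmaxeax is set; a part that
 * does not will typically echo a small standard-leaf value, and the
 * "& 0x80000000" test below fails.)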
1456 */ 1457 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1458 x86_type == X86_TYPE_CYRIX_GXm) 1459 xcpuid++; 1460 break; 1461 case X86_VENDOR_Centaur: 1462 case X86_VENDOR_TM: 1463 default: 1464 xcpuid++; 1465 break; 1466 } 1467 1468 if (xcpuid) { 1469 cp = &cpi->cpi_extd[0]; 1470 cp->cp_eax = 0x80000000; 1471 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1472 } 1473 1474 if (cpi->cpi_xmaxeax & 0x80000000) { 1475 1476 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1477 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1478 1479 switch (cpi->cpi_vendor) { 1480 case X86_VENDOR_Intel: 1481 case X86_VENDOR_AMD: 1482 if (cpi->cpi_xmaxeax < 0x80000001) 1483 break; 1484 cp = &cpi->cpi_extd[1]; 1485 cp->cp_eax = 0x80000001; 1486 (void) __cpuid_insn(cp); 1487 1488 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1489 cpi->cpi_family == 5 && 1490 cpi->cpi_model == 6 && 1491 cpi->cpi_step == 6) { 1492 /* 1493 * K6 model 6 uses bit 10 to indicate SYSC 1494 * Later models use bit 11. Fix it here. 1495 */ 1496 if (cp->cp_edx & 0x400) { 1497 cp->cp_edx &= ~0x400; 1498 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1499 } 1500 } 1501 1502 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1503 1504 /* 1505 * Compute the additions to the kernel's feature word. 1506 */ 1507 if (cp->cp_edx & CPUID_AMD_EDX_NX) { 1508 add_x86_feature(featureset, X86FSET_NX); 1509 } 1510 1511 /* 1512 * Regardless whether or not we boot 64-bit, 1513 * we should have a way to identify whether 1514 * the CPU is capable of running 64-bit. 1515 */ 1516 if (cp->cp_edx & CPUID_AMD_EDX_LM) { 1517 add_x86_feature(featureset, X86FSET_64); 1518 } 1519 1520 #if defined(__amd64) 1521 /* 1 GB large page - enable only for 64 bit kernel */ 1522 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) { 1523 add_x86_feature(featureset, X86FSET_1GPG); 1524 } 1525 #endif 1526 1527 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1528 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1529 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) { 1530 add_x86_feature(featureset, X86FSET_SSE4A); 1531 } 1532 1533 /* 1534 * If both the HTT and CMP_LGCY bits are set, 1535 * then we're not actually HyperThreaded. Read 1536 * "AMD CPUID Specification" for more details. 1537 */ 1538 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1539 is_x86_feature(featureset, X86FSET_HTT) && 1540 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1541 remove_x86_feature(featureset, X86FSET_HTT); 1542 add_x86_feature(featureset, X86FSET_CMP); 1543 } 1544 #if defined(__amd64) 1545 /* 1546 * It's really tricky to support syscall/sysret in 1547 * the i386 kernel; we rely on sysenter/sysexit 1548 * instead. In the amd64 kernel, things are -way- 1549 * better. 1550 */ 1551 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) { 1552 add_x86_feature(featureset, X86FSET_ASYSC); 1553 } 1554 1555 /* 1556 * While we're thinking about system calls, note 1557 * that AMD processors don't support sysenter 1558 * in long mode at all, so don't try to program them. 1559 */ 1560 if (x86_vendor == X86_VENDOR_AMD) { 1561 remove_x86_feature(featureset, X86FSET_SEP); 1562 } 1563 #endif 1564 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) { 1565 add_x86_feature(featureset, X86FSET_TSCP); 1566 } 1567 1568 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) { 1569 add_x86_feature(featureset, X86FSET_SVM); 1570 } 1571 1572 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) { 1573 add_x86_feature(featureset, X86FSET_TOPOEXT); 1574 } 1575 break; 1576 default: 1577 break; 1578 } 1579 1580 /* 1581 * Get CPUID data about processor cores and hyperthreads. 
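 *
 * To make the two encodings used below concrete (hypothetical values):
 * Intel reports cores per package in leaf 4 %eax[31:26] as a count
 * minus one, so a value of 3 there means 4 cores; AMD reports "NC" in
 * leaf 0x80000008 %ecx[7:0], likewise minus one, so a value of 5 means
 * 6 cores. Both are folded into cpi_ncore_per_chip further down.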
1582 */ 1583 switch (cpi->cpi_vendor) { 1584 case X86_VENDOR_Intel: 1585 if (cpi->cpi_maxeax >= 4) { 1586 cp = &cpi->cpi_std[4]; 1587 cp->cp_eax = 4; 1588 cp->cp_ecx = 0; 1589 (void) __cpuid_insn(cp); 1590 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1591 } 1592 /*FALLTHROUGH*/ 1593 case X86_VENDOR_AMD: 1594 if (cpi->cpi_xmaxeax < 0x80000008) 1595 break; 1596 cp = &cpi->cpi_extd[8]; 1597 cp->cp_eax = 0x80000008; 1598 (void) __cpuid_insn(cp); 1599 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1600 1601 /* 1602 * Virtual and physical address limits from 1603 * cpuid override previously guessed values. 1604 */ 1605 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1606 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1607 break; 1608 default: 1609 break; 1610 } 1611 1612 /* 1613 * Derive the number of cores per chip 1614 */ 1615 switch (cpi->cpi_vendor) { 1616 case X86_VENDOR_Intel: 1617 if (cpi->cpi_maxeax < 4) { 1618 cpi->cpi_ncore_per_chip = 1; 1619 break; 1620 } else { 1621 cpi->cpi_ncore_per_chip = 1622 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1623 } 1624 break; 1625 case X86_VENDOR_AMD: 1626 if (cpi->cpi_xmaxeax < 0x80000008) { 1627 cpi->cpi_ncore_per_chip = 1; 1628 break; 1629 } else { 1630 /* 1631 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1632 * 1 less than the number of physical cores on 1633 * the chip. In family 0x10 this value can 1634 * be affected by "downcoring" - it reflects 1635 * 1 less than the number of cores actually 1636 * enabled on this node. 1637 */ 1638 cpi->cpi_ncore_per_chip = 1639 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1640 } 1641 break; 1642 default: 1643 cpi->cpi_ncore_per_chip = 1; 1644 break; 1645 } 1646 1647 /* 1648 * Get CPUID data about TSC Invariance in Deep C-State. 1649 */ 1650 switch (cpi->cpi_vendor) { 1651 case X86_VENDOR_Intel: 1652 if (cpi->cpi_maxeax >= 7) { 1653 cp = &cpi->cpi_extd[7]; 1654 cp->cp_eax = 0x80000007; 1655 cp->cp_ecx = 0; 1656 (void) __cpuid_insn(cp); 1657 } 1658 break; 1659 default: 1660 break; 1661 } 1662 } else { 1663 cpi->cpi_ncore_per_chip = 1; 1664 } 1665 1666 /* 1667 * If more than one core, then this processor is CMP. 1668 */ 1669 if (cpi->cpi_ncore_per_chip > 1) { 1670 add_x86_feature(featureset, X86FSET_CMP); 1671 } 1672 1673 /* 1674 * If the number of cores is the same as the number 1675 * of CPUs, then we cannot have HyperThreading. 1676 */ 1677 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) { 1678 remove_x86_feature(featureset, X86FSET_HTT); 1679 } 1680 1681 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1682 cpi->cpi_procnodes_per_pkg = 1; 1683 cpi->cpi_cores_per_compunit = 1; 1684 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE && 1685 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) { 1686 /* 1687 * Single-core single-threaded processors. 1688 */ 1689 cpi->cpi_chipid = -1; 1690 cpi->cpi_clogid = 0; 1691 cpi->cpi_coreid = cpu->cpu_id; 1692 cpi->cpi_pkgcoreid = 0; 1693 if (cpi->cpi_vendor == X86_VENDOR_AMD) 1694 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 1695 else 1696 cpi->cpi_procnodeid = cpi->cpi_chipid; 1697 } else if (cpi->cpi_ncpu_per_chip > 1) { 1698 if (cpi->cpi_vendor == X86_VENDOR_Intel) 1699 cpuid_intel_getids(cpu, featureset); 1700 else if (cpi->cpi_vendor == X86_VENDOR_AMD) 1701 cpuid_amd_getids(cpu); 1702 else { 1703 /* 1704 * All other processors are currently 1705 * assumed to have single cores. 
1706 */ 1707 cpi->cpi_coreid = cpi->cpi_chipid; 1708 cpi->cpi_pkgcoreid = 0; 1709 cpi->cpi_procnodeid = cpi->cpi_chipid; 1710 cpi->cpi_compunitid = cpi->cpi_chipid; 1711 } 1712 } 1713 1714 /* 1715 * Synthesize chip "revision" and socket type 1716 */ 1717 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1718 cpi->cpi_model, cpi->cpi_step); 1719 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1720 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1721 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1722 cpi->cpi_model, cpi->cpi_step); 1723 1724 pass1_done: 1725 cpi->cpi_pass = 1; 1726 } 1727 1728 /* 1729 * Make copies of the cpuid table entries we depend on, in 1730 * part for ease of parsing now, in part so that we have only 1731 * one place to correct any of it, in part for ease of 1732 * later export to userland, and in part so we can look at 1733 * this stuff in a crash dump. 1734 */ 1735 1736 /*ARGSUSED*/ 1737 void 1738 cpuid_pass2(cpu_t *cpu) 1739 { 1740 uint_t n, nmax; 1741 int i; 1742 struct cpuid_regs *cp; 1743 uint8_t *dp; 1744 uint32_t *iptr; 1745 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1746 1747 ASSERT(cpi->cpi_pass == 1); 1748 1749 if (cpi->cpi_maxeax < 1) 1750 goto pass2_done; 1751 1752 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1753 nmax = NMAX_CPI_STD; 1754 /* 1755 * (We already handled n == 0 and n == 1 in pass 1) 1756 */ 1757 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1758 cp->cp_eax = n; 1759 1760 /* 1761 * CPUID function 4 expects %ecx to be initialized 1762 * with an index which indicates which cache to return 1763 * information about. The OS is expected to call function 4 1764 * with %ecx set to 0, 1, 2, ... until it returns with 1765 * EAX[4:0] set to 0, which indicates there are no more 1766 * caches. 1767 * 1768 * Here, populate cpi_std[4] with the information returned by 1769 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1770 * when dynamic memory allocation becomes available. 1771 * 1772 * Note: we need to explicitly initialize %ecx here, since 1773 * function 4 may have been previously invoked. 1774 */ 1775 if (n == 4) 1776 cp->cp_ecx = 0; 1777 1778 (void) __cpuid_insn(cp); 1779 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1780 switch (n) { 1781 case 2: 1782 /* 1783 * "the lower 8 bits of the %eax register 1784 * contain a value that identifies the number 1785 * of times the cpuid [instruction] has to be 1786 * executed to obtain a complete image of the 1787 * processor's caching systems." 1788 * 1789 * How *do* they make this stuff up? 1790 */ 1791 cpi->cpi_ncache = sizeof (*cp) * 1792 BITX(cp->cp_eax, 7, 0); 1793 if (cpi->cpi_ncache == 0) 1794 break; 1795 cpi->cpi_ncache--; /* skip count byte */ 1796 1797 /* 1798 * Well, for now, rather than attempt to implement 1799 * this slightly dubious algorithm, we just look 1800 * at the first 15 .. 
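 *
 * For illustration (the descriptor bytes are hypothetical): had leaf 2
 * returned %eax = 0x665b5001, the low byte 0x01 is the iteration count
 * and is skipped, bit 31 is clear so the register is valid, and the
 * remaining non-zero bytes 0x50, 0x5b and 0x66 would be copied into
 * cpi_cacheinfo by the loops below; a register with bit 31 set is
 * ignored entirely.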
1801 */ 1802 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1803 cpi->cpi_ncache = sizeof (*cp) - 1; 1804 1805 dp = cpi->cpi_cacheinfo; 1806 if (BITX(cp->cp_eax, 31, 31) == 0) { 1807 uint8_t *p = (void *)&cp->cp_eax; 1808 for (i = 1; i < 4; i++) 1809 if (p[i] != 0) 1810 *dp++ = p[i]; 1811 } 1812 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1813 uint8_t *p = (void *)&cp->cp_ebx; 1814 for (i = 0; i < 4; i++) 1815 if (p[i] != 0) 1816 *dp++ = p[i]; 1817 } 1818 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1819 uint8_t *p = (void *)&cp->cp_ecx; 1820 for (i = 0; i < 4; i++) 1821 if (p[i] != 0) 1822 *dp++ = p[i]; 1823 } 1824 if (BITX(cp->cp_edx, 31, 31) == 0) { 1825 uint8_t *p = (void *)&cp->cp_edx; 1826 for (i = 0; i < 4; i++) 1827 if (p[i] != 0) 1828 *dp++ = p[i]; 1829 } 1830 break; 1831 1832 case 3: /* Processor serial number, if PSN supported */ 1833 break; 1834 1835 case 4: /* Deterministic cache parameters */ 1836 break; 1837 1838 case 5: /* Monitor/Mwait parameters */ 1839 { 1840 size_t mwait_size; 1841 1842 /* 1843 * check cpi_mwait.support which was set in cpuid_pass1 1844 */ 1845 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1846 break; 1847 1848 /* 1849 * Protect ourselves from insane mwait line size. 1850 * Workaround for incomplete hardware emulator(s). 1851 */ 1852 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1853 if (mwait_size < sizeof (uint32_t) || 1854 !ISP2(mwait_size)) { 1855 #if DEBUG 1856 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1857 "size %ld", cpu->cpu_id, (long)mwait_size); 1858 #endif 1859 break; 1860 } 1861 1862 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1863 cpi->cpi_mwait.mon_max = mwait_size; 1864 if (MWAIT_EXTENSION(cpi)) { 1865 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1866 if (MWAIT_INT_ENABLE(cpi)) 1867 cpi->cpi_mwait.support |= 1868 MWAIT_ECX_INT_ENABLE; 1869 } 1870 break; 1871 } 1872 default: 1873 break; 1874 } 1875 } 1876 1877 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1878 struct cpuid_regs regs; 1879 1880 cp = &regs; 1881 cp->cp_eax = 0xB; 1882 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1883 1884 (void) __cpuid_insn(cp); 1885 1886 /* 1887 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1888 * indicates that the extended topology enumeration leaf is 1889 * available.
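 *
 * A worked example with hypothetical values: if the SMT sub-leaf
 * (level type 1) reports a shift of 1 with 2 logical cpus per core,
 * and the core sub-leaf (level type 2) reports a shift of 4 with 16
 * logical cpus per package, then an x2apic id of 0x21 decomposes as
 *
 *	cpi_chipid    = 0x21 >> 4  = 2
 *	cpi_clogid    = 0x21 & 0xf = 1
 *	cpi_coreid    = 0x21 >> 1  = 0x10
 *	cpi_pkgcoreid = 0x1 >> 1   = 0
 *
 * and cpi_ncore_per_chip = 16 / 2 = 8.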
1890 */ 1891 if (cp->cp_ebx) { 1892 uint32_t x2apic_id; 1893 uint_t coreid_shift = 0; 1894 uint_t ncpu_per_core = 1; 1895 uint_t chipid_shift = 0; 1896 uint_t ncpu_per_chip = 1; 1897 uint_t i; 1898 uint_t level; 1899 1900 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1901 cp->cp_eax = 0xB; 1902 cp->cp_ecx = i; 1903 1904 (void) __cpuid_insn(cp); 1905 level = CPI_CPU_LEVEL_TYPE(cp); 1906 1907 if (level == 1) { 1908 x2apic_id = cp->cp_edx; 1909 coreid_shift = BITX(cp->cp_eax, 4, 0); 1910 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1911 } else if (level == 2) { 1912 x2apic_id = cp->cp_edx; 1913 chipid_shift = BITX(cp->cp_eax, 4, 0); 1914 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1915 } 1916 } 1917 1918 cpi->cpi_apicid = x2apic_id; 1919 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1920 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1921 ncpu_per_core; 1922 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1923 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1924 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1925 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1926 } 1927 1928 /* Make cp NULL so that we don't stumble on others */ 1929 cp = NULL; 1930 } 1931 1932 /* 1933 * XSAVE enumeration 1934 */ 1935 if (cpi->cpi_maxeax >= 0xD) { 1936 struct cpuid_regs regs; 1937 boolean_t cpuid_d_valid = B_TRUE; 1938 1939 cp = &regs; 1940 cp->cp_eax = 0xD; 1941 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1942 1943 (void) __cpuid_insn(cp); 1944 1945 /* 1946 * Sanity checks for debug 1947 */ 1948 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 || 1949 (cp->cp_eax & XFEATURE_SSE) == 0) { 1950 cpuid_d_valid = B_FALSE; 1951 } 1952 1953 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax; 1954 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx; 1955 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx; 1956 1957 /* 1958 * If the hw supports AVX, get the size and offset in the save 1959 * area for the ymm state. 1960 */ 1961 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) { 1962 cp->cp_eax = 0xD; 1963 cp->cp_ecx = 2; 1964 cp->cp_edx = cp->cp_ebx = 0; 1965 1966 (void) __cpuid_insn(cp); 1967 1968 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET || 1969 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) { 1970 cpuid_d_valid = B_FALSE; 1971 } 1972 1973 cpi->cpi_xsave.ymm_size = cp->cp_eax; 1974 cpi->cpi_xsave.ymm_offset = cp->cp_ebx; 1975 } 1976 1977 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) { 1978 xsave_state_size = 0; 1979 } else if (cpuid_d_valid) { 1980 xsave_state_size = cpi->cpi_xsave.xsav_max_size; 1981 } else { 1982 /* Broken CPUID 0xD, probably in HVM */ 1983 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid " 1984 "value: hw_low = %d, hw_high = %d, xsave_size = %d" 1985 ", ymm_size = %d, ymm_offset = %d\n", 1986 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low, 1987 cpi->cpi_xsave.xsav_hw_features_high, 1988 (int)cpi->cpi_xsave.xsav_max_size, 1989 (int)cpi->cpi_xsave.ymm_size, 1990 (int)cpi->cpi_xsave.ymm_offset); 1991 1992 if (xsave_state_size != 0) { 1993 /* 1994 * This must be a non-boot CPU. We cannot 1995 * continue, because boot cpu has already 1996 * enabled XSAVE. 1997 */ 1998 ASSERT(cpu->cpu_id != 0); 1999 cmn_err(CE_PANIC, "cpu%d: we have already " 2000 "enabled XSAVE on boot cpu, cannot " 2001 "continue.", cpu->cpu_id); 2002 } else { 2003 /* 2004 * If we reached here on the boot CPU, it's also 2005 * almost certain that we'll reach here on the 2006 * non-boot CPUs. When we're here on a boot CPU 2007 * we should disable the feature, on a non-boot 2008 * CPU we need to confirm that we have.
2009 */ 2010 if (cpu->cpu_id == 0) { 2011 remove_x86_feature(x86_featureset, 2012 X86FSET_XSAVE); 2013 remove_x86_feature(x86_featureset, 2014 X86FSET_AVX); 2015 remove_x86_feature(x86_featureset, 2016 X86FSET_F16C); 2017 remove_x86_feature(x86_featureset, 2018 X86FSET_BMI1); 2019 remove_x86_feature(x86_featureset, 2020 X86FSET_BMI2); 2021 remove_x86_feature(x86_featureset, 2022 X86FSET_FMA); 2023 remove_x86_feature(x86_featureset, 2024 X86FSET_AVX2); 2025 CPI_FEATURES_ECX(cpi) &= 2026 ~CPUID_INTC_ECX_XSAVE; 2027 CPI_FEATURES_ECX(cpi) &= 2028 ~CPUID_INTC_ECX_AVX; 2029 CPI_FEATURES_ECX(cpi) &= 2030 ~CPUID_INTC_ECX_F16C; 2031 CPI_FEATURES_ECX(cpi) &= 2032 ~CPUID_INTC_ECX_FMA; 2033 CPI_FEATURES_7_0_EBX(cpi) &= 2034 ~CPUID_INTC_EBX_7_0_BMI1; 2035 CPI_FEATURES_7_0_EBX(cpi) &= 2036 ~CPUID_INTC_EBX_7_0_BMI2; 2037 CPI_FEATURES_7_0_EBX(cpi) &= 2038 ~CPUID_INTC_EBX_7_0_AVX2; 2039 xsave_force_disable = B_TRUE; 2040 } else { 2041 VERIFY(is_x86_feature(x86_featureset, 2042 X86FSET_XSAVE) == B_FALSE); 2043 } 2044 } 2045 } 2046 } 2047 2048 2049 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 2050 goto pass2_done; 2051 2052 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 2053 nmax = NMAX_CPI_EXTD; 2054 /* 2055 * Copy the extended properties, fixing them as we go. 2056 * (We already handled n == 0 and n == 1 in pass 1) 2057 */ 2058 iptr = (void *)cpi->cpi_brandstr; 2059 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 2060 cp->cp_eax = 0x80000000 + n; 2061 (void) __cpuid_insn(cp); 2062 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 2063 switch (n) { 2064 case 2: 2065 case 3: 2066 case 4: 2067 /* 2068 * Extract the brand string 2069 */ 2070 *iptr++ = cp->cp_eax; 2071 *iptr++ = cp->cp_ebx; 2072 *iptr++ = cp->cp_ecx; 2073 *iptr++ = cp->cp_edx; 2074 break; 2075 case 5: 2076 switch (cpi->cpi_vendor) { 2077 case X86_VENDOR_AMD: 2078 /* 2079 * The Athlon and Duron were the first 2080 * parts to report the sizes of the 2081 * TLB for large pages. Before then, 2082 * we don't trust the data. 2083 */ 2084 if (cpi->cpi_family < 6 || 2085 (cpi->cpi_family == 6 && 2086 cpi->cpi_model < 1)) 2087 cp->cp_eax = 0; 2088 break; 2089 default: 2090 break; 2091 } 2092 break; 2093 case 6: 2094 switch (cpi->cpi_vendor) { 2095 case X86_VENDOR_AMD: 2096 /* 2097 * The Athlon and Duron were the first 2098 * AMD parts with L2 TLB's. 2099 * Before then, don't trust the data. 2100 */ 2101 if (cpi->cpi_family < 6 || 2102 cpi->cpi_family == 6 && 2103 cpi->cpi_model < 1) 2104 cp->cp_eax = cp->cp_ebx = 0; 2105 /* 2106 * AMD Duron rev A0 reports L2 2107 * cache size incorrectly as 1K 2108 * when it is really 64K 2109 */ 2110 if (cpi->cpi_family == 6 && 2111 cpi->cpi_model == 3 && 2112 cpi->cpi_step == 0) { 2113 cp->cp_ecx &= 0xffff; 2114 cp->cp_ecx |= 0x400000; 2115 } 2116 break; 2117 case X86_VENDOR_Cyrix: /* VIA C3 */ 2118 /* 2119 * VIA C3 processors are a bit messed 2120 * up w.r.t. encoding cache sizes in %ecx 2121 */ 2122 if (cpi->cpi_family != 6) 2123 break; 2124 /* 2125 * model 7 and 8 were incorrectly encoded 2126 * 2127 * xxx is model 8 really broken? 
2128 */ 2129 if (cpi->cpi_model == 7 || 2130 cpi->cpi_model == 8) 2131 cp->cp_ecx = 2132 BITX(cp->cp_ecx, 31, 24) << 16 | 2133 BITX(cp->cp_ecx, 23, 16) << 12 | 2134 BITX(cp->cp_ecx, 15, 8) << 8 | 2135 BITX(cp->cp_ecx, 7, 0); 2136 /* 2137 * model 9 stepping 1 has wrong associativity 2138 */ 2139 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 2140 cp->cp_ecx |= 8 << 12; 2141 break; 2142 case X86_VENDOR_Intel: 2143 /* 2144 * Extended L2 Cache features function. 2145 * First appeared on Prescott. 2146 */ 2147 default: 2148 break; 2149 } 2150 break; 2151 default: 2152 break; 2153 } 2154 } 2155 2156 pass2_done: 2157 cpi->cpi_pass = 2; 2158 } 2159 2160 static const char * 2161 intel_cpubrand(const struct cpuid_info *cpi) 2162 { 2163 int i; 2164 2165 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2166 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 2167 return ("i486"); 2168 2169 switch (cpi->cpi_family) { 2170 case 5: 2171 return ("Intel Pentium(r)"); 2172 case 6: 2173 switch (cpi->cpi_model) { 2174 uint_t celeron, xeon; 2175 const struct cpuid_regs *cp; 2176 case 0: 2177 case 1: 2178 case 2: 2179 return ("Intel Pentium(r) Pro"); 2180 case 3: 2181 case 4: 2182 return ("Intel Pentium(r) II"); 2183 case 6: 2184 return ("Intel Celeron(r)"); 2185 case 5: 2186 case 7: 2187 celeron = xeon = 0; 2188 cp = &cpi->cpi_std[2]; /* cache info */ 2189 2190 for (i = 1; i < 4; i++) { 2191 uint_t tmp; 2192 2193 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 2194 if (tmp == 0x40) 2195 celeron++; 2196 if (tmp >= 0x44 && tmp <= 0x45) 2197 xeon++; 2198 } 2199 2200 for (i = 0; i < 2; i++) { 2201 uint_t tmp; 2202 2203 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 2204 if (tmp == 0x40) 2205 celeron++; 2206 else if (tmp >= 0x44 && tmp <= 0x45) 2207 xeon++; 2208 } 2209 2210 for (i = 0; i < 4; i++) { 2211 uint_t tmp; 2212 2213 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 2214 if (tmp == 0x40) 2215 celeron++; 2216 else if (tmp >= 0x44 && tmp <= 0x45) 2217 xeon++; 2218 } 2219 2220 for (i = 0; i < 4; i++) { 2221 uint_t tmp; 2222 2223 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 2224 if (tmp == 0x40) 2225 celeron++; 2226 else if (tmp >= 0x44 && tmp <= 0x45) 2227 xeon++; 2228 } 2229 2230 if (celeron) 2231 return ("Intel Celeron(r)"); 2232 if (xeon) 2233 return (cpi->cpi_model == 5 ? 2234 "Intel Pentium(r) II Xeon(tm)" : 2235 "Intel Pentium(r) III Xeon(tm)"); 2236 return (cpi->cpi_model == 5 ? 
2237 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 2238 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 2239 default: 2240 break; 2241 } 2242 default: 2243 break; 2244 } 2245 2246 /* BrandID is present if the field is nonzero */ 2247 if (cpi->cpi_brandid != 0) { 2248 static const struct { 2249 uint_t bt_bid; 2250 const char *bt_str; 2251 } brand_tbl[] = { 2252 { 0x1, "Intel(r) Celeron(r)" }, 2253 { 0x2, "Intel(r) Pentium(r) III" }, 2254 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 2255 { 0x4, "Intel(r) Pentium(r) III" }, 2256 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 2257 { 0x7, "Mobile Intel(r) Celeron(r)" }, 2258 { 0x8, "Intel(r) Pentium(r) 4" }, 2259 { 0x9, "Intel(r) Pentium(r) 4" }, 2260 { 0xa, "Intel(r) Celeron(r)" }, 2261 { 0xb, "Intel(r) Xeon(tm)" }, 2262 { 0xc, "Intel(r) Xeon(tm) MP" }, 2263 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 2264 { 0xf, "Mobile Intel(r) Celeron(r)" }, 2265 { 0x11, "Mobile Genuine Intel(r)" }, 2266 { 0x12, "Intel(r) Celeron(r) M" }, 2267 { 0x13, "Mobile Intel(r) Celeron(r)" }, 2268 { 0x14, "Intel(r) Celeron(r)" }, 2269 { 0x15, "Mobile Genuine Intel(r)" }, 2270 { 0x16, "Intel(r) Pentium(r) M" }, 2271 { 0x17, "Mobile Intel(r) Celeron(r)" } 2272 }; 2273 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 2274 uint_t sgn; 2275 2276 sgn = (cpi->cpi_family << 8) | 2277 (cpi->cpi_model << 4) | cpi->cpi_step; 2278 2279 for (i = 0; i < btblmax; i++) 2280 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 2281 break; 2282 if (i < btblmax) { 2283 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 2284 return ("Intel(r) Celeron(r)"); 2285 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 2286 return ("Intel(r) Xeon(tm) MP"); 2287 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 2288 return ("Intel(r) Xeon(tm)"); 2289 return (brand_tbl[i].bt_str); 2290 } 2291 } 2292 2293 return (NULL); 2294 } 2295 2296 static const char * 2297 amd_cpubrand(const struct cpuid_info *cpi) 2298 { 2299 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2300 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 2301 return ("i486 compatible"); 2302 2303 switch (cpi->cpi_family) { 2304 case 5: 2305 switch (cpi->cpi_model) { 2306 case 0: 2307 case 1: 2308 case 2: 2309 case 3: 2310 case 4: 2311 case 5: 2312 return ("AMD-K5(r)"); 2313 case 6: 2314 case 7: 2315 return ("AMD-K6(r)"); 2316 case 8: 2317 return ("AMD-K6(r)-2"); 2318 case 9: 2319 return ("AMD-K6(r)-III"); 2320 default: 2321 return ("AMD (family 5)"); 2322 } 2323 case 6: 2324 switch (cpi->cpi_model) { 2325 case 1: 2326 return ("AMD-K7(tm)"); 2327 case 0: 2328 case 2: 2329 case 4: 2330 return ("AMD Athlon(tm)"); 2331 case 3: 2332 case 7: 2333 return ("AMD Duron(tm)"); 2334 case 6: 2335 case 8: 2336 case 10: 2337 /* 2338 * Use the L2 cache size to distinguish 2339 */ 2340 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
2341 "AMD Athlon(tm)" : "AMD Duron(tm)"); 2342 default: 2343 return ("AMD (family 6)"); 2344 } 2345 default: 2346 break; 2347 } 2348 2349 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 2350 cpi->cpi_brandid != 0) { 2351 switch (BITX(cpi->cpi_brandid, 7, 5)) { 2352 case 3: 2353 return ("AMD Opteron(tm) UP 1xx"); 2354 case 4: 2355 return ("AMD Opteron(tm) DP 2xx"); 2356 case 5: 2357 return ("AMD Opteron(tm) MP 8xx"); 2358 default: 2359 return ("AMD Opteron(tm)"); 2360 } 2361 } 2362 2363 return (NULL); 2364 } 2365 2366 static const char * 2367 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 2368 { 2369 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2370 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 2371 type == X86_TYPE_CYRIX_486) 2372 return ("i486 compatible"); 2373 2374 switch (type) { 2375 case X86_TYPE_CYRIX_6x86: 2376 return ("Cyrix 6x86"); 2377 case X86_TYPE_CYRIX_6x86L: 2378 return ("Cyrix 6x86L"); 2379 case X86_TYPE_CYRIX_6x86MX: 2380 return ("Cyrix 6x86MX"); 2381 case X86_TYPE_CYRIX_GXm: 2382 return ("Cyrix GXm"); 2383 case X86_TYPE_CYRIX_MediaGX: 2384 return ("Cyrix MediaGX"); 2385 case X86_TYPE_CYRIX_MII: 2386 return ("Cyrix M2"); 2387 case X86_TYPE_VIA_CYRIX_III: 2388 return ("VIA Cyrix M3"); 2389 default: 2390 /* 2391 * Have another wild guess .. 2392 */ 2393 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 2394 return ("Cyrix 5x86"); 2395 else if (cpi->cpi_family == 5) { 2396 switch (cpi->cpi_model) { 2397 case 2: 2398 return ("Cyrix 6x86"); /* Cyrix M1 */ 2399 case 4: 2400 return ("Cyrix MediaGX"); 2401 default: 2402 break; 2403 } 2404 } else if (cpi->cpi_family == 6) { 2405 switch (cpi->cpi_model) { 2406 case 0: 2407 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 2408 case 5: 2409 case 6: 2410 case 7: 2411 case 8: 2412 case 9: 2413 return ("VIA C3"); 2414 default: 2415 break; 2416 } 2417 } 2418 break; 2419 } 2420 return (NULL); 2421 } 2422 2423 /* 2424 * This only gets called in the case that the CPU extended 2425 * feature brand string (0x80000002, 0x80000003, 0x80000004) 2426 * aren't available, or contain null bytes for some reason. 2427 */ 2428 static void 2429 fabricate_brandstr(struct cpuid_info *cpi) 2430 { 2431 const char *brand = NULL; 2432 2433 switch (cpi->cpi_vendor) { 2434 case X86_VENDOR_Intel: 2435 brand = intel_cpubrand(cpi); 2436 break; 2437 case X86_VENDOR_AMD: 2438 brand = amd_cpubrand(cpi); 2439 break; 2440 case X86_VENDOR_Cyrix: 2441 brand = cyrix_cpubrand(cpi, x86_type); 2442 break; 2443 case X86_VENDOR_NexGen: 2444 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2445 brand = "NexGen Nx586"; 2446 break; 2447 case X86_VENDOR_Centaur: 2448 if (cpi->cpi_family == 5) 2449 switch (cpi->cpi_model) { 2450 case 4: 2451 brand = "Centaur C6"; 2452 break; 2453 case 8: 2454 brand = "Centaur C2"; 2455 break; 2456 case 9: 2457 brand = "Centaur C3"; 2458 break; 2459 default: 2460 break; 2461 } 2462 break; 2463 case X86_VENDOR_Rise: 2464 if (cpi->cpi_family == 5 && 2465 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 2466 brand = "Rise mP6"; 2467 break; 2468 case X86_VENDOR_SiS: 2469 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2470 brand = "SiS 55x"; 2471 break; 2472 case X86_VENDOR_TM: 2473 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 2474 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 2475 break; 2476 case X86_VENDOR_NSC: 2477 case X86_VENDOR_UMC: 2478 default: 2479 break; 2480 } 2481 if (brand) { 2482 (void) strcpy((char *)cpi->cpi_brandstr, brand); 2483 return; 2484 } 2485 2486 /* 2487 * If all else fails ... 
2488 */
2489 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2490 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2491 cpi->cpi_model, cpi->cpi_step);
2492 }
2493
2494 /*
2495 * This routine is called just after kernel memory allocation
2496 * becomes available on cpu0, and as part of mp_startup() on
2497 * the other cpus.
2498 *
2499 * Fixup the brand string, and collect any information from cpuid
2500 * that requires dynamically allocated storage to represent.
2501 */
2502 /*ARGSUSED*/
2503 void
2504 cpuid_pass3(cpu_t *cpu)
2505 {
2506 int i, max, shft, level, size;
2507 struct cpuid_regs regs;
2508 struct cpuid_regs *cp;
2509 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2510
2511 ASSERT(cpi->cpi_pass == 2);
2512
2513 /*
2514 * Function 4: Deterministic cache parameters
2515 *
2516 * Take this opportunity to detect the number of threads
2517 * sharing the last level cache, and construct a corresponding
2518 * cache id. The respective cpuid_info members are initialized
2519 * to the default case of "no last level cache sharing".
2520 */
2521 cpi->cpi_ncpu_shr_last_cache = 1;
2522 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2523
2524 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2525
2526 /*
2527 * Find the # of elements (size) returned by fn 4, and along
2528 * the way detect last level cache sharing details.
2529 */
2530 bzero(&regs, sizeof (regs));
2531 cp = &regs;
2532 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2533 cp->cp_eax = 4;
2534 cp->cp_ecx = i;
2535
2536 (void) __cpuid_insn(cp);
2537
2538 if (CPI_CACHE_TYPE(cp) == 0)
2539 break;
2540 level = CPI_CACHE_LVL(cp);
2541 if (level > max) {
2542 max = level;
2543 cpi->cpi_ncpu_shr_last_cache =
2544 CPI_NTHR_SHR_CACHE(cp) + 1;
2545 }
2546 }
2547 cpi->cpi_std_4_size = size = i;
2548
2549 /*
2550 * Allocate the cpi_std_4 array. The first element
2551 * references the regs for fn 4, %ecx == 0, which
2552 * cpuid_pass2() stashed in cpi->cpi_std[4].
2553 */
2554 if (size > 0) {
2555 cpi->cpi_std_4 =
2556 kmem_alloc(size * sizeof (cp), KM_SLEEP);
2557 cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2558
2559 /*
2560 * Allocate storage to hold the additional regs
2561 * for function 4, %ecx == 1 .. cpi_std_4_size.
2562 *
2563 * The regs for fn 4, %ecx == 0 has already
2564 * been allocated as indicated above.
2565 */
2566 for (i = 1; i < size; i++) {
2567 cp = cpi->cpi_std_4[i] =
2568 kmem_zalloc(sizeof (regs), KM_SLEEP);
2569 cp->cp_eax = 4;
2570 cp->cp_ecx = i;
2571
2572 (void) __cpuid_insn(cp);
2573 }
2574 }
2575 /*
2576 * Determine the number of bits needed to represent
2577 * the number of CPUs sharing the last level cache.
2578 *
2579 * Shift off that number of bits from the APIC id to
2580 * derive the cache id.
2581 */
2582 shft = 0;
2583 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2584 shft++;
2585 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2586 }
2587
2588 /*
2589 * Now fixup the brand string
2590 */
2591 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2592 fabricate_brandstr(cpi);
2593 } else {
2594
2595 /*
2596 * If we successfully extracted a brand string from the cpuid
2597 * instruction, clean it up by removing leading spaces and
2598 * similar junk.
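 * As an illustrative example (hypothetical raw string, not taken
 * from any specific part), the transformations below would turn
 *
 *	"  Genuine Intel(R) Xeon(TM) CPU 3.00GHz"
 * into
 *	"Intel(r) Xeon(tm) CPU 3.00GHz"
 *
 * i.e. leading blanks and the "Genuine "/"Authentic " prefixes are
 * dropped, and (R)/(TM) are folded to lower case in place.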
2599 */ 2600 if (cpi->cpi_brandstr[0]) { 2601 size_t maxlen = sizeof (cpi->cpi_brandstr); 2602 char *src, *dst; 2603 2604 dst = src = (char *)cpi->cpi_brandstr; 2605 src[maxlen - 1] = '\0'; 2606 /* 2607 * strip leading spaces 2608 */ 2609 while (*src == ' ') 2610 src++; 2611 /* 2612 * Remove any 'Genuine' or "Authentic" prefixes 2613 */ 2614 if (strncmp(src, "Genuine ", 8) == 0) 2615 src += 8; 2616 if (strncmp(src, "Authentic ", 10) == 0) 2617 src += 10; 2618 2619 /* 2620 * Now do an in-place copy. 2621 * Map (R) to (r) and (TM) to (tm). 2622 * The era of teletypes is long gone, and there's 2623 * -really- no need to shout. 2624 */ 2625 while (*src != '\0') { 2626 if (src[0] == '(') { 2627 if (strncmp(src + 1, "R)", 2) == 0) { 2628 (void) strncpy(dst, "(r)", 3); 2629 src += 3; 2630 dst += 3; 2631 continue; 2632 } 2633 if (strncmp(src + 1, "TM)", 3) == 0) { 2634 (void) strncpy(dst, "(tm)", 4); 2635 src += 4; 2636 dst += 4; 2637 continue; 2638 } 2639 } 2640 *dst++ = *src++; 2641 } 2642 *dst = '\0'; 2643 2644 /* 2645 * Finally, remove any trailing spaces 2646 */ 2647 while (--dst > cpi->cpi_brandstr) 2648 if (*dst == ' ') 2649 *dst = '\0'; 2650 else 2651 break; 2652 } else 2653 fabricate_brandstr(cpi); 2654 } 2655 cpi->cpi_pass = 3; 2656 } 2657 2658 /* 2659 * This routine is called out of bind_hwcap() much later in the life 2660 * of the kernel (post_startup()). The job of this routine is to resolve 2661 * the hardware feature support and kernel support for those features into 2662 * what we're actually going to tell applications via the aux vector. 2663 */ 2664 void 2665 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out) 2666 { 2667 struct cpuid_info *cpi; 2668 uint_t hwcap_flags = 0, hwcap_flags_2 = 0; 2669 2670 if (cpu == NULL) 2671 cpu = CPU; 2672 cpi = cpu->cpu_m.mcpu_cpi; 2673 2674 ASSERT(cpi->cpi_pass == 3); 2675 2676 if (cpi->cpi_maxeax >= 1) { 2677 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2678 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2679 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES]; 2680 2681 *edx = CPI_FEATURES_EDX(cpi); 2682 *ecx = CPI_FEATURES_ECX(cpi); 2683 *ebx = CPI_FEATURES_7_0_EBX(cpi); 2684 2685 /* 2686 * [these require explicit kernel support] 2687 */ 2688 if (!is_x86_feature(x86_featureset, X86FSET_SEP)) 2689 *edx &= ~CPUID_INTC_EDX_SEP; 2690 2691 if (!is_x86_feature(x86_featureset, X86FSET_SSE)) 2692 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2693 if (!is_x86_feature(x86_featureset, X86FSET_SSE2)) 2694 *edx &= ~CPUID_INTC_EDX_SSE2; 2695 2696 if (!is_x86_feature(x86_featureset, X86FSET_HTT)) 2697 *edx &= ~CPUID_INTC_EDX_HTT; 2698 2699 if (!is_x86_feature(x86_featureset, X86FSET_SSE3)) 2700 *ecx &= ~CPUID_INTC_ECX_SSE3; 2701 2702 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3)) 2703 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2704 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1)) 2705 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2706 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2)) 2707 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2708 if (!is_x86_feature(x86_featureset, X86FSET_AES)) 2709 *ecx &= ~CPUID_INTC_ECX_AES; 2710 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ)) 2711 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ; 2712 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) 2713 *ecx &= ~(CPUID_INTC_ECX_XSAVE | 2714 CPUID_INTC_ECX_OSXSAVE); 2715 if (!is_x86_feature(x86_featureset, X86FSET_AVX)) 2716 *ecx &= ~CPUID_INTC_ECX_AVX; 2717 if (!is_x86_feature(x86_featureset, X86FSET_F16C)) 2718 *ecx &= ~CPUID_INTC_ECX_F16C; 2719 if (!is_x86_feature(x86_featureset, X86FSET_FMA)) 
2720 *ecx &= ~CPUID_INTC_ECX_FMA; 2721 if (!is_x86_feature(x86_featureset, X86FSET_BMI1)) 2722 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 2723 if (!is_x86_feature(x86_featureset, X86FSET_BMI2)) 2724 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 2725 if (!is_x86_feature(x86_featureset, X86FSET_AVX2)) 2726 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 2727 2728 /* 2729 * [no explicit support required beyond x87 fp context] 2730 */ 2731 if (!fpu_exists) 2732 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2733 2734 /* 2735 * Now map the supported feature vector to things that we 2736 * think userland will care about. 2737 */ 2738 if (*edx & CPUID_INTC_EDX_SEP) 2739 hwcap_flags |= AV_386_SEP; 2740 if (*edx & CPUID_INTC_EDX_SSE) 2741 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2742 if (*edx & CPUID_INTC_EDX_SSE2) 2743 hwcap_flags |= AV_386_SSE2; 2744 if (*ecx & CPUID_INTC_ECX_SSE3) 2745 hwcap_flags |= AV_386_SSE3; 2746 if (*ecx & CPUID_INTC_ECX_SSSE3) 2747 hwcap_flags |= AV_386_SSSE3; 2748 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2749 hwcap_flags |= AV_386_SSE4_1; 2750 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2751 hwcap_flags |= AV_386_SSE4_2; 2752 if (*ecx & CPUID_INTC_ECX_MOVBE) 2753 hwcap_flags |= AV_386_MOVBE; 2754 if (*ecx & CPUID_INTC_ECX_AES) 2755 hwcap_flags |= AV_386_AES; 2756 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2757 hwcap_flags |= AV_386_PCLMULQDQ; 2758 if ((*ecx & CPUID_INTC_ECX_XSAVE) && 2759 (*ecx & CPUID_INTC_ECX_OSXSAVE)) { 2760 hwcap_flags |= AV_386_XSAVE; 2761 2762 if (*ecx & CPUID_INTC_ECX_AVX) { 2763 hwcap_flags |= AV_386_AVX; 2764 if (*ecx & CPUID_INTC_ECX_F16C) 2765 hwcap_flags_2 |= AV_386_2_F16C; 2766 if (*ecx & CPUID_INTC_ECX_FMA) 2767 hwcap_flags_2 |= AV_386_2_FMA; 2768 if (*ebx & CPUID_INTC_EBX_7_0_BMI1) 2769 hwcap_flags_2 |= AV_386_2_BMI1; 2770 if (*ebx & CPUID_INTC_EBX_7_0_BMI2) 2771 hwcap_flags_2 |= AV_386_2_BMI2; 2772 if (*ebx & CPUID_INTC_EBX_7_0_AVX2) 2773 hwcap_flags_2 |= AV_386_2_AVX2; 2774 } 2775 } 2776 if (*ecx & CPUID_INTC_ECX_VMX) 2777 hwcap_flags |= AV_386_VMX; 2778 if (*ecx & CPUID_INTC_ECX_POPCNT) 2779 hwcap_flags |= AV_386_POPCNT; 2780 if (*edx & CPUID_INTC_EDX_FPU) 2781 hwcap_flags |= AV_386_FPU; 2782 if (*edx & CPUID_INTC_EDX_MMX) 2783 hwcap_flags |= AV_386_MMX; 2784 2785 if (*edx & CPUID_INTC_EDX_TSC) 2786 hwcap_flags |= AV_386_TSC; 2787 if (*edx & CPUID_INTC_EDX_CX8) 2788 hwcap_flags |= AV_386_CX8; 2789 if (*edx & CPUID_INTC_EDX_CMOV) 2790 hwcap_flags |= AV_386_CMOV; 2791 if (*ecx & CPUID_INTC_ECX_CX16) 2792 hwcap_flags |= AV_386_CX16; 2793 2794 if (*ecx & CPUID_INTC_ECX_RDRAND) 2795 hwcap_flags_2 |= AV_386_2_RDRAND; 2796 } 2797 2798 if (cpi->cpi_xmaxeax < 0x80000001) 2799 goto pass4_done; 2800 2801 switch (cpi->cpi_vendor) { 2802 struct cpuid_regs cp; 2803 uint32_t *edx, *ecx; 2804 2805 case X86_VENDOR_Intel: 2806 /* 2807 * Seems like Intel duplicated what we necessary 2808 * here to make the initial crop of 64-bit OS's work. 2809 * Hopefully, those are the only "extended" bits 2810 * they'll add. 
2811 */ 2812 /*FALLTHROUGH*/ 2813 2814 case X86_VENDOR_AMD: 2815 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2816 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2817 2818 *edx = CPI_FEATURES_XTD_EDX(cpi); 2819 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2820 2821 /* 2822 * [these features require explicit kernel support] 2823 */ 2824 switch (cpi->cpi_vendor) { 2825 case X86_VENDOR_Intel: 2826 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 2827 *edx &= ~CPUID_AMD_EDX_TSCP; 2828 break; 2829 2830 case X86_VENDOR_AMD: 2831 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 2832 *edx &= ~CPUID_AMD_EDX_TSCP; 2833 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A)) 2834 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2835 break; 2836 2837 default: 2838 break; 2839 } 2840 2841 /* 2842 * [no explicit support required beyond 2843 * x87 fp context and exception handlers] 2844 */ 2845 if (!fpu_exists) 2846 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2847 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2848 2849 if (!is_x86_feature(x86_featureset, X86FSET_NX)) 2850 *edx &= ~CPUID_AMD_EDX_NX; 2851 #if !defined(__amd64) 2852 *edx &= ~CPUID_AMD_EDX_LM; 2853 #endif 2854 /* 2855 * Now map the supported feature vector to 2856 * things that we think userland will care about. 2857 */ 2858 #if defined(__amd64) 2859 if (*edx & CPUID_AMD_EDX_SYSC) 2860 hwcap_flags |= AV_386_AMD_SYSC; 2861 #endif 2862 if (*edx & CPUID_AMD_EDX_MMXamd) 2863 hwcap_flags |= AV_386_AMD_MMX; 2864 if (*edx & CPUID_AMD_EDX_3DNow) 2865 hwcap_flags |= AV_386_AMD_3DNow; 2866 if (*edx & CPUID_AMD_EDX_3DNowx) 2867 hwcap_flags |= AV_386_AMD_3DNowx; 2868 if (*ecx & CPUID_AMD_ECX_SVM) 2869 hwcap_flags |= AV_386_AMD_SVM; 2870 2871 switch (cpi->cpi_vendor) { 2872 case X86_VENDOR_AMD: 2873 if (*edx & CPUID_AMD_EDX_TSCP) 2874 hwcap_flags |= AV_386_TSCP; 2875 if (*ecx & CPUID_AMD_ECX_AHF64) 2876 hwcap_flags |= AV_386_AHF; 2877 if (*ecx & CPUID_AMD_ECX_SSE4A) 2878 hwcap_flags |= AV_386_AMD_SSE4A; 2879 if (*ecx & CPUID_AMD_ECX_LZCNT) 2880 hwcap_flags |= AV_386_AMD_LZCNT; 2881 break; 2882 2883 case X86_VENDOR_Intel: 2884 if (*edx & CPUID_AMD_EDX_TSCP) 2885 hwcap_flags |= AV_386_TSCP; 2886 /* 2887 * Aarrgh. 2888 * Intel uses a different bit in the same word. 2889 */ 2890 if (*ecx & CPUID_INTC_ECX_AHF64) 2891 hwcap_flags |= AV_386_AHF; 2892 break; 2893 2894 default: 2895 break; 2896 } 2897 break; 2898 2899 case X86_VENDOR_TM: 2900 cp.cp_eax = 0x80860001; 2901 (void) __cpuid_insn(&cp); 2902 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2903 break; 2904 2905 default: 2906 break; 2907 } 2908 2909 pass4_done: 2910 cpi->cpi_pass = 4; 2911 if (hwcap_out != NULL) { 2912 hwcap_out[0] = hwcap_flags; 2913 hwcap_out[1] = hwcap_flags_2; 2914 } 2915 } 2916 2917 2918 /* 2919 * Simulate the cpuid instruction using the data we previously 2920 * captured about this CPU. We try our best to return the truth 2921 * about the hardware, independently of kernel support. 2922 */ 2923 uint32_t 2924 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2925 { 2926 struct cpuid_info *cpi; 2927 struct cpuid_regs *xcp; 2928 2929 if (cpu == NULL) 2930 cpu = CPU; 2931 cpi = cpu->cpu_m.mcpu_cpi; 2932 2933 ASSERT(cpuid_checkpass(cpu, 3)); 2934 2935 /* 2936 * CPUID data is cached in two separate places: cpi_std for standard 2937 * CPUID functions, and cpi_extd for extended CPUID functions. 
2938 */ 2939 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2940 xcp = &cpi->cpi_std[cp->cp_eax]; 2941 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2942 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2943 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2944 else 2945 /* 2946 * The caller is asking for data from an input parameter which 2947 * the kernel has not cached. In this case we go fetch from 2948 * the hardware and return the data directly to the user. 2949 */ 2950 return (__cpuid_insn(cp)); 2951 2952 cp->cp_eax = xcp->cp_eax; 2953 cp->cp_ebx = xcp->cp_ebx; 2954 cp->cp_ecx = xcp->cp_ecx; 2955 cp->cp_edx = xcp->cp_edx; 2956 return (cp->cp_eax); 2957 } 2958 2959 int 2960 cpuid_checkpass(cpu_t *cpu, int pass) 2961 { 2962 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2963 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2964 } 2965 2966 int 2967 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2968 { 2969 ASSERT(cpuid_checkpass(cpu, 3)); 2970 2971 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2972 } 2973 2974 int 2975 cpuid_is_cmt(cpu_t *cpu) 2976 { 2977 if (cpu == NULL) 2978 cpu = CPU; 2979 2980 ASSERT(cpuid_checkpass(cpu, 1)); 2981 2982 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2983 } 2984 2985 /* 2986 * AMD and Intel both implement the 64-bit variant of the syscall 2987 * instruction (syscallq), so if there's -any- support for syscall, 2988 * cpuid currently says "yes, we support this". 2989 * 2990 * However, Intel decided to -not- implement the 32-bit variant of the 2991 * syscall instruction, so we provide a predicate to allow our caller 2992 * to test that subtlety here. 2993 * 2994 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2995 * even in the case where the hardware would in fact support it. 2996 */ 2997 /*ARGSUSED*/ 2998 int 2999 cpuid_syscall32_insn(cpu_t *cpu) 3000 { 3001 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 3002 3003 #if !defined(__xpv) 3004 if (cpu == NULL) 3005 cpu = CPU; 3006 3007 /*CSTYLED*/ 3008 { 3009 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3010 3011 if (cpi->cpi_vendor == X86_VENDOR_AMD && 3012 cpi->cpi_xmaxeax >= 0x80000001 && 3013 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 3014 return (1); 3015 } 3016 #endif 3017 return (0); 3018 } 3019 3020 int 3021 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 3022 { 3023 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3024 3025 static const char fmt[] = 3026 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 3027 static const char fmt_ht[] = 3028 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 3029 3030 ASSERT(cpuid_checkpass(cpu, 1)); 3031 3032 if (cpuid_is_cmt(cpu)) 3033 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 3034 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 3035 cpi->cpi_family, cpi->cpi_model, 3036 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 3037 return (snprintf(s, n, fmt, 3038 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 3039 cpi->cpi_family, cpi->cpi_model, 3040 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 3041 } 3042 3043 const char * 3044 cpuid_getvendorstr(cpu_t *cpu) 3045 { 3046 ASSERT(cpuid_checkpass(cpu, 1)); 3047 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 3048 } 3049 3050 uint_t 3051 cpuid_getvendor(cpu_t *cpu) 3052 { 3053 ASSERT(cpuid_checkpass(cpu, 1)); 3054 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 3055 } 3056 3057 uint_t 3058 cpuid_getfamily(cpu_t *cpu) 3059 { 3060 ASSERT(cpuid_checkpass(cpu, 1)); 3061 return (cpu->cpu_m.mcpu_cpi->cpi_family); 3062 } 3063 3064 uint_t 3065 cpuid_getmodel(cpu_t *cpu) 3066 { 3067 ASSERT(cpuid_checkpass(cpu, 1)); 3068 return (cpu->cpu_m.mcpu_cpi->cpi_model); 3069 } 3070 3071 uint_t 3072 cpuid_get_ncpu_per_chip(cpu_t *cpu) 3073 { 3074 ASSERT(cpuid_checkpass(cpu, 1)); 3075 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 3076 } 3077 3078 uint_t 3079 cpuid_get_ncore_per_chip(cpu_t *cpu) 3080 { 3081 ASSERT(cpuid_checkpass(cpu, 1)); 3082 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 3083 } 3084 3085 uint_t 3086 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 3087 { 3088 ASSERT(cpuid_checkpass(cpu, 2)); 3089 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 3090 } 3091 3092 id_t 3093 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 3094 { 3095 ASSERT(cpuid_checkpass(cpu, 2)); 3096 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 3097 } 3098 3099 uint_t 3100 cpuid_getstep(cpu_t *cpu) 3101 { 3102 ASSERT(cpuid_checkpass(cpu, 1)); 3103 return (cpu->cpu_m.mcpu_cpi->cpi_step); 3104 } 3105 3106 uint_t 3107 cpuid_getsig(struct cpu *cpu) 3108 { 3109 ASSERT(cpuid_checkpass(cpu, 1)); 3110 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 3111 } 3112 3113 uint32_t 3114 cpuid_getchiprev(struct cpu *cpu) 3115 { 3116 ASSERT(cpuid_checkpass(cpu, 1)); 3117 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 3118 } 3119 3120 const char * 3121 cpuid_getchiprevstr(struct cpu *cpu) 3122 { 3123 ASSERT(cpuid_checkpass(cpu, 1)); 3124 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 3125 } 3126 3127 uint32_t 3128 cpuid_getsockettype(struct cpu *cpu) 3129 { 3130 ASSERT(cpuid_checkpass(cpu, 1)); 3131 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 3132 } 3133 3134 const char * 3135 cpuid_getsocketstr(cpu_t *cpu) 3136 { 3137 static const char *socketstr = NULL; 3138 struct cpuid_info *cpi; 3139 3140 ASSERT(cpuid_checkpass(cpu, 1)); 3141 cpi = cpu->cpu_m.mcpu_cpi; 3142 3143 /* Assume that socket types are the same across the system */ 3144 if (socketstr == NULL) 3145 socketstr 
= _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 3146 cpi->cpi_model, cpi->cpi_step); 3147 3148 3149 return (socketstr); 3150 } 3151 3152 int 3153 cpuid_get_chipid(cpu_t *cpu) 3154 { 3155 ASSERT(cpuid_checkpass(cpu, 1)); 3156 3157 if (cpuid_is_cmt(cpu)) 3158 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 3159 return (cpu->cpu_id); 3160 } 3161 3162 id_t 3163 cpuid_get_coreid(cpu_t *cpu) 3164 { 3165 ASSERT(cpuid_checkpass(cpu, 1)); 3166 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 3167 } 3168 3169 int 3170 cpuid_get_pkgcoreid(cpu_t *cpu) 3171 { 3172 ASSERT(cpuid_checkpass(cpu, 1)); 3173 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 3174 } 3175 3176 int 3177 cpuid_get_clogid(cpu_t *cpu) 3178 { 3179 ASSERT(cpuid_checkpass(cpu, 1)); 3180 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 3181 } 3182 3183 int 3184 cpuid_get_cacheid(cpu_t *cpu) 3185 { 3186 ASSERT(cpuid_checkpass(cpu, 1)); 3187 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 3188 } 3189 3190 uint_t 3191 cpuid_get_procnodeid(cpu_t *cpu) 3192 { 3193 ASSERT(cpuid_checkpass(cpu, 1)); 3194 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid); 3195 } 3196 3197 uint_t 3198 cpuid_get_procnodes_per_pkg(cpu_t *cpu) 3199 { 3200 ASSERT(cpuid_checkpass(cpu, 1)); 3201 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg); 3202 } 3203 3204 uint_t 3205 cpuid_get_compunitid(cpu_t *cpu) 3206 { 3207 ASSERT(cpuid_checkpass(cpu, 1)); 3208 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid); 3209 } 3210 3211 uint_t 3212 cpuid_get_cores_per_compunit(cpu_t *cpu) 3213 { 3214 ASSERT(cpuid_checkpass(cpu, 1)); 3215 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit); 3216 } 3217 3218 /*ARGSUSED*/ 3219 int 3220 cpuid_have_cr8access(cpu_t *cpu) 3221 { 3222 #if defined(__amd64) 3223 return (1); 3224 #else 3225 struct cpuid_info *cpi; 3226 3227 ASSERT(cpu != NULL); 3228 cpi = cpu->cpu_m.mcpu_cpi; 3229 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 3230 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 3231 return (1); 3232 return (0); 3233 #endif 3234 } 3235 3236 uint32_t 3237 cpuid_get_apicid(cpu_t *cpu) 3238 { 3239 ASSERT(cpuid_checkpass(cpu, 1)); 3240 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 3241 return (UINT32_MAX); 3242 } else { 3243 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 3244 } 3245 } 3246 3247 void 3248 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 3249 { 3250 struct cpuid_info *cpi; 3251 3252 if (cpu == NULL) 3253 cpu = CPU; 3254 cpi = cpu->cpu_m.mcpu_cpi; 3255 3256 ASSERT(cpuid_checkpass(cpu, 1)); 3257 3258 if (pabits) 3259 *pabits = cpi->cpi_pabits; 3260 if (vabits) 3261 *vabits = cpi->cpi_vabits; 3262 } 3263 3264 /* 3265 * Returns the number of data TLB entries for a corresponding 3266 * pagesize. If it can't be computed, or isn't known, the 3267 * routine returns zero. If you ask about an architecturally 3268 * impossible pagesize, the routine will panic (so that the 3269 * hat implementor knows that things are inconsistent.) 3270 */ 3271 uint_t 3272 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 3273 { 3274 struct cpuid_info *cpi; 3275 uint_t dtlb_nent = 0; 3276 3277 if (cpu == NULL) 3278 cpu = CPU; 3279 cpi = cpu->cpu_m.mcpu_cpi; 3280 3281 ASSERT(cpuid_checkpass(cpu, 1)); 3282 3283 /* 3284 * Check the L2 TLB info 3285 */ 3286 if (cpi->cpi_xmaxeax >= 0x80000006) { 3287 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 3288 3289 switch (pagesize) { 3290 3291 case 4 * 1024: 3292 /* 3293 * All zero in the top 16 bits of the register 3294 * indicates a unified TLB. Size is in low 16 bits. 
3295 */ 3296 if ((cp->cp_ebx & 0xffff0000) == 0) 3297 dtlb_nent = cp->cp_ebx & 0x0000ffff; 3298 else 3299 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 3300 break; 3301 3302 case 2 * 1024 * 1024: 3303 if ((cp->cp_eax & 0xffff0000) == 0) 3304 dtlb_nent = cp->cp_eax & 0x0000ffff; 3305 else 3306 dtlb_nent = BITX(cp->cp_eax, 27, 16); 3307 break; 3308 3309 default: 3310 panic("unknown L2 pagesize"); 3311 /*NOTREACHED*/ 3312 } 3313 } 3314 3315 if (dtlb_nent != 0) 3316 return (dtlb_nent); 3317 3318 /* 3319 * No L2 TLB support for this size, try L1. 3320 */ 3321 if (cpi->cpi_xmaxeax >= 0x80000005) { 3322 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 3323 3324 switch (pagesize) { 3325 case 4 * 1024: 3326 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 3327 break; 3328 case 2 * 1024 * 1024: 3329 dtlb_nent = BITX(cp->cp_eax, 23, 16); 3330 break; 3331 default: 3332 panic("unknown L1 d-TLB pagesize"); 3333 /*NOTREACHED*/ 3334 } 3335 } 3336 3337 return (dtlb_nent); 3338 } 3339 3340 /* 3341 * Return 0 if the erratum is not present or not applicable, positive 3342 * if it is, and negative if the status of the erratum is unknown. 3343 * 3344 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 3345 * Processors" #25759, Rev 3.57, August 2005 3346 */ 3347 int 3348 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 3349 { 3350 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3351 uint_t eax; 3352 3353 /* 3354 * Bail out if this CPU isn't an AMD CPU, or if it's 3355 * a legacy (32-bit) AMD CPU. 3356 */ 3357 if (cpi->cpi_vendor != X86_VENDOR_AMD || 3358 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 3359 cpi->cpi_family == 6) 3360 3361 return (0); 3362 3363 eax = cpi->cpi_std[1].cp_eax; 3364 3365 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 3366 #define SH_B3(eax) (eax == 0xf51) 3367 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 3368 3369 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 3370 3371 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 3372 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 3373 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 3374 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 3375 3376 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 3377 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 3378 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 3379 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 3380 3381 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 3382 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 3383 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 3384 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 3385 #define BH_E4(eax) (eax == 0x20fb1) 3386 #define SH_E5(eax) (eax == 0x20f42) 3387 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 3388 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 3389 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 3390 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 3391 DH_E6(eax) || JH_E6(eax)) 3392 3393 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 3394 #define DR_B0(eax) (eax == 0x100f20) 3395 #define DR_B1(eax) (eax == 0x100f21) 3396 #define DR_BA(eax) (eax == 0x100f2a) 3397 #define DR_B2(eax) (eax == 0x100f22) 3398 #define DR_B3(eax) (eax == 0x100f23) 3399 #define RB_C0(eax) (eax == 0x100f40) 3400 3401 switch (erratum) { 3402 case 1: 3403 return (cpi->cpi_family < 0x10); 3404 case 51: /* what does the asterisk mean? 
*/
3405 return (B(eax) || SH_C0(eax) || CG(eax));
3406 case 52:
3407 return (B(eax));
3408 case 57:
3409 return (cpi->cpi_family <= 0x11);
3410 case 58:
3411 return (B(eax));
3412 case 60:
3413 return (cpi->cpi_family <= 0x11);
3414 case 61:
3415 case 62:
3416 case 63:
3417 case 64:
3418 case 65:
3419 case 66:
3420 case 68:
3421 case 69:
3422 case 70:
3423 case 71:
3424 return (B(eax));
3425 case 72:
3426 return (SH_B0(eax));
3427 case 74:
3428 return (B(eax));
3429 case 75:
3430 return (cpi->cpi_family < 0x10);
3431 case 76:
3432 return (B(eax));
3433 case 77:
3434 return (cpi->cpi_family <= 0x11);
3435 case 78:
3436 return (B(eax) || SH_C0(eax));
3437 case 79:
3438 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3439 case 80:
3440 case 81:
3441 case 82:
3442 return (B(eax));
3443 case 83:
3444 return (B(eax) || SH_C0(eax) || CG(eax));
3445 case 85:
3446 return (cpi->cpi_family < 0x10);
3447 case 86:
3448 return (SH_C0(eax) || CG(eax));
3449 case 88:
3450 #if !defined(__amd64)
3451 return (0);
3452 #else
3453 return (B(eax) || SH_C0(eax));
3454 #endif
3455 case 89:
3456 return (cpi->cpi_family < 0x10);
3457 case 90:
3458 return (B(eax) || SH_C0(eax) || CG(eax));
3459 case 91:
3460 case 92:
3461 return (B(eax) || SH_C0(eax));
3462 case 93:
3463 return (SH_C0(eax));
3464 case 94:
3465 return (B(eax) || SH_C0(eax) || CG(eax));
3466 case 95:
3467 #if !defined(__amd64)
3468 return (0);
3469 #else
3470 return (B(eax) || SH_C0(eax));
3471 #endif
3472 case 96:
3473 return (B(eax) || SH_C0(eax) || CG(eax));
3474 case 97:
3475 case 98:
3476 return (SH_C0(eax) || CG(eax));
3477 case 99:
3478 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3479 case 100:
3480 return (B(eax) || SH_C0(eax));
3481 case 101:
3482 case 103:
3483 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3484 case 104:
3485 return (SH_C0(eax) || CG(eax) || D0(eax));
3486 case 105:
3487 case 106:
3488 case 107:
3489 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3490 case 108:
3491 return (DH_CG(eax));
3492 case 109:
3493 return (SH_C0(eax) || CG(eax) || D0(eax));
3494 case 110:
3495 return (D0(eax) || EX(eax));
3496 case 111:
3497 return (CG(eax));
3498 case 112:
3499 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3500 case 113:
3501 return (eax == 0x20fc0);
3502 case 114:
3503 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3504 case 115:
3505 return (SH_E0(eax) || JH_E1(eax));
3506 case 116:
3507 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3508 case 117:
3509 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3510 case 118:
3511 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3512 JH_E6(eax));
3513 case 121:
3514 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3515 case 122:
3516 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3517 case 123:
3518 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3519 case 131:
3520 return (cpi->cpi_family < 0x10);
3521 case 6336786:
3522 /*
3523 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3524 * if this is a K8 family or newer processor
3525 */
3526 if (CPI_FAMILY(cpi) == 0xf) {
3527 struct cpuid_regs regs;
3528 regs.cp_eax = 0x80000007;
3529 (void) __cpuid_insn(&regs);
3530 return (!(regs.cp_edx & 0x100));
3531 }
3532 return (0);
3533 case 6323525:
3534 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3535 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3536
3537 case 6671130:
3538 /*
3539 * check for processors (pre-Shanghai) that do not provide
3540 * optimal management of 1gb ptes in
its tlb. 3541 */ 3542 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 3543 3544 case 298: 3545 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 3546 DR_B2(eax) || RB_C0(eax)); 3547 3548 case 721: 3549 #if defined(__amd64) 3550 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12); 3551 #else 3552 return (0); 3553 #endif 3554 3555 default: 3556 return (-1); 3557 3558 } 3559 } 3560 3561 /* 3562 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 3563 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 3564 */ 3565 int 3566 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 3567 { 3568 struct cpuid_info *cpi; 3569 uint_t osvwid; 3570 static int osvwfeature = -1; 3571 uint64_t osvwlength; 3572 3573 3574 cpi = cpu->cpu_m.mcpu_cpi; 3575 3576 /* confirm OSVW supported */ 3577 if (osvwfeature == -1) { 3578 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 3579 } else { 3580 /* assert that osvw feature setting is consistent on all cpus */ 3581 ASSERT(osvwfeature == 3582 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 3583 } 3584 if (!osvwfeature) 3585 return (-1); 3586 3587 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 3588 3589 switch (erratum) { 3590 case 298: /* osvwid is 0 */ 3591 osvwid = 0; 3592 if (osvwlength <= (uint64_t)osvwid) { 3593 /* osvwid 0 is unknown */ 3594 return (-1); 3595 } 3596 3597 /* 3598 * Check the OSVW STATUS MSR to determine the state 3599 * of the erratum where: 3600 * 0 - fixed by HW 3601 * 1 - BIOS has applied the workaround when BIOS 3602 * workaround is available. (Or for other errata, 3603 * OS workaround is required.) 3604 * For a value of 1, caller will confirm that the 3605 * erratum 298 workaround has indeed been applied by BIOS. 3606 * 3607 * A 1 may be set in cpus that have a HW fix 3608 * in a mixed cpu system. Regarding erratum 298: 3609 * In a multiprocessor platform, the workaround above 3610 * should be applied to all processors regardless of 3611 * silicon revision when an affected processor is 3612 * present. 3613 */ 3614 3615 return (rdmsr(MSR_AMD_OSVW_STATUS + 3616 (osvwid / OSVW_ID_CNT_PER_MSR)) & 3617 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 3618 3619 default: 3620 return (-1); 3621 } 3622 } 3623 3624 static const char assoc_str[] = "associativity"; 3625 static const char line_str[] = "line-size"; 3626 static const char size_str[] = "size"; 3627 3628 static void 3629 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 3630 uint32_t val) 3631 { 3632 char buf[128]; 3633 3634 /* 3635 * ndi_prop_update_int() is used because it is desirable for 3636 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 3637 */ 3638 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 3639 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 3640 } 3641 3642 /* 3643 * Intel-style cache/tlb description 3644 * 3645 * Standard cpuid level 2 gives a randomly ordered 3646 * selection of tags that index into a table that describes 3647 * cache and tlb properties. 
3648 */ 3649 3650 static const char l1_icache_str[] = "l1-icache"; 3651 static const char l1_dcache_str[] = "l1-dcache"; 3652 static const char l2_cache_str[] = "l2-cache"; 3653 static const char l3_cache_str[] = "l3-cache"; 3654 static const char itlb4k_str[] = "itlb-4K"; 3655 static const char dtlb4k_str[] = "dtlb-4K"; 3656 static const char itlb2M_str[] = "itlb-2M"; 3657 static const char itlb4M_str[] = "itlb-4M"; 3658 static const char dtlb4M_str[] = "dtlb-4M"; 3659 static const char dtlb24_str[] = "dtlb0-2M-4M"; 3660 static const char itlb424_str[] = "itlb-4K-2M-4M"; 3661 static const char itlb24_str[] = "itlb-2M-4M"; 3662 static const char dtlb44_str[] = "dtlb-4K-4M"; 3663 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 3664 static const char sl2_cache_str[] = "sectored-l2-cache"; 3665 static const char itrace_str[] = "itrace-cache"; 3666 static const char sl3_cache_str[] = "sectored-l3-cache"; 3667 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3668 3669 static const struct cachetab { 3670 uint8_t ct_code; 3671 uint8_t ct_assoc; 3672 uint16_t ct_line_size; 3673 size_t ct_size; 3674 const char *ct_label; 3675 } intel_ctab[] = { 3676 /* 3677 * maintain descending order! 3678 * 3679 * Codes ignored - Reason 3680 * ---------------------- 3681 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3682 * f0H/f1H - Currently we do not interpret prefetch size by design 3683 */ 3684 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3685 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3686 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3687 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3688 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3689 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3690 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3691 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3692 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3693 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3694 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3695 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3696 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3697 { 0xc0, 4, 0, 8, dtlb44_str }, 3698 { 0xba, 4, 0, 64, dtlb4k_str }, 3699 { 0xb4, 4, 0, 256, dtlb4k_str }, 3700 { 0xb3, 4, 0, 128, dtlb4k_str }, 3701 { 0xb2, 4, 0, 64, itlb4k_str }, 3702 { 0xb0, 4, 0, 128, itlb4k_str }, 3703 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3704 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3705 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3706 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3707 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3708 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3709 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3710 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3711 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3712 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3713 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3714 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3715 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3716 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3717 { 0x73, 8, 0, 64*1024, itrace_str}, 3718 { 0x72, 8, 0, 32*1024, itrace_str}, 3719 { 0x71, 8, 0, 16*1024, itrace_str}, 3720 { 0x70, 8, 0, 12*1024, itrace_str}, 3721 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3722 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3723 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3724 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3725 { 0x5d, 0, 0, 256, dtlb44_str}, 3726 { 0x5c, 0, 0, 128, dtlb44_str}, 3727 { 0x5b, 0, 0, 64, dtlb44_str}, 3728 { 0x5a, 4, 0, 32, dtlb24_str}, 3729 { 0x59, 0, 0, 16, dtlb4k_str}, 3730 { 0x57, 4, 0, 16, dtlb4k_str}, 3731 { 0x56, 4, 0, 16, dtlb4M_str}, 3732 { 0x55, 0, 0, 
7, itlb24_str}, 3733 { 0x52, 0, 0, 256, itlb424_str}, 3734 { 0x51, 0, 0, 128, itlb424_str}, 3735 { 0x50, 0, 0, 64, itlb424_str}, 3736 { 0x4f, 0, 0, 32, itlb4k_str}, 3737 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3738 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3739 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3740 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3741 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3742 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3743 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3744 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3745 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3746 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3747 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3748 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3749 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3750 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3751 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3752 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3753 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3754 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3755 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3756 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3757 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3758 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3759 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3760 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3761 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3762 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3763 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3764 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3765 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3766 { 0x0b, 4, 0, 4, itlb4M_str}, 3767 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3768 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3769 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3770 { 0x05, 4, 0, 32, dtlb4M_str}, 3771 { 0x04, 4, 0, 8, dtlb4M_str}, 3772 { 0x03, 4, 0, 64, dtlb4k_str}, 3773 { 0x02, 4, 0, 2, itlb4M_str}, 3774 { 0x01, 4, 0, 32, itlb4k_str}, 3775 { 0 } 3776 }; 3777 3778 static const struct cachetab cyrix_ctab[] = { 3779 { 0x70, 4, 0, 32, "tlb-4K" }, 3780 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3781 { 0 } 3782 }; 3783 3784 /* 3785 * Search a cache table for a matching entry 3786 */ 3787 static const struct cachetab * 3788 find_cacheent(const struct cachetab *ct, uint_t code) 3789 { 3790 if (code != 0) { 3791 for (; ct->ct_code != 0; ct++) 3792 if (ct->ct_code <= code) 3793 break; 3794 if (ct->ct_code == code) 3795 return (ct); 3796 } 3797 return (NULL); 3798 } 3799 3800 /* 3801 * Populate cachetab entry with L2 or L3 cache-information using 3802 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3803 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3804 * information is found. 
3805 */ 3806 static int 3807 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3808 { 3809 uint32_t level, i; 3810 int ret = 0; 3811 3812 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3813 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3814 3815 if (level == 2 || level == 3) { 3816 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3817 ct->ct_line_size = 3818 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3819 ct->ct_size = ct->ct_assoc * 3820 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3821 ct->ct_line_size * 3822 (cpi->cpi_std_4[i]->cp_ecx + 1); 3823 3824 if (level == 2) { 3825 ct->ct_label = l2_cache_str; 3826 } else if (level == 3) { 3827 ct->ct_label = l3_cache_str; 3828 } 3829 ret = 1; 3830 } 3831 } 3832 3833 return (ret); 3834 } 3835 3836 /* 3837 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3838 * The walk is terminated if the walker returns non-zero. 3839 */ 3840 static void 3841 intel_walk_cacheinfo(struct cpuid_info *cpi, 3842 void *arg, int (*func)(void *, const struct cachetab *)) 3843 { 3844 const struct cachetab *ct; 3845 struct cachetab des_49_ct, des_b1_ct; 3846 uint8_t *dp; 3847 int i; 3848 3849 if ((dp = cpi->cpi_cacheinfo) == NULL) 3850 return; 3851 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3852 /* 3853 * For overloaded descriptor 0x49 we use cpuid function 4 3854 * if supported by the current processor, to create 3855 * cache information. 3856 * For overloaded descriptor 0xb1 we use X86_PAE flag 3857 * to disambiguate the cache information. 3858 */ 3859 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3860 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3861 ct = &des_49_ct; 3862 } else if (*dp == 0xb1) { 3863 des_b1_ct.ct_code = 0xb1; 3864 des_b1_ct.ct_assoc = 4; 3865 des_b1_ct.ct_line_size = 0; 3866 if (is_x86_feature(x86_featureset, X86FSET_PAE)) { 3867 des_b1_ct.ct_size = 8; 3868 des_b1_ct.ct_label = itlb2M_str; 3869 } else { 3870 des_b1_ct.ct_size = 4; 3871 des_b1_ct.ct_label = itlb4M_str; 3872 } 3873 ct = &des_b1_ct; 3874 } else { 3875 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3876 continue; 3877 } 3878 } 3879 3880 if (func(arg, ct) != 0) { 3881 break; 3882 } 3883 } 3884 } 3885 3886 /* 3887 * (Like the Intel one, except for Cyrix CPUs) 3888 */ 3889 static void 3890 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3891 void *arg, int (*func)(void *, const struct cachetab *)) 3892 { 3893 const struct cachetab *ct; 3894 uint8_t *dp; 3895 int i; 3896 3897 if ((dp = cpi->cpi_cacheinfo) == NULL) 3898 return; 3899 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3900 /* 3901 * Search Cyrix-specific descriptor table first .. 3902 */ 3903 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3904 if (func(arg, ct) != 0) 3905 break; 3906 continue; 3907 } 3908 /* 3909 * .. else fall back to the Intel one 3910 */ 3911 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3912 if (func(arg, ct) != 0) 3913 break; 3914 continue; 3915 } 3916 } 3917 } 3918 3919 /* 3920 * A cacheinfo walker that adds associativity, line-size, and size properties 3921 * to the devinfo node it is passed as an argument. 
3922 */ 3923 static int 3924 add_cacheent_props(void *arg, const struct cachetab *ct) 3925 { 3926 dev_info_t *devi = arg; 3927 3928 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3929 if (ct->ct_line_size != 0) 3930 add_cache_prop(devi, ct->ct_label, line_str, 3931 ct->ct_line_size); 3932 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3933 return (0); 3934 } 3935 3936 3937 static const char fully_assoc[] = "fully-associative?"; 3938 3939 /* 3940 * AMD style cache/tlb description 3941 * 3942 * Extended functions 5 and 6 directly describe properties of 3943 * tlbs and various cache levels. 3944 */ 3945 static void 3946 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3947 { 3948 switch (assoc) { 3949 case 0: /* reserved; ignore */ 3950 break; 3951 default: 3952 add_cache_prop(devi, label, assoc_str, assoc); 3953 break; 3954 case 0xff: 3955 add_cache_prop(devi, label, fully_assoc, 1); 3956 break; 3957 } 3958 } 3959 3960 static void 3961 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3962 { 3963 if (size == 0) 3964 return; 3965 add_cache_prop(devi, label, size_str, size); 3966 add_amd_assoc(devi, label, assoc); 3967 } 3968 3969 static void 3970 add_amd_cache(dev_info_t *devi, const char *label, 3971 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3972 { 3973 if (size == 0 || line_size == 0) 3974 return; 3975 add_amd_assoc(devi, label, assoc); 3976 /* 3977 * Most AMD parts have a sectored cache. Multiple cache lines are 3978 * associated with each tag. A sector consists of all cache lines 3979 * associated with a tag. For example, the AMD K6-III has a sector 3980 * size of 2 cache lines per tag. 3981 */ 3982 if (lines_per_tag != 0) 3983 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3984 add_cache_prop(devi, label, line_str, line_size); 3985 add_cache_prop(devi, label, size_str, size * 1024); 3986 } 3987 3988 static void 3989 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3990 { 3991 switch (assoc) { 3992 case 0: /* off */ 3993 break; 3994 case 1: 3995 case 2: 3996 case 4: 3997 add_cache_prop(devi, label, assoc_str, assoc); 3998 break; 3999 case 6: 4000 add_cache_prop(devi, label, assoc_str, 8); 4001 break; 4002 case 8: 4003 add_cache_prop(devi, label, assoc_str, 16); 4004 break; 4005 case 0xf: 4006 add_cache_prop(devi, label, fully_assoc, 1); 4007 break; 4008 default: /* reserved; ignore */ 4009 break; 4010 } 4011 } 4012 4013 static void 4014 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 4015 { 4016 if (size == 0 || assoc == 0) 4017 return; 4018 add_amd_l2_assoc(devi, label, assoc); 4019 add_cache_prop(devi, label, size_str, size); 4020 } 4021 4022 static void 4023 add_amd_l2_cache(dev_info_t *devi, const char *label, 4024 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 4025 { 4026 if (size == 0 || assoc == 0 || line_size == 0) 4027 return; 4028 add_amd_l2_assoc(devi, label, assoc); 4029 if (lines_per_tag != 0) 4030 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 4031 add_cache_prop(devi, label, line_str, line_size); 4032 add_cache_prop(devi, label, size_str, size * 1024); 4033 } 4034 4035 static void 4036 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 4037 { 4038 struct cpuid_regs *cp; 4039 4040 if (cpi->cpi_xmaxeax < 0x80000005) 4041 return; 4042 cp = &cpi->cpi_extd[5]; 4043 4044 /* 4045 * 4M/2M L1 TLB configuration 4046 * 4047 * We report the size for 2M pages because AMD uses two 4048 * TLB 
entries for one 4M page. 4049 */ 4050 add_amd_tlb(devi, "dtlb-2M", 4051 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 4052 add_amd_tlb(devi, "itlb-2M", 4053 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 4054 4055 /* 4056 * 4K L1 TLB configuration 4057 */ 4058 4059 switch (cpi->cpi_vendor) { 4060 uint_t nentries; 4061 case X86_VENDOR_TM: 4062 if (cpi->cpi_family >= 5) { 4063 /* 4064 * Crusoe processors have 256 TLB entries, but 4065 * cpuid data format constrains them to only 4066 * reporting 255 of them. 4067 */ 4068 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 4069 nentries = 256; 4070 /* 4071 * Crusoe processors also have a unified TLB 4072 */ 4073 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 4074 nentries); 4075 break; 4076 } 4077 /*FALLTHROUGH*/ 4078 default: 4079 add_amd_tlb(devi, itlb4k_str, 4080 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 4081 add_amd_tlb(devi, dtlb4k_str, 4082 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 4083 break; 4084 } 4085 4086 /* 4087 * data L1 cache configuration 4088 */ 4089 4090 add_amd_cache(devi, l1_dcache_str, 4091 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 4092 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 4093 4094 /* 4095 * code L1 cache configuration 4096 */ 4097 4098 add_amd_cache(devi, l1_icache_str, 4099 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 4100 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 4101 4102 if (cpi->cpi_xmaxeax < 0x80000006) 4103 return; 4104 cp = &cpi->cpi_extd[6]; 4105 4106 /* Check for a unified L2 TLB for large pages */ 4107 4108 if (BITX(cp->cp_eax, 31, 16) == 0) 4109 add_amd_l2_tlb(devi, "l2-tlb-2M", 4110 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4111 else { 4112 add_amd_l2_tlb(devi, "l2-dtlb-2M", 4113 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 4114 add_amd_l2_tlb(devi, "l2-itlb-2M", 4115 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4116 } 4117 4118 /* Check for a unified L2 TLB for 4K pages */ 4119 4120 if (BITX(cp->cp_ebx, 31, 16) == 0) { 4121 add_amd_l2_tlb(devi, "l2-tlb-4K", 4122 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4123 } else { 4124 add_amd_l2_tlb(devi, "l2-dtlb-4K", 4125 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 4126 add_amd_l2_tlb(devi, "l2-itlb-4K", 4127 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 4128 } 4129 4130 add_amd_l2_cache(devi, l2_cache_str, 4131 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 4132 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 4133 } 4134 4135 /* 4136 * There are two basic ways that the x86 world describes it cache 4137 * and tlb architecture - Intel's way and AMD's way. 4138 * 4139 * Return which flavor of cache architecture we should use 4140 */ 4141 static int 4142 x86_which_cacheinfo(struct cpuid_info *cpi) 4143 { 4144 switch (cpi->cpi_vendor) { 4145 case X86_VENDOR_Intel: 4146 if (cpi->cpi_maxeax >= 2) 4147 return (X86_VENDOR_Intel); 4148 break; 4149 case X86_VENDOR_AMD: 4150 /* 4151 * The K5 model 1 was the first part from AMD that reported 4152 * cache sizes via extended cpuid functions. 4153 */ 4154 if (cpi->cpi_family > 5 || 4155 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 4156 return (X86_VENDOR_AMD); 4157 break; 4158 case X86_VENDOR_TM: 4159 if (cpi->cpi_family >= 5) 4160 return (X86_VENDOR_AMD); 4161 /*FALLTHROUGH*/ 4162 default: 4163 /* 4164 * If they have extended CPU data for 0x80000005 4165 * then we assume they have AMD-format cache 4166 * information. 
4135 /*
4136 * There are two basic ways that the x86 world describes its cache
4137 * and tlb architecture - Intel's way and AMD's way.
4138 *
4139 * Return which flavor of cache architecture we should use
4140 */
4141 static int
4142 x86_which_cacheinfo(struct cpuid_info *cpi)
4143 {
4144 switch (cpi->cpi_vendor) {
4145 case X86_VENDOR_Intel:
4146 if (cpi->cpi_maxeax >= 2)
4147 return (X86_VENDOR_Intel);
4148 break;
4149 case X86_VENDOR_AMD:
4150 /*
4151 * The K5 model 1 was the first part from AMD that reported
4152 * cache sizes via extended cpuid functions.
4153 */
4154 if (cpi->cpi_family > 5 ||
4155 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
4156 return (X86_VENDOR_AMD);
4157 break;
4158 case X86_VENDOR_TM:
4159 if (cpi->cpi_family >= 5)
4160 return (X86_VENDOR_AMD);
4161 /*FALLTHROUGH*/
4162 default:
4163 /*
4164 * If they have extended CPU data for 0x80000005
4165 * then we assume they have AMD-format cache
4166 * information.
4167 *
4168 * If not, and the vendor happens to be Cyrix,
4169 * then try our Cyrix-specific handler.
4170 *
4171 * If we're not Cyrix, then assume we're using Intel's
4172 * table-driven format instead.
4173 */
4174 if (cpi->cpi_xmaxeax >= 0x80000005)
4175 return (X86_VENDOR_AMD);
4176 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
4177 return (X86_VENDOR_Cyrix);
4178 else if (cpi->cpi_maxeax >= 2)
4179 return (X86_VENDOR_Intel);
4180 break;
4181 }
4182 return (-1);
4183 }
4184
4185 void
4186 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
4187 struct cpuid_info *cpi)
4188 {
4189 dev_info_t *cpu_devi;
4190 int create;
4191
4192 cpu_devi = (dev_info_t *)dip;
4193
4194 /* device_type */
4195 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4196 "device_type", "cpu");
4197
4198 /* reg */
4199 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4200 "reg", cpu_id);
4201
4202 /* cpu-mhz, and clock-frequency */
4203 if (cpu_freq > 0) {
4204 long long mul;
4205
4206 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4207 "cpu-mhz", cpu_freq);
4208 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
4209 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4210 "clock-frequency", (int)mul);
4211 }
4212
4213 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
4214 return;
4215 }
4216
4217 /* vendor-id */
4218 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4219 "vendor-id", cpi->cpi_vendorstr);
4220
4221 if (cpi->cpi_maxeax == 0) {
4222 return;
4223 }
4224
4225 /*
4226 * family, model, and step
4227 */
4228 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4229 "family", CPI_FAMILY(cpi));
4230 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4231 "cpu-model", CPI_MODEL(cpi));
4232 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4233 "stepping-id", CPI_STEP(cpi));
4234
4235 /* type */
4236 switch (cpi->cpi_vendor) {
4237 case X86_VENDOR_Intel:
4238 create = 1;
4239 break;
4240 default:
4241 create = 0;
4242 break;
4243 }
4244 if (create)
4245 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4246 "type", CPI_TYPE(cpi));
4247
4248 /* ext-family */
4249 switch (cpi->cpi_vendor) {
4250 case X86_VENDOR_Intel:
4251 case X86_VENDOR_AMD:
4252 create = cpi->cpi_family >= 0xf;
4253 break;
4254 default:
4255 create = 0;
4256 break;
4257 }
4258 if (create)
4259 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4260 "ext-family", CPI_FAMILY_XTD(cpi));
4261
4262 /* ext-model */
4263 switch (cpi->cpi_vendor) {
4264 case X86_VENDOR_Intel:
4265 create = IS_EXTENDED_MODEL_INTEL(cpi);
4266 break;
4267 case X86_VENDOR_AMD:
4268 create = CPI_FAMILY(cpi) == 0xf;
4269 break;
4270 default:
4271 create = 0;
4272 break;
4273 }
4274 if (create)
4275 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4276 "ext-model", CPI_MODEL_XTD(cpi));
4277
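/*
 * Illustrative note (the composition rule below is the conventional
 * Intel/AMD CPUID encoding, not something defined in this file):
 * consumers of the family/model properties combine them themselves.
 * When the base family is 0xf, the displayed family is the base family
 * plus ext-family, and the displayed model is (ext-model << 4) |
 * cpu-model; e.g. a part with family 0xf and ext-family 0x1 is
 * family 0x10.
 */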
4278 /* generation */
4279 switch (cpi->cpi_vendor) {
4280 case X86_VENDOR_AMD:
4281 /*
4282 * AMD K5 model 1 was the first part to support this
4283 */
4284 create = cpi->cpi_xmaxeax >= 0x80000001;
4285 break;
4286 default:
4287 create = 0;
4288 break;
4289 }
4290 if (create)
4291 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4292 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4293
4294 /* brand-id */
4295 switch (cpi->cpi_vendor) {
4296 case X86_VENDOR_Intel:
4297 /*
4298 * brand id first appeared on Pentium III Xeon model 8 and
4299 * Celeron model 8 processors, and on Opteron
4300 */
4301 create = cpi->cpi_family > 6 ||
4302 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4303 break;
4304 case X86_VENDOR_AMD:
4305 create = cpi->cpi_family >= 0xf;
4306 break;
4307 default:
4308 create = 0;
4309 break;
4310 }
4311 if (create && cpi->cpi_brandid != 0) {
4312 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4313 "brand-id", cpi->cpi_brandid);
4314 }
4315
4316 /* chunks, and apic-id */
4317 switch (cpi->cpi_vendor) {
4318 /*
4319 * first available on Pentium IV and Opteron (K8)
4320 */
4321 case X86_VENDOR_Intel:
4322 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4323 break;
4324 case X86_VENDOR_AMD:
4325 create = cpi->cpi_family >= 0xf;
4326 break;
4327 default:
4328 create = 0;
4329 break;
4330 }
4331 if (create) {
4332 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4333 "chunks", CPI_CHUNKS(cpi));
4334 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4335 "apic-id", cpi->cpi_apicid);
4336 if (cpi->cpi_chipid >= 0) {
4337 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4338 "chip#", cpi->cpi_chipid);
4339 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4340 "clog#", cpi->cpi_clogid);
4341 }
4342 }
4343
4344 /* cpuid-features */
4345 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4346 "cpuid-features", CPI_FEATURES_EDX(cpi));
4347
4348
4349 /* cpuid-features-ecx */
4350 switch (cpi->cpi_vendor) {
4351 case X86_VENDOR_Intel:
4352 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4353 break;
4354 case X86_VENDOR_AMD:
4355 create = cpi->cpi_family >= 0xf;
4356 break;
4357 default:
4358 create = 0;
4359 break;
4360 }
4361 if (create)
4362 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4363 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4364
4365 /* ext-cpuid-features */
4366 switch (cpi->cpi_vendor) {
4367 case X86_VENDOR_Intel:
4368 case X86_VENDOR_AMD:
4369 case X86_VENDOR_Cyrix:
4370 case X86_VENDOR_TM:
4371 case X86_VENDOR_Centaur:
4372 create = cpi->cpi_xmaxeax >= 0x80000001;
4373 break;
4374 default:
4375 create = 0;
4376 break;
4377 }
4378 if (create) {
4379 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4380 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4381 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4382 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4383 }
4384
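/*
 * Illustrative note (standard CPUID bit assignments, not defined in
 * this file): the cpuid-features* properties above are the raw CPUID
 * words, so consumers decode individual bits themselves; e.g. bit 25
 * of "cpuid-features" (CPUID.1 %edx) is SSE and bit 26 is SSE2, while
 * bit 29 of "ext-cpuid-features" (CPUID.0x80000001 %edx) is 64-bit
 * long mode.
 */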
4385 /*
4386 * Brand String first appeared in Intel Pentium IV, AMD K5
4387 * model 1, and Cyrix GXm. On earlier models we try to
4388 * simulate something similar, so this string should always
4389 * say -something- about the processor, however lame.
4390 */
4391 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4392 "brand-string", cpi->cpi_brandstr);
4393
4394 /*
4395 * Finally, cache and tlb information
4396 */
4397 switch (x86_which_cacheinfo(cpi)) {
4398 case X86_VENDOR_Intel:
4399 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4400 break;
4401 case X86_VENDOR_Cyrix:
4402 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4403 break;
4404 case X86_VENDOR_AMD:
4405 amd_cache_info(cpi, cpu_devi);
4406 break;
4407 default:
4408 break;
4409 }
4410 }
4411
4412 struct l2info {
4413 int *l2i_csz;
4414 int *l2i_lsz;
4415 int *l2i_assoc;
4416 int l2i_ret;
4417 };
4418
4419 /*
4420 * A cacheinfo walker that fetches the size, line-size, and associativity
4421 * of the L2 cache
4422 */
4423 static int
4424 intel_l2cinfo(void *arg, const struct cachetab *ct)
4425 {
4426 struct l2info *l2i = arg;
4427 int *ip;
4428
4429 if (ct->ct_label != l2_cache_str &&
4430 ct->ct_label != sl2_cache_str)
4431 return (0); /* not an L2 -- keep walking */
4432
4433 if ((ip = l2i->l2i_csz) != NULL)
4434 *ip = ct->ct_size;
4435 if ((ip = l2i->l2i_lsz) != NULL)
4436 *ip = ct->ct_line_size;
4437 if ((ip = l2i->l2i_assoc) != NULL)
4438 *ip = ct->ct_assoc;
4439 l2i->l2i_ret = ct->ct_size;
4440 return (1); /* was an L2 -- terminate walk */
4441 }
4442
4443 /*
4444 * AMD L2/L3 Cache and TLB Associativity Field Definition:
4445 *
4446 * Unlike the associativity for the L1 cache and tlb where the 8-bit
4447 * value is the associativity, the associativity for the L2 cache and
4448 * tlb is encoded in the following table. The 4-bit L2 value serves as
4449 * an index into the amd_afd[] array to determine the associativity.
4450 * -1 is undefined. 0 is fully associative.
4451 */
4452
4453 static int amd_afd[] =
4454 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
4455
4456 static void
4457 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4458 {
4459 struct cpuid_regs *cp;
4460 uint_t size, assoc;
4461 int i;
4462 int *ip;
4463
4464 if (cpi->cpi_xmaxeax < 0x80000006)
4465 return;
4466 cp = &cpi->cpi_extd[6];
4467
4468 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4469 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4470 uint_t cachesz = size * 1024;
4471 assoc = amd_afd[i];
4472
4473 ASSERT(assoc != -1);
4474
4475 if ((ip = l2i->l2i_csz) != NULL)
4476 *ip = cachesz;
4477 if ((ip = l2i->l2i_lsz) != NULL)
4478 *ip = BITX(cp->cp_ecx, 7, 0);
4479 if ((ip = l2i->l2i_assoc) != NULL)
4480 *ip = assoc;
4481 l2i->l2i_ret = cachesz;
4482 }
4483 }
4484
4485 int
4486 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4487 {
4488 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4489 struct l2info __l2info, *l2i = &__l2info;
4490
4491 l2i->l2i_csz = csz;
4492 l2i->l2i_lsz = lsz;
4493 l2i->l2i_assoc = assoc;
4494 l2i->l2i_ret = -1;
4495
4496 switch (x86_which_cacheinfo(cpi)) {
4497 case X86_VENDOR_Intel:
4498 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4499 break;
4500 case X86_VENDOR_Cyrix:
4501 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4502 break;
4503 case X86_VENDOR_AMD:
4504 amd_l2cacheinfo(cpi, l2i);
4505 break;
4506 default:
4507 break;
4508 }
4509 return (l2i->l2i_ret);
4510 }
4511
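/*
 * Illustrative usage sketch (hypothetical caller, not part of this
 * file) for getl2cacheinfo() above:
 *
 *     int csz, lsz, assoc;
 *
 *     if (getl2cacheinfo(CPU, &csz, &lsz, &assoc) > 0)
 *             cmn_err(CE_CONT, "?l2: size %d, line size %d, assoc %d\n",
 *                 csz, lsz, assoc);
 *
 * A return value of -1 means no L2 description was found for this
 * processor.
 */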
4512 #if !defined(__xpv)
4513
4514 uint32_t *
4515 cpuid_mwait_alloc(cpu_t *cpu)
4516 {
4517 uint32_t *ret;
4518 size_t mwait_size;
4519
4520 ASSERT(cpuid_checkpass(CPU, 2));
4521
4522 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4523 if (mwait_size == 0)
4524 return (NULL);
4525
4526 /*
4527 * kmem_alloc() returns cache line size aligned data for mwait_size
4528 * allocations. mwait_size is currently cache line sized. Neither
4529 * of these implementation details is guaranteed to be true in the
4530 * future.
4531 *
4532 * First try allocating mwait_size as kmem_alloc() currently returns
4533 * correctly aligned memory. If kmem_alloc() does not return
4534 * mwait_size aligned memory, then round up to a mwait_size boundary.
4535 *
4536 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4537 * decide to free this memory.
4538 */
4539 ret = kmem_zalloc(mwait_size, KM_SLEEP);
4540 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4541 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4542 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4543 *ret = MWAIT_RUNNING;
4544 return (ret);
4545 } else {
4546 kmem_free(ret, mwait_size);
4547 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4548 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4549 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4550 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4551 *ret = MWAIT_RUNNING;
4552 return (ret);
4553 }
4554 }
4555
4556 void
4557 cpuid_mwait_free(cpu_t *cpu)
4558 {
4559 if (cpu->cpu_m.mcpu_cpi == NULL) {
4560 return;
4561 }
4562
4563 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4564 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4565 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4566 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4567 }
4568
4569 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4570 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4571 }
4572
4573 void
4574 patch_tsc_read(int flag)
4575 {
4576 size_t cnt;
4577
4578 switch (flag) {
4579 case X86_NO_TSC:
4580 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4581 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4582 break;
4583 case X86_HAVE_TSCP:
4584 cnt = &_tscp_end - &_tscp_start;
4585 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4586 break;
4587 case X86_TSC_MFENCE:
4588 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4589 (void) memcpy((void *)tsc_read,
4590 (void *)&_tsc_mfence_start, cnt);
4591 break;
4592 case X86_TSC_LFENCE:
4593 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4594 (void) memcpy((void *)tsc_read,
4595 (void *)&_tsc_lfence_start, cnt);
4596 break;
4597 default:
4598 break;
4599 }
4600 }
4601
4602 int
4603 cpuid_deep_cstates_supported(void)
4604 {
4605 struct cpuid_info *cpi;
4606 struct cpuid_regs regs;
4607
4608 ASSERT(cpuid_checkpass(CPU, 1));
4609
4610 cpi = CPU->cpu_m.mcpu_cpi;
4611
4612 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4613 return (0);
4614
4615 switch (cpi->cpi_vendor) {
4616 case X86_VENDOR_Intel:
4617 if (cpi->cpi_xmaxeax < 0x80000007)
4618 return (0);
4619
4620 /*
4621 * Does the TSC run at a constant rate in all ACPI C-states?
4622 */
4623 regs.cp_eax = 0x80000007;
4624 (void) __cpuid_insn(&regs);
4625 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4626
4627 default:
4628 return (0);
4629 }
4630 }
4631
4632 #endif /* !__xpv */
4633
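/*
 * Worked example (the addresses are hypothetical) of the alignment
 * logic in cpuid_mwait_alloc() above, assuming mwait_size == 0x40
 * (64 bytes):
 *
 *     If kmem_zalloc() returns 0xffffff01d0a0b740, the address is
 *     already a multiple of 0x40, so P2ROUNDUP() returns it unchanged
 *     and the buffer is used as is.
 *
 *     If kmem_zalloc() returns 0xffffff01d0a0b748 instead, P2ROUNDUP()
 *     yields 0xffffff01d0a0b780, which is not the start of the buffer;
 *     the buffer is freed and a double-sized (0x80 byte) buffer is
 *     allocated, inside which rounding up to the next 0x40 boundary
 *     always leaves at least mwait_size usable bytes.
 */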
4634 void
4635 post_startup_cpu_fixups(void)
4636 {
4637 #ifndef __xpv
4638 /*
4639 * Some AMD processors support C1E state. Entering this state will
4640 * cause the local APIC timer to stop, which we can't deal with at
4641 * this time.
4642 */
4643 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4644 on_trap_data_t otd;
4645 uint64_t reg;
4646
4647 if (!on_trap(&otd, OT_DATA_ACCESS)) {
4648 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4649 /* Disable C1E state if it is enabled by BIOS */
4650 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4651 AMD_ACTONCMPHALT_MASK) {
4652 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4653 AMD_ACTONCMPHALT_SHIFT);
4654 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4655 }
4656 }
4657 no_trap();
4658 }
4659 #endif /* !__xpv */
4660 }
4661
4662 /*
4663 * Set up the necessary registers to enable the XSAVE feature on this processor.
4664 * This function needs to be called early enough, so that no xsave/xrstor
4665 * ops will execute on the processor before the MSRs are properly set up.
4666 *
4667 * Current implementation has the following assumptions:
4668 * - cpuid_pass1() is done, so that X86 features are known.
4669 * - fpu_probe() is done, so that fp_save_mech is chosen.
4670 */
4671 void
4672 xsave_setup_msr(cpu_t *cpu)
4673 {
4674 ASSERT(fp_save_mech == FP_XSAVE);
4675 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4676
4677 /* Enable OSXSAVE in CR4. */
4678 setcr4(getcr4() | CR4_OSXSAVE);
4679 /*
4680 * Update the SW copy of ECX, so that /dev/cpu/self/cpuid will report
4681 * the correct value.
4682 */
4683 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4684 setup_xfem();
4685 }
4686
4687 /*
4688 * Starting with the Westmere processor, the local
4689 * APIC timer will continue running in all C-states,
4690 * including the deepest C-states.
4691 */
4692 int
4693 cpuid_arat_supported(void)
4694 {
4695 struct cpuid_info *cpi;
4696 struct cpuid_regs regs;
4697
4698 ASSERT(cpuid_checkpass(CPU, 1));
4699 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4700
4701 cpi = CPU->cpu_m.mcpu_cpi;
4702
4703 switch (cpi->cpi_vendor) {
4704 case X86_VENDOR_Intel:
4705 /*
4706 * Always-running Local APIC Timer is
4707 * indicated by CPUID.6.EAX[2].
4708 */
4709 if (cpi->cpi_maxeax >= 6) {
4710 regs.cp_eax = 6;
4711 (void) cpuid_insn(NULL, &regs);
4712 return (regs.cp_eax & CPUID_CSTATE_ARAT);
4713 } else {
4714 return (0);
4715 }
4716 default:
4717 return (0);
4718 }
4719 }
4720
4721 /*
4722 * Check support for Intel ENERGY_PERF_BIAS feature
4723 */
4724 int
4725 cpuid_iepb_supported(struct cpu *cp)
4726 {
4727 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4728 struct cpuid_regs regs;
4729
4730 ASSERT(cpuid_checkpass(cp, 1));
4731
4732 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4733 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4734 return (0);
4735 }
4736
4737 /*
4738 * Intel ENERGY_PERF_BIAS MSR is indicated by
4739 * capability bit CPUID.6.ECX.3
4740 */
4741 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4742 return (0);
4743
4744 regs.cp_eax = 0x6;
4745 (void) cpuid_insn(NULL, &regs);
4746 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4747 }
4748
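/*
 * Illustrative sketch (hypothetical consumer, not part of this file) of
 * how cpuid_iepb_supported() might be acted on: the architectural
 * IA32_ENERGY_PERF_BIAS MSR (0x1b0 per the Intel SDM; no symbolic name
 * is assumed here) takes a hint from 0 (favor performance) to 15
 * (favor energy saving):
 *
 *     if (cpuid_iepb_supported(CPU) != 0)
 *             wrmsr(0x1b0, 6);        (a middle-of-the-road hint)
 */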
4749 /*
4750 * Check support for TSC deadline timer
4751 *
4752 * The TSC deadline timer provides a superior software programming
4753 * model over the local APIC timer: it eliminates "time drifts".
4754 * Instead of specifying a relative time, software specifies an
4755 * absolute time as the target at which the processor should
4756 * generate a timer event.
4757 */
4758 int
4759 cpuid_deadline_tsc_supported(void)
4760 {
4761 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4762 struct cpuid_regs regs;
4763
4764 ASSERT(cpuid_checkpass(CPU, 1));
4765 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4766
4767 switch (cpi->cpi_vendor) {
4768 case X86_VENDOR_Intel:
4769 if (cpi->cpi_maxeax >= 1) {
4770 regs.cp_eax = 1;
4771 (void) cpuid_insn(NULL, &regs);
4772 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4773 } else {
4774 return (0);
4775 }
4776 default:
4777 return (0);
4778 }
4779 }
4780
4781 #if defined(__amd64) && !defined(__xpv)
4782 /*
4783 * Patch in versions of bcopy for high-performance Intel Nehalem
4784 * processors and later.
4785 */
4786 void
4787 patch_memops(uint_t vendor)
4788 {
4789 size_t cnt, i;
4790 caddr_t to, from;
4791
4792 if ((vendor == X86_VENDOR_Intel) &&
4793 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4794 cnt = &bcopy_patch_end - &bcopy_patch_start;
4795 to = &bcopy_ck_size;
4796 from = &bcopy_patch_start;
4797 for (i = 0; i < cnt; i++) {
4798 *to++ = *from++;
4799 }
4800 }
4801 }
4802 #endif /* __amd64 && !__xpv */
4803
4804 /*
4805 * This function finds the number of bits to represent the number of cores per
4806 * chip and the number of strands per core for Intel platforms.
4807 * It reuses the x2APIC cpuid code of cpuid_pass2().
4808 */
4809 void
4810 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4811 {
4812 struct cpuid_regs regs;
4813 struct cpuid_regs *cp = &regs;
4814
4815 if (vendor != X86_VENDOR_Intel) {
4816 return;
4817 }
4818
4819 /* If the maximum cpuid leaf is at least 0xB, extended topology is available. */
4820 cp->cp_eax = 0;
4821 if (__cpuid_insn(cp) >= 0xB) {
4822
4823 cp->cp_eax = 0xB;
4824 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4825 (void) __cpuid_insn(cp);
4826
4827 /*
4828 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4829 * indicates that the extended topology enumeration leaf is
4830 * available.
4831 */
4832 if (cp->cp_ebx) {
4833 uint_t coreid_shift = 0;
4834 uint_t chipid_shift = 0;
4835 uint_t i;
4836 uint_t level;
4837
4838 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4839 cp->cp_eax = 0xB;
4840 cp->cp_ecx = i;
4841
4842 (void) __cpuid_insn(cp);
4843 level = CPI_CPU_LEVEL_TYPE(cp);
4844
4845 if (level == 1) {
4846 /*
4847 * Thread level processor topology.
4848 * Number of bits to shift the APIC ID
4849 * right to get the coreid.
4850 */
4851 coreid_shift = BITX(cp->cp_eax, 4, 0);
4852 } else if (level == 2) {
4853 /*
4854 * Core level processor topology.
4855 * Number of bits to shift the APIC ID
4856 * right to get the chipid.
4857 */
4858 chipid_shift = BITX(cp->cp_eax, 4, 0);
4859 }
4860 }
4861
4862 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4863 *strand_nbits = coreid_shift;
4864 *core_nbits = chipid_shift - coreid_shift;
4865 }
4866 }
4867 }
4868 }
4869
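/*
 * Worked example (hypothetical part) for cpuid_get_ext_topo() above: a
 * package that reports coreid_shift == 1 at the thread level and
 * chipid_shift == 4 at the core level yields *strand_nbits = 1 and
 * *core_nbits = 3.  An APIC ID then decomposes as
 *
 *     strand = apicid & 0x1            (low 1 bit)
 *     core = (apicid >> 1) & 0x7       (next 3 bits)
 *     chip = apicid >> 4               (remaining bits)
 *
 * e.g. APIC ID 0x1b (binary 11011) identifies strand 1 of core 5 on
 * chip 1.
 */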