1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright (c) 2010, Intel Corporation. 26 * All rights reserved. 27 */ 28 /* 29 * Portions Copyright 2009 Advanced Micro Devices, Inc. 30 */ 31 32 /* 33 * Various routines to handle identification 34 * and classification of x86 processors. 35 */ 36 37 #include <sys/types.h> 38 #include <sys/archsystm.h> 39 #include <sys/x86_archext.h> 40 #include <sys/kmem.h> 41 #include <sys/systm.h> 42 #include <sys/cmn_err.h> 43 #include <sys/sunddi.h> 44 #include <sys/sunndi.h> 45 #include <sys/cpuvar.h> 46 #include <sys/processor.h> 47 #include <sys/sysmacros.h> 48 #include <sys/pg.h> 49 #include <sys/fp.h> 50 #include <sys/controlregs.h> 51 #include <sys/auxv_386.h> 52 #include <sys/bitmap.h> 53 #include <sys/memnode.h> 54 #include <sys/pci_cfgspace.h> 55 56 #ifdef __xpv 57 #include <sys/hypervisor.h> 58 #else 59 #include <sys/ontrap.h> 60 #endif 61 62 /* 63 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 64 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 65 * them accordingly. For most modern processors, feature detection occurs here 66 * in pass 1. 67 * 68 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 69 * for the boot CPU and does the basic analysis that the early kernel needs. 70 * x86_featureset is set based on the return value of cpuid_pass1() of the boot 71 * CPU. 72 * 73 * Pass 1 includes: 74 * 75 * o Determining vendor/model/family/stepping and setting x86_type and 76 * x86_vendor accordingly. 77 * o Processing the feature flags returned by the cpuid instruction while 78 * applying any workarounds or tricks for the specific processor. 79 * o Mapping the feature flags into Solaris feature bits (X86_*). 80 * o Processing extended feature flags if supported by the processor, 81 * again while applying specific processor knowledge. 82 * o Determining the CMT characteristics of the system. 83 * 84 * Pass 1 is done on non-boot CPUs during their initialization and the results 85 * are used only as a meager attempt at ensuring that all processors within the 86 * system support the same features. 87 * 88 * Pass 2 of cpuid feature analysis happens just at the beginning 89 * of startup(). It just copies in and corrects the remainder 90 * of the cpuid data we depend on: standard cpuid functions that we didn't 91 * need for pass1 feature analysis, and extended cpuid functions beyond the 92 * simple feature processing done in pass1. 93 * 94 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 95 * particular kernel memory allocation has been made available. 
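 * (This is therefore the first pass that may allocate dynamic storage for the data it collects.)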
It creates a 96 * readable brand string based on the data collected in the first two passes. 97 * 98 * Pass 4 of cpuid analysis is invoked after post_startup() when all 99 * the support infrastructure for various hardware features has been 100 * initialized. It determines which processor features will be reported 101 * to userland via the aux vector. 102 * 103 * All passes are executed on all CPUs, but only the boot CPU determines what 104 * features the kernel will use. 105 * 106 * Much of the worst junk in this file is for the support of processors 107 * that didn't really implement the cpuid instruction properly. 108 * 109 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 110 * the pass numbers. Accordingly, changes to the pass code may require changes 111 * to the accessor code. 112 */ 113 114 uint_t x86_vendor = X86_VENDOR_IntelClone; 115 uint_t x86_type = X86_TYPE_OTHER; 116 uint_t x86_clflush_size = 0; 117 118 uint_t pentiumpro_bug4046376; 119 uint_t pentiumpro_bug4064495; 120 121 #define NUM_X86_FEATURES 33 122 void *x86_featureset; 123 ulong_t x86_featureset0[BT_SIZEOFMAP(NUM_X86_FEATURES)]; 124 125 char *x86_feature_names[NUM_X86_FEATURES] = { 126 "lgpg", 127 "tsc", 128 "msr", 129 "mtrr", 130 "pge", 131 "de", 132 "cmov", 133 "mmx", 134 "mca", 135 "pae", 136 "cv8", 137 "pat", 138 "sep", 139 "sse", 140 "sse2", 141 "htt", 142 "asysc", 143 "nx", 144 "sse3", 145 "cx16", 146 "cmp", 147 "tscp", 148 "mwait", 149 "sse4a", 150 "cpuid", 151 "ssse3", 152 "sse4_1", 153 "sse4_2", 154 "1gpg", 155 "clfsh", 156 "64", 157 "aes", 158 "pclmulqdq" }; 159 160 static void * 161 init_x86_featureset(void) 162 { 163 return (kmem_zalloc(BT_SIZEOFMAP(NUM_X86_FEATURES), KM_SLEEP)); 164 } 165 166 void 167 free_x86_featureset(void *featureset) 168 { 169 kmem_free(featureset, BT_SIZEOFMAP(NUM_X86_FEATURES)); 170 } 171 172 boolean_t 173 is_x86_feature(void *featureset, uint_t feature) 174 { 175 ASSERT(feature < NUM_X86_FEATURES); 176 return (BT_TEST((ulong_t *)featureset, feature)); 177 } 178 179 void 180 add_x86_feature(void *featureset, uint_t feature) 181 { 182 ASSERT(feature < NUM_X86_FEATURES); 183 BT_SET((ulong_t *)featureset, feature); 184 } 185 186 void 187 remove_x86_feature(void *featureset, uint_t feature) 188 { 189 ASSERT(feature < NUM_X86_FEATURES); 190 BT_CLEAR((ulong_t *)featureset, feature); 191 } 192 193 boolean_t 194 compare_x86_featureset(void *setA, void *setB) 195 { 196 /* 197 * We assume that the unused bits of the bitmap are always zero. 198 */ 199 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) { 200 return (B_TRUE); 201 } else { 202 return (B_FALSE); 203 } 204 } 205 206 void 207 print_x86_featureset(void *featureset) 208 { 209 uint_t i; 210 211 for (i = 0; i < NUM_X86_FEATURES; i++) { 212 if (is_x86_feature(featureset, i)) { 213 cmn_err(CE_CONT, "?x86_feature: %s\n", 214 x86_feature_names[i]); 215 } 216 } 217 } 218 219 uint_t enable486; 220 /* 221 * This is set to platform type Solaris is running on. 222 */ 223 static int platform_type = -1; 224 225 #if !defined(__xpv) 226 /* 227 * Variable to patch if hypervisor platform detection needs to be 228 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0). 229 */ 230 int enable_platform_detection = 1; 231 #endif 232 233 /* 234 * monitor/mwait info. 235 * 236 * size_actual and buf_actual are the real address and size allocated to get 237 * proper mwait_buf alignement. buf_actual and size_actual should be passed 238 * to kmem_free(). 
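 * (mon_min and mon_max below are taken straight from cpuid leaf 5, via the MWAIT_SIZE_MIN/MWAIT_SIZE_MAX macros, and bound how large the monitored line must be.)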
Currently kmem_alloc() and mwait happen to both use 239 * processor cache-line alignment, but this is not guaranteed in the future. 240 */ 241 struct mwait_info { 242 size_t mon_min; /* min size to avoid missed wakeups */ 243 size_t mon_max; /* size to avoid false wakeups */ 244 size_t size_actual; /* size actually allocated */ 245 void *buf_actual; /* memory actually allocated */ 246 uint32_t support; /* processor support of monitor/mwait */ 247 }; 248 249 /* 250 * These constants determine how many of the elements of the 251 * cpuid we cache in the cpuid_info data structure; the 252 * remaining elements are accessible via the cpuid instruction. 253 */ 254 255 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 256 #define NMAX_CPI_EXTD 0x1c /* eax = 0x80000000 .. 0x8000001b */ 257 258 /* 259 * Some terminology needs to be explained: 260 * - Socket: Something that can be plugged into a motherboard. 261 * - Package: Same as socket. 262 * - Chip: Same as socket. Note that AMD's documentation uses the term "chip" 263 * differently: there, a chip is the same as a processor node (below). 264 * - Processor node: Some AMD processors have more than one 265 * "subprocessor" embedded in a package. These subprocessors (nodes) 266 * are fully-functional processors themselves with cores, caches, 267 * memory controllers, PCI configuration spaces. They are connected 268 * inside the package with HyperTransport links. On single-node 269 * processors, the processor node is equivalent to chip/socket/package. 270 */ 271 272 struct cpuid_info { 273 uint_t cpi_pass; /* last pass completed */ 274 /* 275 * standard function information 276 */ 277 uint_t cpi_maxeax; /* fn 0: %eax */ 278 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 279 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 280 281 uint_t cpi_family; /* fn 1: extended family */ 282 uint_t cpi_model; /* fn 1: extended model */ 283 uint_t cpi_step; /* fn 1: stepping */ 284 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */ 285 /* AMD: package/socket # */ 286 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 287 int cpi_clogid; /* fn 1: %ebx: thread # */ 288 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 289 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 290 uint_t cpi_ncache; /* fn 2: number of elements */ 291 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 292 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 293 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 294 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 295 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 296 /* 297 * extended function information 298 */ 299 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 300 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 301 uint8_t cpi_pabits; /* fn 0x80000008: %eax */ 302 uint8_t cpi_vabits; /* fn 0x80000008: %eax */ 303 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */ 304 305 id_t cpi_coreid; /* same coreid => strands share core */ 306 int cpi_pkgcoreid; /* core number within single package */ 307 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 308 /* Intel: fn 4: %eax[31-26] */ 309 /* 310 * supported feature information 311 */ 312 uint32_t cpi_support[5]; 313 #define STD_EDX_FEATURES 0 314 #define AMD_EDX_FEATURES 1 315 #define TM_EDX_FEATURES 2 316 #define STD_ECX_FEATURES 3 317 #define AMD_ECX_FEATURES 4 318 /* 319 * Synthesized information, where known.
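 * These fields are not read directly from cpuid; they are derived from the vendor/family/model/stepping tuple by the lookup code in cpuid_subr.c (see the _cpuid_skt/_cpuid_chiprev externs declared below).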
320 */ 321 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 322 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 323 uint32_t cpi_socket; /* Chip package/socket type */ 324 325 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 326 uint32_t cpi_apicid; 327 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 328 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 329 /* Intel: 1 */ 330 }; 331 332 333 static struct cpuid_info cpuid_info0; 334 335 /* 336 * These bit fields are defined by the Intel Application Note AP-485 337 * "Intel Processor Identification and the CPUID Instruction" 338 */ 339 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 340 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 341 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 342 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 343 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 344 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 345 346 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 347 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 348 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 349 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 350 351 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 352 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 353 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 354 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 355 356 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 357 #define CPI_XMAXEAX_MAX 0x80000100 358 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 359 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 360 361 /* 362 * Function 4 (Deterministic Cache Parameters) macros 363 * Defined by Intel Application Note AP-485 364 */ 365 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 366 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 367 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 368 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 369 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 370 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 371 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 372 373 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 374 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 375 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 376 377 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 378 379 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 380 381 382 /* 383 * A couple of shorthand macros to identify "later" P6-family chips 384 * like the Pentium M and Core. 
First, the "older" P6-based stuff 385 * (loosely defined as "pre-Pentium-4"): 386 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 387 */ 388 389 #define IS_LEGACY_P6(cpi) ( \ 390 cpi->cpi_family == 6 && \ 391 (cpi->cpi_model == 1 || \ 392 cpi->cpi_model == 3 || \ 393 cpi->cpi_model == 5 || \ 394 cpi->cpi_model == 6 || \ 395 cpi->cpi_model == 7 || \ 396 cpi->cpi_model == 8 || \ 397 cpi->cpi_model == 0xA || \ 398 cpi->cpi_model == 0xB) \ 399 ) 400 401 /* A "new F6" is everything with family 6 that's not the above */ 402 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 403 404 /* Extended family/model support */ 405 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 406 cpi->cpi_family >= 0xf) 407 408 /* 409 * Info for monitor/mwait idle loop. 410 * 411 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 412 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 413 * 2006. 414 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 415 * Documentation Updates" #33633, Rev 2.05, December 2006. 416 */ 417 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 418 #define MWAIT_EXTENSIONS (0x00000002) /* extensions supported */ 419 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 420 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 421 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 422 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 423 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 424 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 425 /* 426 * Number of sub-cstates for a given c-state. 427 */ 428 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 429 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 430 431 /* 432 * Functions we consume from cpuid_subr.c; don't publish these in a header 433 * file to try to keep people using the expected cpuid_* interfaces. 434 */ 435 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 436 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 437 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 438 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 439 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 440 441 /* 442 * Apply various platform-dependent restrictions where the 443 * underlying platform restrictions mean the CPU can be marked 444 * as less capable than its cpuid instruction would imply. 445 */ 446 #if defined(__xpv) 447 static void 448 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 449 { 450 switch (eax) { 451 case 1: { 452 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
453 0 : CPUID_INTC_EDX_MCA; 454 cp->cp_edx &= 455 ~(mcamask | 456 CPUID_INTC_EDX_PSE | 457 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 458 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 459 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 460 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 461 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 462 break; 463 } 464 465 case 0x80000001: 466 cp->cp_edx &= 467 ~(CPUID_AMD_EDX_PSE | 468 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 469 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 470 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 471 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 472 CPUID_AMD_EDX_TSCP); 473 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 474 break; 475 default: 476 break; 477 } 478 479 switch (vendor) { 480 case X86_VENDOR_Intel: 481 switch (eax) { 482 case 4: 483 /* 484 * Zero out the (ncores-per-chip - 1) field 485 */ 486 cp->cp_eax &= 0x03fffffff; 487 break; 488 default: 489 break; 490 } 491 break; 492 case X86_VENDOR_AMD: 493 switch (eax) { 494 495 case 0x80000001: 496 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 497 break; 498 499 case 0x80000008: 500 /* 501 * Zero out the (ncores-per-chip - 1) field 502 */ 503 cp->cp_ecx &= 0xffffff00; 504 break; 505 default: 506 break; 507 } 508 break; 509 default: 510 break; 511 } 512 } 513 #else 514 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 515 #endif 516 517 /* 518 * Some undocumented ways of patching the results of the cpuid 519 * instruction to permit running Solaris 10 on future cpus that 520 * we don't currently support. Could be set to non-zero values 521 * via settings in eeprom. 522 */ 523 524 uint32_t cpuid_feature_ecx_include; 525 uint32_t cpuid_feature_ecx_exclude; 526 uint32_t cpuid_feature_edx_include; 527 uint32_t cpuid_feature_edx_exclude; 528 529 /* 530 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. 531 */ 532 void 533 cpuid_alloc_space(cpu_t *cpu) 534 { 535 /* 536 * By convention, cpu0 is the boot cpu, which is set up 537 * before memory allocation is available. All other cpus get 538 * their cpuid_info struct allocated here. 539 */ 540 ASSERT(cpu->cpu_id != 0); 541 ASSERT(cpu->cpu_m.mcpu_cpi == NULL); 542 cpu->cpu_m.mcpu_cpi = 543 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 544 } 545 546 void 547 cpuid_free_space(cpu_t *cpu) 548 { 549 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 550 int i; 551 552 ASSERT(cpi != NULL); 553 ASSERT(cpi != &cpuid_info0); 554 555 /* 556 * Free up any function 4 related dynamic storage 557 */ 558 for (i = 1; i < cpi->cpi_std_4_size; i++) 559 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 560 if (cpi->cpi_std_4_size > 0) 561 kmem_free(cpi->cpi_std_4, 562 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 563 564 kmem_free(cpi, sizeof (*cpi)); 565 cpu->cpu_m.mcpu_cpi = NULL; 566 } 567 568 #if !defined(__xpv) 569 570 static void 571 determine_platform() 572 { 573 struct cpuid_regs cp; 574 char *xen_str; 575 uint32_t xen_signature[4], base; 576 577 platform_type = HW_NATIVE; 578 579 if (!enable_platform_detection) 580 return; 581 582 /* 583 * In a fully virtualized domain, Xen's pseudo-cpuid function 584 * returns a string representing the Xen signature in %ebx, %ecx, 585 * and %edx. %eax contains the maximum supported cpuid function. 586 * We need at least a (base + 2) leaf value to do what we want 587 * to do. Try different base values, since the hypervisor might 588 * use a different one depending on whether hyper-v emulation 589 * is switched on by default or not. 
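 * (Xen normally answers at base 0x40000000, but when its Hyper-V/viridian compatibility mode is enabled the Xen leaves are typically exposed one 0x100 block higher, hence the probe loop below over every 0x100-aligned base in the range.)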
590 */ 591 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 592 cp.cp_eax = base; 593 (void) __cpuid_insn(&cp); 594 xen_signature[0] = cp.cp_ebx; 595 xen_signature[1] = cp.cp_ecx; 596 xen_signature[2] = cp.cp_edx; 597 xen_signature[3] = 0; 598 xen_str = (char *)xen_signature; 599 if (strcmp("XenVMMXenVMM", xen_str) == 0 && 600 cp.cp_eax >= (base + 2)) { 601 platform_type = HW_XEN_HVM; 602 return; 603 } 604 } 605 606 if (vmware_platform()) /* running under vmware hypervisor? */ 607 platform_type = HW_VMWARE; 608 } 609 610 int 611 get_hwenv(void) 612 { 613 if (platform_type == -1) 614 determine_platform(); 615 616 return (platform_type); 617 } 618 619 int 620 is_controldom(void) 621 { 622 return (0); 623 } 624 625 #else 626 627 int 628 get_hwenv(void) 629 { 630 return (HW_XEN_PV); 631 } 632 633 int 634 is_controldom(void) 635 { 636 return (DOMAIN_IS_INITDOMAIN(xen_info)); 637 } 638 639 #endif /* __xpv */ 640 641 static void 642 cpuid_intel_getids(cpu_t *cpu, void *feature) 643 { 644 uint_t i; 645 uint_t chipid_shift = 0; 646 uint_t coreid_shift = 0; 647 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 648 649 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 650 chipid_shift++; 651 652 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 653 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 654 655 if (is_x86_feature(feature, X86FSET_CMP)) { 656 /* 657 * Multi-core (and possibly multi-threaded) 658 * processors. 659 */ 660 uint_t ncpu_per_core; 661 if (cpi->cpi_ncore_per_chip == 1) 662 ncpu_per_core = cpi->cpi_ncpu_per_chip; 663 else if (cpi->cpi_ncore_per_chip > 1) 664 ncpu_per_core = cpi->cpi_ncpu_per_chip / 665 cpi->cpi_ncore_per_chip; 666 /* 667 * 8bit APIC IDs on dual core Pentiums 668 * look like this: 669 * 670 * +-----------------------+------+------+ 671 * | Physical Package ID | MC | HT | 672 * +-----------------------+------+------+ 673 * <------- chipid --------> 674 * <------- coreid ---------------> 675 * <--- clogid --> 676 * <------> 677 * pkgcoreid 678 * 679 * Where the number of bits necessary to 680 * represent MC and HT fields together equals 681 * to the minimum number of bits necessary to 682 * store the value of cpi->cpi_ncpu_per_chip. 683 * Of those bits, the MC part uses the number 684 * of bits necessary to store the value of 685 * cpi->cpi_ncore_per_chip. 686 */ 687 for (i = 1; i < ncpu_per_core; i <<= 1) 688 coreid_shift++; 689 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 690 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 691 } else if (is_x86_feature(feature, X86FSET_HTT)) { 692 /* 693 * Single-core multi-threaded processors. 694 */ 695 cpi->cpi_coreid = cpi->cpi_chipid; 696 cpi->cpi_pkgcoreid = 0; 697 } 698 cpi->cpi_procnodeid = cpi->cpi_chipid; 699 } 700 701 static void 702 cpuid_amd_getids(cpu_t *cpu) 703 { 704 int i, first_half, coreidsz; 705 uint32_t nb_caps_reg; 706 uint_t node2_1; 707 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 708 709 /* 710 * AMD CMP chips currently have a single thread per core. 711 * 712 * Since no two cpus share a core we must assign a distinct coreid 713 * per cpu, and we do this by using the cpu_id. This scheme does not, 714 * however, guarantee that sibling cores of a chip will have sequential 715 * coreids starting at a multiple of the number of cores per chip - 716 * that is usually the case, but if the ACPI MADT table is presented 717 * in a different order then we need to perform a few more gymnastics 718 * for the pkgcoreid. 
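 * (For example, with four cores per chip the second chip's cores would normally be assigned coreids 4 through 7, giving pkgcoreids 0 through 3.)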
719 * 720 * All processors in the system have the same number of enabled 721 * cores. Cores within a processor are always numbered sequentially 722 * from 0 regardless of how many or which are disabled, and there 723 * is no way for operating system to discover the real core id when some 724 * are disabled. 725 */ 726 727 cpi->cpi_coreid = cpu->cpu_id; 728 729 if (cpi->cpi_xmaxeax >= 0x80000008) { 730 731 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 732 733 /* 734 * In AMD parlance chip is really a node while Solaris 735 * sees chip as equivalent to socket/package. 736 */ 737 cpi->cpi_ncore_per_chip = 738 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 739 if (coreidsz == 0) { 740 /* Use legacy method */ 741 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 742 coreidsz++; 743 if (coreidsz == 0) 744 coreidsz = 1; 745 } 746 } else { 747 /* Assume single-core part */ 748 cpi->cpi_ncore_per_chip = 1; 749 coreidsz = 1; 750 } 751 752 cpi->cpi_clogid = cpi->cpi_pkgcoreid = 753 cpi->cpi_apicid & ((1<<coreidsz) - 1); 754 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip; 755 756 /* Get nodeID */ 757 if (cpi->cpi_family == 0xf) { 758 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 759 cpi->cpi_chipid = cpi->cpi_procnodeid; 760 } else if (cpi->cpi_family == 0x10) { 761 /* 762 * See if we are a multi-node processor. 763 * All processors in the system have the same number of nodes 764 */ 765 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 766 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 767 /* Single-node */ 768 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 769 coreidsz); 770 cpi->cpi_chipid = cpi->cpi_procnodeid; 771 } else { 772 773 /* 774 * Multi-node revision D (2 nodes per package 775 * are supported) 776 */ 777 cpi->cpi_procnodes_per_pkg = 2; 778 779 first_half = (cpi->cpi_pkgcoreid <= 780 (cpi->cpi_ncore_per_chip/2 - 1)); 781 782 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 783 /* We are BSP */ 784 cpi->cpi_procnodeid = (first_half ? 
0 : 1); 785 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 786 } else { 787 788 /* We are AP */ 789 /* NodeId[2:1] bits to use for reading F3xe8 */ 790 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 791 792 nb_caps_reg = 793 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 794 795 /* 796 * Check IntNodeNum bit (31:30, but bit 31 is 797 * always 0 on dual-node processors) 798 */ 799 if (BITX(nb_caps_reg, 30, 30) == 0) 800 cpi->cpi_procnodeid = node2_1 + 801 !first_half; 802 else 803 cpi->cpi_procnodeid = node2_1 + 804 first_half; 805 806 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1; 807 } 808 } 809 } else if (cpi->cpi_family >= 0x11) { 810 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 811 cpi->cpi_chipid = cpi->cpi_procnodeid; 812 } else { 813 cpi->cpi_procnodeid = 0; 814 cpi->cpi_chipid = cpi->cpi_procnodeid; 815 } 816 } 817 818 void * 819 cpuid_pass1(cpu_t *cpu) 820 { 821 uint32_t mask_ecx, mask_edx; 822 void *featureset; 823 struct cpuid_info *cpi; 824 struct cpuid_regs *cp; 825 int xcpuid; 826 #if !defined(__xpv) 827 extern int idle_cpu_prefer_mwait; 828 #endif 829 830 831 #if !defined(__xpv) 832 determine_platform(); 833 #endif 834 /* 835 * Space statically allocated for BSP, ensure pointer is set 836 */ 837 if (cpu->cpu_id == 0) { 838 if (cpu->cpu_m.mcpu_cpi == NULL) 839 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 840 featureset = x86_featureset0; 841 } else { 842 featureset = init_x86_featureset(); 843 } 844 845 add_x86_feature(featureset, X86FSET_CPUID); 846 847 cpi = cpu->cpu_m.mcpu_cpi; 848 ASSERT(cpi != NULL); 849 cp = &cpi->cpi_std[0]; 850 cp->cp_eax = 0; 851 cpi->cpi_maxeax = __cpuid_insn(cp); 852 { 853 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 854 *iptr++ = cp->cp_ebx; 855 *iptr++ = cp->cp_edx; 856 *iptr++ = cp->cp_ecx; 857 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 858 } 859 860 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 861 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 862 863 /* 864 * Limit the range in case of weird hardware 865 */ 866 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 867 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 868 if (cpi->cpi_maxeax < 1) 869 goto pass1_done; 870 871 cp = &cpi->cpi_std[1]; 872 cp->cp_eax = 1; 873 (void) __cpuid_insn(cp); 874 875 /* 876 * Extract identifying constants for easy access. 877 */ 878 cpi->cpi_model = CPI_MODEL(cpi); 879 cpi->cpi_family = CPI_FAMILY(cpi); 880 881 if (cpi->cpi_family == 0xf) 882 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 883 884 /* 885 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 886 * Intel, and presumably everyone else, uses model == 0xf, as 887 * one would expect (max value means possible overflow). Sigh. 
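 * In both schemes the displayed model is (extended model << 4) + base model; e.g. family 6, extended model 1, base model 0xA yields model 0x1A.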
888 */ 889 890 switch (cpi->cpi_vendor) { 891 case X86_VENDOR_Intel: 892 if (IS_EXTENDED_MODEL_INTEL(cpi)) 893 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 894 break; 895 case X86_VENDOR_AMD: 896 if (CPI_FAMILY(cpi) == 0xf) 897 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 898 break; 899 default: 900 if (cpi->cpi_model == 0xf) 901 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 902 break; 903 } 904 905 cpi->cpi_step = CPI_STEP(cpi); 906 cpi->cpi_brandid = CPI_BRANDID(cpi); 907 908 /* 909 * *default* assumptions: 910 * - believe %edx feature word 911 * - ignore %ecx feature word 912 * - 32-bit virtual and physical addressing 913 */ 914 mask_edx = 0xffffffff; 915 mask_ecx = 0; 916 917 cpi->cpi_pabits = cpi->cpi_vabits = 32; 918 919 switch (cpi->cpi_vendor) { 920 case X86_VENDOR_Intel: 921 if (cpi->cpi_family == 5) 922 x86_type = X86_TYPE_P5; 923 else if (IS_LEGACY_P6(cpi)) { 924 x86_type = X86_TYPE_P6; 925 pentiumpro_bug4046376 = 1; 926 pentiumpro_bug4064495 = 1; 927 /* 928 * Clear the SEP bit when it was set erroneously 929 */ 930 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 931 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 932 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 933 x86_type = X86_TYPE_P4; 934 /* 935 * We don't currently depend on any of the %ecx 936 * features until Prescott, so we'll only check 937 * this from P4 onwards. We might want to revisit 938 * that idea later. 939 */ 940 mask_ecx = 0xffffffff; 941 } else if (cpi->cpi_family > 0xf) 942 mask_ecx = 0xffffffff; 943 /* 944 * We don't support MONITOR/MWAIT if leaf 5 is not available 945 * to obtain the monitor linesize. 946 */ 947 if (cpi->cpi_maxeax < 5) 948 mask_ecx &= ~CPUID_INTC_ECX_MON; 949 break; 950 case X86_VENDOR_IntelClone: 951 default: 952 break; 953 case X86_VENDOR_AMD: 954 #if defined(OPTERON_ERRATUM_108) 955 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 956 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 957 cpi->cpi_model = 0xc; 958 } else 959 #endif 960 if (cpi->cpi_family == 5) { 961 /* 962 * AMD K5 and K6 963 * 964 * These CPUs have an incomplete implementation 965 * of MCA/MCE which we mask away. 966 */ 967 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 968 969 /* 970 * Model 0 uses the wrong (APIC) bit 971 * to indicate PGE. Fix it here. 972 */ 973 if (cpi->cpi_model == 0) { 974 if (cp->cp_edx & 0x200) { 975 cp->cp_edx &= ~0x200; 976 cp->cp_edx |= CPUID_INTC_EDX_PGE; 977 } 978 } 979 980 /* 981 * Early models had problems w/ MMX; disable. 982 */ 983 if (cpi->cpi_model < 6) 984 mask_edx &= ~CPUID_INTC_EDX_MMX; 985 } 986 987 /* 988 * For newer families, SSE3 and CX16, at least, are valid; 989 * enable all 990 */ 991 if (cpi->cpi_family >= 0xf) 992 mask_ecx = 0xffffffff; 993 /* 994 * We don't support MONITOR/MWAIT if leaf 5 is not available 995 * to obtain the monitor linesize. 996 */ 997 if (cpi->cpi_maxeax < 5) 998 mask_ecx &= ~CPUID_INTC_ECX_MON; 999 1000 #if !defined(__xpv) 1001 /* 1002 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 1003 * processors. AMD does not intend MWAIT to be used in the cpu 1004 * idle loop on current and future processors. 10h and future 1005 * AMD processors use more power in MWAIT than HLT. 1006 * Pre-family-10h Opterons do not have the MWAIT instruction. 
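 * Note that clearing idle_cpu_prefer_mwait below only steers the idle-loop choice towards HLT; it does not remove the X86FSET_MWAIT feature bit itself.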
1007 */ 1008 idle_cpu_prefer_mwait = 0; 1009 #endif 1010 1011 break; 1012 case X86_VENDOR_TM: 1013 /* 1014 * workaround the NT workaround in CMS 4.1 1015 */ 1016 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 1017 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 1018 cp->cp_edx |= CPUID_INTC_EDX_CX8; 1019 break; 1020 case X86_VENDOR_Centaur: 1021 /* 1022 * workaround the NT workarounds again 1023 */ 1024 if (cpi->cpi_family == 6) 1025 cp->cp_edx |= CPUID_INTC_EDX_CX8; 1026 break; 1027 case X86_VENDOR_Cyrix: 1028 /* 1029 * We rely heavily on the probing in locore 1030 * to actually figure out what parts, if any, 1031 * of the Cyrix cpuid instruction to believe. 1032 */ 1033 switch (x86_type) { 1034 case X86_TYPE_CYRIX_486: 1035 mask_edx = 0; 1036 break; 1037 case X86_TYPE_CYRIX_6x86: 1038 mask_edx = 0; 1039 break; 1040 case X86_TYPE_CYRIX_6x86L: 1041 mask_edx = 1042 CPUID_INTC_EDX_DE | 1043 CPUID_INTC_EDX_CX8; 1044 break; 1045 case X86_TYPE_CYRIX_6x86MX: 1046 mask_edx = 1047 CPUID_INTC_EDX_DE | 1048 CPUID_INTC_EDX_MSR | 1049 CPUID_INTC_EDX_CX8 | 1050 CPUID_INTC_EDX_PGE | 1051 CPUID_INTC_EDX_CMOV | 1052 CPUID_INTC_EDX_MMX; 1053 break; 1054 case X86_TYPE_CYRIX_GXm: 1055 mask_edx = 1056 CPUID_INTC_EDX_MSR | 1057 CPUID_INTC_EDX_CX8 | 1058 CPUID_INTC_EDX_CMOV | 1059 CPUID_INTC_EDX_MMX; 1060 break; 1061 case X86_TYPE_CYRIX_MediaGX: 1062 break; 1063 case X86_TYPE_CYRIX_MII: 1064 case X86_TYPE_VIA_CYRIX_III: 1065 mask_edx = 1066 CPUID_INTC_EDX_DE | 1067 CPUID_INTC_EDX_TSC | 1068 CPUID_INTC_EDX_MSR | 1069 CPUID_INTC_EDX_CX8 | 1070 CPUID_INTC_EDX_PGE | 1071 CPUID_INTC_EDX_CMOV | 1072 CPUID_INTC_EDX_MMX; 1073 break; 1074 default: 1075 break; 1076 } 1077 break; 1078 } 1079 1080 #if defined(__xpv) 1081 /* 1082 * Do not support MONITOR/MWAIT under a hypervisor 1083 */ 1084 mask_ecx &= ~CPUID_INTC_ECX_MON; 1085 #endif /* __xpv */ 1086 1087 /* 1088 * Now we've figured out the masks that determine 1089 * which bits we choose to believe, apply the masks 1090 * to the feature words, then map the kernel's view 1091 * of these feature words into its feature word. 
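 * (For example, a Cyrix 6x86 leaves the switch above with mask_edx == 0, so, absent an eeprom override, none of the %edx feature bits below will be believed for it.)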
1092 */ 1093 cp->cp_edx &= mask_edx; 1094 cp->cp_ecx &= mask_ecx; 1095 1096 /* 1097 * apply any platform restrictions (we don't call this 1098 * immediately after __cpuid_insn here, because we need the 1099 * workarounds applied above first) 1100 */ 1101 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 1102 1103 /* 1104 * fold in overrides from the "eeprom" mechanism 1105 */ 1106 cp->cp_edx |= cpuid_feature_edx_include; 1107 cp->cp_edx &= ~cpuid_feature_edx_exclude; 1108 1109 cp->cp_ecx |= cpuid_feature_ecx_include; 1110 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 1111 1112 if (cp->cp_edx & CPUID_INTC_EDX_PSE) { 1113 add_x86_feature(featureset, X86FSET_LARGEPAGE); 1114 } 1115 if (cp->cp_edx & CPUID_INTC_EDX_TSC) { 1116 add_x86_feature(featureset, X86FSET_TSC); 1117 } 1118 if (cp->cp_edx & CPUID_INTC_EDX_MSR) { 1119 add_x86_feature(featureset, X86FSET_MSR); 1120 } 1121 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) { 1122 add_x86_feature(featureset, X86FSET_MTRR); 1123 } 1124 if (cp->cp_edx & CPUID_INTC_EDX_PGE) { 1125 add_x86_feature(featureset, X86FSET_PGE); 1126 } 1127 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) { 1128 add_x86_feature(featureset, X86FSET_CMOV); 1129 } 1130 if (cp->cp_edx & CPUID_INTC_EDX_MMX) { 1131 add_x86_feature(featureset, X86FSET_MMX); 1132 } 1133 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 1134 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) { 1135 add_x86_feature(featureset, X86FSET_MCA); 1136 } 1137 if (cp->cp_edx & CPUID_INTC_EDX_PAE) { 1138 add_x86_feature(featureset, X86FSET_PAE); 1139 } 1140 if (cp->cp_edx & CPUID_INTC_EDX_CX8) { 1141 add_x86_feature(featureset, X86FSET_CX8); 1142 } 1143 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) { 1144 add_x86_feature(featureset, X86FSET_CX16); 1145 } 1146 if (cp->cp_edx & CPUID_INTC_EDX_PAT) { 1147 add_x86_feature(featureset, X86FSET_PAT); 1148 } 1149 if (cp->cp_edx & CPUID_INTC_EDX_SEP) { 1150 add_x86_feature(featureset, X86FSET_SEP); 1151 } 1152 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 1153 /* 1154 * In our implementation, fxsave/fxrstor 1155 * are prerequisites before we'll even 1156 * try and do SSE things. 1157 */ 1158 if (cp->cp_edx & CPUID_INTC_EDX_SSE) { 1159 add_x86_feature(featureset, X86FSET_SSE); 1160 } 1161 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) { 1162 add_x86_feature(featureset, X86FSET_SSE2); 1163 } 1164 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) { 1165 add_x86_feature(featureset, X86FSET_SSE3); 1166 } 1167 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1168 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) { 1169 add_x86_feature(featureset, X86FSET_SSSE3); 1170 } 1171 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) { 1172 add_x86_feature(featureset, X86FSET_SSE4_1); 1173 } 1174 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) { 1175 add_x86_feature(featureset, X86FSET_SSE4_2); 1176 } 1177 if (cp->cp_ecx & CPUID_INTC_ECX_AES) { 1178 add_x86_feature(featureset, X86FSET_AES); 1179 } 1180 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) { 1181 add_x86_feature(featureset, X86FSET_PCLMULQDQ); 1182 } 1183 } 1184 } 1185 if (cp->cp_edx & CPUID_INTC_EDX_DE) { 1186 add_x86_feature(featureset, X86FSET_DE); 1187 } 1188 #if !defined(__xpv) 1189 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 1190 1191 /* 1192 * We require the CLFLUSH instruction for erratum workaround 1193 * to use MONITOR/MWAIT. 1194 */ 1195 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1196 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 1197 add_x86_feature(featureset, X86FSET_MWAIT); 1198 } else { 1199 extern int idle_cpu_assert_cflush_monitor; 1200 1201 /* 1202 * All processors we are aware of which have 1203 * MONITOR/MWAIT also have CLFLUSH. 
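 * The idle_cpu_assert_cflush_monitor tunable lets DEBUG kernels trip the ASSERT below if that assumption is ever violated, rather than silently losing MWAIT support.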
1204 */ 1205 if (idle_cpu_assert_cflush_monitor) { 1206 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 1207 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 1208 } 1209 } 1210 } 1211 #endif /* __xpv */ 1212 1213 /* 1214 * Only need it first time, rest of the cpus would follow suite. 1215 * we only capture this for the bootcpu. 1216 */ 1217 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 1218 add_x86_feature(featureset, X86FSET_CLFSH); 1219 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 1220 } 1221 if (is_x86_feature(featureset, X86FSET_PAE)) 1222 cpi->cpi_pabits = 36; 1223 1224 /* 1225 * Hyperthreading configuration is slightly tricky on Intel 1226 * and pure clones, and even trickier on AMD. 1227 * 1228 * (AMD chose to set the HTT bit on their CMP processors, 1229 * even though they're not actually hyperthreaded. Thus it 1230 * takes a bit more work to figure out what's really going 1231 * on ... see the handling of the CMP_LGCY bit below) 1232 */ 1233 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 1234 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 1235 if (cpi->cpi_ncpu_per_chip > 1) 1236 add_x86_feature(featureset, X86FSET_HTT); 1237 } else { 1238 cpi->cpi_ncpu_per_chip = 1; 1239 } 1240 1241 /* 1242 * Work on the "extended" feature information, doing 1243 * some basic initialization for cpuid_pass2() 1244 */ 1245 xcpuid = 0; 1246 switch (cpi->cpi_vendor) { 1247 case X86_VENDOR_Intel: 1248 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 1249 xcpuid++; 1250 break; 1251 case X86_VENDOR_AMD: 1252 if (cpi->cpi_family > 5 || 1253 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 1254 xcpuid++; 1255 break; 1256 case X86_VENDOR_Cyrix: 1257 /* 1258 * Only these Cyrix CPUs are -known- to support 1259 * extended cpuid operations. 1260 */ 1261 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1262 x86_type == X86_TYPE_CYRIX_GXm) 1263 xcpuid++; 1264 break; 1265 case X86_VENDOR_Centaur: 1266 case X86_VENDOR_TM: 1267 default: 1268 xcpuid++; 1269 break; 1270 } 1271 1272 if (xcpuid) { 1273 cp = &cpi->cpi_extd[0]; 1274 cp->cp_eax = 0x80000000; 1275 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1276 } 1277 1278 if (cpi->cpi_xmaxeax & 0x80000000) { 1279 1280 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1281 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1282 1283 switch (cpi->cpi_vendor) { 1284 case X86_VENDOR_Intel: 1285 case X86_VENDOR_AMD: 1286 if (cpi->cpi_xmaxeax < 0x80000001) 1287 break; 1288 cp = &cpi->cpi_extd[1]; 1289 cp->cp_eax = 0x80000001; 1290 (void) __cpuid_insn(cp); 1291 1292 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1293 cpi->cpi_family == 5 && 1294 cpi->cpi_model == 6 && 1295 cpi->cpi_step == 6) { 1296 /* 1297 * K6 model 6 uses bit 10 to indicate SYSC 1298 * Later models use bit 11. Fix it here. 1299 */ 1300 if (cp->cp_edx & 0x400) { 1301 cp->cp_edx &= ~0x400; 1302 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1303 } 1304 } 1305 1306 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1307 1308 /* 1309 * Compute the additions to the kernel's feature word. 1310 */ 1311 if (cp->cp_edx & CPUID_AMD_EDX_NX) { 1312 add_x86_feature(featureset, X86FSET_NX); 1313 } 1314 1315 /* 1316 * Regardless whether or not we boot 64-bit, 1317 * we should have a way to identify whether 1318 * the CPU is capable of running 64-bit. 
1319 */ 1320 if (cp->cp_edx & CPUID_AMD_EDX_LM) { 1321 add_x86_feature(featureset, X86FSET_64); 1322 } 1323 1324 #if defined(__amd64) 1325 /* 1 GB large page - enable only for 64 bit kernel */ 1326 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) { 1327 add_x86_feature(featureset, X86FSET_1GPG); 1328 } 1329 #endif 1330 1331 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1332 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1333 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) { 1334 add_x86_feature(featureset, X86FSET_SSE4A); 1335 } 1336 1337 /* 1338 * If both the HTT and CMP_LGCY bits are set, 1339 * then we're not actually HyperThreaded. Read 1340 * "AMD CPUID Specification" for more details. 1341 */ 1342 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1343 is_x86_feature(featureset, X86FSET_HTT) && 1344 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1345 remove_x86_feature(featureset, X86FSET_HTT); 1346 add_x86_feature(featureset, X86FSET_CMP); 1347 } 1348 #if defined(__amd64) 1349 /* 1350 * It's really tricky to support syscall/sysret in 1351 * the i386 kernel; we rely on sysenter/sysexit 1352 * instead. In the amd64 kernel, things are -way- 1353 * better. 1354 */ 1355 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) { 1356 add_x86_feature(featureset, X86FSET_ASYSC); 1357 } 1358 1359 /* 1360 * While we're thinking about system calls, note 1361 * that AMD processors don't support sysenter 1362 * in long mode at all, so don't try to program them. 1363 */ 1364 if (x86_vendor == X86_VENDOR_AMD) { 1365 remove_x86_feature(featureset, X86FSET_SEP); 1366 } 1367 #endif 1368 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) { 1369 add_x86_feature(featureset, X86FSET_TSCP); 1370 } 1371 break; 1372 default: 1373 break; 1374 } 1375 1376 /* 1377 * Get CPUID data about processor cores and hyperthreads. 1378 */ 1379 switch (cpi->cpi_vendor) { 1380 case X86_VENDOR_Intel: 1381 if (cpi->cpi_maxeax >= 4) { 1382 cp = &cpi->cpi_std[4]; 1383 cp->cp_eax = 4; 1384 cp->cp_ecx = 0; 1385 (void) __cpuid_insn(cp); 1386 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1387 } 1388 /*FALLTHROUGH*/ 1389 case X86_VENDOR_AMD: 1390 if (cpi->cpi_xmaxeax < 0x80000008) 1391 break; 1392 cp = &cpi->cpi_extd[8]; 1393 cp->cp_eax = 0x80000008; 1394 (void) __cpuid_insn(cp); 1395 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1396 1397 /* 1398 * Virtual and physical address limits from 1399 * cpuid override previously guessed values. 1400 */ 1401 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1402 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1403 break; 1404 default: 1405 break; 1406 } 1407 1408 /* 1409 * Derive the number of cores per chip 1410 */ 1411 switch (cpi->cpi_vendor) { 1412 case X86_VENDOR_Intel: 1413 if (cpi->cpi_maxeax < 4) { 1414 cpi->cpi_ncore_per_chip = 1; 1415 break; 1416 } else { 1417 cpi->cpi_ncore_per_chip = 1418 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1419 } 1420 break; 1421 case X86_VENDOR_AMD: 1422 if (cpi->cpi_xmaxeax < 0x80000008) { 1423 cpi->cpi_ncore_per_chip = 1; 1424 break; 1425 } else { 1426 /* 1427 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1428 * 1 less than the number of physical cores on 1429 * the chip. In family 0x10 this value can 1430 * be affected by "downcoring" - it reflects 1431 * 1 less than the number of cores actually 1432 * enabled on this node. 1433 */ 1434 cpi->cpi_ncore_per_chip = 1435 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1436 } 1437 break; 1438 default: 1439 cpi->cpi_ncore_per_chip = 1; 1440 break; 1441 } 1442 1443 /* 1444 * Get CPUID data about TSC Invariance in Deep C-State. 
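 * The invariant-TSC indication is %edx bit 8 of leaf 0x80000007; the leaf is simply stashed in cpi_extd[7] below so that later code can inspect it.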
1445 */ 1446 switch (cpi->cpi_vendor) { 1447 case X86_VENDOR_Intel: 1448 if (cpi->cpi_maxeax >= 7) { 1449 cp = &cpi->cpi_extd[7]; 1450 cp->cp_eax = 0x80000007; 1451 cp->cp_ecx = 0; 1452 (void) __cpuid_insn(cp); 1453 } 1454 break; 1455 default: 1456 break; 1457 } 1458 } else { 1459 cpi->cpi_ncore_per_chip = 1; 1460 } 1461 1462 /* 1463 * If more than one core, then this processor is CMP. 1464 */ 1465 if (cpi->cpi_ncore_per_chip > 1) { 1466 add_x86_feature(featureset, X86FSET_CMP); 1467 } 1468 1469 /* 1470 * If the number of cores is the same as the number 1471 * of CPUs, then we cannot have HyperThreading. 1472 */ 1473 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) { 1474 remove_x86_feature(featureset, X86FSET_HTT); 1475 } 1476 1477 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1478 cpi->cpi_procnodes_per_pkg = 1; 1479 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE && 1480 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) { 1481 /* 1482 * Single-core single-threaded processors. 1483 */ 1484 cpi->cpi_chipid = -1; 1485 cpi->cpi_clogid = 0; 1486 cpi->cpi_coreid = cpu->cpu_id; 1487 cpi->cpi_pkgcoreid = 0; 1488 if (cpi->cpi_vendor == X86_VENDOR_AMD) 1489 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 1490 else 1491 cpi->cpi_procnodeid = cpi->cpi_chipid; 1492 } else if (cpi->cpi_ncpu_per_chip > 1) { 1493 if (cpi->cpi_vendor == X86_VENDOR_Intel) 1494 cpuid_intel_getids(cpu, featureset); 1495 else if (cpi->cpi_vendor == X86_VENDOR_AMD) 1496 cpuid_amd_getids(cpu); 1497 else { 1498 /* 1499 * All other processors are currently 1500 * assumed to have single cores. 1501 */ 1502 cpi->cpi_coreid = cpi->cpi_chipid; 1503 cpi->cpi_pkgcoreid = 0; 1504 cpi->cpi_procnodeid = cpi->cpi_chipid; 1505 } 1506 } 1507 1508 /* 1509 * Synthesize chip "revision" and socket type 1510 */ 1511 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1512 cpi->cpi_model, cpi->cpi_step); 1513 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1514 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1515 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1516 cpi->cpi_model, cpi->cpi_step); 1517 1518 pass1_done: 1519 cpi->cpi_pass = 1; 1520 return (featureset); 1521 } 1522 1523 /* 1524 * Make copies of the cpuid table entries we depend on, in 1525 * part for ease of parsing now, in part so that we have only 1526 * one place to correct any of it, in part for ease of 1527 * later export to userland, and in part so we can look at 1528 * this stuff in a crash dump. 1529 */ 1530 1531 /*ARGSUSED*/ 1532 void 1533 cpuid_pass2(cpu_t *cpu) 1534 { 1535 uint_t n, nmax; 1536 int i; 1537 struct cpuid_regs *cp; 1538 uint8_t *dp; 1539 uint32_t *iptr; 1540 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1541 1542 ASSERT(cpi->cpi_pass == 1); 1543 1544 if (cpi->cpi_maxeax < 1) 1545 goto pass2_done; 1546 1547 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1548 nmax = NMAX_CPI_STD; 1549 /* 1550 * (We already handled n == 0 and n == 1 in pass 1) 1551 */ 1552 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1553 cp->cp_eax = n; 1554 1555 /* 1556 * CPUID function 4 expects %ecx to be initialized 1557 * with an index which indicates which cache to return 1558 * information about. The OS is expected to call function 4 1559 * with %ecx set to 0, 1, 2, ... until it returns with 1560 * EAX[4:0] set to 0, which indicates there are no more 1561 * caches. 
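 * (EAX[4:0] is the cache type field: 0 means no more caches, 1 a data cache, 2 an instruction cache, 3 a unified cache.)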
1562 * 1563 * Here, populate cpi_std[4] with the information returned by 1564 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1565 * when dynamic memory allocation becomes available. 1566 * 1567 * Note: we need to explicitly initialize %ecx here, since 1568 * function 4 may have been previously invoked. 1569 */ 1570 if (n == 4) 1571 cp->cp_ecx = 0; 1572 1573 (void) __cpuid_insn(cp); 1574 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1575 switch (n) { 1576 case 2: 1577 /* 1578 * "the lower 8 bits of the %eax register 1579 * contain a value that identifies the number 1580 * of times the cpuid [instruction] has to be 1581 * executed to obtain a complete image of the 1582 * processor's caching systems." 1583 * 1584 * How *do* they make this stuff up? 1585 */ 1586 cpi->cpi_ncache = sizeof (*cp) * 1587 BITX(cp->cp_eax, 7, 0); 1588 if (cpi->cpi_ncache == 0) 1589 break; 1590 cpi->cpi_ncache--; /* skip count byte */ 1591 1592 /* 1593 * Well, for now, rather than attempt to implement 1594 * this slightly dubious algorithm, we just look 1595 * at the first 15 .. 1596 */ 1597 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1598 cpi->cpi_ncache = sizeof (*cp) - 1; 1599 1600 dp = cpi->cpi_cacheinfo; 1601 if (BITX(cp->cp_eax, 31, 31) == 0) { 1602 uint8_t *p = (void *)&cp->cp_eax; 1603 for (i = 1; i < 4; i++) 1604 if (p[i] != 0) 1605 *dp++ = p[i]; 1606 } 1607 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1608 uint8_t *p = (void *)&cp->cp_ebx; 1609 for (i = 0; i < 4; i++) 1610 if (p[i] != 0) 1611 *dp++ = p[i]; 1612 } 1613 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1614 uint8_t *p = (void *)&cp->cp_ecx; 1615 for (i = 0; i < 4; i++) 1616 if (p[i] != 0) 1617 *dp++ = p[i]; 1618 } 1619 if (BITX(cp->cp_edx, 31, 31) == 0) { 1620 uint8_t *p = (void *)&cp->cp_edx; 1621 for (i = 0; i < 4; i++) 1622 if (p[i] != 0) 1623 *dp++ = p[i]; 1624 } 1625 break; 1626 1627 case 3: /* Processor serial number, if PSN supported */ 1628 break; 1629 1630 case 4: /* Deterministic cache parameters */ 1631 break; 1632 1633 case 5: /* Monitor/Mwait parameters */ 1634 { 1635 size_t mwait_size; 1636 1637 /* 1638 * check cpi_mwait.support which was set in cpuid_pass1 1639 */ 1640 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1641 break; 1642 1643 /* 1644 * Protect ourself from insane mwait line size. 1645 * Workaround for incomplete hardware emulator(s). 1646 */ 1647 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1648 if (mwait_size < sizeof (uint32_t) || 1649 !ISP2(mwait_size)) { 1650 #if DEBUG 1651 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1652 "size %ld", cpu->cpu_id, (long)mwait_size); 1653 #endif 1654 break; 1655 } 1656 1657 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1658 cpi->cpi_mwait.mon_max = mwait_size; 1659 if (MWAIT_EXTENSION(cpi)) { 1660 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1661 if (MWAIT_INT_ENABLE(cpi)) 1662 cpi->cpi_mwait.support |= 1663 MWAIT_ECX_INT_ENABLE; 1664 } 1665 break; 1666 } 1667 default: 1668 break; 1669 } 1670 } 1671 1672 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1673 struct cpuid_regs regs; 1674 1675 cp = ®s; 1676 cp->cp_eax = 0xB; 1677 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1678 1679 (void) __cpuid_insn(cp); 1680 1681 /* 1682 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1683 * indicates that the extended topology enumeration leaf is 1684 * available. 
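 * Each sub-leaf reports a level type in %ecx[15:8] (1 = SMT, 2 = core) and, in %eax[4:0], how many low APIC-id bits that level consumes; the loop below walks the sub-leaves to rebuild chipid/coreid/clogid from the x2APIC id. For example, a 4-core/8-thread package typically reports a shift of 1 at the SMT level and 3 at the core level, so cpi_chipid becomes x2apic_id >> 3.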
1685 */ 1686 if (cp->cp_ebx) { 1687 uint32_t x2apic_id; 1688 uint_t coreid_shift = 0; 1689 uint_t ncpu_per_core = 1; 1690 uint_t chipid_shift = 0; 1691 uint_t ncpu_per_chip = 1; 1692 uint_t i; 1693 uint_t level; 1694 1695 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1696 cp->cp_eax = 0xB; 1697 cp->cp_ecx = i; 1698 1699 (void) __cpuid_insn(cp); 1700 level = CPI_CPU_LEVEL_TYPE(cp); 1701 1702 if (level == 1) { 1703 x2apic_id = cp->cp_edx; 1704 coreid_shift = BITX(cp->cp_eax, 4, 0); 1705 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1706 } else if (level == 2) { 1707 x2apic_id = cp->cp_edx; 1708 chipid_shift = BITX(cp->cp_eax, 4, 0); 1709 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1710 } 1711 } 1712 1713 cpi->cpi_apicid = x2apic_id; 1714 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1715 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1716 ncpu_per_core; 1717 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1718 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1719 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1720 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1721 } 1722 1723 /* Make cp NULL so that we don't stumble on others */ 1724 cp = NULL; 1725 } 1726 1727 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1728 goto pass2_done; 1729 1730 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1731 nmax = NMAX_CPI_EXTD; 1732 /* 1733 * Copy the extended properties, fixing them as we go. 1734 * (We already handled n == 0 and n == 1 in pass 1) 1735 */ 1736 iptr = (void *)cpi->cpi_brandstr; 1737 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1738 cp->cp_eax = 0x80000000 + n; 1739 (void) __cpuid_insn(cp); 1740 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1741 switch (n) { 1742 case 2: 1743 case 3: 1744 case 4: 1745 /* 1746 * Extract the brand string 1747 */ 1748 *iptr++ = cp->cp_eax; 1749 *iptr++ = cp->cp_ebx; 1750 *iptr++ = cp->cp_ecx; 1751 *iptr++ = cp->cp_edx; 1752 break; 1753 case 5: 1754 switch (cpi->cpi_vendor) { 1755 case X86_VENDOR_AMD: 1756 /* 1757 * The Athlon and Duron were the first 1758 * parts to report the sizes of the 1759 * TLB for large pages. Before then, 1760 * we don't trust the data. 1761 */ 1762 if (cpi->cpi_family < 6 || 1763 (cpi->cpi_family == 6 && 1764 cpi->cpi_model < 1)) 1765 cp->cp_eax = 0; 1766 break; 1767 default: 1768 break; 1769 } 1770 break; 1771 case 6: 1772 switch (cpi->cpi_vendor) { 1773 case X86_VENDOR_AMD: 1774 /* 1775 * The Athlon and Duron were the first 1776 * AMD parts with L2 TLB's. 1777 * Before then, don't trust the data. 1778 */ 1779 if (cpi->cpi_family < 6 || 1780 cpi->cpi_family == 6 && 1781 cpi->cpi_model < 1) 1782 cp->cp_eax = cp->cp_ebx = 0; 1783 /* 1784 * AMD Duron rev A0 reports L2 1785 * cache size incorrectly as 1K 1786 * when it is really 64K 1787 */ 1788 if (cpi->cpi_family == 6 && 1789 cpi->cpi_model == 3 && 1790 cpi->cpi_step == 0) { 1791 cp->cp_ecx &= 0xffff; 1792 cp->cp_ecx |= 0x400000; 1793 } 1794 break; 1795 case X86_VENDOR_Cyrix: /* VIA C3 */ 1796 /* 1797 * VIA C3 processors are a bit messed 1798 * up w.r.t. encoding cache sizes in %ecx 1799 */ 1800 if (cpi->cpi_family != 6) 1801 break; 1802 /* 1803 * model 7 and 8 were incorrectly encoded 1804 * 1805 * xxx is model 8 really broken? 
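 * (The fixup below re-packs the %ecx byte fields into the layout the rest of the system expects from this leaf.)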
1806 */ 1807 if (cpi->cpi_model == 7 || 1808 cpi->cpi_model == 8) 1809 cp->cp_ecx = 1810 BITX(cp->cp_ecx, 31, 24) << 16 | 1811 BITX(cp->cp_ecx, 23, 16) << 12 | 1812 BITX(cp->cp_ecx, 15, 8) << 8 | 1813 BITX(cp->cp_ecx, 7, 0); 1814 /* 1815 * model 9 stepping 1 has wrong associativity 1816 */ 1817 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1818 cp->cp_ecx |= 8 << 12; 1819 break; 1820 case X86_VENDOR_Intel: 1821 /* 1822 * Extended L2 Cache features function. 1823 * First appeared on Prescott. 1824 */ 1825 default: 1826 break; 1827 } 1828 break; 1829 default: 1830 break; 1831 } 1832 } 1833 1834 pass2_done: 1835 cpi->cpi_pass = 2; 1836 } 1837 1838 static const char * 1839 intel_cpubrand(const struct cpuid_info *cpi) 1840 { 1841 int i; 1842 1843 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 1844 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1845 return ("i486"); 1846 1847 switch (cpi->cpi_family) { 1848 case 5: 1849 return ("Intel Pentium(r)"); 1850 case 6: 1851 switch (cpi->cpi_model) { 1852 uint_t celeron, xeon; 1853 const struct cpuid_regs *cp; 1854 case 0: 1855 case 1: 1856 case 2: 1857 return ("Intel Pentium(r) Pro"); 1858 case 3: 1859 case 4: 1860 return ("Intel Pentium(r) II"); 1861 case 6: 1862 return ("Intel Celeron(r)"); 1863 case 5: 1864 case 7: 1865 celeron = xeon = 0; 1866 cp = &cpi->cpi_std[2]; /* cache info */ 1867 1868 for (i = 1; i < 4; i++) { 1869 uint_t tmp; 1870 1871 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1872 if (tmp == 0x40) 1873 celeron++; 1874 if (tmp >= 0x44 && tmp <= 0x45) 1875 xeon++; 1876 } 1877 1878 for (i = 0; i < 2; i++) { 1879 uint_t tmp; 1880 1881 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1882 if (tmp == 0x40) 1883 celeron++; 1884 else if (tmp >= 0x44 && tmp <= 0x45) 1885 xeon++; 1886 } 1887 1888 for (i = 0; i < 4; i++) { 1889 uint_t tmp; 1890 1891 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1892 if (tmp == 0x40) 1893 celeron++; 1894 else if (tmp >= 0x44 && tmp <= 0x45) 1895 xeon++; 1896 } 1897 1898 for (i = 0; i < 4; i++) { 1899 uint_t tmp; 1900 1901 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1902 if (tmp == 0x40) 1903 celeron++; 1904 else if (tmp >= 0x44 && tmp <= 0x45) 1905 xeon++; 1906 } 1907 1908 if (celeron) 1909 return ("Intel Celeron(r)"); 1910 if (xeon) 1911 return (cpi->cpi_model == 5 ? 1912 "Intel Pentium(r) II Xeon(tm)" : 1913 "Intel Pentium(r) III Xeon(tm)"); 1914 return (cpi->cpi_model == 5 ? 
1915 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1916 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1917 default: 1918 break; 1919 } 1920 default: 1921 break; 1922 } 1923 1924 /* BrandID is present if the field is nonzero */ 1925 if (cpi->cpi_brandid != 0) { 1926 static const struct { 1927 uint_t bt_bid; 1928 const char *bt_str; 1929 } brand_tbl[] = { 1930 { 0x1, "Intel(r) Celeron(r)" }, 1931 { 0x2, "Intel(r) Pentium(r) III" }, 1932 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1933 { 0x4, "Intel(r) Pentium(r) III" }, 1934 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1935 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1936 { 0x8, "Intel(r) Pentium(r) 4" }, 1937 { 0x9, "Intel(r) Pentium(r) 4" }, 1938 { 0xa, "Intel(r) Celeron(r)" }, 1939 { 0xb, "Intel(r) Xeon(tm)" }, 1940 { 0xc, "Intel(r) Xeon(tm) MP" }, 1941 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1942 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1943 { 0x11, "Mobile Genuine Intel(r)" }, 1944 { 0x12, "Intel(r) Celeron(r) M" }, 1945 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1946 { 0x14, "Intel(r) Celeron(r)" }, 1947 { 0x15, "Mobile Genuine Intel(r)" }, 1948 { 0x16, "Intel(r) Pentium(r) M" }, 1949 { 0x17, "Mobile Intel(r) Celeron(r)" } 1950 }; 1951 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1952 uint_t sgn; 1953 1954 sgn = (cpi->cpi_family << 8) | 1955 (cpi->cpi_model << 4) | cpi->cpi_step; 1956 1957 for (i = 0; i < btblmax; i++) 1958 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1959 break; 1960 if (i < btblmax) { 1961 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1962 return ("Intel(r) Celeron(r)"); 1963 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1964 return ("Intel(r) Xeon(tm) MP"); 1965 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1966 return ("Intel(r) Xeon(tm)"); 1967 return (brand_tbl[i].bt_str); 1968 } 1969 } 1970 1971 return (NULL); 1972 } 1973 1974 static const char * 1975 amd_cpubrand(const struct cpuid_info *cpi) 1976 { 1977 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 1978 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1979 return ("i486 compatible"); 1980 1981 switch (cpi->cpi_family) { 1982 case 5: 1983 switch (cpi->cpi_model) { 1984 case 0: 1985 case 1: 1986 case 2: 1987 case 3: 1988 case 4: 1989 case 5: 1990 return ("AMD-K5(r)"); 1991 case 6: 1992 case 7: 1993 return ("AMD-K6(r)"); 1994 case 8: 1995 return ("AMD-K6(r)-2"); 1996 case 9: 1997 return ("AMD-K6(r)-III"); 1998 default: 1999 return ("AMD (family 5)"); 2000 } 2001 case 6: 2002 switch (cpi->cpi_model) { 2003 case 1: 2004 return ("AMD-K7(tm)"); 2005 case 0: 2006 case 2: 2007 case 4: 2008 return ("AMD Athlon(tm)"); 2009 case 3: 2010 case 7: 2011 return ("AMD Duron(tm)"); 2012 case 6: 2013 case 8: 2014 case 10: 2015 /* 2016 * Use the L2 cache size to distinguish 2017 */ 2018 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
2019 "AMD Athlon(tm)" : "AMD Duron(tm)"); 2020 default: 2021 return ("AMD (family 6)"); 2022 } 2023 default: 2024 break; 2025 } 2026 2027 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 2028 cpi->cpi_brandid != 0) { 2029 switch (BITX(cpi->cpi_brandid, 7, 5)) { 2030 case 3: 2031 return ("AMD Opteron(tm) UP 1xx"); 2032 case 4: 2033 return ("AMD Opteron(tm) DP 2xx"); 2034 case 5: 2035 return ("AMD Opteron(tm) MP 8xx"); 2036 default: 2037 return ("AMD Opteron(tm)"); 2038 } 2039 } 2040 2041 return (NULL); 2042 } 2043 2044 static const char * 2045 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 2046 { 2047 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 2048 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 2049 type == X86_TYPE_CYRIX_486) 2050 return ("i486 compatible"); 2051 2052 switch (type) { 2053 case X86_TYPE_CYRIX_6x86: 2054 return ("Cyrix 6x86"); 2055 case X86_TYPE_CYRIX_6x86L: 2056 return ("Cyrix 6x86L"); 2057 case X86_TYPE_CYRIX_6x86MX: 2058 return ("Cyrix 6x86MX"); 2059 case X86_TYPE_CYRIX_GXm: 2060 return ("Cyrix GXm"); 2061 case X86_TYPE_CYRIX_MediaGX: 2062 return ("Cyrix MediaGX"); 2063 case X86_TYPE_CYRIX_MII: 2064 return ("Cyrix M2"); 2065 case X86_TYPE_VIA_CYRIX_III: 2066 return ("VIA Cyrix M3"); 2067 default: 2068 /* 2069 * Have another wild guess .. 2070 */ 2071 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 2072 return ("Cyrix 5x86"); 2073 else if (cpi->cpi_family == 5) { 2074 switch (cpi->cpi_model) { 2075 case 2: 2076 return ("Cyrix 6x86"); /* Cyrix M1 */ 2077 case 4: 2078 return ("Cyrix MediaGX"); 2079 default: 2080 break; 2081 } 2082 } else if (cpi->cpi_family == 6) { 2083 switch (cpi->cpi_model) { 2084 case 0: 2085 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 2086 case 5: 2087 case 6: 2088 case 7: 2089 case 8: 2090 case 9: 2091 return ("VIA C3"); 2092 default: 2093 break; 2094 } 2095 } 2096 break; 2097 } 2098 return (NULL); 2099 } 2100 2101 /* 2102 * This only gets called in the case that the CPU extended 2103 * feature brand string (0x80000002, 0x80000003, 0x80000004) 2104 * aren't available, or contain null bytes for some reason. 2105 */ 2106 static void 2107 fabricate_brandstr(struct cpuid_info *cpi) 2108 { 2109 const char *brand = NULL; 2110 2111 switch (cpi->cpi_vendor) { 2112 case X86_VENDOR_Intel: 2113 brand = intel_cpubrand(cpi); 2114 break; 2115 case X86_VENDOR_AMD: 2116 brand = amd_cpubrand(cpi); 2117 break; 2118 case X86_VENDOR_Cyrix: 2119 brand = cyrix_cpubrand(cpi, x86_type); 2120 break; 2121 case X86_VENDOR_NexGen: 2122 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2123 brand = "NexGen Nx586"; 2124 break; 2125 case X86_VENDOR_Centaur: 2126 if (cpi->cpi_family == 5) 2127 switch (cpi->cpi_model) { 2128 case 4: 2129 brand = "Centaur C6"; 2130 break; 2131 case 8: 2132 brand = "Centaur C2"; 2133 break; 2134 case 9: 2135 brand = "Centaur C3"; 2136 break; 2137 default: 2138 break; 2139 } 2140 break; 2141 case X86_VENDOR_Rise: 2142 if (cpi->cpi_family == 5 && 2143 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 2144 brand = "Rise mP6"; 2145 break; 2146 case X86_VENDOR_SiS: 2147 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 2148 brand = "SiS 55x"; 2149 break; 2150 case X86_VENDOR_TM: 2151 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 2152 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 2153 break; 2154 case X86_VENDOR_NSC: 2155 case X86_VENDOR_UMC: 2156 default: 2157 break; 2158 } 2159 if (brand) { 2160 (void) strcpy((char *)cpi->cpi_brandstr, brand); 2161 return; 2162 } 2163 2164 /* 2165 * If all else fails ... 
2166 */ 2167 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 2168 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 2169 cpi->cpi_model, cpi->cpi_step); 2170 } 2171 2172 /* 2173 * This routine is called just after kernel memory allocation 2174 * becomes available on cpu0, and as part of mp_startup() on 2175 * the other cpus. 2176 * 2177 * Fixup the brand string, and collect any information from cpuid 2178 * that requires dynamically allocated storage to represent. 2179 */ 2180 /*ARGSUSED*/ 2181 void 2182 cpuid_pass3(cpu_t *cpu) 2183 { 2184 int i, max, shft, level, size; 2185 struct cpuid_regs regs; 2186 struct cpuid_regs *cp; 2187 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2188 2189 ASSERT(cpi->cpi_pass == 2); 2190 2191 /* 2192 * Function 4: Deterministic cache parameters 2193 * 2194 * Take this opportunity to detect the number of threads 2195 * sharing the last level cache, and construct a corresponding 2196 * cache id. The respective cpuid_info members are initialized 2197 * to the default case of "no last level cache sharing". 2198 */ 2199 cpi->cpi_ncpu_shr_last_cache = 1; 2200 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 2201 2202 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 2203 2204 /* 2205 * Find the # of elements (size) returned by fn 4, and along 2206 * the way detect last level cache sharing details. 2207 */ 2208 bzero(&regs, sizeof (regs)); 2209 cp = &regs; 2210 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 2211 cp->cp_eax = 4; 2212 cp->cp_ecx = i; 2213 2214 (void) __cpuid_insn(cp); 2215 2216 if (CPI_CACHE_TYPE(cp) == 0) 2217 break; 2218 level = CPI_CACHE_LVL(cp); 2219 if (level > max) { 2220 max = level; 2221 cpi->cpi_ncpu_shr_last_cache = 2222 CPI_NTHR_SHR_CACHE(cp) + 1; 2223 } 2224 } 2225 cpi->cpi_std_4_size = size = i; 2226 2227 /* 2228 * Allocate the cpi_std_4 array. The first element 2229 * references the regs for fn 4, %ecx == 0, which 2230 * cpuid_pass2() stashed in cpi->cpi_std[4]. 2231 */ 2232 if (size > 0) { 2233 cpi->cpi_std_4 = 2234 kmem_alloc(size * sizeof (cp), KM_SLEEP); 2235 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 2236 2237 /* 2238 * Allocate storage to hold the additional regs 2239 * for function 4, %ecx == 1 .. cpi_std_4_size. 2240 * 2241 * The regs for fn 4, %ecx == 0 has already 2242 * been allocated as indicated above. 2243 */ 2244 for (i = 1; i < size; i++) { 2245 cp = cpi->cpi_std_4[i] = 2246 kmem_zalloc(sizeof (regs), KM_SLEEP); 2247 cp->cp_eax = 4; 2248 cp->cp_ecx = i; 2249 2250 (void) __cpuid_insn(cp); 2251 } 2252 } 2253 /* 2254 * Determine the number of bits needed to represent 2255 * the number of CPUs sharing the last level cache. 2256 * 2257 * Shift off that number of bits from the APIC id to 2258 * derive the cache id. 2259 */ 2260 shft = 0; 2261 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 2262 shft++; 2263 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 2264 } 2265 2266 /* 2267 * Now fixup the brand string 2268 */ 2269 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 2270 fabricate_brandstr(cpi); 2271 } else { 2272 2273 /* 2274 * If we successfully extracted a brand string from the cpuid 2275 * instruction, clean it up by removing leading spaces and 2276 * similar junk.
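 * For example, a raw string along the lines of
 * "          Intel(R) Xeon(TM) CPU 3.00GHz" (illustrative only) comes
 * out of the code below as "Intel(r) Xeon(tm) CPU 3.00GHz".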
2277 */ 2278 if (cpi->cpi_brandstr[0]) { 2279 size_t maxlen = sizeof (cpi->cpi_brandstr); 2280 char *src, *dst; 2281 2282 dst = src = (char *)cpi->cpi_brandstr; 2283 src[maxlen - 1] = '\0'; 2284 /* 2285 * strip leading spaces 2286 */ 2287 while (*src == ' ') 2288 src++; 2289 /* 2290 * Remove any 'Genuine' or "Authentic" prefixes 2291 */ 2292 if (strncmp(src, "Genuine ", 8) == 0) 2293 src += 8; 2294 if (strncmp(src, "Authentic ", 10) == 0) 2295 src += 10; 2296 2297 /* 2298 * Now do an in-place copy. 2299 * Map (R) to (r) and (TM) to (tm). 2300 * The era of teletypes is long gone, and there's 2301 * -really- no need to shout. 2302 */ 2303 while (*src != '\0') { 2304 if (src[0] == '(') { 2305 if (strncmp(src + 1, "R)", 2) == 0) { 2306 (void) strncpy(dst, "(r)", 3); 2307 src += 3; 2308 dst += 3; 2309 continue; 2310 } 2311 if (strncmp(src + 1, "TM)", 3) == 0) { 2312 (void) strncpy(dst, "(tm)", 4); 2313 src += 4; 2314 dst += 4; 2315 continue; 2316 } 2317 } 2318 *dst++ = *src++; 2319 } 2320 *dst = '\0'; 2321 2322 /* 2323 * Finally, remove any trailing spaces 2324 */ 2325 while (--dst > cpi->cpi_brandstr) 2326 if (*dst == ' ') 2327 *dst = '\0'; 2328 else 2329 break; 2330 } else 2331 fabricate_brandstr(cpi); 2332 } 2333 cpi->cpi_pass = 3; 2334 } 2335 2336 /* 2337 * This routine is called out of bind_hwcap() much later in the life 2338 * of the kernel (post_startup()). The job of this routine is to resolve 2339 * the hardware feature support and kernel support for those features into 2340 * what we're actually going to tell applications via the aux vector. 2341 */ 2342 uint_t 2343 cpuid_pass4(cpu_t *cpu) 2344 { 2345 struct cpuid_info *cpi; 2346 uint_t hwcap_flags = 0; 2347 2348 if (cpu == NULL) 2349 cpu = CPU; 2350 cpi = cpu->cpu_m.mcpu_cpi; 2351 2352 ASSERT(cpi->cpi_pass == 3); 2353 2354 if (cpi->cpi_maxeax >= 1) { 2355 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2356 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2357 2358 *edx = CPI_FEATURES_EDX(cpi); 2359 *ecx = CPI_FEATURES_ECX(cpi); 2360 2361 /* 2362 * [these require explicit kernel support] 2363 */ 2364 if (!is_x86_feature(x86_featureset, X86FSET_SEP)) 2365 *edx &= ~CPUID_INTC_EDX_SEP; 2366 2367 if (!is_x86_feature(x86_featureset, X86FSET_SSE)) 2368 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2369 if (!is_x86_feature(x86_featureset, X86FSET_SSE2)) 2370 *edx &= ~CPUID_INTC_EDX_SSE2; 2371 2372 if (!is_x86_feature(x86_featureset, X86FSET_HTT)) 2373 *edx &= ~CPUID_INTC_EDX_HTT; 2374 2375 if (!is_x86_feature(x86_featureset, X86FSET_SSE3)) 2376 *ecx &= ~CPUID_INTC_ECX_SSE3; 2377 2378 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2379 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3)) 2380 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2381 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1)) 2382 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2383 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2)) 2384 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2385 if (!is_x86_feature(x86_featureset, X86FSET_AES)) 2386 *ecx &= ~CPUID_INTC_ECX_AES; 2387 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ)) 2388 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ; 2389 } 2390 2391 /* 2392 * [no explicit support required beyond x87 fp context] 2393 */ 2394 if (!fpu_exists) 2395 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2396 2397 /* 2398 * Now map the supported feature vector to things that we 2399 * think userland will care about. 
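 * Each CPUID_INTC_* bit that survived the filtering above simply turns
 * into the corresponding AV_386_* aux vector flag below, e.g.
 * CPUID_INTC_EDX_SSE2 maps to AV_386_SSE2.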
2400 */ 2401 if (*edx & CPUID_INTC_EDX_SEP) 2402 hwcap_flags |= AV_386_SEP; 2403 if (*edx & CPUID_INTC_EDX_SSE) 2404 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2405 if (*edx & CPUID_INTC_EDX_SSE2) 2406 hwcap_flags |= AV_386_SSE2; 2407 if (*ecx & CPUID_INTC_ECX_SSE3) 2408 hwcap_flags |= AV_386_SSE3; 2409 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2410 if (*ecx & CPUID_INTC_ECX_SSSE3) 2411 hwcap_flags |= AV_386_SSSE3; 2412 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2413 hwcap_flags |= AV_386_SSE4_1; 2414 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2415 hwcap_flags |= AV_386_SSE4_2; 2416 if (*ecx & CPUID_INTC_ECX_MOVBE) 2417 hwcap_flags |= AV_386_MOVBE; 2418 if (*ecx & CPUID_INTC_ECX_AES) 2419 hwcap_flags |= AV_386_AES; 2420 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2421 hwcap_flags |= AV_386_PCLMULQDQ; 2422 } 2423 if (*ecx & CPUID_INTC_ECX_POPCNT) 2424 hwcap_flags |= AV_386_POPCNT; 2425 if (*edx & CPUID_INTC_EDX_FPU) 2426 hwcap_flags |= AV_386_FPU; 2427 if (*edx & CPUID_INTC_EDX_MMX) 2428 hwcap_flags |= AV_386_MMX; 2429 2430 if (*edx & CPUID_INTC_EDX_TSC) 2431 hwcap_flags |= AV_386_TSC; 2432 if (*edx & CPUID_INTC_EDX_CX8) 2433 hwcap_flags |= AV_386_CX8; 2434 if (*edx & CPUID_INTC_EDX_CMOV) 2435 hwcap_flags |= AV_386_CMOV; 2436 if (*ecx & CPUID_INTC_ECX_CX16) 2437 hwcap_flags |= AV_386_CX16; 2438 } 2439 2440 if (cpi->cpi_xmaxeax < 0x80000001) 2441 goto pass4_done; 2442 2443 switch (cpi->cpi_vendor) { 2444 struct cpuid_regs cp; 2445 uint32_t *edx, *ecx; 2446 2447 case X86_VENDOR_Intel: 2448 /* 2449 * Seems like Intel duplicated what we necessary 2450 * here to make the initial crop of 64-bit OS's work. 2451 * Hopefully, those are the only "extended" bits 2452 * they'll add. 2453 */ 2454 /*FALLTHROUGH*/ 2455 2456 case X86_VENDOR_AMD: 2457 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2458 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2459 2460 *edx = CPI_FEATURES_XTD_EDX(cpi); 2461 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2462 2463 /* 2464 * [these features require explicit kernel support] 2465 */ 2466 switch (cpi->cpi_vendor) { 2467 case X86_VENDOR_Intel: 2468 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 2469 *edx &= ~CPUID_AMD_EDX_TSCP; 2470 break; 2471 2472 case X86_VENDOR_AMD: 2473 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 2474 *edx &= ~CPUID_AMD_EDX_TSCP; 2475 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A)) 2476 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2477 break; 2478 2479 default: 2480 break; 2481 } 2482 2483 /* 2484 * [no explicit support required beyond 2485 * x87 fp context and exception handlers] 2486 */ 2487 if (!fpu_exists) 2488 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2489 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2490 2491 if (!is_x86_feature(x86_featureset, X86FSET_NX)) 2492 *edx &= ~CPUID_AMD_EDX_NX; 2493 #if !defined(__amd64) 2494 *edx &= ~CPUID_AMD_EDX_LM; 2495 #endif 2496 /* 2497 * Now map the supported feature vector to 2498 * things that we think userland will care about. 
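 * As with the standard features above, this is a straight translation,
 * e.g. CPUID_AMD_EDX_3DNow becomes AV_386_AMD_3DNow below.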
2499 */ 2500 #if defined(__amd64) 2501 if (*edx & CPUID_AMD_EDX_SYSC) 2502 hwcap_flags |= AV_386_AMD_SYSC; 2503 #endif 2504 if (*edx & CPUID_AMD_EDX_MMXamd) 2505 hwcap_flags |= AV_386_AMD_MMX; 2506 if (*edx & CPUID_AMD_EDX_3DNow) 2507 hwcap_flags |= AV_386_AMD_3DNow; 2508 if (*edx & CPUID_AMD_EDX_3DNowx) 2509 hwcap_flags |= AV_386_AMD_3DNowx; 2510 2511 switch (cpi->cpi_vendor) { 2512 case X86_VENDOR_AMD: 2513 if (*edx & CPUID_AMD_EDX_TSCP) 2514 hwcap_flags |= AV_386_TSCP; 2515 if (*ecx & CPUID_AMD_ECX_AHF64) 2516 hwcap_flags |= AV_386_AHF; 2517 if (*ecx & CPUID_AMD_ECX_SSE4A) 2518 hwcap_flags |= AV_386_AMD_SSE4A; 2519 if (*ecx & CPUID_AMD_ECX_LZCNT) 2520 hwcap_flags |= AV_386_AMD_LZCNT; 2521 break; 2522 2523 case X86_VENDOR_Intel: 2524 if (*edx & CPUID_AMD_EDX_TSCP) 2525 hwcap_flags |= AV_386_TSCP; 2526 /* 2527 * Aarrgh. 2528 * Intel uses a different bit in the same word. 2529 */ 2530 if (*ecx & CPUID_INTC_ECX_AHF64) 2531 hwcap_flags |= AV_386_AHF; 2532 break; 2533 2534 default: 2535 break; 2536 } 2537 break; 2538 2539 case X86_VENDOR_TM: 2540 cp.cp_eax = 0x80860001; 2541 (void) __cpuid_insn(&cp); 2542 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2543 break; 2544 2545 default: 2546 break; 2547 } 2548 2549 pass4_done: 2550 cpi->cpi_pass = 4; 2551 return (hwcap_flags); 2552 } 2553 2554 2555 /* 2556 * Simulate the cpuid instruction using the data we previously 2557 * captured about this CPU. We try our best to return the truth 2558 * about the hardware, independently of kernel support. 2559 */ 2560 uint32_t 2561 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2562 { 2563 struct cpuid_info *cpi; 2564 struct cpuid_regs *xcp; 2565 2566 if (cpu == NULL) 2567 cpu = CPU; 2568 cpi = cpu->cpu_m.mcpu_cpi; 2569 2570 ASSERT(cpuid_checkpass(cpu, 3)); 2571 2572 /* 2573 * CPUID data is cached in two separate places: cpi_std for standard 2574 * CPUID functions, and cpi_extd for extended CPUID functions. 2575 */ 2576 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2577 xcp = &cpi->cpi_std[cp->cp_eax]; 2578 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2579 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2580 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2581 else 2582 /* 2583 * The caller is asking for data from an input parameter which 2584 * the kernel has not cached. In this case we go fetch from 2585 * the hardware and return the data directly to the user. 2586 */ 2587 return (__cpuid_insn(cp)); 2588 2589 cp->cp_eax = xcp->cp_eax; 2590 cp->cp_ebx = xcp->cp_ebx; 2591 cp->cp_ecx = xcp->cp_ecx; 2592 cp->cp_edx = xcp->cp_edx; 2593 return (cp->cp_eax); 2594 } 2595 2596 int 2597 cpuid_checkpass(cpu_t *cpu, int pass) 2598 { 2599 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2600 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2601 } 2602 2603 int 2604 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2605 { 2606 ASSERT(cpuid_checkpass(cpu, 3)); 2607 2608 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2609 } 2610 2611 int 2612 cpuid_is_cmt(cpu_t *cpu) 2613 { 2614 if (cpu == NULL) 2615 cpu = CPU; 2616 2617 ASSERT(cpuid_checkpass(cpu, 1)); 2618 2619 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2620 } 2621 2622 /* 2623 * AMD and Intel both implement the 64-bit variant of the syscall 2624 * instruction (syscallq), so if there's -any- support for syscall, 2625 * cpuid currently says "yes, we support this". 
2626 * 2627 * However, Intel decided to -not- implement the 32-bit variant of the 2628 * syscall instruction, so we provide a predicate to allow our caller 2629 * to test that subtlety here. 2630 * 2631 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2632 * even in the case where the hardware would in fact support it. 2633 */ 2634 /*ARGSUSED*/ 2635 int 2636 cpuid_syscall32_insn(cpu_t *cpu) 2637 { 2638 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2639 2640 #if !defined(__xpv) 2641 if (cpu == NULL) 2642 cpu = CPU; 2643 2644 /*CSTYLED*/ 2645 { 2646 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2647 2648 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2649 cpi->cpi_xmaxeax >= 0x80000001 && 2650 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2651 return (1); 2652 } 2653 #endif 2654 return (0); 2655 } 2656 2657 int 2658 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2659 { 2660 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2661 2662 static const char fmt[] = 2663 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2664 static const char fmt_ht[] = 2665 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2666 2667 ASSERT(cpuid_checkpass(cpu, 1)); 2668 2669 if (cpuid_is_cmt(cpu)) 2670 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2671 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2672 cpi->cpi_family, cpi->cpi_model, 2673 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2674 return (snprintf(s, n, fmt, 2675 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2676 cpi->cpi_family, cpi->cpi_model, 2677 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2678 } 2679 2680 const char * 2681 cpuid_getvendorstr(cpu_t *cpu) 2682 { 2683 ASSERT(cpuid_checkpass(cpu, 1)); 2684 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2685 } 2686 2687 uint_t 2688 cpuid_getvendor(cpu_t *cpu) 2689 { 2690 ASSERT(cpuid_checkpass(cpu, 1)); 2691 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2692 } 2693 2694 uint_t 2695 cpuid_getfamily(cpu_t *cpu) 2696 { 2697 ASSERT(cpuid_checkpass(cpu, 1)); 2698 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2699 } 2700 2701 uint_t 2702 cpuid_getmodel(cpu_t *cpu) 2703 { 2704 ASSERT(cpuid_checkpass(cpu, 1)); 2705 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2706 } 2707 2708 uint_t 2709 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2710 { 2711 ASSERT(cpuid_checkpass(cpu, 1)); 2712 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2713 } 2714 2715 uint_t 2716 cpuid_get_ncore_per_chip(cpu_t *cpu) 2717 { 2718 ASSERT(cpuid_checkpass(cpu, 1)); 2719 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2720 } 2721 2722 uint_t 2723 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2724 { 2725 ASSERT(cpuid_checkpass(cpu, 2)); 2726 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2727 } 2728 2729 id_t 2730 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2731 { 2732 ASSERT(cpuid_checkpass(cpu, 2)); 2733 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2734 } 2735 2736 uint_t 2737 cpuid_getstep(cpu_t *cpu) 2738 { 2739 ASSERT(cpuid_checkpass(cpu, 1)); 2740 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2741 } 2742 2743 uint_t 2744 cpuid_getsig(struct cpu *cpu) 2745 { 2746 ASSERT(cpuid_checkpass(cpu, 1)); 2747 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2748 } 2749 2750 uint32_t 2751 cpuid_getchiprev(struct cpu *cpu) 2752 { 2753 ASSERT(cpuid_checkpass(cpu, 1)); 2754 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2755 } 2756 2757 const char * 2758 cpuid_getchiprevstr(struct cpu *cpu) 2759 { 2760 ASSERT(cpuid_checkpass(cpu, 1)); 2761 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2762 } 2763 2764 
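/*
 * The simple accessors above and below all follow the same pattern: assert
 * that the required cpuid pass has completed, then return a field cached in
 * the per-CPU cpuid_info.  A hypothetical caller might look something like
 * the following sketch (illustrative only, not an interface defined here):
 *
 *	if (cpuid_checkpass(cpu, 1))
 *		cmn_err(CE_CONT, "?%s family %d model %d\n",
 *		    cpuid_getvendorstr(cpu), cpuid_getfamily(cpu),
 *		    cpuid_getmodel(cpu));
 */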
uint32_t 2765 cpuid_getsockettype(struct cpu *cpu) 2766 { 2767 ASSERT(cpuid_checkpass(cpu, 1)); 2768 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2769 } 2770 2771 const char * 2772 cpuid_getsocketstr(cpu_t *cpu) 2773 { 2774 static const char *socketstr = NULL; 2775 struct cpuid_info *cpi; 2776 2777 ASSERT(cpuid_checkpass(cpu, 1)); 2778 cpi = cpu->cpu_m.mcpu_cpi; 2779 2780 /* Assume that socket types are the same across the system */ 2781 if (socketstr == NULL) 2782 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 2783 cpi->cpi_model, cpi->cpi_step); 2784 2785 2786 return (socketstr); 2787 } 2788 2789 int 2790 cpuid_get_chipid(cpu_t *cpu) 2791 { 2792 ASSERT(cpuid_checkpass(cpu, 1)); 2793 2794 if (cpuid_is_cmt(cpu)) 2795 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2796 return (cpu->cpu_id); 2797 } 2798 2799 id_t 2800 cpuid_get_coreid(cpu_t *cpu) 2801 { 2802 ASSERT(cpuid_checkpass(cpu, 1)); 2803 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2804 } 2805 2806 int 2807 cpuid_get_pkgcoreid(cpu_t *cpu) 2808 { 2809 ASSERT(cpuid_checkpass(cpu, 1)); 2810 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2811 } 2812 2813 int 2814 cpuid_get_clogid(cpu_t *cpu) 2815 { 2816 ASSERT(cpuid_checkpass(cpu, 1)); 2817 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2818 } 2819 2820 int 2821 cpuid_get_cacheid(cpu_t *cpu) 2822 { 2823 ASSERT(cpuid_checkpass(cpu, 1)); 2824 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2825 } 2826 2827 uint_t 2828 cpuid_get_procnodeid(cpu_t *cpu) 2829 { 2830 ASSERT(cpuid_checkpass(cpu, 1)); 2831 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid); 2832 } 2833 2834 uint_t 2835 cpuid_get_procnodes_per_pkg(cpu_t *cpu) 2836 { 2837 ASSERT(cpuid_checkpass(cpu, 1)); 2838 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg); 2839 } 2840 2841 /*ARGSUSED*/ 2842 int 2843 cpuid_have_cr8access(cpu_t *cpu) 2844 { 2845 #if defined(__amd64) 2846 return (1); 2847 #else 2848 struct cpuid_info *cpi; 2849 2850 ASSERT(cpu != NULL); 2851 cpi = cpu->cpu_m.mcpu_cpi; 2852 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 2853 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 2854 return (1); 2855 return (0); 2856 #endif 2857 } 2858 2859 uint32_t 2860 cpuid_get_apicid(cpu_t *cpu) 2861 { 2862 ASSERT(cpuid_checkpass(cpu, 1)); 2863 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 2864 return (UINT32_MAX); 2865 } else { 2866 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 2867 } 2868 } 2869 2870 void 2871 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2872 { 2873 struct cpuid_info *cpi; 2874 2875 if (cpu == NULL) 2876 cpu = CPU; 2877 cpi = cpu->cpu_m.mcpu_cpi; 2878 2879 ASSERT(cpuid_checkpass(cpu, 1)); 2880 2881 if (pabits) 2882 *pabits = cpi->cpi_pabits; 2883 if (vabits) 2884 *vabits = cpi->cpi_vabits; 2885 } 2886 2887 /* 2888 * Returns the number of data TLB entries for a corresponding 2889 * pagesize. If it can't be computed, or isn't known, the 2890 * routine returns zero. If you ask about an architecturally 2891 * impossible pagesize, the routine will panic (so that the 2892 * hat implementor knows that things are inconsistent.) 
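 * For example, cpuid_get_dtlb_nent(NULL, 4 * 1024) reports the number of
 * 4K data TLB entries on the current CPU, while asking about a nonsensical
 * size such as 13K hits the panic cases below (assuming the extended cpuid
 * leaves are present).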
2893 */ 2894 uint_t 2895 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2896 { 2897 struct cpuid_info *cpi; 2898 uint_t dtlb_nent = 0; 2899 2900 if (cpu == NULL) 2901 cpu = CPU; 2902 cpi = cpu->cpu_m.mcpu_cpi; 2903 2904 ASSERT(cpuid_checkpass(cpu, 1)); 2905 2906 /* 2907 * Check the L2 TLB info 2908 */ 2909 if (cpi->cpi_xmaxeax >= 0x80000006) { 2910 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2911 2912 switch (pagesize) { 2913 2914 case 4 * 1024: 2915 /* 2916 * All zero in the top 16 bits of the register 2917 * indicates a unified TLB. Size is in low 16 bits. 2918 */ 2919 if ((cp->cp_ebx & 0xffff0000) == 0) 2920 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2921 else 2922 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2923 break; 2924 2925 case 2 * 1024 * 1024: 2926 if ((cp->cp_eax & 0xffff0000) == 0) 2927 dtlb_nent = cp->cp_eax & 0x0000ffff; 2928 else 2929 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2930 break; 2931 2932 default: 2933 panic("unknown L2 pagesize"); 2934 /*NOTREACHED*/ 2935 } 2936 } 2937 2938 if (dtlb_nent != 0) 2939 return (dtlb_nent); 2940 2941 /* 2942 * No L2 TLB support for this size, try L1. 2943 */ 2944 if (cpi->cpi_xmaxeax >= 0x80000005) { 2945 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2946 2947 switch (pagesize) { 2948 case 4 * 1024: 2949 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2950 break; 2951 case 2 * 1024 * 1024: 2952 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2953 break; 2954 default: 2955 panic("unknown L1 d-TLB pagesize"); 2956 /*NOTREACHED*/ 2957 } 2958 } 2959 2960 return (dtlb_nent); 2961 } 2962 2963 /* 2964 * Return 0 if the erratum is not present or not applicable, positive 2965 * if it is, and negative if the status of the erratum is unknown. 2966 * 2967 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2968 * Processors" #25759, Rev 3.57, August 2005 2969 */ 2970 int 2971 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2972 { 2973 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2974 uint_t eax; 2975 2976 /* 2977 * Bail out if this CPU isn't an AMD CPU, or if it's 2978 * a legacy (32-bit) AMD CPU. 
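 * (i.e. families 4, 5 and 6, which predate the Opteron-era parts that the
 * errata handled below apply to).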
2979 */ 2980 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2981 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2982 cpi->cpi_family == 6) 2983 2984 return (0); 2985 2986 eax = cpi->cpi_std[1].cp_eax; 2987 2988 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2989 #define SH_B3(eax) (eax == 0xf51) 2990 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2991 2992 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2993 2994 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2995 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2996 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2997 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2998 2999 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 3000 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 3001 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 3002 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 3003 3004 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 3005 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 3006 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 3007 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 3008 #define BH_E4(eax) (eax == 0x20fb1) 3009 #define SH_E5(eax) (eax == 0x20f42) 3010 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 3011 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 3012 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 3013 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 3014 DH_E6(eax) || JH_E6(eax)) 3015 3016 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 3017 #define DR_B0(eax) (eax == 0x100f20) 3018 #define DR_B1(eax) (eax == 0x100f21) 3019 #define DR_BA(eax) (eax == 0x100f2a) 3020 #define DR_B2(eax) (eax == 0x100f22) 3021 #define DR_B3(eax) (eax == 0x100f23) 3022 #define RB_C0(eax) (eax == 0x100f40) 3023 3024 switch (erratum) { 3025 case 1: 3026 return (cpi->cpi_family < 0x10); 3027 case 51: /* what does the asterisk mean? 
*/ 3028 return (B(eax) || SH_C0(eax) || CG(eax)); 3029 case 52: 3030 return (B(eax)); 3031 case 57: 3032 return (cpi->cpi_family <= 0x11); 3033 case 58: 3034 return (B(eax)); 3035 case 60: 3036 return (cpi->cpi_family <= 0x11); 3037 case 61: 3038 case 62: 3039 case 63: 3040 case 64: 3041 case 65: 3042 case 66: 3043 case 68: 3044 case 69: 3045 case 70: 3046 case 71: 3047 return (B(eax)); 3048 case 72: 3049 return (SH_B0(eax)); 3050 case 74: 3051 return (B(eax)); 3052 case 75: 3053 return (cpi->cpi_family < 0x10); 3054 case 76: 3055 return (B(eax)); 3056 case 77: 3057 return (cpi->cpi_family <= 0x11); 3058 case 78: 3059 return (B(eax) || SH_C0(eax)); 3060 case 79: 3061 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 3062 case 80: 3063 case 81: 3064 case 82: 3065 return (B(eax)); 3066 case 83: 3067 return (B(eax) || SH_C0(eax) || CG(eax)); 3068 case 85: 3069 return (cpi->cpi_family < 0x10); 3070 case 86: 3071 return (SH_C0(eax) || CG(eax)); 3072 case 88: 3073 #if !defined(__amd64) 3074 return (0); 3075 #else 3076 return (B(eax) || SH_C0(eax)); 3077 #endif 3078 case 89: 3079 return (cpi->cpi_family < 0x10); 3080 case 90: 3081 return (B(eax) || SH_C0(eax) || CG(eax)); 3082 case 91: 3083 case 92: 3084 return (B(eax) || SH_C0(eax)); 3085 case 93: 3086 return (SH_C0(eax)); 3087 case 94: 3088 return (B(eax) || SH_C0(eax) || CG(eax)); 3089 case 95: 3090 #if !defined(__amd64) 3091 return (0); 3092 #else 3093 return (B(eax) || SH_C0(eax)); 3094 #endif 3095 case 96: 3096 return (B(eax) || SH_C0(eax) || CG(eax)); 3097 case 97: 3098 case 98: 3099 return (SH_C0(eax) || CG(eax)); 3100 case 99: 3101 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3102 case 100: 3103 return (B(eax) || SH_C0(eax)); 3104 case 101: 3105 case 103: 3106 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3107 case 104: 3108 return (SH_C0(eax) || CG(eax) || D0(eax)); 3109 case 105: 3110 case 106: 3111 case 107: 3112 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3113 case 108: 3114 return (DH_CG(eax)); 3115 case 109: 3116 return (SH_C0(eax) || CG(eax) || D0(eax)); 3117 case 110: 3118 return (D0(eax) || EX(eax)); 3119 case 111: 3120 return (CG(eax)); 3121 case 112: 3122 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 3123 case 113: 3124 return (eax == 0x20fc0); 3125 case 114: 3126 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 3127 case 115: 3128 return (SH_E0(eax) || JH_E1(eax)); 3129 case 116: 3130 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 3131 case 117: 3132 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 3133 case 118: 3134 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 3135 JH_E6(eax)); 3136 case 121: 3137 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 3138 case 122: 3139 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 3140 case 123: 3141 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 3142 case 131: 3143 return (cpi->cpi_family < 0x10); 3144 case 6336786: 3145 /* 3146 * Test for AdvPowerMgmtInfo.TscPStateInvariant 3147 * if this is a K8 family or newer processor 3148 */ 3149 if (CPI_FAMILY(cpi) == 0xf) { 3150 struct cpuid_regs regs; 3151 regs.cp_eax = 0x80000007; 3152 (void) __cpuid_insn(&regs); 3153 return (!(regs.cp_edx & 0x100)); 3154 } 3155 return (0); 3156 case 6323525: 3157 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 3158 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 3159 3160 case 6671130: 3161 /* 3162 * check for processors (pre-Shanghai) that do not provide 3163 * optimal management of 1gb ptes in
its tlb. 3164 */ 3165 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 3166 3167 case 298: 3168 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 3169 DR_B2(eax) || RB_C0(eax)); 3170 3171 default: 3172 return (-1); 3173 3174 } 3175 } 3176 3177 /* 3178 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 3179 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 3180 */ 3181 int 3182 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 3183 { 3184 struct cpuid_info *cpi; 3185 uint_t osvwid; 3186 static int osvwfeature = -1; 3187 uint64_t osvwlength; 3188 3189 3190 cpi = cpu->cpu_m.mcpu_cpi; 3191 3192 /* confirm OSVW supported */ 3193 if (osvwfeature == -1) { 3194 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 3195 } else { 3196 /* assert that osvw feature setting is consistent on all cpus */ 3197 ASSERT(osvwfeature == 3198 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 3199 } 3200 if (!osvwfeature) 3201 return (-1); 3202 3203 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 3204 3205 switch (erratum) { 3206 case 298: /* osvwid is 0 */ 3207 osvwid = 0; 3208 if (osvwlength <= (uint64_t)osvwid) { 3209 /* osvwid 0 is unknown */ 3210 return (-1); 3211 } 3212 3213 /* 3214 * Check the OSVW STATUS MSR to determine the state 3215 * of the erratum where: 3216 * 0 - fixed by HW 3217 * 1 - BIOS has applied the workaround when BIOS 3218 * workaround is available. (Or for other errata, 3219 * OS workaround is required.) 3220 * For a value of 1, caller will confirm that the 3221 * erratum 298 workaround has indeed been applied by BIOS. 3222 * 3223 * A 1 may be set in cpus that have a HW fix 3224 * in a mixed cpu system. Regarding erratum 298: 3225 * In a multiprocessor platform, the workaround above 3226 * should be applied to all processors regardless of 3227 * silicon revision when an affected processor is 3228 * present. 3229 */ 3230 3231 return (rdmsr(MSR_AMD_OSVW_STATUS + 3232 (osvwid / OSVW_ID_CNT_PER_MSR)) & 3233 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 3234 3235 default: 3236 return (-1); 3237 } 3238 } 3239 3240 static const char assoc_str[] = "associativity"; 3241 static const char line_str[] = "line-size"; 3242 static const char size_str[] = "size"; 3243 3244 static void 3245 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 3246 uint32_t val) 3247 { 3248 char buf[128]; 3249 3250 /* 3251 * ndi_prop_update_int() is used because it is desirable for 3252 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 3253 */ 3254 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 3255 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 3256 } 3257 3258 /* 3259 * Intel-style cache/tlb description 3260 * 3261 * Standard cpuid level 2 gives a randomly ordered 3262 * selection of tags that index into a table that describes 3263 * cache and tlb properties. 
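 * For example, descriptor 0x2c in the table below stands for a 32KB,
 * 8-way set associative L1 data cache with 64-byte lines.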
3264 */ 3265 3266 static const char l1_icache_str[] = "l1-icache"; 3267 static const char l1_dcache_str[] = "l1-dcache"; 3268 static const char l2_cache_str[] = "l2-cache"; 3269 static const char l3_cache_str[] = "l3-cache"; 3270 static const char itlb4k_str[] = "itlb-4K"; 3271 static const char dtlb4k_str[] = "dtlb-4K"; 3272 static const char itlb2M_str[] = "itlb-2M"; 3273 static const char itlb4M_str[] = "itlb-4M"; 3274 static const char dtlb4M_str[] = "dtlb-4M"; 3275 static const char dtlb24_str[] = "dtlb0-2M-4M"; 3276 static const char itlb424_str[] = "itlb-4K-2M-4M"; 3277 static const char itlb24_str[] = "itlb-2M-4M"; 3278 static const char dtlb44_str[] = "dtlb-4K-4M"; 3279 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 3280 static const char sl2_cache_str[] = "sectored-l2-cache"; 3281 static const char itrace_str[] = "itrace-cache"; 3282 static const char sl3_cache_str[] = "sectored-l3-cache"; 3283 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3284 3285 static const struct cachetab { 3286 uint8_t ct_code; 3287 uint8_t ct_assoc; 3288 uint16_t ct_line_size; 3289 size_t ct_size; 3290 const char *ct_label; 3291 } intel_ctab[] = { 3292 /* 3293 * maintain descending order! 3294 * 3295 * Codes ignored - Reason 3296 * ---------------------- 3297 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3298 * f0H/f1H - Currently we do not interpret prefetch size by design 3299 */ 3300 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3301 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3302 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3303 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3304 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3305 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3306 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3307 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3308 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3309 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3310 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3311 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3312 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3313 { 0xc0, 4, 0, 8, dtlb44_str }, 3314 { 0xba, 4, 0, 64, dtlb4k_str }, 3315 { 0xb4, 4, 0, 256, dtlb4k_str }, 3316 { 0xb3, 4, 0, 128, dtlb4k_str }, 3317 { 0xb2, 4, 0, 64, itlb4k_str }, 3318 { 0xb0, 4, 0, 128, itlb4k_str }, 3319 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3320 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3321 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3322 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3323 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3324 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3325 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3326 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3327 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3328 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3329 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3330 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3331 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3332 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3333 { 0x73, 8, 0, 64*1024, itrace_str}, 3334 { 0x72, 8, 0, 32*1024, itrace_str}, 3335 { 0x71, 8, 0, 16*1024, itrace_str}, 3336 { 0x70, 8, 0, 12*1024, itrace_str}, 3337 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3338 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3339 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3340 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3341 { 0x5d, 0, 0, 256, dtlb44_str}, 3342 { 0x5c, 0, 0, 128, dtlb44_str}, 3343 { 0x5b, 0, 0, 64, dtlb44_str}, 3344 { 0x5a, 4, 0, 32, dtlb24_str}, 3345 { 0x59, 0, 0, 16, dtlb4k_str}, 3346 { 0x57, 4, 0, 16, dtlb4k_str}, 3347 { 0x56, 4, 0, 16, dtlb4M_str}, 3348 { 0x55, 0, 0, 
7, itlb24_str}, 3349 { 0x52, 0, 0, 256, itlb424_str}, 3350 { 0x51, 0, 0, 128, itlb424_str}, 3351 { 0x50, 0, 0, 64, itlb424_str}, 3352 { 0x4f, 0, 0, 32, itlb4k_str}, 3353 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3354 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3355 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3356 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3357 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3358 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3359 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3360 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3361 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3362 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3363 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3364 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3365 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3366 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3367 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3368 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3369 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3370 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3371 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3372 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3373 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3374 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3375 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3376 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3377 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3378 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3379 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3380 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3381 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3382 { 0x0b, 4, 0, 4, itlb4M_str}, 3383 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3384 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3385 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3386 { 0x05, 4, 0, 32, dtlb4M_str}, 3387 { 0x04, 4, 0, 8, dtlb4M_str}, 3388 { 0x03, 4, 0, 64, dtlb4k_str}, 3389 { 0x02, 4, 0, 2, itlb4M_str}, 3390 { 0x01, 4, 0, 32, itlb4k_str}, 3391 { 0 } 3392 }; 3393 3394 static const struct cachetab cyrix_ctab[] = { 3395 { 0x70, 4, 0, 32, "tlb-4K" }, 3396 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3397 { 0 } 3398 }; 3399 3400 /* 3401 * Search a cache table for a matching entry 3402 */ 3403 static const struct cachetab * 3404 find_cacheent(const struct cachetab *ct, uint_t code) 3405 { 3406 if (code != 0) { 3407 for (; ct->ct_code != 0; ct++) 3408 if (ct->ct_code <= code) 3409 break; 3410 if (ct->ct_code == code) 3411 return (ct); 3412 } 3413 return (NULL); 3414 } 3415 3416 /* 3417 * Populate cachetab entry with L2 or L3 cache-information using 3418 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3419 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3420 * information is found. 
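 * The cache size is reconstructed from the leaf 4 data as
 *	ways * partitions * line size * sets
 * where each factor is reported by cpuid as "value - 1", hence the "+ 1"
 * adjustments in the code below.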
3421 */ 3422 static int 3423 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3424 { 3425 uint32_t level, i; 3426 int ret = 0; 3427 3428 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3429 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3430 3431 if (level == 2 || level == 3) { 3432 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3433 ct->ct_line_size = 3434 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3435 ct->ct_size = ct->ct_assoc * 3436 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3437 ct->ct_line_size * 3438 (cpi->cpi_std_4[i]->cp_ecx + 1); 3439 3440 if (level == 2) { 3441 ct->ct_label = l2_cache_str; 3442 } else if (level == 3) { 3443 ct->ct_label = l3_cache_str; 3444 } 3445 ret = 1; 3446 } 3447 } 3448 3449 return (ret); 3450 } 3451 3452 /* 3453 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3454 * The walk is terminated if the walker returns non-zero. 3455 */ 3456 static void 3457 intel_walk_cacheinfo(struct cpuid_info *cpi, 3458 void *arg, int (*func)(void *, const struct cachetab *)) 3459 { 3460 const struct cachetab *ct; 3461 struct cachetab des_49_ct, des_b1_ct; 3462 uint8_t *dp; 3463 int i; 3464 3465 if ((dp = cpi->cpi_cacheinfo) == NULL) 3466 return; 3467 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3468 /* 3469 * For overloaded descriptor 0x49 we use cpuid function 4 3470 * if supported by the current processor, to create 3471 * cache information. 3472 * For overloaded descriptor 0xb1 we use X86_PAE flag 3473 * to disambiguate the cache information. 3474 */ 3475 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3476 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3477 ct = &des_49_ct; 3478 } else if (*dp == 0xb1) { 3479 des_b1_ct.ct_code = 0xb1; 3480 des_b1_ct.ct_assoc = 4; 3481 des_b1_ct.ct_line_size = 0; 3482 if (is_x86_feature(x86_featureset, X86FSET_PAE)) { 3483 des_b1_ct.ct_size = 8; 3484 des_b1_ct.ct_label = itlb2M_str; 3485 } else { 3486 des_b1_ct.ct_size = 4; 3487 des_b1_ct.ct_label = itlb4M_str; 3488 } 3489 ct = &des_b1_ct; 3490 } else { 3491 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3492 continue; 3493 } 3494 } 3495 3496 if (func(arg, ct) != 0) { 3497 break; 3498 } 3499 } 3500 } 3501 3502 /* 3503 * (Like the Intel one, except for Cyrix CPUs) 3504 */ 3505 static void 3506 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3507 void *arg, int (*func)(void *, const struct cachetab *)) 3508 { 3509 const struct cachetab *ct; 3510 uint8_t *dp; 3511 int i; 3512 3513 if ((dp = cpi->cpi_cacheinfo) == NULL) 3514 return; 3515 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3516 /* 3517 * Search Cyrix-specific descriptor table first .. 3518 */ 3519 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3520 if (func(arg, ct) != 0) 3521 break; 3522 continue; 3523 } 3524 /* 3525 * .. else fall back to the Intel one 3526 */ 3527 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3528 if (func(arg, ct) != 0) 3529 break; 3530 continue; 3531 } 3532 } 3533 } 3534 3535 /* 3536 * A cacheinfo walker that adds associativity, line-size, and size properties 3537 * to the devinfo node it is passed as an argument. 
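 * The resulting properties are named "<label>-associativity",
 * "<label>-line-size" and "<label>-size", e.g. "l2-cache-size".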
3538 */ 3539 static int 3540 add_cacheent_props(void *arg, const struct cachetab *ct) 3541 { 3542 dev_info_t *devi = arg; 3543 3544 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3545 if (ct->ct_line_size != 0) 3546 add_cache_prop(devi, ct->ct_label, line_str, 3547 ct->ct_line_size); 3548 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3549 return (0); 3550 } 3551 3552 3553 static const char fully_assoc[] = "fully-associative?"; 3554 3555 /* 3556 * AMD style cache/tlb description 3557 * 3558 * Extended functions 5 and 6 directly describe properties of 3559 * tlbs and various cache levels. 3560 */ 3561 static void 3562 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3563 { 3564 switch (assoc) { 3565 case 0: /* reserved; ignore */ 3566 break; 3567 default: 3568 add_cache_prop(devi, label, assoc_str, assoc); 3569 break; 3570 case 0xff: 3571 add_cache_prop(devi, label, fully_assoc, 1); 3572 break; 3573 } 3574 } 3575 3576 static void 3577 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3578 { 3579 if (size == 0) 3580 return; 3581 add_cache_prop(devi, label, size_str, size); 3582 add_amd_assoc(devi, label, assoc); 3583 } 3584 3585 static void 3586 add_amd_cache(dev_info_t *devi, const char *label, 3587 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3588 { 3589 if (size == 0 || line_size == 0) 3590 return; 3591 add_amd_assoc(devi, label, assoc); 3592 /* 3593 * Most AMD parts have a sectored cache. Multiple cache lines are 3594 * associated with each tag. A sector consists of all cache lines 3595 * associated with a tag. For example, the AMD K6-III has a sector 3596 * size of 2 cache lines per tag. 3597 */ 3598 if (lines_per_tag != 0) 3599 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3600 add_cache_prop(devi, label, line_str, line_size); 3601 add_cache_prop(devi, label, size_str, size * 1024); 3602 } 3603 3604 static void 3605 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3606 { 3607 switch (assoc) { 3608 case 0: /* off */ 3609 break; 3610 case 1: 3611 case 2: 3612 case 4: 3613 add_cache_prop(devi, label, assoc_str, assoc); 3614 break; 3615 case 6: 3616 add_cache_prop(devi, label, assoc_str, 8); 3617 break; 3618 case 8: 3619 add_cache_prop(devi, label, assoc_str, 16); 3620 break; 3621 case 0xf: 3622 add_cache_prop(devi, label, fully_assoc, 1); 3623 break; 3624 default: /* reserved; ignore */ 3625 break; 3626 } 3627 } 3628 3629 static void 3630 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3631 { 3632 if (size == 0 || assoc == 0) 3633 return; 3634 add_amd_l2_assoc(devi, label, assoc); 3635 add_cache_prop(devi, label, size_str, size); 3636 } 3637 3638 static void 3639 add_amd_l2_cache(dev_info_t *devi, const char *label, 3640 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3641 { 3642 if (size == 0 || assoc == 0 || line_size == 0) 3643 return; 3644 add_amd_l2_assoc(devi, label, assoc); 3645 if (lines_per_tag != 0) 3646 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3647 add_cache_prop(devi, label, line_str, line_size); 3648 add_cache_prop(devi, label, size_str, size * 1024); 3649 } 3650 3651 static void 3652 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3653 { 3654 struct cpuid_regs *cp; 3655 3656 if (cpi->cpi_xmaxeax < 0x80000005) 3657 return; 3658 cp = &cpi->cpi_extd[5]; 3659 3660 /* 3661 * 4M/2M L1 TLB configuration 3662 * 3663 * We report the size for 2M pages because AMD uses two 3664 * TLB 
entries for one 4M page. 3665 */ 3666 add_amd_tlb(devi, "dtlb-2M", 3667 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3668 add_amd_tlb(devi, "itlb-2M", 3669 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3670 3671 /* 3672 * 4K L1 TLB configuration 3673 */ 3674 3675 switch (cpi->cpi_vendor) { 3676 uint_t nentries; 3677 case X86_VENDOR_TM: 3678 if (cpi->cpi_family >= 5) { 3679 /* 3680 * Crusoe processors have 256 TLB entries, but 3681 * cpuid data format constrains them to only 3682 * reporting 255 of them. 3683 */ 3684 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3685 nentries = 256; 3686 /* 3687 * Crusoe processors also have a unified TLB 3688 */ 3689 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3690 nentries); 3691 break; 3692 } 3693 /*FALLTHROUGH*/ 3694 default: 3695 add_amd_tlb(devi, itlb4k_str, 3696 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3697 add_amd_tlb(devi, dtlb4k_str, 3698 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3699 break; 3700 } 3701 3702 /* 3703 * data L1 cache configuration 3704 */ 3705 3706 add_amd_cache(devi, l1_dcache_str, 3707 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3708 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3709 3710 /* 3711 * code L1 cache configuration 3712 */ 3713 3714 add_amd_cache(devi, l1_icache_str, 3715 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3716 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3717 3718 if (cpi->cpi_xmaxeax < 0x80000006) 3719 return; 3720 cp = &cpi->cpi_extd[6]; 3721 3722 /* Check for a unified L2 TLB for large pages */ 3723 3724 if (BITX(cp->cp_eax, 31, 16) == 0) 3725 add_amd_l2_tlb(devi, "l2-tlb-2M", 3726 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3727 else { 3728 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3729 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3730 add_amd_l2_tlb(devi, "l2-itlb-2M", 3731 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3732 } 3733 3734 /* Check for a unified L2 TLB for 4K pages */ 3735 3736 if (BITX(cp->cp_ebx, 31, 16) == 0) { 3737 add_amd_l2_tlb(devi, "l2-tlb-4K", 3738 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3739 } else { 3740 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3741 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3742 add_amd_l2_tlb(devi, "l2-itlb-4K", 3743 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3744 } 3745 3746 add_amd_l2_cache(devi, l2_cache_str, 3747 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3748 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3749 } 3750 3751 /* 3752 * There are two basic ways that the x86 world describes it cache 3753 * and tlb architecture - Intel's way and AMD's way. 3754 * 3755 * Return which flavor of cache architecture we should use 3756 */ 3757 static int 3758 x86_which_cacheinfo(struct cpuid_info *cpi) 3759 { 3760 switch (cpi->cpi_vendor) { 3761 case X86_VENDOR_Intel: 3762 if (cpi->cpi_maxeax >= 2) 3763 return (X86_VENDOR_Intel); 3764 break; 3765 case X86_VENDOR_AMD: 3766 /* 3767 * The K5 model 1 was the first part from AMD that reported 3768 * cache sizes via extended cpuid functions. 3769 */ 3770 if (cpi->cpi_family > 5 || 3771 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3772 return (X86_VENDOR_AMD); 3773 break; 3774 case X86_VENDOR_TM: 3775 if (cpi->cpi_family >= 5) 3776 return (X86_VENDOR_AMD); 3777 /*FALLTHROUGH*/ 3778 default: 3779 /* 3780 * If they have extended CPU data for 0x80000005 3781 * then we assume they have AMD-format cache 3782 * information. 
3783 * 3784 * If not, and the vendor happens to be Cyrix, 3785 * then try our-Cyrix specific handler. 3786 * 3787 * If we're not Cyrix, then assume we're using Intel's 3788 * table-driven format instead. 3789 */ 3790 if (cpi->cpi_xmaxeax >= 0x80000005) 3791 return (X86_VENDOR_AMD); 3792 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3793 return (X86_VENDOR_Cyrix); 3794 else if (cpi->cpi_maxeax >= 2) 3795 return (X86_VENDOR_Intel); 3796 break; 3797 } 3798 return (-1); 3799 } 3800 3801 void 3802 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 3803 struct cpuid_info *cpi) 3804 { 3805 dev_info_t *cpu_devi; 3806 int create; 3807 3808 cpu_devi = (dev_info_t *)dip; 3809 3810 /* device_type */ 3811 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3812 "device_type", "cpu"); 3813 3814 /* reg */ 3815 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3816 "reg", cpu_id); 3817 3818 /* cpu-mhz, and clock-frequency */ 3819 if (cpu_freq > 0) { 3820 long long mul; 3821 3822 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3823 "cpu-mhz", cpu_freq); 3824 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3825 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3826 "clock-frequency", (int)mul); 3827 } 3828 3829 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) { 3830 return; 3831 } 3832 3833 /* vendor-id */ 3834 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3835 "vendor-id", cpi->cpi_vendorstr); 3836 3837 if (cpi->cpi_maxeax == 0) { 3838 return; 3839 } 3840 3841 /* 3842 * family, model, and step 3843 */ 3844 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3845 "family", CPI_FAMILY(cpi)); 3846 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3847 "cpu-model", CPI_MODEL(cpi)); 3848 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3849 "stepping-id", CPI_STEP(cpi)); 3850 3851 /* type */ 3852 switch (cpi->cpi_vendor) { 3853 case X86_VENDOR_Intel: 3854 create = 1; 3855 break; 3856 default: 3857 create = 0; 3858 break; 3859 } 3860 if (create) 3861 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3862 "type", CPI_TYPE(cpi)); 3863 3864 /* ext-family */ 3865 switch (cpi->cpi_vendor) { 3866 case X86_VENDOR_Intel: 3867 case X86_VENDOR_AMD: 3868 create = cpi->cpi_family >= 0xf; 3869 break; 3870 default: 3871 create = 0; 3872 break; 3873 } 3874 if (create) 3875 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3876 "ext-family", CPI_FAMILY_XTD(cpi)); 3877 3878 /* ext-model */ 3879 switch (cpi->cpi_vendor) { 3880 case X86_VENDOR_Intel: 3881 create = IS_EXTENDED_MODEL_INTEL(cpi); 3882 break; 3883 case X86_VENDOR_AMD: 3884 create = CPI_FAMILY(cpi) == 0xf; 3885 break; 3886 default: 3887 create = 0; 3888 break; 3889 } 3890 if (create) 3891 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3892 "ext-model", CPI_MODEL_XTD(cpi)); 3893 3894 /* generation */ 3895 switch (cpi->cpi_vendor) { 3896 case X86_VENDOR_AMD: 3897 /* 3898 * AMD K5 model 1 was the first part to support this 3899 */ 3900 create = cpi->cpi_xmaxeax >= 0x80000001; 3901 break; 3902 default: 3903 create = 0; 3904 break; 3905 } 3906 if (create) 3907 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3908 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3909 3910 /* brand-id */ 3911 switch (cpi->cpi_vendor) { 3912 case X86_VENDOR_Intel: 3913 /* 3914 * brand id first appeared on Pentium III Xeon model 8, 3915 * and Celeron model 8 processors and Opteron 3916 */ 3917 create = cpi->cpi_family > 6 || 3918 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3919 break; 3920 case X86_VENDOR_AMD: 3921 create = 
cpi->cpi_family >= 0xf; 3922 break; 3923 default: 3924 create = 0; 3925 break; 3926 } 3927 if (create && cpi->cpi_brandid != 0) { 3928 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3929 "brand-id", cpi->cpi_brandid); 3930 } 3931 3932 /* chunks, and apic-id */ 3933 switch (cpi->cpi_vendor) { 3934 /* 3935 * first available on Pentium IV and Opteron (K8) 3936 */ 3937 case X86_VENDOR_Intel: 3938 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3939 break; 3940 case X86_VENDOR_AMD: 3941 create = cpi->cpi_family >= 0xf; 3942 break; 3943 default: 3944 create = 0; 3945 break; 3946 } 3947 if (create) { 3948 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3949 "chunks", CPI_CHUNKS(cpi)); 3950 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3951 "apic-id", cpi->cpi_apicid); 3952 if (cpi->cpi_chipid >= 0) { 3953 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3954 "chip#", cpi->cpi_chipid); 3955 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3956 "clog#", cpi->cpi_clogid); 3957 } 3958 } 3959 3960 /* cpuid-features */ 3961 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3962 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3963 3964 3965 /* cpuid-features-ecx */ 3966 switch (cpi->cpi_vendor) { 3967 case X86_VENDOR_Intel: 3968 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3969 break; 3970 default: 3971 create = 0; 3972 break; 3973 } 3974 if (create) 3975 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3976 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3977 3978 /* ext-cpuid-features */ 3979 switch (cpi->cpi_vendor) { 3980 case X86_VENDOR_Intel: 3981 case X86_VENDOR_AMD: 3982 case X86_VENDOR_Cyrix: 3983 case X86_VENDOR_TM: 3984 case X86_VENDOR_Centaur: 3985 create = cpi->cpi_xmaxeax >= 0x80000001; 3986 break; 3987 default: 3988 create = 0; 3989 break; 3990 } 3991 if (create) { 3992 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3993 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3994 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3995 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3996 } 3997 3998 /* 3999 * Brand String first appeared in Intel Pentium IV, AMD K5 4000 * model 1, and Cyrix GXm. On earlier models we try and 4001 * simulate something similar .. so this string should always 4002 * say -something- about the processor, however lame.
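 * (fabricate_brandstr() above covers the cases where cpuid itself does not
 * supply one).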
4003 */ 4004 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 4005 "brand-string", cpi->cpi_brandstr); 4006 4007 /* 4008 * Finally, cache and tlb information 4009 */ 4010 switch (x86_which_cacheinfo(cpi)) { 4011 case X86_VENDOR_Intel: 4012 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 4013 break; 4014 case X86_VENDOR_Cyrix: 4015 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 4016 break; 4017 case X86_VENDOR_AMD: 4018 amd_cache_info(cpi, cpu_devi); 4019 break; 4020 default: 4021 break; 4022 } 4023 } 4024 4025 struct l2info { 4026 int *l2i_csz; 4027 int *l2i_lsz; 4028 int *l2i_assoc; 4029 int l2i_ret; 4030 }; 4031 4032 /* 4033 * A cacheinfo walker that fetches the size, line-size and associativity 4034 * of the L2 cache 4035 */ 4036 static int 4037 intel_l2cinfo(void *arg, const struct cachetab *ct) 4038 { 4039 struct l2info *l2i = arg; 4040 int *ip; 4041 4042 if (ct->ct_label != l2_cache_str && 4043 ct->ct_label != sl2_cache_str) 4044 return (0); /* not an L2 -- keep walking */ 4045 4046 if ((ip = l2i->l2i_csz) != NULL) 4047 *ip = ct->ct_size; 4048 if ((ip = l2i->l2i_lsz) != NULL) 4049 *ip = ct->ct_line_size; 4050 if ((ip = l2i->l2i_assoc) != NULL) 4051 *ip = ct->ct_assoc; 4052 l2i->l2i_ret = ct->ct_size; 4053 return (1); /* was an L2 -- terminate walk */ 4054 } 4055 4056 /* 4057 * AMD L2/L3 Cache and TLB Associativity Field Definition: 4058 * 4059 * Unlike the associativity for the L1 cache and tlb where the 8 bit 4060 * value is the associativity, the associativity for the L2 cache and 4061 * tlb is encoded in the following table. The 4 bit L2 value serves as 4062 * an index into the amd_afd[] array to determine the associativity. 4063 * -1 is undefined. 0 is fully associative. 4064 */ 4065 4066 static int amd_afd[] = 4067 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 4068 4069 static void 4070 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 4071 { 4072 struct cpuid_regs *cp; 4073 uint_t size, assoc; 4074 int i; 4075 int *ip; 4076 4077 if (cpi->cpi_xmaxeax < 0x80000006) 4078 return; 4079 cp = &cpi->cpi_extd[6]; 4080 4081 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 4082 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 4083 uint_t cachesz = size * 1024; 4084 assoc = amd_afd[i]; 4085 4086 ASSERT(assoc != -1); 4087 4088 if ((ip = l2i->l2i_csz) != NULL) 4089 *ip = cachesz; 4090 if ((ip = l2i->l2i_lsz) != NULL) 4091 *ip = BITX(cp->cp_ecx, 7, 0); 4092 if ((ip = l2i->l2i_assoc) != NULL) 4093 *ip = assoc; 4094 l2i->l2i_ret = cachesz; 4095 } 4096 } 4097 4098 int 4099 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 4100 { 4101 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 4102 struct l2info __l2info, *l2i = &__l2info; 4103 4104 l2i->l2i_csz = csz; 4105 l2i->l2i_lsz = lsz; 4106 l2i->l2i_assoc = assoc; 4107 l2i->l2i_ret = -1; 4108 4109 switch (x86_which_cacheinfo(cpi)) { 4110 case X86_VENDOR_Intel: 4111 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 4112 break; 4113 case X86_VENDOR_Cyrix: 4114 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 4115 break; 4116 case X86_VENDOR_AMD: 4117 amd_l2cacheinfo(cpi, l2i); 4118 break; 4119 default: 4120 break; 4121 } 4122 return (l2i->l2i_ret); 4123 } 4124 4125 #if !defined(__xpv) 4126 4127 uint32_t * 4128 cpuid_mwait_alloc(cpu_t *cpu) 4129 { 4130 uint32_t *ret; 4131 size_t mwait_size; 4132 4133 ASSERT(cpuid_checkpass(CPU, 2)); 4134 4135 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 4136 if (mwait_size == 0) 4137 return (NULL); 4138 4139 /* 4140 * kmem_alloc() returns cache line size aligned data for 

#if !defined(__xpv)

uint32_t *
cpuid_mwait_alloc(cpu_t *cpu)
{
	uint32_t *ret;
	size_t mwait_size;

	ASSERT(cpuid_checkpass(CPU, 2));

	mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
	if (mwait_size == 0)
		return (NULL);

	/*
	 * kmem_alloc() returns cache line size aligned data for mwait_size
	 * allocations.  mwait_size is currently cache line sized.  Neither
	 * of these implementation details is guaranteed to be true in the
	 * future.
	 *
	 * First try allocating mwait_size as kmem_alloc() currently returns
	 * correctly aligned memory.  If kmem_alloc() does not return
	 * mwait_size aligned memory, then allocate twice mwait_size and
	 * round up to a mwait_size boundary.
	 *
	 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
	 * decide to free this memory.
	 */
	ret = kmem_zalloc(mwait_size, KM_SLEEP);
	if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
		*ret = MWAIT_RUNNING;
		return (ret);
	} else {
		kmem_free(ret, mwait_size);
		ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
		ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
		*ret = MWAIT_RUNNING;
		return (ret);
	}
}

void
cpuid_mwait_free(cpu_t *cpu)
{
	if (cpu->cpu_m.mcpu_cpi == NULL) {
		return;
	}

	if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
	    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
		kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
		    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
	}

	cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
	cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
}
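
/*
 * Illustrative example (not part of the original source) of the alignment
 * fallback in cpuid_mwait_alloc() above, assuming mwait_size == 0x40:
 * if the first kmem_zalloc() happens to return a 0x40-aligned buffer, it
 * is used directly.  Otherwise that buffer is freed, 0x80 bytes are
 * allocated instead, and the monitor line is placed at
 * P2ROUNDUP(buf, 0x40), which always leaves at least 0x40 usable bytes
 * within the doubled allocation.
 */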

void
patch_tsc_read(int flag)
{
	size_t cnt;

	switch (flag) {
	case X86_NO_TSC:
		cnt = &_no_rdtsc_end - &_no_rdtsc_start;
		(void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
		break;
	case X86_HAVE_TSCP:
		cnt = &_tscp_end - &_tscp_start;
		(void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
		break;
	case X86_TSC_MFENCE:
		cnt = &_tsc_mfence_end - &_tsc_mfence_start;
		(void) memcpy((void *)tsc_read,
		    (void *)&_tsc_mfence_start, cnt);
		break;
	case X86_TSC_LFENCE:
		cnt = &_tsc_lfence_end - &_tsc_lfence_start;
		(void) memcpy((void *)tsc_read,
		    (void *)&_tsc_lfence_start, cnt);
		break;
	default:
		break;
	}
}

int
cpuid_deep_cstates_supported(void)
{
	struct cpuid_info *cpi;
	struct cpuid_regs regs;

	ASSERT(cpuid_checkpass(CPU, 1));

	cpi = CPU->cpu_m.mcpu_cpi;

	if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
		return (0);

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (cpi->cpi_xmaxeax < 0x80000007)
			return (0);

		/*
		 * Does the TSC run at a constant rate in all ACPI C-states?
		 */
		regs.cp_eax = 0x80000007;
		(void) __cpuid_insn(&regs);
		return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);

	default:
		return (0);
	}
}

#endif	/* !__xpv */

void
post_startup_cpu_fixups(void)
{
#ifndef	__xpv
	/*
	 * Some AMD processors support C1E state. Entering this state will
	 * cause the local APIC timer to stop, which we can't deal with at
	 * this time.
	 */
	if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
		on_trap_data_t otd;
		uint64_t reg;

		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
			/* Disable C1E state if it is enabled by BIOS */
			if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
			    AMD_ACTONCMPHALT_MASK) {
				reg &= ~(AMD_ACTONCMPHALT_MASK <<
				    AMD_ACTONCMPHALT_SHIFT);
				wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
			}
		}
		no_trap();
	}
#endif	/* !__xpv */
}

/*
 * Starting with the Westmere processor, the local APIC timer continues
 * running in all C-states, including the deepest C-states.
 */
int
cpuid_arat_supported(void)
{
	struct cpuid_info *cpi;
	struct cpuid_regs regs;

	ASSERT(cpuid_checkpass(CPU, 1));
	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));

	cpi = CPU->cpu_m.mcpu_cpi;

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		/*
		 * The always-running local APIC timer is
		 * indicated by CPUID.6.EAX[2].
		 */
		if (cpi->cpi_maxeax >= 6) {
			regs.cp_eax = 6;
			(void) cpuid_insn(NULL, &regs);
			return (regs.cp_eax & CPUID_CSTATE_ARAT);
		} else {
			return (0);
		}
	default:
		return (0);
	}
}

/*
 * Check support for the Intel ENERGY_PERF_BIAS feature.
 */
int
cpuid_iepb_supported(struct cpu *cp)
{
	struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
	struct cpuid_regs regs;

	ASSERT(cpuid_checkpass(cp, 1));

	if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
	    !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
		return (0);
	}

	/*
	 * The Intel ENERGY_PERF_BIAS MSR is indicated by
	 * capability bit CPUID.6.ECX[3].
	 */
	if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
		return (0);

	regs.cp_eax = 0x6;
	(void) cpuid_insn(NULL, &regs);
	return (regs.cp_ecx & CPUID_EPB_SUPPORT);
}

/*
 * Check support for the TSC deadline timer.
 *
 * The TSC deadline timer provides a superior software programming model
 * over the local APIC timer that eliminates "time drifts".  Instead of
 * specifying a relative time, software specifies an absolute time as the
 * target at which the processor should generate a timer event.
 */
int
cpuid_deadline_tsc_supported(void)
{
	struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
	struct cpuid_regs regs;

	ASSERT(cpuid_checkpass(CPU, 1));
	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (cpi->cpi_maxeax >= 1) {
			regs.cp_eax = 1;
			(void) cpuid_insn(NULL, &regs);
			return (regs.cp_ecx & CPUID_DEADLINE_TSC);
		} else {
			return (0);
		}
	default:
		return (0);
	}
}
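
/*
 * Illustrative example (not part of the original source): with a
 * hypothetical CPUID.6 result of %eax == 0x4 and %ecx == 0x8,
 * cpuid_arat_supported() above returns non-zero because EAX[2]
 * (always-running APIC timer) is set, and cpuid_iepb_supported()
 * returns non-zero because ECX[3] (ENERGY_PERF_BIAS MSR support) is
 * set.  cpuid_deadline_tsc_supported() instead examines CPUID.1:ECX
 * for the TSC deadline capability bit.
 */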

#if defined(__amd64) && !defined(__xpv)
/*
 * Patch in versions of bcopy for high-performance Intel Nehalem (Nhm)
 * processors and later...
 */
void
patch_memops(uint_t vendor)
{
	size_t cnt, i;
	caddr_t to, from;

	if ((vendor == X86_VENDOR_Intel) &&
	    is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
		cnt = &bcopy_patch_end - &bcopy_patch_start;
		to = &bcopy_ck_size;
		from = &bcopy_patch_start;
		for (i = 0; i < cnt; i++) {
			*to++ = *from++;
		}
	}
}
#endif	/* __amd64 && !__xpv */

/*
 * This function finds the number of bits to represent the number of cores per
 * chip and the number of strands per core for the Intel platforms.
 * It reuses the x2APIC cpuid code from cpuid_pass2().
 */
void
cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
{
	struct cpuid_regs regs;
	struct cpuid_regs *cp = &regs;

	if (vendor != X86_VENDOR_Intel) {
		return;
	}

	/*
	 * If the maximum cpuid level is at least 0xB, the extended
	 * topology leaf is available.
	 */
	cp->cp_eax = 0;
	if (__cpuid_insn(cp) >= 0xB) {

		cp->cp_eax = 0xB;
		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
		(void) __cpuid_insn(cp);

		/*
		 * Check that CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
		 * indicates that the extended topology enumeration leaf is
		 * available.
		 */
		if (cp->cp_ebx) {
			uint_t coreid_shift = 0;
			uint_t chipid_shift = 0;
			uint_t i;
			uint_t level;

			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
				cp->cp_eax = 0xB;
				cp->cp_ecx = i;

				(void) __cpuid_insn(cp);
				level = CPI_CPU_LEVEL_TYPE(cp);

				if (level == 1) {
					/*
					 * Thread level processor topology:
					 * number of bits to shift the APIC ID
					 * right to get the coreid.
					 */
					coreid_shift = BITX(cp->cp_eax, 4, 0);
				} else if (level == 2) {
					/*
					 * Core level processor topology:
					 * number of bits to shift the APIC ID
					 * right to get the chipid.
					 */
					chipid_shift = BITX(cp->cp_eax, 4, 0);
				}
			}

			if (coreid_shift > 0 && chipid_shift > coreid_shift) {
				*strand_nbits = coreid_shift;
				*core_nbits = chipid_shift - coreid_shift;
			}
		}
	}
}
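
/*
 * Illustrative example (not part of the original source): consider a
 * hypothetical Intel package with 4 cores and 2 hardware threads per
 * core.  Leaf 0xB would typically report an APIC ID shift of 1 at the
 * thread level (coreid_shift) and 3 at the core level (chipid_shift),
 * so cpuid_get_ext_topo() above would set *strand_nbits = 1 and
 * *core_nbits = 3 - 1 = 2.  A consumer could then decompose an x2APIC
 * ID as:
 *
 *	strand = id & ((1 << strand_nbits) - 1);
 *	core   = (id >> strand_nbits) & ((1 << core_nbits) - 1);
 *	chip   = id >> (strand_nbits + core_nbits);
 */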