1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 /* 31 * Various routines to handle identification 32 * and classification of x86 processors. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/archsystm.h> 37 #include <sys/x86_archext.h> 38 #include <sys/kmem.h> 39 #include <sys/systm.h> 40 #include <sys/cmn_err.h> 41 #include <sys/sunddi.h> 42 #include <sys/sunndi.h> 43 #include <sys/cpuvar.h> 44 #include <sys/processor.h> 45 #include <sys/sysmacros.h> 46 #include <sys/pg.h> 47 #include <sys/fp.h> 48 #include <sys/controlregs.h> 49 #include <sys/auxv_386.h> 50 #include <sys/bitmap.h> 51 #include <sys/memnode.h> 52 53 #ifdef __xpv 54 #include <sys/hypervisor.h> 55 #else 56 #include <sys/ontrap.h> 57 #endif 58 59 /* 60 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 61 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 62 * them accordingly. For most modern processors, feature detection occurs here 63 * in pass 1. 64 * 65 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 66 * for the boot CPU and does the basic analysis that the early kernel needs. 67 * x86_feature is set based on the return value of cpuid_pass1() of the boot 68 * CPU. 69 * 70 * Pass 1 includes: 71 * 72 * o Determining vendor/model/family/stepping and setting x86_type and 73 * x86_vendor accordingly. 74 * o Processing the feature flags returned by the cpuid instruction while 75 * applying any workarounds or tricks for the specific processor. 76 * o Mapping the feature flags into Solaris feature bits (X86_*). 77 * o Processing extended feature flags if supported by the processor, 78 * again while applying specific processor knowledge. 79 * o Determining the CMT characteristics of the system. 80 * 81 * Pass 1 is done on non-boot CPUs during their initialization and the results 82 * are used only as a meager attempt at ensuring that all processors within the 83 * system support the same features. 84 * 85 * Pass 2 of cpuid feature analysis happens just at the beginning 86 * of startup(). It just copies in and corrects the remainder 87 * of the cpuid data we depend on: standard cpuid functions that we didn't 88 * need for pass1 feature analysis, and extended cpuid functions beyond the 89 * simple feature processing done in pass1. 90 * 91 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 92 * particular kernel memory allocation has been made available. It creates a 93 * readable brand string based on the data collected in the first two passes. 94 * 95 * Pass 4 of cpuid analysis is invoked after post_startup() when all 96 * the support infrastructure for various hardware features has been 97 * initialized. It determines which processor features will be reported 98 * to userland via the aux vector. 99 * 100 * All passes are executed on all CPUs, but only the boot CPU determines what 101 * features the kernel will use. 102 * 103 * Much of the worst junk in this file is for the support of processors 104 * that didn't really implement the cpuid instruction properly. 105 * 106 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 107 * the pass numbers. Accordingly, changes to the pass code may require changes 108 * to the accessor code. 109 */ 110 111 uint_t x86_feature = 0; 112 uint_t x86_vendor = X86_VENDOR_IntelClone; 113 uint_t x86_type = X86_TYPE_OTHER; 114 uint_t x86_clflush_size = 0; 115 116 uint_t pentiumpro_bug4046376; 117 uint_t pentiumpro_bug4064495; 118 119 uint_t enable486; 120 /* 121 * This is set to platform type Solaris is running on. 122 */ 123 static int platform_type = -1; 124 125 #if !defined(__xpv) 126 /* 127 * Variable to patch if hypervisor platform detection needs to be 128 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0). 129 */ 130 int enable_platform_detection = 1; 131 #endif 132 133 /* 134 * monitor/mwait info. 135 * 136 * size_actual and buf_actual are the real address and size allocated to get 137 * proper mwait_buf alignement. buf_actual and size_actual should be passed 138 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 139 * processor cache-line alignment, but this is not guarantied in the furture. 140 */ 141 struct mwait_info { 142 size_t mon_min; /* min size to avoid missed wakeups */ 143 size_t mon_max; /* size to avoid false wakeups */ 144 size_t size_actual; /* size actually allocated */ 145 void *buf_actual; /* memory actually allocated */ 146 uint32_t support; /* processor support of monitor/mwait */ 147 }; 148 149 /* 150 * These constants determine how many of the elements of the 151 * cpuid we cache in the cpuid_info data structure; the 152 * remaining elements are accessible via the cpuid instruction. 153 */ 154 155 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 156 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 0x80000008 */ 157 158 struct cpuid_info { 159 uint_t cpi_pass; /* last pass completed */ 160 /* 161 * standard function information 162 */ 163 uint_t cpi_maxeax; /* fn 0: %eax */ 164 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 165 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 166 167 uint_t cpi_family; /* fn 1: extended family */ 168 uint_t cpi_model; /* fn 1: extended model */ 169 uint_t cpi_step; /* fn 1: stepping */ 170 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 171 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 172 int cpi_clogid; /* fn 1: %ebx: thread # */ 173 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 174 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 175 uint_t cpi_ncache; /* fn 2: number of elements */ 176 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 177 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 178 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 179 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 180 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 181 /* 182 * extended function information 183 */ 184 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 185 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 186 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 187 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 188 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 189 id_t cpi_coreid; /* same coreid => strands share core */ 190 int cpi_pkgcoreid; /* core number within single package */ 191 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 192 /* Intel: fn 4: %eax[31-26] */ 193 /* 194 * supported feature information 195 */ 196 uint32_t cpi_support[5]; 197 #define STD_EDX_FEATURES 0 198 #define AMD_EDX_FEATURES 1 199 #define TM_EDX_FEATURES 2 200 #define STD_ECX_FEATURES 3 201 #define AMD_ECX_FEATURES 4 202 /* 203 * Synthesized information, where known. 204 */ 205 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 206 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 207 uint32_t cpi_socket; /* Chip package/socket type */ 208 209 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 210 uint32_t cpi_apicid; 211 }; 212 213 214 static struct cpuid_info cpuid_info0; 215 216 /* 217 * These bit fields are defined by the Intel Application Note AP-485 218 * "Intel Processor Identification and the CPUID Instruction" 219 */ 220 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 221 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 222 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 223 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 224 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 225 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 226 227 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 228 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 229 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 230 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 231 232 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 233 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 234 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 235 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 236 237 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 238 #define CPI_XMAXEAX_MAX 0x80000100 239 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 240 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 241 242 /* 243 * Function 4 (Deterministic Cache Parameters) macros 244 * Defined by Intel Application Note AP-485 245 */ 246 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 247 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 248 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 249 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 250 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 251 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 252 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 253 254 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 255 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 256 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 257 258 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 259 260 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 261 262 263 /* 264 * A couple of shorthand macros to identify "later" P6-family chips 265 * like the Pentium M and Core. First, the "older" P6-based stuff 266 * (loosely defined as "pre-Pentium-4"): 267 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 268 */ 269 270 #define IS_LEGACY_P6(cpi) ( \ 271 cpi->cpi_family == 6 && \ 272 (cpi->cpi_model == 1 || \ 273 cpi->cpi_model == 3 || \ 274 cpi->cpi_model == 5 || \ 275 cpi->cpi_model == 6 || \ 276 cpi->cpi_model == 7 || \ 277 cpi->cpi_model == 8 || \ 278 cpi->cpi_model == 0xA || \ 279 cpi->cpi_model == 0xB) \ 280 ) 281 282 /* A "new F6" is everything with family 6 that's not the above */ 283 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 284 285 /* Extended family/model support */ 286 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 287 cpi->cpi_family >= 0xf) 288 289 /* 290 * Info for monitor/mwait idle loop. 291 * 292 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 293 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 294 * 2006. 295 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 296 * Documentation Updates" #33633, Rev 2.05, December 2006. 297 */ 298 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 299 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 300 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 301 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 302 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 303 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 304 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 305 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 306 /* 307 * Number of sub-cstates for a given c-state. 308 */ 309 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 310 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 311 312 /* 313 * Functions we consune from cpuid_subr.c; don't publish these in a header 314 * file to try and keep people using the expected cpuid_* interfaces. 315 */ 316 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 317 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 318 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 319 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 320 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 321 322 /* 323 * Apply up various platform-dependent restrictions where the 324 * underlying platform restrictions mean the CPU can be marked 325 * as less capable than its cpuid instruction would imply. 326 */ 327 #if defined(__xpv) 328 static void 329 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 330 { 331 switch (eax) { 332 case 1: { 333 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 334 0 : CPUID_INTC_EDX_MCA; 335 cp->cp_edx &= 336 ~(mcamask | 337 CPUID_INTC_EDX_PSE | 338 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 339 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 340 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 341 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 342 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 343 break; 344 } 345 346 case 0x80000001: 347 cp->cp_edx &= 348 ~(CPUID_AMD_EDX_PSE | 349 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 350 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 351 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 352 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 353 CPUID_AMD_EDX_TSCP); 354 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 355 break; 356 default: 357 break; 358 } 359 360 switch (vendor) { 361 case X86_VENDOR_Intel: 362 switch (eax) { 363 case 4: 364 /* 365 * Zero out the (ncores-per-chip - 1) field 366 */ 367 cp->cp_eax &= 0x03fffffff; 368 break; 369 default: 370 break; 371 } 372 break; 373 case X86_VENDOR_AMD: 374 switch (eax) { 375 376 case 0x80000001: 377 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 378 break; 379 380 case 0x80000008: 381 /* 382 * Zero out the (ncores-per-chip - 1) field 383 */ 384 cp->cp_ecx &= 0xffffff00; 385 break; 386 default: 387 break; 388 } 389 break; 390 default: 391 break; 392 } 393 } 394 #else 395 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 396 #endif 397 398 /* 399 * Some undocumented ways of patching the results of the cpuid 400 * instruction to permit running Solaris 10 on future cpus that 401 * we don't currently support. Could be set to non-zero values 402 * via settings in eeprom. 403 */ 404 405 uint32_t cpuid_feature_ecx_include; 406 uint32_t cpuid_feature_ecx_exclude; 407 uint32_t cpuid_feature_edx_include; 408 uint32_t cpuid_feature_edx_exclude; 409 410 void 411 cpuid_alloc_space(cpu_t *cpu) 412 { 413 /* 414 * By convention, cpu0 is the boot cpu, which is set up 415 * before memory allocation is available. All other cpus get 416 * their cpuid_info struct allocated here. 417 */ 418 ASSERT(cpu->cpu_id != 0); 419 cpu->cpu_m.mcpu_cpi = 420 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 421 } 422 423 void 424 cpuid_free_space(cpu_t *cpu) 425 { 426 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 427 int i; 428 429 ASSERT(cpu->cpu_id != 0); 430 431 /* 432 * Free up any function 4 related dynamic storage 433 */ 434 for (i = 1; i < cpi->cpi_std_4_size; i++) 435 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 436 if (cpi->cpi_std_4_size > 0) 437 kmem_free(cpi->cpi_std_4, 438 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 439 440 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 441 } 442 443 #if !defined(__xpv) 444 445 static void 446 determine_platform() 447 { 448 struct cpuid_regs cp; 449 char *xen_str; 450 uint32_t xen_signature[4]; 451 452 platform_type = HW_NATIVE; 453 454 if (!enable_platform_detection) 455 return; 456 457 /* 458 * In a fully virtualized domain, Xen's pseudo-cpuid function 459 * 0x40000000 returns a string representing the Xen signature in 460 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 461 * function. 462 */ 463 cp.cp_eax = 0x40000000; 464 (void) __cpuid_insn(&cp); 465 xen_signature[0] = cp.cp_ebx; 466 xen_signature[1] = cp.cp_ecx; 467 xen_signature[2] = cp.cp_edx; 468 xen_signature[3] = 0; 469 xen_str = (char *)xen_signature; 470 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) { 471 platform_type = HW_XEN_HVM; 472 } else if (vmware_platform()) { /* running under vmware hypervisor? */ 473 platform_type = HW_VMWARE; 474 } 475 } 476 477 int 478 get_hwenv(void) 479 { 480 if (platform_type == -1) 481 determine_platform(); 482 483 return (platform_type); 484 } 485 486 int 487 is_controldom(void) 488 { 489 return (0); 490 } 491 492 #else 493 494 int 495 get_hwenv(void) 496 { 497 return (HW_XEN_PV); 498 } 499 500 int 501 is_controldom(void) 502 { 503 return (DOMAIN_IS_INITDOMAIN(xen_info)); 504 } 505 506 #endif /* __xpv */ 507 508 uint_t 509 cpuid_pass1(cpu_t *cpu) 510 { 511 uint32_t mask_ecx, mask_edx; 512 uint_t feature = X86_CPUID; 513 struct cpuid_info *cpi; 514 struct cpuid_regs *cp; 515 int xcpuid; 516 #if !defined(__xpv) 517 extern int idle_cpu_prefer_mwait; 518 #endif 519 520 521 #if !defined(__xpv) 522 determine_platform(); 523 #endif 524 /* 525 * Space statically allocated for cpu0, ensure pointer is set 526 */ 527 if (cpu->cpu_id == 0) 528 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 529 cpi = cpu->cpu_m.mcpu_cpi; 530 ASSERT(cpi != NULL); 531 cp = &cpi->cpi_std[0]; 532 cp->cp_eax = 0; 533 cpi->cpi_maxeax = __cpuid_insn(cp); 534 { 535 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 536 *iptr++ = cp->cp_ebx; 537 *iptr++ = cp->cp_edx; 538 *iptr++ = cp->cp_ecx; 539 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 540 } 541 542 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 543 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 544 545 /* 546 * Limit the range in case of weird hardware 547 */ 548 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 549 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 550 if (cpi->cpi_maxeax < 1) 551 goto pass1_done; 552 553 cp = &cpi->cpi_std[1]; 554 cp->cp_eax = 1; 555 (void) __cpuid_insn(cp); 556 557 /* 558 * Extract identifying constants for easy access. 559 */ 560 cpi->cpi_model = CPI_MODEL(cpi); 561 cpi->cpi_family = CPI_FAMILY(cpi); 562 563 if (cpi->cpi_family == 0xf) 564 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 565 566 /* 567 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 568 * Intel, and presumably everyone else, uses model == 0xf, as 569 * one would expect (max value means possible overflow). Sigh. 570 */ 571 572 switch (cpi->cpi_vendor) { 573 case X86_VENDOR_Intel: 574 if (IS_EXTENDED_MODEL_INTEL(cpi)) 575 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 576 break; 577 case X86_VENDOR_AMD: 578 if (CPI_FAMILY(cpi) == 0xf) 579 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 580 break; 581 default: 582 if (cpi->cpi_model == 0xf) 583 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 584 break; 585 } 586 587 cpi->cpi_step = CPI_STEP(cpi); 588 cpi->cpi_brandid = CPI_BRANDID(cpi); 589 590 /* 591 * *default* assumptions: 592 * - believe %edx feature word 593 * - ignore %ecx feature word 594 * - 32-bit virtual and physical addressing 595 */ 596 mask_edx = 0xffffffff; 597 mask_ecx = 0; 598 599 cpi->cpi_pabits = cpi->cpi_vabits = 32; 600 601 switch (cpi->cpi_vendor) { 602 case X86_VENDOR_Intel: 603 if (cpi->cpi_family == 5) 604 x86_type = X86_TYPE_P5; 605 else if (IS_LEGACY_P6(cpi)) { 606 x86_type = X86_TYPE_P6; 607 pentiumpro_bug4046376 = 1; 608 pentiumpro_bug4064495 = 1; 609 /* 610 * Clear the SEP bit when it was set erroneously 611 */ 612 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 613 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 614 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 615 x86_type = X86_TYPE_P4; 616 /* 617 * We don't currently depend on any of the %ecx 618 * features until Prescott, so we'll only check 619 * this from P4 onwards. We might want to revisit 620 * that idea later. 621 */ 622 mask_ecx = 0xffffffff; 623 } else if (cpi->cpi_family > 0xf) 624 mask_ecx = 0xffffffff; 625 /* 626 * We don't support MONITOR/MWAIT if leaf 5 is not available 627 * to obtain the monitor linesize. 628 */ 629 if (cpi->cpi_maxeax < 5) 630 mask_ecx &= ~CPUID_INTC_ECX_MON; 631 break; 632 case X86_VENDOR_IntelClone: 633 default: 634 break; 635 case X86_VENDOR_AMD: 636 #if defined(OPTERON_ERRATUM_108) 637 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 638 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 639 cpi->cpi_model = 0xc; 640 } else 641 #endif 642 if (cpi->cpi_family == 5) { 643 /* 644 * AMD K5 and K6 645 * 646 * These CPUs have an incomplete implementation 647 * of MCA/MCE which we mask away. 648 */ 649 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 650 651 /* 652 * Model 0 uses the wrong (APIC) bit 653 * to indicate PGE. Fix it here. 654 */ 655 if (cpi->cpi_model == 0) { 656 if (cp->cp_edx & 0x200) { 657 cp->cp_edx &= ~0x200; 658 cp->cp_edx |= CPUID_INTC_EDX_PGE; 659 } 660 } 661 662 /* 663 * Early models had problems w/ MMX; disable. 664 */ 665 if (cpi->cpi_model < 6) 666 mask_edx &= ~CPUID_INTC_EDX_MMX; 667 } 668 669 /* 670 * For newer families, SSE3 and CX16, at least, are valid; 671 * enable all 672 */ 673 if (cpi->cpi_family >= 0xf) 674 mask_ecx = 0xffffffff; 675 /* 676 * We don't support MONITOR/MWAIT if leaf 5 is not available 677 * to obtain the monitor linesize. 678 */ 679 if (cpi->cpi_maxeax < 5) 680 mask_ecx &= ~CPUID_INTC_ECX_MON; 681 682 #if !defined(__xpv) 683 /* 684 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 685 * processors. AMD does not intend MWAIT to be used in the cpu 686 * idle loop on current and future processors. 10h and future 687 * AMD processors use more power in MWAIT than HLT. 688 * Pre-family-10h Opterons do not have the MWAIT instruction. 689 */ 690 idle_cpu_prefer_mwait = 0; 691 #endif 692 693 break; 694 case X86_VENDOR_TM: 695 /* 696 * workaround the NT workaround in CMS 4.1 697 */ 698 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 699 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 700 cp->cp_edx |= CPUID_INTC_EDX_CX8; 701 break; 702 case X86_VENDOR_Centaur: 703 /* 704 * workaround the NT workarounds again 705 */ 706 if (cpi->cpi_family == 6) 707 cp->cp_edx |= CPUID_INTC_EDX_CX8; 708 break; 709 case X86_VENDOR_Cyrix: 710 /* 711 * We rely heavily on the probing in locore 712 * to actually figure out what parts, if any, 713 * of the Cyrix cpuid instruction to believe. 714 */ 715 switch (x86_type) { 716 case X86_TYPE_CYRIX_486: 717 mask_edx = 0; 718 break; 719 case X86_TYPE_CYRIX_6x86: 720 mask_edx = 0; 721 break; 722 case X86_TYPE_CYRIX_6x86L: 723 mask_edx = 724 CPUID_INTC_EDX_DE | 725 CPUID_INTC_EDX_CX8; 726 break; 727 case X86_TYPE_CYRIX_6x86MX: 728 mask_edx = 729 CPUID_INTC_EDX_DE | 730 CPUID_INTC_EDX_MSR | 731 CPUID_INTC_EDX_CX8 | 732 CPUID_INTC_EDX_PGE | 733 CPUID_INTC_EDX_CMOV | 734 CPUID_INTC_EDX_MMX; 735 break; 736 case X86_TYPE_CYRIX_GXm: 737 mask_edx = 738 CPUID_INTC_EDX_MSR | 739 CPUID_INTC_EDX_CX8 | 740 CPUID_INTC_EDX_CMOV | 741 CPUID_INTC_EDX_MMX; 742 break; 743 case X86_TYPE_CYRIX_MediaGX: 744 break; 745 case X86_TYPE_CYRIX_MII: 746 case X86_TYPE_VIA_CYRIX_III: 747 mask_edx = 748 CPUID_INTC_EDX_DE | 749 CPUID_INTC_EDX_TSC | 750 CPUID_INTC_EDX_MSR | 751 CPUID_INTC_EDX_CX8 | 752 CPUID_INTC_EDX_PGE | 753 CPUID_INTC_EDX_CMOV | 754 CPUID_INTC_EDX_MMX; 755 break; 756 default: 757 break; 758 } 759 break; 760 } 761 762 #if defined(__xpv) 763 /* 764 * Do not support MONITOR/MWAIT under a hypervisor 765 */ 766 mask_ecx &= ~CPUID_INTC_ECX_MON; 767 #endif /* __xpv */ 768 769 /* 770 * Now we've figured out the masks that determine 771 * which bits we choose to believe, apply the masks 772 * to the feature words, then map the kernel's view 773 * of these feature words into its feature word. 774 */ 775 cp->cp_edx &= mask_edx; 776 cp->cp_ecx &= mask_ecx; 777 778 /* 779 * apply any platform restrictions (we don't call this 780 * immediately after __cpuid_insn here, because we need the 781 * workarounds applied above first) 782 */ 783 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 784 785 /* 786 * fold in overrides from the "eeprom" mechanism 787 */ 788 cp->cp_edx |= cpuid_feature_edx_include; 789 cp->cp_edx &= ~cpuid_feature_edx_exclude; 790 791 cp->cp_ecx |= cpuid_feature_ecx_include; 792 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 793 794 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 795 feature |= X86_LARGEPAGE; 796 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 797 feature |= X86_TSC; 798 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 799 feature |= X86_MSR; 800 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 801 feature |= X86_MTRR; 802 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 803 feature |= X86_PGE; 804 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 805 feature |= X86_CMOV; 806 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 807 feature |= X86_MMX; 808 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 809 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 810 feature |= X86_MCA; 811 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 812 feature |= X86_PAE; 813 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 814 feature |= X86_CX8; 815 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 816 feature |= X86_CX16; 817 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 818 feature |= X86_PAT; 819 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 820 feature |= X86_SEP; 821 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 822 /* 823 * In our implementation, fxsave/fxrstor 824 * are prerequisites before we'll even 825 * try and do SSE things. 826 */ 827 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 828 feature |= X86_SSE; 829 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 830 feature |= X86_SSE2; 831 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 832 feature |= X86_SSE3; 833 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 834 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 835 feature |= X86_SSSE3; 836 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 837 feature |= X86_SSE4_1; 838 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 839 feature |= X86_SSE4_2; 840 if (cp->cp_ecx & CPUID_INTC_ECX_AES) 841 feature |= X86_AES; 842 } 843 } 844 if (cp->cp_edx & CPUID_INTC_EDX_DE) 845 feature |= X86_DE; 846 #if !defined(__xpv) 847 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 848 849 /* 850 * We require the CLFLUSH instruction for erratum workaround 851 * to use MONITOR/MWAIT. 852 */ 853 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 854 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 855 feature |= X86_MWAIT; 856 } else { 857 extern int idle_cpu_assert_cflush_monitor; 858 859 /* 860 * All processors we are aware of which have 861 * MONITOR/MWAIT also have CLFLUSH. 862 */ 863 if (idle_cpu_assert_cflush_monitor) { 864 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 865 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 866 } 867 } 868 } 869 #endif /* __xpv */ 870 871 /* 872 * Only need it first time, rest of the cpus would follow suite. 873 * we only capture this for the bootcpu. 874 */ 875 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 876 feature |= X86_CLFSH; 877 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 878 } 879 880 if (feature & X86_PAE) 881 cpi->cpi_pabits = 36; 882 883 /* 884 * Hyperthreading configuration is slightly tricky on Intel 885 * and pure clones, and even trickier on AMD. 886 * 887 * (AMD chose to set the HTT bit on their CMP processors, 888 * even though they're not actually hyperthreaded. Thus it 889 * takes a bit more work to figure out what's really going 890 * on ... see the handling of the CMP_LGCY bit below) 891 */ 892 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 893 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 894 if (cpi->cpi_ncpu_per_chip > 1) 895 feature |= X86_HTT; 896 } else { 897 cpi->cpi_ncpu_per_chip = 1; 898 } 899 900 /* 901 * Work on the "extended" feature information, doing 902 * some basic initialization for cpuid_pass2() 903 */ 904 xcpuid = 0; 905 switch (cpi->cpi_vendor) { 906 case X86_VENDOR_Intel: 907 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 908 xcpuid++; 909 break; 910 case X86_VENDOR_AMD: 911 if (cpi->cpi_family > 5 || 912 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 913 xcpuid++; 914 break; 915 case X86_VENDOR_Cyrix: 916 /* 917 * Only these Cyrix CPUs are -known- to support 918 * extended cpuid operations. 919 */ 920 if (x86_type == X86_TYPE_VIA_CYRIX_III || 921 x86_type == X86_TYPE_CYRIX_GXm) 922 xcpuid++; 923 break; 924 case X86_VENDOR_Centaur: 925 case X86_VENDOR_TM: 926 default: 927 xcpuid++; 928 break; 929 } 930 931 if (xcpuid) { 932 cp = &cpi->cpi_extd[0]; 933 cp->cp_eax = 0x80000000; 934 cpi->cpi_xmaxeax = __cpuid_insn(cp); 935 } 936 937 if (cpi->cpi_xmaxeax & 0x80000000) { 938 939 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 940 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 941 942 switch (cpi->cpi_vendor) { 943 case X86_VENDOR_Intel: 944 case X86_VENDOR_AMD: 945 if (cpi->cpi_xmaxeax < 0x80000001) 946 break; 947 cp = &cpi->cpi_extd[1]; 948 cp->cp_eax = 0x80000001; 949 (void) __cpuid_insn(cp); 950 951 if (cpi->cpi_vendor == X86_VENDOR_AMD && 952 cpi->cpi_family == 5 && 953 cpi->cpi_model == 6 && 954 cpi->cpi_step == 6) { 955 /* 956 * K6 model 6 uses bit 10 to indicate SYSC 957 * Later models use bit 11. Fix it here. 958 */ 959 if (cp->cp_edx & 0x400) { 960 cp->cp_edx &= ~0x400; 961 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 962 } 963 } 964 965 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 966 967 /* 968 * Compute the additions to the kernel's feature word. 969 */ 970 if (cp->cp_edx & CPUID_AMD_EDX_NX) 971 feature |= X86_NX; 972 973 /* 974 * Regardless whether or not we boot 64-bit, 975 * we should have a way to identify whether 976 * the CPU is capable of running 64-bit. 977 */ 978 if (cp->cp_edx & CPUID_AMD_EDX_LM) 979 feature |= X86_64; 980 981 #if defined(__amd64) 982 /* 1 GB large page - enable only for 64 bit kernel */ 983 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 984 feature |= X86_1GPG; 985 #endif 986 987 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 988 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 989 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 990 feature |= X86_SSE4A; 991 992 /* 993 * If both the HTT and CMP_LGCY bits are set, 994 * then we're not actually HyperThreaded. Read 995 * "AMD CPUID Specification" for more details. 996 */ 997 if (cpi->cpi_vendor == X86_VENDOR_AMD && 998 (feature & X86_HTT) && 999 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1000 feature &= ~X86_HTT; 1001 feature |= X86_CMP; 1002 } 1003 #if defined(__amd64) 1004 /* 1005 * It's really tricky to support syscall/sysret in 1006 * the i386 kernel; we rely on sysenter/sysexit 1007 * instead. In the amd64 kernel, things are -way- 1008 * better. 1009 */ 1010 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 1011 feature |= X86_ASYSC; 1012 1013 /* 1014 * While we're thinking about system calls, note 1015 * that AMD processors don't support sysenter 1016 * in long mode at all, so don't try to program them. 1017 */ 1018 if (x86_vendor == X86_VENDOR_AMD) 1019 feature &= ~X86_SEP; 1020 #endif 1021 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1022 feature |= X86_TSCP; 1023 break; 1024 default: 1025 break; 1026 } 1027 1028 /* 1029 * Get CPUID data about processor cores and hyperthreads. 1030 */ 1031 switch (cpi->cpi_vendor) { 1032 case X86_VENDOR_Intel: 1033 if (cpi->cpi_maxeax >= 4) { 1034 cp = &cpi->cpi_std[4]; 1035 cp->cp_eax = 4; 1036 cp->cp_ecx = 0; 1037 (void) __cpuid_insn(cp); 1038 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1039 } 1040 /*FALLTHROUGH*/ 1041 case X86_VENDOR_AMD: 1042 if (cpi->cpi_xmaxeax < 0x80000008) 1043 break; 1044 cp = &cpi->cpi_extd[8]; 1045 cp->cp_eax = 0x80000008; 1046 (void) __cpuid_insn(cp); 1047 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1048 1049 /* 1050 * Virtual and physical address limits from 1051 * cpuid override previously guessed values. 1052 */ 1053 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1054 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1055 break; 1056 default: 1057 break; 1058 } 1059 1060 /* 1061 * Derive the number of cores per chip 1062 */ 1063 switch (cpi->cpi_vendor) { 1064 case X86_VENDOR_Intel: 1065 if (cpi->cpi_maxeax < 4) { 1066 cpi->cpi_ncore_per_chip = 1; 1067 break; 1068 } else { 1069 cpi->cpi_ncore_per_chip = 1070 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1071 } 1072 break; 1073 case X86_VENDOR_AMD: 1074 if (cpi->cpi_xmaxeax < 0x80000008) { 1075 cpi->cpi_ncore_per_chip = 1; 1076 break; 1077 } else { 1078 /* 1079 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1080 * 1 less than the number of physical cores on 1081 * the chip. In family 0x10 this value can 1082 * be affected by "downcoring" - it reflects 1083 * 1 less than the number of cores actually 1084 * enabled on this node. 1085 */ 1086 cpi->cpi_ncore_per_chip = 1087 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1088 } 1089 break; 1090 default: 1091 cpi->cpi_ncore_per_chip = 1; 1092 break; 1093 } 1094 1095 /* 1096 * Get CPUID data about TSC Invariance in Deep C-State. 1097 */ 1098 switch (cpi->cpi_vendor) { 1099 case X86_VENDOR_Intel: 1100 if (cpi->cpi_maxeax >= 7) { 1101 cp = &cpi->cpi_extd[7]; 1102 cp->cp_eax = 0x80000007; 1103 cp->cp_ecx = 0; 1104 (void) __cpuid_insn(cp); 1105 } 1106 break; 1107 default: 1108 break; 1109 } 1110 } else { 1111 cpi->cpi_ncore_per_chip = 1; 1112 } 1113 1114 /* 1115 * If more than one core, then this processor is CMP. 1116 */ 1117 if (cpi->cpi_ncore_per_chip > 1) 1118 feature |= X86_CMP; 1119 1120 /* 1121 * If the number of cores is the same as the number 1122 * of CPUs, then we cannot have HyperThreading. 1123 */ 1124 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1125 feature &= ~X86_HTT; 1126 1127 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1128 /* 1129 * Single-core single-threaded processors. 1130 */ 1131 cpi->cpi_chipid = -1; 1132 cpi->cpi_clogid = 0; 1133 cpi->cpi_coreid = cpu->cpu_id; 1134 cpi->cpi_pkgcoreid = 0; 1135 } else if (cpi->cpi_ncpu_per_chip > 1) { 1136 uint_t i; 1137 uint_t chipid_shift = 0; 1138 uint_t coreid_shift = 0; 1139 uint_t apic_id = CPI_APIC_ID(cpi); 1140 1141 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1142 chipid_shift++; 1143 cpi->cpi_chipid = apic_id >> chipid_shift; 1144 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1145 1146 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1147 if (feature & X86_CMP) { 1148 /* 1149 * Multi-core (and possibly multi-threaded) 1150 * processors. 1151 */ 1152 uint_t ncpu_per_core; 1153 if (cpi->cpi_ncore_per_chip == 1) 1154 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1155 else if (cpi->cpi_ncore_per_chip > 1) 1156 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1157 cpi->cpi_ncore_per_chip; 1158 /* 1159 * 8bit APIC IDs on dual core Pentiums 1160 * look like this: 1161 * 1162 * +-----------------------+------+------+ 1163 * | Physical Package ID | MC | HT | 1164 * +-----------------------+------+------+ 1165 * <------- chipid --------> 1166 * <------- coreid ---------------> 1167 * <--- clogid --> 1168 * <------> 1169 * pkgcoreid 1170 * 1171 * Where the number of bits necessary to 1172 * represent MC and HT fields together equals 1173 * to the minimum number of bits necessary to 1174 * store the value of cpi->cpi_ncpu_per_chip. 1175 * Of those bits, the MC part uses the number 1176 * of bits necessary to store the value of 1177 * cpi->cpi_ncore_per_chip. 1178 */ 1179 for (i = 1; i < ncpu_per_core; i <<= 1) 1180 coreid_shift++; 1181 cpi->cpi_coreid = apic_id >> coreid_shift; 1182 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1183 coreid_shift; 1184 } else if (feature & X86_HTT) { 1185 /* 1186 * Single-core multi-threaded processors. 1187 */ 1188 cpi->cpi_coreid = cpi->cpi_chipid; 1189 cpi->cpi_pkgcoreid = 0; 1190 } 1191 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1192 /* 1193 * AMD CMP chips currently have a single thread per 1194 * core, with 2 cores on family 0xf and 2, 3 or 4 1195 * cores on family 0x10. 1196 * 1197 * Since no two cpus share a core we must assign a 1198 * distinct coreid per cpu, and we do this by using 1199 * the cpu_id. This scheme does not, however, 1200 * guarantee that sibling cores of a chip will have 1201 * sequential coreids starting at a multiple of the 1202 * number of cores per chip - that is usually the 1203 * case, but if the ACPI MADT table is presented 1204 * in a different order then we need to perform a 1205 * few more gymnastics for the pkgcoreid. 1206 * 1207 * In family 0xf CMPs there are 2 cores on all nodes 1208 * present - no mixing of single and dual core parts. 1209 * 1210 * In family 0x10 CMPs cpuid fn 2 ECX[15:12] 1211 * "ApicIdCoreIdSize[3:0]" tells us how 1212 * many least-significant bits in the ApicId 1213 * are used to represent the core number 1214 * within the node. Cores are always 1215 * numbered sequentially from 0 regardless 1216 * of how many or which are disabled, and 1217 * there seems to be no way to discover the 1218 * real core id when some are disabled. 1219 */ 1220 cpi->cpi_coreid = cpu->cpu_id; 1221 1222 if (cpi->cpi_family == 0x10 && 1223 cpi->cpi_xmaxeax >= 0x80000008) { 1224 int coreidsz = 1225 BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 1226 1227 cpi->cpi_pkgcoreid = 1228 apic_id & ((1 << coreidsz) - 1); 1229 } else { 1230 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 1231 } 1232 } else { 1233 /* 1234 * All other processors are currently 1235 * assumed to have single cores. 1236 */ 1237 cpi->cpi_coreid = cpi->cpi_chipid; 1238 cpi->cpi_pkgcoreid = 0; 1239 } 1240 } 1241 1242 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1243 1244 /* 1245 * Synthesize chip "revision" and socket type 1246 */ 1247 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1248 cpi->cpi_model, cpi->cpi_step); 1249 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1250 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1251 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1252 cpi->cpi_model, cpi->cpi_step); 1253 1254 pass1_done: 1255 cpi->cpi_pass = 1; 1256 return (feature); 1257 } 1258 1259 /* 1260 * Make copies of the cpuid table entries we depend on, in 1261 * part for ease of parsing now, in part so that we have only 1262 * one place to correct any of it, in part for ease of 1263 * later export to userland, and in part so we can look at 1264 * this stuff in a crash dump. 1265 */ 1266 1267 /*ARGSUSED*/ 1268 void 1269 cpuid_pass2(cpu_t *cpu) 1270 { 1271 uint_t n, nmax; 1272 int i; 1273 struct cpuid_regs *cp; 1274 uint8_t *dp; 1275 uint32_t *iptr; 1276 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1277 1278 ASSERT(cpi->cpi_pass == 1); 1279 1280 if (cpi->cpi_maxeax < 1) 1281 goto pass2_done; 1282 1283 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1284 nmax = NMAX_CPI_STD; 1285 /* 1286 * (We already handled n == 0 and n == 1 in pass 1) 1287 */ 1288 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1289 cp->cp_eax = n; 1290 1291 /* 1292 * CPUID function 4 expects %ecx to be initialized 1293 * with an index which indicates which cache to return 1294 * information about. The OS is expected to call function 4 1295 * with %ecx set to 0, 1, 2, ... until it returns with 1296 * EAX[4:0] set to 0, which indicates there are no more 1297 * caches. 1298 * 1299 * Here, populate cpi_std[4] with the information returned by 1300 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1301 * when dynamic memory allocation becomes available. 1302 * 1303 * Note: we need to explicitly initialize %ecx here, since 1304 * function 4 may have been previously invoked. 1305 */ 1306 if (n == 4) 1307 cp->cp_ecx = 0; 1308 1309 (void) __cpuid_insn(cp); 1310 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1311 switch (n) { 1312 case 2: 1313 /* 1314 * "the lower 8 bits of the %eax register 1315 * contain a value that identifies the number 1316 * of times the cpuid [instruction] has to be 1317 * executed to obtain a complete image of the 1318 * processor's caching systems." 1319 * 1320 * How *do* they make this stuff up? 1321 */ 1322 cpi->cpi_ncache = sizeof (*cp) * 1323 BITX(cp->cp_eax, 7, 0); 1324 if (cpi->cpi_ncache == 0) 1325 break; 1326 cpi->cpi_ncache--; /* skip count byte */ 1327 1328 /* 1329 * Well, for now, rather than attempt to implement 1330 * this slightly dubious algorithm, we just look 1331 * at the first 15 .. 1332 */ 1333 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1334 cpi->cpi_ncache = sizeof (*cp) - 1; 1335 1336 dp = cpi->cpi_cacheinfo; 1337 if (BITX(cp->cp_eax, 31, 31) == 0) { 1338 uint8_t *p = (void *)&cp->cp_eax; 1339 for (i = 1; i < 4; i++) 1340 if (p[i] != 0) 1341 *dp++ = p[i]; 1342 } 1343 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1344 uint8_t *p = (void *)&cp->cp_ebx; 1345 for (i = 0; i < 4; i++) 1346 if (p[i] != 0) 1347 *dp++ = p[i]; 1348 } 1349 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1350 uint8_t *p = (void *)&cp->cp_ecx; 1351 for (i = 0; i < 4; i++) 1352 if (p[i] != 0) 1353 *dp++ = p[i]; 1354 } 1355 if (BITX(cp->cp_edx, 31, 31) == 0) { 1356 uint8_t *p = (void *)&cp->cp_edx; 1357 for (i = 0; i < 4; i++) 1358 if (p[i] != 0) 1359 *dp++ = p[i]; 1360 } 1361 break; 1362 1363 case 3: /* Processor serial number, if PSN supported */ 1364 break; 1365 1366 case 4: /* Deterministic cache parameters */ 1367 break; 1368 1369 case 5: /* Monitor/Mwait parameters */ 1370 { 1371 size_t mwait_size; 1372 1373 /* 1374 * check cpi_mwait.support which was set in cpuid_pass1 1375 */ 1376 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1377 break; 1378 1379 /* 1380 * Protect ourself from insane mwait line size. 1381 * Workaround for incomplete hardware emulator(s). 1382 */ 1383 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1384 if (mwait_size < sizeof (uint32_t) || 1385 !ISP2(mwait_size)) { 1386 #if DEBUG 1387 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1388 "size %ld", cpu->cpu_id, (long)mwait_size); 1389 #endif 1390 break; 1391 } 1392 1393 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1394 cpi->cpi_mwait.mon_max = mwait_size; 1395 if (MWAIT_EXTENSION(cpi)) { 1396 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1397 if (MWAIT_INT_ENABLE(cpi)) 1398 cpi->cpi_mwait.support |= 1399 MWAIT_ECX_INT_ENABLE; 1400 } 1401 break; 1402 } 1403 default: 1404 break; 1405 } 1406 } 1407 1408 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1409 struct cpuid_regs regs; 1410 1411 cp = ®s; 1412 cp->cp_eax = 0xB; 1413 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1414 1415 (void) __cpuid_insn(cp); 1416 1417 /* 1418 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1419 * indicates that the extended topology enumeration leaf is 1420 * available. 1421 */ 1422 if (cp->cp_ebx) { 1423 uint32_t x2apic_id; 1424 uint_t coreid_shift = 0; 1425 uint_t ncpu_per_core = 1; 1426 uint_t chipid_shift = 0; 1427 uint_t ncpu_per_chip = 1; 1428 uint_t i; 1429 uint_t level; 1430 1431 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1432 cp->cp_eax = 0xB; 1433 cp->cp_ecx = i; 1434 1435 (void) __cpuid_insn(cp); 1436 level = CPI_CPU_LEVEL_TYPE(cp); 1437 1438 if (level == 1) { 1439 x2apic_id = cp->cp_edx; 1440 coreid_shift = BITX(cp->cp_eax, 4, 0); 1441 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1442 } else if (level == 2) { 1443 x2apic_id = cp->cp_edx; 1444 chipid_shift = BITX(cp->cp_eax, 4, 0); 1445 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1446 } 1447 } 1448 1449 cpi->cpi_apicid = x2apic_id; 1450 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1451 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1452 ncpu_per_core; 1453 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1454 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1455 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1456 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1457 } 1458 1459 /* Make cp NULL so that we don't stumble on others */ 1460 cp = NULL; 1461 } 1462 1463 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1464 goto pass2_done; 1465 1466 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1467 nmax = NMAX_CPI_EXTD; 1468 /* 1469 * Copy the extended properties, fixing them as we go. 1470 * (We already handled n == 0 and n == 1 in pass 1) 1471 */ 1472 iptr = (void *)cpi->cpi_brandstr; 1473 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1474 cp->cp_eax = 0x80000000 + n; 1475 (void) __cpuid_insn(cp); 1476 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1477 switch (n) { 1478 case 2: 1479 case 3: 1480 case 4: 1481 /* 1482 * Extract the brand string 1483 */ 1484 *iptr++ = cp->cp_eax; 1485 *iptr++ = cp->cp_ebx; 1486 *iptr++ = cp->cp_ecx; 1487 *iptr++ = cp->cp_edx; 1488 break; 1489 case 5: 1490 switch (cpi->cpi_vendor) { 1491 case X86_VENDOR_AMD: 1492 /* 1493 * The Athlon and Duron were the first 1494 * parts to report the sizes of the 1495 * TLB for large pages. Before then, 1496 * we don't trust the data. 1497 */ 1498 if (cpi->cpi_family < 6 || 1499 (cpi->cpi_family == 6 && 1500 cpi->cpi_model < 1)) 1501 cp->cp_eax = 0; 1502 break; 1503 default: 1504 break; 1505 } 1506 break; 1507 case 6: 1508 switch (cpi->cpi_vendor) { 1509 case X86_VENDOR_AMD: 1510 /* 1511 * The Athlon and Duron were the first 1512 * AMD parts with L2 TLB's. 1513 * Before then, don't trust the data. 1514 */ 1515 if (cpi->cpi_family < 6 || 1516 cpi->cpi_family == 6 && 1517 cpi->cpi_model < 1) 1518 cp->cp_eax = cp->cp_ebx = 0; 1519 /* 1520 * AMD Duron rev A0 reports L2 1521 * cache size incorrectly as 1K 1522 * when it is really 64K 1523 */ 1524 if (cpi->cpi_family == 6 && 1525 cpi->cpi_model == 3 && 1526 cpi->cpi_step == 0) { 1527 cp->cp_ecx &= 0xffff; 1528 cp->cp_ecx |= 0x400000; 1529 } 1530 break; 1531 case X86_VENDOR_Cyrix: /* VIA C3 */ 1532 /* 1533 * VIA C3 processors are a bit messed 1534 * up w.r.t. encoding cache sizes in %ecx 1535 */ 1536 if (cpi->cpi_family != 6) 1537 break; 1538 /* 1539 * model 7 and 8 were incorrectly encoded 1540 * 1541 * xxx is model 8 really broken? 1542 */ 1543 if (cpi->cpi_model == 7 || 1544 cpi->cpi_model == 8) 1545 cp->cp_ecx = 1546 BITX(cp->cp_ecx, 31, 24) << 16 | 1547 BITX(cp->cp_ecx, 23, 16) << 12 | 1548 BITX(cp->cp_ecx, 15, 8) << 8 | 1549 BITX(cp->cp_ecx, 7, 0); 1550 /* 1551 * model 9 stepping 1 has wrong associativity 1552 */ 1553 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1554 cp->cp_ecx |= 8 << 12; 1555 break; 1556 case X86_VENDOR_Intel: 1557 /* 1558 * Extended L2 Cache features function. 1559 * First appeared on Prescott. 1560 */ 1561 default: 1562 break; 1563 } 1564 break; 1565 default: 1566 break; 1567 } 1568 } 1569 1570 pass2_done: 1571 cpi->cpi_pass = 2; 1572 } 1573 1574 static const char * 1575 intel_cpubrand(const struct cpuid_info *cpi) 1576 { 1577 int i; 1578 1579 if ((x86_feature & X86_CPUID) == 0 || 1580 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1581 return ("i486"); 1582 1583 switch (cpi->cpi_family) { 1584 case 5: 1585 return ("Intel Pentium(r)"); 1586 case 6: 1587 switch (cpi->cpi_model) { 1588 uint_t celeron, xeon; 1589 const struct cpuid_regs *cp; 1590 case 0: 1591 case 1: 1592 case 2: 1593 return ("Intel Pentium(r) Pro"); 1594 case 3: 1595 case 4: 1596 return ("Intel Pentium(r) II"); 1597 case 6: 1598 return ("Intel Celeron(r)"); 1599 case 5: 1600 case 7: 1601 celeron = xeon = 0; 1602 cp = &cpi->cpi_std[2]; /* cache info */ 1603 1604 for (i = 1; i < 4; i++) { 1605 uint_t tmp; 1606 1607 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1608 if (tmp == 0x40) 1609 celeron++; 1610 if (tmp >= 0x44 && tmp <= 0x45) 1611 xeon++; 1612 } 1613 1614 for (i = 0; i < 2; i++) { 1615 uint_t tmp; 1616 1617 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1618 if (tmp == 0x40) 1619 celeron++; 1620 else if (tmp >= 0x44 && tmp <= 0x45) 1621 xeon++; 1622 } 1623 1624 for (i = 0; i < 4; i++) { 1625 uint_t tmp; 1626 1627 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1628 if (tmp == 0x40) 1629 celeron++; 1630 else if (tmp >= 0x44 && tmp <= 0x45) 1631 xeon++; 1632 } 1633 1634 for (i = 0; i < 4; i++) { 1635 uint_t tmp; 1636 1637 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1638 if (tmp == 0x40) 1639 celeron++; 1640 else if (tmp >= 0x44 && tmp <= 0x45) 1641 xeon++; 1642 } 1643 1644 if (celeron) 1645 return ("Intel Celeron(r)"); 1646 if (xeon) 1647 return (cpi->cpi_model == 5 ? 1648 "Intel Pentium(r) II Xeon(tm)" : 1649 "Intel Pentium(r) III Xeon(tm)"); 1650 return (cpi->cpi_model == 5 ? 1651 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1652 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1653 default: 1654 break; 1655 } 1656 default: 1657 break; 1658 } 1659 1660 /* BrandID is present if the field is nonzero */ 1661 if (cpi->cpi_brandid != 0) { 1662 static const struct { 1663 uint_t bt_bid; 1664 const char *bt_str; 1665 } brand_tbl[] = { 1666 { 0x1, "Intel(r) Celeron(r)" }, 1667 { 0x2, "Intel(r) Pentium(r) III" }, 1668 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1669 { 0x4, "Intel(r) Pentium(r) III" }, 1670 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1671 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1672 { 0x8, "Intel(r) Pentium(r) 4" }, 1673 { 0x9, "Intel(r) Pentium(r) 4" }, 1674 { 0xa, "Intel(r) Celeron(r)" }, 1675 { 0xb, "Intel(r) Xeon(tm)" }, 1676 { 0xc, "Intel(r) Xeon(tm) MP" }, 1677 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1678 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1679 { 0x11, "Mobile Genuine Intel(r)" }, 1680 { 0x12, "Intel(r) Celeron(r) M" }, 1681 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1682 { 0x14, "Intel(r) Celeron(r)" }, 1683 { 0x15, "Mobile Genuine Intel(r)" }, 1684 { 0x16, "Intel(r) Pentium(r) M" }, 1685 { 0x17, "Mobile Intel(r) Celeron(r)" } 1686 }; 1687 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1688 uint_t sgn; 1689 1690 sgn = (cpi->cpi_family << 8) | 1691 (cpi->cpi_model << 4) | cpi->cpi_step; 1692 1693 for (i = 0; i < btblmax; i++) 1694 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1695 break; 1696 if (i < btblmax) { 1697 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1698 return ("Intel(r) Celeron(r)"); 1699 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1700 return ("Intel(r) Xeon(tm) MP"); 1701 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1702 return ("Intel(r) Xeon(tm)"); 1703 return (brand_tbl[i].bt_str); 1704 } 1705 } 1706 1707 return (NULL); 1708 } 1709 1710 static const char * 1711 amd_cpubrand(const struct cpuid_info *cpi) 1712 { 1713 if ((x86_feature & X86_CPUID) == 0 || 1714 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1715 return ("i486 compatible"); 1716 1717 switch (cpi->cpi_family) { 1718 case 5: 1719 switch (cpi->cpi_model) { 1720 case 0: 1721 case 1: 1722 case 2: 1723 case 3: 1724 case 4: 1725 case 5: 1726 return ("AMD-K5(r)"); 1727 case 6: 1728 case 7: 1729 return ("AMD-K6(r)"); 1730 case 8: 1731 return ("AMD-K6(r)-2"); 1732 case 9: 1733 return ("AMD-K6(r)-III"); 1734 default: 1735 return ("AMD (family 5)"); 1736 } 1737 case 6: 1738 switch (cpi->cpi_model) { 1739 case 1: 1740 return ("AMD-K7(tm)"); 1741 case 0: 1742 case 2: 1743 case 4: 1744 return ("AMD Athlon(tm)"); 1745 case 3: 1746 case 7: 1747 return ("AMD Duron(tm)"); 1748 case 6: 1749 case 8: 1750 case 10: 1751 /* 1752 * Use the L2 cache size to distinguish 1753 */ 1754 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1755 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1756 default: 1757 return ("AMD (family 6)"); 1758 } 1759 default: 1760 break; 1761 } 1762 1763 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1764 cpi->cpi_brandid != 0) { 1765 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1766 case 3: 1767 return ("AMD Opteron(tm) UP 1xx"); 1768 case 4: 1769 return ("AMD Opteron(tm) DP 2xx"); 1770 case 5: 1771 return ("AMD Opteron(tm) MP 8xx"); 1772 default: 1773 return ("AMD Opteron(tm)"); 1774 } 1775 } 1776 1777 return (NULL); 1778 } 1779 1780 static const char * 1781 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1782 { 1783 if ((x86_feature & X86_CPUID) == 0 || 1784 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1785 type == X86_TYPE_CYRIX_486) 1786 return ("i486 compatible"); 1787 1788 switch (type) { 1789 case X86_TYPE_CYRIX_6x86: 1790 return ("Cyrix 6x86"); 1791 case X86_TYPE_CYRIX_6x86L: 1792 return ("Cyrix 6x86L"); 1793 case X86_TYPE_CYRIX_6x86MX: 1794 return ("Cyrix 6x86MX"); 1795 case X86_TYPE_CYRIX_GXm: 1796 return ("Cyrix GXm"); 1797 case X86_TYPE_CYRIX_MediaGX: 1798 return ("Cyrix MediaGX"); 1799 case X86_TYPE_CYRIX_MII: 1800 return ("Cyrix M2"); 1801 case X86_TYPE_VIA_CYRIX_III: 1802 return ("VIA Cyrix M3"); 1803 default: 1804 /* 1805 * Have another wild guess .. 1806 */ 1807 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1808 return ("Cyrix 5x86"); 1809 else if (cpi->cpi_family == 5) { 1810 switch (cpi->cpi_model) { 1811 case 2: 1812 return ("Cyrix 6x86"); /* Cyrix M1 */ 1813 case 4: 1814 return ("Cyrix MediaGX"); 1815 default: 1816 break; 1817 } 1818 } else if (cpi->cpi_family == 6) { 1819 switch (cpi->cpi_model) { 1820 case 0: 1821 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1822 case 5: 1823 case 6: 1824 case 7: 1825 case 8: 1826 case 9: 1827 return ("VIA C3"); 1828 default: 1829 break; 1830 } 1831 } 1832 break; 1833 } 1834 return (NULL); 1835 } 1836 1837 /* 1838 * This only gets called in the case that the CPU extended 1839 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1840 * aren't available, or contain null bytes for some reason. 1841 */ 1842 static void 1843 fabricate_brandstr(struct cpuid_info *cpi) 1844 { 1845 const char *brand = NULL; 1846 1847 switch (cpi->cpi_vendor) { 1848 case X86_VENDOR_Intel: 1849 brand = intel_cpubrand(cpi); 1850 break; 1851 case X86_VENDOR_AMD: 1852 brand = amd_cpubrand(cpi); 1853 break; 1854 case X86_VENDOR_Cyrix: 1855 brand = cyrix_cpubrand(cpi, x86_type); 1856 break; 1857 case X86_VENDOR_NexGen: 1858 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1859 brand = "NexGen Nx586"; 1860 break; 1861 case X86_VENDOR_Centaur: 1862 if (cpi->cpi_family == 5) 1863 switch (cpi->cpi_model) { 1864 case 4: 1865 brand = "Centaur C6"; 1866 break; 1867 case 8: 1868 brand = "Centaur C2"; 1869 break; 1870 case 9: 1871 brand = "Centaur C3"; 1872 break; 1873 default: 1874 break; 1875 } 1876 break; 1877 case X86_VENDOR_Rise: 1878 if (cpi->cpi_family == 5 && 1879 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1880 brand = "Rise mP6"; 1881 break; 1882 case X86_VENDOR_SiS: 1883 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1884 brand = "SiS 55x"; 1885 break; 1886 case X86_VENDOR_TM: 1887 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 1888 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1889 break; 1890 case X86_VENDOR_NSC: 1891 case X86_VENDOR_UMC: 1892 default: 1893 break; 1894 } 1895 if (brand) { 1896 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1897 return; 1898 } 1899 1900 /* 1901 * If all else fails ... 1902 */ 1903 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1904 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1905 cpi->cpi_model, cpi->cpi_step); 1906 } 1907 1908 /* 1909 * This routine is called just after kernel memory allocation 1910 * becomes available on cpu0, and as part of mp_startup() on 1911 * the other cpus. 1912 * 1913 * Fixup the brand string, and collect any information from cpuid 1914 * that requires dynamicically allocated storage to represent. 1915 */ 1916 /*ARGSUSED*/ 1917 void 1918 cpuid_pass3(cpu_t *cpu) 1919 { 1920 int i, max, shft, level, size; 1921 struct cpuid_regs regs; 1922 struct cpuid_regs *cp; 1923 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1924 1925 ASSERT(cpi->cpi_pass == 2); 1926 1927 /* 1928 * Function 4: Deterministic cache parameters 1929 * 1930 * Take this opportunity to detect the number of threads 1931 * sharing the last level cache, and construct a corresponding 1932 * cache id. The respective cpuid_info members are initialized 1933 * to the default case of "no last level cache sharing". 1934 */ 1935 cpi->cpi_ncpu_shr_last_cache = 1; 1936 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1937 1938 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1939 1940 /* 1941 * Find the # of elements (size) returned by fn 4, and along 1942 * the way detect last level cache sharing details. 1943 */ 1944 bzero(®s, sizeof (regs)); 1945 cp = ®s; 1946 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1947 cp->cp_eax = 4; 1948 cp->cp_ecx = i; 1949 1950 (void) __cpuid_insn(cp); 1951 1952 if (CPI_CACHE_TYPE(cp) == 0) 1953 break; 1954 level = CPI_CACHE_LVL(cp); 1955 if (level > max) { 1956 max = level; 1957 cpi->cpi_ncpu_shr_last_cache = 1958 CPI_NTHR_SHR_CACHE(cp) + 1; 1959 } 1960 } 1961 cpi->cpi_std_4_size = size = i; 1962 1963 /* 1964 * Allocate the cpi_std_4 array. The first element 1965 * references the regs for fn 4, %ecx == 0, which 1966 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1967 */ 1968 if (size > 0) { 1969 cpi->cpi_std_4 = 1970 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1971 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1972 1973 /* 1974 * Allocate storage to hold the additional regs 1975 * for function 4, %ecx == 1 .. cpi_std_4_size. 1976 * 1977 * The regs for fn 4, %ecx == 0 has already 1978 * been allocated as indicated above. 1979 */ 1980 for (i = 1; i < size; i++) { 1981 cp = cpi->cpi_std_4[i] = 1982 kmem_zalloc(sizeof (regs), KM_SLEEP); 1983 cp->cp_eax = 4; 1984 cp->cp_ecx = i; 1985 1986 (void) __cpuid_insn(cp); 1987 } 1988 } 1989 /* 1990 * Determine the number of bits needed to represent 1991 * the number of CPUs sharing the last level cache. 1992 * 1993 * Shift off that number of bits from the APIC id to 1994 * derive the cache id. 1995 */ 1996 shft = 0; 1997 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1998 shft++; 1999 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 2000 } 2001 2002 /* 2003 * Now fixup the brand string 2004 */ 2005 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 2006 fabricate_brandstr(cpi); 2007 } else { 2008 2009 /* 2010 * If we successfully extracted a brand string from the cpuid 2011 * instruction, clean it up by removing leading spaces and 2012 * similar junk. 2013 */ 2014 if (cpi->cpi_brandstr[0]) { 2015 size_t maxlen = sizeof (cpi->cpi_brandstr); 2016 char *src, *dst; 2017 2018 dst = src = (char *)cpi->cpi_brandstr; 2019 src[maxlen - 1] = '\0'; 2020 /* 2021 * strip leading spaces 2022 */ 2023 while (*src == ' ') 2024 src++; 2025 /* 2026 * Remove any 'Genuine' or "Authentic" prefixes 2027 */ 2028 if (strncmp(src, "Genuine ", 8) == 0) 2029 src += 8; 2030 if (strncmp(src, "Authentic ", 10) == 0) 2031 src += 10; 2032 2033 /* 2034 * Now do an in-place copy. 2035 * Map (R) to (r) and (TM) to (tm). 2036 * The era of teletypes is long gone, and there's 2037 * -really- no need to shout. 2038 */ 2039 while (*src != '\0') { 2040 if (src[0] == '(') { 2041 if (strncmp(src + 1, "R)", 2) == 0) { 2042 (void) strncpy(dst, "(r)", 3); 2043 src += 3; 2044 dst += 3; 2045 continue; 2046 } 2047 if (strncmp(src + 1, "TM)", 3) == 0) { 2048 (void) strncpy(dst, "(tm)", 4); 2049 src += 4; 2050 dst += 4; 2051 continue; 2052 } 2053 } 2054 *dst++ = *src++; 2055 } 2056 *dst = '\0'; 2057 2058 /* 2059 * Finally, remove any trailing spaces 2060 */ 2061 while (--dst > cpi->cpi_brandstr) 2062 if (*dst == ' ') 2063 *dst = '\0'; 2064 else 2065 break; 2066 } else 2067 fabricate_brandstr(cpi); 2068 } 2069 cpi->cpi_pass = 3; 2070 } 2071 2072 /* 2073 * This routine is called out of bind_hwcap() much later in the life 2074 * of the kernel (post_startup()). The job of this routine is to resolve 2075 * the hardware feature support and kernel support for those features into 2076 * what we're actually going to tell applications via the aux vector. 2077 */ 2078 uint_t 2079 cpuid_pass4(cpu_t *cpu) 2080 { 2081 struct cpuid_info *cpi; 2082 uint_t hwcap_flags = 0; 2083 2084 if (cpu == NULL) 2085 cpu = CPU; 2086 cpi = cpu->cpu_m.mcpu_cpi; 2087 2088 ASSERT(cpi->cpi_pass == 3); 2089 2090 if (cpi->cpi_maxeax >= 1) { 2091 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2092 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2093 2094 *edx = CPI_FEATURES_EDX(cpi); 2095 *ecx = CPI_FEATURES_ECX(cpi); 2096 2097 /* 2098 * [these require explicit kernel support] 2099 */ 2100 if ((x86_feature & X86_SEP) == 0) 2101 *edx &= ~CPUID_INTC_EDX_SEP; 2102 2103 if ((x86_feature & X86_SSE) == 0) 2104 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2105 if ((x86_feature & X86_SSE2) == 0) 2106 *edx &= ~CPUID_INTC_EDX_SSE2; 2107 2108 if ((x86_feature & X86_HTT) == 0) 2109 *edx &= ~CPUID_INTC_EDX_HTT; 2110 2111 if ((x86_feature & X86_SSE3) == 0) 2112 *ecx &= ~CPUID_INTC_ECX_SSE3; 2113 2114 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2115 if ((x86_feature & X86_SSSE3) == 0) 2116 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2117 if ((x86_feature & X86_SSE4_1) == 0) 2118 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2119 if ((x86_feature & X86_SSE4_2) == 0) 2120 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2121 if ((x86_feature & X86_AES) == 0) 2122 *ecx &= ~CPUID_INTC_ECX_AES; 2123 } 2124 2125 /* 2126 * [no explicit support required beyond x87 fp context] 2127 */ 2128 if (!fpu_exists) 2129 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2130 2131 /* 2132 * Now map the supported feature vector to things that we 2133 * think userland will care about. 2134 */ 2135 if (*edx & CPUID_INTC_EDX_SEP) 2136 hwcap_flags |= AV_386_SEP; 2137 if (*edx & CPUID_INTC_EDX_SSE) 2138 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2139 if (*edx & CPUID_INTC_EDX_SSE2) 2140 hwcap_flags |= AV_386_SSE2; 2141 if (*ecx & CPUID_INTC_ECX_SSE3) 2142 hwcap_flags |= AV_386_SSE3; 2143 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2144 if (*ecx & CPUID_INTC_ECX_SSSE3) 2145 hwcap_flags |= AV_386_SSSE3; 2146 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2147 hwcap_flags |= AV_386_SSE4_1; 2148 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2149 hwcap_flags |= AV_386_SSE4_2; 2150 if (*ecx & CPUID_INTC_ECX_MOVBE) 2151 hwcap_flags |= AV_386_MOVBE; 2152 if (*ecx & CPUID_INTC_ECX_AES) 2153 hwcap_flags |= AV_386_AES; 2154 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2155 hwcap_flags |= AV_386_PCLMULQDQ; 2156 } 2157 if (*ecx & CPUID_INTC_ECX_POPCNT) 2158 hwcap_flags |= AV_386_POPCNT; 2159 if (*edx & CPUID_INTC_EDX_FPU) 2160 hwcap_flags |= AV_386_FPU; 2161 if (*edx & CPUID_INTC_EDX_MMX) 2162 hwcap_flags |= AV_386_MMX; 2163 2164 if (*edx & CPUID_INTC_EDX_TSC) 2165 hwcap_flags |= AV_386_TSC; 2166 if (*edx & CPUID_INTC_EDX_CX8) 2167 hwcap_flags |= AV_386_CX8; 2168 if (*edx & CPUID_INTC_EDX_CMOV) 2169 hwcap_flags |= AV_386_CMOV; 2170 if (*ecx & CPUID_INTC_ECX_MON) 2171 hwcap_flags |= AV_386_MON; 2172 if (*ecx & CPUID_INTC_ECX_CX16) 2173 hwcap_flags |= AV_386_CX16; 2174 } 2175 2176 if (x86_feature & X86_HTT) 2177 hwcap_flags |= AV_386_PAUSE; 2178 2179 if (cpi->cpi_xmaxeax < 0x80000001) 2180 goto pass4_done; 2181 2182 switch (cpi->cpi_vendor) { 2183 struct cpuid_regs cp; 2184 uint32_t *edx, *ecx; 2185 2186 case X86_VENDOR_Intel: 2187 /* 2188 * Seems like Intel duplicated what we necessary 2189 * here to make the initial crop of 64-bit OS's work. 2190 * Hopefully, those are the only "extended" bits 2191 * they'll add. 2192 */ 2193 /*FALLTHROUGH*/ 2194 2195 case X86_VENDOR_AMD: 2196 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2197 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2198 2199 *edx = CPI_FEATURES_XTD_EDX(cpi); 2200 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2201 2202 /* 2203 * [these features require explicit kernel support] 2204 */ 2205 switch (cpi->cpi_vendor) { 2206 case X86_VENDOR_Intel: 2207 if ((x86_feature & X86_TSCP) == 0) 2208 *edx &= ~CPUID_AMD_EDX_TSCP; 2209 break; 2210 2211 case X86_VENDOR_AMD: 2212 if ((x86_feature & X86_TSCP) == 0) 2213 *edx &= ~CPUID_AMD_EDX_TSCP; 2214 if ((x86_feature & X86_SSE4A) == 0) 2215 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2216 break; 2217 2218 default: 2219 break; 2220 } 2221 2222 /* 2223 * [no explicit support required beyond 2224 * x87 fp context and exception handlers] 2225 */ 2226 if (!fpu_exists) 2227 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2228 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2229 2230 if ((x86_feature & X86_NX) == 0) 2231 *edx &= ~CPUID_AMD_EDX_NX; 2232 #if !defined(__amd64) 2233 *edx &= ~CPUID_AMD_EDX_LM; 2234 #endif 2235 /* 2236 * Now map the supported feature vector to 2237 * things that we think userland will care about. 2238 */ 2239 #if defined(__amd64) 2240 if (*edx & CPUID_AMD_EDX_SYSC) 2241 hwcap_flags |= AV_386_AMD_SYSC; 2242 #endif 2243 if (*edx & CPUID_AMD_EDX_MMXamd) 2244 hwcap_flags |= AV_386_AMD_MMX; 2245 if (*edx & CPUID_AMD_EDX_3DNow) 2246 hwcap_flags |= AV_386_AMD_3DNow; 2247 if (*edx & CPUID_AMD_EDX_3DNowx) 2248 hwcap_flags |= AV_386_AMD_3DNowx; 2249 2250 switch (cpi->cpi_vendor) { 2251 case X86_VENDOR_AMD: 2252 if (*edx & CPUID_AMD_EDX_TSCP) 2253 hwcap_flags |= AV_386_TSCP; 2254 if (*ecx & CPUID_AMD_ECX_AHF64) 2255 hwcap_flags |= AV_386_AHF; 2256 if (*ecx & CPUID_AMD_ECX_SSE4A) 2257 hwcap_flags |= AV_386_AMD_SSE4A; 2258 if (*ecx & CPUID_AMD_ECX_LZCNT) 2259 hwcap_flags |= AV_386_AMD_LZCNT; 2260 break; 2261 2262 case X86_VENDOR_Intel: 2263 if (*edx & CPUID_AMD_EDX_TSCP) 2264 hwcap_flags |= AV_386_TSCP; 2265 /* 2266 * Aarrgh. 2267 * Intel uses a different bit in the same word. 2268 */ 2269 if (*ecx & CPUID_INTC_ECX_AHF64) 2270 hwcap_flags |= AV_386_AHF; 2271 break; 2272 2273 default: 2274 break; 2275 } 2276 break; 2277 2278 case X86_VENDOR_TM: 2279 cp.cp_eax = 0x80860001; 2280 (void) __cpuid_insn(&cp); 2281 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2282 break; 2283 2284 default: 2285 break; 2286 } 2287 2288 pass4_done: 2289 cpi->cpi_pass = 4; 2290 return (hwcap_flags); 2291 } 2292 2293 2294 /* 2295 * Simulate the cpuid instruction using the data we previously 2296 * captured about this CPU. We try our best to return the truth 2297 * about the hardware, independently of kernel support. 2298 */ 2299 uint32_t 2300 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2301 { 2302 struct cpuid_info *cpi; 2303 struct cpuid_regs *xcp; 2304 2305 if (cpu == NULL) 2306 cpu = CPU; 2307 cpi = cpu->cpu_m.mcpu_cpi; 2308 2309 ASSERT(cpuid_checkpass(cpu, 3)); 2310 2311 /* 2312 * CPUID data is cached in two separate places: cpi_std for standard 2313 * CPUID functions, and cpi_extd for extended CPUID functions. 2314 */ 2315 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2316 xcp = &cpi->cpi_std[cp->cp_eax]; 2317 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2318 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2319 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2320 else 2321 /* 2322 * The caller is asking for data from an input parameter which 2323 * the kernel has not cached. In this case we go fetch from 2324 * the hardware and return the data directly to the user. 2325 */ 2326 return (__cpuid_insn(cp)); 2327 2328 cp->cp_eax = xcp->cp_eax; 2329 cp->cp_ebx = xcp->cp_ebx; 2330 cp->cp_ecx = xcp->cp_ecx; 2331 cp->cp_edx = xcp->cp_edx; 2332 return (cp->cp_eax); 2333 } 2334 2335 int 2336 cpuid_checkpass(cpu_t *cpu, int pass) 2337 { 2338 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2339 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2340 } 2341 2342 int 2343 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2344 { 2345 ASSERT(cpuid_checkpass(cpu, 3)); 2346 2347 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2348 } 2349 2350 int 2351 cpuid_is_cmt(cpu_t *cpu) 2352 { 2353 if (cpu == NULL) 2354 cpu = CPU; 2355 2356 ASSERT(cpuid_checkpass(cpu, 1)); 2357 2358 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2359 } 2360 2361 /* 2362 * AMD and Intel both implement the 64-bit variant of the syscall 2363 * instruction (syscallq), so if there's -any- support for syscall, 2364 * cpuid currently says "yes, we support this". 2365 * 2366 * However, Intel decided to -not- implement the 32-bit variant of the 2367 * syscall instruction, so we provide a predicate to allow our caller 2368 * to test that subtlety here. 2369 * 2370 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2371 * even in the case where the hardware would in fact support it. 2372 */ 2373 /*ARGSUSED*/ 2374 int 2375 cpuid_syscall32_insn(cpu_t *cpu) 2376 { 2377 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2378 2379 #if !defined(__xpv) 2380 if (cpu == NULL) 2381 cpu = CPU; 2382 2383 /*CSTYLED*/ 2384 { 2385 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2386 2387 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2388 cpi->cpi_xmaxeax >= 0x80000001 && 2389 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2390 return (1); 2391 } 2392 #endif 2393 return (0); 2394 } 2395 2396 int 2397 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2398 { 2399 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2400 2401 static const char fmt[] = 2402 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2403 static const char fmt_ht[] = 2404 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2405 2406 ASSERT(cpuid_checkpass(cpu, 1)); 2407 2408 if (cpuid_is_cmt(cpu)) 2409 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2410 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2411 cpi->cpi_family, cpi->cpi_model, 2412 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2413 return (snprintf(s, n, fmt, 2414 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2415 cpi->cpi_family, cpi->cpi_model, 2416 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2417 } 2418 2419 const char * 2420 cpuid_getvendorstr(cpu_t *cpu) 2421 { 2422 ASSERT(cpuid_checkpass(cpu, 1)); 2423 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2424 } 2425 2426 uint_t 2427 cpuid_getvendor(cpu_t *cpu) 2428 { 2429 ASSERT(cpuid_checkpass(cpu, 1)); 2430 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2431 } 2432 2433 uint_t 2434 cpuid_getfamily(cpu_t *cpu) 2435 { 2436 ASSERT(cpuid_checkpass(cpu, 1)); 2437 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2438 } 2439 2440 uint_t 2441 cpuid_getmodel(cpu_t *cpu) 2442 { 2443 ASSERT(cpuid_checkpass(cpu, 1)); 2444 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2445 } 2446 2447 uint_t 2448 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2449 { 2450 ASSERT(cpuid_checkpass(cpu, 1)); 2451 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2452 } 2453 2454 uint_t 2455 cpuid_get_ncore_per_chip(cpu_t *cpu) 2456 { 2457 ASSERT(cpuid_checkpass(cpu, 1)); 2458 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2459 } 2460 2461 uint_t 2462 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2463 { 2464 ASSERT(cpuid_checkpass(cpu, 2)); 2465 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2466 } 2467 2468 id_t 2469 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2470 { 2471 ASSERT(cpuid_checkpass(cpu, 2)); 2472 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2473 } 2474 2475 uint_t 2476 cpuid_getstep(cpu_t *cpu) 2477 { 2478 ASSERT(cpuid_checkpass(cpu, 1)); 2479 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2480 } 2481 2482 uint_t 2483 cpuid_getsig(struct cpu *cpu) 2484 { 2485 ASSERT(cpuid_checkpass(cpu, 1)); 2486 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2487 } 2488 2489 uint32_t 2490 cpuid_getchiprev(struct cpu *cpu) 2491 { 2492 ASSERT(cpuid_checkpass(cpu, 1)); 2493 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2494 } 2495 2496 const char * 2497 cpuid_getchiprevstr(struct cpu *cpu) 2498 { 2499 ASSERT(cpuid_checkpass(cpu, 1)); 2500 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2501 } 2502 2503 uint32_t 2504 cpuid_getsockettype(struct cpu *cpu) 2505 { 2506 ASSERT(cpuid_checkpass(cpu, 1)); 2507 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2508 } 2509 2510 const char * 2511 cpuid_getsocketstr(cpu_t *cpu) 2512 { 2513 static const char *socketstr = NULL; 2514 struct cpuid_info *cpi; 2515 2516 ASSERT(cpuid_checkpass(cpu, 1)); 2517 cpi = cpu->cpu_m.mcpu_cpi; 2518 2519 /* Assume that socket types are the same across the system */ 2520 if (socketstr == NULL) 2521 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 2522 cpi->cpi_model, cpi->cpi_step); 2523 2524 2525 return (socketstr); 2526 } 2527 2528 int 2529 cpuid_get_chipid(cpu_t *cpu) 2530 { 2531 ASSERT(cpuid_checkpass(cpu, 1)); 2532 2533 if (cpuid_is_cmt(cpu)) 2534 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2535 return (cpu->cpu_id); 2536 } 2537 2538 id_t 2539 cpuid_get_coreid(cpu_t *cpu) 2540 { 2541 ASSERT(cpuid_checkpass(cpu, 1)); 2542 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2543 } 2544 2545 int 2546 cpuid_get_pkgcoreid(cpu_t *cpu) 2547 { 2548 ASSERT(cpuid_checkpass(cpu, 1)); 2549 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2550 } 2551 2552 int 2553 cpuid_get_clogid(cpu_t *cpu) 2554 { 2555 ASSERT(cpuid_checkpass(cpu, 1)); 2556 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2557 } 2558 2559 /*ARGSUSED*/ 2560 int 2561 cpuid_have_cr8access(cpu_t *cpu) 2562 { 2563 #if defined(__amd64) 2564 return (1); 2565 #else 2566 struct cpuid_info *cpi; 2567 2568 ASSERT(cpu != NULL); 2569 cpi = cpu->cpu_m.mcpu_cpi; 2570 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 2571 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 2572 return (1); 2573 return (0); 2574 #endif 2575 } 2576 2577 uint32_t 2578 cpuid_get_apicid(cpu_t *cpu) 2579 { 2580 ASSERT(cpuid_checkpass(cpu, 1)); 2581 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 2582 return (UINT32_MAX); 2583 } else { 2584 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 2585 } 2586 } 2587 2588 void 2589 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2590 { 2591 struct cpuid_info *cpi; 2592 2593 if (cpu == NULL) 2594 cpu = CPU; 2595 cpi = cpu->cpu_m.mcpu_cpi; 2596 2597 ASSERT(cpuid_checkpass(cpu, 1)); 2598 2599 if (pabits) 2600 *pabits = cpi->cpi_pabits; 2601 if (vabits) 2602 *vabits = cpi->cpi_vabits; 2603 } 2604 2605 /* 2606 * Returns the number of data TLB entries for a corresponding 2607 * pagesize. If it can't be computed, or isn't known, the 2608 * routine returns zero. If you ask about an architecturally 2609 * impossible pagesize, the routine will panic (so that the 2610 * hat implementor knows that things are inconsistent.) 2611 */ 2612 uint_t 2613 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2614 { 2615 struct cpuid_info *cpi; 2616 uint_t dtlb_nent = 0; 2617 2618 if (cpu == NULL) 2619 cpu = CPU; 2620 cpi = cpu->cpu_m.mcpu_cpi; 2621 2622 ASSERT(cpuid_checkpass(cpu, 1)); 2623 2624 /* 2625 * Check the L2 TLB info 2626 */ 2627 if (cpi->cpi_xmaxeax >= 0x80000006) { 2628 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2629 2630 switch (pagesize) { 2631 2632 case 4 * 1024: 2633 /* 2634 * All zero in the top 16 bits of the register 2635 * indicates a unified TLB. Size is in low 16 bits. 2636 */ 2637 if ((cp->cp_ebx & 0xffff0000) == 0) 2638 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2639 else 2640 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2641 break; 2642 2643 case 2 * 1024 * 1024: 2644 if ((cp->cp_eax & 0xffff0000) == 0) 2645 dtlb_nent = cp->cp_eax & 0x0000ffff; 2646 else 2647 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2648 break; 2649 2650 default: 2651 panic("unknown L2 pagesize"); 2652 /*NOTREACHED*/ 2653 } 2654 } 2655 2656 if (dtlb_nent != 0) 2657 return (dtlb_nent); 2658 2659 /* 2660 * No L2 TLB support for this size, try L1. 2661 */ 2662 if (cpi->cpi_xmaxeax >= 0x80000005) { 2663 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2664 2665 switch (pagesize) { 2666 case 4 * 1024: 2667 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2668 break; 2669 case 2 * 1024 * 1024: 2670 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2671 break; 2672 default: 2673 panic("unknown L1 d-TLB pagesize"); 2674 /*NOTREACHED*/ 2675 } 2676 } 2677 2678 return (dtlb_nent); 2679 } 2680 2681 /* 2682 * Return 0 if the erratum is not present or not applicable, positive 2683 * if it is, and negative if the status of the erratum is unknown. 2684 * 2685 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2686 * Processors" #25759, Rev 3.57, August 2005 2687 */ 2688 int 2689 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2690 { 2691 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2692 uint_t eax; 2693 2694 /* 2695 * Bail out if this CPU isn't an AMD CPU, or if it's 2696 * a legacy (32-bit) AMD CPU. 2697 */ 2698 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2699 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2700 cpi->cpi_family == 6) 2701 2702 return (0); 2703 2704 eax = cpi->cpi_std[1].cp_eax; 2705 2706 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2707 #define SH_B3(eax) (eax == 0xf51) 2708 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2709 2710 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2711 2712 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2713 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2714 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2715 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2716 2717 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2718 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2719 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2720 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2721 2722 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2723 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2724 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2725 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2726 #define BH_E4(eax) (eax == 0x20fb1) 2727 #define SH_E5(eax) (eax == 0x20f42) 2728 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2729 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2730 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2731 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2732 DH_E6(eax) || JH_E6(eax)) 2733 2734 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2735 #define DR_B0(eax) (eax == 0x100f20) 2736 #define DR_B1(eax) (eax == 0x100f21) 2737 #define DR_BA(eax) (eax == 0x100f2a) 2738 #define DR_B2(eax) (eax == 0x100f22) 2739 #define DR_B3(eax) (eax == 0x100f23) 2740 #define RB_C0(eax) (eax == 0x100f40) 2741 2742 switch (erratum) { 2743 case 1: 2744 return (cpi->cpi_family < 0x10); 2745 case 51: /* what does the asterisk mean? */ 2746 return (B(eax) || SH_C0(eax) || CG(eax)); 2747 case 52: 2748 return (B(eax)); 2749 case 57: 2750 return (cpi->cpi_family <= 0x11); 2751 case 58: 2752 return (B(eax)); 2753 case 60: 2754 return (cpi->cpi_family <= 0x11); 2755 case 61: 2756 case 62: 2757 case 63: 2758 case 64: 2759 case 65: 2760 case 66: 2761 case 68: 2762 case 69: 2763 case 70: 2764 case 71: 2765 return (B(eax)); 2766 case 72: 2767 return (SH_B0(eax)); 2768 case 74: 2769 return (B(eax)); 2770 case 75: 2771 return (cpi->cpi_family < 0x10); 2772 case 76: 2773 return (B(eax)); 2774 case 77: 2775 return (cpi->cpi_family <= 0x11); 2776 case 78: 2777 return (B(eax) || SH_C0(eax)); 2778 case 79: 2779 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2780 case 80: 2781 case 81: 2782 case 82: 2783 return (B(eax)); 2784 case 83: 2785 return (B(eax) || SH_C0(eax) || CG(eax)); 2786 case 85: 2787 return (cpi->cpi_family < 0x10); 2788 case 86: 2789 return (SH_C0(eax) || CG(eax)); 2790 case 88: 2791 #if !defined(__amd64) 2792 return (0); 2793 #else 2794 return (B(eax) || SH_C0(eax)); 2795 #endif 2796 case 89: 2797 return (cpi->cpi_family < 0x10); 2798 case 90: 2799 return (B(eax) || SH_C0(eax) || CG(eax)); 2800 case 91: 2801 case 92: 2802 return (B(eax) || SH_C0(eax)); 2803 case 93: 2804 return (SH_C0(eax)); 2805 case 94: 2806 return (B(eax) || SH_C0(eax) || CG(eax)); 2807 case 95: 2808 #if !defined(__amd64) 2809 return (0); 2810 #else 2811 return (B(eax) || SH_C0(eax)); 2812 #endif 2813 case 96: 2814 return (B(eax) || SH_C0(eax) || CG(eax)); 2815 case 97: 2816 case 98: 2817 return (SH_C0(eax) || CG(eax)); 2818 case 99: 2819 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2820 case 100: 2821 return (B(eax) || SH_C0(eax)); 2822 case 101: 2823 case 103: 2824 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2825 case 104: 2826 return (SH_C0(eax) || CG(eax) || D0(eax)); 2827 case 105: 2828 case 106: 2829 case 107: 2830 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2831 case 108: 2832 return (DH_CG(eax)); 2833 case 109: 2834 return (SH_C0(eax) || CG(eax) || D0(eax)); 2835 case 110: 2836 return (D0(eax) || EX(eax)); 2837 case 111: 2838 return (CG(eax)); 2839 case 112: 2840 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2841 case 113: 2842 return (eax == 0x20fc0); 2843 case 114: 2844 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2845 case 115: 2846 return (SH_E0(eax) || JH_E1(eax)); 2847 case 116: 2848 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2849 case 117: 2850 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2851 case 118: 2852 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2853 JH_E6(eax)); 2854 case 121: 2855 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2856 case 122: 2857 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2858 case 123: 2859 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2860 case 131: 2861 return (cpi->cpi_family < 0x10); 2862 case 6336786: 2863 /* 2864 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2865 * if this is a K8 family or newer processor 2866 */ 2867 if (CPI_FAMILY(cpi) == 0xf) { 2868 struct cpuid_regs regs; 2869 regs.cp_eax = 0x80000007; 2870 (void) __cpuid_insn(®s); 2871 return (!(regs.cp_edx & 0x100)); 2872 } 2873 return (0); 2874 case 6323525: 2875 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2876 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2877 2878 case 6671130: 2879 /* 2880 * check for processors (pre-Shanghai) that do not provide 2881 * optimal management of 1gb ptes in its tlb. 2882 */ 2883 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 2884 2885 case 298: 2886 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 2887 DR_B2(eax) || RB_C0(eax)); 2888 2889 default: 2890 return (-1); 2891 2892 } 2893 } 2894 2895 /* 2896 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 2897 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 2898 */ 2899 int 2900 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 2901 { 2902 struct cpuid_info *cpi; 2903 uint_t osvwid; 2904 static int osvwfeature = -1; 2905 uint64_t osvwlength; 2906 2907 2908 cpi = cpu->cpu_m.mcpu_cpi; 2909 2910 /* confirm OSVW supported */ 2911 if (osvwfeature == -1) { 2912 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 2913 } else { 2914 /* assert that osvw feature setting is consistent on all cpus */ 2915 ASSERT(osvwfeature == 2916 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 2917 } 2918 if (!osvwfeature) 2919 return (-1); 2920 2921 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 2922 2923 switch (erratum) { 2924 case 298: /* osvwid is 0 */ 2925 osvwid = 0; 2926 if (osvwlength <= (uint64_t)osvwid) { 2927 /* osvwid 0 is unknown */ 2928 return (-1); 2929 } 2930 2931 /* 2932 * Check the OSVW STATUS MSR to determine the state 2933 * of the erratum where: 2934 * 0 - fixed by HW 2935 * 1 - BIOS has applied the workaround when BIOS 2936 * workaround is available. (Or for other errata, 2937 * OS workaround is required.) 2938 * For a value of 1, caller will confirm that the 2939 * erratum 298 workaround has indeed been applied by BIOS. 2940 * 2941 * A 1 may be set in cpus that have a HW fix 2942 * in a mixed cpu system. Regarding erratum 298: 2943 * In a multiprocessor platform, the workaround above 2944 * should be applied to all processors regardless of 2945 * silicon revision when an affected processor is 2946 * present. 2947 */ 2948 2949 return (rdmsr(MSR_AMD_OSVW_STATUS + 2950 (osvwid / OSVW_ID_CNT_PER_MSR)) & 2951 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 2952 2953 default: 2954 return (-1); 2955 } 2956 } 2957 2958 static const char assoc_str[] = "associativity"; 2959 static const char line_str[] = "line-size"; 2960 static const char size_str[] = "size"; 2961 2962 static void 2963 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2964 uint32_t val) 2965 { 2966 char buf[128]; 2967 2968 /* 2969 * ndi_prop_update_int() is used because it is desirable for 2970 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2971 */ 2972 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2973 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2974 } 2975 2976 /* 2977 * Intel-style cache/tlb description 2978 * 2979 * Standard cpuid level 2 gives a randomly ordered 2980 * selection of tags that index into a table that describes 2981 * cache and tlb properties. 2982 */ 2983 2984 static const char l1_icache_str[] = "l1-icache"; 2985 static const char l1_dcache_str[] = "l1-dcache"; 2986 static const char l2_cache_str[] = "l2-cache"; 2987 static const char l3_cache_str[] = "l3-cache"; 2988 static const char itlb4k_str[] = "itlb-4K"; 2989 static const char dtlb4k_str[] = "dtlb-4K"; 2990 static const char itlb2M_str[] = "itlb-2M"; 2991 static const char itlb4M_str[] = "itlb-4M"; 2992 static const char dtlb4M_str[] = "dtlb-4M"; 2993 static const char dtlb24_str[] = "dtlb0-2M-4M"; 2994 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2995 static const char itlb24_str[] = "itlb-2M-4M"; 2996 static const char dtlb44_str[] = "dtlb-4K-4M"; 2997 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2998 static const char sl2_cache_str[] = "sectored-l2-cache"; 2999 static const char itrace_str[] = "itrace-cache"; 3000 static const char sl3_cache_str[] = "sectored-l3-cache"; 3001 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3002 3003 static const struct cachetab { 3004 uint8_t ct_code; 3005 uint8_t ct_assoc; 3006 uint16_t ct_line_size; 3007 size_t ct_size; 3008 const char *ct_label; 3009 } intel_ctab[] = { 3010 /* 3011 * maintain descending order! 3012 * 3013 * Codes ignored - Reason 3014 * ---------------------- 3015 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3016 * f0H/f1H - Currently we do not interpret prefetch size by design 3017 */ 3018 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3019 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3020 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3021 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3022 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3023 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3024 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3025 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3026 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3027 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3028 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3029 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3030 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3031 { 0xc0, 4, 0, 8, dtlb44_str }, 3032 { 0xba, 4, 0, 64, dtlb4k_str }, 3033 { 0xb4, 4, 0, 256, dtlb4k_str }, 3034 { 0xb3, 4, 0, 128, dtlb4k_str }, 3035 { 0xb2, 4, 0, 64, itlb4k_str }, 3036 { 0xb0, 4, 0, 128, itlb4k_str }, 3037 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3038 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3039 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3040 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3041 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3042 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3043 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3044 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3045 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3046 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3047 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3048 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3049 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3050 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3051 { 0x73, 8, 0, 64*1024, itrace_str}, 3052 { 0x72, 8, 0, 32*1024, itrace_str}, 3053 { 0x71, 8, 0, 16*1024, itrace_str}, 3054 { 0x70, 8, 0, 12*1024, itrace_str}, 3055 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3056 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3057 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3058 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3059 { 0x5d, 0, 0, 256, dtlb44_str}, 3060 { 0x5c, 0, 0, 128, dtlb44_str}, 3061 { 0x5b, 0, 0, 64, dtlb44_str}, 3062 { 0x5a, 4, 0, 32, dtlb24_str}, 3063 { 0x59, 0, 0, 16, dtlb4k_str}, 3064 { 0x57, 4, 0, 16, dtlb4k_str}, 3065 { 0x56, 4, 0, 16, dtlb4M_str}, 3066 { 0x55, 0, 0, 7, itlb24_str}, 3067 { 0x52, 0, 0, 256, itlb424_str}, 3068 { 0x51, 0, 0, 128, itlb424_str}, 3069 { 0x50, 0, 0, 64, itlb424_str}, 3070 { 0x4f, 0, 0, 32, itlb4k_str}, 3071 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3072 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3073 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3074 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3075 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3076 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3077 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3078 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3079 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3080 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3081 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3082 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3083 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3084 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3085 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3086 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3087 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3088 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3089 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3090 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3091 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3092 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3093 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3094 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3095 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3096 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3097 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3098 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3099 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3100 { 0x0b, 4, 0, 4, itlb4M_str}, 3101 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3102 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3103 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3104 { 0x05, 4, 0, 32, dtlb4M_str}, 3105 { 0x04, 4, 0, 8, dtlb4M_str}, 3106 { 0x03, 4, 0, 64, dtlb4k_str}, 3107 { 0x02, 4, 0, 2, itlb4M_str}, 3108 { 0x01, 4, 0, 32, itlb4k_str}, 3109 { 0 } 3110 }; 3111 3112 static const struct cachetab cyrix_ctab[] = { 3113 { 0x70, 4, 0, 32, "tlb-4K" }, 3114 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3115 { 0 } 3116 }; 3117 3118 /* 3119 * Search a cache table for a matching entry 3120 */ 3121 static const struct cachetab * 3122 find_cacheent(const struct cachetab *ct, uint_t code) 3123 { 3124 if (code != 0) { 3125 for (; ct->ct_code != 0; ct++) 3126 if (ct->ct_code <= code) 3127 break; 3128 if (ct->ct_code == code) 3129 return (ct); 3130 } 3131 return (NULL); 3132 } 3133 3134 /* 3135 * Populate cachetab entry with L2 or L3 cache-information using 3136 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3137 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3138 * information is found. 3139 */ 3140 static int 3141 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3142 { 3143 uint32_t level, i; 3144 int ret = 0; 3145 3146 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3147 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3148 3149 if (level == 2 || level == 3) { 3150 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3151 ct->ct_line_size = 3152 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3153 ct->ct_size = ct->ct_assoc * 3154 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3155 ct->ct_line_size * 3156 (cpi->cpi_std_4[i]->cp_ecx + 1); 3157 3158 if (level == 2) { 3159 ct->ct_label = l2_cache_str; 3160 } else if (level == 3) { 3161 ct->ct_label = l3_cache_str; 3162 } 3163 ret = 1; 3164 } 3165 } 3166 3167 return (ret); 3168 } 3169 3170 /* 3171 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3172 * The walk is terminated if the walker returns non-zero. 3173 */ 3174 static void 3175 intel_walk_cacheinfo(struct cpuid_info *cpi, 3176 void *arg, int (*func)(void *, const struct cachetab *)) 3177 { 3178 const struct cachetab *ct; 3179 struct cachetab des_49_ct, des_b1_ct; 3180 uint8_t *dp; 3181 int i; 3182 3183 if ((dp = cpi->cpi_cacheinfo) == NULL) 3184 return; 3185 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3186 /* 3187 * For overloaded descriptor 0x49 we use cpuid function 4 3188 * if supported by the current processor, to create 3189 * cache information. 3190 * For overloaded descriptor 0xb1 we use X86_PAE flag 3191 * to disambiguate the cache information. 3192 */ 3193 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3194 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3195 ct = &des_49_ct; 3196 } else if (*dp == 0xb1) { 3197 des_b1_ct.ct_code = 0xb1; 3198 des_b1_ct.ct_assoc = 4; 3199 des_b1_ct.ct_line_size = 0; 3200 if (x86_feature & X86_PAE) { 3201 des_b1_ct.ct_size = 8; 3202 des_b1_ct.ct_label = itlb2M_str; 3203 } else { 3204 des_b1_ct.ct_size = 4; 3205 des_b1_ct.ct_label = itlb4M_str; 3206 } 3207 ct = &des_b1_ct; 3208 } else { 3209 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3210 continue; 3211 } 3212 } 3213 3214 if (func(arg, ct) != 0) { 3215 break; 3216 } 3217 } 3218 } 3219 3220 /* 3221 * (Like the Intel one, except for Cyrix CPUs) 3222 */ 3223 static void 3224 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3225 void *arg, int (*func)(void *, const struct cachetab *)) 3226 { 3227 const struct cachetab *ct; 3228 uint8_t *dp; 3229 int i; 3230 3231 if ((dp = cpi->cpi_cacheinfo) == NULL) 3232 return; 3233 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3234 /* 3235 * Search Cyrix-specific descriptor table first .. 3236 */ 3237 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3238 if (func(arg, ct) != 0) 3239 break; 3240 continue; 3241 } 3242 /* 3243 * .. else fall back to the Intel one 3244 */ 3245 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3246 if (func(arg, ct) != 0) 3247 break; 3248 continue; 3249 } 3250 } 3251 } 3252 3253 /* 3254 * A cacheinfo walker that adds associativity, line-size, and size properties 3255 * to the devinfo node it is passed as an argument. 3256 */ 3257 static int 3258 add_cacheent_props(void *arg, const struct cachetab *ct) 3259 { 3260 dev_info_t *devi = arg; 3261 3262 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3263 if (ct->ct_line_size != 0) 3264 add_cache_prop(devi, ct->ct_label, line_str, 3265 ct->ct_line_size); 3266 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3267 return (0); 3268 } 3269 3270 3271 static const char fully_assoc[] = "fully-associative?"; 3272 3273 /* 3274 * AMD style cache/tlb description 3275 * 3276 * Extended functions 5 and 6 directly describe properties of 3277 * tlbs and various cache levels. 3278 */ 3279 static void 3280 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3281 { 3282 switch (assoc) { 3283 case 0: /* reserved; ignore */ 3284 break; 3285 default: 3286 add_cache_prop(devi, label, assoc_str, assoc); 3287 break; 3288 case 0xff: 3289 add_cache_prop(devi, label, fully_assoc, 1); 3290 break; 3291 } 3292 } 3293 3294 static void 3295 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3296 { 3297 if (size == 0) 3298 return; 3299 add_cache_prop(devi, label, size_str, size); 3300 add_amd_assoc(devi, label, assoc); 3301 } 3302 3303 static void 3304 add_amd_cache(dev_info_t *devi, const char *label, 3305 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3306 { 3307 if (size == 0 || line_size == 0) 3308 return; 3309 add_amd_assoc(devi, label, assoc); 3310 /* 3311 * Most AMD parts have a sectored cache. Multiple cache lines are 3312 * associated with each tag. A sector consists of all cache lines 3313 * associated with a tag. For example, the AMD K6-III has a sector 3314 * size of 2 cache lines per tag. 3315 */ 3316 if (lines_per_tag != 0) 3317 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3318 add_cache_prop(devi, label, line_str, line_size); 3319 add_cache_prop(devi, label, size_str, size * 1024); 3320 } 3321 3322 static void 3323 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3324 { 3325 switch (assoc) { 3326 case 0: /* off */ 3327 break; 3328 case 1: 3329 case 2: 3330 case 4: 3331 add_cache_prop(devi, label, assoc_str, assoc); 3332 break; 3333 case 6: 3334 add_cache_prop(devi, label, assoc_str, 8); 3335 break; 3336 case 8: 3337 add_cache_prop(devi, label, assoc_str, 16); 3338 break; 3339 case 0xf: 3340 add_cache_prop(devi, label, fully_assoc, 1); 3341 break; 3342 default: /* reserved; ignore */ 3343 break; 3344 } 3345 } 3346 3347 static void 3348 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3349 { 3350 if (size == 0 || assoc == 0) 3351 return; 3352 add_amd_l2_assoc(devi, label, assoc); 3353 add_cache_prop(devi, label, size_str, size); 3354 } 3355 3356 static void 3357 add_amd_l2_cache(dev_info_t *devi, const char *label, 3358 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3359 { 3360 if (size == 0 || assoc == 0 || line_size == 0) 3361 return; 3362 add_amd_l2_assoc(devi, label, assoc); 3363 if (lines_per_tag != 0) 3364 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3365 add_cache_prop(devi, label, line_str, line_size); 3366 add_cache_prop(devi, label, size_str, size * 1024); 3367 } 3368 3369 static void 3370 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3371 { 3372 struct cpuid_regs *cp; 3373 3374 if (cpi->cpi_xmaxeax < 0x80000005) 3375 return; 3376 cp = &cpi->cpi_extd[5]; 3377 3378 /* 3379 * 4M/2M L1 TLB configuration 3380 * 3381 * We report the size for 2M pages because AMD uses two 3382 * TLB entries for one 4M page. 3383 */ 3384 add_amd_tlb(devi, "dtlb-2M", 3385 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3386 add_amd_tlb(devi, "itlb-2M", 3387 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3388 3389 /* 3390 * 4K L1 TLB configuration 3391 */ 3392 3393 switch (cpi->cpi_vendor) { 3394 uint_t nentries; 3395 case X86_VENDOR_TM: 3396 if (cpi->cpi_family >= 5) { 3397 /* 3398 * Crusoe processors have 256 TLB entries, but 3399 * cpuid data format constrains them to only 3400 * reporting 255 of them. 3401 */ 3402 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3403 nentries = 256; 3404 /* 3405 * Crusoe processors also have a unified TLB 3406 */ 3407 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3408 nentries); 3409 break; 3410 } 3411 /*FALLTHROUGH*/ 3412 default: 3413 add_amd_tlb(devi, itlb4k_str, 3414 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3415 add_amd_tlb(devi, dtlb4k_str, 3416 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3417 break; 3418 } 3419 3420 /* 3421 * data L1 cache configuration 3422 */ 3423 3424 add_amd_cache(devi, l1_dcache_str, 3425 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3426 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3427 3428 /* 3429 * code L1 cache configuration 3430 */ 3431 3432 add_amd_cache(devi, l1_icache_str, 3433 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3434 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3435 3436 if (cpi->cpi_xmaxeax < 0x80000006) 3437 return; 3438 cp = &cpi->cpi_extd[6]; 3439 3440 /* Check for a unified L2 TLB for large pages */ 3441 3442 if (BITX(cp->cp_eax, 31, 16) == 0) 3443 add_amd_l2_tlb(devi, "l2-tlb-2M", 3444 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3445 else { 3446 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3447 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3448 add_amd_l2_tlb(devi, "l2-itlb-2M", 3449 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3450 } 3451 3452 /* Check for a unified L2 TLB for 4K pages */ 3453 3454 if (BITX(cp->cp_ebx, 31, 16) == 0) { 3455 add_amd_l2_tlb(devi, "l2-tlb-4K", 3456 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3457 } else { 3458 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3459 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3460 add_amd_l2_tlb(devi, "l2-itlb-4K", 3461 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3462 } 3463 3464 add_amd_l2_cache(devi, l2_cache_str, 3465 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3466 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3467 } 3468 3469 /* 3470 * There are two basic ways that the x86 world describes it cache 3471 * and tlb architecture - Intel's way and AMD's way. 3472 * 3473 * Return which flavor of cache architecture we should use 3474 */ 3475 static int 3476 x86_which_cacheinfo(struct cpuid_info *cpi) 3477 { 3478 switch (cpi->cpi_vendor) { 3479 case X86_VENDOR_Intel: 3480 if (cpi->cpi_maxeax >= 2) 3481 return (X86_VENDOR_Intel); 3482 break; 3483 case X86_VENDOR_AMD: 3484 /* 3485 * The K5 model 1 was the first part from AMD that reported 3486 * cache sizes via extended cpuid functions. 3487 */ 3488 if (cpi->cpi_family > 5 || 3489 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3490 return (X86_VENDOR_AMD); 3491 break; 3492 case X86_VENDOR_TM: 3493 if (cpi->cpi_family >= 5) 3494 return (X86_VENDOR_AMD); 3495 /*FALLTHROUGH*/ 3496 default: 3497 /* 3498 * If they have extended CPU data for 0x80000005 3499 * then we assume they have AMD-format cache 3500 * information. 3501 * 3502 * If not, and the vendor happens to be Cyrix, 3503 * then try our-Cyrix specific handler. 3504 * 3505 * If we're not Cyrix, then assume we're using Intel's 3506 * table-driven format instead. 3507 */ 3508 if (cpi->cpi_xmaxeax >= 0x80000005) 3509 return (X86_VENDOR_AMD); 3510 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3511 return (X86_VENDOR_Cyrix); 3512 else if (cpi->cpi_maxeax >= 2) 3513 return (X86_VENDOR_Intel); 3514 break; 3515 } 3516 return (-1); 3517 } 3518 3519 void 3520 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 3521 struct cpuid_info *cpi) 3522 { 3523 dev_info_t *cpu_devi; 3524 int create; 3525 3526 cpu_devi = (dev_info_t *)dip; 3527 3528 /* device_type */ 3529 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3530 "device_type", "cpu"); 3531 3532 /* reg */ 3533 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3534 "reg", cpu_id); 3535 3536 /* cpu-mhz, and clock-frequency */ 3537 if (cpu_freq > 0) { 3538 long long mul; 3539 3540 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3541 "cpu-mhz", cpu_freq); 3542 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3543 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3544 "clock-frequency", (int)mul); 3545 } 3546 3547 if ((x86_feature & X86_CPUID) == 0) { 3548 return; 3549 } 3550 3551 /* vendor-id */ 3552 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3553 "vendor-id", cpi->cpi_vendorstr); 3554 3555 if (cpi->cpi_maxeax == 0) { 3556 return; 3557 } 3558 3559 /* 3560 * family, model, and step 3561 */ 3562 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3563 "family", CPI_FAMILY(cpi)); 3564 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3565 "cpu-model", CPI_MODEL(cpi)); 3566 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3567 "stepping-id", CPI_STEP(cpi)); 3568 3569 /* type */ 3570 switch (cpi->cpi_vendor) { 3571 case X86_VENDOR_Intel: 3572 create = 1; 3573 break; 3574 default: 3575 create = 0; 3576 break; 3577 } 3578 if (create) 3579 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3580 "type", CPI_TYPE(cpi)); 3581 3582 /* ext-family */ 3583 switch (cpi->cpi_vendor) { 3584 case X86_VENDOR_Intel: 3585 case X86_VENDOR_AMD: 3586 create = cpi->cpi_family >= 0xf; 3587 break; 3588 default: 3589 create = 0; 3590 break; 3591 } 3592 if (create) 3593 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3594 "ext-family", CPI_FAMILY_XTD(cpi)); 3595 3596 /* ext-model */ 3597 switch (cpi->cpi_vendor) { 3598 case X86_VENDOR_Intel: 3599 create = IS_EXTENDED_MODEL_INTEL(cpi); 3600 break; 3601 case X86_VENDOR_AMD: 3602 create = CPI_FAMILY(cpi) == 0xf; 3603 break; 3604 default: 3605 create = 0; 3606 break; 3607 } 3608 if (create) 3609 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3610 "ext-model", CPI_MODEL_XTD(cpi)); 3611 3612 /* generation */ 3613 switch (cpi->cpi_vendor) { 3614 case X86_VENDOR_AMD: 3615 /* 3616 * AMD K5 model 1 was the first part to support this 3617 */ 3618 create = cpi->cpi_xmaxeax >= 0x80000001; 3619 break; 3620 default: 3621 create = 0; 3622 break; 3623 } 3624 if (create) 3625 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3626 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3627 3628 /* brand-id */ 3629 switch (cpi->cpi_vendor) { 3630 case X86_VENDOR_Intel: 3631 /* 3632 * brand id first appeared on Pentium III Xeon model 8, 3633 * and Celeron model 8 processors and Opteron 3634 */ 3635 create = cpi->cpi_family > 6 || 3636 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3637 break; 3638 case X86_VENDOR_AMD: 3639 create = cpi->cpi_family >= 0xf; 3640 break; 3641 default: 3642 create = 0; 3643 break; 3644 } 3645 if (create && cpi->cpi_brandid != 0) { 3646 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3647 "brand-id", cpi->cpi_brandid); 3648 } 3649 3650 /* chunks, and apic-id */ 3651 switch (cpi->cpi_vendor) { 3652 /* 3653 * first available on Pentium IV and Opteron (K8) 3654 */ 3655 case X86_VENDOR_Intel: 3656 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3657 break; 3658 case X86_VENDOR_AMD: 3659 create = cpi->cpi_family >= 0xf; 3660 break; 3661 default: 3662 create = 0; 3663 break; 3664 } 3665 if (create) { 3666 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3667 "chunks", CPI_CHUNKS(cpi)); 3668 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3669 "apic-id", cpi->cpi_apicid); 3670 if (cpi->cpi_chipid >= 0) { 3671 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3672 "chip#", cpi->cpi_chipid); 3673 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3674 "clog#", cpi->cpi_clogid); 3675 } 3676 } 3677 3678 /* cpuid-features */ 3679 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3680 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3681 3682 3683 /* cpuid-features-ecx */ 3684 switch (cpi->cpi_vendor) { 3685 case X86_VENDOR_Intel: 3686 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3687 break; 3688 default: 3689 create = 0; 3690 break; 3691 } 3692 if (create) 3693 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3694 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3695 3696 /* ext-cpuid-features */ 3697 switch (cpi->cpi_vendor) { 3698 case X86_VENDOR_Intel: 3699 case X86_VENDOR_AMD: 3700 case X86_VENDOR_Cyrix: 3701 case X86_VENDOR_TM: 3702 case X86_VENDOR_Centaur: 3703 create = cpi->cpi_xmaxeax >= 0x80000001; 3704 break; 3705 default: 3706 create = 0; 3707 break; 3708 } 3709 if (create) { 3710 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3711 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3712 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3713 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3714 } 3715 3716 /* 3717 * Brand String first appeared in Intel Pentium IV, AMD K5 3718 * model 1, and Cyrix GXm. On earlier models we try and 3719 * simulate something similar .. so this string should always 3720 * same -something- about the processor, however lame. 3721 */ 3722 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3723 "brand-string", cpi->cpi_brandstr); 3724 3725 /* 3726 * Finally, cache and tlb information 3727 */ 3728 switch (x86_which_cacheinfo(cpi)) { 3729 case X86_VENDOR_Intel: 3730 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3731 break; 3732 case X86_VENDOR_Cyrix: 3733 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3734 break; 3735 case X86_VENDOR_AMD: 3736 amd_cache_info(cpi, cpu_devi); 3737 break; 3738 default: 3739 break; 3740 } 3741 } 3742 3743 struct l2info { 3744 int *l2i_csz; 3745 int *l2i_lsz; 3746 int *l2i_assoc; 3747 int l2i_ret; 3748 }; 3749 3750 /* 3751 * A cacheinfo walker that fetches the size, line-size and associativity 3752 * of the L2 cache 3753 */ 3754 static int 3755 intel_l2cinfo(void *arg, const struct cachetab *ct) 3756 { 3757 struct l2info *l2i = arg; 3758 int *ip; 3759 3760 if (ct->ct_label != l2_cache_str && 3761 ct->ct_label != sl2_cache_str) 3762 return (0); /* not an L2 -- keep walking */ 3763 3764 if ((ip = l2i->l2i_csz) != NULL) 3765 *ip = ct->ct_size; 3766 if ((ip = l2i->l2i_lsz) != NULL) 3767 *ip = ct->ct_line_size; 3768 if ((ip = l2i->l2i_assoc) != NULL) 3769 *ip = ct->ct_assoc; 3770 l2i->l2i_ret = ct->ct_size; 3771 return (1); /* was an L2 -- terminate walk */ 3772 } 3773 3774 /* 3775 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3776 * 3777 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3778 * value is the associativity, the associativity for the L2 cache and 3779 * tlb is encoded in the following table. The 4 bit L2 value serves as 3780 * an index into the amd_afd[] array to determine the associativity. 3781 * -1 is undefined. 0 is fully associative. 3782 */ 3783 3784 static int amd_afd[] = 3785 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3786 3787 static void 3788 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3789 { 3790 struct cpuid_regs *cp; 3791 uint_t size, assoc; 3792 int i; 3793 int *ip; 3794 3795 if (cpi->cpi_xmaxeax < 0x80000006) 3796 return; 3797 cp = &cpi->cpi_extd[6]; 3798 3799 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3800 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3801 uint_t cachesz = size * 1024; 3802 assoc = amd_afd[i]; 3803 3804 ASSERT(assoc != -1); 3805 3806 if ((ip = l2i->l2i_csz) != NULL) 3807 *ip = cachesz; 3808 if ((ip = l2i->l2i_lsz) != NULL) 3809 *ip = BITX(cp->cp_ecx, 7, 0); 3810 if ((ip = l2i->l2i_assoc) != NULL) 3811 *ip = assoc; 3812 l2i->l2i_ret = cachesz; 3813 } 3814 } 3815 3816 int 3817 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3818 { 3819 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3820 struct l2info __l2info, *l2i = &__l2info; 3821 3822 l2i->l2i_csz = csz; 3823 l2i->l2i_lsz = lsz; 3824 l2i->l2i_assoc = assoc; 3825 l2i->l2i_ret = -1; 3826 3827 switch (x86_which_cacheinfo(cpi)) { 3828 case X86_VENDOR_Intel: 3829 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3830 break; 3831 case X86_VENDOR_Cyrix: 3832 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3833 break; 3834 case X86_VENDOR_AMD: 3835 amd_l2cacheinfo(cpi, l2i); 3836 break; 3837 default: 3838 break; 3839 } 3840 return (l2i->l2i_ret); 3841 } 3842 3843 #if !defined(__xpv) 3844 3845 uint32_t * 3846 cpuid_mwait_alloc(cpu_t *cpu) 3847 { 3848 uint32_t *ret; 3849 size_t mwait_size; 3850 3851 ASSERT(cpuid_checkpass(cpu, 2)); 3852 3853 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3854 if (mwait_size == 0) 3855 return (NULL); 3856 3857 /* 3858 * kmem_alloc() returns cache line size aligned data for mwait_size 3859 * allocations. mwait_size is currently cache line sized. Neither 3860 * of these implementation details are guarantied to be true in the 3861 * future. 3862 * 3863 * First try allocating mwait_size as kmem_alloc() currently returns 3864 * correctly aligned memory. If kmem_alloc() does not return 3865 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3866 * 3867 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3868 * decide to free this memory. 3869 */ 3870 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3871 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3872 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3873 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3874 *ret = MWAIT_RUNNING; 3875 return (ret); 3876 } else { 3877 kmem_free(ret, mwait_size); 3878 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3879 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3880 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3881 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3882 *ret = MWAIT_RUNNING; 3883 return (ret); 3884 } 3885 } 3886 3887 void 3888 cpuid_mwait_free(cpu_t *cpu) 3889 { 3890 ASSERT(cpuid_checkpass(cpu, 2)); 3891 3892 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3893 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3894 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3895 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3896 } 3897 3898 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3899 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3900 } 3901 3902 void 3903 patch_tsc_read(int flag) 3904 { 3905 size_t cnt; 3906 3907 switch (flag) { 3908 case X86_NO_TSC: 3909 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3910 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3911 break; 3912 case X86_HAVE_TSCP: 3913 cnt = &_tscp_end - &_tscp_start; 3914 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3915 break; 3916 case X86_TSC_MFENCE: 3917 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 3918 (void) memcpy((void *)tsc_read, 3919 (void *)&_tsc_mfence_start, cnt); 3920 break; 3921 case X86_TSC_LFENCE: 3922 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 3923 (void) memcpy((void *)tsc_read, 3924 (void *)&_tsc_lfence_start, cnt); 3925 break; 3926 default: 3927 break; 3928 } 3929 } 3930 3931 int 3932 cpuid_deep_cstates_supported(void) 3933 { 3934 struct cpuid_info *cpi; 3935 struct cpuid_regs regs; 3936 3937 ASSERT(cpuid_checkpass(CPU, 1)); 3938 3939 cpi = CPU->cpu_m.mcpu_cpi; 3940 3941 if (!(x86_feature & X86_CPUID)) 3942 return (0); 3943 3944 switch (cpi->cpi_vendor) { 3945 case X86_VENDOR_Intel: 3946 if (cpi->cpi_xmaxeax < 0x80000007) 3947 return (0); 3948 3949 /* 3950 * TSC run at a constant rate in all ACPI C-states? 3951 */ 3952 regs.cp_eax = 0x80000007; 3953 (void) __cpuid_insn(®s); 3954 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 3955 3956 default: 3957 return (0); 3958 } 3959 } 3960 3961 #endif /* !__xpv */ 3962 3963 void 3964 post_startup_cpu_fixups(void) 3965 { 3966 #ifndef __xpv 3967 /* 3968 * Some AMD processors support C1E state. Entering this state will 3969 * cause the local APIC timer to stop, which we can't deal with at 3970 * this time. 3971 */ 3972 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 3973 on_trap_data_t otd; 3974 uint64_t reg; 3975 3976 if (!on_trap(&otd, OT_DATA_ACCESS)) { 3977 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 3978 /* Disable C1E state if it is enabled by BIOS */ 3979 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 3980 AMD_ACTONCMPHALT_MASK) { 3981 reg &= ~(AMD_ACTONCMPHALT_MASK << 3982 AMD_ACTONCMPHALT_SHIFT); 3983 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 3984 } 3985 } 3986 no_trap(); 3987 } 3988 #endif /* !__xpv */ 3989 } 3990 3991 /* 3992 * Starting with the Westmere processor the local 3993 * APIC timer will continue running in all C-states, 3994 * including the deepest C-states. 3995 */ 3996 int 3997 cpuid_arat_supported(void) 3998 { 3999 struct cpuid_info *cpi; 4000 struct cpuid_regs regs; 4001 4002 ASSERT(cpuid_checkpass(CPU, 1)); 4003 ASSERT(x86_feature & X86_CPUID); 4004 4005 cpi = CPU->cpu_m.mcpu_cpi; 4006 4007 switch (cpi->cpi_vendor) { 4008 case X86_VENDOR_Intel: 4009 /* 4010 * Always-running Local APIC Timer is 4011 * indicated by CPUID.6.EAX[2]. 4012 */ 4013 if (cpi->cpi_maxeax >= 6) { 4014 regs.cp_eax = 6; 4015 (void) cpuid_insn(NULL, ®s); 4016 return (regs.cp_eax & CPUID_CSTATE_ARAT); 4017 } else { 4018 return (0); 4019 } 4020 default: 4021 return (0); 4022 } 4023 } 4024 4025 #if defined(__amd64) && !defined(__xpv) 4026 /* 4027 * Patch in versions of bcopy for high performance Intel Nhm processors 4028 * and later... 4029 */ 4030 void 4031 patch_memops(uint_t vendor) 4032 { 4033 size_t cnt, i; 4034 caddr_t to, from; 4035 4036 if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) { 4037 cnt = &bcopy_patch_end - &bcopy_patch_start; 4038 to = &bcopy_ck_size; 4039 from = &bcopy_patch_start; 4040 for (i = 0; i < cnt; i++) { 4041 *to++ = *from++; 4042 } 4043 } 4044 } 4045 #endif /* __amd64 && !__xpv */ 4046