1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 /* 31 * Various routines to handle identification 32 * and classification of x86 processors. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/archsystm.h> 37 #include <sys/x86_archext.h> 38 #include <sys/kmem.h> 39 #include <sys/systm.h> 40 #include <sys/cmn_err.h> 41 #include <sys/sunddi.h> 42 #include <sys/sunndi.h> 43 #include <sys/cpuvar.h> 44 #include <sys/processor.h> 45 #include <sys/sysmacros.h> 46 #include <sys/pg.h> 47 #include <sys/fp.h> 48 #include <sys/controlregs.h> 49 #include <sys/auxv_386.h> 50 #include <sys/bitmap.h> 51 #include <sys/memnode.h> 52 53 #ifdef __xpv 54 #include <sys/hypervisor.h> 55 #else 56 #include <sys/ontrap.h> 57 #endif 58 59 /* 60 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 61 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 62 * them accordingly. For most modern processors, feature detection occurs here 63 * in pass 1. 
64 * 65 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 66 * for the boot CPU and does the basic analysis that the early kernel needs. 67 * x86_feature is set based on the return value of cpuid_pass1() of the boot 68 * CPU. 69 * 70 * Pass 1 includes: 71 * 72 * o Determining vendor/model/family/stepping and setting x86_type and 73 * x86_vendor accordingly. 74 * o Processing the feature flags returned by the cpuid instruction while 75 * applying any workarounds or tricks for the specific processor. 76 * o Mapping the feature flags into Solaris feature bits (X86_*). 77 * o Processing extended feature flags if supported by the processor, 78 * again while applying specific processor knowledge. 79 * o Determining the CMT characteristics of the system. 80 * 81 * Pass 1 is done on non-boot CPUs during their initialization and the results 82 * are used only as a meager attempt at ensuring that all processors within the 83 * system support the same features. 84 * 85 * Pass 2 of cpuid feature analysis happens just at the beginning 86 * of startup(). It just copies in and corrects the remainder 87 * of the cpuid data we depend on: standard cpuid functions that we didn't 88 * need for pass1 feature analysis, and extended cpuid functions beyond the 89 * simple feature processing done in pass1. 90 * 91 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 92 * particular kernel memory allocation has been made available. It creates a 93 * readable brand string based on the data collected in the first two passes. 94 * 95 * Pass 4 of cpuid analysis is invoked after post_startup() when all 96 * the support infrastructure for various hardware features has been 97 * initialized. It determines which processor features will be reported 98 * to userland via the aux vector. 99 * 100 * All passes are executed on all CPUs, but only the boot CPU determines what 101 * features the kernel will use. 
102 * 103 * Much of the worst junk in this file is for the support of processors 104 * that didn't really implement the cpuid instruction properly. 105 * 106 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 107 * the pass numbers. Accordingly, changes to the pass code may require changes 108 * to the accessor code. 109 */ 110 111 uint_t x86_feature = 0; 112 uint_t x86_vendor = X86_VENDOR_IntelClone; 113 uint_t x86_type = X86_TYPE_OTHER; 114 uint_t x86_clflush_size = 0; 115 116 uint_t pentiumpro_bug4046376; 117 uint_t pentiumpro_bug4064495; 118 119 uint_t enable486; 120 /* 121 * This is set to the platform type Solaris is running on. 122 */ 123 static int platform_type = HW_NATIVE; 124 125 /* 126 * monitor/mwait info. 127 * 128 * size_actual and buf_actual are the real address and size allocated to get 129 * proper mwait_buf alignment. buf_actual and size_actual should be passed 130 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 131 * processor cache-line alignment, but this is not guaranteed in the future. 132 */ 133 struct mwait_info { 134 size_t mon_min; /* min size to avoid missed wakeups */ 135 size_t mon_max; /* size to avoid false wakeups */ 136 size_t size_actual; /* size actually allocated */ 137 void *buf_actual; /* memory actually allocated */ 138 uint32_t support; /* MWAIT_* flags: processor support of monitor/mwait */ 139 }; 140 141 /* 142 * These constants determine how many of the elements of the 143 * cpuid we cache in the cpuid_info data structure; the 144 * remaining elements are accessible via the cpuid instruction. 145 */ 146 147 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 148 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 
0x80000008 */ 149 150 struct cpuid_info { 151 uint_t cpi_pass; /* last pass completed */ 152 /* 153 * standard function information 154 */ 155 uint_t cpi_maxeax; /* fn 0: %eax */ 156 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 157 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 158 159 uint_t cpi_family; /* fn 1: extended family */ 160 uint_t cpi_model; /* fn 1: extended model */ 161 uint_t cpi_step; /* fn 1: stepping */ 162 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 163 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 164 int cpi_clogid; /* fn 1: %ebx: thread # */ 165 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 166 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 167 uint_t cpi_ncache; /* fn 2: number of elements */ 168 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 169 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 170 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 171 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 172 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 173 /* 174 * extended function information 175 */ 176 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 177 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 178 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 179 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 180 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 181 id_t cpi_coreid; /* same coreid => strands share core */ 182 int cpi_pkgcoreid; /* core number within single package */ 183 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 184 /* Intel: fn 4: %eax[31-26] */ 185 /* 186 * supported feature information 187 */ 188 uint32_t cpi_support[5]; 189 #define STD_EDX_FEATURES 0 190 #define AMD_EDX_FEATURES 1 191 #define TM_EDX_FEATURES 2 192 #define STD_ECX_FEATURES 3 193 #define AMD_ECX_FEATURES 4 194 /* 195 * Synthesized information, where known. 
196 */ 197 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 198 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 199 uint32_t cpi_socket; /* Chip package/socket type */ 200 201 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 202 uint32_t cpi_apicid; 203 }; 204 205 206 static struct cpuid_info cpuid_info0; 207 208 /* 209 * These bit fields are defined by the Intel Application Note AP-485 210 * "Intel Processor Identification and the CPUID Instruction" 211 */ 212 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 213 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 214 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 215 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 216 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 217 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 218 219 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 220 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 221 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 222 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 223 224 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 225 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 226 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 227 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 228 229 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 230 #define CPI_XMAXEAX_MAX 0x80000100 231 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 232 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 233 234 /* 235 * Function 4 (Deterministic Cache Parameters) macros 236 * Defined by Intel Application Note AP-485 237 */ 238 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 239 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 240 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 241 #define 
CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 242 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 243 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 244 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 245 246 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 247 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 248 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 249 250 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 251 252 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 253 254 255 /* 256 * A couple of shorthand macros to identify "later" P6-family chips 257 * like the Pentium M and Core. First, the "older" P6-based stuff 258 * (loosely defined as "pre-Pentium-4"): 259 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 260 */ 261 262 #define IS_LEGACY_P6(cpi) ( \ 263 cpi->cpi_family == 6 && \ 264 (cpi->cpi_model == 1 || \ 265 cpi->cpi_model == 3 || \ 266 cpi->cpi_model == 5 || \ 267 cpi->cpi_model == 6 || \ 268 cpi->cpi_model == 7 || \ 269 cpi->cpi_model == 8 || \ 270 cpi->cpi_model == 0xA || \ 271 cpi->cpi_model == 0xB) \ 272 ) 273 274 /* A "new F6" is everything with family 6 that's not the above */ 275 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 276 277 /* Extended family/model support */ 278 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 279 cpi->cpi_family >= 0xf) 280 281 /* 282 * Info for monitor/mwait idle loop. 283 * 284 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 285 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 286 * 2006. 287 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 288 * Documentation Updates" #33633, Rev 2.05, December 2006. 
289 */ 290 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 291 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 292 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 293 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 294 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 295 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 296 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 297 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 298 /* 299 * Number of sub-cstates for a given c-state. 300 */ 301 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 302 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 303 304 /* 305 * Functions we consune from cpuid_subr.c; don't publish these in a header 306 * file to try and keep people using the expected cpuid_* interfaces. 307 */ 308 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 309 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 310 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 311 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 312 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 313 314 /* 315 * Apply up various platform-dependent restrictions where the 316 * underlying platform restrictions mean the CPU can be marked 317 * as less capable than its cpuid instruction would imply. 318 */ 319 #if defined(__xpv) 320 static void 321 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 322 { 323 switch (eax) { 324 case 1: { 325 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 
326 0 : CPUID_INTC_EDX_MCA; 327 cp->cp_edx &= 328 ~(mcamask | 329 CPUID_INTC_EDX_PSE | 330 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 331 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 332 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 333 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 334 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 335 break; 336 } 337 338 case 0x80000001: 339 cp->cp_edx &= 340 ~(CPUID_AMD_EDX_PSE | 341 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 342 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 343 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 344 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 345 CPUID_AMD_EDX_TSCP); 346 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 347 break; 348 default: 349 break; 350 } 351 352 switch (vendor) { 353 case X86_VENDOR_Intel: 354 switch (eax) { 355 case 4: 356 /* 357 * Zero out the (ncores-per-chip - 1) field 358 */ 359 cp->cp_eax &= 0x03fffffff; 360 break; 361 default: 362 break; 363 } 364 break; 365 case X86_VENDOR_AMD: 366 switch (eax) { 367 368 case 0x80000001: 369 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 370 break; 371 372 case 0x80000008: 373 /* 374 * Zero out the (ncores-per-chip - 1) field 375 */ 376 cp->cp_ecx &= 0xffffff00; 377 break; 378 default: 379 break; 380 } 381 break; 382 default: 383 break; 384 } 385 } 386 #else 387 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 388 #endif 389 390 /* 391 * Some undocumented ways of patching the results of the cpuid 392 * instruction to permit running Solaris 10 on future cpus that 393 * we don't currently support. Could be set to non-zero values 394 * via settings in eeprom. 395 */ 396 397 uint32_t cpuid_feature_ecx_include; 398 uint32_t cpuid_feature_ecx_exclude; 399 uint32_t cpuid_feature_edx_include; 400 uint32_t cpuid_feature_edx_exclude; 401 402 void 403 cpuid_alloc_space(cpu_t *cpu) 404 { 405 /* 406 * By convention, cpu0 is the boot cpu, which is set up 407 * before memory allocation is available. All other cpus get 408 * their cpuid_info struct allocated here. 
409 */ 410 ASSERT(cpu->cpu_id != 0); 411 cpu->cpu_m.mcpu_cpi = 412 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 413 } 414 415 void 416 cpuid_free_space(cpu_t *cpu) 417 { 418 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 419 int i; 420 421 ASSERT(cpu->cpu_id != 0); 422 423 /* 424 * Free up any function 4 related dynamic storage 425 */ 426 for (i = 1; i < cpi->cpi_std_4_size; i++) 427 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 428 if (cpi->cpi_std_4_size > 0) 429 kmem_free(cpi->cpi_std_4, 430 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 431 432 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 433 } 434 435 #if !defined(__xpv) 436 437 static void 438 determine_platform() 439 { 440 struct cpuid_regs cp; 441 char *xen_str; 442 uint32_t xen_signature[4]; 443 444 /* 445 * In a fully virtualized domain, Xen's pseudo-cpuid function 446 * 0x40000000 returns a string representing the Xen signature in 447 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 448 * function. 449 */ 450 cp.cp_eax = 0x40000000; 451 (void) __cpuid_insn(&cp); 452 xen_signature[0] = cp.cp_ebx; 453 xen_signature[1] = cp.cp_ecx; 454 xen_signature[2] = cp.cp_edx; 455 xen_signature[3] = 0; 456 xen_str = (char *)xen_signature; 457 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) { 458 platform_type = HW_XEN_HVM; 459 } else if (vmware_platform()) { /* running under vmware hypervisor? 
*/ 460 platform_type = HW_VMWARE; 461 } 462 } 463 464 int 465 get_hwenv(void) 466 { 467 return (platform_type); 468 } 469 470 int 471 is_controldom(void) 472 { 473 return (0); 474 } 475 476 #else 477 478 int 479 get_hwenv(void) 480 { 481 return (HW_XEN_PV); 482 } 483 484 int 485 is_controldom(void) 486 { 487 return (DOMAIN_IS_INITDOMAIN(xen_info)); 488 } 489 490 #endif /* __xpv */ 491 492 uint_t 493 cpuid_pass1(cpu_t *cpu) 494 { 495 uint32_t mask_ecx, mask_edx; 496 uint_t feature = X86_CPUID; 497 struct cpuid_info *cpi; 498 struct cpuid_regs *cp; 499 int xcpuid; 500 #if !defined(__xpv) 501 extern int idle_cpu_prefer_mwait; 502 #endif 503 504 505 #if !defined(__xpv) 506 determine_platform(); 507 #endif 508 /* 509 * Space statically allocated for cpu0, ensure pointer is set 510 */ 511 if (cpu->cpu_id == 0) 512 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 513 cpi = cpu->cpu_m.mcpu_cpi; 514 ASSERT(cpi != NULL); 515 cp = &cpi->cpi_std[0]; 516 cp->cp_eax = 0; 517 cpi->cpi_maxeax = __cpuid_insn(cp); 518 { 519 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 520 *iptr++ = cp->cp_ebx; 521 *iptr++ = cp->cp_edx; 522 *iptr++ = cp->cp_ecx; 523 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 524 } 525 526 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 527 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 528 529 /* 530 * Limit the range in case of weird hardware 531 */ 532 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 533 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 534 if (cpi->cpi_maxeax < 1) 535 goto pass1_done; 536 537 cp = &cpi->cpi_std[1]; 538 cp->cp_eax = 1; 539 (void) __cpuid_insn(cp); 540 541 /* 542 * Extract identifying constants for easy access. 543 */ 544 cpi->cpi_model = CPI_MODEL(cpi); 545 cpi->cpi_family = CPI_FAMILY(cpi); 546 547 if (cpi->cpi_family == 0xf) 548 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 549 550 /* 551 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 
552 * Intel, and presumably everyone else, uses model == 0xf, as 553 * one would expect (max value means possible overflow). Sigh. 554 */ 555 556 switch (cpi->cpi_vendor) { 557 case X86_VENDOR_Intel: 558 if (IS_EXTENDED_MODEL_INTEL(cpi)) 559 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 560 break; 561 case X86_VENDOR_AMD: 562 if (CPI_FAMILY(cpi) == 0xf) 563 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 564 break; 565 default: 566 if (cpi->cpi_model == 0xf) 567 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 568 break; 569 } 570 571 cpi->cpi_step = CPI_STEP(cpi); 572 cpi->cpi_brandid = CPI_BRANDID(cpi); 573 574 /* 575 * *default* assumptions: 576 * - believe %edx feature word 577 * - ignore %ecx feature word 578 * - 32-bit virtual and physical addressing 579 */ 580 mask_edx = 0xffffffff; 581 mask_ecx = 0; 582 583 cpi->cpi_pabits = cpi->cpi_vabits = 32; 584 585 switch (cpi->cpi_vendor) { 586 case X86_VENDOR_Intel: 587 if (cpi->cpi_family == 5) 588 x86_type = X86_TYPE_P5; 589 else if (IS_LEGACY_P6(cpi)) { 590 x86_type = X86_TYPE_P6; 591 pentiumpro_bug4046376 = 1; 592 pentiumpro_bug4064495 = 1; 593 /* 594 * Clear the SEP bit when it was set erroneously 595 */ 596 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 597 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 598 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 599 x86_type = X86_TYPE_P4; 600 /* 601 * We don't currently depend on any of the %ecx 602 * features until Prescott, so we'll only check 603 * this from P4 onwards. We might want to revisit 604 * that idea later. 605 */ 606 mask_ecx = 0xffffffff; 607 } else if (cpi->cpi_family > 0xf) 608 mask_ecx = 0xffffffff; 609 /* 610 * We don't support MONITOR/MWAIT if leaf 5 is not available 611 * to obtain the monitor linesize. 
612 */ 613 if (cpi->cpi_maxeax < 5) 614 mask_ecx &= ~CPUID_INTC_ECX_MON; 615 break; 616 case X86_VENDOR_IntelClone: 617 default: 618 break; 619 case X86_VENDOR_AMD: 620 #if defined(OPTERON_ERRATUM_108) 621 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 622 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 623 cpi->cpi_model = 0xc; 624 } else 625 #endif 626 if (cpi->cpi_family == 5) { 627 /* 628 * AMD K5 and K6 629 * 630 * These CPUs have an incomplete implementation 631 * of MCA/MCE which we mask away. 632 */ 633 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 634 635 /* 636 * Model 0 uses the wrong (APIC) bit 637 * to indicate PGE. Fix it here. 638 */ 639 if (cpi->cpi_model == 0) { 640 if (cp->cp_edx & 0x200) { 641 cp->cp_edx &= ~0x200; 642 cp->cp_edx |= CPUID_INTC_EDX_PGE; 643 } 644 } 645 646 /* 647 * Early models had problems w/ MMX; disable. 648 */ 649 if (cpi->cpi_model < 6) 650 mask_edx &= ~CPUID_INTC_EDX_MMX; 651 } 652 653 /* 654 * For newer families, SSE3 and CX16, at least, are valid; 655 * enable all 656 */ 657 if (cpi->cpi_family >= 0xf) 658 mask_ecx = 0xffffffff; 659 /* 660 * We don't support MONITOR/MWAIT if leaf 5 is not available 661 * to obtain the monitor linesize. 662 */ 663 if (cpi->cpi_maxeax < 5) 664 mask_ecx &= ~CPUID_INTC_ECX_MON; 665 666 #if !defined(__xpv) 667 /* 668 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 669 * processors. AMD does not intend MWAIT to be used in the cpu 670 * idle loop on current and future processors. 10h and future 671 * AMD processors use more power in MWAIT than HLT. 672 * Pre-family-10h Opterons do not have the MWAIT instruction. 
673 */ 674 idle_cpu_prefer_mwait = 0; 675 #endif 676 677 break; 678 case X86_VENDOR_TM: 679 /* 680 * workaround the NT workaround in CMS 4.1 681 */ 682 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 683 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 684 cp->cp_edx |= CPUID_INTC_EDX_CX8; 685 break; 686 case X86_VENDOR_Centaur: 687 /* 688 * workaround the NT workarounds again 689 */ 690 if (cpi->cpi_family == 6) 691 cp->cp_edx |= CPUID_INTC_EDX_CX8; 692 break; 693 case X86_VENDOR_Cyrix: 694 /* 695 * We rely heavily on the probing in locore 696 * to actually figure out what parts, if any, 697 * of the Cyrix cpuid instruction to believe. 698 */ 699 switch (x86_type) { 700 case X86_TYPE_CYRIX_486: 701 mask_edx = 0; 702 break; 703 case X86_TYPE_CYRIX_6x86: 704 mask_edx = 0; 705 break; 706 case X86_TYPE_CYRIX_6x86L: 707 mask_edx = 708 CPUID_INTC_EDX_DE | 709 CPUID_INTC_EDX_CX8; 710 break; 711 case X86_TYPE_CYRIX_6x86MX: 712 mask_edx = 713 CPUID_INTC_EDX_DE | 714 CPUID_INTC_EDX_MSR | 715 CPUID_INTC_EDX_CX8 | 716 CPUID_INTC_EDX_PGE | 717 CPUID_INTC_EDX_CMOV | 718 CPUID_INTC_EDX_MMX; 719 break; 720 case X86_TYPE_CYRIX_GXm: 721 mask_edx = 722 CPUID_INTC_EDX_MSR | 723 CPUID_INTC_EDX_CX8 | 724 CPUID_INTC_EDX_CMOV | 725 CPUID_INTC_EDX_MMX; 726 break; 727 case X86_TYPE_CYRIX_MediaGX: 728 break; 729 case X86_TYPE_CYRIX_MII: 730 case X86_TYPE_VIA_CYRIX_III: 731 mask_edx = 732 CPUID_INTC_EDX_DE | 733 CPUID_INTC_EDX_TSC | 734 CPUID_INTC_EDX_MSR | 735 CPUID_INTC_EDX_CX8 | 736 CPUID_INTC_EDX_PGE | 737 CPUID_INTC_EDX_CMOV | 738 CPUID_INTC_EDX_MMX; 739 break; 740 default: 741 break; 742 } 743 break; 744 } 745 746 #if defined(__xpv) 747 /* 748 * Do not support MONITOR/MWAIT under a hypervisor 749 */ 750 mask_ecx &= ~CPUID_INTC_ECX_MON; 751 #endif /* __xpv */ 752 753 /* 754 * Now we've figured out the masks that determine 755 * which bits we choose to believe, apply the masks 756 * to the feature words, then map the kernel's view 757 * of these feature words into its feature word. 
758 */ 759 cp->cp_edx &= mask_edx; 760 cp->cp_ecx &= mask_ecx; 761 762 /* 763 * apply any platform restrictions (we don't call this 764 * immediately after __cpuid_insn here, because we need the 765 * workarounds applied above first) 766 */ 767 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 768 769 /* 770 * fold in overrides from the "eeprom" mechanism 771 */ 772 cp->cp_edx |= cpuid_feature_edx_include; 773 cp->cp_edx &= ~cpuid_feature_edx_exclude; 774 775 cp->cp_ecx |= cpuid_feature_ecx_include; 776 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 777 778 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 779 feature |= X86_LARGEPAGE; 780 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 781 feature |= X86_TSC; 782 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 783 feature |= X86_MSR; 784 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 785 feature |= X86_MTRR; 786 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 787 feature |= X86_PGE; 788 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 789 feature |= X86_CMOV; 790 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 791 feature |= X86_MMX; 792 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 793 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 794 feature |= X86_MCA; 795 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 796 feature |= X86_PAE; 797 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 798 feature |= X86_CX8; 799 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 800 feature |= X86_CX16; 801 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 802 feature |= X86_PAT; 803 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 804 feature |= X86_SEP; 805 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 806 /* 807 * In our implementation, fxsave/fxrstor 808 * are prerequisites before we'll even 809 * try and do SSE things. 
810 */ 811 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 812 feature |= X86_SSE; 813 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 814 feature |= X86_SSE2; 815 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 816 feature |= X86_SSE3; 817 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 818 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 819 feature |= X86_SSSE3; 820 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 821 feature |= X86_SSE4_1; 822 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 823 feature |= X86_SSE4_2; 824 if (cp->cp_ecx & CPUID_INTC_ECX_AES) 825 feature |= X86_AES; 826 } 827 } 828 if (cp->cp_edx & CPUID_INTC_EDX_DE) 829 feature |= X86_DE; 830 #if !defined(__xpv) 831 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 832 833 /* 834 * We require the CLFLUSH instruction for erratum workaround 835 * to use MONITOR/MWAIT. 836 */ 837 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 838 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 839 feature |= X86_MWAIT; 840 } else { 841 extern int idle_cpu_assert_cflush_monitor; 842 843 /* 844 * All processors we are aware of which have 845 * MONITOR/MWAIT also have CLFLUSH. 846 */ 847 if (idle_cpu_assert_cflush_monitor) { 848 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 849 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 850 } 851 } 852 } 853 #endif /* __xpv */ 854 855 /* 856 * Only need it first time, rest of the cpus would follow suite. 857 * we only capture this for the bootcpu. 858 */ 859 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 860 feature |= X86_CLFSH; 861 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 862 } 863 864 if (feature & X86_PAE) 865 cpi->cpi_pabits = 36; 866 867 /* 868 * Hyperthreading configuration is slightly tricky on Intel 869 * and pure clones, and even trickier on AMD. 870 * 871 * (AMD chose to set the HTT bit on their CMP processors, 872 * even though they're not actually hyperthreaded. Thus it 873 * takes a bit more work to figure out what's really going 874 * on ... 
see the handling of the CMP_LGCY bit below) 875 */ 876 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 877 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 878 if (cpi->cpi_ncpu_per_chip > 1) 879 feature |= X86_HTT; 880 } else { 881 cpi->cpi_ncpu_per_chip = 1; 882 } 883 884 /* 885 * Work on the "extended" feature information, doing 886 * some basic initialization for cpuid_pass2() 887 */ 888 xcpuid = 0; 889 switch (cpi->cpi_vendor) { 890 case X86_VENDOR_Intel: 891 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 892 xcpuid++; 893 break; 894 case X86_VENDOR_AMD: 895 if (cpi->cpi_family > 5 || 896 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 897 xcpuid++; 898 break; 899 case X86_VENDOR_Cyrix: 900 /* 901 * Only these Cyrix CPUs are -known- to support 902 * extended cpuid operations. 903 */ 904 if (x86_type == X86_TYPE_VIA_CYRIX_III || 905 x86_type == X86_TYPE_CYRIX_GXm) 906 xcpuid++; 907 break; 908 case X86_VENDOR_Centaur: 909 case X86_VENDOR_TM: 910 default: 911 xcpuid++; 912 break; 913 } 914 915 if (xcpuid) { 916 cp = &cpi->cpi_extd[0]; 917 cp->cp_eax = 0x80000000; 918 cpi->cpi_xmaxeax = __cpuid_insn(cp); 919 } 920 921 if (cpi->cpi_xmaxeax & 0x80000000) { 922 923 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 924 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 925 926 switch (cpi->cpi_vendor) { 927 case X86_VENDOR_Intel: 928 case X86_VENDOR_AMD: 929 if (cpi->cpi_xmaxeax < 0x80000001) 930 break; 931 cp = &cpi->cpi_extd[1]; 932 cp->cp_eax = 0x80000001; 933 (void) __cpuid_insn(cp); 934 935 if (cpi->cpi_vendor == X86_VENDOR_AMD && 936 cpi->cpi_family == 5 && 937 cpi->cpi_model == 6 && 938 cpi->cpi_step == 6) { 939 /* 940 * K6 model 6 uses bit 10 to indicate SYSC 941 * Later models use bit 11. Fix it here. 942 */ 943 if (cp->cp_edx & 0x400) { 944 cp->cp_edx &= ~0x400; 945 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 946 } 947 } 948 949 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 950 951 /* 952 * Compute the additions to the kernel's feature word. 
953 */ 954 if (cp->cp_edx & CPUID_AMD_EDX_NX) 955 feature |= X86_NX; 956 957 /* 958 * Regardless whether or not we boot 64-bit, 959 * we should have a way to identify whether 960 * the CPU is capable of running 64-bit. 961 */ 962 if (cp->cp_edx & CPUID_AMD_EDX_LM) 963 feature |= X86_64; 964 965 #if defined(__amd64) 966 /* 1 GB large page - enable only for 64 bit kernel */ 967 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 968 feature |= X86_1GPG; 969 #endif 970 971 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 972 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 973 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 974 feature |= X86_SSE4A; 975 976 /* 977 * If both the HTT and CMP_LGCY bits are set, 978 * then we're not actually HyperThreaded. Read 979 * "AMD CPUID Specification" for more details. 980 */ 981 if (cpi->cpi_vendor == X86_VENDOR_AMD && 982 (feature & X86_HTT) && 983 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 984 feature &= ~X86_HTT; 985 feature |= X86_CMP; 986 } 987 #if defined(__amd64) 988 /* 989 * It's really tricky to support syscall/sysret in 990 * the i386 kernel; we rely on sysenter/sysexit 991 * instead. In the amd64 kernel, things are -way- 992 * better. 993 */ 994 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 995 feature |= X86_ASYSC; 996 997 /* 998 * While we're thinking about system calls, note 999 * that AMD processors don't support sysenter 1000 * in long mode at all, so don't try to program them. 1001 */ 1002 if (x86_vendor == X86_VENDOR_AMD) 1003 feature &= ~X86_SEP; 1004 #endif 1005 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1006 feature |= X86_TSCP; 1007 break; 1008 default: 1009 break; 1010 } 1011 1012 /* 1013 * Get CPUID data about processor cores and hyperthreads. 
1014 */ 1015 switch (cpi->cpi_vendor) { 1016 case X86_VENDOR_Intel: 1017 if (cpi->cpi_maxeax >= 4) { 1018 cp = &cpi->cpi_std[4]; 1019 cp->cp_eax = 4; 1020 cp->cp_ecx = 0; 1021 (void) __cpuid_insn(cp); 1022 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1023 } 1024 /*FALLTHROUGH*/ 1025 case X86_VENDOR_AMD: 1026 if (cpi->cpi_xmaxeax < 0x80000008) 1027 break; 1028 cp = &cpi->cpi_extd[8]; 1029 cp->cp_eax = 0x80000008; 1030 (void) __cpuid_insn(cp); 1031 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1032 1033 /* 1034 * Virtual and physical address limits from 1035 * cpuid override previously guessed values. 1036 */ 1037 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1038 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1039 break; 1040 default: 1041 break; 1042 } 1043 1044 /* 1045 * Derive the number of cores per chip 1046 */ 1047 switch (cpi->cpi_vendor) { 1048 case X86_VENDOR_Intel: 1049 if (cpi->cpi_maxeax < 4) { 1050 cpi->cpi_ncore_per_chip = 1; 1051 break; 1052 } else { 1053 cpi->cpi_ncore_per_chip = 1054 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1055 } 1056 break; 1057 case X86_VENDOR_AMD: 1058 if (cpi->cpi_xmaxeax < 0x80000008) { 1059 cpi->cpi_ncore_per_chip = 1; 1060 break; 1061 } else { 1062 /* 1063 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1064 * 1 less than the number of physical cores on 1065 * the chip. In family 0x10 this value can 1066 * be affected by "downcoring" - it reflects 1067 * 1 less than the number of cores actually 1068 * enabled on this node. 1069 */ 1070 cpi->cpi_ncore_per_chip = 1071 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1072 } 1073 break; 1074 default: 1075 cpi->cpi_ncore_per_chip = 1; 1076 break; 1077 } 1078 1079 /* 1080 * Get CPUID data about TSC Invariance in Deep C-State. 
1081 */ 1082 switch (cpi->cpi_vendor) { 1083 case X86_VENDOR_Intel: 1084 if (cpi->cpi_maxeax >= 7) { 1085 cp = &cpi->cpi_extd[7]; 1086 cp->cp_eax = 0x80000007; 1087 cp->cp_ecx = 0; 1088 (void) __cpuid_insn(cp); 1089 } 1090 break; 1091 default: 1092 break; 1093 } 1094 } else { 1095 cpi->cpi_ncore_per_chip = 1; 1096 } 1097 1098 /* 1099 * If more than one core, then this processor is CMP. 1100 */ 1101 if (cpi->cpi_ncore_per_chip > 1) 1102 feature |= X86_CMP; 1103 1104 /* 1105 * If the number of cores is the same as the number 1106 * of CPUs, then we cannot have HyperThreading. 1107 */ 1108 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1109 feature &= ~X86_HTT; 1110 1111 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1112 /* 1113 * Single-core single-threaded processors. 1114 */ 1115 cpi->cpi_chipid = -1; 1116 cpi->cpi_clogid = 0; 1117 cpi->cpi_coreid = cpu->cpu_id; 1118 cpi->cpi_pkgcoreid = 0; 1119 } else if (cpi->cpi_ncpu_per_chip > 1) { 1120 uint_t i; 1121 uint_t chipid_shift = 0; 1122 uint_t coreid_shift = 0; 1123 uint_t apic_id = CPI_APIC_ID(cpi); 1124 1125 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1126 chipid_shift++; 1127 cpi->cpi_chipid = apic_id >> chipid_shift; 1128 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1129 1130 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1131 if (feature & X86_CMP) { 1132 /* 1133 * Multi-core (and possibly multi-threaded) 1134 * processors. 
1135 */ 1136 uint_t ncpu_per_core; 1137 if (cpi->cpi_ncore_per_chip == 1) 1138 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1139 else if (cpi->cpi_ncore_per_chip > 1) 1140 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1141 cpi->cpi_ncore_per_chip; 1142 /* 1143 * 8bit APIC IDs on dual core Pentiums 1144 * look like this: 1145 * 1146 * +-----------------------+------+------+ 1147 * | Physical Package ID | MC | HT | 1148 * +-----------------------+------+------+ 1149 * <------- chipid --------> 1150 * <------- coreid ---------------> 1151 * <--- clogid --> 1152 * <------> 1153 * pkgcoreid 1154 * 1155 * Where the number of bits necessary to 1156 * represent MC and HT fields together equals 1157 * to the minimum number of bits necessary to 1158 * store the value of cpi->cpi_ncpu_per_chip. 1159 * Of those bits, the MC part uses the number 1160 * of bits necessary to store the value of 1161 * cpi->cpi_ncore_per_chip. 1162 */ 1163 for (i = 1; i < ncpu_per_core; i <<= 1) 1164 coreid_shift++; 1165 cpi->cpi_coreid = apic_id >> coreid_shift; 1166 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1167 coreid_shift; 1168 } else if (feature & X86_HTT) { 1169 /* 1170 * Single-core multi-threaded processors. 1171 */ 1172 cpi->cpi_coreid = cpi->cpi_chipid; 1173 cpi->cpi_pkgcoreid = 0; 1174 } 1175 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1176 /* 1177 * AMD CMP chips currently have a single thread per 1178 * core, with 2 cores on family 0xf and 2, 3 or 4 1179 * cores on family 0x10. 1180 * 1181 * Since no two cpus share a core we must assign a 1182 * distinct coreid per cpu, and we do this by using 1183 * the cpu_id. This scheme does not, however, 1184 * guarantee that sibling cores of a chip will have 1185 * sequential coreids starting at a multiple of the 1186 * number of cores per chip - that is usually the 1187 * case, but if the ACPI MADT table is presented 1188 * in a different order then we need to perform a 1189 * few more gymnastics for the pkgcoreid. 
1190 * 1191 * In family 0xf CMPs there are 2 cores on all nodes 1192 * present - no mixing of single and dual core parts. 1193 * 1194 * In family 0x10 CMPs cpuid fn 2 ECX[15:12] 1195 * "ApicIdCoreIdSize[3:0]" tells us how 1196 * many least-significant bits in the ApicId 1197 * are used to represent the core number 1198 * within the node. Cores are always 1199 * numbered sequentially from 0 regardless 1200 * of how many or which are disabled, and 1201 * there seems to be no way to discover the 1202 * real core id when some are disabled. 1203 */ 1204 cpi->cpi_coreid = cpu->cpu_id; 1205 1206 if (cpi->cpi_family == 0x10 && 1207 cpi->cpi_xmaxeax >= 0x80000008) { 1208 int coreidsz = 1209 BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 1210 1211 cpi->cpi_pkgcoreid = 1212 apic_id & ((1 << coreidsz) - 1); 1213 } else { 1214 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 1215 } 1216 } else { 1217 /* 1218 * All other processors are currently 1219 * assumed to have single cores. 1220 */ 1221 cpi->cpi_coreid = cpi->cpi_chipid; 1222 cpi->cpi_pkgcoreid = 0; 1223 } 1224 } 1225 1226 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1227 1228 /* 1229 * Synthesize chip "revision" and socket type 1230 */ 1231 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1232 cpi->cpi_model, cpi->cpi_step); 1233 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1234 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1235 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1236 cpi->cpi_model, cpi->cpi_step); 1237 1238 pass1_done: 1239 cpi->cpi_pass = 1; 1240 return (feature); 1241 } 1242 1243 /* 1244 * Make copies of the cpuid table entries we depend on, in 1245 * part for ease of parsing now, in part so that we have only 1246 * one place to correct any of it, in part for ease of 1247 * later export to userland, and in part so we can look at 1248 * this stuff in a crash dump. 
1249 */ 1250 1251 /*ARGSUSED*/ 1252 void 1253 cpuid_pass2(cpu_t *cpu) 1254 { 1255 uint_t n, nmax; 1256 int i; 1257 struct cpuid_regs *cp; 1258 uint8_t *dp; 1259 uint32_t *iptr; 1260 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1261 1262 ASSERT(cpi->cpi_pass == 1); 1263 1264 if (cpi->cpi_maxeax < 1) 1265 goto pass2_done; 1266 1267 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1268 nmax = NMAX_CPI_STD; 1269 /* 1270 * (We already handled n == 0 and n == 1 in pass 1) 1271 */ 1272 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1273 cp->cp_eax = n; 1274 1275 /* 1276 * CPUID function 4 expects %ecx to be initialized 1277 * with an index which indicates which cache to return 1278 * information about. The OS is expected to call function 4 1279 * with %ecx set to 0, 1, 2, ... until it returns with 1280 * EAX[4:0] set to 0, which indicates there are no more 1281 * caches. 1282 * 1283 * Here, populate cpi_std[4] with the information returned by 1284 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1285 * when dynamic memory allocation becomes available. 1286 * 1287 * Note: we need to explicitly initialize %ecx here, since 1288 * function 4 may have been previously invoked. 1289 */ 1290 if (n == 4) 1291 cp->cp_ecx = 0; 1292 1293 (void) __cpuid_insn(cp); 1294 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1295 switch (n) { 1296 case 2: 1297 /* 1298 * "the lower 8 bits of the %eax register 1299 * contain a value that identifies the number 1300 * of times the cpuid [instruction] has to be 1301 * executed to obtain a complete image of the 1302 * processor's caching systems." 1303 * 1304 * How *do* they make this stuff up? 1305 */ 1306 cpi->cpi_ncache = sizeof (*cp) * 1307 BITX(cp->cp_eax, 7, 0); 1308 if (cpi->cpi_ncache == 0) 1309 break; 1310 cpi->cpi_ncache--; /* skip count byte */ 1311 1312 /* 1313 * Well, for now, rather than attempt to implement 1314 * this slightly dubious algorithm, we just look 1315 * at the first 15 .. 
1316 */ 1317 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1318 cpi->cpi_ncache = sizeof (*cp) - 1; 1319 1320 dp = cpi->cpi_cacheinfo; 1321 if (BITX(cp->cp_eax, 31, 31) == 0) { 1322 uint8_t *p = (void *)&cp->cp_eax; 1323 for (i = 1; i < 4; i++) 1324 if (p[i] != 0) 1325 *dp++ = p[i]; 1326 } 1327 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1328 uint8_t *p = (void *)&cp->cp_ebx; 1329 for (i = 0; i < 4; i++) 1330 if (p[i] != 0) 1331 *dp++ = p[i]; 1332 } 1333 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1334 uint8_t *p = (void *)&cp->cp_ecx; 1335 for (i = 0; i < 4; i++) 1336 if (p[i] != 0) 1337 *dp++ = p[i]; 1338 } 1339 if (BITX(cp->cp_edx, 31, 31) == 0) { 1340 uint8_t *p = (void *)&cp->cp_edx; 1341 for (i = 0; i < 4; i++) 1342 if (p[i] != 0) 1343 *dp++ = p[i]; 1344 } 1345 break; 1346 1347 case 3: /* Processor serial number, if PSN supported */ 1348 break; 1349 1350 case 4: /* Deterministic cache parameters */ 1351 break; 1352 1353 case 5: /* Monitor/Mwait parameters */ 1354 { 1355 size_t mwait_size; 1356 1357 /* 1358 * check cpi_mwait.support which was set in cpuid_pass1 1359 */ 1360 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1361 break; 1362 1363 /* 1364 * Protect ourself from insane mwait line size. 1365 * Workaround for incomplete hardware emulator(s). 
1366 */ 1367 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1368 if (mwait_size < sizeof (uint32_t) || 1369 !ISP2(mwait_size)) { 1370 #if DEBUG 1371 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1372 "size %ld", cpu->cpu_id, (long)mwait_size); 1373 #endif 1374 break; 1375 } 1376 1377 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1378 cpi->cpi_mwait.mon_max = mwait_size; 1379 if (MWAIT_EXTENSION(cpi)) { 1380 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1381 if (MWAIT_INT_ENABLE(cpi)) 1382 cpi->cpi_mwait.support |= 1383 MWAIT_ECX_INT_ENABLE; 1384 } 1385 break; 1386 } 1387 default: 1388 break; 1389 } 1390 } 1391 1392 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1393 struct cpuid_regs regs; 1394 1395 cp = ®s; 1396 cp->cp_eax = 0xB; 1397 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1398 1399 (void) __cpuid_insn(cp); 1400 1401 /* 1402 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1403 * indicates that the extended topology enumeration leaf is 1404 * available. 1405 */ 1406 if (cp->cp_ebx) { 1407 uint32_t x2apic_id; 1408 uint_t coreid_shift = 0; 1409 uint_t ncpu_per_core = 1; 1410 uint_t chipid_shift = 0; 1411 uint_t ncpu_per_chip = 1; 1412 uint_t i; 1413 uint_t level; 1414 1415 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1416 cp->cp_eax = 0xB; 1417 cp->cp_ecx = i; 1418 1419 (void) __cpuid_insn(cp); 1420 level = CPI_CPU_LEVEL_TYPE(cp); 1421 1422 if (level == 1) { 1423 x2apic_id = cp->cp_edx; 1424 coreid_shift = BITX(cp->cp_eax, 4, 0); 1425 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1426 } else if (level == 2) { 1427 x2apic_id = cp->cp_edx; 1428 chipid_shift = BITX(cp->cp_eax, 4, 0); 1429 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1430 } 1431 } 1432 1433 cpi->cpi_apicid = x2apic_id; 1434 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1435 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1436 ncpu_per_core; 1437 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1438 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1439 cpi->cpi_coreid = x2apic_id >> 
coreid_shift; 1440 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1441 } 1442 1443 /* Make cp NULL so that we don't stumble on others */ 1444 cp = NULL; 1445 } 1446 1447 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1448 goto pass2_done; 1449 1450 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1451 nmax = NMAX_CPI_EXTD; 1452 /* 1453 * Copy the extended properties, fixing them as we go. 1454 * (We already handled n == 0 and n == 1 in pass 1) 1455 */ 1456 iptr = (void *)cpi->cpi_brandstr; 1457 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1458 cp->cp_eax = 0x80000000 + n; 1459 (void) __cpuid_insn(cp); 1460 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1461 switch (n) { 1462 case 2: 1463 case 3: 1464 case 4: 1465 /* 1466 * Extract the brand string 1467 */ 1468 *iptr++ = cp->cp_eax; 1469 *iptr++ = cp->cp_ebx; 1470 *iptr++ = cp->cp_ecx; 1471 *iptr++ = cp->cp_edx; 1472 break; 1473 case 5: 1474 switch (cpi->cpi_vendor) { 1475 case X86_VENDOR_AMD: 1476 /* 1477 * The Athlon and Duron were the first 1478 * parts to report the sizes of the 1479 * TLB for large pages. Before then, 1480 * we don't trust the data. 1481 */ 1482 if (cpi->cpi_family < 6 || 1483 (cpi->cpi_family == 6 && 1484 cpi->cpi_model < 1)) 1485 cp->cp_eax = 0; 1486 break; 1487 default: 1488 break; 1489 } 1490 break; 1491 case 6: 1492 switch (cpi->cpi_vendor) { 1493 case X86_VENDOR_AMD: 1494 /* 1495 * The Athlon and Duron were the first 1496 * AMD parts with L2 TLB's. 1497 * Before then, don't trust the data. 
1498 */ 1499 if (cpi->cpi_family < 6 || 1500 cpi->cpi_family == 6 && 1501 cpi->cpi_model < 1) 1502 cp->cp_eax = cp->cp_ebx = 0; 1503 /* 1504 * AMD Duron rev A0 reports L2 1505 * cache size incorrectly as 1K 1506 * when it is really 64K 1507 */ 1508 if (cpi->cpi_family == 6 && 1509 cpi->cpi_model == 3 && 1510 cpi->cpi_step == 0) { 1511 cp->cp_ecx &= 0xffff; 1512 cp->cp_ecx |= 0x400000; 1513 } 1514 break; 1515 case X86_VENDOR_Cyrix: /* VIA C3 */ 1516 /* 1517 * VIA C3 processors are a bit messed 1518 * up w.r.t. encoding cache sizes in %ecx 1519 */ 1520 if (cpi->cpi_family != 6) 1521 break; 1522 /* 1523 * model 7 and 8 were incorrectly encoded 1524 * 1525 * xxx is model 8 really broken? 1526 */ 1527 if (cpi->cpi_model == 7 || 1528 cpi->cpi_model == 8) 1529 cp->cp_ecx = 1530 BITX(cp->cp_ecx, 31, 24) << 16 | 1531 BITX(cp->cp_ecx, 23, 16) << 12 | 1532 BITX(cp->cp_ecx, 15, 8) << 8 | 1533 BITX(cp->cp_ecx, 7, 0); 1534 /* 1535 * model 9 stepping 1 has wrong associativity 1536 */ 1537 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1538 cp->cp_ecx |= 8 << 12; 1539 break; 1540 case X86_VENDOR_Intel: 1541 /* 1542 * Extended L2 Cache features function. 1543 * First appeared on Prescott. 
1544 */ 1545 default: 1546 break; 1547 } 1548 break; 1549 default: 1550 break; 1551 } 1552 } 1553 1554 pass2_done: 1555 cpi->cpi_pass = 2; 1556 } 1557 1558 static const char * 1559 intel_cpubrand(const struct cpuid_info *cpi) 1560 { 1561 int i; 1562 1563 if ((x86_feature & X86_CPUID) == 0 || 1564 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1565 return ("i486"); 1566 1567 switch (cpi->cpi_family) { 1568 case 5: 1569 return ("Intel Pentium(r)"); 1570 case 6: 1571 switch (cpi->cpi_model) { 1572 uint_t celeron, xeon; 1573 const struct cpuid_regs *cp; 1574 case 0: 1575 case 1: 1576 case 2: 1577 return ("Intel Pentium(r) Pro"); 1578 case 3: 1579 case 4: 1580 return ("Intel Pentium(r) II"); 1581 case 6: 1582 return ("Intel Celeron(r)"); 1583 case 5: 1584 case 7: 1585 celeron = xeon = 0; 1586 cp = &cpi->cpi_std[2]; /* cache info */ 1587 1588 for (i = 1; i < 4; i++) { 1589 uint_t tmp; 1590 1591 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1592 if (tmp == 0x40) 1593 celeron++; 1594 if (tmp >= 0x44 && tmp <= 0x45) 1595 xeon++; 1596 } 1597 1598 for (i = 0; i < 2; i++) { 1599 uint_t tmp; 1600 1601 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1602 if (tmp == 0x40) 1603 celeron++; 1604 else if (tmp >= 0x44 && tmp <= 0x45) 1605 xeon++; 1606 } 1607 1608 for (i = 0; i < 4; i++) { 1609 uint_t tmp; 1610 1611 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1612 if (tmp == 0x40) 1613 celeron++; 1614 else if (tmp >= 0x44 && tmp <= 0x45) 1615 xeon++; 1616 } 1617 1618 for (i = 0; i < 4; i++) { 1619 uint_t tmp; 1620 1621 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1622 if (tmp == 0x40) 1623 celeron++; 1624 else if (tmp >= 0x44 && tmp <= 0x45) 1625 xeon++; 1626 } 1627 1628 if (celeron) 1629 return ("Intel Celeron(r)"); 1630 if (xeon) 1631 return (cpi->cpi_model == 5 ? 1632 "Intel Pentium(r) II Xeon(tm)" : 1633 "Intel Pentium(r) III Xeon(tm)"); 1634 return (cpi->cpi_model == 5 ? 
1635 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1636 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1637 default: 1638 break; 1639 } 1640 default: 1641 break; 1642 } 1643 1644 /* BrandID is present if the field is nonzero */ 1645 if (cpi->cpi_brandid != 0) { 1646 static const struct { 1647 uint_t bt_bid; 1648 const char *bt_str; 1649 } brand_tbl[] = { 1650 { 0x1, "Intel(r) Celeron(r)" }, 1651 { 0x2, "Intel(r) Pentium(r) III" }, 1652 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1653 { 0x4, "Intel(r) Pentium(r) III" }, 1654 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1655 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1656 { 0x8, "Intel(r) Pentium(r) 4" }, 1657 { 0x9, "Intel(r) Pentium(r) 4" }, 1658 { 0xa, "Intel(r) Celeron(r)" }, 1659 { 0xb, "Intel(r) Xeon(tm)" }, 1660 { 0xc, "Intel(r) Xeon(tm) MP" }, 1661 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1662 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1663 { 0x11, "Mobile Genuine Intel(r)" }, 1664 { 0x12, "Intel(r) Celeron(r) M" }, 1665 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1666 { 0x14, "Intel(r) Celeron(r)" }, 1667 { 0x15, "Mobile Genuine Intel(r)" }, 1668 { 0x16, "Intel(r) Pentium(r) M" }, 1669 { 0x17, "Mobile Intel(r) Celeron(r)" } 1670 }; 1671 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1672 uint_t sgn; 1673 1674 sgn = (cpi->cpi_family << 8) | 1675 (cpi->cpi_model << 4) | cpi->cpi_step; 1676 1677 for (i = 0; i < btblmax; i++) 1678 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1679 break; 1680 if (i < btblmax) { 1681 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1682 return ("Intel(r) Celeron(r)"); 1683 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1684 return ("Intel(r) Xeon(tm) MP"); 1685 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1686 return ("Intel(r) Xeon(tm)"); 1687 return (brand_tbl[i].bt_str); 1688 } 1689 } 1690 1691 return (NULL); 1692 } 1693 1694 static const char * 1695 amd_cpubrand(const struct cpuid_info *cpi) 1696 { 1697 if ((x86_feature & X86_CPUID) == 0 || 1698 cpi->cpi_maxeax < 1 || 
cpi->cpi_family < 5) 1699 return ("i486 compatible"); 1700 1701 switch (cpi->cpi_family) { 1702 case 5: 1703 switch (cpi->cpi_model) { 1704 case 0: 1705 case 1: 1706 case 2: 1707 case 3: 1708 case 4: 1709 case 5: 1710 return ("AMD-K5(r)"); 1711 case 6: 1712 case 7: 1713 return ("AMD-K6(r)"); 1714 case 8: 1715 return ("AMD-K6(r)-2"); 1716 case 9: 1717 return ("AMD-K6(r)-III"); 1718 default: 1719 return ("AMD (family 5)"); 1720 } 1721 case 6: 1722 switch (cpi->cpi_model) { 1723 case 1: 1724 return ("AMD-K7(tm)"); 1725 case 0: 1726 case 2: 1727 case 4: 1728 return ("AMD Athlon(tm)"); 1729 case 3: 1730 case 7: 1731 return ("AMD Duron(tm)"); 1732 case 6: 1733 case 8: 1734 case 10: 1735 /* 1736 * Use the L2 cache size to distinguish 1737 */ 1738 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1739 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1740 default: 1741 return ("AMD (family 6)"); 1742 } 1743 default: 1744 break; 1745 } 1746 1747 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1748 cpi->cpi_brandid != 0) { 1749 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1750 case 3: 1751 return ("AMD Opteron(tm) UP 1xx"); 1752 case 4: 1753 return ("AMD Opteron(tm) DP 2xx"); 1754 case 5: 1755 return ("AMD Opteron(tm) MP 8xx"); 1756 default: 1757 return ("AMD Opteron(tm)"); 1758 } 1759 } 1760 1761 return (NULL); 1762 } 1763 1764 static const char * 1765 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1766 { 1767 if ((x86_feature & X86_CPUID) == 0 || 1768 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1769 type == X86_TYPE_CYRIX_486) 1770 return ("i486 compatible"); 1771 1772 switch (type) { 1773 case X86_TYPE_CYRIX_6x86: 1774 return ("Cyrix 6x86"); 1775 case X86_TYPE_CYRIX_6x86L: 1776 return ("Cyrix 6x86L"); 1777 case X86_TYPE_CYRIX_6x86MX: 1778 return ("Cyrix 6x86MX"); 1779 case X86_TYPE_CYRIX_GXm: 1780 return ("Cyrix GXm"); 1781 case X86_TYPE_CYRIX_MediaGX: 1782 return ("Cyrix MediaGX"); 1783 case X86_TYPE_CYRIX_MII: 1784 return ("Cyrix M2"); 1785 case X86_TYPE_VIA_CYRIX_III: 1786 
return ("VIA Cyrix M3"); 1787 default: 1788 /* 1789 * Have another wild guess .. 1790 */ 1791 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1792 return ("Cyrix 5x86"); 1793 else if (cpi->cpi_family == 5) { 1794 switch (cpi->cpi_model) { 1795 case 2: 1796 return ("Cyrix 6x86"); /* Cyrix M1 */ 1797 case 4: 1798 return ("Cyrix MediaGX"); 1799 default: 1800 break; 1801 } 1802 } else if (cpi->cpi_family == 6) { 1803 switch (cpi->cpi_model) { 1804 case 0: 1805 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1806 case 5: 1807 case 6: 1808 case 7: 1809 case 8: 1810 case 9: 1811 return ("VIA C3"); 1812 default: 1813 break; 1814 } 1815 } 1816 break; 1817 } 1818 return (NULL); 1819 } 1820 1821 /* 1822 * This only gets called in the case that the CPU extended 1823 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1824 * aren't available, or contain null bytes for some reason. 1825 */ 1826 static void 1827 fabricate_brandstr(struct cpuid_info *cpi) 1828 { 1829 const char *brand = NULL; 1830 1831 switch (cpi->cpi_vendor) { 1832 case X86_VENDOR_Intel: 1833 brand = intel_cpubrand(cpi); 1834 break; 1835 case X86_VENDOR_AMD: 1836 brand = amd_cpubrand(cpi); 1837 break; 1838 case X86_VENDOR_Cyrix: 1839 brand = cyrix_cpubrand(cpi, x86_type); 1840 break; 1841 case X86_VENDOR_NexGen: 1842 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1843 brand = "NexGen Nx586"; 1844 break; 1845 case X86_VENDOR_Centaur: 1846 if (cpi->cpi_family == 5) 1847 switch (cpi->cpi_model) { 1848 case 4: 1849 brand = "Centaur C6"; 1850 break; 1851 case 8: 1852 brand = "Centaur C2"; 1853 break; 1854 case 9: 1855 brand = "Centaur C3"; 1856 break; 1857 default: 1858 break; 1859 } 1860 break; 1861 case X86_VENDOR_Rise: 1862 if (cpi->cpi_family == 5 && 1863 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1864 brand = "Rise mP6"; 1865 break; 1866 case X86_VENDOR_SiS: 1867 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1868 brand = "SiS 55x"; 1869 break; 1870 case X86_VENDOR_TM: 1871 if (cpi->cpi_family == 5 && 
cpi->cpi_model == 4) 1872 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1873 break; 1874 case X86_VENDOR_NSC: 1875 case X86_VENDOR_UMC: 1876 default: 1877 break; 1878 } 1879 if (brand) { 1880 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1881 return; 1882 } 1883 1884 /* 1885 * If all else fails ... 1886 */ 1887 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1888 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1889 cpi->cpi_model, cpi->cpi_step); 1890 } 1891 1892 /* 1893 * This routine is called just after kernel memory allocation 1894 * becomes available on cpu0, and as part of mp_startup() on 1895 * the other cpus. 1896 * 1897 * Fixup the brand string, and collect any information from cpuid 1898 * that requires dynamicically allocated storage to represent. 1899 */ 1900 /*ARGSUSED*/ 1901 void 1902 cpuid_pass3(cpu_t *cpu) 1903 { 1904 int i, max, shft, level, size; 1905 struct cpuid_regs regs; 1906 struct cpuid_regs *cp; 1907 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1908 1909 ASSERT(cpi->cpi_pass == 2); 1910 1911 /* 1912 * Function 4: Deterministic cache parameters 1913 * 1914 * Take this opportunity to detect the number of threads 1915 * sharing the last level cache, and construct a corresponding 1916 * cache id. The respective cpuid_info members are initialized 1917 * to the default case of "no last level cache sharing". 1918 */ 1919 cpi->cpi_ncpu_shr_last_cache = 1; 1920 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1921 1922 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1923 1924 /* 1925 * Find the # of elements (size) returned by fn 4, and along 1926 * the way detect last level cache sharing details. 
1927 */ 1928 bzero(®s, sizeof (regs)); 1929 cp = ®s; 1930 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1931 cp->cp_eax = 4; 1932 cp->cp_ecx = i; 1933 1934 (void) __cpuid_insn(cp); 1935 1936 if (CPI_CACHE_TYPE(cp) == 0) 1937 break; 1938 level = CPI_CACHE_LVL(cp); 1939 if (level > max) { 1940 max = level; 1941 cpi->cpi_ncpu_shr_last_cache = 1942 CPI_NTHR_SHR_CACHE(cp) + 1; 1943 } 1944 } 1945 cpi->cpi_std_4_size = size = i; 1946 1947 /* 1948 * Allocate the cpi_std_4 array. The first element 1949 * references the regs for fn 4, %ecx == 0, which 1950 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1951 */ 1952 if (size > 0) { 1953 cpi->cpi_std_4 = 1954 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1955 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1956 1957 /* 1958 * Allocate storage to hold the additional regs 1959 * for function 4, %ecx == 1 .. cpi_std_4_size. 1960 * 1961 * The regs for fn 4, %ecx == 0 has already 1962 * been allocated as indicated above. 1963 */ 1964 for (i = 1; i < size; i++) { 1965 cp = cpi->cpi_std_4[i] = 1966 kmem_zalloc(sizeof (regs), KM_SLEEP); 1967 cp->cp_eax = 4; 1968 cp->cp_ecx = i; 1969 1970 (void) __cpuid_insn(cp); 1971 } 1972 } 1973 /* 1974 * Determine the number of bits needed to represent 1975 * the number of CPUs sharing the last level cache. 1976 * 1977 * Shift off that number of bits from the APIC id to 1978 * derive the cache id. 1979 */ 1980 shft = 0; 1981 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1982 shft++; 1983 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 1984 } 1985 1986 /* 1987 * Now fixup the brand string 1988 */ 1989 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1990 fabricate_brandstr(cpi); 1991 } else { 1992 1993 /* 1994 * If we successfully extracted a brand string from the cpuid 1995 * instruction, clean it up by removing leading spaces and 1996 * similar junk. 
1997 */ 1998 if (cpi->cpi_brandstr[0]) { 1999 size_t maxlen = sizeof (cpi->cpi_brandstr); 2000 char *src, *dst; 2001 2002 dst = src = (char *)cpi->cpi_brandstr; 2003 src[maxlen - 1] = '\0'; 2004 /* 2005 * strip leading spaces 2006 */ 2007 while (*src == ' ') 2008 src++; 2009 /* 2010 * Remove any 'Genuine' or "Authentic" prefixes 2011 */ 2012 if (strncmp(src, "Genuine ", 8) == 0) 2013 src += 8; 2014 if (strncmp(src, "Authentic ", 10) == 0) 2015 src += 10; 2016 2017 /* 2018 * Now do an in-place copy. 2019 * Map (R) to (r) and (TM) to (tm). 2020 * The era of teletypes is long gone, and there's 2021 * -really- no need to shout. 2022 */ 2023 while (*src != '\0') { 2024 if (src[0] == '(') { 2025 if (strncmp(src + 1, "R)", 2) == 0) { 2026 (void) strncpy(dst, "(r)", 3); 2027 src += 3; 2028 dst += 3; 2029 continue; 2030 } 2031 if (strncmp(src + 1, "TM)", 3) == 0) { 2032 (void) strncpy(dst, "(tm)", 4); 2033 src += 4; 2034 dst += 4; 2035 continue; 2036 } 2037 } 2038 *dst++ = *src++; 2039 } 2040 *dst = '\0'; 2041 2042 /* 2043 * Finally, remove any trailing spaces 2044 */ 2045 while (--dst > cpi->cpi_brandstr) 2046 if (*dst == ' ') 2047 *dst = '\0'; 2048 else 2049 break; 2050 } else 2051 fabricate_brandstr(cpi); 2052 } 2053 cpi->cpi_pass = 3; 2054 } 2055 2056 /* 2057 * This routine is called out of bind_hwcap() much later in the life 2058 * of the kernel (post_startup()). The job of this routine is to resolve 2059 * the hardware feature support and kernel support for those features into 2060 * what we're actually going to tell applications via the aux vector. 
2061 */ 2062 uint_t 2063 cpuid_pass4(cpu_t *cpu) 2064 { 2065 struct cpuid_info *cpi; 2066 uint_t hwcap_flags = 0; 2067 2068 if (cpu == NULL) 2069 cpu = CPU; 2070 cpi = cpu->cpu_m.mcpu_cpi; 2071 2072 ASSERT(cpi->cpi_pass == 3); 2073 2074 if (cpi->cpi_maxeax >= 1) { 2075 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2076 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2077 2078 *edx = CPI_FEATURES_EDX(cpi); 2079 *ecx = CPI_FEATURES_ECX(cpi); 2080 2081 /* 2082 * [these require explicit kernel support] 2083 */ 2084 if ((x86_feature & X86_SEP) == 0) 2085 *edx &= ~CPUID_INTC_EDX_SEP; 2086 2087 if ((x86_feature & X86_SSE) == 0) 2088 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2089 if ((x86_feature & X86_SSE2) == 0) 2090 *edx &= ~CPUID_INTC_EDX_SSE2; 2091 2092 if ((x86_feature & X86_HTT) == 0) 2093 *edx &= ~CPUID_INTC_EDX_HTT; 2094 2095 if ((x86_feature & X86_SSE3) == 0) 2096 *ecx &= ~CPUID_INTC_ECX_SSE3; 2097 2098 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2099 if ((x86_feature & X86_SSSE3) == 0) 2100 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2101 if ((x86_feature & X86_SSE4_1) == 0) 2102 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2103 if ((x86_feature & X86_SSE4_2) == 0) 2104 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2105 if ((x86_feature & X86_AES) == 0) 2106 *ecx &= ~CPUID_INTC_ECX_AES; 2107 } 2108 2109 /* 2110 * [no explicit support required beyond x87 fp context] 2111 */ 2112 if (!fpu_exists) 2113 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2114 2115 /* 2116 * Now map the supported feature vector to things that we 2117 * think userland will care about. 
2118 */ 2119 if (*edx & CPUID_INTC_EDX_SEP) 2120 hwcap_flags |= AV_386_SEP; 2121 if (*edx & CPUID_INTC_EDX_SSE) 2122 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2123 if (*edx & CPUID_INTC_EDX_SSE2) 2124 hwcap_flags |= AV_386_SSE2; 2125 if (*ecx & CPUID_INTC_ECX_SSE3) 2126 hwcap_flags |= AV_386_SSE3; 2127 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2128 if (*ecx & CPUID_INTC_ECX_SSSE3) 2129 hwcap_flags |= AV_386_SSSE3; 2130 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2131 hwcap_flags |= AV_386_SSE4_1; 2132 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2133 hwcap_flags |= AV_386_SSE4_2; 2134 if (*ecx & CPUID_INTC_ECX_MOVBE) 2135 hwcap_flags |= AV_386_MOVBE; 2136 if (*ecx & CPUID_INTC_ECX_AES) 2137 hwcap_flags |= AV_386_AES; 2138 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2139 hwcap_flags |= AV_386_PCLMULQDQ; 2140 } 2141 if (*ecx & CPUID_INTC_ECX_POPCNT) 2142 hwcap_flags |= AV_386_POPCNT; 2143 if (*edx & CPUID_INTC_EDX_FPU) 2144 hwcap_flags |= AV_386_FPU; 2145 if (*edx & CPUID_INTC_EDX_MMX) 2146 hwcap_flags |= AV_386_MMX; 2147 2148 if (*edx & CPUID_INTC_EDX_TSC) 2149 hwcap_flags |= AV_386_TSC; 2150 if (*edx & CPUID_INTC_EDX_CX8) 2151 hwcap_flags |= AV_386_CX8; 2152 if (*edx & CPUID_INTC_EDX_CMOV) 2153 hwcap_flags |= AV_386_CMOV; 2154 if (*ecx & CPUID_INTC_ECX_MON) 2155 hwcap_flags |= AV_386_MON; 2156 if (*ecx & CPUID_INTC_ECX_CX16) 2157 hwcap_flags |= AV_386_CX16; 2158 } 2159 2160 if (x86_feature & X86_HTT) 2161 hwcap_flags |= AV_386_PAUSE; 2162 2163 if (cpi->cpi_xmaxeax < 0x80000001) 2164 goto pass4_done; 2165 2166 switch (cpi->cpi_vendor) { 2167 struct cpuid_regs cp; 2168 uint32_t *edx, *ecx; 2169 2170 case X86_VENDOR_Intel: 2171 /* 2172 * Seems like Intel duplicated what we necessary 2173 * here to make the initial crop of 64-bit OS's work. 2174 * Hopefully, those are the only "extended" bits 2175 * they'll add. 
2176 */ 2177 /*FALLTHROUGH*/ 2178 2179 case X86_VENDOR_AMD: 2180 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2181 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2182 2183 *edx = CPI_FEATURES_XTD_EDX(cpi); 2184 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2185 2186 /* 2187 * [these features require explicit kernel support] 2188 */ 2189 switch (cpi->cpi_vendor) { 2190 case X86_VENDOR_Intel: 2191 if ((x86_feature & X86_TSCP) == 0) 2192 *edx &= ~CPUID_AMD_EDX_TSCP; 2193 break; 2194 2195 case X86_VENDOR_AMD: 2196 if ((x86_feature & X86_TSCP) == 0) 2197 *edx &= ~CPUID_AMD_EDX_TSCP; 2198 if ((x86_feature & X86_SSE4A) == 0) 2199 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2200 break; 2201 2202 default: 2203 break; 2204 } 2205 2206 /* 2207 * [no explicit support required beyond 2208 * x87 fp context and exception handlers] 2209 */ 2210 if (!fpu_exists) 2211 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2212 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2213 2214 if ((x86_feature & X86_NX) == 0) 2215 *edx &= ~CPUID_AMD_EDX_NX; 2216 #if !defined(__amd64) 2217 *edx &= ~CPUID_AMD_EDX_LM; 2218 #endif 2219 /* 2220 * Now map the supported feature vector to 2221 * things that we think userland will care about. 
2222 */ 2223 #if defined(__amd64) 2224 if (*edx & CPUID_AMD_EDX_SYSC) 2225 hwcap_flags |= AV_386_AMD_SYSC; 2226 #endif 2227 if (*edx & CPUID_AMD_EDX_MMXamd) 2228 hwcap_flags |= AV_386_AMD_MMX; 2229 if (*edx & CPUID_AMD_EDX_3DNow) 2230 hwcap_flags |= AV_386_AMD_3DNow; 2231 if (*edx & CPUID_AMD_EDX_3DNowx) 2232 hwcap_flags |= AV_386_AMD_3DNowx; 2233 2234 switch (cpi->cpi_vendor) { 2235 case X86_VENDOR_AMD: 2236 if (*edx & CPUID_AMD_EDX_TSCP) 2237 hwcap_flags |= AV_386_TSCP; 2238 if (*ecx & CPUID_AMD_ECX_AHF64) 2239 hwcap_flags |= AV_386_AHF; 2240 if (*ecx & CPUID_AMD_ECX_SSE4A) 2241 hwcap_flags |= AV_386_AMD_SSE4A; 2242 if (*ecx & CPUID_AMD_ECX_LZCNT) 2243 hwcap_flags |= AV_386_AMD_LZCNT; 2244 break; 2245 2246 case X86_VENDOR_Intel: 2247 if (*edx & CPUID_AMD_EDX_TSCP) 2248 hwcap_flags |= AV_386_TSCP; 2249 /* 2250 * Aarrgh. 2251 * Intel uses a different bit in the same word. 2252 */ 2253 if (*ecx & CPUID_INTC_ECX_AHF64) 2254 hwcap_flags |= AV_386_AHF; 2255 break; 2256 2257 default: 2258 break; 2259 } 2260 break; 2261 2262 case X86_VENDOR_TM: 2263 cp.cp_eax = 0x80860001; 2264 (void) __cpuid_insn(&cp); 2265 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2266 break; 2267 2268 default: 2269 break; 2270 } 2271 2272 pass4_done: 2273 cpi->cpi_pass = 4; 2274 return (hwcap_flags); 2275 } 2276 2277 2278 /* 2279 * Simulate the cpuid instruction using the data we previously 2280 * captured about this CPU. We try our best to return the truth 2281 * about the hardware, independently of kernel support. 2282 */ 2283 uint32_t 2284 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2285 { 2286 struct cpuid_info *cpi; 2287 struct cpuid_regs *xcp; 2288 2289 if (cpu == NULL) 2290 cpu = CPU; 2291 cpi = cpu->cpu_m.mcpu_cpi; 2292 2293 ASSERT(cpuid_checkpass(cpu, 3)); 2294 2295 /* 2296 * CPUID data is cached in two separate places: cpi_std for standard 2297 * CPUID functions, and cpi_extd for extended CPUID functions. 
2298 */ 2299 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2300 xcp = &cpi->cpi_std[cp->cp_eax]; 2301 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2302 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2303 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2304 else 2305 /* 2306 * The caller is asking for data from an input parameter which 2307 * the kernel has not cached. In this case we go fetch from 2308 * the hardware and return the data directly to the user. 2309 */ 2310 return (__cpuid_insn(cp)); 2311 2312 cp->cp_eax = xcp->cp_eax; 2313 cp->cp_ebx = xcp->cp_ebx; 2314 cp->cp_ecx = xcp->cp_ecx; 2315 cp->cp_edx = xcp->cp_edx; 2316 return (cp->cp_eax); 2317 } 2318 2319 int 2320 cpuid_checkpass(cpu_t *cpu, int pass) 2321 { 2322 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2323 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2324 } 2325 2326 int 2327 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2328 { 2329 ASSERT(cpuid_checkpass(cpu, 3)); 2330 2331 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2332 } 2333 2334 int 2335 cpuid_is_cmt(cpu_t *cpu) 2336 { 2337 if (cpu == NULL) 2338 cpu = CPU; 2339 2340 ASSERT(cpuid_checkpass(cpu, 1)); 2341 2342 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2343 } 2344 2345 /* 2346 * AMD and Intel both implement the 64-bit variant of the syscall 2347 * instruction (syscallq), so if there's -any- support for syscall, 2348 * cpuid currently says "yes, we support this". 2349 * 2350 * However, Intel decided to -not- implement the 32-bit variant of the 2351 * syscall instruction, so we provide a predicate to allow our caller 2352 * to test that subtlety here. 2353 * 2354 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2355 * even in the case where the hardware would in fact support it. 2356 */ 2357 /*ARGSUSED*/ 2358 int 2359 cpuid_syscall32_insn(cpu_t *cpu) 2360 { 2361 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 2362 2363 #if !defined(__xpv) 2364 if (cpu == NULL) 2365 cpu = CPU; 2366 2367 /*CSTYLED*/ 2368 { 2369 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2370 2371 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2372 cpi->cpi_xmaxeax >= 0x80000001 && 2373 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2374 return (1); 2375 } 2376 #endif 2377 return (0); 2378 } 2379 2380 int 2381 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2382 { 2383 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2384 2385 static const char fmt[] = 2386 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2387 static const char fmt_ht[] = 2388 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2389 2390 ASSERT(cpuid_checkpass(cpu, 1)); 2391 2392 if (cpuid_is_cmt(cpu)) 2393 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2394 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2395 cpi->cpi_family, cpi->cpi_model, 2396 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2397 return (snprintf(s, n, fmt, 2398 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2399 cpi->cpi_family, cpi->cpi_model, 2400 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2401 } 2402 2403 const char * 2404 cpuid_getvendorstr(cpu_t *cpu) 2405 { 2406 ASSERT(cpuid_checkpass(cpu, 1)); 2407 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2408 } 2409 2410 uint_t 2411 cpuid_getvendor(cpu_t *cpu) 2412 { 2413 ASSERT(cpuid_checkpass(cpu, 1)); 2414 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2415 } 2416 2417 uint_t 2418 cpuid_getfamily(cpu_t *cpu) 2419 { 2420 ASSERT(cpuid_checkpass(cpu, 1)); 2421 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2422 } 2423 2424 uint_t 2425 cpuid_getmodel(cpu_t *cpu) 2426 { 2427 ASSERT(cpuid_checkpass(cpu, 1)); 2428 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2429 } 2430 2431 uint_t 2432 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2433 { 2434 ASSERT(cpuid_checkpass(cpu, 1)); 2435 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2436 } 2437 2438 uint_t 2439 cpuid_get_ncore_per_chip(cpu_t *cpu) 2440 { 2441 
ASSERT(cpuid_checkpass(cpu, 1)); 2442 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2443 } 2444 2445 uint_t 2446 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2447 { 2448 ASSERT(cpuid_checkpass(cpu, 2)); 2449 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2450 } 2451 2452 id_t 2453 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2454 { 2455 ASSERT(cpuid_checkpass(cpu, 2)); 2456 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2457 } 2458 2459 uint_t 2460 cpuid_getstep(cpu_t *cpu) 2461 { 2462 ASSERT(cpuid_checkpass(cpu, 1)); 2463 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2464 } 2465 2466 uint_t 2467 cpuid_getsig(struct cpu *cpu) 2468 { 2469 ASSERT(cpuid_checkpass(cpu, 1)); 2470 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2471 } 2472 2473 uint32_t 2474 cpuid_getchiprev(struct cpu *cpu) 2475 { 2476 ASSERT(cpuid_checkpass(cpu, 1)); 2477 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2478 } 2479 2480 const char * 2481 cpuid_getchiprevstr(struct cpu *cpu) 2482 { 2483 ASSERT(cpuid_checkpass(cpu, 1)); 2484 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2485 } 2486 2487 uint32_t 2488 cpuid_getsockettype(struct cpu *cpu) 2489 { 2490 ASSERT(cpuid_checkpass(cpu, 1)); 2491 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2492 } 2493 2494 const char * 2495 cpuid_getsocketstr(cpu_t *cpu) 2496 { 2497 static const char *socketstr = NULL; 2498 struct cpuid_info *cpi; 2499 2500 ASSERT(cpuid_checkpass(cpu, 1)); 2501 cpi = cpu->cpu_m.mcpu_cpi; 2502 2503 /* Assume that socket types are the same across the system */ 2504 if (socketstr == NULL) 2505 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 2506 cpi->cpi_model, cpi->cpi_step); 2507 2508 2509 return (socketstr); 2510 } 2511 2512 int 2513 cpuid_get_chipid(cpu_t *cpu) 2514 { 2515 ASSERT(cpuid_checkpass(cpu, 1)); 2516 2517 if (cpuid_is_cmt(cpu)) 2518 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2519 return (cpu->cpu_id); 2520 } 2521 2522 id_t 2523 cpuid_get_coreid(cpu_t *cpu) 2524 { 2525 ASSERT(cpuid_checkpass(cpu, 1)); 2526 
return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2527 } 2528 2529 int 2530 cpuid_get_pkgcoreid(cpu_t *cpu) 2531 { 2532 ASSERT(cpuid_checkpass(cpu, 1)); 2533 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2534 } 2535 2536 int 2537 cpuid_get_clogid(cpu_t *cpu) 2538 { 2539 ASSERT(cpuid_checkpass(cpu, 1)); 2540 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2541 } 2542 2543 /*ARGSUSED*/ 2544 int 2545 cpuid_have_cr8access(cpu_t *cpu) 2546 { 2547 #if defined(__amd64) 2548 return (1); 2549 #else 2550 struct cpuid_info *cpi; 2551 2552 ASSERT(cpu != NULL); 2553 cpi = cpu->cpu_m.mcpu_cpi; 2554 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 2555 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 2556 return (1); 2557 return (0); 2558 #endif 2559 } 2560 2561 uint32_t 2562 cpuid_get_apicid(cpu_t *cpu) 2563 { 2564 ASSERT(cpuid_checkpass(cpu, 1)); 2565 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 2566 return (UINT32_MAX); 2567 } else { 2568 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 2569 } 2570 } 2571 2572 void 2573 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2574 { 2575 struct cpuid_info *cpi; 2576 2577 if (cpu == NULL) 2578 cpu = CPU; 2579 cpi = cpu->cpu_m.mcpu_cpi; 2580 2581 ASSERT(cpuid_checkpass(cpu, 1)); 2582 2583 if (pabits) 2584 *pabits = cpi->cpi_pabits; 2585 if (vabits) 2586 *vabits = cpi->cpi_vabits; 2587 } 2588 2589 /* 2590 * Returns the number of data TLB entries for a corresponding 2591 * pagesize. If it can't be computed, or isn't known, the 2592 * routine returns zero. If you ask about an architecturally 2593 * impossible pagesize, the routine will panic (so that the 2594 * hat implementor knows that things are inconsistent.) 
2595 */ 2596 uint_t 2597 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2598 { 2599 struct cpuid_info *cpi; 2600 uint_t dtlb_nent = 0; 2601 2602 if (cpu == NULL) 2603 cpu = CPU; 2604 cpi = cpu->cpu_m.mcpu_cpi; 2605 2606 ASSERT(cpuid_checkpass(cpu, 1)); 2607 2608 /* 2609 * Check the L2 TLB info 2610 */ 2611 if (cpi->cpi_xmaxeax >= 0x80000006) { 2612 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2613 2614 switch (pagesize) { 2615 2616 case 4 * 1024: 2617 /* 2618 * All zero in the top 16 bits of the register 2619 * indicates a unified TLB. Size is in low 16 bits. 2620 */ 2621 if ((cp->cp_ebx & 0xffff0000) == 0) 2622 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2623 else 2624 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2625 break; 2626 2627 case 2 * 1024 * 1024: 2628 if ((cp->cp_eax & 0xffff0000) == 0) 2629 dtlb_nent = cp->cp_eax & 0x0000ffff; 2630 else 2631 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2632 break; 2633 2634 default: 2635 panic("unknown L2 pagesize"); 2636 /*NOTREACHED*/ 2637 } 2638 } 2639 2640 if (dtlb_nent != 0) 2641 return (dtlb_nent); 2642 2643 /* 2644 * No L2 TLB support for this size, try L1. 2645 */ 2646 if (cpi->cpi_xmaxeax >= 0x80000005) { 2647 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2648 2649 switch (pagesize) { 2650 case 4 * 1024: 2651 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2652 break; 2653 case 2 * 1024 * 1024: 2654 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2655 break; 2656 default: 2657 panic("unknown L1 d-TLB pagesize"); 2658 /*NOTREACHED*/ 2659 } 2660 } 2661 2662 return (dtlb_nent); 2663 } 2664 2665 /* 2666 * Return 0 if the erratum is not present or not applicable, positive 2667 * if it is, and negative if the status of the erratum is unknown. 
2668 * 2669 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2670 * Processors" #25759, Rev 3.57, August 2005 2671 */ 2672 int 2673 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2674 { 2675 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2676 uint_t eax; 2677 2678 /* 2679 * Bail out if this CPU isn't an AMD CPU, or if it's 2680 * a legacy (32-bit) AMD CPU. 2681 */ 2682 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2683 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2684 cpi->cpi_family == 6) 2685 2686 return (0); 2687 2688 eax = cpi->cpi_std[1].cp_eax; 2689 2690 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2691 #define SH_B3(eax) (eax == 0xf51) 2692 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2693 2694 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2695 2696 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2697 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2698 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2699 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2700 2701 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2702 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2703 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2704 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2705 2706 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2707 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2708 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2709 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2710 #define BH_E4(eax) (eax == 0x20fb1) 2711 #define SH_E5(eax) (eax == 0x20f42) 2712 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2713 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2714 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2715 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2716 DH_E6(eax) || JH_E6(eax)) 2717 2718 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 
0x100f02) 2719 #define DR_B0(eax) (eax == 0x100f20) 2720 #define DR_B1(eax) (eax == 0x100f21) 2721 #define DR_BA(eax) (eax == 0x100f2a) 2722 #define DR_B2(eax) (eax == 0x100f22) 2723 #define DR_B3(eax) (eax == 0x100f23) 2724 #define RB_C0(eax) (eax == 0x100f40) 2725 2726 switch (erratum) { 2727 case 1: 2728 return (cpi->cpi_family < 0x10); 2729 case 51: /* what does the asterisk mean? */ 2730 return (B(eax) || SH_C0(eax) || CG(eax)); 2731 case 52: 2732 return (B(eax)); 2733 case 57: 2734 return (cpi->cpi_family <= 0x11); 2735 case 58: 2736 return (B(eax)); 2737 case 60: 2738 return (cpi->cpi_family <= 0x11); 2739 case 61: 2740 case 62: 2741 case 63: 2742 case 64: 2743 case 65: 2744 case 66: 2745 case 68: 2746 case 69: 2747 case 70: 2748 case 71: 2749 return (B(eax)); 2750 case 72: 2751 return (SH_B0(eax)); 2752 case 74: 2753 return (B(eax)); 2754 case 75: 2755 return (cpi->cpi_family < 0x10); 2756 case 76: 2757 return (B(eax)); 2758 case 77: 2759 return (cpi->cpi_family <= 0x11); 2760 case 78: 2761 return (B(eax) || SH_C0(eax)); 2762 case 79: 2763 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2764 case 80: 2765 case 81: 2766 case 82: 2767 return (B(eax)); 2768 case 83: 2769 return (B(eax) || SH_C0(eax) || CG(eax)); 2770 case 85: 2771 return (cpi->cpi_family < 0x10); 2772 case 86: 2773 return (SH_C0(eax) || CG(eax)); 2774 case 88: 2775 #if !defined(__amd64) 2776 return (0); 2777 #else 2778 return (B(eax) || SH_C0(eax)); 2779 #endif 2780 case 89: 2781 return (cpi->cpi_family < 0x10); 2782 case 90: 2783 return (B(eax) || SH_C0(eax) || CG(eax)); 2784 case 91: 2785 case 92: 2786 return (B(eax) || SH_C0(eax)); 2787 case 93: 2788 return (SH_C0(eax)); 2789 case 94: 2790 return (B(eax) || SH_C0(eax) || CG(eax)); 2791 case 95: 2792 #if !defined(__amd64) 2793 return (0); 2794 #else 2795 return (B(eax) || SH_C0(eax)); 2796 #endif 2797 case 96: 2798 return (B(eax) || SH_C0(eax) || CG(eax)); 2799 case 97: 2800 case 98: 2801 return (SH_C0(eax) || CG(eax)); 2802 
case 99: 2803 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2804 case 100: 2805 return (B(eax) || SH_C0(eax)); 2806 case 101: 2807 case 103: 2808 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2809 case 104: 2810 return (SH_C0(eax) || CG(eax) || D0(eax)); 2811 case 105: 2812 case 106: 2813 case 107: 2814 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2815 case 108: 2816 return (DH_CG(eax)); 2817 case 109: 2818 return (SH_C0(eax) || CG(eax) || D0(eax)); 2819 case 110: 2820 return (D0(eax) || EX(eax)); 2821 case 111: 2822 return (CG(eax)); 2823 case 112: 2824 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2825 case 113: 2826 return (eax == 0x20fc0); 2827 case 114: 2828 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2829 case 115: 2830 return (SH_E0(eax) || JH_E1(eax)); 2831 case 116: 2832 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2833 case 117: 2834 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2835 case 118: 2836 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2837 JH_E6(eax)); 2838 case 121: 2839 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2840 case 122: 2841 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2842 case 123: 2843 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2844 case 131: 2845 return (cpi->cpi_family < 0x10); 2846 case 6336786: 2847 /* 2848 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2849 * if this is a K8 family or newer processor 2850 */ 2851 if (CPI_FAMILY(cpi) == 0xf) { 2852 struct cpuid_regs regs; 2853 regs.cp_eax = 0x80000007; 2854 (void) __cpuid_insn(®s); 2855 return (!(regs.cp_edx & 0x100)); 2856 } 2857 return (0); 2858 case 6323525: 2859 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2860 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2861 2862 case 6671130: 2863 /* 2864 * check for processors (pre-Shanghai) that do not provide 2865 * optimal management of 1gb ptes in its tlb. 
2866 */ 2867 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 2868 2869 case 298: 2870 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 2871 DR_B2(eax) || RB_C0(eax)); 2872 2873 default: 2874 return (-1); 2875 2876 } 2877 } 2878 2879 /* 2880 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 2881 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 2882 */ 2883 int 2884 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 2885 { 2886 struct cpuid_info *cpi; 2887 uint_t osvwid; 2888 static int osvwfeature = -1; 2889 uint64_t osvwlength; 2890 2891 2892 cpi = cpu->cpu_m.mcpu_cpi; 2893 2894 /* confirm OSVW supported */ 2895 if (osvwfeature == -1) { 2896 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 2897 } else { 2898 /* assert that osvw feature setting is consistent on all cpus */ 2899 ASSERT(osvwfeature == 2900 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 2901 } 2902 if (!osvwfeature) 2903 return (-1); 2904 2905 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 2906 2907 switch (erratum) { 2908 case 298: /* osvwid is 0 */ 2909 osvwid = 0; 2910 if (osvwlength <= (uint64_t)osvwid) { 2911 /* osvwid 0 is unknown */ 2912 return (-1); 2913 } 2914 2915 /* 2916 * Check the OSVW STATUS MSR to determine the state 2917 * of the erratum where: 2918 * 0 - fixed by HW 2919 * 1 - BIOS has applied the workaround when BIOS 2920 * workaround is available. (Or for other errata, 2921 * OS workaround is required.) 2922 * For a value of 1, caller will confirm that the 2923 * erratum 298 workaround has indeed been applied by BIOS. 2924 * 2925 * A 1 may be set in cpus that have a HW fix 2926 * in a mixed cpu system. Regarding erratum 298: 2927 * In a multiprocessor platform, the workaround above 2928 * should be applied to all processors regardless of 2929 * silicon revision when an affected processor is 2930 * present. 
2931 */ 2932 2933 return (rdmsr(MSR_AMD_OSVW_STATUS + 2934 (osvwid / OSVW_ID_CNT_PER_MSR)) & 2935 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 2936 2937 default: 2938 return (-1); 2939 } 2940 } 2941 2942 static const char assoc_str[] = "associativity"; 2943 static const char line_str[] = "line-size"; 2944 static const char size_str[] = "size"; 2945 2946 static void 2947 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2948 uint32_t val) 2949 { 2950 char buf[128]; 2951 2952 /* 2953 * ndi_prop_update_int() is used because it is desirable for 2954 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2955 */ 2956 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2957 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2958 } 2959 2960 /* 2961 * Intel-style cache/tlb description 2962 * 2963 * Standard cpuid level 2 gives a randomly ordered 2964 * selection of tags that index into a table that describes 2965 * cache and tlb properties. 2966 */ 2967 2968 static const char l1_icache_str[] = "l1-icache"; 2969 static const char l1_dcache_str[] = "l1-dcache"; 2970 static const char l2_cache_str[] = "l2-cache"; 2971 static const char l3_cache_str[] = "l3-cache"; 2972 static const char itlb4k_str[] = "itlb-4K"; 2973 static const char dtlb4k_str[] = "dtlb-4K"; 2974 static const char itlb2M_str[] = "itlb-2M"; 2975 static const char itlb4M_str[] = "itlb-4M"; 2976 static const char dtlb4M_str[] = "dtlb-4M"; 2977 static const char dtlb24_str[] = "dtlb0-2M-4M"; 2978 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2979 static const char itlb24_str[] = "itlb-2M-4M"; 2980 static const char dtlb44_str[] = "dtlb-4K-4M"; 2981 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2982 static const char sl2_cache_str[] = "sectored-l2-cache"; 2983 static const char itrace_str[] = "itrace-cache"; 2984 static const char sl3_cache_str[] = "sectored-l3-cache"; 2985 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 2986 2987 static 
const struct cachetab { 2988 uint8_t ct_code; 2989 uint8_t ct_assoc; 2990 uint16_t ct_line_size; 2991 size_t ct_size; 2992 const char *ct_label; 2993 } intel_ctab[] = { 2994 /* 2995 * maintain descending order! 2996 * 2997 * Codes ignored - Reason 2998 * ---------------------- 2999 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3000 * f0H/f1H - Currently we do not interpret prefetch size by design 3001 */ 3002 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3003 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3004 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3005 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3006 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3007 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3008 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3009 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3010 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3011 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3012 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3013 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3014 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3015 { 0xc0, 4, 0, 8, dtlb44_str }, 3016 { 0xba, 4, 0, 64, dtlb4k_str }, 3017 { 0xb4, 4, 0, 256, dtlb4k_str }, 3018 { 0xb3, 4, 0, 128, dtlb4k_str }, 3019 { 0xb2, 4, 0, 64, itlb4k_str }, 3020 { 0xb0, 4, 0, 128, itlb4k_str }, 3021 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3022 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3023 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3024 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3025 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3026 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3027 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3028 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3029 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3030 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3031 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3032 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3033 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3034 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3035 { 0x73, 8, 0, 64*1024, itrace_str}, 3036 { 0x72, 8, 0, 32*1024, 
itrace_str}, 3037 { 0x71, 8, 0, 16*1024, itrace_str}, 3038 { 0x70, 8, 0, 12*1024, itrace_str}, 3039 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3040 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3041 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3042 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3043 { 0x5d, 0, 0, 256, dtlb44_str}, 3044 { 0x5c, 0, 0, 128, dtlb44_str}, 3045 { 0x5b, 0, 0, 64, dtlb44_str}, 3046 { 0x5a, 4, 0, 32, dtlb24_str}, 3047 { 0x59, 0, 0, 16, dtlb4k_str}, 3048 { 0x57, 4, 0, 16, dtlb4k_str}, 3049 { 0x56, 4, 0, 16, dtlb4M_str}, 3050 { 0x55, 0, 0, 7, itlb24_str}, 3051 { 0x52, 0, 0, 256, itlb424_str}, 3052 { 0x51, 0, 0, 128, itlb424_str}, 3053 { 0x50, 0, 0, 64, itlb424_str}, 3054 { 0x4f, 0, 0, 32, itlb4k_str}, 3055 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3056 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3057 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3058 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3059 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3060 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3061 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3062 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3063 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3064 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3065 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3066 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3067 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3068 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3069 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3070 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3071 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3072 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3073 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3074 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3075 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3076 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3077 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3078 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3079 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3080 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3081 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3082 { 0x0d, 4, 
32, 16*1024, l1_dcache_str}, 3083 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3084 { 0x0b, 4, 0, 4, itlb4M_str}, 3085 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3086 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3087 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3088 { 0x05, 4, 0, 32, dtlb4M_str}, 3089 { 0x04, 4, 0, 8, dtlb4M_str}, 3090 { 0x03, 4, 0, 64, dtlb4k_str}, 3091 { 0x02, 4, 0, 2, itlb4M_str}, 3092 { 0x01, 4, 0, 32, itlb4k_str}, 3093 { 0 } 3094 }; 3095 3096 static const struct cachetab cyrix_ctab[] = { 3097 { 0x70, 4, 0, 32, "tlb-4K" }, 3098 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3099 { 0 } 3100 }; 3101 3102 /* 3103 * Search a cache table for a matching entry 3104 */ 3105 static const struct cachetab * 3106 find_cacheent(const struct cachetab *ct, uint_t code) 3107 { 3108 if (code != 0) { 3109 for (; ct->ct_code != 0; ct++) 3110 if (ct->ct_code <= code) 3111 break; 3112 if (ct->ct_code == code) 3113 return (ct); 3114 } 3115 return (NULL); 3116 } 3117 3118 /* 3119 * Populate cachetab entry with L2 or L3 cache-information using 3120 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3121 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3122 * information is found. 
3123 */ 3124 static int 3125 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3126 { 3127 uint32_t level, i; 3128 int ret = 0; 3129 3130 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3131 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3132 3133 if (level == 2 || level == 3) { 3134 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3135 ct->ct_line_size = 3136 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3137 ct->ct_size = ct->ct_assoc * 3138 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3139 ct->ct_line_size * 3140 (cpi->cpi_std_4[i]->cp_ecx + 1); 3141 3142 if (level == 2) { 3143 ct->ct_label = l2_cache_str; 3144 } else if (level == 3) { 3145 ct->ct_label = l3_cache_str; 3146 } 3147 ret = 1; 3148 } 3149 } 3150 3151 return (ret); 3152 } 3153 3154 /* 3155 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3156 * The walk is terminated if the walker returns non-zero. 3157 */ 3158 static void 3159 intel_walk_cacheinfo(struct cpuid_info *cpi, 3160 void *arg, int (*func)(void *, const struct cachetab *)) 3161 { 3162 const struct cachetab *ct; 3163 struct cachetab des_49_ct, des_b1_ct; 3164 uint8_t *dp; 3165 int i; 3166 3167 if ((dp = cpi->cpi_cacheinfo) == NULL) 3168 return; 3169 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3170 /* 3171 * For overloaded descriptor 0x49 we use cpuid function 4 3172 * if supported by the current processor, to create 3173 * cache information. 3174 * For overloaded descriptor 0xb1 we use X86_PAE flag 3175 * to disambiguate the cache information. 
3176 */ 3177 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3178 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3179 ct = &des_49_ct; 3180 } else if (*dp == 0xb1) { 3181 des_b1_ct.ct_code = 0xb1; 3182 des_b1_ct.ct_assoc = 4; 3183 des_b1_ct.ct_line_size = 0; 3184 if (x86_feature & X86_PAE) { 3185 des_b1_ct.ct_size = 8; 3186 des_b1_ct.ct_label = itlb2M_str; 3187 } else { 3188 des_b1_ct.ct_size = 4; 3189 des_b1_ct.ct_label = itlb4M_str; 3190 } 3191 ct = &des_b1_ct; 3192 } else { 3193 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3194 continue; 3195 } 3196 } 3197 3198 if (func(arg, ct) != 0) { 3199 break; 3200 } 3201 } 3202 } 3203 3204 /* 3205 * (Like the Intel one, except for Cyrix CPUs) 3206 */ 3207 static void 3208 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3209 void *arg, int (*func)(void *, const struct cachetab *)) 3210 { 3211 const struct cachetab *ct; 3212 uint8_t *dp; 3213 int i; 3214 3215 if ((dp = cpi->cpi_cacheinfo) == NULL) 3216 return; 3217 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3218 /* 3219 * Search Cyrix-specific descriptor table first .. 3220 */ 3221 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3222 if (func(arg, ct) != 0) 3223 break; 3224 continue; 3225 } 3226 /* 3227 * .. else fall back to the Intel one 3228 */ 3229 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3230 if (func(arg, ct) != 0) 3231 break; 3232 continue; 3233 } 3234 } 3235 } 3236 3237 /* 3238 * A cacheinfo walker that adds associativity, line-size, and size properties 3239 * to the devinfo node it is passed as an argument. 
3240 */ 3241 static int 3242 add_cacheent_props(void *arg, const struct cachetab *ct) 3243 { 3244 dev_info_t *devi = arg; 3245 3246 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3247 if (ct->ct_line_size != 0) 3248 add_cache_prop(devi, ct->ct_label, line_str, 3249 ct->ct_line_size); 3250 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3251 return (0); 3252 } 3253 3254 3255 static const char fully_assoc[] = "fully-associative?"; 3256 3257 /* 3258 * AMD style cache/tlb description 3259 * 3260 * Extended functions 5 and 6 directly describe properties of 3261 * tlbs and various cache levels. 3262 */ 3263 static void 3264 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3265 { 3266 switch (assoc) { 3267 case 0: /* reserved; ignore */ 3268 break; 3269 default: 3270 add_cache_prop(devi, label, assoc_str, assoc); 3271 break; 3272 case 0xff: 3273 add_cache_prop(devi, label, fully_assoc, 1); 3274 break; 3275 } 3276 } 3277 3278 static void 3279 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3280 { 3281 if (size == 0) 3282 return; 3283 add_cache_prop(devi, label, size_str, size); 3284 add_amd_assoc(devi, label, assoc); 3285 } 3286 3287 static void 3288 add_amd_cache(dev_info_t *devi, const char *label, 3289 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3290 { 3291 if (size == 0 || line_size == 0) 3292 return; 3293 add_amd_assoc(devi, label, assoc); 3294 /* 3295 * Most AMD parts have a sectored cache. Multiple cache lines are 3296 * associated with each tag. A sector consists of all cache lines 3297 * associated with a tag. For example, the AMD K6-III has a sector 3298 * size of 2 cache lines per tag. 
3299 */ 3300 if (lines_per_tag != 0) 3301 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3302 add_cache_prop(devi, label, line_str, line_size); 3303 add_cache_prop(devi, label, size_str, size * 1024); 3304 } 3305 3306 static void 3307 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3308 { 3309 switch (assoc) { 3310 case 0: /* off */ 3311 break; 3312 case 1: 3313 case 2: 3314 case 4: 3315 add_cache_prop(devi, label, assoc_str, assoc); 3316 break; 3317 case 6: 3318 add_cache_prop(devi, label, assoc_str, 8); 3319 break; 3320 case 8: 3321 add_cache_prop(devi, label, assoc_str, 16); 3322 break; 3323 case 0xf: 3324 add_cache_prop(devi, label, fully_assoc, 1); 3325 break; 3326 default: /* reserved; ignore */ 3327 break; 3328 } 3329 } 3330 3331 static void 3332 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3333 { 3334 if (size == 0 || assoc == 0) 3335 return; 3336 add_amd_l2_assoc(devi, label, assoc); 3337 add_cache_prop(devi, label, size_str, size); 3338 } 3339 3340 static void 3341 add_amd_l2_cache(dev_info_t *devi, const char *label, 3342 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3343 { 3344 if (size == 0 || assoc == 0 || line_size == 0) 3345 return; 3346 add_amd_l2_assoc(devi, label, assoc); 3347 if (lines_per_tag != 0) 3348 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3349 add_cache_prop(devi, label, line_str, line_size); 3350 add_cache_prop(devi, label, size_str, size * 1024); 3351 } 3352 3353 static void 3354 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3355 { 3356 struct cpuid_regs *cp; 3357 3358 if (cpi->cpi_xmaxeax < 0x80000005) 3359 return; 3360 cp = &cpi->cpi_extd[5]; 3361 3362 /* 3363 * 4M/2M L1 TLB configuration 3364 * 3365 * We report the size for 2M pages because AMD uses two 3366 * TLB entries for one 4M page. 
3367 */ 3368 add_amd_tlb(devi, "dtlb-2M", 3369 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3370 add_amd_tlb(devi, "itlb-2M", 3371 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3372 3373 /* 3374 * 4K L1 TLB configuration 3375 */ 3376 3377 switch (cpi->cpi_vendor) { 3378 uint_t nentries; 3379 case X86_VENDOR_TM: 3380 if (cpi->cpi_family >= 5) { 3381 /* 3382 * Crusoe processors have 256 TLB entries, but 3383 * cpuid data format constrains them to only 3384 * reporting 255 of them. 3385 */ 3386 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3387 nentries = 256; 3388 /* 3389 * Crusoe processors also have a unified TLB 3390 */ 3391 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3392 nentries); 3393 break; 3394 } 3395 /*FALLTHROUGH*/ 3396 default: 3397 add_amd_tlb(devi, itlb4k_str, 3398 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3399 add_amd_tlb(devi, dtlb4k_str, 3400 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3401 break; 3402 } 3403 3404 /* 3405 * data L1 cache configuration 3406 */ 3407 3408 add_amd_cache(devi, l1_dcache_str, 3409 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3410 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3411 3412 /* 3413 * code L1 cache configuration 3414 */ 3415 3416 add_amd_cache(devi, l1_icache_str, 3417 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3418 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3419 3420 if (cpi->cpi_xmaxeax < 0x80000006) 3421 return; 3422 cp = &cpi->cpi_extd[6]; 3423 3424 /* Check for a unified L2 TLB for large pages */ 3425 3426 if (BITX(cp->cp_eax, 31, 16) == 0) 3427 add_amd_l2_tlb(devi, "l2-tlb-2M", 3428 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3429 else { 3430 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3431 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3432 add_amd_l2_tlb(devi, "l2-itlb-2M", 3433 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3434 } 3435 3436 /* Check for a unified L2 TLB for 4K pages */ 3437 3438 if (BITX(cp->cp_ebx, 31, 16) 
== 0) { 3439 add_amd_l2_tlb(devi, "l2-tlb-4K", 3440 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3441 } else { 3442 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3443 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3444 add_amd_l2_tlb(devi, "l2-itlb-4K", 3445 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3446 } 3447 3448 add_amd_l2_cache(devi, l2_cache_str, 3449 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3450 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3451 } 3452 3453 /* 3454 * There are two basic ways that the x86 world describes it cache 3455 * and tlb architecture - Intel's way and AMD's way. 3456 * 3457 * Return which flavor of cache architecture we should use 3458 */ 3459 static int 3460 x86_which_cacheinfo(struct cpuid_info *cpi) 3461 { 3462 switch (cpi->cpi_vendor) { 3463 case X86_VENDOR_Intel: 3464 if (cpi->cpi_maxeax >= 2) 3465 return (X86_VENDOR_Intel); 3466 break; 3467 case X86_VENDOR_AMD: 3468 /* 3469 * The K5 model 1 was the first part from AMD that reported 3470 * cache sizes via extended cpuid functions. 3471 */ 3472 if (cpi->cpi_family > 5 || 3473 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3474 return (X86_VENDOR_AMD); 3475 break; 3476 case X86_VENDOR_TM: 3477 if (cpi->cpi_family >= 5) 3478 return (X86_VENDOR_AMD); 3479 /*FALLTHROUGH*/ 3480 default: 3481 /* 3482 * If they have extended CPU data for 0x80000005 3483 * then we assume they have AMD-format cache 3484 * information. 3485 * 3486 * If not, and the vendor happens to be Cyrix, 3487 * then try our-Cyrix specific handler. 3488 * 3489 * If we're not Cyrix, then assume we're using Intel's 3490 * table-driven format instead. 
3491 */ 3492 if (cpi->cpi_xmaxeax >= 0x80000005) 3493 return (X86_VENDOR_AMD); 3494 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3495 return (X86_VENDOR_Cyrix); 3496 else if (cpi->cpi_maxeax >= 2) 3497 return (X86_VENDOR_Intel); 3498 break; 3499 } 3500 return (-1); 3501 } 3502 3503 void 3504 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 3505 struct cpuid_info *cpi) 3506 { 3507 dev_info_t *cpu_devi; 3508 int create; 3509 3510 cpu_devi = (dev_info_t *)dip; 3511 3512 /* device_type */ 3513 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3514 "device_type", "cpu"); 3515 3516 /* reg */ 3517 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3518 "reg", cpu_id); 3519 3520 /* cpu-mhz, and clock-frequency */ 3521 if (cpu_freq > 0) { 3522 long long mul; 3523 3524 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3525 "cpu-mhz", cpu_freq); 3526 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3527 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3528 "clock-frequency", (int)mul); 3529 } 3530 3531 if ((x86_feature & X86_CPUID) == 0) { 3532 return; 3533 } 3534 3535 /* vendor-id */ 3536 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3537 "vendor-id", cpi->cpi_vendorstr); 3538 3539 if (cpi->cpi_maxeax == 0) { 3540 return; 3541 } 3542 3543 /* 3544 * family, model, and step 3545 */ 3546 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3547 "family", CPI_FAMILY(cpi)); 3548 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3549 "cpu-model", CPI_MODEL(cpi)); 3550 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3551 "stepping-id", CPI_STEP(cpi)); 3552 3553 /* type */ 3554 switch (cpi->cpi_vendor) { 3555 case X86_VENDOR_Intel: 3556 create = 1; 3557 break; 3558 default: 3559 create = 0; 3560 break; 3561 } 3562 if (create) 3563 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3564 "type", CPI_TYPE(cpi)); 3565 3566 /* ext-family */ 3567 switch (cpi->cpi_vendor) { 3568 case X86_VENDOR_Intel: 3569 case X86_VENDOR_AMD: 3570 create = 
cpi->cpi_family >= 0xf; 3571 break; 3572 default: 3573 create = 0; 3574 break; 3575 } 3576 if (create) 3577 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3578 "ext-family", CPI_FAMILY_XTD(cpi)); 3579 3580 /* ext-model */ 3581 switch (cpi->cpi_vendor) { 3582 case X86_VENDOR_Intel: 3583 create = IS_EXTENDED_MODEL_INTEL(cpi); 3584 break; 3585 case X86_VENDOR_AMD: 3586 create = CPI_FAMILY(cpi) == 0xf; 3587 break; 3588 default: 3589 create = 0; 3590 break; 3591 } 3592 if (create) 3593 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3594 "ext-model", CPI_MODEL_XTD(cpi)); 3595 3596 /* generation */ 3597 switch (cpi->cpi_vendor) { 3598 case X86_VENDOR_AMD: 3599 /* 3600 * AMD K5 model 1 was the first part to support this 3601 */ 3602 create = cpi->cpi_xmaxeax >= 0x80000001; 3603 break; 3604 default: 3605 create = 0; 3606 break; 3607 } 3608 if (create) 3609 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3610 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3611 3612 /* brand-id */ 3613 switch (cpi->cpi_vendor) { 3614 case X86_VENDOR_Intel: 3615 /* 3616 * brand id first appeared on Pentium III Xeon model 8, 3617 * and Celeron model 8 processors and Opteron 3618 */ 3619 create = cpi->cpi_family > 6 || 3620 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3621 break; 3622 case X86_VENDOR_AMD: 3623 create = cpi->cpi_family >= 0xf; 3624 break; 3625 default: 3626 create = 0; 3627 break; 3628 } 3629 if (create && cpi->cpi_brandid != 0) { 3630 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3631 "brand-id", cpi->cpi_brandid); 3632 } 3633 3634 /* chunks, and apic-id */ 3635 switch (cpi->cpi_vendor) { 3636 /* 3637 * first available on Pentium IV and Opteron (K8) 3638 */ 3639 case X86_VENDOR_Intel: 3640 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3641 break; 3642 case X86_VENDOR_AMD: 3643 create = cpi->cpi_family >= 0xf; 3644 break; 3645 default: 3646 create = 0; 3647 break; 3648 } 3649 if (create) { 3650 (void) ndi_prop_update_int(DDI_DEV_T_NONE, 
cpu_devi, 3651 "chunks", CPI_CHUNKS(cpi)); 3652 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3653 "apic-id", cpi->cpi_apicid); 3654 if (cpi->cpi_chipid >= 0) { 3655 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3656 "chip#", cpi->cpi_chipid); 3657 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3658 "clog#", cpi->cpi_clogid); 3659 } 3660 } 3661 3662 /* cpuid-features */ 3663 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3664 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3665 3666 3667 /* cpuid-features-ecx */ 3668 switch (cpi->cpi_vendor) { 3669 case X86_VENDOR_Intel: 3670 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3671 break; 3672 default: 3673 create = 0; 3674 break; 3675 } 3676 if (create) 3677 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3678 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3679 3680 /* ext-cpuid-features */ 3681 switch (cpi->cpi_vendor) { 3682 case X86_VENDOR_Intel: 3683 case X86_VENDOR_AMD: 3684 case X86_VENDOR_Cyrix: 3685 case X86_VENDOR_TM: 3686 case X86_VENDOR_Centaur: 3687 create = cpi->cpi_xmaxeax >= 0x80000001; 3688 break; 3689 default: 3690 create = 0; 3691 break; 3692 } 3693 if (create) { 3694 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3695 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3696 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3697 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3698 } 3699 3700 /* 3701 * Brand String first appeared in Intel Pentium IV, AMD K5 3702 * model 1, and Cyrix GXm. On earlier models we try and 3703 * simulate something similar .. so this string should always 3704 * same -something- about the processor, however lame. 
3705 */ 3706 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3707 "brand-string", cpi->cpi_brandstr); 3708 3709 /* 3710 * Finally, cache and tlb information 3711 */ 3712 switch (x86_which_cacheinfo(cpi)) { 3713 case X86_VENDOR_Intel: 3714 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3715 break; 3716 case X86_VENDOR_Cyrix: 3717 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3718 break; 3719 case X86_VENDOR_AMD: 3720 amd_cache_info(cpi, cpu_devi); 3721 break; 3722 default: 3723 break; 3724 } 3725 } 3726 3727 struct l2info { 3728 int *l2i_csz; 3729 int *l2i_lsz; 3730 int *l2i_assoc; 3731 int l2i_ret; 3732 }; 3733 3734 /* 3735 * A cacheinfo walker that fetches the size, line-size and associativity 3736 * of the L2 cache 3737 */ 3738 static int 3739 intel_l2cinfo(void *arg, const struct cachetab *ct) 3740 { 3741 struct l2info *l2i = arg; 3742 int *ip; 3743 3744 if (ct->ct_label != l2_cache_str && 3745 ct->ct_label != sl2_cache_str) 3746 return (0); /* not an L2 -- keep walking */ 3747 3748 if ((ip = l2i->l2i_csz) != NULL) 3749 *ip = ct->ct_size; 3750 if ((ip = l2i->l2i_lsz) != NULL) 3751 *ip = ct->ct_line_size; 3752 if ((ip = l2i->l2i_assoc) != NULL) 3753 *ip = ct->ct_assoc; 3754 l2i->l2i_ret = ct->ct_size; 3755 return (1); /* was an L2 -- terminate walk */ 3756 } 3757 3758 /* 3759 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3760 * 3761 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3762 * value is the associativity, the associativity for the L2 cache and 3763 * tlb is encoded in the following table. The 4 bit L2 value serves as 3764 * an index into the amd_afd[] array to determine the associativity. 3765 * -1 is undefined. 0 is fully associative. 
3766 */ 3767 3768 static int amd_afd[] = 3769 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3770 3771 static void 3772 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3773 { 3774 struct cpuid_regs *cp; 3775 uint_t size, assoc; 3776 int i; 3777 int *ip; 3778 3779 if (cpi->cpi_xmaxeax < 0x80000006) 3780 return; 3781 cp = &cpi->cpi_extd[6]; 3782 3783 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3784 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3785 uint_t cachesz = size * 1024; 3786 assoc = amd_afd[i]; 3787 3788 ASSERT(assoc != -1); 3789 3790 if ((ip = l2i->l2i_csz) != NULL) 3791 *ip = cachesz; 3792 if ((ip = l2i->l2i_lsz) != NULL) 3793 *ip = BITX(cp->cp_ecx, 7, 0); 3794 if ((ip = l2i->l2i_assoc) != NULL) 3795 *ip = assoc; 3796 l2i->l2i_ret = cachesz; 3797 } 3798 } 3799 3800 int 3801 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3802 { 3803 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3804 struct l2info __l2info, *l2i = &__l2info; 3805 3806 l2i->l2i_csz = csz; 3807 l2i->l2i_lsz = lsz; 3808 l2i->l2i_assoc = assoc; 3809 l2i->l2i_ret = -1; 3810 3811 switch (x86_which_cacheinfo(cpi)) { 3812 case X86_VENDOR_Intel: 3813 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3814 break; 3815 case X86_VENDOR_Cyrix: 3816 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3817 break; 3818 case X86_VENDOR_AMD: 3819 amd_l2cacheinfo(cpi, l2i); 3820 break; 3821 default: 3822 break; 3823 } 3824 return (l2i->l2i_ret); 3825 } 3826 3827 #if !defined(__xpv) 3828 3829 uint32_t * 3830 cpuid_mwait_alloc(cpu_t *cpu) 3831 { 3832 uint32_t *ret; 3833 size_t mwait_size; 3834 3835 ASSERT(cpuid_checkpass(cpu, 2)); 3836 3837 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3838 if (mwait_size == 0) 3839 return (NULL); 3840 3841 /* 3842 * kmem_alloc() returns cache line size aligned data for mwait_size 3843 * allocations. mwait_size is currently cache line sized. Neither 3844 * of these implementation details are guarantied to be true in the 3845 * future. 
3846 * 3847 * First try allocating mwait_size as kmem_alloc() currently returns 3848 * correctly aligned memory. If kmem_alloc() does not return 3849 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3850 * 3851 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3852 * decide to free this memory. 3853 */ 3854 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3855 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3856 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3857 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3858 *ret = MWAIT_RUNNING; 3859 return (ret); 3860 } else { 3861 kmem_free(ret, mwait_size); 3862 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3863 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3864 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3865 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3866 *ret = MWAIT_RUNNING; 3867 return (ret); 3868 } 3869 } 3870 3871 void 3872 cpuid_mwait_free(cpu_t *cpu) 3873 { 3874 ASSERT(cpuid_checkpass(cpu, 2)); 3875 3876 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3877 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3878 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3879 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3880 } 3881 3882 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3883 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3884 } 3885 3886 void 3887 patch_tsc_read(int flag) 3888 { 3889 size_t cnt; 3890 3891 switch (flag) { 3892 case X86_NO_TSC: 3893 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3894 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3895 break; 3896 case X86_HAVE_TSCP: 3897 cnt = &_tscp_end - &_tscp_start; 3898 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3899 break; 3900 case X86_TSC_MFENCE: 3901 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 3902 (void) memcpy((void *)tsc_read, 3903 (void *)&_tsc_mfence_start, cnt); 3904 break; 3905 case X86_TSC_LFENCE: 3906 cnt = &_tsc_lfence_end - 
&_tsc_lfence_start; 3907 (void) memcpy((void *)tsc_read, 3908 (void *)&_tsc_lfence_start, cnt); 3909 break; 3910 default: 3911 break; 3912 } 3913 } 3914 3915 int 3916 cpuid_deep_cstates_supported(void) 3917 { 3918 struct cpuid_info *cpi; 3919 struct cpuid_regs regs; 3920 3921 ASSERT(cpuid_checkpass(CPU, 1)); 3922 3923 cpi = CPU->cpu_m.mcpu_cpi; 3924 3925 if (!(x86_feature & X86_CPUID)) 3926 return (0); 3927 3928 switch (cpi->cpi_vendor) { 3929 case X86_VENDOR_Intel: 3930 if (cpi->cpi_xmaxeax < 0x80000007) 3931 return (0); 3932 3933 /* 3934 * TSC run at a constant rate in all ACPI C-states? 3935 */ 3936 regs.cp_eax = 0x80000007; 3937 (void) __cpuid_insn(®s); 3938 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 3939 3940 default: 3941 return (0); 3942 } 3943 } 3944 3945 #endif /* !__xpv */ 3946 3947 void 3948 post_startup_cpu_fixups(void) 3949 { 3950 #ifndef __xpv 3951 /* 3952 * Some AMD processors support C1E state. Entering this state will 3953 * cause the local APIC timer to stop, which we can't deal with at 3954 * this time. 3955 */ 3956 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 3957 on_trap_data_t otd; 3958 uint64_t reg; 3959 3960 if (!on_trap(&otd, OT_DATA_ACCESS)) { 3961 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 3962 /* Disable C1E state if it is enabled by BIOS */ 3963 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 3964 AMD_ACTONCMPHALT_MASK) { 3965 reg &= ~(AMD_ACTONCMPHALT_MASK << 3966 AMD_ACTONCMPHALT_SHIFT); 3967 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 3968 } 3969 } 3970 no_trap(); 3971 } 3972 #endif /* !__xpv */ 3973 } 3974 3975 /* 3976 * Starting with the Westmere processor the local 3977 * APIC timer will continue running in all C-states, 3978 * including the deepest C-states. 
3979 */ 3980 int 3981 cpuid_arat_supported(void) 3982 { 3983 struct cpuid_info *cpi; 3984 struct cpuid_regs regs; 3985 3986 ASSERT(cpuid_checkpass(CPU, 1)); 3987 ASSERT(x86_feature & X86_CPUID); 3988 3989 cpi = CPU->cpu_m.mcpu_cpi; 3990 3991 switch (cpi->cpi_vendor) { 3992 case X86_VENDOR_Intel: 3993 /* 3994 * Always-running Local APIC Timer is 3995 * indicated by CPUID.6.EAX[2]. 3996 */ 3997 if (cpi->cpi_maxeax >= 6) { 3998 regs.cp_eax = 6; 3999 (void) cpuid_insn(NULL, ®s); 4000 return (regs.cp_eax & CPUID_CSTATE_ARAT); 4001 } else { 4002 return (0); 4003 } 4004 default: 4005 return (0); 4006 } 4007 } 4008 4009 #if defined(__amd64) && !defined(__xpv) 4010 /* 4011 * Patch in versions of bcopy for high performance Intel Nhm processors 4012 * and later... 4013 */ 4014 void 4015 patch_memops(uint_t vendor) 4016 { 4017 size_t cnt, i; 4018 caddr_t to, from; 4019 4020 if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) { 4021 cnt = &bcopy_patch_end - &bcopy_patch_start; 4022 to = &bcopy_ck_size; 4023 from = &bcopy_patch_start; 4024 for (i = 0; i < cnt; i++) { 4025 *to++ = *from++; 4026 } 4027 } 4028 } 4029 #endif /* __amd64 && !__xpv */ 4030