1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Various routines to handle identification 28 * and classification of x86 processors. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/archsystm.h> 33 #include <sys/x86_archext.h> 34 #include <sys/kmem.h> 35 #include <sys/systm.h> 36 #include <sys/cmn_err.h> 37 #include <sys/sunddi.h> 38 #include <sys/sunndi.h> 39 #include <sys/cpuvar.h> 40 #include <sys/processor.h> 41 #include <sys/sysmacros.h> 42 #include <sys/pg.h> 43 #include <sys/fp.h> 44 #include <sys/controlregs.h> 45 #include <sys/auxv_386.h> 46 #include <sys/bitmap.h> 47 #include <sys/memnode.h> 48 49 #ifdef __xpv 50 #include <sys/hypervisor.h> 51 #else 52 #include <sys/ontrap.h> 53 #endif 54 55 /* 56 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 57 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 58 * them accordingly. For most modern processors, feature detection occurs here 59 * in pass 1. 
60 * 61 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 62 * for the boot CPU and does the basic analysis that the early kernel needs. 63 * x86_feature is set based on the return value of cpuid_pass1() of the boot 64 * CPU. 65 * 66 * Pass 1 includes: 67 * 68 * o Determining vendor/model/family/stepping and setting x86_type and 69 * x86_vendor accordingly. 70 * o Processing the feature flags returned by the cpuid instruction while 71 * applying any workarounds or tricks for the specific processor. 72 * o Mapping the feature flags into Solaris feature bits (X86_*). 73 * o Processing extended feature flags if supported by the processor, 74 * again while applying specific processor knowledge. 75 * o Determining the CMT characteristics of the system. 76 * 77 * Pass 1 is done on non-boot CPUs during their initialization and the results 78 * are used only as a meager attempt at ensuring that all processors within the 79 * system support the same features. 80 * 81 * Pass 2 of cpuid feature analysis happens just at the beginning 82 * of startup(). It just copies in and corrects the remainder 83 * of the cpuid data we depend on: standard cpuid functions that we didn't 84 * need for pass1 feature analysis, and extended cpuid functions beyond the 85 * simple feature processing done in pass1. 86 * 87 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 88 * particular kernel memory allocation has been made available. It creates a 89 * readable brand string based on the data collected in the first two passes. 90 * 91 * Pass 4 of cpuid analysis is invoked after post_startup() when all 92 * the support infrastructure for various hardware features has been 93 * initialized. It determines which processor features will be reported 94 * to userland via the aux vector. 95 * 96 * All passes are executed on all CPUs, but only the boot CPU determines what 97 * features the kernel will use. 
/*
 * Much of the worst junk in this file is for the support of processors
 * that didn't really implement the cpuid instruction properly.
 *
 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
 * the pass numbers.  Accordingly, changes to the pass code may require changes
 * to the accessor code.
 */

/*
 * Kernel-wide summary of the processor.  Per the pass description above,
 * x86_feature is set from the return value of cpuid_pass1() on the boot CPU;
 * x86_vendor, x86_type and x86_clflush_size are written by cpuid_pass1().
 */
uint_t x86_feature = 0;
uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;
uint_t x86_clflush_size = 0;

/*
 * Erratum flags; cpuid_pass1() sets both to 1 when it classifies the
 * processor as a legacy Intel P6.
 */
uint_t pentiumpro_bug4046376;
uint_t pentiumpro_bug4064495;

uint_t enable486;
/*
 * This is set to platform type Solaris is running on.
 */
static int platform_type = HW_NATIVE;

/*
 * monitor/mwait info.
 *
 * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
 */
struct mwait_info {
	size_t		mon_min;	/* min size to avoid missed wakeups */
	size_t		mon_max;	/* size to avoid false wakeups */
	size_t		size_actual;	/* size actually allocated */
	void		*buf_actual;	/* memory actually allocated */
	uint32_t	support;	/* processor support of monitor/mwait */
};

/*
 * These constants determine how many of the elements of the
 * cpuid we cache in the cpuid_info data structure; the
 * remaining elements are accessible via the cpuid instruction.
 */

#define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
#define	NMAX_CPI_EXTD	9		/* eax = 0x80000000 .. 0x80000008 */
/*
 * Per-CPU cache of cpuid results plus values derived from them.
 *
 * The boot cpu (cpu_id 0) uses the statically allocated cpuid_info0 below;
 * every other cpu gets a zeroed instance from cpuid_alloc_space().
 */
struct cpuid_info {
	uint_t cpi_pass;		/* last pass completed */
	/*
	 * standard function information
	 */
	uint_t cpi_maxeax;		/* fn 0: %eax */
	char cpi_vendorstr[13];		/* fn 0: %ebx:%edx:%ecx, NUL-terminated */
	uint_t cpi_vendor;		/* enum of cpi_vendorstr */

	uint_t cpi_family;		/* fn 1: extended family */
	uint_t cpi_model;		/* fn 1: extended model */
	uint_t cpi_step;		/* fn 1: stepping */
	chipid_t cpi_chipid;		/* fn 1: %ebx: chip # on ht cpus */
	uint_t cpi_brandid;		/* fn 1: %ebx: brand ID */
	int cpi_clogid;			/* fn 1: %ebx: thread # */
	uint_t cpi_ncpu_per_chip;	/* fn 1: %ebx: logical cpu count */
	uint8_t cpi_cacheinfo[16];	/* fn 2: intel-style cache desc */
	uint_t cpi_ncache;		/* fn 2: number of elements */
	uint_t cpi_ncpu_shr_last_cache;	/* fn 4: %eax: ncpus sharing cache */
	id_t cpi_last_lvl_cacheid;	/* fn 4: %eax: derived cache id */
	uint_t cpi_std_4_size;		/* fn 4: number of fn 4 elements */
	struct cpuid_regs **cpi_std_4;	/* fn 4: %ecx == 0 .. fn4_size */
	struct cpuid_regs cpi_std[NMAX_CPI_STD];	/* 0 .. 5 */
	/*
	 * extended function information
	 */
	uint_t cpi_xmaxeax;		/* fn 0x80000000: %eax */
	char cpi_brandstr[49];		/* fn 0x8000000[234] */
	uint8_t cpi_pabits;		/* fn 0x80000008: %eax[7-0] */
	uint8_t cpi_vabits;		/* fn 0x80000008: %eax[15-8] */
	struct cpuid_regs cpi_extd[NMAX_CPI_EXTD];	/* 0x8000000[0-8] */
	id_t cpi_coreid;		/* same coreid => strands share core */
	int cpi_pkgcoreid;		/* core number within single package */
	uint_t cpi_ncore_per_chip;	/* AMD: fn 0x80000008: %ecx[7-0] */
					/* Intel: fn 4: %eax[31-26] */
	/*
	 * supported feature information
	 */
	uint32_t cpi_support[5];
	/* indices into cpi_support[] */
#define	STD_EDX_FEATURES	0
#define	AMD_EDX_FEATURES	1
#define	TM_EDX_FEATURES		2
#define	STD_ECX_FEATURES	3
#define	AMD_ECX_FEATURES	4
	/*
	 * Synthesized information, where known.
	 */
	uint32_t cpi_chiprev;		/* See X86_CHIPREV_* in x86_archext.h */
	const char *cpi_chiprevstr;	/* May be NULL if chiprev unknown */
	uint32_t cpi_socket;		/* Chip package/socket type */

	struct mwait_info cpi_mwait;	/* fn 5: monitor/mwait info */
	uint32_t cpi_apicid;
};


/* cpuid_info for the boot cpu; installed by cpuid_pass1() when cpu_id == 0 */
static struct cpuid_info cpuid_info0;
/*
 * These bit fields are defined by the Intel Application Note AP-485
 * "Intel Processor Identification and the CPUID Instruction"
 *
 * Every macro takes a pointer to a struct cpuid_info whose cpi_std[1]
 * (and, for the XTD feature words, cpi_extd[1]) has already been filled in.
 */
#define	CPI_FAMILY_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
#define	CPI_MODEL_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
#define	CPI_TYPE(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
#define	CPI_FAMILY(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
#define	CPI_STEP(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
#define	CPI_MODEL(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 7, 4)

#define	CPI_FEATURES_EDX(cpi)		((cpi)->cpi_std[1].cp_edx)
#define	CPI_FEATURES_ECX(cpi)		((cpi)->cpi_std[1].cp_ecx)
#define	CPI_FEATURES_XTD_EDX(cpi)	((cpi)->cpi_extd[1].cp_edx)
#define	CPI_FEATURES_XTD_ECX(cpi)	((cpi)->cpi_extd[1].cp_ecx)

/*
 * fn 1 %ebx is four adjacent byte-wide fields.  CPI_CHUNKS is the CLFLUSH
 * line size in 8-byte chunks and lives in bits 15:8; it must not reach
 * down into bit 7, which belongs to the brand ID.
 */
#define	CPI_BRANDID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
#define	CPI_CHUNKS(cpi)		BITX((cpi)->cpi_std[1].cp_ebx, 15, 8)
#define	CPI_CPU_COUNT(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
#define	CPI_APIC_ID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)

#define	CPI_MAXEAX_MAX		0x100		/* sanity control */
#define	CPI_XMAXEAX_MAX		0x80000100
#define	CPI_FN4_ECX_MAX		0x20		/* sanity: max fn 4 levels */
#define	CPI_FNB_ECX_MAX		0x20		/* sanity: max fn B levels */

/*
 * Function 4 (Deterministic Cache Parameters) macros
 * Defined by Intel Application Note AP-485
 *
 * These take a pointer to the struct cpuid_regs returned for one fn 4 level.
 */
#define	CPI_NUM_CORES(regs)		BITX((regs)->cp_eax, 31, 26)
#define	CPI_NTHR_SHR_CACHE(regs)	BITX((regs)->cp_eax, 25, 14)
#define	CPI_FULL_ASSOC_CACHE(regs)	BITX((regs)->cp_eax, 9, 9)
#define	CPI_SELF_INIT_CACHE(regs)	BITX((regs)->cp_eax, 8, 8)
#define	CPI_CACHE_LVL(regs)		BITX((regs)->cp_eax, 7, 5)
#define	CPI_CACHE_TYPE(regs)		BITX((regs)->cp_eax, 4, 0)
#define	CPI_CPU_LEVEL_TYPE(regs)	BITX((regs)->cp_ecx, 15, 8)

#define	CPI_CACHE_WAYS(regs)		BITX((regs)->cp_ebx, 31, 22)
#define	CPI_CACHE_PARTS(regs)		BITX((regs)->cp_ebx, 21, 12)
#define	CPI_CACHE_COH_LN_SZ(regs)	BITX((regs)->cp_ebx, 11, 0)

#define	CPI_CACHE_SETS(regs)		BITX((regs)->cp_ecx, 31, 0)

#define	CPI_PREFCH_STRIDE(regs)		BITX((regs)->cp_edx, 9, 0)


/*
 * A couple of shorthand macros to identify "later" P6-family chips
 * like the Pentium M and Core.  First, the "older" P6-based stuff
 * (loosely defined as "pre-Pentium-4"):
 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
 *
 * The argument is parenthesized so that any pointer expression may be
 * passed; note that it is evaluated more than once.
 */

#define	IS_LEGACY_P6(cpi) (			\
	(cpi)->cpi_family == 6 &&		\
		((cpi)->cpi_model == 1 ||	\
		(cpi)->cpi_model == 3 ||	\
		(cpi)->cpi_model == 5 ||	\
		(cpi)->cpi_model == 6 ||	\
		(cpi)->cpi_model == 7 ||	\
		(cpi)->cpi_model == 8 ||	\
		(cpi)->cpi_model == 0xA ||	\
		(cpi)->cpi_model == 0xB)	\
)

/* A "new F6" is everything with family 6 that's not the above */
#define	IS_NEW_F6(cpi) (((cpi)->cpi_family == 6) && !IS_LEGACY_P6(cpi))

/* Extended family/model support */
#define	IS_EXTENDED_MODEL_INTEL(cpi) ((cpi)->cpi_family == 0x6 || \
	(cpi)->cpi_family >= 0xf)
285 */ 286 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 287 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 288 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 289 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 290 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 291 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 292 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 293 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 294 /* 295 * Number of sub-cstates for a given c-state. 296 */ 297 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 298 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 299 300 /* 301 * Functions we consune from cpuid_subr.c; don't publish these in a header 302 * file to try and keep people using the expected cpuid_* interfaces. 303 */ 304 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 305 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 306 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 307 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 308 309 /* 310 * Apply up various platform-dependent restrictions where the 311 * underlying platform restrictions mean the CPU can be marked 312 * as less capable than its cpuid instruction would imply. 313 */ 314 #if defined(__xpv) 315 static void 316 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 317 { 318 switch (eax) { 319 case 1: { 320 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 
321 0 : CPUID_INTC_EDX_MCA; 322 cp->cp_edx &= 323 ~(mcamask | 324 CPUID_INTC_EDX_PSE | 325 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 326 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 327 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 328 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 329 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 330 break; 331 } 332 333 case 0x80000001: 334 cp->cp_edx &= 335 ~(CPUID_AMD_EDX_PSE | 336 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 337 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 338 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 339 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 340 CPUID_AMD_EDX_TSCP); 341 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 342 break; 343 default: 344 break; 345 } 346 347 switch (vendor) { 348 case X86_VENDOR_Intel: 349 switch (eax) { 350 case 4: 351 /* 352 * Zero out the (ncores-per-chip - 1) field 353 */ 354 cp->cp_eax &= 0x03fffffff; 355 break; 356 default: 357 break; 358 } 359 break; 360 case X86_VENDOR_AMD: 361 switch (eax) { 362 case 0x80000008: 363 /* 364 * Zero out the (ncores-per-chip - 1) field 365 */ 366 cp->cp_ecx &= 0xffffff00; 367 break; 368 default: 369 break; 370 } 371 break; 372 default: 373 break; 374 } 375 } 376 #else 377 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 378 #endif 379 380 /* 381 * Some undocumented ways of patching the results of the cpuid 382 * instruction to permit running Solaris 10 on future cpus that 383 * we don't currently support. Could be set to non-zero values 384 * via settings in eeprom. 385 */ 386 387 uint32_t cpuid_feature_ecx_include; 388 uint32_t cpuid_feature_ecx_exclude; 389 uint32_t cpuid_feature_edx_include; 390 uint32_t cpuid_feature_edx_exclude; 391 392 void 393 cpuid_alloc_space(cpu_t *cpu) 394 { 395 /* 396 * By convention, cpu0 is the boot cpu, which is set up 397 * before memory allocation is available. All other cpus get 398 * their cpuid_info struct allocated here. 
399 */ 400 ASSERT(cpu->cpu_id != 0); 401 cpu->cpu_m.mcpu_cpi = 402 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 403 } 404 405 void 406 cpuid_free_space(cpu_t *cpu) 407 { 408 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 409 int i; 410 411 ASSERT(cpu->cpu_id != 0); 412 413 /* 414 * Free up any function 4 related dynamic storage 415 */ 416 for (i = 1; i < cpi->cpi_std_4_size; i++) 417 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 418 if (cpi->cpi_std_4_size > 0) 419 kmem_free(cpi->cpi_std_4, 420 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 421 422 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 423 } 424 425 #if !defined(__xpv) 426 427 static void 428 determine_platform() 429 { 430 struct cpuid_regs cp; 431 char *xen_str; 432 uint32_t xen_signature[4]; 433 434 /* 435 * In a fully virtualized domain, Xen's pseudo-cpuid function 436 * 0x40000000 returns a string representing the Xen signature in 437 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 438 * function. 439 */ 440 cp.cp_eax = 0x40000000; 441 (void) __cpuid_insn(&cp); 442 xen_signature[0] = cp.cp_ebx; 443 xen_signature[1] = cp.cp_ecx; 444 xen_signature[2] = cp.cp_edx; 445 xen_signature[3] = 0; 446 xen_str = (char *)xen_signature; 447 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) { 448 platform_type = HW_XEN_HVM; 449 } else if (vmware_platform()) { /* running under vmware hypervisor? 
*/ 450 platform_type = HW_VMWARE; 451 } 452 } 453 454 int 455 get_hwenv(void) 456 { 457 return (platform_type); 458 } 459 460 int 461 is_controldom(void) 462 { 463 return (0); 464 } 465 466 #else 467 468 int 469 get_hwenv(void) 470 { 471 return (HW_XEN_PV); 472 } 473 474 int 475 is_controldom(void) 476 { 477 return (DOMAIN_IS_INITDOMAIN(xen_info)); 478 } 479 480 #endif /* __xpv */ 481 482 uint_t 483 cpuid_pass1(cpu_t *cpu) 484 { 485 uint32_t mask_ecx, mask_edx; 486 uint_t feature = X86_CPUID; 487 struct cpuid_info *cpi; 488 struct cpuid_regs *cp; 489 int xcpuid; 490 #if !defined(__xpv) 491 extern int idle_cpu_prefer_mwait; 492 #endif 493 494 /* 495 * Space statically allocated for cpu0, ensure pointer is set 496 */ 497 if (cpu->cpu_id == 0) 498 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 499 cpi = cpu->cpu_m.mcpu_cpi; 500 ASSERT(cpi != NULL); 501 cp = &cpi->cpi_std[0]; 502 cp->cp_eax = 0; 503 cpi->cpi_maxeax = __cpuid_insn(cp); 504 { 505 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 506 *iptr++ = cp->cp_ebx; 507 *iptr++ = cp->cp_edx; 508 *iptr++ = cp->cp_ecx; 509 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 510 } 511 512 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 513 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 514 515 /* 516 * Limit the range in case of weird hardware 517 */ 518 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 519 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 520 if (cpi->cpi_maxeax < 1) 521 goto pass1_done; 522 523 cp = &cpi->cpi_std[1]; 524 cp->cp_eax = 1; 525 (void) __cpuid_insn(cp); 526 527 /* 528 * Extract identifying constants for easy access. 529 */ 530 cpi->cpi_model = CPI_MODEL(cpi); 531 cpi->cpi_family = CPI_FAMILY(cpi); 532 533 if (cpi->cpi_family == 0xf) 534 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 535 536 /* 537 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 538 * Intel, and presumably everyone else, uses model == 0xf, as 539 * one would expect (max value means possible overflow). Sigh. 
540 */ 541 542 switch (cpi->cpi_vendor) { 543 case X86_VENDOR_Intel: 544 if (IS_EXTENDED_MODEL_INTEL(cpi)) 545 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 546 break; 547 case X86_VENDOR_AMD: 548 if (CPI_FAMILY(cpi) == 0xf) 549 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 550 break; 551 default: 552 if (cpi->cpi_model == 0xf) 553 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 554 break; 555 } 556 557 cpi->cpi_step = CPI_STEP(cpi); 558 cpi->cpi_brandid = CPI_BRANDID(cpi); 559 560 /* 561 * *default* assumptions: 562 * - believe %edx feature word 563 * - ignore %ecx feature word 564 * - 32-bit virtual and physical addressing 565 */ 566 mask_edx = 0xffffffff; 567 mask_ecx = 0; 568 569 cpi->cpi_pabits = cpi->cpi_vabits = 32; 570 571 switch (cpi->cpi_vendor) { 572 case X86_VENDOR_Intel: 573 if (cpi->cpi_family == 5) 574 x86_type = X86_TYPE_P5; 575 else if (IS_LEGACY_P6(cpi)) { 576 x86_type = X86_TYPE_P6; 577 pentiumpro_bug4046376 = 1; 578 pentiumpro_bug4064495 = 1; 579 /* 580 * Clear the SEP bit when it was set erroneously 581 */ 582 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 583 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 584 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 585 x86_type = X86_TYPE_P4; 586 /* 587 * We don't currently depend on any of the %ecx 588 * features until Prescott, so we'll only check 589 * this from P4 onwards. We might want to revisit 590 * that idea later. 591 */ 592 mask_ecx = 0xffffffff; 593 } else if (cpi->cpi_family > 0xf) 594 mask_ecx = 0xffffffff; 595 /* 596 * We don't support MONITOR/MWAIT if leaf 5 is not available 597 * to obtain the monitor linesize. 
598 */ 599 if (cpi->cpi_maxeax < 5) 600 mask_ecx &= ~CPUID_INTC_ECX_MON; 601 break; 602 case X86_VENDOR_IntelClone: 603 default: 604 break; 605 case X86_VENDOR_AMD: 606 #if defined(OPTERON_ERRATUM_108) 607 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 608 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 609 cpi->cpi_model = 0xc; 610 } else 611 #endif 612 if (cpi->cpi_family == 5) { 613 /* 614 * AMD K5 and K6 615 * 616 * These CPUs have an incomplete implementation 617 * of MCA/MCE which we mask away. 618 */ 619 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 620 621 /* 622 * Model 0 uses the wrong (APIC) bit 623 * to indicate PGE. Fix it here. 624 */ 625 if (cpi->cpi_model == 0) { 626 if (cp->cp_edx & 0x200) { 627 cp->cp_edx &= ~0x200; 628 cp->cp_edx |= CPUID_INTC_EDX_PGE; 629 } 630 } 631 632 /* 633 * Early models had problems w/ MMX; disable. 634 */ 635 if (cpi->cpi_model < 6) 636 mask_edx &= ~CPUID_INTC_EDX_MMX; 637 } 638 639 /* 640 * For newer families, SSE3 and CX16, at least, are valid; 641 * enable all 642 */ 643 if (cpi->cpi_family >= 0xf) 644 mask_ecx = 0xffffffff; 645 /* 646 * We don't support MONITOR/MWAIT if leaf 5 is not available 647 * to obtain the monitor linesize. 648 */ 649 if (cpi->cpi_maxeax < 5) 650 mask_ecx &= ~CPUID_INTC_ECX_MON; 651 652 #if !defined(__xpv) 653 /* 654 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 655 * processors. AMD does not intend MWAIT to be used in the cpu 656 * idle loop on current and future processors. 10h and future 657 * AMD processors use more power in MWAIT than HLT. 658 * Pre-family-10h Opterons do not have the MWAIT instruction. 
659 */ 660 idle_cpu_prefer_mwait = 0; 661 #endif 662 663 break; 664 case X86_VENDOR_TM: 665 /* 666 * workaround the NT workaround in CMS 4.1 667 */ 668 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 669 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 670 cp->cp_edx |= CPUID_INTC_EDX_CX8; 671 break; 672 case X86_VENDOR_Centaur: 673 /* 674 * workaround the NT workarounds again 675 */ 676 if (cpi->cpi_family == 6) 677 cp->cp_edx |= CPUID_INTC_EDX_CX8; 678 break; 679 case X86_VENDOR_Cyrix: 680 /* 681 * We rely heavily on the probing in locore 682 * to actually figure out what parts, if any, 683 * of the Cyrix cpuid instruction to believe. 684 */ 685 switch (x86_type) { 686 case X86_TYPE_CYRIX_486: 687 mask_edx = 0; 688 break; 689 case X86_TYPE_CYRIX_6x86: 690 mask_edx = 0; 691 break; 692 case X86_TYPE_CYRIX_6x86L: 693 mask_edx = 694 CPUID_INTC_EDX_DE | 695 CPUID_INTC_EDX_CX8; 696 break; 697 case X86_TYPE_CYRIX_6x86MX: 698 mask_edx = 699 CPUID_INTC_EDX_DE | 700 CPUID_INTC_EDX_MSR | 701 CPUID_INTC_EDX_CX8 | 702 CPUID_INTC_EDX_PGE | 703 CPUID_INTC_EDX_CMOV | 704 CPUID_INTC_EDX_MMX; 705 break; 706 case X86_TYPE_CYRIX_GXm: 707 mask_edx = 708 CPUID_INTC_EDX_MSR | 709 CPUID_INTC_EDX_CX8 | 710 CPUID_INTC_EDX_CMOV | 711 CPUID_INTC_EDX_MMX; 712 break; 713 case X86_TYPE_CYRIX_MediaGX: 714 break; 715 case X86_TYPE_CYRIX_MII: 716 case X86_TYPE_VIA_CYRIX_III: 717 mask_edx = 718 CPUID_INTC_EDX_DE | 719 CPUID_INTC_EDX_TSC | 720 CPUID_INTC_EDX_MSR | 721 CPUID_INTC_EDX_CX8 | 722 CPUID_INTC_EDX_PGE | 723 CPUID_INTC_EDX_CMOV | 724 CPUID_INTC_EDX_MMX; 725 break; 726 default: 727 break; 728 } 729 break; 730 } 731 732 #if defined(__xpv) 733 /* 734 * Do not support MONITOR/MWAIT under a hypervisor 735 */ 736 mask_ecx &= ~CPUID_INTC_ECX_MON; 737 #endif /* __xpv */ 738 739 /* 740 * Now we've figured out the masks that determine 741 * which bits we choose to believe, apply the masks 742 * to the feature words, then map the kernel's view 743 * of these feature words into its feature word. 
744 */ 745 cp->cp_edx &= mask_edx; 746 cp->cp_ecx &= mask_ecx; 747 748 /* 749 * apply any platform restrictions (we don't call this 750 * immediately after __cpuid_insn here, because we need the 751 * workarounds applied above first) 752 */ 753 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 754 755 /* 756 * fold in overrides from the "eeprom" mechanism 757 */ 758 cp->cp_edx |= cpuid_feature_edx_include; 759 cp->cp_edx &= ~cpuid_feature_edx_exclude; 760 761 cp->cp_ecx |= cpuid_feature_ecx_include; 762 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 763 764 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 765 feature |= X86_LARGEPAGE; 766 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 767 feature |= X86_TSC; 768 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 769 feature |= X86_MSR; 770 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 771 feature |= X86_MTRR; 772 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 773 feature |= X86_PGE; 774 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 775 feature |= X86_CMOV; 776 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 777 feature |= X86_MMX; 778 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 779 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 780 feature |= X86_MCA; 781 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 782 feature |= X86_PAE; 783 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 784 feature |= X86_CX8; 785 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 786 feature |= X86_CX16; 787 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 788 feature |= X86_PAT; 789 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 790 feature |= X86_SEP; 791 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 792 /* 793 * In our implementation, fxsave/fxrstor 794 * are prerequisites before we'll even 795 * try and do SSE things. 
796 */ 797 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 798 feature |= X86_SSE; 799 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 800 feature |= X86_SSE2; 801 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 802 feature |= X86_SSE3; 803 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 804 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 805 feature |= X86_SSSE3; 806 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 807 feature |= X86_SSE4_1; 808 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 809 feature |= X86_SSE4_2; 810 } 811 } 812 if (cp->cp_edx & CPUID_INTC_EDX_DE) 813 feature |= X86_DE; 814 #if !defined(__xpv) 815 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 816 817 /* 818 * We require the CLFLUSH instruction for erratum workaround 819 * to use MONITOR/MWAIT. 820 */ 821 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 822 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 823 feature |= X86_MWAIT; 824 } else { 825 extern int idle_cpu_assert_cflush_monitor; 826 827 /* 828 * All processors we are aware of which have 829 * MONITOR/MWAIT also have CLFLUSH. 830 */ 831 if (idle_cpu_assert_cflush_monitor) { 832 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 833 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 834 } 835 } 836 } 837 #endif /* __xpv */ 838 839 /* 840 * Only need it first time, rest of the cpus would follow suite. 841 * we only capture this for the bootcpu. 842 */ 843 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 844 feature |= X86_CLFSH; 845 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 846 } 847 848 if (feature & X86_PAE) 849 cpi->cpi_pabits = 36; 850 851 /* 852 * Hyperthreading configuration is slightly tricky on Intel 853 * and pure clones, and even trickier on AMD. 854 * 855 * (AMD chose to set the HTT bit on their CMP processors, 856 * even though they're not actually hyperthreaded. Thus it 857 * takes a bit more work to figure out what's really going 858 * on ... 
see the handling of the CMP_LGCY bit below) 859 */ 860 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 861 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 862 if (cpi->cpi_ncpu_per_chip > 1) 863 feature |= X86_HTT; 864 } else { 865 cpi->cpi_ncpu_per_chip = 1; 866 } 867 868 /* 869 * Work on the "extended" feature information, doing 870 * some basic initialization for cpuid_pass2() 871 */ 872 xcpuid = 0; 873 switch (cpi->cpi_vendor) { 874 case X86_VENDOR_Intel: 875 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 876 xcpuid++; 877 break; 878 case X86_VENDOR_AMD: 879 if (cpi->cpi_family > 5 || 880 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 881 xcpuid++; 882 break; 883 case X86_VENDOR_Cyrix: 884 /* 885 * Only these Cyrix CPUs are -known- to support 886 * extended cpuid operations. 887 */ 888 if (x86_type == X86_TYPE_VIA_CYRIX_III || 889 x86_type == X86_TYPE_CYRIX_GXm) 890 xcpuid++; 891 break; 892 case X86_VENDOR_Centaur: 893 case X86_VENDOR_TM: 894 default: 895 xcpuid++; 896 break; 897 } 898 899 if (xcpuid) { 900 cp = &cpi->cpi_extd[0]; 901 cp->cp_eax = 0x80000000; 902 cpi->cpi_xmaxeax = __cpuid_insn(cp); 903 } 904 905 if (cpi->cpi_xmaxeax & 0x80000000) { 906 907 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 908 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 909 910 switch (cpi->cpi_vendor) { 911 case X86_VENDOR_Intel: 912 case X86_VENDOR_AMD: 913 if (cpi->cpi_xmaxeax < 0x80000001) 914 break; 915 cp = &cpi->cpi_extd[1]; 916 cp->cp_eax = 0x80000001; 917 (void) __cpuid_insn(cp); 918 919 if (cpi->cpi_vendor == X86_VENDOR_AMD && 920 cpi->cpi_family == 5 && 921 cpi->cpi_model == 6 && 922 cpi->cpi_step == 6) { 923 /* 924 * K6 model 6 uses bit 10 to indicate SYSC 925 * Later models use bit 11. Fix it here. 926 */ 927 if (cp->cp_edx & 0x400) { 928 cp->cp_edx &= ~0x400; 929 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 930 } 931 } 932 933 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 934 935 /* 936 * Compute the additions to the kernel's feature word. 
937 */ 938 if (cp->cp_edx & CPUID_AMD_EDX_NX) 939 feature |= X86_NX; 940 941 /* 942 * Regardless whether or not we boot 64-bit, 943 * we should have a way to identify whether 944 * the CPU is capable of running 64-bit. 945 */ 946 if (cp->cp_edx & CPUID_AMD_EDX_LM) 947 feature |= X86_64; 948 949 #if defined(__amd64) 950 /* 1 GB large page - enable only for 64 bit kernel */ 951 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 952 feature |= X86_1GPG; 953 #endif 954 955 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 956 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 957 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 958 feature |= X86_SSE4A; 959 960 /* 961 * If both the HTT and CMP_LGCY bits are set, 962 * then we're not actually HyperThreaded. Read 963 * "AMD CPUID Specification" for more details. 964 */ 965 if (cpi->cpi_vendor == X86_VENDOR_AMD && 966 (feature & X86_HTT) && 967 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 968 feature &= ~X86_HTT; 969 feature |= X86_CMP; 970 } 971 #if defined(__amd64) 972 /* 973 * It's really tricky to support syscall/sysret in 974 * the i386 kernel; we rely on sysenter/sysexit 975 * instead. In the amd64 kernel, things are -way- 976 * better. 977 */ 978 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 979 feature |= X86_ASYSC; 980 981 /* 982 * While we're thinking about system calls, note 983 * that AMD processors don't support sysenter 984 * in long mode at all, so don't try to program them. 985 */ 986 if (x86_vendor == X86_VENDOR_AMD) 987 feature &= ~X86_SEP; 988 #endif 989 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 990 feature |= X86_TSCP; 991 break; 992 default: 993 break; 994 } 995 996 /* 997 * Get CPUID data about processor cores and hyperthreads. 
998 */ 999 switch (cpi->cpi_vendor) { 1000 case X86_VENDOR_Intel: 1001 if (cpi->cpi_maxeax >= 4) { 1002 cp = &cpi->cpi_std[4]; 1003 cp->cp_eax = 4; 1004 cp->cp_ecx = 0; 1005 (void) __cpuid_insn(cp); 1006 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1007 } 1008 /*FALLTHROUGH*/ 1009 case X86_VENDOR_AMD: 1010 if (cpi->cpi_xmaxeax < 0x80000008) 1011 break; 1012 cp = &cpi->cpi_extd[8]; 1013 cp->cp_eax = 0x80000008; 1014 (void) __cpuid_insn(cp); 1015 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1016 1017 /* 1018 * Virtual and physical address limits from 1019 * cpuid override previously guessed values. 1020 */ 1021 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1022 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1023 break; 1024 default: 1025 break; 1026 } 1027 1028 /* 1029 * Derive the number of cores per chip 1030 */ 1031 switch (cpi->cpi_vendor) { 1032 case X86_VENDOR_Intel: 1033 if (cpi->cpi_maxeax < 4) { 1034 cpi->cpi_ncore_per_chip = 1; 1035 break; 1036 } else { 1037 cpi->cpi_ncore_per_chip = 1038 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1039 } 1040 break; 1041 case X86_VENDOR_AMD: 1042 if (cpi->cpi_xmaxeax < 0x80000008) { 1043 cpi->cpi_ncore_per_chip = 1; 1044 break; 1045 } else { 1046 /* 1047 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1048 * 1 less than the number of physical cores on 1049 * the chip. In family 0x10 this value can 1050 * be affected by "downcoring" - it reflects 1051 * 1 less than the number of cores actually 1052 * enabled on this node. 1053 */ 1054 cpi->cpi_ncore_per_chip = 1055 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1056 } 1057 break; 1058 default: 1059 cpi->cpi_ncore_per_chip = 1; 1060 break; 1061 } 1062 1063 /* 1064 * Get CPUID data about TSC Invariance in Deep C-State. 
1065 */ 1066 switch (cpi->cpi_vendor) { 1067 case X86_VENDOR_Intel: 1068 if (cpi->cpi_maxeax >= 7) { 1069 cp = &cpi->cpi_extd[7]; 1070 cp->cp_eax = 0x80000007; 1071 cp->cp_ecx = 0; 1072 (void) __cpuid_insn(cp); 1073 } 1074 break; 1075 default: 1076 break; 1077 } 1078 } else { 1079 cpi->cpi_ncore_per_chip = 1; 1080 } 1081 1082 /* 1083 * If more than one core, then this processor is CMP. 1084 */ 1085 if (cpi->cpi_ncore_per_chip > 1) 1086 feature |= X86_CMP; 1087 1088 /* 1089 * If the number of cores is the same as the number 1090 * of CPUs, then we cannot have HyperThreading. 1091 */ 1092 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1093 feature &= ~X86_HTT; 1094 1095 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1096 /* 1097 * Single-core single-threaded processors. 1098 */ 1099 cpi->cpi_chipid = -1; 1100 cpi->cpi_clogid = 0; 1101 cpi->cpi_coreid = cpu->cpu_id; 1102 cpi->cpi_pkgcoreid = 0; 1103 } else if (cpi->cpi_ncpu_per_chip > 1) { 1104 uint_t i; 1105 uint_t chipid_shift = 0; 1106 uint_t coreid_shift = 0; 1107 uint_t apic_id = CPI_APIC_ID(cpi); 1108 1109 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1110 chipid_shift++; 1111 cpi->cpi_chipid = apic_id >> chipid_shift; 1112 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1113 1114 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1115 if (feature & X86_CMP) { 1116 /* 1117 * Multi-core (and possibly multi-threaded) 1118 * processors. 
1119 */ 1120 uint_t ncpu_per_core; 1121 if (cpi->cpi_ncore_per_chip == 1) 1122 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1123 else if (cpi->cpi_ncore_per_chip > 1) 1124 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1125 cpi->cpi_ncore_per_chip; 1126 /* 1127 * 8bit APIC IDs on dual core Pentiums 1128 * look like this: 1129 * 1130 * +-----------------------+------+------+ 1131 * | Physical Package ID | MC | HT | 1132 * +-----------------------+------+------+ 1133 * <------- chipid --------> 1134 * <------- coreid ---------------> 1135 * <--- clogid --> 1136 * <------> 1137 * pkgcoreid 1138 * 1139 * Where the number of bits necessary to 1140 * represent MC and HT fields together equals 1141 * to the minimum number of bits necessary to 1142 * store the value of cpi->cpi_ncpu_per_chip. 1143 * Of those bits, the MC part uses the number 1144 * of bits necessary to store the value of 1145 * cpi->cpi_ncore_per_chip. 1146 */ 1147 for (i = 1; i < ncpu_per_core; i <<= 1) 1148 coreid_shift++; 1149 cpi->cpi_coreid = apic_id >> coreid_shift; 1150 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1151 coreid_shift; 1152 } else if (feature & X86_HTT) { 1153 /* 1154 * Single-core multi-threaded processors. 1155 */ 1156 cpi->cpi_coreid = cpi->cpi_chipid; 1157 cpi->cpi_pkgcoreid = 0; 1158 } 1159 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1160 /* 1161 * AMD CMP chips currently have a single thread per 1162 * core, with 2 cores on family 0xf and 2, 3 or 4 1163 * cores on family 0x10. 1164 * 1165 * Since no two cpus share a core we must assign a 1166 * distinct coreid per cpu, and we do this by using 1167 * the cpu_id. This scheme does not, however, 1168 * guarantee that sibling cores of a chip will have 1169 * sequential coreids starting at a multiple of the 1170 * number of cores per chip - that is usually the 1171 * case, but if the ACPI MADT table is presented 1172 * in a different order then we need to perform a 1173 * few more gymnastics for the pkgcoreid. 
1174 * 1175 * In family 0xf CMPs there are 2 cores on all nodes 1176 * present - no mixing of single and dual core parts. 1177 * 1178 * In family 0x10 CMPs cpuid fn 2 ECX[15:12] 1179 * "ApicIdCoreIdSize[3:0]" tells us how 1180 * many least-significant bits in the ApicId 1181 * are used to represent the core number 1182 * within the node. Cores are always 1183 * numbered sequentially from 0 regardless 1184 * of how many or which are disabled, and 1185 * there seems to be no way to discover the 1186 * real core id when some are disabled. 1187 */ 1188 cpi->cpi_coreid = cpu->cpu_id; 1189 1190 if (cpi->cpi_family == 0x10 && 1191 cpi->cpi_xmaxeax >= 0x80000008) { 1192 int coreidsz = 1193 BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 1194 1195 cpi->cpi_pkgcoreid = 1196 apic_id & ((1 << coreidsz) - 1); 1197 } else { 1198 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 1199 } 1200 } else { 1201 /* 1202 * All other processors are currently 1203 * assumed to have single cores. 1204 */ 1205 cpi->cpi_coreid = cpi->cpi_chipid; 1206 cpi->cpi_pkgcoreid = 0; 1207 } 1208 } 1209 1210 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1211 1212 /* 1213 * Synthesize chip "revision" and socket type 1214 */ 1215 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1216 cpi->cpi_model, cpi->cpi_step); 1217 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1218 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1219 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1220 cpi->cpi_model, cpi->cpi_step); 1221 1222 pass1_done: 1223 #if !defined(__xpv) 1224 determine_platform(); 1225 #endif 1226 cpi->cpi_pass = 1; 1227 return (feature); 1228 } 1229 1230 /* 1231 * Make copies of the cpuid table entries we depend on, in 1232 * part for ease of parsing now, in part so that we have only 1233 * one place to correct any of it, in part for ease of 1234 * later export to userland, and in part so we can look at 1235 * this stuff in a crash dump. 
1236 */ 1237 1238 /*ARGSUSED*/ 1239 void 1240 cpuid_pass2(cpu_t *cpu) 1241 { 1242 uint_t n, nmax; 1243 int i; 1244 struct cpuid_regs *cp; 1245 uint8_t *dp; 1246 uint32_t *iptr; 1247 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1248 1249 ASSERT(cpi->cpi_pass == 1); 1250 1251 if (cpi->cpi_maxeax < 1) 1252 goto pass2_done; 1253 1254 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1255 nmax = NMAX_CPI_STD; 1256 /* 1257 * (We already handled n == 0 and n == 1 in pass 1) 1258 */ 1259 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1260 cp->cp_eax = n; 1261 1262 /* 1263 * CPUID function 4 expects %ecx to be initialized 1264 * with an index which indicates which cache to return 1265 * information about. The OS is expected to call function 4 1266 * with %ecx set to 0, 1, 2, ... until it returns with 1267 * EAX[4:0] set to 0, which indicates there are no more 1268 * caches. 1269 * 1270 * Here, populate cpi_std[4] with the information returned by 1271 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1272 * when dynamic memory allocation becomes available. 1273 * 1274 * Note: we need to explicitly initialize %ecx here, since 1275 * function 4 may have been previously invoked. 1276 */ 1277 if (n == 4) 1278 cp->cp_ecx = 0; 1279 1280 (void) __cpuid_insn(cp); 1281 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1282 switch (n) { 1283 case 2: 1284 /* 1285 * "the lower 8 bits of the %eax register 1286 * contain a value that identifies the number 1287 * of times the cpuid [instruction] has to be 1288 * executed to obtain a complete image of the 1289 * processor's caching systems." 1290 * 1291 * How *do* they make this stuff up? 1292 */ 1293 cpi->cpi_ncache = sizeof (*cp) * 1294 BITX(cp->cp_eax, 7, 0); 1295 if (cpi->cpi_ncache == 0) 1296 break; 1297 cpi->cpi_ncache--; /* skip count byte */ 1298 1299 /* 1300 * Well, for now, rather than attempt to implement 1301 * this slightly dubious algorithm, we just look 1302 * at the first 15 .. 
1303 */ 1304 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1305 cpi->cpi_ncache = sizeof (*cp) - 1; 1306 1307 dp = cpi->cpi_cacheinfo; 1308 if (BITX(cp->cp_eax, 31, 31) == 0) { 1309 uint8_t *p = (void *)&cp->cp_eax; 1310 for (i = 1; i < 4; i++) 1311 if (p[i] != 0) 1312 *dp++ = p[i]; 1313 } 1314 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1315 uint8_t *p = (void *)&cp->cp_ebx; 1316 for (i = 0; i < 4; i++) 1317 if (p[i] != 0) 1318 *dp++ = p[i]; 1319 } 1320 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1321 uint8_t *p = (void *)&cp->cp_ecx; 1322 for (i = 0; i < 4; i++) 1323 if (p[i] != 0) 1324 *dp++ = p[i]; 1325 } 1326 if (BITX(cp->cp_edx, 31, 31) == 0) { 1327 uint8_t *p = (void *)&cp->cp_edx; 1328 for (i = 0; i < 4; i++) 1329 if (p[i] != 0) 1330 *dp++ = p[i]; 1331 } 1332 break; 1333 1334 case 3: /* Processor serial number, if PSN supported */ 1335 break; 1336 1337 case 4: /* Deterministic cache parameters */ 1338 break; 1339 1340 case 5: /* Monitor/Mwait parameters */ 1341 { 1342 size_t mwait_size; 1343 1344 /* 1345 * check cpi_mwait.support which was set in cpuid_pass1 1346 */ 1347 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1348 break; 1349 1350 /* 1351 * Protect ourself from insane mwait line size. 1352 * Workaround for incomplete hardware emulator(s). 
1353 */ 1354 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1355 if (mwait_size < sizeof (uint32_t) || 1356 !ISP2(mwait_size)) { 1357 #if DEBUG 1358 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1359 "size %ld", cpu->cpu_id, (long)mwait_size); 1360 #endif 1361 break; 1362 } 1363 1364 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1365 cpi->cpi_mwait.mon_max = mwait_size; 1366 if (MWAIT_EXTENSION(cpi)) { 1367 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1368 if (MWAIT_INT_ENABLE(cpi)) 1369 cpi->cpi_mwait.support |= 1370 MWAIT_ECX_INT_ENABLE; 1371 } 1372 break; 1373 } 1374 default: 1375 break; 1376 } 1377 } 1378 1379 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1380 struct cpuid_regs regs; 1381 1382 cp = ®s; 1383 cp->cp_eax = 0xB; 1384 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1385 1386 (void) __cpuid_insn(cp); 1387 1388 /* 1389 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1390 * indicates that the extended topology enumeration leaf is 1391 * available. 1392 */ 1393 if (cp->cp_ebx) { 1394 uint32_t x2apic_id; 1395 uint_t coreid_shift = 0; 1396 uint_t ncpu_per_core = 1; 1397 uint_t chipid_shift = 0; 1398 uint_t ncpu_per_chip = 1; 1399 uint_t i; 1400 uint_t level; 1401 1402 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1403 cp->cp_eax = 0xB; 1404 cp->cp_ecx = i; 1405 1406 (void) __cpuid_insn(cp); 1407 level = CPI_CPU_LEVEL_TYPE(cp); 1408 1409 if (level == 1) { 1410 x2apic_id = cp->cp_edx; 1411 coreid_shift = BITX(cp->cp_eax, 4, 0); 1412 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1413 } else if (level == 2) { 1414 x2apic_id = cp->cp_edx; 1415 chipid_shift = BITX(cp->cp_eax, 4, 0); 1416 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1417 } 1418 } 1419 1420 cpi->cpi_apicid = x2apic_id; 1421 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1422 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1423 ncpu_per_core; 1424 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1425 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1426 cpi->cpi_coreid = x2apic_id >> 
coreid_shift; 1427 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1428 } 1429 1430 /* Make cp NULL so that we don't stumble on others */ 1431 cp = NULL; 1432 } 1433 1434 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1435 goto pass2_done; 1436 1437 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1438 nmax = NMAX_CPI_EXTD; 1439 /* 1440 * Copy the extended properties, fixing them as we go. 1441 * (We already handled n == 0 and n == 1 in pass 1) 1442 */ 1443 iptr = (void *)cpi->cpi_brandstr; 1444 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1445 cp->cp_eax = 0x80000000 + n; 1446 (void) __cpuid_insn(cp); 1447 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1448 switch (n) { 1449 case 2: 1450 case 3: 1451 case 4: 1452 /* 1453 * Extract the brand string 1454 */ 1455 *iptr++ = cp->cp_eax; 1456 *iptr++ = cp->cp_ebx; 1457 *iptr++ = cp->cp_ecx; 1458 *iptr++ = cp->cp_edx; 1459 break; 1460 case 5: 1461 switch (cpi->cpi_vendor) { 1462 case X86_VENDOR_AMD: 1463 /* 1464 * The Athlon and Duron were the first 1465 * parts to report the sizes of the 1466 * TLB for large pages. Before then, 1467 * we don't trust the data. 1468 */ 1469 if (cpi->cpi_family < 6 || 1470 (cpi->cpi_family == 6 && 1471 cpi->cpi_model < 1)) 1472 cp->cp_eax = 0; 1473 break; 1474 default: 1475 break; 1476 } 1477 break; 1478 case 6: 1479 switch (cpi->cpi_vendor) { 1480 case X86_VENDOR_AMD: 1481 /* 1482 * The Athlon and Duron were the first 1483 * AMD parts with L2 TLB's. 1484 * Before then, don't trust the data. 
1485 */ 1486 if (cpi->cpi_family < 6 || 1487 cpi->cpi_family == 6 && 1488 cpi->cpi_model < 1) 1489 cp->cp_eax = cp->cp_ebx = 0; 1490 /* 1491 * AMD Duron rev A0 reports L2 1492 * cache size incorrectly as 1K 1493 * when it is really 64K 1494 */ 1495 if (cpi->cpi_family == 6 && 1496 cpi->cpi_model == 3 && 1497 cpi->cpi_step == 0) { 1498 cp->cp_ecx &= 0xffff; 1499 cp->cp_ecx |= 0x400000; 1500 } 1501 break; 1502 case X86_VENDOR_Cyrix: /* VIA C3 */ 1503 /* 1504 * VIA C3 processors are a bit messed 1505 * up w.r.t. encoding cache sizes in %ecx 1506 */ 1507 if (cpi->cpi_family != 6) 1508 break; 1509 /* 1510 * model 7 and 8 were incorrectly encoded 1511 * 1512 * xxx is model 8 really broken? 1513 */ 1514 if (cpi->cpi_model == 7 || 1515 cpi->cpi_model == 8) 1516 cp->cp_ecx = 1517 BITX(cp->cp_ecx, 31, 24) << 16 | 1518 BITX(cp->cp_ecx, 23, 16) << 12 | 1519 BITX(cp->cp_ecx, 15, 8) << 8 | 1520 BITX(cp->cp_ecx, 7, 0); 1521 /* 1522 * model 9 stepping 1 has wrong associativity 1523 */ 1524 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1525 cp->cp_ecx |= 8 << 12; 1526 break; 1527 case X86_VENDOR_Intel: 1528 /* 1529 * Extended L2 Cache features function. 1530 * First appeared on Prescott. 
1531 */ 1532 default: 1533 break; 1534 } 1535 break; 1536 default: 1537 break; 1538 } 1539 } 1540 1541 pass2_done: 1542 cpi->cpi_pass = 2; 1543 } 1544 1545 static const char * 1546 intel_cpubrand(const struct cpuid_info *cpi) 1547 { 1548 int i; 1549 1550 if ((x86_feature & X86_CPUID) == 0 || 1551 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1552 return ("i486"); 1553 1554 switch (cpi->cpi_family) { 1555 case 5: 1556 return ("Intel Pentium(r)"); 1557 case 6: 1558 switch (cpi->cpi_model) { 1559 uint_t celeron, xeon; 1560 const struct cpuid_regs *cp; 1561 case 0: 1562 case 1: 1563 case 2: 1564 return ("Intel Pentium(r) Pro"); 1565 case 3: 1566 case 4: 1567 return ("Intel Pentium(r) II"); 1568 case 6: 1569 return ("Intel Celeron(r)"); 1570 case 5: 1571 case 7: 1572 celeron = xeon = 0; 1573 cp = &cpi->cpi_std[2]; /* cache info */ 1574 1575 for (i = 1; i < 4; i++) { 1576 uint_t tmp; 1577 1578 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1579 if (tmp == 0x40) 1580 celeron++; 1581 if (tmp >= 0x44 && tmp <= 0x45) 1582 xeon++; 1583 } 1584 1585 for (i = 0; i < 2; i++) { 1586 uint_t tmp; 1587 1588 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1589 if (tmp == 0x40) 1590 celeron++; 1591 else if (tmp >= 0x44 && tmp <= 0x45) 1592 xeon++; 1593 } 1594 1595 for (i = 0; i < 4; i++) { 1596 uint_t tmp; 1597 1598 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1599 if (tmp == 0x40) 1600 celeron++; 1601 else if (tmp >= 0x44 && tmp <= 0x45) 1602 xeon++; 1603 } 1604 1605 for (i = 0; i < 4; i++) { 1606 uint_t tmp; 1607 1608 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1609 if (tmp == 0x40) 1610 celeron++; 1611 else if (tmp >= 0x44 && tmp <= 0x45) 1612 xeon++; 1613 } 1614 1615 if (celeron) 1616 return ("Intel Celeron(r)"); 1617 if (xeon) 1618 return (cpi->cpi_model == 5 ? 1619 "Intel Pentium(r) II Xeon(tm)" : 1620 "Intel Pentium(r) III Xeon(tm)"); 1621 return (cpi->cpi_model == 5 ? 
1622 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1623 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1624 default: 1625 break; 1626 } 1627 default: 1628 break; 1629 } 1630 1631 /* BrandID is present if the field is nonzero */ 1632 if (cpi->cpi_brandid != 0) { 1633 static const struct { 1634 uint_t bt_bid; 1635 const char *bt_str; 1636 } brand_tbl[] = { 1637 { 0x1, "Intel(r) Celeron(r)" }, 1638 { 0x2, "Intel(r) Pentium(r) III" }, 1639 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1640 { 0x4, "Intel(r) Pentium(r) III" }, 1641 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1642 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1643 { 0x8, "Intel(r) Pentium(r) 4" }, 1644 { 0x9, "Intel(r) Pentium(r) 4" }, 1645 { 0xa, "Intel(r) Celeron(r)" }, 1646 { 0xb, "Intel(r) Xeon(tm)" }, 1647 { 0xc, "Intel(r) Xeon(tm) MP" }, 1648 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1649 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1650 { 0x11, "Mobile Genuine Intel(r)" }, 1651 { 0x12, "Intel(r) Celeron(r) M" }, 1652 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1653 { 0x14, "Intel(r) Celeron(r)" }, 1654 { 0x15, "Mobile Genuine Intel(r)" }, 1655 { 0x16, "Intel(r) Pentium(r) M" }, 1656 { 0x17, "Mobile Intel(r) Celeron(r)" } 1657 }; 1658 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1659 uint_t sgn; 1660 1661 sgn = (cpi->cpi_family << 8) | 1662 (cpi->cpi_model << 4) | cpi->cpi_step; 1663 1664 for (i = 0; i < btblmax; i++) 1665 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1666 break; 1667 if (i < btblmax) { 1668 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1669 return ("Intel(r) Celeron(r)"); 1670 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1671 return ("Intel(r) Xeon(tm) MP"); 1672 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1673 return ("Intel(r) Xeon(tm)"); 1674 return (brand_tbl[i].bt_str); 1675 } 1676 } 1677 1678 return (NULL); 1679 } 1680 1681 static const char * 1682 amd_cpubrand(const struct cpuid_info *cpi) 1683 { 1684 if ((x86_feature & X86_CPUID) == 0 || 1685 cpi->cpi_maxeax < 1 || 
cpi->cpi_family < 5) 1686 return ("i486 compatible"); 1687 1688 switch (cpi->cpi_family) { 1689 case 5: 1690 switch (cpi->cpi_model) { 1691 case 0: 1692 case 1: 1693 case 2: 1694 case 3: 1695 case 4: 1696 case 5: 1697 return ("AMD-K5(r)"); 1698 case 6: 1699 case 7: 1700 return ("AMD-K6(r)"); 1701 case 8: 1702 return ("AMD-K6(r)-2"); 1703 case 9: 1704 return ("AMD-K6(r)-III"); 1705 default: 1706 return ("AMD (family 5)"); 1707 } 1708 case 6: 1709 switch (cpi->cpi_model) { 1710 case 1: 1711 return ("AMD-K7(tm)"); 1712 case 0: 1713 case 2: 1714 case 4: 1715 return ("AMD Athlon(tm)"); 1716 case 3: 1717 case 7: 1718 return ("AMD Duron(tm)"); 1719 case 6: 1720 case 8: 1721 case 10: 1722 /* 1723 * Use the L2 cache size to distinguish 1724 */ 1725 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1726 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1727 default: 1728 return ("AMD (family 6)"); 1729 } 1730 default: 1731 break; 1732 } 1733 1734 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1735 cpi->cpi_brandid != 0) { 1736 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1737 case 3: 1738 return ("AMD Opteron(tm) UP 1xx"); 1739 case 4: 1740 return ("AMD Opteron(tm) DP 2xx"); 1741 case 5: 1742 return ("AMD Opteron(tm) MP 8xx"); 1743 default: 1744 return ("AMD Opteron(tm)"); 1745 } 1746 } 1747 1748 return (NULL); 1749 } 1750 1751 static const char * 1752 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1753 { 1754 if ((x86_feature & X86_CPUID) == 0 || 1755 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1756 type == X86_TYPE_CYRIX_486) 1757 return ("i486 compatible"); 1758 1759 switch (type) { 1760 case X86_TYPE_CYRIX_6x86: 1761 return ("Cyrix 6x86"); 1762 case X86_TYPE_CYRIX_6x86L: 1763 return ("Cyrix 6x86L"); 1764 case X86_TYPE_CYRIX_6x86MX: 1765 return ("Cyrix 6x86MX"); 1766 case X86_TYPE_CYRIX_GXm: 1767 return ("Cyrix GXm"); 1768 case X86_TYPE_CYRIX_MediaGX: 1769 return ("Cyrix MediaGX"); 1770 case X86_TYPE_CYRIX_MII: 1771 return ("Cyrix M2"); 1772 case X86_TYPE_VIA_CYRIX_III: 1773 
return ("VIA Cyrix M3"); 1774 default: 1775 /* 1776 * Have another wild guess .. 1777 */ 1778 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1779 return ("Cyrix 5x86"); 1780 else if (cpi->cpi_family == 5) { 1781 switch (cpi->cpi_model) { 1782 case 2: 1783 return ("Cyrix 6x86"); /* Cyrix M1 */ 1784 case 4: 1785 return ("Cyrix MediaGX"); 1786 default: 1787 break; 1788 } 1789 } else if (cpi->cpi_family == 6) { 1790 switch (cpi->cpi_model) { 1791 case 0: 1792 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1793 case 5: 1794 case 6: 1795 case 7: 1796 case 8: 1797 case 9: 1798 return ("VIA C3"); 1799 default: 1800 break; 1801 } 1802 } 1803 break; 1804 } 1805 return (NULL); 1806 } 1807 1808 /* 1809 * This only gets called in the case that the CPU extended 1810 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1811 * aren't available, or contain null bytes for some reason. 1812 */ 1813 static void 1814 fabricate_brandstr(struct cpuid_info *cpi) 1815 { 1816 const char *brand = NULL; 1817 1818 switch (cpi->cpi_vendor) { 1819 case X86_VENDOR_Intel: 1820 brand = intel_cpubrand(cpi); 1821 break; 1822 case X86_VENDOR_AMD: 1823 brand = amd_cpubrand(cpi); 1824 break; 1825 case X86_VENDOR_Cyrix: 1826 brand = cyrix_cpubrand(cpi, x86_type); 1827 break; 1828 case X86_VENDOR_NexGen: 1829 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1830 brand = "NexGen Nx586"; 1831 break; 1832 case X86_VENDOR_Centaur: 1833 if (cpi->cpi_family == 5) 1834 switch (cpi->cpi_model) { 1835 case 4: 1836 brand = "Centaur C6"; 1837 break; 1838 case 8: 1839 brand = "Centaur C2"; 1840 break; 1841 case 9: 1842 brand = "Centaur C3"; 1843 break; 1844 default: 1845 break; 1846 } 1847 break; 1848 case X86_VENDOR_Rise: 1849 if (cpi->cpi_family == 5 && 1850 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1851 brand = "Rise mP6"; 1852 break; 1853 case X86_VENDOR_SiS: 1854 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1855 brand = "SiS 55x"; 1856 break; 1857 case X86_VENDOR_TM: 1858 if (cpi->cpi_family == 5 && 
cpi->cpi_model == 4) 1859 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1860 break; 1861 case X86_VENDOR_NSC: 1862 case X86_VENDOR_UMC: 1863 default: 1864 break; 1865 } 1866 if (brand) { 1867 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1868 return; 1869 } 1870 1871 /* 1872 * If all else fails ... 1873 */ 1874 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1875 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1876 cpi->cpi_model, cpi->cpi_step); 1877 } 1878 1879 /* 1880 * This routine is called just after kernel memory allocation 1881 * becomes available on cpu0, and as part of mp_startup() on 1882 * the other cpus. 1883 * 1884 * Fixup the brand string, and collect any information from cpuid 1885 * that requires dynamicically allocated storage to represent. 1886 */ 1887 /*ARGSUSED*/ 1888 void 1889 cpuid_pass3(cpu_t *cpu) 1890 { 1891 int i, max, shft, level, size; 1892 struct cpuid_regs regs; 1893 struct cpuid_regs *cp; 1894 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1895 1896 ASSERT(cpi->cpi_pass == 2); 1897 1898 /* 1899 * Function 4: Deterministic cache parameters 1900 * 1901 * Take this opportunity to detect the number of threads 1902 * sharing the last level cache, and construct a corresponding 1903 * cache id. The respective cpuid_info members are initialized 1904 * to the default case of "no last level cache sharing". 1905 */ 1906 cpi->cpi_ncpu_shr_last_cache = 1; 1907 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1908 1909 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1910 1911 /* 1912 * Find the # of elements (size) returned by fn 4, and along 1913 * the way detect last level cache sharing details. 
1914 */ 1915 bzero(®s, sizeof (regs)); 1916 cp = ®s; 1917 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1918 cp->cp_eax = 4; 1919 cp->cp_ecx = i; 1920 1921 (void) __cpuid_insn(cp); 1922 1923 if (CPI_CACHE_TYPE(cp) == 0) 1924 break; 1925 level = CPI_CACHE_LVL(cp); 1926 if (level > max) { 1927 max = level; 1928 cpi->cpi_ncpu_shr_last_cache = 1929 CPI_NTHR_SHR_CACHE(cp) + 1; 1930 } 1931 } 1932 cpi->cpi_std_4_size = size = i; 1933 1934 /* 1935 * Allocate the cpi_std_4 array. The first element 1936 * references the regs for fn 4, %ecx == 0, which 1937 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1938 */ 1939 if (size > 0) { 1940 cpi->cpi_std_4 = 1941 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1942 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1943 1944 /* 1945 * Allocate storage to hold the additional regs 1946 * for function 4, %ecx == 1 .. cpi_std_4_size. 1947 * 1948 * The regs for fn 4, %ecx == 0 has already 1949 * been allocated as indicated above. 1950 */ 1951 for (i = 1; i < size; i++) { 1952 cp = cpi->cpi_std_4[i] = 1953 kmem_zalloc(sizeof (regs), KM_SLEEP); 1954 cp->cp_eax = 4; 1955 cp->cp_ecx = i; 1956 1957 (void) __cpuid_insn(cp); 1958 } 1959 } 1960 /* 1961 * Determine the number of bits needed to represent 1962 * the number of CPUs sharing the last level cache. 1963 * 1964 * Shift off that number of bits from the APIC id to 1965 * derive the cache id. 1966 */ 1967 shft = 0; 1968 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1969 shft++; 1970 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 1971 } 1972 1973 /* 1974 * Now fixup the brand string 1975 */ 1976 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1977 fabricate_brandstr(cpi); 1978 } else { 1979 1980 /* 1981 * If we successfully extracted a brand string from the cpuid 1982 * instruction, clean it up by removing leading spaces and 1983 * similar junk. 
1984 */ 1985 if (cpi->cpi_brandstr[0]) { 1986 size_t maxlen = sizeof (cpi->cpi_brandstr); 1987 char *src, *dst; 1988 1989 dst = src = (char *)cpi->cpi_brandstr; 1990 src[maxlen - 1] = '\0'; 1991 /* 1992 * strip leading spaces 1993 */ 1994 while (*src == ' ') 1995 src++; 1996 /* 1997 * Remove any 'Genuine' or "Authentic" prefixes 1998 */ 1999 if (strncmp(src, "Genuine ", 8) == 0) 2000 src += 8; 2001 if (strncmp(src, "Authentic ", 10) == 0) 2002 src += 10; 2003 2004 /* 2005 * Now do an in-place copy. 2006 * Map (R) to (r) and (TM) to (tm). 2007 * The era of teletypes is long gone, and there's 2008 * -really- no need to shout. 2009 */ 2010 while (*src != '\0') { 2011 if (src[0] == '(') { 2012 if (strncmp(src + 1, "R)", 2) == 0) { 2013 (void) strncpy(dst, "(r)", 3); 2014 src += 3; 2015 dst += 3; 2016 continue; 2017 } 2018 if (strncmp(src + 1, "TM)", 3) == 0) { 2019 (void) strncpy(dst, "(tm)", 4); 2020 src += 4; 2021 dst += 4; 2022 continue; 2023 } 2024 } 2025 *dst++ = *src++; 2026 } 2027 *dst = '\0'; 2028 2029 /* 2030 * Finally, remove any trailing spaces 2031 */ 2032 while (--dst > cpi->cpi_brandstr) 2033 if (*dst == ' ') 2034 *dst = '\0'; 2035 else 2036 break; 2037 } else 2038 fabricate_brandstr(cpi); 2039 } 2040 cpi->cpi_pass = 3; 2041 } 2042 2043 /* 2044 * This routine is called out of bind_hwcap() much later in the life 2045 * of the kernel (post_startup()). The job of this routine is to resolve 2046 * the hardware feature support and kernel support for those features into 2047 * what we're actually going to tell applications via the aux vector. 
2048 */ 2049 uint_t 2050 cpuid_pass4(cpu_t *cpu) 2051 { 2052 struct cpuid_info *cpi; 2053 uint_t hwcap_flags = 0; 2054 2055 if (cpu == NULL) 2056 cpu = CPU; 2057 cpi = cpu->cpu_m.mcpu_cpi; 2058 2059 ASSERT(cpi->cpi_pass == 3); 2060 2061 if (cpi->cpi_maxeax >= 1) { 2062 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2063 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2064 2065 *edx = CPI_FEATURES_EDX(cpi); 2066 *ecx = CPI_FEATURES_ECX(cpi); 2067 2068 /* 2069 * [these require explicit kernel support] 2070 */ 2071 if ((x86_feature & X86_SEP) == 0) 2072 *edx &= ~CPUID_INTC_EDX_SEP; 2073 2074 if ((x86_feature & X86_SSE) == 0) 2075 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2076 if ((x86_feature & X86_SSE2) == 0) 2077 *edx &= ~CPUID_INTC_EDX_SSE2; 2078 2079 if ((x86_feature & X86_HTT) == 0) 2080 *edx &= ~CPUID_INTC_EDX_HTT; 2081 2082 if ((x86_feature & X86_SSE3) == 0) 2083 *ecx &= ~CPUID_INTC_ECX_SSE3; 2084 2085 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2086 if ((x86_feature & X86_SSSE3) == 0) 2087 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2088 if ((x86_feature & X86_SSE4_1) == 0) 2089 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2090 if ((x86_feature & X86_SSE4_2) == 0) 2091 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2092 } 2093 2094 /* 2095 * [no explicit support required beyond x87 fp context] 2096 */ 2097 if (!fpu_exists) 2098 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2099 2100 /* 2101 * Now map the supported feature vector to things that we 2102 * think userland will care about. 
2103 */ 2104 if (*edx & CPUID_INTC_EDX_SEP) 2105 hwcap_flags |= AV_386_SEP; 2106 if (*edx & CPUID_INTC_EDX_SSE) 2107 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2108 if (*edx & CPUID_INTC_EDX_SSE2) 2109 hwcap_flags |= AV_386_SSE2; 2110 if (*ecx & CPUID_INTC_ECX_SSE3) 2111 hwcap_flags |= AV_386_SSE3; 2112 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2113 if (*ecx & CPUID_INTC_ECX_SSSE3) 2114 hwcap_flags |= AV_386_SSSE3; 2115 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2116 hwcap_flags |= AV_386_SSE4_1; 2117 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2118 hwcap_flags |= AV_386_SSE4_2; 2119 if (*ecx & CPUID_INTC_ECX_MOVBE) 2120 hwcap_flags |= AV_386_MOVBE; 2121 } 2122 if (*ecx & CPUID_INTC_ECX_POPCNT) 2123 hwcap_flags |= AV_386_POPCNT; 2124 if (*edx & CPUID_INTC_EDX_FPU) 2125 hwcap_flags |= AV_386_FPU; 2126 if (*edx & CPUID_INTC_EDX_MMX) 2127 hwcap_flags |= AV_386_MMX; 2128 2129 if (*edx & CPUID_INTC_EDX_TSC) 2130 hwcap_flags |= AV_386_TSC; 2131 if (*edx & CPUID_INTC_EDX_CX8) 2132 hwcap_flags |= AV_386_CX8; 2133 if (*edx & CPUID_INTC_EDX_CMOV) 2134 hwcap_flags |= AV_386_CMOV; 2135 if (*ecx & CPUID_INTC_ECX_MON) 2136 hwcap_flags |= AV_386_MON; 2137 if (*ecx & CPUID_INTC_ECX_CX16) 2138 hwcap_flags |= AV_386_CX16; 2139 } 2140 2141 if (x86_feature & X86_HTT) 2142 hwcap_flags |= AV_386_PAUSE; 2143 2144 if (cpi->cpi_xmaxeax < 0x80000001) 2145 goto pass4_done; 2146 2147 switch (cpi->cpi_vendor) { 2148 struct cpuid_regs cp; 2149 uint32_t *edx, *ecx; 2150 2151 case X86_VENDOR_Intel: 2152 /* 2153 * Seems like Intel duplicated what we necessary 2154 * here to make the initial crop of 64-bit OS's work. 2155 * Hopefully, those are the only "extended" bits 2156 * they'll add. 
2157 */ 2158 /*FALLTHROUGH*/ 2159 2160 case X86_VENDOR_AMD: 2161 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2162 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2163 2164 *edx = CPI_FEATURES_XTD_EDX(cpi); 2165 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2166 2167 /* 2168 * [these features require explicit kernel support] 2169 */ 2170 switch (cpi->cpi_vendor) { 2171 case X86_VENDOR_Intel: 2172 if ((x86_feature & X86_TSCP) == 0) 2173 *edx &= ~CPUID_AMD_EDX_TSCP; 2174 break; 2175 2176 case X86_VENDOR_AMD: 2177 if ((x86_feature & X86_TSCP) == 0) 2178 *edx &= ~CPUID_AMD_EDX_TSCP; 2179 if ((x86_feature & X86_SSE4A) == 0) 2180 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2181 break; 2182 2183 default: 2184 break; 2185 } 2186 2187 /* 2188 * [no explicit support required beyond 2189 * x87 fp context and exception handlers] 2190 */ 2191 if (!fpu_exists) 2192 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2193 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2194 2195 if ((x86_feature & X86_NX) == 0) 2196 *edx &= ~CPUID_AMD_EDX_NX; 2197 #if !defined(__amd64) 2198 *edx &= ~CPUID_AMD_EDX_LM; 2199 #endif 2200 /* 2201 * Now map the supported feature vector to 2202 * things that we think userland will care about. 
2203 */ 2204 #if defined(__amd64) 2205 if (*edx & CPUID_AMD_EDX_SYSC) 2206 hwcap_flags |= AV_386_AMD_SYSC; 2207 #endif 2208 if (*edx & CPUID_AMD_EDX_MMXamd) 2209 hwcap_flags |= AV_386_AMD_MMX; 2210 if (*edx & CPUID_AMD_EDX_3DNow) 2211 hwcap_flags |= AV_386_AMD_3DNow; 2212 if (*edx & CPUID_AMD_EDX_3DNowx) 2213 hwcap_flags |= AV_386_AMD_3DNowx; 2214 2215 switch (cpi->cpi_vendor) { 2216 case X86_VENDOR_AMD: 2217 if (*edx & CPUID_AMD_EDX_TSCP) 2218 hwcap_flags |= AV_386_TSCP; 2219 if (*ecx & CPUID_AMD_ECX_AHF64) 2220 hwcap_flags |= AV_386_AHF; 2221 if (*ecx & CPUID_AMD_ECX_SSE4A) 2222 hwcap_flags |= AV_386_AMD_SSE4A; 2223 if (*ecx & CPUID_AMD_ECX_LZCNT) 2224 hwcap_flags |= AV_386_AMD_LZCNT; 2225 break; 2226 2227 case X86_VENDOR_Intel: 2228 if (*edx & CPUID_AMD_EDX_TSCP) 2229 hwcap_flags |= AV_386_TSCP; 2230 /* 2231 * Aarrgh. 2232 * Intel uses a different bit in the same word. 2233 */ 2234 if (*ecx & CPUID_INTC_ECX_AHF64) 2235 hwcap_flags |= AV_386_AHF; 2236 break; 2237 2238 default: 2239 break; 2240 } 2241 break; 2242 2243 case X86_VENDOR_TM: 2244 cp.cp_eax = 0x80860001; 2245 (void) __cpuid_insn(&cp); 2246 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2247 break; 2248 2249 default: 2250 break; 2251 } 2252 2253 pass4_done: 2254 cpi->cpi_pass = 4; 2255 return (hwcap_flags); 2256 } 2257 2258 2259 /* 2260 * Simulate the cpuid instruction using the data we previously 2261 * captured about this CPU. We try our best to return the truth 2262 * about the hardware, independently of kernel support. 2263 */ 2264 uint32_t 2265 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2266 { 2267 struct cpuid_info *cpi; 2268 struct cpuid_regs *xcp; 2269 2270 if (cpu == NULL) 2271 cpu = CPU; 2272 cpi = cpu->cpu_m.mcpu_cpi; 2273 2274 ASSERT(cpuid_checkpass(cpu, 3)); 2275 2276 /* 2277 * CPUID data is cached in two separate places: cpi_std for standard 2278 * CPUID functions, and cpi_extd for extended CPUID functions. 
2279 */ 2280 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2281 xcp = &cpi->cpi_std[cp->cp_eax]; 2282 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2283 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2284 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2285 else 2286 /* 2287 * The caller is asking for data from an input parameter which 2288 * the kernel has not cached. In this case we go fetch from 2289 * the hardware and return the data directly to the user. 2290 */ 2291 return (__cpuid_insn(cp)); 2292 2293 cp->cp_eax = xcp->cp_eax; 2294 cp->cp_ebx = xcp->cp_ebx; 2295 cp->cp_ecx = xcp->cp_ecx; 2296 cp->cp_edx = xcp->cp_edx; 2297 return (cp->cp_eax); 2298 } 2299 2300 int 2301 cpuid_checkpass(cpu_t *cpu, int pass) 2302 { 2303 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2304 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2305 } 2306 2307 int 2308 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2309 { 2310 ASSERT(cpuid_checkpass(cpu, 3)); 2311 2312 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2313 } 2314 2315 int 2316 cpuid_is_cmt(cpu_t *cpu) 2317 { 2318 if (cpu == NULL) 2319 cpu = CPU; 2320 2321 ASSERT(cpuid_checkpass(cpu, 1)); 2322 2323 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2324 } 2325 2326 /* 2327 * AMD and Intel both implement the 64-bit variant of the syscall 2328 * instruction (syscallq), so if there's -any- support for syscall, 2329 * cpuid currently says "yes, we support this". 2330 * 2331 * However, Intel decided to -not- implement the 32-bit variant of the 2332 * syscall instruction, so we provide a predicate to allow our caller 2333 * to test that subtlety here. 2334 * 2335 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2336 * even in the case where the hardware would in fact support it. 2337 */ 2338 /*ARGSUSED*/ 2339 int 2340 cpuid_syscall32_insn(cpu_t *cpu) 2341 { 2342 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 2343 2344 #if !defined(__xpv) 2345 if (cpu == NULL) 2346 cpu = CPU; 2347 2348 /*CSTYLED*/ 2349 { 2350 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2351 2352 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2353 cpi->cpi_xmaxeax >= 0x80000001 && 2354 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2355 return (1); 2356 } 2357 #endif 2358 return (0); 2359 } 2360 2361 int 2362 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2363 { 2364 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2365 2366 static const char fmt[] = 2367 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2368 static const char fmt_ht[] = 2369 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2370 2371 ASSERT(cpuid_checkpass(cpu, 1)); 2372 2373 if (cpuid_is_cmt(cpu)) 2374 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2375 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2376 cpi->cpi_family, cpi->cpi_model, 2377 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2378 return (snprintf(s, n, fmt, 2379 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2380 cpi->cpi_family, cpi->cpi_model, 2381 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2382 } 2383 2384 const char * 2385 cpuid_getvendorstr(cpu_t *cpu) 2386 { 2387 ASSERT(cpuid_checkpass(cpu, 1)); 2388 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2389 } 2390 2391 uint_t 2392 cpuid_getvendor(cpu_t *cpu) 2393 { 2394 ASSERT(cpuid_checkpass(cpu, 1)); 2395 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2396 } 2397 2398 uint_t 2399 cpuid_getfamily(cpu_t *cpu) 2400 { 2401 ASSERT(cpuid_checkpass(cpu, 1)); 2402 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2403 } 2404 2405 uint_t 2406 cpuid_getmodel(cpu_t *cpu) 2407 { 2408 ASSERT(cpuid_checkpass(cpu, 1)); 2409 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2410 } 2411 2412 uint_t 2413 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2414 { 2415 ASSERT(cpuid_checkpass(cpu, 1)); 2416 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2417 } 2418 2419 uint_t 2420 cpuid_get_ncore_per_chip(cpu_t *cpu) 2421 { 2422 
ASSERT(cpuid_checkpass(cpu, 1)); 2423 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2424 } 2425 2426 uint_t 2427 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2428 { 2429 ASSERT(cpuid_checkpass(cpu, 2)); 2430 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2431 } 2432 2433 id_t 2434 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2435 { 2436 ASSERT(cpuid_checkpass(cpu, 2)); 2437 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2438 } 2439 2440 uint_t 2441 cpuid_getstep(cpu_t *cpu) 2442 { 2443 ASSERT(cpuid_checkpass(cpu, 1)); 2444 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2445 } 2446 2447 uint_t 2448 cpuid_getsig(struct cpu *cpu) 2449 { 2450 ASSERT(cpuid_checkpass(cpu, 1)); 2451 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2452 } 2453 2454 uint32_t 2455 cpuid_getchiprev(struct cpu *cpu) 2456 { 2457 ASSERT(cpuid_checkpass(cpu, 1)); 2458 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2459 } 2460 2461 const char * 2462 cpuid_getchiprevstr(struct cpu *cpu) 2463 { 2464 ASSERT(cpuid_checkpass(cpu, 1)); 2465 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2466 } 2467 2468 uint32_t 2469 cpuid_getsockettype(struct cpu *cpu) 2470 { 2471 ASSERT(cpuid_checkpass(cpu, 1)); 2472 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2473 } 2474 2475 int 2476 cpuid_get_chipid(cpu_t *cpu) 2477 { 2478 ASSERT(cpuid_checkpass(cpu, 1)); 2479 2480 if (cpuid_is_cmt(cpu)) 2481 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2482 return (cpu->cpu_id); 2483 } 2484 2485 id_t 2486 cpuid_get_coreid(cpu_t *cpu) 2487 { 2488 ASSERT(cpuid_checkpass(cpu, 1)); 2489 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2490 } 2491 2492 int 2493 cpuid_get_pkgcoreid(cpu_t *cpu) 2494 { 2495 ASSERT(cpuid_checkpass(cpu, 1)); 2496 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2497 } 2498 2499 int 2500 cpuid_get_clogid(cpu_t *cpu) 2501 { 2502 ASSERT(cpuid_checkpass(cpu, 1)); 2503 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2504 } 2505 2506 void 2507 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2508 { 2509 struct cpuid_info 
*cpi; 2510 2511 if (cpu == NULL) 2512 cpu = CPU; 2513 cpi = cpu->cpu_m.mcpu_cpi; 2514 2515 ASSERT(cpuid_checkpass(cpu, 1)); 2516 2517 if (pabits) 2518 *pabits = cpi->cpi_pabits; 2519 if (vabits) 2520 *vabits = cpi->cpi_vabits; 2521 } 2522 2523 /* 2524 * Returns the number of data TLB entries for a corresponding 2525 * pagesize. If it can't be computed, or isn't known, the 2526 * routine returns zero. If you ask about an architecturally 2527 * impossible pagesize, the routine will panic (so that the 2528 * hat implementor knows that things are inconsistent.) 2529 */ 2530 uint_t 2531 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2532 { 2533 struct cpuid_info *cpi; 2534 uint_t dtlb_nent = 0; 2535 2536 if (cpu == NULL) 2537 cpu = CPU; 2538 cpi = cpu->cpu_m.mcpu_cpi; 2539 2540 ASSERT(cpuid_checkpass(cpu, 1)); 2541 2542 /* 2543 * Check the L2 TLB info 2544 */ 2545 if (cpi->cpi_xmaxeax >= 0x80000006) { 2546 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2547 2548 switch (pagesize) { 2549 2550 case 4 * 1024: 2551 /* 2552 * All zero in the top 16 bits of the register 2553 * indicates a unified TLB. Size is in low 16 bits. 2554 */ 2555 if ((cp->cp_ebx & 0xffff0000) == 0) 2556 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2557 else 2558 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2559 break; 2560 2561 case 2 * 1024 * 1024: 2562 if ((cp->cp_eax & 0xffff0000) == 0) 2563 dtlb_nent = cp->cp_eax & 0x0000ffff; 2564 else 2565 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2566 break; 2567 2568 default: 2569 panic("unknown L2 pagesize"); 2570 /*NOTREACHED*/ 2571 } 2572 } 2573 2574 if (dtlb_nent != 0) 2575 return (dtlb_nent); 2576 2577 /* 2578 * No L2 TLB support for this size, try L1. 
2579 */ 2580 if (cpi->cpi_xmaxeax >= 0x80000005) { 2581 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2582 2583 switch (pagesize) { 2584 case 4 * 1024: 2585 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2586 break; 2587 case 2 * 1024 * 1024: 2588 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2589 break; 2590 default: 2591 panic("unknown L1 d-TLB pagesize"); 2592 /*NOTREACHED*/ 2593 } 2594 } 2595 2596 return (dtlb_nent); 2597 } 2598 2599 /* 2600 * Return 0 if the erratum is not present or not applicable, positive 2601 * if it is, and negative if the status of the erratum is unknown. 2602 * 2603 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2604 * Processors" #25759, Rev 3.57, August 2005 2605 */ 2606 int 2607 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2608 { 2609 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2610 uint_t eax; 2611 2612 /* 2613 * Bail out if this CPU isn't an AMD CPU, or if it's 2614 * a legacy (32-bit) AMD CPU. 2615 */ 2616 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2617 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2618 cpi->cpi_family == 6) 2619 2620 return (0); 2621 2622 eax = cpi->cpi_std[1].cp_eax; 2623 2624 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2625 #define SH_B3(eax) (eax == 0xf51) 2626 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2627 2628 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2629 2630 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2631 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2632 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2633 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2634 2635 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2636 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2637 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2638 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2639 2640 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2641 #define JH_E1(eax) (eax == 0x20f10) /* 
JH8_E0 had 0x20f30 */ 2642 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2643 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2644 #define BH_E4(eax) (eax == 0x20fb1) 2645 #define SH_E5(eax) (eax == 0x20f42) 2646 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2647 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2648 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2649 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2650 DH_E6(eax) || JH_E6(eax)) 2651 2652 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2653 #define DR_B0(eax) (eax == 0x100f20) 2654 #define DR_B1(eax) (eax == 0x100f21) 2655 #define DR_BA(eax) (eax == 0x100f2a) 2656 #define DR_B2(eax) (eax == 0x100f22) 2657 #define DR_B3(eax) (eax == 0x100f23) 2658 #define RB_C0(eax) (eax == 0x100f40) 2659 2660 switch (erratum) { 2661 case 1: 2662 return (cpi->cpi_family < 0x10); 2663 case 51: /* what does the asterisk mean? */ 2664 return (B(eax) || SH_C0(eax) || CG(eax)); 2665 case 52: 2666 return (B(eax)); 2667 case 57: 2668 return (cpi->cpi_family <= 0x11); 2669 case 58: 2670 return (B(eax)); 2671 case 60: 2672 return (cpi->cpi_family <= 0x11); 2673 case 61: 2674 case 62: 2675 case 63: 2676 case 64: 2677 case 65: 2678 case 66: 2679 case 68: 2680 case 69: 2681 case 70: 2682 case 71: 2683 return (B(eax)); 2684 case 72: 2685 return (SH_B0(eax)); 2686 case 74: 2687 return (B(eax)); 2688 case 75: 2689 return (cpi->cpi_family < 0x10); 2690 case 76: 2691 return (B(eax)); 2692 case 77: 2693 return (cpi->cpi_family <= 0x11); 2694 case 78: 2695 return (B(eax) || SH_C0(eax)); 2696 case 79: 2697 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2698 case 80: 2699 case 81: 2700 case 82: 2701 return (B(eax)); 2702 case 83: 2703 return (B(eax) || SH_C0(eax) || CG(eax)); 2704 case 85: 2705 return (cpi->cpi_family < 0x10); 2706 case 86: 2707 return (SH_C0(eax) || CG(eax)); 2708 case 88: 2709 #if !defined(__amd64) 2710 return (0); 2711 #else 2712 return 
(B(eax) || SH_C0(eax)); 2713 #endif 2714 case 89: 2715 return (cpi->cpi_family < 0x10); 2716 case 90: 2717 return (B(eax) || SH_C0(eax) || CG(eax)); 2718 case 91: 2719 case 92: 2720 return (B(eax) || SH_C0(eax)); 2721 case 93: 2722 return (SH_C0(eax)); 2723 case 94: 2724 return (B(eax) || SH_C0(eax) || CG(eax)); 2725 case 95: 2726 #if !defined(__amd64) 2727 return (0); 2728 #else 2729 return (B(eax) || SH_C0(eax)); 2730 #endif 2731 case 96: 2732 return (B(eax) || SH_C0(eax) || CG(eax)); 2733 case 97: 2734 case 98: 2735 return (SH_C0(eax) || CG(eax)); 2736 case 99: 2737 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2738 case 100: 2739 return (B(eax) || SH_C0(eax)); 2740 case 101: 2741 case 103: 2742 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2743 case 104: 2744 return (SH_C0(eax) || CG(eax) || D0(eax)); 2745 case 105: 2746 case 106: 2747 case 107: 2748 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2749 case 108: 2750 return (DH_CG(eax)); 2751 case 109: 2752 return (SH_C0(eax) || CG(eax) || D0(eax)); 2753 case 110: 2754 return (D0(eax) || EX(eax)); 2755 case 111: 2756 return (CG(eax)); 2757 case 112: 2758 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2759 case 113: 2760 return (eax == 0x20fc0); 2761 case 114: 2762 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2763 case 115: 2764 return (SH_E0(eax) || JH_E1(eax)); 2765 case 116: 2766 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2767 case 117: 2768 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2769 case 118: 2770 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2771 JH_E6(eax)); 2772 case 121: 2773 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2774 case 122: 2775 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2776 case 123: 2777 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2778 case 131: 2779 return (cpi->cpi_family < 0x10); 2780 case 6336786: 2781 /* 2782 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2783 * if 
this is a K8 family or newer processor 2784 */ 2785 if (CPI_FAMILY(cpi) == 0xf) { 2786 struct cpuid_regs regs; 2787 regs.cp_eax = 0x80000007; 2788 (void) __cpuid_insn(®s); 2789 return (!(regs.cp_edx & 0x100)); 2790 } 2791 return (0); 2792 case 6323525: 2793 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2794 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2795 2796 case 6671130: 2797 /* 2798 * check for processors (pre-Shanghai) that do not provide 2799 * optimal management of 1gb ptes in its tlb. 2800 */ 2801 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 2802 2803 case 298: 2804 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 2805 DR_B2(eax) || RB_C0(eax)); 2806 2807 default: 2808 return (-1); 2809 2810 } 2811 } 2812 2813 /* 2814 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 2815 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 2816 */ 2817 int 2818 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 2819 { 2820 struct cpuid_info *cpi; 2821 uint_t osvwid; 2822 static int osvwfeature = -1; 2823 uint64_t osvwlength; 2824 2825 2826 cpi = cpu->cpu_m.mcpu_cpi; 2827 2828 /* confirm OSVW supported */ 2829 if (osvwfeature == -1) { 2830 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 2831 } else { 2832 /* assert that osvw feature setting is consistent on all cpus */ 2833 ASSERT(osvwfeature == 2834 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 2835 } 2836 if (!osvwfeature) 2837 return (-1); 2838 2839 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 2840 2841 switch (erratum) { 2842 case 298: /* osvwid is 0 */ 2843 osvwid = 0; 2844 if (osvwlength <= (uint64_t)osvwid) { 2845 /* osvwid 0 is unknown */ 2846 return (-1); 2847 } 2848 2849 /* 2850 * Check the OSVW STATUS MSR to determine the state 2851 * of the erratum where: 2852 * 0 - fixed by HW 2853 * 1 - BIOS has applied the workaround when BIOS 2854 * workaround is available. 
(Or for other errata, 2855 * OS workaround is required.) 2856 * For a value of 1, caller will confirm that the 2857 * erratum 298 workaround has indeed been applied by BIOS. 2858 * 2859 * A 1 may be set in cpus that have a HW fix 2860 * in a mixed cpu system. Regarding erratum 298: 2861 * In a multiprocessor platform, the workaround above 2862 * should be applied to all processors regardless of 2863 * silicon revision when an affected processor is 2864 * present. 2865 */ 2866 2867 return (rdmsr(MSR_AMD_OSVW_STATUS + 2868 (osvwid / OSVW_ID_CNT_PER_MSR)) & 2869 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 2870 2871 default: 2872 return (-1); 2873 } 2874 } 2875 2876 static const char assoc_str[] = "associativity"; 2877 static const char line_str[] = "line-size"; 2878 static const char size_str[] = "size"; 2879 2880 static void 2881 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2882 uint32_t val) 2883 { 2884 char buf[128]; 2885 2886 /* 2887 * ndi_prop_update_int() is used because it is desirable for 2888 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2889 */ 2890 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2891 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2892 } 2893 2894 /* 2895 * Intel-style cache/tlb description 2896 * 2897 * Standard cpuid level 2 gives a randomly ordered 2898 * selection of tags that index into a table that describes 2899 * cache and tlb properties. 
/*
 * Intel-style cache/tlb description
 *
 * Standard cpuid level 2 gives a randomly ordered
 * selection of tags that index into a table that describes
 * cache and tlb properties.
 */

/* Property-name prefixes used as ct_label in the descriptor tables. */
static const char l1_icache_str[] = "l1-icache";
static const char l1_dcache_str[] = "l1-dcache";
static const char l2_cache_str[] = "l2-cache";
static const char l3_cache_str[] = "l3-cache";
static const char itlb4k_str[] = "itlb-4K";
static const char dtlb4k_str[] = "dtlb-4K";
static const char itlb2M_str[] = "itlb-2M";
static const char itlb4M_str[] = "itlb-4M";
static const char dtlb4M_str[] = "dtlb-4M";
static const char dtlb24_str[] = "dtlb0-2M-4M";
static const char itlb424_str[] = "itlb-4K-2M-4M";
static const char itlb24_str[] = "itlb-2M-4M";
static const char dtlb44_str[] = "dtlb-4K-4M";
static const char sl1_dcache_str[] = "sectored-l1-dcache";
static const char sl2_cache_str[] = "sectored-l2-cache";
static const char itrace_str[] = "itrace-cache";
static const char sl3_cache_str[] = "sectored-l3-cache";
static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";

/*
 * One row per descriptor byte.  A row whose ct_code is 0 terminates the
 * table.  Judging by the rows below, ct_size holds bytes for caches and
 * an entry count for TLBs, and ct_line_size is 0 where a line size does
 * not apply.
 */
static const struct cachetab {
	uint8_t		ct_code;	/* descriptor byte */
	uint8_t		ct_assoc;	/* associativity */
	uint16_t	ct_line_size;	/* line size, 0 if none */
	size_t		ct_size;	/* size (see above) */
	const char	*ct_label;	/* property-name prefix */
} intel_ctab[] = {
	/*
	 * maintain descending order!
	 * (find_cacheent() stops at the first row whose code is not
	 * greater than the one it is looking for)
	 *
	 * Codes ignored - Reason
	 * ----------------------
	 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
	 * f0H/f1H - Currently we do not interpret prefetch size by design
	 */
	{ 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
	{ 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
	{ 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
	{ 0xde, 12, 64, 6*1024*1024, l3_cache_str},
	{ 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
	{ 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
	{ 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
	{ 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
	{ 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
	{ 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
	{ 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
	{ 0xd0, 4, 64, 512*1024, l3_cache_str},
	{ 0xca, 4, 0, 512, sh_l2_tlb4k_str},
	{ 0xc0, 4, 0, 8, dtlb44_str },
	{ 0xba, 4, 0, 64, dtlb4k_str },
	{ 0xb4, 4, 0, 256, dtlb4k_str },
	{ 0xb3, 4, 0, 128, dtlb4k_str },
	{ 0xb2, 4, 0, 64, itlb4k_str },
	{ 0xb0, 4, 0, 128, itlb4k_str },
	{ 0x87, 8, 64, 1024*1024, l2_cache_str},
	{ 0x86, 4, 64, 512*1024, l2_cache_str},
	{ 0x85, 8, 32, 2*1024*1024, l2_cache_str},
	{ 0x84, 8, 32, 1024*1024, l2_cache_str},
	{ 0x83, 8, 32, 512*1024, l2_cache_str},
	{ 0x82, 8, 32, 256*1024, l2_cache_str},
	{ 0x80, 8, 64, 512*1024, l2_cache_str},
	{ 0x7f, 2, 64, 512*1024, l2_cache_str},
	{ 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
	{ 0x7c, 8, 64, 1024*1024, sl2_cache_str},
	{ 0x7b, 8, 64, 512*1024, sl2_cache_str},
	{ 0x7a, 8, 64, 256*1024, sl2_cache_str},
	{ 0x79, 8, 64, 128*1024, sl2_cache_str},
	{ 0x78, 8, 64, 1024*1024, l2_cache_str},
	{ 0x73, 8, 0, 64*1024, itrace_str},
	{ 0x72, 8, 0, 32*1024, itrace_str},
	{ 0x71, 8, 0, 16*1024, itrace_str},
	{ 0x70, 8, 0, 12*1024, itrace_str},
	{ 0x68, 4, 64, 32*1024, sl1_dcache_str},
	{ 0x67, 4, 64, 16*1024, sl1_dcache_str},
	{ 0x66, 4, 64, 8*1024, sl1_dcache_str},
	{ 0x60, 8, 64, 16*1024, sl1_dcache_str},
	{ 0x5d, 0, 0, 256, dtlb44_str},
	{ 0x5c, 0, 0, 128, dtlb44_str},
	{ 0x5b, 0, 0, 64, dtlb44_str},
	{ 0x5a, 4, 0, 32, dtlb24_str},
	{ 0x59, 0, 0, 16, dtlb4k_str},
	{ 0x57, 4, 0, 16, dtlb4k_str},
	{ 0x56, 4, 0, 16, dtlb4M_str},
	{ 0x55, 0, 0, 7, itlb24_str},
	{ 0x52, 0, 0, 256, itlb424_str},
	{ 0x51, 0, 0, 128, itlb424_str},
	{ 0x50, 0, 0, 64, itlb424_str},
	{ 0x4f, 0, 0, 32, itlb4k_str},
	{ 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
	{ 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
	{ 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
	{ 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
	{ 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
	{ 0x49, 16, 64, 4*1024*1024, l3_cache_str},
	{ 0x48, 12, 64, 3*1024*1024, l2_cache_str},
	{ 0x47, 8, 64, 8*1024*1024, l3_cache_str},
	{ 0x46, 4, 64, 4*1024*1024, l3_cache_str},
	{ 0x45, 4, 32, 2*1024*1024, l2_cache_str},
	{ 0x44, 4, 32, 1024*1024, l2_cache_str},
	{ 0x43, 4, 32, 512*1024, l2_cache_str},
	{ 0x42, 4, 32, 256*1024, l2_cache_str},
	{ 0x41, 4, 32, 128*1024, l2_cache_str},
	{ 0x3e, 4, 64, 512*1024, sl2_cache_str},
	{ 0x3d, 6, 64, 384*1024, sl2_cache_str},
	{ 0x3c, 4, 64, 256*1024, sl2_cache_str},
	{ 0x3b, 2, 64, 128*1024, sl2_cache_str},
	{ 0x3a, 6, 64, 192*1024, sl2_cache_str},
	{ 0x39, 4, 64, 128*1024, sl2_cache_str},
	{ 0x30, 8, 64, 32*1024, l1_icache_str},
	{ 0x2c, 8, 64, 32*1024, l1_dcache_str},
	{ 0x29, 8, 64, 4096*1024, sl3_cache_str},
	{ 0x25, 8, 64, 2048*1024, sl3_cache_str},
	{ 0x23, 8, 64, 1024*1024, sl3_cache_str},
	{ 0x22, 4, 64, 512*1024, sl3_cache_str},
	{ 0x0e, 6, 64, 24*1024, l1_dcache_str},
	{ 0x0d, 4, 32, 16*1024, l1_dcache_str},
	{ 0x0c, 4, 32, 16*1024, l1_dcache_str},
	{ 0x0b, 4, 0, 4, itlb4M_str},
	{ 0x0a, 2, 32, 8*1024, l1_dcache_str},
	{ 0x08, 4, 32, 16*1024, l1_icache_str},
	{ 0x06, 4, 32, 8*1024, l1_icache_str},
	{ 0x05, 4, 0, 32, dtlb4M_str},
	{ 0x04, 4, 0, 8, dtlb4M_str},
	{ 0x03, 4, 0, 64, dtlb4k_str},
	{ 0x02, 4, 0, 2, itlb4M_str},
	{ 0x01, 4, 0, 32, itlb4k_str},
	{ 0 }
};

/*
 * Cyrix-specific descriptors.  Like intel_ctab this table is searched by
 * find_cacheent(), so it too must be kept in descending ct_code order;
 * with 0x70 listed first, a lookup of 0x80 stopped on the 0x70 row and
 * never reached its own entry.
 */
static const struct cachetab cyrix_ctab[] = {
	{ 0x80, 4, 16, 16*1024, "l1-cache" },
	{ 0x70, 4, 0, 32, "tlb-4K" },
	{ 0 }
};

/*
 * Search a cache table for a matching entry
 *
 * 'ct' must be sorted by descending ct_code and end with a row whose
 * ct_code is 0.  Returns the row whose ct_code equals 'code', or NULL if
 * there is none.  A 'code' of 0 never matches.
 */
static const struct cachetab *
find_cacheent(const struct cachetab *ct, uint_t code)
{
	if (code != 0) {
		/* skip every row with a larger code than the one wanted */
		for (; ct->ct_code != 0; ct++)
			if (ct->ct_code <= code)
				break;
		if (ct->ct_code == code)
			return (ct);
	}
	return (NULL);
}
3091 */ 3092 static void 3093 intel_walk_cacheinfo(struct cpuid_info *cpi, 3094 void *arg, int (*func)(void *, const struct cachetab *)) 3095 { 3096 const struct cachetab *ct; 3097 struct cachetab des_49_ct, des_b1_ct; 3098 uint8_t *dp; 3099 int i; 3100 3101 if ((dp = cpi->cpi_cacheinfo) == NULL) 3102 return; 3103 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3104 /* 3105 * For overloaded descriptor 0x49 we use cpuid function 4 3106 * if supported by the current processor, to create 3107 * cache information. 3108 * For overloaded descriptor 0xb1 we use X86_PAE flag 3109 * to disambiguate the cache information. 3110 */ 3111 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3112 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3113 ct = &des_49_ct; 3114 } else if (*dp == 0xb1) { 3115 des_b1_ct.ct_code = 0xb1; 3116 des_b1_ct.ct_assoc = 4; 3117 des_b1_ct.ct_line_size = 0; 3118 if (x86_feature & X86_PAE) { 3119 des_b1_ct.ct_size = 8; 3120 des_b1_ct.ct_label = itlb2M_str; 3121 } else { 3122 des_b1_ct.ct_size = 4; 3123 des_b1_ct.ct_label = itlb4M_str; 3124 } 3125 ct = &des_b1_ct; 3126 } else { 3127 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3128 continue; 3129 } 3130 } 3131 3132 if (func(arg, ct) != 0) { 3133 break; 3134 } 3135 } 3136 } 3137 3138 /* 3139 * (Like the Intel one, except for Cyrix CPUs) 3140 */ 3141 static void 3142 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3143 void *arg, int (*func)(void *, const struct cachetab *)) 3144 { 3145 const struct cachetab *ct; 3146 uint8_t *dp; 3147 int i; 3148 3149 if ((dp = cpi->cpi_cacheinfo) == NULL) 3150 return; 3151 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3152 /* 3153 * Search Cyrix-specific descriptor table first .. 3154 */ 3155 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3156 if (func(arg, ct) != 0) 3157 break; 3158 continue; 3159 } 3160 /* 3161 * .. 
else fall back to the Intel one 3162 */ 3163 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3164 if (func(arg, ct) != 0) 3165 break; 3166 continue; 3167 } 3168 } 3169 } 3170 3171 /* 3172 * A cacheinfo walker that adds associativity, line-size, and size properties 3173 * to the devinfo node it is passed as an argument. 3174 */ 3175 static int 3176 add_cacheent_props(void *arg, const struct cachetab *ct) 3177 { 3178 dev_info_t *devi = arg; 3179 3180 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3181 if (ct->ct_line_size != 0) 3182 add_cache_prop(devi, ct->ct_label, line_str, 3183 ct->ct_line_size); 3184 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3185 return (0); 3186 } 3187 3188 3189 static const char fully_assoc[] = "fully-associative?"; 3190 3191 /* 3192 * AMD style cache/tlb description 3193 * 3194 * Extended functions 5 and 6 directly describe properties of 3195 * tlbs and various cache levels. 3196 */ 3197 static void 3198 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3199 { 3200 switch (assoc) { 3201 case 0: /* reserved; ignore */ 3202 break; 3203 default: 3204 add_cache_prop(devi, label, assoc_str, assoc); 3205 break; 3206 case 0xff: 3207 add_cache_prop(devi, label, fully_assoc, 1); 3208 break; 3209 } 3210 } 3211 3212 static void 3213 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3214 { 3215 if (size == 0) 3216 return; 3217 add_cache_prop(devi, label, size_str, size); 3218 add_amd_assoc(devi, label, assoc); 3219 } 3220 3221 static void 3222 add_amd_cache(dev_info_t *devi, const char *label, 3223 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3224 { 3225 if (size == 0 || line_size == 0) 3226 return; 3227 add_amd_assoc(devi, label, assoc); 3228 /* 3229 * Most AMD parts have a sectored cache. Multiple cache lines are 3230 * associated with each tag. A sector consists of all cache lines 3231 * associated with a tag. 
For example, the AMD K6-III has a sector 3232 * size of 2 cache lines per tag. 3233 */ 3234 if (lines_per_tag != 0) 3235 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3236 add_cache_prop(devi, label, line_str, line_size); 3237 add_cache_prop(devi, label, size_str, size * 1024); 3238 } 3239 3240 static void 3241 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3242 { 3243 switch (assoc) { 3244 case 0: /* off */ 3245 break; 3246 case 1: 3247 case 2: 3248 case 4: 3249 add_cache_prop(devi, label, assoc_str, assoc); 3250 break; 3251 case 6: 3252 add_cache_prop(devi, label, assoc_str, 8); 3253 break; 3254 case 8: 3255 add_cache_prop(devi, label, assoc_str, 16); 3256 break; 3257 case 0xf: 3258 add_cache_prop(devi, label, fully_assoc, 1); 3259 break; 3260 default: /* reserved; ignore */ 3261 break; 3262 } 3263 } 3264 3265 static void 3266 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3267 { 3268 if (size == 0 || assoc == 0) 3269 return; 3270 add_amd_l2_assoc(devi, label, assoc); 3271 add_cache_prop(devi, label, size_str, size); 3272 } 3273 3274 static void 3275 add_amd_l2_cache(dev_info_t *devi, const char *label, 3276 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3277 { 3278 if (size == 0 || assoc == 0 || line_size == 0) 3279 return; 3280 add_amd_l2_assoc(devi, label, assoc); 3281 if (lines_per_tag != 0) 3282 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3283 add_cache_prop(devi, label, line_str, line_size); 3284 add_cache_prop(devi, label, size_str, size * 1024); 3285 } 3286 3287 static void 3288 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3289 { 3290 struct cpuid_regs *cp; 3291 3292 if (cpi->cpi_xmaxeax < 0x80000005) 3293 return; 3294 cp = &cpi->cpi_extd[5]; 3295 3296 /* 3297 * 4M/2M L1 TLB configuration 3298 * 3299 * We report the size for 2M pages because AMD uses two 3300 * TLB entries for one 4M page. 
3301 */ 3302 add_amd_tlb(devi, "dtlb-2M", 3303 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3304 add_amd_tlb(devi, "itlb-2M", 3305 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3306 3307 /* 3308 * 4K L1 TLB configuration 3309 */ 3310 3311 switch (cpi->cpi_vendor) { 3312 uint_t nentries; 3313 case X86_VENDOR_TM: 3314 if (cpi->cpi_family >= 5) { 3315 /* 3316 * Crusoe processors have 256 TLB entries, but 3317 * cpuid data format constrains them to only 3318 * reporting 255 of them. 3319 */ 3320 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3321 nentries = 256; 3322 /* 3323 * Crusoe processors also have a unified TLB 3324 */ 3325 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3326 nentries); 3327 break; 3328 } 3329 /*FALLTHROUGH*/ 3330 default: 3331 add_amd_tlb(devi, itlb4k_str, 3332 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3333 add_amd_tlb(devi, dtlb4k_str, 3334 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3335 break; 3336 } 3337 3338 /* 3339 * data L1 cache configuration 3340 */ 3341 3342 add_amd_cache(devi, l1_dcache_str, 3343 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3344 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3345 3346 /* 3347 * code L1 cache configuration 3348 */ 3349 3350 add_amd_cache(devi, l1_icache_str, 3351 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3352 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3353 3354 if (cpi->cpi_xmaxeax < 0x80000006) 3355 return; 3356 cp = &cpi->cpi_extd[6]; 3357 3358 /* Check for a unified L2 TLB for large pages */ 3359 3360 if (BITX(cp->cp_eax, 31, 16) == 0) 3361 add_amd_l2_tlb(devi, "l2-tlb-2M", 3362 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3363 else { 3364 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3365 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3366 add_amd_l2_tlb(devi, "l2-itlb-2M", 3367 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3368 } 3369 3370 /* Check for a unified L2 TLB for 4K pages */ 3371 3372 if (BITX(cp->cp_ebx, 31, 16) 
== 0) { 3373 add_amd_l2_tlb(devi, "l2-tlb-4K", 3374 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3375 } else { 3376 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3377 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3378 add_amd_l2_tlb(devi, "l2-itlb-4K", 3379 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3380 } 3381 3382 add_amd_l2_cache(devi, l2_cache_str, 3383 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3384 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3385 } 3386 3387 /* 3388 * There are two basic ways that the x86 world describes it cache 3389 * and tlb architecture - Intel's way and AMD's way. 3390 * 3391 * Return which flavor of cache architecture we should use 3392 */ 3393 static int 3394 x86_which_cacheinfo(struct cpuid_info *cpi) 3395 { 3396 switch (cpi->cpi_vendor) { 3397 case X86_VENDOR_Intel: 3398 if (cpi->cpi_maxeax >= 2) 3399 return (X86_VENDOR_Intel); 3400 break; 3401 case X86_VENDOR_AMD: 3402 /* 3403 * The K5 model 1 was the first part from AMD that reported 3404 * cache sizes via extended cpuid functions. 3405 */ 3406 if (cpi->cpi_family > 5 || 3407 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3408 return (X86_VENDOR_AMD); 3409 break; 3410 case X86_VENDOR_TM: 3411 if (cpi->cpi_family >= 5) 3412 return (X86_VENDOR_AMD); 3413 /*FALLTHROUGH*/ 3414 default: 3415 /* 3416 * If they have extended CPU data for 0x80000005 3417 * then we assume they have AMD-format cache 3418 * information. 3419 * 3420 * If not, and the vendor happens to be Cyrix, 3421 * then try our-Cyrix specific handler. 3422 * 3423 * If we're not Cyrix, then assume we're using Intel's 3424 * table-driven format instead. 3425 */ 3426 if (cpi->cpi_xmaxeax >= 0x80000005) 3427 return (X86_VENDOR_AMD); 3428 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3429 return (X86_VENDOR_Cyrix); 3430 else if (cpi->cpi_maxeax >= 2) 3431 return (X86_VENDOR_Intel); 3432 break; 3433 } 3434 return (-1); 3435 } 3436 3437 /* 3438 * create a node for the given cpu under the prom root node. 
3439 * Also, create a cpu node in the device tree. 3440 */ 3441 static dev_info_t *cpu_nex_devi = NULL; 3442 static kmutex_t cpu_node_lock; 3443 3444 /* 3445 * Called from post_startup() and mp_startup() 3446 */ 3447 void 3448 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3449 { 3450 dev_info_t *cpu_devi; 3451 int create; 3452 3453 mutex_enter(&cpu_node_lock); 3454 3455 /* 3456 * create a nexus node for all cpus identified as 'cpu_id' under 3457 * the root node. 3458 */ 3459 if (cpu_nex_devi == NULL) { 3460 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3461 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3462 mutex_exit(&cpu_node_lock); 3463 return; 3464 } 3465 (void) ndi_devi_online(cpu_nex_devi, 0); 3466 } 3467 3468 /* 3469 * create a child node for cpu identified as 'cpu_id' 3470 */ 3471 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3472 cpu_id); 3473 if (cpu_devi == NULL) { 3474 mutex_exit(&cpu_node_lock); 3475 return; 3476 } 3477 3478 /* device_type */ 3479 3480 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3481 "device_type", "cpu"); 3482 3483 /* reg */ 3484 3485 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3486 "reg", cpu_id); 3487 3488 /* cpu-mhz, and clock-frequency */ 3489 3490 if (cpu_freq > 0) { 3491 long long mul; 3492 3493 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3494 "cpu-mhz", cpu_freq); 3495 3496 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3497 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3498 "clock-frequency", (int)mul); 3499 } 3500 3501 (void) ndi_devi_online(cpu_devi, 0); 3502 3503 if ((x86_feature & X86_CPUID) == 0) { 3504 mutex_exit(&cpu_node_lock); 3505 return; 3506 } 3507 3508 /* vendor-id */ 3509 3510 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3511 "vendor-id", cpi->cpi_vendorstr); 3512 3513 if (cpi->cpi_maxeax == 0) { 3514 mutex_exit(&cpu_node_lock); 3515 return; 3516 } 3517 3518 /* 3519 * family, model, and step 3520 */ 3521 (void) 
ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3522 "family", CPI_FAMILY(cpi)); 3523 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3524 "cpu-model", CPI_MODEL(cpi)); 3525 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3526 "stepping-id", CPI_STEP(cpi)); 3527 3528 /* type */ 3529 3530 switch (cpi->cpi_vendor) { 3531 case X86_VENDOR_Intel: 3532 create = 1; 3533 break; 3534 default: 3535 create = 0; 3536 break; 3537 } 3538 if (create) 3539 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3540 "type", CPI_TYPE(cpi)); 3541 3542 /* ext-family */ 3543 3544 switch (cpi->cpi_vendor) { 3545 case X86_VENDOR_Intel: 3546 case X86_VENDOR_AMD: 3547 create = cpi->cpi_family >= 0xf; 3548 break; 3549 default: 3550 create = 0; 3551 break; 3552 } 3553 if (create) 3554 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3555 "ext-family", CPI_FAMILY_XTD(cpi)); 3556 3557 /* ext-model */ 3558 3559 switch (cpi->cpi_vendor) { 3560 case X86_VENDOR_Intel: 3561 create = IS_EXTENDED_MODEL_INTEL(cpi); 3562 break; 3563 case X86_VENDOR_AMD: 3564 create = CPI_FAMILY(cpi) == 0xf; 3565 break; 3566 default: 3567 create = 0; 3568 break; 3569 } 3570 if (create) 3571 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3572 "ext-model", CPI_MODEL_XTD(cpi)); 3573 3574 /* generation */ 3575 3576 switch (cpi->cpi_vendor) { 3577 case X86_VENDOR_AMD: 3578 /* 3579 * AMD K5 model 1 was the first part to support this 3580 */ 3581 create = cpi->cpi_xmaxeax >= 0x80000001; 3582 break; 3583 default: 3584 create = 0; 3585 break; 3586 } 3587 if (create) 3588 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3589 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3590 3591 /* brand-id */ 3592 3593 switch (cpi->cpi_vendor) { 3594 case X86_VENDOR_Intel: 3595 /* 3596 * brand id first appeared on Pentium III Xeon model 8, 3597 * and Celeron model 8 processors and Opteron 3598 */ 3599 create = cpi->cpi_family > 6 || 3600 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3601 break; 3602 case 
X86_VENDOR_AMD: 3603 create = cpi->cpi_family >= 0xf; 3604 break; 3605 default: 3606 create = 0; 3607 break; 3608 } 3609 if (create && cpi->cpi_brandid != 0) { 3610 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3611 "brand-id", cpi->cpi_brandid); 3612 } 3613 3614 /* chunks, and apic-id */ 3615 3616 switch (cpi->cpi_vendor) { 3617 /* 3618 * first available on Pentium IV and Opteron (K8) 3619 */ 3620 case X86_VENDOR_Intel: 3621 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3622 break; 3623 case X86_VENDOR_AMD: 3624 create = cpi->cpi_family >= 0xf; 3625 break; 3626 default: 3627 create = 0; 3628 break; 3629 } 3630 if (create) { 3631 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3632 "chunks", CPI_CHUNKS(cpi)); 3633 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3634 "apic-id", cpi->cpi_apicid); 3635 if (cpi->cpi_chipid >= 0) { 3636 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3637 "chip#", cpi->cpi_chipid); 3638 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3639 "clog#", cpi->cpi_clogid); 3640 } 3641 } 3642 3643 /* cpuid-features */ 3644 3645 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3646 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3647 3648 3649 /* cpuid-features-ecx */ 3650 3651 switch (cpi->cpi_vendor) { 3652 case X86_VENDOR_Intel: 3653 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3654 break; 3655 default: 3656 create = 0; 3657 break; 3658 } 3659 if (create) 3660 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3661 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3662 3663 /* ext-cpuid-features */ 3664 3665 switch (cpi->cpi_vendor) { 3666 case X86_VENDOR_Intel: 3667 case X86_VENDOR_AMD: 3668 case X86_VENDOR_Cyrix: 3669 case X86_VENDOR_TM: 3670 case X86_VENDOR_Centaur: 3671 create = cpi->cpi_xmaxeax >= 0x80000001; 3672 break; 3673 default: 3674 create = 0; 3675 break; 3676 } 3677 if (create) { 3678 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3679 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3680 
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3681 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3682 } 3683 3684 /* 3685 * Brand String first appeared in Intel Pentium IV, AMD K5 3686 * model 1, and Cyrix GXm. On earlier models we try and 3687 * simulate something similar .. so this string should always 3688 * same -something- about the processor, however lame. 3689 */ 3690 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3691 "brand-string", cpi->cpi_brandstr); 3692 3693 /* 3694 * Finally, cache and tlb information 3695 */ 3696 switch (x86_which_cacheinfo(cpi)) { 3697 case X86_VENDOR_Intel: 3698 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3699 break; 3700 case X86_VENDOR_Cyrix: 3701 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3702 break; 3703 case X86_VENDOR_AMD: 3704 amd_cache_info(cpi, cpu_devi); 3705 break; 3706 default: 3707 break; 3708 } 3709 3710 mutex_exit(&cpu_node_lock); 3711 } 3712 3713 struct l2info { 3714 int *l2i_csz; 3715 int *l2i_lsz; 3716 int *l2i_assoc; 3717 int l2i_ret; 3718 }; 3719 3720 /* 3721 * A cacheinfo walker that fetches the size, line-size and associativity 3722 * of the L2 cache 3723 */ 3724 static int 3725 intel_l2cinfo(void *arg, const struct cachetab *ct) 3726 { 3727 struct l2info *l2i = arg; 3728 int *ip; 3729 3730 if (ct->ct_label != l2_cache_str && 3731 ct->ct_label != sl2_cache_str) 3732 return (0); /* not an L2 -- keep walking */ 3733 3734 if ((ip = l2i->l2i_csz) != NULL) 3735 *ip = ct->ct_size; 3736 if ((ip = l2i->l2i_lsz) != NULL) 3737 *ip = ct->ct_line_size; 3738 if ((ip = l2i->l2i_assoc) != NULL) 3739 *ip = ct->ct_assoc; 3740 l2i->l2i_ret = ct->ct_size; 3741 return (1); /* was an L2 -- terminate walk */ 3742 } 3743 3744 /* 3745 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3746 * 3747 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3748 * value is the associativity, the associativity for the L2 cache and 3749 * tlb is encoded in the following 
table. The 4 bit L2 value serves as 3750 * an index into the amd_afd[] array to determine the associativity. 3751 * -1 is undefined. 0 is fully associative. 3752 */ 3753 3754 static int amd_afd[] = 3755 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3756 3757 static void 3758 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3759 { 3760 struct cpuid_regs *cp; 3761 uint_t size, assoc; 3762 int i; 3763 int *ip; 3764 3765 if (cpi->cpi_xmaxeax < 0x80000006) 3766 return; 3767 cp = &cpi->cpi_extd[6]; 3768 3769 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3770 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3771 uint_t cachesz = size * 1024; 3772 assoc = amd_afd[i]; 3773 3774 ASSERT(assoc != -1); 3775 3776 if ((ip = l2i->l2i_csz) != NULL) 3777 *ip = cachesz; 3778 if ((ip = l2i->l2i_lsz) != NULL) 3779 *ip = BITX(cp->cp_ecx, 7, 0); 3780 if ((ip = l2i->l2i_assoc) != NULL) 3781 *ip = assoc; 3782 l2i->l2i_ret = cachesz; 3783 } 3784 } 3785 3786 int 3787 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3788 { 3789 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3790 struct l2info __l2info, *l2i = &__l2info; 3791 3792 l2i->l2i_csz = csz; 3793 l2i->l2i_lsz = lsz; 3794 l2i->l2i_assoc = assoc; 3795 l2i->l2i_ret = -1; 3796 3797 switch (x86_which_cacheinfo(cpi)) { 3798 case X86_VENDOR_Intel: 3799 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3800 break; 3801 case X86_VENDOR_Cyrix: 3802 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3803 break; 3804 case X86_VENDOR_AMD: 3805 amd_l2cacheinfo(cpi, l2i); 3806 break; 3807 default: 3808 break; 3809 } 3810 return (l2i->l2i_ret); 3811 } 3812 3813 #if !defined(__xpv) 3814 3815 uint32_t * 3816 cpuid_mwait_alloc(cpu_t *cpu) 3817 { 3818 uint32_t *ret; 3819 size_t mwait_size; 3820 3821 ASSERT(cpuid_checkpass(cpu, 2)); 3822 3823 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3824 if (mwait_size == 0) 3825 return (NULL); 3826 3827 /* 3828 * kmem_alloc() returns cache line size aligned data for mwait_size 3829 * 
allocations. mwait_size is currently cache line sized. Neither 3830 * of these implementation details are guarantied to be true in the 3831 * future. 3832 * 3833 * First try allocating mwait_size as kmem_alloc() currently returns 3834 * correctly aligned memory. If kmem_alloc() does not return 3835 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3836 * 3837 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3838 * decide to free this memory. 3839 */ 3840 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3841 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3842 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3843 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3844 *ret = MWAIT_RUNNING; 3845 return (ret); 3846 } else { 3847 kmem_free(ret, mwait_size); 3848 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3849 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3850 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3851 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3852 *ret = MWAIT_RUNNING; 3853 return (ret); 3854 } 3855 } 3856 3857 void 3858 cpuid_mwait_free(cpu_t *cpu) 3859 { 3860 ASSERT(cpuid_checkpass(cpu, 2)); 3861 3862 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3863 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3864 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3865 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3866 } 3867 3868 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3869 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3870 } 3871 3872 void 3873 patch_tsc_read(int flag) 3874 { 3875 size_t cnt; 3876 3877 switch (flag) { 3878 case X86_NO_TSC: 3879 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3880 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3881 break; 3882 case X86_HAVE_TSCP: 3883 cnt = &_tscp_end - &_tscp_start; 3884 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3885 break; 3886 case X86_TSC_MFENCE: 3887 cnt = &_tsc_mfence_end - 
&_tsc_mfence_start; 3888 (void) memcpy((void *)tsc_read, 3889 (void *)&_tsc_mfence_start, cnt); 3890 break; 3891 case X86_TSC_LFENCE: 3892 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 3893 (void) memcpy((void *)tsc_read, 3894 (void *)&_tsc_lfence_start, cnt); 3895 break; 3896 default: 3897 break; 3898 } 3899 } 3900 3901 int 3902 cpuid_deep_cstates_supported(void) 3903 { 3904 struct cpuid_info *cpi; 3905 struct cpuid_regs regs; 3906 3907 ASSERT(cpuid_checkpass(CPU, 1)); 3908 3909 cpi = CPU->cpu_m.mcpu_cpi; 3910 3911 if (!(x86_feature & X86_CPUID)) 3912 return (0); 3913 3914 switch (cpi->cpi_vendor) { 3915 case X86_VENDOR_Intel: 3916 if (cpi->cpi_xmaxeax < 0x80000007) 3917 return (0); 3918 3919 /* 3920 * TSC run at a constant rate in all ACPI C-states? 3921 */ 3922 regs.cp_eax = 0x80000007; 3923 (void) __cpuid_insn(®s); 3924 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 3925 3926 default: 3927 return (0); 3928 } 3929 } 3930 3931 #endif /* !__xpv */ 3932 3933 void 3934 post_startup_cpu_fixups(void) 3935 { 3936 #ifndef __xpv 3937 /* 3938 * Some AMD processors support C1E state. Entering this state will 3939 * cause the local APIC timer to stop, which we can't deal with at 3940 * this time. 3941 */ 3942 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 3943 on_trap_data_t otd; 3944 uint64_t reg; 3945 3946 if (!on_trap(&otd, OT_DATA_ACCESS)) { 3947 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 3948 /* Disable C1E state if it is enabled by BIOS */ 3949 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 3950 AMD_ACTONCMPHALT_MASK) { 3951 reg &= ~(AMD_ACTONCMPHALT_MASK << 3952 AMD_ACTONCMPHALT_SHIFT); 3953 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 3954 } 3955 } 3956 no_trap(); 3957 } 3958 #endif /* !__xpv */ 3959 } 3960 3961 #if defined(__amd64) && !defined(__xpv) 3962 /* 3963 * Patch in versions of bcopy for high performance Intel Nhm processors 3964 * and later... 
3965 */ 3966 void 3967 patch_memops(uint_t vendor) 3968 { 3969 size_t cnt, i; 3970 caddr_t to, from; 3971 3972 if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) { 3973 cnt = &bcopy_patch_end - &bcopy_patch_start; 3974 to = &bcopy_ck_size; 3975 from = &bcopy_patch_start; 3976 for (i = 0; i < cnt; i++) { 3977 *to++ = *from++; 3978 } 3979 } 3980 } 3981 #endif /* __amd64 && !__xpv */ 3982