1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 /* 31 * Various routines to handle identification 32 * and classification of x86 processors. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/archsystm.h> 37 #include <sys/x86_archext.h> 38 #include <sys/kmem.h> 39 #include <sys/systm.h> 40 #include <sys/cmn_err.h> 41 #include <sys/sunddi.h> 42 #include <sys/sunndi.h> 43 #include <sys/cpuvar.h> 44 #include <sys/processor.h> 45 #include <sys/sysmacros.h> 46 #include <sys/pg.h> 47 #include <sys/fp.h> 48 #include <sys/controlregs.h> 49 #include <sys/auxv_386.h> 50 #include <sys/bitmap.h> 51 #include <sys/memnode.h> 52 53 #ifdef __xpv 54 #include <sys/hypervisor.h> 55 #else 56 #include <sys/ontrap.h> 57 #endif 58 59 /* 60 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 61 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 62 * them accordingly. For most modern processors, feature detection occurs here 63 * in pass 1. 64 * 65 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 66 * for the boot CPU and does the basic analysis that the early kernel needs. 67 * x86_feature is set based on the return value of cpuid_pass1() of the boot 68 * CPU. 69 * 70 * Pass 1 includes: 71 * 72 * o Determining vendor/model/family/stepping and setting x86_type and 73 * x86_vendor accordingly. 74 * o Processing the feature flags returned by the cpuid instruction while 75 * applying any workarounds or tricks for the specific processor. 76 * o Mapping the feature flags into Solaris feature bits (X86_*). 77 * o Processing extended feature flags if supported by the processor, 78 * again while applying specific processor knowledge. 79 * o Determining the CMT characteristics of the system. 80 * 81 * Pass 1 is done on non-boot CPUs during their initialization and the results 82 * are used only as a meager attempt at ensuring that all processors within the 83 * system support the same features. 84 * 85 * Pass 2 of cpuid feature analysis happens just at the beginning 86 * of startup(). It just copies in and corrects the remainder 87 * of the cpuid data we depend on: standard cpuid functions that we didn't 88 * need for pass1 feature analysis, and extended cpuid functions beyond the 89 * simple feature processing done in pass1. 90 * 91 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 92 * particular kernel memory allocation has been made available. It creates a 93 * readable brand string based on the data collected in the first two passes. 94 * 95 * Pass 4 of cpuid analysis is invoked after post_startup() when all 96 * the support infrastructure for various hardware features has been 97 * initialized. It determines which processor features will be reported 98 * to userland via the aux vector. 99 * 100 * All passes are executed on all CPUs, but only the boot CPU determines what 101 * features the kernel will use. 102 * 103 * Much of the worst junk in this file is for the support of processors 104 * that didn't really implement the cpuid instruction properly. 105 * 106 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 107 * the pass numbers. Accordingly, changes to the pass code may require changes 108 * to the accessor code. 109 */ 110 111 uint_t x86_feature = 0; 112 uint_t x86_vendor = X86_VENDOR_IntelClone; 113 uint_t x86_type = X86_TYPE_OTHER; 114 uint_t x86_clflush_size = 0; 115 116 uint_t pentiumpro_bug4046376; 117 uint_t pentiumpro_bug4064495; 118 119 uint_t enable486; 120 /* 121 * This is set to platform type Solaris is running on. 122 */ 123 static int platform_type = HW_NATIVE; 124 125 /* 126 * monitor/mwait info. 127 * 128 * size_actual and buf_actual are the real address and size allocated to get 129 * proper mwait_buf alignement. buf_actual and size_actual should be passed 130 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 131 * processor cache-line alignment, but this is not guarantied in the furture. 132 */ 133 struct mwait_info { 134 size_t mon_min; /* min size to avoid missed wakeups */ 135 size_t mon_max; /* size to avoid false wakeups */ 136 size_t size_actual; /* size actually allocated */ 137 void *buf_actual; /* memory actually allocated */ 138 uint32_t support; /* processor support of monitor/mwait */ 139 }; 140 141 /* 142 * These constants determine how many of the elements of the 143 * cpuid we cache in the cpuid_info data structure; the 144 * remaining elements are accessible via the cpuid instruction. 145 */ 146 147 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 148 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 0x80000008 */ 149 150 struct cpuid_info { 151 uint_t cpi_pass; /* last pass completed */ 152 /* 153 * standard function information 154 */ 155 uint_t cpi_maxeax; /* fn 0: %eax */ 156 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 157 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 158 159 uint_t cpi_family; /* fn 1: extended family */ 160 uint_t cpi_model; /* fn 1: extended model */ 161 uint_t cpi_step; /* fn 1: stepping */ 162 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 163 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 164 int cpi_clogid; /* fn 1: %ebx: thread # */ 165 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 166 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 167 uint_t cpi_ncache; /* fn 2: number of elements */ 168 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 169 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 170 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 171 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 172 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 173 /* 174 * extended function information 175 */ 176 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 177 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 178 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 179 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 180 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 181 id_t cpi_coreid; /* same coreid => strands share core */ 182 int cpi_pkgcoreid; /* core number within single package */ 183 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 184 /* Intel: fn 4: %eax[31-26] */ 185 /* 186 * supported feature information 187 */ 188 uint32_t cpi_support[5]; 189 #define STD_EDX_FEATURES 0 190 #define AMD_EDX_FEATURES 1 191 #define TM_EDX_FEATURES 2 192 #define STD_ECX_FEATURES 3 193 #define AMD_ECX_FEATURES 4 194 /* 195 * Synthesized information, where known. 196 */ 197 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 198 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 199 uint32_t cpi_socket; /* Chip package/socket type */ 200 201 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 202 uint32_t cpi_apicid; 203 }; 204 205 206 static struct cpuid_info cpuid_info0; 207 208 /* 209 * These bit fields are defined by the Intel Application Note AP-485 210 * "Intel Processor Identification and the CPUID Instruction" 211 */ 212 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 213 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 214 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 215 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 216 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 217 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 218 219 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 220 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 221 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 222 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 223 224 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 225 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 226 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 227 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 228 229 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 230 #define CPI_XMAXEAX_MAX 0x80000100 231 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 232 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 233 234 /* 235 * Function 4 (Deterministic Cache Parameters) macros 236 * Defined by Intel Application Note AP-485 237 */ 238 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 239 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 240 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 241 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 242 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 243 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 244 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 245 246 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 247 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 248 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 249 250 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 251 252 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 253 254 255 /* 256 * A couple of shorthand macros to identify "later" P6-family chips 257 * like the Pentium M and Core. First, the "older" P6-based stuff 258 * (loosely defined as "pre-Pentium-4"): 259 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 260 */ 261 262 #define IS_LEGACY_P6(cpi) ( \ 263 cpi->cpi_family == 6 && \ 264 (cpi->cpi_model == 1 || \ 265 cpi->cpi_model == 3 || \ 266 cpi->cpi_model == 5 || \ 267 cpi->cpi_model == 6 || \ 268 cpi->cpi_model == 7 || \ 269 cpi->cpi_model == 8 || \ 270 cpi->cpi_model == 0xA || \ 271 cpi->cpi_model == 0xB) \ 272 ) 273 274 /* A "new F6" is everything with family 6 that's not the above */ 275 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 276 277 /* Extended family/model support */ 278 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 279 cpi->cpi_family >= 0xf) 280 281 /* 282 * Info for monitor/mwait idle loop. 283 * 284 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 285 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 286 * 2006. 287 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 288 * Documentation Updates" #33633, Rev 2.05, December 2006. 289 */ 290 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 291 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 292 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 293 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 294 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 295 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 296 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 297 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 298 /* 299 * Number of sub-cstates for a given c-state. 300 */ 301 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 302 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 303 304 /* 305 * Functions we consune from cpuid_subr.c; don't publish these in a header 306 * file to try and keep people using the expected cpuid_* interfaces. 307 */ 308 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 309 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 310 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 311 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 312 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 313 314 /* 315 * Apply up various platform-dependent restrictions where the 316 * underlying platform restrictions mean the CPU can be marked 317 * as less capable than its cpuid instruction would imply. 318 */ 319 #if defined(__xpv) 320 static void 321 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 322 { 323 switch (eax) { 324 case 1: { 325 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 326 0 : CPUID_INTC_EDX_MCA; 327 cp->cp_edx &= 328 ~(mcamask | 329 CPUID_INTC_EDX_PSE | 330 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 331 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 332 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 333 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 334 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 335 break; 336 } 337 338 case 0x80000001: 339 cp->cp_edx &= 340 ~(CPUID_AMD_EDX_PSE | 341 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 342 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 343 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 344 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 345 CPUID_AMD_EDX_TSCP); 346 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 347 break; 348 default: 349 break; 350 } 351 352 switch (vendor) { 353 case X86_VENDOR_Intel: 354 switch (eax) { 355 case 4: 356 /* 357 * Zero out the (ncores-per-chip - 1) field 358 */ 359 cp->cp_eax &= 0x03fffffff; 360 break; 361 default: 362 break; 363 } 364 break; 365 case X86_VENDOR_AMD: 366 switch (eax) { 367 case 0x80000008: 368 /* 369 * Zero out the (ncores-per-chip - 1) field 370 */ 371 cp->cp_ecx &= 0xffffff00; 372 break; 373 default: 374 break; 375 } 376 break; 377 default: 378 break; 379 } 380 } 381 #else 382 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 383 #endif 384 385 /* 386 * Some undocumented ways of patching the results of the cpuid 387 * instruction to permit running Solaris 10 on future cpus that 388 * we don't currently support. Could be set to non-zero values 389 * via settings in eeprom. 390 */ 391 392 uint32_t cpuid_feature_ecx_include; 393 uint32_t cpuid_feature_ecx_exclude; 394 uint32_t cpuid_feature_edx_include; 395 uint32_t cpuid_feature_edx_exclude; 396 397 void 398 cpuid_alloc_space(cpu_t *cpu) 399 { 400 /* 401 * By convention, cpu0 is the boot cpu, which is set up 402 * before memory allocation is available. All other cpus get 403 * their cpuid_info struct allocated here. 404 */ 405 ASSERT(cpu->cpu_id != 0); 406 cpu->cpu_m.mcpu_cpi = 407 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 408 } 409 410 void 411 cpuid_free_space(cpu_t *cpu) 412 { 413 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 414 int i; 415 416 ASSERT(cpu->cpu_id != 0); 417 418 /* 419 * Free up any function 4 related dynamic storage 420 */ 421 for (i = 1; i < cpi->cpi_std_4_size; i++) 422 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 423 if (cpi->cpi_std_4_size > 0) 424 kmem_free(cpi->cpi_std_4, 425 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 426 427 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 428 } 429 430 #if !defined(__xpv) 431 432 static void 433 determine_platform() 434 { 435 struct cpuid_regs cp; 436 char *xen_str; 437 uint32_t xen_signature[4]; 438 439 /* 440 * In a fully virtualized domain, Xen's pseudo-cpuid function 441 * 0x40000000 returns a string representing the Xen signature in 442 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 443 * function. 444 */ 445 cp.cp_eax = 0x40000000; 446 (void) __cpuid_insn(&cp); 447 xen_signature[0] = cp.cp_ebx; 448 xen_signature[1] = cp.cp_ecx; 449 xen_signature[2] = cp.cp_edx; 450 xen_signature[3] = 0; 451 xen_str = (char *)xen_signature; 452 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) { 453 platform_type = HW_XEN_HVM; 454 } else if (vmware_platform()) { /* running under vmware hypervisor? */ 455 platform_type = HW_VMWARE; 456 } 457 } 458 459 int 460 get_hwenv(void) 461 { 462 return (platform_type); 463 } 464 465 int 466 is_controldom(void) 467 { 468 return (0); 469 } 470 471 #else 472 473 int 474 get_hwenv(void) 475 { 476 return (HW_XEN_PV); 477 } 478 479 int 480 is_controldom(void) 481 { 482 return (DOMAIN_IS_INITDOMAIN(xen_info)); 483 } 484 485 #endif /* __xpv */ 486 487 uint_t 488 cpuid_pass1(cpu_t *cpu) 489 { 490 uint32_t mask_ecx, mask_edx; 491 uint_t feature = X86_CPUID; 492 struct cpuid_info *cpi; 493 struct cpuid_regs *cp; 494 int xcpuid; 495 #if !defined(__xpv) 496 extern int idle_cpu_prefer_mwait; 497 #endif 498 499 500 #if !defined(__xpv) 501 determine_platform(); 502 #endif 503 /* 504 * Space statically allocated for cpu0, ensure pointer is set 505 */ 506 if (cpu->cpu_id == 0) 507 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 508 cpi = cpu->cpu_m.mcpu_cpi; 509 ASSERT(cpi != NULL); 510 cp = &cpi->cpi_std[0]; 511 cp->cp_eax = 0; 512 cpi->cpi_maxeax = __cpuid_insn(cp); 513 { 514 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 515 *iptr++ = cp->cp_ebx; 516 *iptr++ = cp->cp_edx; 517 *iptr++ = cp->cp_ecx; 518 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 519 } 520 521 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 522 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 523 524 /* 525 * Limit the range in case of weird hardware 526 */ 527 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 528 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 529 if (cpi->cpi_maxeax < 1) 530 goto pass1_done; 531 532 cp = &cpi->cpi_std[1]; 533 cp->cp_eax = 1; 534 (void) __cpuid_insn(cp); 535 536 /* 537 * Extract identifying constants for easy access. 538 */ 539 cpi->cpi_model = CPI_MODEL(cpi); 540 cpi->cpi_family = CPI_FAMILY(cpi); 541 542 if (cpi->cpi_family == 0xf) 543 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 544 545 /* 546 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 547 * Intel, and presumably everyone else, uses model == 0xf, as 548 * one would expect (max value means possible overflow). Sigh. 549 */ 550 551 switch (cpi->cpi_vendor) { 552 case X86_VENDOR_Intel: 553 if (IS_EXTENDED_MODEL_INTEL(cpi)) 554 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 555 break; 556 case X86_VENDOR_AMD: 557 if (CPI_FAMILY(cpi) == 0xf) 558 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 559 break; 560 default: 561 if (cpi->cpi_model == 0xf) 562 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 563 break; 564 } 565 566 cpi->cpi_step = CPI_STEP(cpi); 567 cpi->cpi_brandid = CPI_BRANDID(cpi); 568 569 /* 570 * *default* assumptions: 571 * - believe %edx feature word 572 * - ignore %ecx feature word 573 * - 32-bit virtual and physical addressing 574 */ 575 mask_edx = 0xffffffff; 576 mask_ecx = 0; 577 578 cpi->cpi_pabits = cpi->cpi_vabits = 32; 579 580 switch (cpi->cpi_vendor) { 581 case X86_VENDOR_Intel: 582 if (cpi->cpi_family == 5) 583 x86_type = X86_TYPE_P5; 584 else if (IS_LEGACY_P6(cpi)) { 585 x86_type = X86_TYPE_P6; 586 pentiumpro_bug4046376 = 1; 587 pentiumpro_bug4064495 = 1; 588 /* 589 * Clear the SEP bit when it was set erroneously 590 */ 591 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 592 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 593 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 594 x86_type = X86_TYPE_P4; 595 /* 596 * We don't currently depend on any of the %ecx 597 * features until Prescott, so we'll only check 598 * this from P4 onwards. We might want to revisit 599 * that idea later. 600 */ 601 mask_ecx = 0xffffffff; 602 } else if (cpi->cpi_family > 0xf) 603 mask_ecx = 0xffffffff; 604 /* 605 * We don't support MONITOR/MWAIT if leaf 5 is not available 606 * to obtain the monitor linesize. 607 */ 608 if (cpi->cpi_maxeax < 5) 609 mask_ecx &= ~CPUID_INTC_ECX_MON; 610 break; 611 case X86_VENDOR_IntelClone: 612 default: 613 break; 614 case X86_VENDOR_AMD: 615 #if defined(OPTERON_ERRATUM_108) 616 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 617 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 618 cpi->cpi_model = 0xc; 619 } else 620 #endif 621 if (cpi->cpi_family == 5) { 622 /* 623 * AMD K5 and K6 624 * 625 * These CPUs have an incomplete implementation 626 * of MCA/MCE which we mask away. 627 */ 628 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 629 630 /* 631 * Model 0 uses the wrong (APIC) bit 632 * to indicate PGE. Fix it here. 633 */ 634 if (cpi->cpi_model == 0) { 635 if (cp->cp_edx & 0x200) { 636 cp->cp_edx &= ~0x200; 637 cp->cp_edx |= CPUID_INTC_EDX_PGE; 638 } 639 } 640 641 /* 642 * Early models had problems w/ MMX; disable. 643 */ 644 if (cpi->cpi_model < 6) 645 mask_edx &= ~CPUID_INTC_EDX_MMX; 646 } 647 648 /* 649 * For newer families, SSE3 and CX16, at least, are valid; 650 * enable all 651 */ 652 if (cpi->cpi_family >= 0xf) 653 mask_ecx = 0xffffffff; 654 /* 655 * We don't support MONITOR/MWAIT if leaf 5 is not available 656 * to obtain the monitor linesize. 657 */ 658 if (cpi->cpi_maxeax < 5) 659 mask_ecx &= ~CPUID_INTC_ECX_MON; 660 661 #if !defined(__xpv) 662 /* 663 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 664 * processors. AMD does not intend MWAIT to be used in the cpu 665 * idle loop on current and future processors. 10h and future 666 * AMD processors use more power in MWAIT than HLT. 667 * Pre-family-10h Opterons do not have the MWAIT instruction. 668 */ 669 idle_cpu_prefer_mwait = 0; 670 #endif 671 672 break; 673 case X86_VENDOR_TM: 674 /* 675 * workaround the NT workaround in CMS 4.1 676 */ 677 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 678 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 679 cp->cp_edx |= CPUID_INTC_EDX_CX8; 680 break; 681 case X86_VENDOR_Centaur: 682 /* 683 * workaround the NT workarounds again 684 */ 685 if (cpi->cpi_family == 6) 686 cp->cp_edx |= CPUID_INTC_EDX_CX8; 687 break; 688 case X86_VENDOR_Cyrix: 689 /* 690 * We rely heavily on the probing in locore 691 * to actually figure out what parts, if any, 692 * of the Cyrix cpuid instruction to believe. 693 */ 694 switch (x86_type) { 695 case X86_TYPE_CYRIX_486: 696 mask_edx = 0; 697 break; 698 case X86_TYPE_CYRIX_6x86: 699 mask_edx = 0; 700 break; 701 case X86_TYPE_CYRIX_6x86L: 702 mask_edx = 703 CPUID_INTC_EDX_DE | 704 CPUID_INTC_EDX_CX8; 705 break; 706 case X86_TYPE_CYRIX_6x86MX: 707 mask_edx = 708 CPUID_INTC_EDX_DE | 709 CPUID_INTC_EDX_MSR | 710 CPUID_INTC_EDX_CX8 | 711 CPUID_INTC_EDX_PGE | 712 CPUID_INTC_EDX_CMOV | 713 CPUID_INTC_EDX_MMX; 714 break; 715 case X86_TYPE_CYRIX_GXm: 716 mask_edx = 717 CPUID_INTC_EDX_MSR | 718 CPUID_INTC_EDX_CX8 | 719 CPUID_INTC_EDX_CMOV | 720 CPUID_INTC_EDX_MMX; 721 break; 722 case X86_TYPE_CYRIX_MediaGX: 723 break; 724 case X86_TYPE_CYRIX_MII: 725 case X86_TYPE_VIA_CYRIX_III: 726 mask_edx = 727 CPUID_INTC_EDX_DE | 728 CPUID_INTC_EDX_TSC | 729 CPUID_INTC_EDX_MSR | 730 CPUID_INTC_EDX_CX8 | 731 CPUID_INTC_EDX_PGE | 732 CPUID_INTC_EDX_CMOV | 733 CPUID_INTC_EDX_MMX; 734 break; 735 default: 736 break; 737 } 738 break; 739 } 740 741 #if defined(__xpv) 742 /* 743 * Do not support MONITOR/MWAIT under a hypervisor 744 */ 745 mask_ecx &= ~CPUID_INTC_ECX_MON; 746 #endif /* __xpv */ 747 748 /* 749 * Now we've figured out the masks that determine 750 * which bits we choose to believe, apply the masks 751 * to the feature words, then map the kernel's view 752 * of these feature words into its feature word. 753 */ 754 cp->cp_edx &= mask_edx; 755 cp->cp_ecx &= mask_ecx; 756 757 /* 758 * apply any platform restrictions (we don't call this 759 * immediately after __cpuid_insn here, because we need the 760 * workarounds applied above first) 761 */ 762 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 763 764 /* 765 * fold in overrides from the "eeprom" mechanism 766 */ 767 cp->cp_edx |= cpuid_feature_edx_include; 768 cp->cp_edx &= ~cpuid_feature_edx_exclude; 769 770 cp->cp_ecx |= cpuid_feature_ecx_include; 771 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 772 773 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 774 feature |= X86_LARGEPAGE; 775 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 776 feature |= X86_TSC; 777 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 778 feature |= X86_MSR; 779 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 780 feature |= X86_MTRR; 781 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 782 feature |= X86_PGE; 783 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 784 feature |= X86_CMOV; 785 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 786 feature |= X86_MMX; 787 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 788 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 789 feature |= X86_MCA; 790 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 791 feature |= X86_PAE; 792 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 793 feature |= X86_CX8; 794 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 795 feature |= X86_CX16; 796 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 797 feature |= X86_PAT; 798 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 799 feature |= X86_SEP; 800 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 801 /* 802 * In our implementation, fxsave/fxrstor 803 * are prerequisites before we'll even 804 * try and do SSE things. 805 */ 806 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 807 feature |= X86_SSE; 808 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 809 feature |= X86_SSE2; 810 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 811 feature |= X86_SSE3; 812 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 813 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 814 feature |= X86_SSSE3; 815 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 816 feature |= X86_SSE4_1; 817 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 818 feature |= X86_SSE4_2; 819 if (cp->cp_ecx & CPUID_INTC_ECX_AES) 820 feature |= X86_AES; 821 } 822 } 823 if (cp->cp_edx & CPUID_INTC_EDX_DE) 824 feature |= X86_DE; 825 #if !defined(__xpv) 826 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 827 828 /* 829 * We require the CLFLUSH instruction for erratum workaround 830 * to use MONITOR/MWAIT. 831 */ 832 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 833 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 834 feature |= X86_MWAIT; 835 } else { 836 extern int idle_cpu_assert_cflush_monitor; 837 838 /* 839 * All processors we are aware of which have 840 * MONITOR/MWAIT also have CLFLUSH. 841 */ 842 if (idle_cpu_assert_cflush_monitor) { 843 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 844 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 845 } 846 } 847 } 848 #endif /* __xpv */ 849 850 /* 851 * Only need it first time, rest of the cpus would follow suite. 852 * we only capture this for the bootcpu. 853 */ 854 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 855 feature |= X86_CLFSH; 856 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 857 } 858 859 if (feature & X86_PAE) 860 cpi->cpi_pabits = 36; 861 862 /* 863 * Hyperthreading configuration is slightly tricky on Intel 864 * and pure clones, and even trickier on AMD. 865 * 866 * (AMD chose to set the HTT bit on their CMP processors, 867 * even though they're not actually hyperthreaded. Thus it 868 * takes a bit more work to figure out what's really going 869 * on ... see the handling of the CMP_LGCY bit below) 870 */ 871 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 872 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 873 if (cpi->cpi_ncpu_per_chip > 1) 874 feature |= X86_HTT; 875 } else { 876 cpi->cpi_ncpu_per_chip = 1; 877 } 878 879 /* 880 * Work on the "extended" feature information, doing 881 * some basic initialization for cpuid_pass2() 882 */ 883 xcpuid = 0; 884 switch (cpi->cpi_vendor) { 885 case X86_VENDOR_Intel: 886 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 887 xcpuid++; 888 break; 889 case X86_VENDOR_AMD: 890 if (cpi->cpi_family > 5 || 891 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 892 xcpuid++; 893 break; 894 case X86_VENDOR_Cyrix: 895 /* 896 * Only these Cyrix CPUs are -known- to support 897 * extended cpuid operations. 898 */ 899 if (x86_type == X86_TYPE_VIA_CYRIX_III || 900 x86_type == X86_TYPE_CYRIX_GXm) 901 xcpuid++; 902 break; 903 case X86_VENDOR_Centaur: 904 case X86_VENDOR_TM: 905 default: 906 xcpuid++; 907 break; 908 } 909 910 if (xcpuid) { 911 cp = &cpi->cpi_extd[0]; 912 cp->cp_eax = 0x80000000; 913 cpi->cpi_xmaxeax = __cpuid_insn(cp); 914 } 915 916 if (cpi->cpi_xmaxeax & 0x80000000) { 917 918 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 919 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 920 921 switch (cpi->cpi_vendor) { 922 case X86_VENDOR_Intel: 923 case X86_VENDOR_AMD: 924 if (cpi->cpi_xmaxeax < 0x80000001) 925 break; 926 cp = &cpi->cpi_extd[1]; 927 cp->cp_eax = 0x80000001; 928 (void) __cpuid_insn(cp); 929 930 if (cpi->cpi_vendor == X86_VENDOR_AMD && 931 cpi->cpi_family == 5 && 932 cpi->cpi_model == 6 && 933 cpi->cpi_step == 6) { 934 /* 935 * K6 model 6 uses bit 10 to indicate SYSC 936 * Later models use bit 11. Fix it here. 937 */ 938 if (cp->cp_edx & 0x400) { 939 cp->cp_edx &= ~0x400; 940 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 941 } 942 } 943 944 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 945 946 /* 947 * Compute the additions to the kernel's feature word. 948 */ 949 if (cp->cp_edx & CPUID_AMD_EDX_NX) 950 feature |= X86_NX; 951 952 /* 953 * Regardless whether or not we boot 64-bit, 954 * we should have a way to identify whether 955 * the CPU is capable of running 64-bit. 956 */ 957 if (cp->cp_edx & CPUID_AMD_EDX_LM) 958 feature |= X86_64; 959 960 #if defined(__amd64) 961 /* 1 GB large page - enable only for 64 bit kernel */ 962 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 963 feature |= X86_1GPG; 964 #endif 965 966 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 967 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 968 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 969 feature |= X86_SSE4A; 970 971 /* 972 * If both the HTT and CMP_LGCY bits are set, 973 * then we're not actually HyperThreaded. Read 974 * "AMD CPUID Specification" for more details. 975 */ 976 if (cpi->cpi_vendor == X86_VENDOR_AMD && 977 (feature & X86_HTT) && 978 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 979 feature &= ~X86_HTT; 980 feature |= X86_CMP; 981 } 982 #if defined(__amd64) 983 /* 984 * It's really tricky to support syscall/sysret in 985 * the i386 kernel; we rely on sysenter/sysexit 986 * instead. In the amd64 kernel, things are -way- 987 * better. 988 */ 989 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 990 feature |= X86_ASYSC; 991 992 /* 993 * While we're thinking about system calls, note 994 * that AMD processors don't support sysenter 995 * in long mode at all, so don't try to program them. 996 */ 997 if (x86_vendor == X86_VENDOR_AMD) 998 feature &= ~X86_SEP; 999 #endif 1000 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1001 feature |= X86_TSCP; 1002 break; 1003 default: 1004 break; 1005 } 1006 1007 /* 1008 * Get CPUID data about processor cores and hyperthreads. 1009 */ 1010 switch (cpi->cpi_vendor) { 1011 case X86_VENDOR_Intel: 1012 if (cpi->cpi_maxeax >= 4) { 1013 cp = &cpi->cpi_std[4]; 1014 cp->cp_eax = 4; 1015 cp->cp_ecx = 0; 1016 (void) __cpuid_insn(cp); 1017 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1018 } 1019 /*FALLTHROUGH*/ 1020 case X86_VENDOR_AMD: 1021 if (cpi->cpi_xmaxeax < 0x80000008) 1022 break; 1023 cp = &cpi->cpi_extd[8]; 1024 cp->cp_eax = 0x80000008; 1025 (void) __cpuid_insn(cp); 1026 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1027 1028 /* 1029 * Virtual and physical address limits from 1030 * cpuid override previously guessed values. 1031 */ 1032 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1033 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1034 break; 1035 default: 1036 break; 1037 } 1038 1039 /* 1040 * Derive the number of cores per chip 1041 */ 1042 switch (cpi->cpi_vendor) { 1043 case X86_VENDOR_Intel: 1044 if (cpi->cpi_maxeax < 4) { 1045 cpi->cpi_ncore_per_chip = 1; 1046 break; 1047 } else { 1048 cpi->cpi_ncore_per_chip = 1049 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1050 } 1051 break; 1052 case X86_VENDOR_AMD: 1053 if (cpi->cpi_xmaxeax < 0x80000008) { 1054 cpi->cpi_ncore_per_chip = 1; 1055 break; 1056 } else { 1057 /* 1058 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1059 * 1 less than the number of physical cores on 1060 * the chip. In family 0x10 this value can 1061 * be affected by "downcoring" - it reflects 1062 * 1 less than the number of cores actually 1063 * enabled on this node. 1064 */ 1065 cpi->cpi_ncore_per_chip = 1066 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1067 } 1068 break; 1069 default: 1070 cpi->cpi_ncore_per_chip = 1; 1071 break; 1072 } 1073 1074 /* 1075 * Get CPUID data about TSC Invariance in Deep C-State. 1076 */ 1077 switch (cpi->cpi_vendor) { 1078 case X86_VENDOR_Intel: 1079 if (cpi->cpi_maxeax >= 7) { 1080 cp = &cpi->cpi_extd[7]; 1081 cp->cp_eax = 0x80000007; 1082 cp->cp_ecx = 0; 1083 (void) __cpuid_insn(cp); 1084 } 1085 break; 1086 default: 1087 break; 1088 } 1089 } else { 1090 cpi->cpi_ncore_per_chip = 1; 1091 } 1092 1093 /* 1094 * If more than one core, then this processor is CMP. 1095 */ 1096 if (cpi->cpi_ncore_per_chip > 1) 1097 feature |= X86_CMP; 1098 1099 /* 1100 * If the number of cores is the same as the number 1101 * of CPUs, then we cannot have HyperThreading. 1102 */ 1103 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1104 feature &= ~X86_HTT; 1105 1106 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1107 /* 1108 * Single-core single-threaded processors. 1109 */ 1110 cpi->cpi_chipid = -1; 1111 cpi->cpi_clogid = 0; 1112 cpi->cpi_coreid = cpu->cpu_id; 1113 cpi->cpi_pkgcoreid = 0; 1114 } else if (cpi->cpi_ncpu_per_chip > 1) { 1115 uint_t i; 1116 uint_t chipid_shift = 0; 1117 uint_t coreid_shift = 0; 1118 uint_t apic_id = CPI_APIC_ID(cpi); 1119 1120 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1121 chipid_shift++; 1122 cpi->cpi_chipid = apic_id >> chipid_shift; 1123 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1124 1125 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1126 if (feature & X86_CMP) { 1127 /* 1128 * Multi-core (and possibly multi-threaded) 1129 * processors. 1130 */ 1131 uint_t ncpu_per_core; 1132 if (cpi->cpi_ncore_per_chip == 1) 1133 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1134 else if (cpi->cpi_ncore_per_chip > 1) 1135 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1136 cpi->cpi_ncore_per_chip; 1137 /* 1138 * 8bit APIC IDs on dual core Pentiums 1139 * look like this: 1140 * 1141 * +-----------------------+------+------+ 1142 * | Physical Package ID | MC | HT | 1143 * +-----------------------+------+------+ 1144 * <------- chipid --------> 1145 * <------- coreid ---------------> 1146 * <--- clogid --> 1147 * <------> 1148 * pkgcoreid 1149 * 1150 * Where the number of bits necessary to 1151 * represent MC and HT fields together equals 1152 * to the minimum number of bits necessary to 1153 * store the value of cpi->cpi_ncpu_per_chip. 1154 * Of those bits, the MC part uses the number 1155 * of bits necessary to store the value of 1156 * cpi->cpi_ncore_per_chip. 1157 */ 1158 for (i = 1; i < ncpu_per_core; i <<= 1) 1159 coreid_shift++; 1160 cpi->cpi_coreid = apic_id >> coreid_shift; 1161 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1162 coreid_shift; 1163 } else if (feature & X86_HTT) { 1164 /* 1165 * Single-core multi-threaded processors. 1166 */ 1167 cpi->cpi_coreid = cpi->cpi_chipid; 1168 cpi->cpi_pkgcoreid = 0; 1169 } 1170 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1171 /* 1172 * AMD CMP chips currently have a single thread per 1173 * core, with 2 cores on family 0xf and 2, 3 or 4 1174 * cores on family 0x10. 1175 * 1176 * Since no two cpus share a core we must assign a 1177 * distinct coreid per cpu, and we do this by using 1178 * the cpu_id. This scheme does not, however, 1179 * guarantee that sibling cores of a chip will have 1180 * sequential coreids starting at a multiple of the 1181 * number of cores per chip - that is usually the 1182 * case, but if the ACPI MADT table is presented 1183 * in a different order then we need to perform a 1184 * few more gymnastics for the pkgcoreid. 1185 * 1186 * In family 0xf CMPs there are 2 cores on all nodes 1187 * present - no mixing of single and dual core parts. 1188 * 1189 * In family 0x10 CMPs cpuid fn 2 ECX[15:12] 1190 * "ApicIdCoreIdSize[3:0]" tells us how 1191 * many least-significant bits in the ApicId 1192 * are used to represent the core number 1193 * within the node. Cores are always 1194 * numbered sequentially from 0 regardless 1195 * of how many or which are disabled, and 1196 * there seems to be no way to discover the 1197 * real core id when some are disabled. 1198 */ 1199 cpi->cpi_coreid = cpu->cpu_id; 1200 1201 if (cpi->cpi_family == 0x10 && 1202 cpi->cpi_xmaxeax >= 0x80000008) { 1203 int coreidsz = 1204 BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 1205 1206 cpi->cpi_pkgcoreid = 1207 apic_id & ((1 << coreidsz) - 1); 1208 } else { 1209 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 1210 } 1211 } else { 1212 /* 1213 * All other processors are currently 1214 * assumed to have single cores. 1215 */ 1216 cpi->cpi_coreid = cpi->cpi_chipid; 1217 cpi->cpi_pkgcoreid = 0; 1218 } 1219 } 1220 1221 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1222 1223 /* 1224 * Synthesize chip "revision" and socket type 1225 */ 1226 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1227 cpi->cpi_model, cpi->cpi_step); 1228 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1229 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1230 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1231 cpi->cpi_model, cpi->cpi_step); 1232 1233 pass1_done: 1234 cpi->cpi_pass = 1; 1235 return (feature); 1236 } 1237 1238 /* 1239 * Make copies of the cpuid table entries we depend on, in 1240 * part for ease of parsing now, in part so that we have only 1241 * one place to correct any of it, in part for ease of 1242 * later export to userland, and in part so we can look at 1243 * this stuff in a crash dump. 1244 */ 1245 1246 /*ARGSUSED*/ 1247 void 1248 cpuid_pass2(cpu_t *cpu) 1249 { 1250 uint_t n, nmax; 1251 int i; 1252 struct cpuid_regs *cp; 1253 uint8_t *dp; 1254 uint32_t *iptr; 1255 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1256 1257 ASSERT(cpi->cpi_pass == 1); 1258 1259 if (cpi->cpi_maxeax < 1) 1260 goto pass2_done; 1261 1262 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1263 nmax = NMAX_CPI_STD; 1264 /* 1265 * (We already handled n == 0 and n == 1 in pass 1) 1266 */ 1267 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1268 cp->cp_eax = n; 1269 1270 /* 1271 * CPUID function 4 expects %ecx to be initialized 1272 * with an index which indicates which cache to return 1273 * information about. The OS is expected to call function 4 1274 * with %ecx set to 0, 1, 2, ... until it returns with 1275 * EAX[4:0] set to 0, which indicates there are no more 1276 * caches. 1277 * 1278 * Here, populate cpi_std[4] with the information returned by 1279 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1280 * when dynamic memory allocation becomes available. 1281 * 1282 * Note: we need to explicitly initialize %ecx here, since 1283 * function 4 may have been previously invoked. 1284 */ 1285 if (n == 4) 1286 cp->cp_ecx = 0; 1287 1288 (void) __cpuid_insn(cp); 1289 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1290 switch (n) { 1291 case 2: 1292 /* 1293 * "the lower 8 bits of the %eax register 1294 * contain a value that identifies the number 1295 * of times the cpuid [instruction] has to be 1296 * executed to obtain a complete image of the 1297 * processor's caching systems." 1298 * 1299 * How *do* they make this stuff up? 1300 */ 1301 cpi->cpi_ncache = sizeof (*cp) * 1302 BITX(cp->cp_eax, 7, 0); 1303 if (cpi->cpi_ncache == 0) 1304 break; 1305 cpi->cpi_ncache--; /* skip count byte */ 1306 1307 /* 1308 * Well, for now, rather than attempt to implement 1309 * this slightly dubious algorithm, we just look 1310 * at the first 15 .. 1311 */ 1312 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1313 cpi->cpi_ncache = sizeof (*cp) - 1; 1314 1315 dp = cpi->cpi_cacheinfo; 1316 if (BITX(cp->cp_eax, 31, 31) == 0) { 1317 uint8_t *p = (void *)&cp->cp_eax; 1318 for (i = 1; i < 4; i++) 1319 if (p[i] != 0) 1320 *dp++ = p[i]; 1321 } 1322 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1323 uint8_t *p = (void *)&cp->cp_ebx; 1324 for (i = 0; i < 4; i++) 1325 if (p[i] != 0) 1326 *dp++ = p[i]; 1327 } 1328 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1329 uint8_t *p = (void *)&cp->cp_ecx; 1330 for (i = 0; i < 4; i++) 1331 if (p[i] != 0) 1332 *dp++ = p[i]; 1333 } 1334 if (BITX(cp->cp_edx, 31, 31) == 0) { 1335 uint8_t *p = (void *)&cp->cp_edx; 1336 for (i = 0; i < 4; i++) 1337 if (p[i] != 0) 1338 *dp++ = p[i]; 1339 } 1340 break; 1341 1342 case 3: /* Processor serial number, if PSN supported */ 1343 break; 1344 1345 case 4: /* Deterministic cache parameters */ 1346 break; 1347 1348 case 5: /* Monitor/Mwait parameters */ 1349 { 1350 size_t mwait_size; 1351 1352 /* 1353 * check cpi_mwait.support which was set in cpuid_pass1 1354 */ 1355 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1356 break; 1357 1358 /* 1359 * Protect ourself from insane mwait line size. 1360 * Workaround for incomplete hardware emulator(s). 1361 */ 1362 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1363 if (mwait_size < sizeof (uint32_t) || 1364 !ISP2(mwait_size)) { 1365 #if DEBUG 1366 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1367 "size %ld", cpu->cpu_id, (long)mwait_size); 1368 #endif 1369 break; 1370 } 1371 1372 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1373 cpi->cpi_mwait.mon_max = mwait_size; 1374 if (MWAIT_EXTENSION(cpi)) { 1375 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1376 if (MWAIT_INT_ENABLE(cpi)) 1377 cpi->cpi_mwait.support |= 1378 MWAIT_ECX_INT_ENABLE; 1379 } 1380 break; 1381 } 1382 default: 1383 break; 1384 } 1385 } 1386 1387 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1388 struct cpuid_regs regs; 1389 1390 cp = ®s; 1391 cp->cp_eax = 0xB; 1392 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1393 1394 (void) __cpuid_insn(cp); 1395 1396 /* 1397 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1398 * indicates that the extended topology enumeration leaf is 1399 * available. 1400 */ 1401 if (cp->cp_ebx) { 1402 uint32_t x2apic_id; 1403 uint_t coreid_shift = 0; 1404 uint_t ncpu_per_core = 1; 1405 uint_t chipid_shift = 0; 1406 uint_t ncpu_per_chip = 1; 1407 uint_t i; 1408 uint_t level; 1409 1410 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1411 cp->cp_eax = 0xB; 1412 cp->cp_ecx = i; 1413 1414 (void) __cpuid_insn(cp); 1415 level = CPI_CPU_LEVEL_TYPE(cp); 1416 1417 if (level == 1) { 1418 x2apic_id = cp->cp_edx; 1419 coreid_shift = BITX(cp->cp_eax, 4, 0); 1420 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1421 } else if (level == 2) { 1422 x2apic_id = cp->cp_edx; 1423 chipid_shift = BITX(cp->cp_eax, 4, 0); 1424 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1425 } 1426 } 1427 1428 cpi->cpi_apicid = x2apic_id; 1429 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1430 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1431 ncpu_per_core; 1432 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1433 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1434 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1435 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1436 } 1437 1438 /* Make cp NULL so that we don't stumble on others */ 1439 cp = NULL; 1440 } 1441 1442 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1443 goto pass2_done; 1444 1445 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1446 nmax = NMAX_CPI_EXTD; 1447 /* 1448 * Copy the extended properties, fixing them as we go. 1449 * (We already handled n == 0 and n == 1 in pass 1) 1450 */ 1451 iptr = (void *)cpi->cpi_brandstr; 1452 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1453 cp->cp_eax = 0x80000000 + n; 1454 (void) __cpuid_insn(cp); 1455 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1456 switch (n) { 1457 case 2: 1458 case 3: 1459 case 4: 1460 /* 1461 * Extract the brand string 1462 */ 1463 *iptr++ = cp->cp_eax; 1464 *iptr++ = cp->cp_ebx; 1465 *iptr++ = cp->cp_ecx; 1466 *iptr++ = cp->cp_edx; 1467 break; 1468 case 5: 1469 switch (cpi->cpi_vendor) { 1470 case X86_VENDOR_AMD: 1471 /* 1472 * The Athlon and Duron were the first 1473 * parts to report the sizes of the 1474 * TLB for large pages. Before then, 1475 * we don't trust the data. 1476 */ 1477 if (cpi->cpi_family < 6 || 1478 (cpi->cpi_family == 6 && 1479 cpi->cpi_model < 1)) 1480 cp->cp_eax = 0; 1481 break; 1482 default: 1483 break; 1484 } 1485 break; 1486 case 6: 1487 switch (cpi->cpi_vendor) { 1488 case X86_VENDOR_AMD: 1489 /* 1490 * The Athlon and Duron were the first 1491 * AMD parts with L2 TLB's. 1492 * Before then, don't trust the data. 1493 */ 1494 if (cpi->cpi_family < 6 || 1495 cpi->cpi_family == 6 && 1496 cpi->cpi_model < 1) 1497 cp->cp_eax = cp->cp_ebx = 0; 1498 /* 1499 * AMD Duron rev A0 reports L2 1500 * cache size incorrectly as 1K 1501 * when it is really 64K 1502 */ 1503 if (cpi->cpi_family == 6 && 1504 cpi->cpi_model == 3 && 1505 cpi->cpi_step == 0) { 1506 cp->cp_ecx &= 0xffff; 1507 cp->cp_ecx |= 0x400000; 1508 } 1509 break; 1510 case X86_VENDOR_Cyrix: /* VIA C3 */ 1511 /* 1512 * VIA C3 processors are a bit messed 1513 * up w.r.t. encoding cache sizes in %ecx 1514 */ 1515 if (cpi->cpi_family != 6) 1516 break; 1517 /* 1518 * model 7 and 8 were incorrectly encoded 1519 * 1520 * xxx is model 8 really broken? 1521 */ 1522 if (cpi->cpi_model == 7 || 1523 cpi->cpi_model == 8) 1524 cp->cp_ecx = 1525 BITX(cp->cp_ecx, 31, 24) << 16 | 1526 BITX(cp->cp_ecx, 23, 16) << 12 | 1527 BITX(cp->cp_ecx, 15, 8) << 8 | 1528 BITX(cp->cp_ecx, 7, 0); 1529 /* 1530 * model 9 stepping 1 has wrong associativity 1531 */ 1532 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1533 cp->cp_ecx |= 8 << 12; 1534 break; 1535 case X86_VENDOR_Intel: 1536 /* 1537 * Extended L2 Cache features function. 1538 * First appeared on Prescott. 1539 */ 1540 default: 1541 break; 1542 } 1543 break; 1544 default: 1545 break; 1546 } 1547 } 1548 1549 pass2_done: 1550 cpi->cpi_pass = 2; 1551 } 1552 1553 static const char * 1554 intel_cpubrand(const struct cpuid_info *cpi) 1555 { 1556 int i; 1557 1558 if ((x86_feature & X86_CPUID) == 0 || 1559 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1560 return ("i486"); 1561 1562 switch (cpi->cpi_family) { 1563 case 5: 1564 return ("Intel Pentium(r)"); 1565 case 6: 1566 switch (cpi->cpi_model) { 1567 uint_t celeron, xeon; 1568 const struct cpuid_regs *cp; 1569 case 0: 1570 case 1: 1571 case 2: 1572 return ("Intel Pentium(r) Pro"); 1573 case 3: 1574 case 4: 1575 return ("Intel Pentium(r) II"); 1576 case 6: 1577 return ("Intel Celeron(r)"); 1578 case 5: 1579 case 7: 1580 celeron = xeon = 0; 1581 cp = &cpi->cpi_std[2]; /* cache info */ 1582 1583 for (i = 1; i < 4; i++) { 1584 uint_t tmp; 1585 1586 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1587 if (tmp == 0x40) 1588 celeron++; 1589 if (tmp >= 0x44 && tmp <= 0x45) 1590 xeon++; 1591 } 1592 1593 for (i = 0; i < 2; i++) { 1594 uint_t tmp; 1595 1596 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1597 if (tmp == 0x40) 1598 celeron++; 1599 else if (tmp >= 0x44 && tmp <= 0x45) 1600 xeon++; 1601 } 1602 1603 for (i = 0; i < 4; i++) { 1604 uint_t tmp; 1605 1606 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1607 if (tmp == 0x40) 1608 celeron++; 1609 else if (tmp >= 0x44 && tmp <= 0x45) 1610 xeon++; 1611 } 1612 1613 for (i = 0; i < 4; i++) { 1614 uint_t tmp; 1615 1616 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1617 if (tmp == 0x40) 1618 celeron++; 1619 else if (tmp >= 0x44 && tmp <= 0x45) 1620 xeon++; 1621 } 1622 1623 if (celeron) 1624 return ("Intel Celeron(r)"); 1625 if (xeon) 1626 return (cpi->cpi_model == 5 ? 1627 "Intel Pentium(r) II Xeon(tm)" : 1628 "Intel Pentium(r) III Xeon(tm)"); 1629 return (cpi->cpi_model == 5 ? 1630 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1631 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1632 default: 1633 break; 1634 } 1635 default: 1636 break; 1637 } 1638 1639 /* BrandID is present if the field is nonzero */ 1640 if (cpi->cpi_brandid != 0) { 1641 static const struct { 1642 uint_t bt_bid; 1643 const char *bt_str; 1644 } brand_tbl[] = { 1645 { 0x1, "Intel(r) Celeron(r)" }, 1646 { 0x2, "Intel(r) Pentium(r) III" }, 1647 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1648 { 0x4, "Intel(r) Pentium(r) III" }, 1649 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1650 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1651 { 0x8, "Intel(r) Pentium(r) 4" }, 1652 { 0x9, "Intel(r) Pentium(r) 4" }, 1653 { 0xa, "Intel(r) Celeron(r)" }, 1654 { 0xb, "Intel(r) Xeon(tm)" }, 1655 { 0xc, "Intel(r) Xeon(tm) MP" }, 1656 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1657 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1658 { 0x11, "Mobile Genuine Intel(r)" }, 1659 { 0x12, "Intel(r) Celeron(r) M" }, 1660 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1661 { 0x14, "Intel(r) Celeron(r)" }, 1662 { 0x15, "Mobile Genuine Intel(r)" }, 1663 { 0x16, "Intel(r) Pentium(r) M" }, 1664 { 0x17, "Mobile Intel(r) Celeron(r)" } 1665 }; 1666 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1667 uint_t sgn; 1668 1669 sgn = (cpi->cpi_family << 8) | 1670 (cpi->cpi_model << 4) | cpi->cpi_step; 1671 1672 for (i = 0; i < btblmax; i++) 1673 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1674 break; 1675 if (i < btblmax) { 1676 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1677 return ("Intel(r) Celeron(r)"); 1678 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1679 return ("Intel(r) Xeon(tm) MP"); 1680 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1681 return ("Intel(r) Xeon(tm)"); 1682 return (brand_tbl[i].bt_str); 1683 } 1684 } 1685 1686 return (NULL); 1687 } 1688 1689 static const char * 1690 amd_cpubrand(const struct cpuid_info *cpi) 1691 { 1692 if ((x86_feature & X86_CPUID) == 0 || 1693 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1694 return ("i486 compatible"); 1695 1696 switch (cpi->cpi_family) { 1697 case 5: 1698 switch (cpi->cpi_model) { 1699 case 0: 1700 case 1: 1701 case 2: 1702 case 3: 1703 case 4: 1704 case 5: 1705 return ("AMD-K5(r)"); 1706 case 6: 1707 case 7: 1708 return ("AMD-K6(r)"); 1709 case 8: 1710 return ("AMD-K6(r)-2"); 1711 case 9: 1712 return ("AMD-K6(r)-III"); 1713 default: 1714 return ("AMD (family 5)"); 1715 } 1716 case 6: 1717 switch (cpi->cpi_model) { 1718 case 1: 1719 return ("AMD-K7(tm)"); 1720 case 0: 1721 case 2: 1722 case 4: 1723 return ("AMD Athlon(tm)"); 1724 case 3: 1725 case 7: 1726 return ("AMD Duron(tm)"); 1727 case 6: 1728 case 8: 1729 case 10: 1730 /* 1731 * Use the L2 cache size to distinguish 1732 */ 1733 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1734 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1735 default: 1736 return ("AMD (family 6)"); 1737 } 1738 default: 1739 break; 1740 } 1741 1742 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1743 cpi->cpi_brandid != 0) { 1744 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1745 case 3: 1746 return ("AMD Opteron(tm) UP 1xx"); 1747 case 4: 1748 return ("AMD Opteron(tm) DP 2xx"); 1749 case 5: 1750 return ("AMD Opteron(tm) MP 8xx"); 1751 default: 1752 return ("AMD Opteron(tm)"); 1753 } 1754 } 1755 1756 return (NULL); 1757 } 1758 1759 static const char * 1760 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1761 { 1762 if ((x86_feature & X86_CPUID) == 0 || 1763 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1764 type == X86_TYPE_CYRIX_486) 1765 return ("i486 compatible"); 1766 1767 switch (type) { 1768 case X86_TYPE_CYRIX_6x86: 1769 return ("Cyrix 6x86"); 1770 case X86_TYPE_CYRIX_6x86L: 1771 return ("Cyrix 6x86L"); 1772 case X86_TYPE_CYRIX_6x86MX: 1773 return ("Cyrix 6x86MX"); 1774 case X86_TYPE_CYRIX_GXm: 1775 return ("Cyrix GXm"); 1776 case X86_TYPE_CYRIX_MediaGX: 1777 return ("Cyrix MediaGX"); 1778 case X86_TYPE_CYRIX_MII: 1779 return ("Cyrix M2"); 1780 case X86_TYPE_VIA_CYRIX_III: 1781 return ("VIA Cyrix M3"); 1782 default: 1783 /* 1784 * Have another wild guess .. 1785 */ 1786 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1787 return ("Cyrix 5x86"); 1788 else if (cpi->cpi_family == 5) { 1789 switch (cpi->cpi_model) { 1790 case 2: 1791 return ("Cyrix 6x86"); /* Cyrix M1 */ 1792 case 4: 1793 return ("Cyrix MediaGX"); 1794 default: 1795 break; 1796 } 1797 } else if (cpi->cpi_family == 6) { 1798 switch (cpi->cpi_model) { 1799 case 0: 1800 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1801 case 5: 1802 case 6: 1803 case 7: 1804 case 8: 1805 case 9: 1806 return ("VIA C3"); 1807 default: 1808 break; 1809 } 1810 } 1811 break; 1812 } 1813 return (NULL); 1814 } 1815 1816 /* 1817 * This only gets called in the case that the CPU extended 1818 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1819 * aren't available, or contain null bytes for some reason. 1820 */ 1821 static void 1822 fabricate_brandstr(struct cpuid_info *cpi) 1823 { 1824 const char *brand = NULL; 1825 1826 switch (cpi->cpi_vendor) { 1827 case X86_VENDOR_Intel: 1828 brand = intel_cpubrand(cpi); 1829 break; 1830 case X86_VENDOR_AMD: 1831 brand = amd_cpubrand(cpi); 1832 break; 1833 case X86_VENDOR_Cyrix: 1834 brand = cyrix_cpubrand(cpi, x86_type); 1835 break; 1836 case X86_VENDOR_NexGen: 1837 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1838 brand = "NexGen Nx586"; 1839 break; 1840 case X86_VENDOR_Centaur: 1841 if (cpi->cpi_family == 5) 1842 switch (cpi->cpi_model) { 1843 case 4: 1844 brand = "Centaur C6"; 1845 break; 1846 case 8: 1847 brand = "Centaur C2"; 1848 break; 1849 case 9: 1850 brand = "Centaur C3"; 1851 break; 1852 default: 1853 break; 1854 } 1855 break; 1856 case X86_VENDOR_Rise: 1857 if (cpi->cpi_family == 5 && 1858 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1859 brand = "Rise mP6"; 1860 break; 1861 case X86_VENDOR_SiS: 1862 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1863 brand = "SiS 55x"; 1864 break; 1865 case X86_VENDOR_TM: 1866 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 1867 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1868 break; 1869 case X86_VENDOR_NSC: 1870 case X86_VENDOR_UMC: 1871 default: 1872 break; 1873 } 1874 if (brand) { 1875 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1876 return; 1877 } 1878 1879 /* 1880 * If all else fails ... 1881 */ 1882 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1883 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1884 cpi->cpi_model, cpi->cpi_step); 1885 } 1886 1887 /* 1888 * This routine is called just after kernel memory allocation 1889 * becomes available on cpu0, and as part of mp_startup() on 1890 * the other cpus. 1891 * 1892 * Fixup the brand string, and collect any information from cpuid 1893 * that requires dynamicically allocated storage to represent. 1894 */ 1895 /*ARGSUSED*/ 1896 void 1897 cpuid_pass3(cpu_t *cpu) 1898 { 1899 int i, max, shft, level, size; 1900 struct cpuid_regs regs; 1901 struct cpuid_regs *cp; 1902 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1903 1904 ASSERT(cpi->cpi_pass == 2); 1905 1906 /* 1907 * Function 4: Deterministic cache parameters 1908 * 1909 * Take this opportunity to detect the number of threads 1910 * sharing the last level cache, and construct a corresponding 1911 * cache id. The respective cpuid_info members are initialized 1912 * to the default case of "no last level cache sharing". 1913 */ 1914 cpi->cpi_ncpu_shr_last_cache = 1; 1915 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1916 1917 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1918 1919 /* 1920 * Find the # of elements (size) returned by fn 4, and along 1921 * the way detect last level cache sharing details. 1922 */ 1923 bzero(®s, sizeof (regs)); 1924 cp = ®s; 1925 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1926 cp->cp_eax = 4; 1927 cp->cp_ecx = i; 1928 1929 (void) __cpuid_insn(cp); 1930 1931 if (CPI_CACHE_TYPE(cp) == 0) 1932 break; 1933 level = CPI_CACHE_LVL(cp); 1934 if (level > max) { 1935 max = level; 1936 cpi->cpi_ncpu_shr_last_cache = 1937 CPI_NTHR_SHR_CACHE(cp) + 1; 1938 } 1939 } 1940 cpi->cpi_std_4_size = size = i; 1941 1942 /* 1943 * Allocate the cpi_std_4 array. The first element 1944 * references the regs for fn 4, %ecx == 0, which 1945 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1946 */ 1947 if (size > 0) { 1948 cpi->cpi_std_4 = 1949 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1950 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1951 1952 /* 1953 * Allocate storage to hold the additional regs 1954 * for function 4, %ecx == 1 .. cpi_std_4_size. 1955 * 1956 * The regs for fn 4, %ecx == 0 has already 1957 * been allocated as indicated above. 1958 */ 1959 for (i = 1; i < size; i++) { 1960 cp = cpi->cpi_std_4[i] = 1961 kmem_zalloc(sizeof (regs), KM_SLEEP); 1962 cp->cp_eax = 4; 1963 cp->cp_ecx = i; 1964 1965 (void) __cpuid_insn(cp); 1966 } 1967 } 1968 /* 1969 * Determine the number of bits needed to represent 1970 * the number of CPUs sharing the last level cache. 1971 * 1972 * Shift off that number of bits from the APIC id to 1973 * derive the cache id. 1974 */ 1975 shft = 0; 1976 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1977 shft++; 1978 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 1979 } 1980 1981 /* 1982 * Now fixup the brand string 1983 */ 1984 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1985 fabricate_brandstr(cpi); 1986 } else { 1987 1988 /* 1989 * If we successfully extracted a brand string from the cpuid 1990 * instruction, clean it up by removing leading spaces and 1991 * similar junk. 1992 */ 1993 if (cpi->cpi_brandstr[0]) { 1994 size_t maxlen = sizeof (cpi->cpi_brandstr); 1995 char *src, *dst; 1996 1997 dst = src = (char *)cpi->cpi_brandstr; 1998 src[maxlen - 1] = '\0'; 1999 /* 2000 * strip leading spaces 2001 */ 2002 while (*src == ' ') 2003 src++; 2004 /* 2005 * Remove any 'Genuine' or "Authentic" prefixes 2006 */ 2007 if (strncmp(src, "Genuine ", 8) == 0) 2008 src += 8; 2009 if (strncmp(src, "Authentic ", 10) == 0) 2010 src += 10; 2011 2012 /* 2013 * Now do an in-place copy. 2014 * Map (R) to (r) and (TM) to (tm). 2015 * The era of teletypes is long gone, and there's 2016 * -really- no need to shout. 2017 */ 2018 while (*src != '\0') { 2019 if (src[0] == '(') { 2020 if (strncmp(src + 1, "R)", 2) == 0) { 2021 (void) strncpy(dst, "(r)", 3); 2022 src += 3; 2023 dst += 3; 2024 continue; 2025 } 2026 if (strncmp(src + 1, "TM)", 3) == 0) { 2027 (void) strncpy(dst, "(tm)", 4); 2028 src += 4; 2029 dst += 4; 2030 continue; 2031 } 2032 } 2033 *dst++ = *src++; 2034 } 2035 *dst = '\0'; 2036 2037 /* 2038 * Finally, remove any trailing spaces 2039 */ 2040 while (--dst > cpi->cpi_brandstr) 2041 if (*dst == ' ') 2042 *dst = '\0'; 2043 else 2044 break; 2045 } else 2046 fabricate_brandstr(cpi); 2047 } 2048 cpi->cpi_pass = 3; 2049 } 2050 2051 /* 2052 * This routine is called out of bind_hwcap() much later in the life 2053 * of the kernel (post_startup()). The job of this routine is to resolve 2054 * the hardware feature support and kernel support for those features into 2055 * what we're actually going to tell applications via the aux vector. 2056 */ 2057 uint_t 2058 cpuid_pass4(cpu_t *cpu) 2059 { 2060 struct cpuid_info *cpi; 2061 uint_t hwcap_flags = 0; 2062 2063 if (cpu == NULL) 2064 cpu = CPU; 2065 cpi = cpu->cpu_m.mcpu_cpi; 2066 2067 ASSERT(cpi->cpi_pass == 3); 2068 2069 if (cpi->cpi_maxeax >= 1) { 2070 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2071 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2072 2073 *edx = CPI_FEATURES_EDX(cpi); 2074 *ecx = CPI_FEATURES_ECX(cpi); 2075 2076 /* 2077 * [these require explicit kernel support] 2078 */ 2079 if ((x86_feature & X86_SEP) == 0) 2080 *edx &= ~CPUID_INTC_EDX_SEP; 2081 2082 if ((x86_feature & X86_SSE) == 0) 2083 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2084 if ((x86_feature & X86_SSE2) == 0) 2085 *edx &= ~CPUID_INTC_EDX_SSE2; 2086 2087 if ((x86_feature & X86_HTT) == 0) 2088 *edx &= ~CPUID_INTC_EDX_HTT; 2089 2090 if ((x86_feature & X86_SSE3) == 0) 2091 *ecx &= ~CPUID_INTC_ECX_SSE3; 2092 2093 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2094 if ((x86_feature & X86_SSSE3) == 0) 2095 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2096 if ((x86_feature & X86_SSE4_1) == 0) 2097 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2098 if ((x86_feature & X86_SSE4_2) == 0) 2099 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2100 if ((x86_feature & X86_AES) == 0) 2101 *ecx &= ~CPUID_INTC_ECX_AES; 2102 } 2103 2104 /* 2105 * [no explicit support required beyond x87 fp context] 2106 */ 2107 if (!fpu_exists) 2108 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2109 2110 /* 2111 * Now map the supported feature vector to things that we 2112 * think userland will care about. 2113 */ 2114 if (*edx & CPUID_INTC_EDX_SEP) 2115 hwcap_flags |= AV_386_SEP; 2116 if (*edx & CPUID_INTC_EDX_SSE) 2117 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2118 if (*edx & CPUID_INTC_EDX_SSE2) 2119 hwcap_flags |= AV_386_SSE2; 2120 if (*ecx & CPUID_INTC_ECX_SSE3) 2121 hwcap_flags |= AV_386_SSE3; 2122 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2123 if (*ecx & CPUID_INTC_ECX_SSSE3) 2124 hwcap_flags |= AV_386_SSSE3; 2125 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2126 hwcap_flags |= AV_386_SSE4_1; 2127 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2128 hwcap_flags |= AV_386_SSE4_2; 2129 if (*ecx & CPUID_INTC_ECX_MOVBE) 2130 hwcap_flags |= AV_386_MOVBE; 2131 if (*ecx & CPUID_INTC_ECX_AES) 2132 hwcap_flags |= AV_386_AES; 2133 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2134 hwcap_flags |= AV_386_PCLMULQDQ; 2135 } 2136 if (*ecx & CPUID_INTC_ECX_POPCNT) 2137 hwcap_flags |= AV_386_POPCNT; 2138 if (*edx & CPUID_INTC_EDX_FPU) 2139 hwcap_flags |= AV_386_FPU; 2140 if (*edx & CPUID_INTC_EDX_MMX) 2141 hwcap_flags |= AV_386_MMX; 2142 2143 if (*edx & CPUID_INTC_EDX_TSC) 2144 hwcap_flags |= AV_386_TSC; 2145 if (*edx & CPUID_INTC_EDX_CX8) 2146 hwcap_flags |= AV_386_CX8; 2147 if (*edx & CPUID_INTC_EDX_CMOV) 2148 hwcap_flags |= AV_386_CMOV; 2149 if (*ecx & CPUID_INTC_ECX_MON) 2150 hwcap_flags |= AV_386_MON; 2151 if (*ecx & CPUID_INTC_ECX_CX16) 2152 hwcap_flags |= AV_386_CX16; 2153 } 2154 2155 if (x86_feature & X86_HTT) 2156 hwcap_flags |= AV_386_PAUSE; 2157 2158 if (cpi->cpi_xmaxeax < 0x80000001) 2159 goto pass4_done; 2160 2161 switch (cpi->cpi_vendor) { 2162 struct cpuid_regs cp; 2163 uint32_t *edx, *ecx; 2164 2165 case X86_VENDOR_Intel: 2166 /* 2167 * Seems like Intel duplicated what we necessary 2168 * here to make the initial crop of 64-bit OS's work. 2169 * Hopefully, those are the only "extended" bits 2170 * they'll add. 2171 */ 2172 /*FALLTHROUGH*/ 2173 2174 case X86_VENDOR_AMD: 2175 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2176 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2177 2178 *edx = CPI_FEATURES_XTD_EDX(cpi); 2179 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2180 2181 /* 2182 * [these features require explicit kernel support] 2183 */ 2184 switch (cpi->cpi_vendor) { 2185 case X86_VENDOR_Intel: 2186 if ((x86_feature & X86_TSCP) == 0) 2187 *edx &= ~CPUID_AMD_EDX_TSCP; 2188 break; 2189 2190 case X86_VENDOR_AMD: 2191 if ((x86_feature & X86_TSCP) == 0) 2192 *edx &= ~CPUID_AMD_EDX_TSCP; 2193 if ((x86_feature & X86_SSE4A) == 0) 2194 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2195 break; 2196 2197 default: 2198 break; 2199 } 2200 2201 /* 2202 * [no explicit support required beyond 2203 * x87 fp context and exception handlers] 2204 */ 2205 if (!fpu_exists) 2206 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2207 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2208 2209 if ((x86_feature & X86_NX) == 0) 2210 *edx &= ~CPUID_AMD_EDX_NX; 2211 #if !defined(__amd64) 2212 *edx &= ~CPUID_AMD_EDX_LM; 2213 #endif 2214 /* 2215 * Now map the supported feature vector to 2216 * things that we think userland will care about. 2217 */ 2218 #if defined(__amd64) 2219 if (*edx & CPUID_AMD_EDX_SYSC) 2220 hwcap_flags |= AV_386_AMD_SYSC; 2221 #endif 2222 if (*edx & CPUID_AMD_EDX_MMXamd) 2223 hwcap_flags |= AV_386_AMD_MMX; 2224 if (*edx & CPUID_AMD_EDX_3DNow) 2225 hwcap_flags |= AV_386_AMD_3DNow; 2226 if (*edx & CPUID_AMD_EDX_3DNowx) 2227 hwcap_flags |= AV_386_AMD_3DNowx; 2228 2229 switch (cpi->cpi_vendor) { 2230 case X86_VENDOR_AMD: 2231 if (*edx & CPUID_AMD_EDX_TSCP) 2232 hwcap_flags |= AV_386_TSCP; 2233 if (*ecx & CPUID_AMD_ECX_AHF64) 2234 hwcap_flags |= AV_386_AHF; 2235 if (*ecx & CPUID_AMD_ECX_SSE4A) 2236 hwcap_flags |= AV_386_AMD_SSE4A; 2237 if (*ecx & CPUID_AMD_ECX_LZCNT) 2238 hwcap_flags |= AV_386_AMD_LZCNT; 2239 break; 2240 2241 case X86_VENDOR_Intel: 2242 if (*edx & CPUID_AMD_EDX_TSCP) 2243 hwcap_flags |= AV_386_TSCP; 2244 /* 2245 * Aarrgh. 2246 * Intel uses a different bit in the same word. 2247 */ 2248 if (*ecx & CPUID_INTC_ECX_AHF64) 2249 hwcap_flags |= AV_386_AHF; 2250 break; 2251 2252 default: 2253 break; 2254 } 2255 break; 2256 2257 case X86_VENDOR_TM: 2258 cp.cp_eax = 0x80860001; 2259 (void) __cpuid_insn(&cp); 2260 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2261 break; 2262 2263 default: 2264 break; 2265 } 2266 2267 pass4_done: 2268 cpi->cpi_pass = 4; 2269 return (hwcap_flags); 2270 } 2271 2272 2273 /* 2274 * Simulate the cpuid instruction using the data we previously 2275 * captured about this CPU. We try our best to return the truth 2276 * about the hardware, independently of kernel support. 2277 */ 2278 uint32_t 2279 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2280 { 2281 struct cpuid_info *cpi; 2282 struct cpuid_regs *xcp; 2283 2284 if (cpu == NULL) 2285 cpu = CPU; 2286 cpi = cpu->cpu_m.mcpu_cpi; 2287 2288 ASSERT(cpuid_checkpass(cpu, 3)); 2289 2290 /* 2291 * CPUID data is cached in two separate places: cpi_std for standard 2292 * CPUID functions, and cpi_extd for extended CPUID functions. 2293 */ 2294 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2295 xcp = &cpi->cpi_std[cp->cp_eax]; 2296 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2297 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2298 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2299 else 2300 /* 2301 * The caller is asking for data from an input parameter which 2302 * the kernel has not cached. In this case we go fetch from 2303 * the hardware and return the data directly to the user. 2304 */ 2305 return (__cpuid_insn(cp)); 2306 2307 cp->cp_eax = xcp->cp_eax; 2308 cp->cp_ebx = xcp->cp_ebx; 2309 cp->cp_ecx = xcp->cp_ecx; 2310 cp->cp_edx = xcp->cp_edx; 2311 return (cp->cp_eax); 2312 } 2313 2314 int 2315 cpuid_checkpass(cpu_t *cpu, int pass) 2316 { 2317 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2318 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2319 } 2320 2321 int 2322 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2323 { 2324 ASSERT(cpuid_checkpass(cpu, 3)); 2325 2326 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2327 } 2328 2329 int 2330 cpuid_is_cmt(cpu_t *cpu) 2331 { 2332 if (cpu == NULL) 2333 cpu = CPU; 2334 2335 ASSERT(cpuid_checkpass(cpu, 1)); 2336 2337 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2338 } 2339 2340 /* 2341 * AMD and Intel both implement the 64-bit variant of the syscall 2342 * instruction (syscallq), so if there's -any- support for syscall, 2343 * cpuid currently says "yes, we support this". 2344 * 2345 * However, Intel decided to -not- implement the 32-bit variant of the 2346 * syscall instruction, so we provide a predicate to allow our caller 2347 * to test that subtlety here. 2348 * 2349 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2350 * even in the case where the hardware would in fact support it. 2351 */ 2352 /*ARGSUSED*/ 2353 int 2354 cpuid_syscall32_insn(cpu_t *cpu) 2355 { 2356 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2357 2358 #if !defined(__xpv) 2359 if (cpu == NULL) 2360 cpu = CPU; 2361 2362 /*CSTYLED*/ 2363 { 2364 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2365 2366 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2367 cpi->cpi_xmaxeax >= 0x80000001 && 2368 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2369 return (1); 2370 } 2371 #endif 2372 return (0); 2373 } 2374 2375 int 2376 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2377 { 2378 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2379 2380 static const char fmt[] = 2381 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2382 static const char fmt_ht[] = 2383 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2384 2385 ASSERT(cpuid_checkpass(cpu, 1)); 2386 2387 if (cpuid_is_cmt(cpu)) 2388 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2389 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2390 cpi->cpi_family, cpi->cpi_model, 2391 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2392 return (snprintf(s, n, fmt, 2393 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2394 cpi->cpi_family, cpi->cpi_model, 2395 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2396 } 2397 2398 const char * 2399 cpuid_getvendorstr(cpu_t *cpu) 2400 { 2401 ASSERT(cpuid_checkpass(cpu, 1)); 2402 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2403 } 2404 2405 uint_t 2406 cpuid_getvendor(cpu_t *cpu) 2407 { 2408 ASSERT(cpuid_checkpass(cpu, 1)); 2409 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2410 } 2411 2412 uint_t 2413 cpuid_getfamily(cpu_t *cpu) 2414 { 2415 ASSERT(cpuid_checkpass(cpu, 1)); 2416 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2417 } 2418 2419 uint_t 2420 cpuid_getmodel(cpu_t *cpu) 2421 { 2422 ASSERT(cpuid_checkpass(cpu, 1)); 2423 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2424 } 2425 2426 uint_t 2427 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2428 { 2429 ASSERT(cpuid_checkpass(cpu, 1)); 2430 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2431 } 2432 2433 uint_t 2434 cpuid_get_ncore_per_chip(cpu_t *cpu) 2435 { 2436 ASSERT(cpuid_checkpass(cpu, 1)); 2437 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2438 } 2439 2440 uint_t 2441 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2442 { 2443 ASSERT(cpuid_checkpass(cpu, 2)); 2444 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2445 } 2446 2447 id_t 2448 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2449 { 2450 ASSERT(cpuid_checkpass(cpu, 2)); 2451 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2452 } 2453 2454 uint_t 2455 cpuid_getstep(cpu_t *cpu) 2456 { 2457 ASSERT(cpuid_checkpass(cpu, 1)); 2458 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2459 } 2460 2461 uint_t 2462 cpuid_getsig(struct cpu *cpu) 2463 { 2464 ASSERT(cpuid_checkpass(cpu, 1)); 2465 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2466 } 2467 2468 uint32_t 2469 cpuid_getchiprev(struct cpu *cpu) 2470 { 2471 ASSERT(cpuid_checkpass(cpu, 1)); 2472 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2473 } 2474 2475 const char * 2476 cpuid_getchiprevstr(struct cpu *cpu) 2477 { 2478 ASSERT(cpuid_checkpass(cpu, 1)); 2479 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2480 } 2481 2482 uint32_t 2483 cpuid_getsockettype(struct cpu *cpu) 2484 { 2485 ASSERT(cpuid_checkpass(cpu, 1)); 2486 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2487 } 2488 2489 const char * 2490 cpuid_getsocketstr(cpu_t *cpu) 2491 { 2492 static const char *socketstr = NULL; 2493 struct cpuid_info *cpi; 2494 2495 ASSERT(cpuid_checkpass(cpu, 1)); 2496 cpi = cpu->cpu_m.mcpu_cpi; 2497 2498 /* Assume that socket types are the same across the system */ 2499 if (socketstr == NULL) 2500 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 2501 cpi->cpi_model, cpi->cpi_step); 2502 2503 2504 return (socketstr); 2505 } 2506 2507 int 2508 cpuid_get_chipid(cpu_t *cpu) 2509 { 2510 ASSERT(cpuid_checkpass(cpu, 1)); 2511 2512 if (cpuid_is_cmt(cpu)) 2513 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2514 return (cpu->cpu_id); 2515 } 2516 2517 id_t 2518 cpuid_get_coreid(cpu_t *cpu) 2519 { 2520 ASSERT(cpuid_checkpass(cpu, 1)); 2521 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2522 } 2523 2524 int 2525 cpuid_get_pkgcoreid(cpu_t *cpu) 2526 { 2527 ASSERT(cpuid_checkpass(cpu, 1)); 2528 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2529 } 2530 2531 int 2532 cpuid_get_clogid(cpu_t *cpu) 2533 { 2534 ASSERT(cpuid_checkpass(cpu, 1)); 2535 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2536 } 2537 2538 void 2539 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2540 { 2541 struct cpuid_info *cpi; 2542 2543 if (cpu == NULL) 2544 cpu = CPU; 2545 cpi = cpu->cpu_m.mcpu_cpi; 2546 2547 ASSERT(cpuid_checkpass(cpu, 1)); 2548 2549 if (pabits) 2550 *pabits = cpi->cpi_pabits; 2551 if (vabits) 2552 *vabits = cpi->cpi_vabits; 2553 } 2554 2555 /* 2556 * Returns the number of data TLB entries for a corresponding 2557 * pagesize. If it can't be computed, or isn't known, the 2558 * routine returns zero. If you ask about an architecturally 2559 * impossible pagesize, the routine will panic (so that the 2560 * hat implementor knows that things are inconsistent.) 2561 */ 2562 uint_t 2563 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2564 { 2565 struct cpuid_info *cpi; 2566 uint_t dtlb_nent = 0; 2567 2568 if (cpu == NULL) 2569 cpu = CPU; 2570 cpi = cpu->cpu_m.mcpu_cpi; 2571 2572 ASSERT(cpuid_checkpass(cpu, 1)); 2573 2574 /* 2575 * Check the L2 TLB info 2576 */ 2577 if (cpi->cpi_xmaxeax >= 0x80000006) { 2578 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2579 2580 switch (pagesize) { 2581 2582 case 4 * 1024: 2583 /* 2584 * All zero in the top 16 bits of the register 2585 * indicates a unified TLB. Size is in low 16 bits. 2586 */ 2587 if ((cp->cp_ebx & 0xffff0000) == 0) 2588 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2589 else 2590 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2591 break; 2592 2593 case 2 * 1024 * 1024: 2594 if ((cp->cp_eax & 0xffff0000) == 0) 2595 dtlb_nent = cp->cp_eax & 0x0000ffff; 2596 else 2597 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2598 break; 2599 2600 default: 2601 panic("unknown L2 pagesize"); 2602 /*NOTREACHED*/ 2603 } 2604 } 2605 2606 if (dtlb_nent != 0) 2607 return (dtlb_nent); 2608 2609 /* 2610 * No L2 TLB support for this size, try L1. 2611 */ 2612 if (cpi->cpi_xmaxeax >= 0x80000005) { 2613 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2614 2615 switch (pagesize) { 2616 case 4 * 1024: 2617 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2618 break; 2619 case 2 * 1024 * 1024: 2620 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2621 break; 2622 default: 2623 panic("unknown L1 d-TLB pagesize"); 2624 /*NOTREACHED*/ 2625 } 2626 } 2627 2628 return (dtlb_nent); 2629 } 2630 2631 /* 2632 * Return 0 if the erratum is not present or not applicable, positive 2633 * if it is, and negative if the status of the erratum is unknown. 2634 * 2635 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2636 * Processors" #25759, Rev 3.57, August 2005 2637 */ 2638 int 2639 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2640 { 2641 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2642 uint_t eax; 2643 2644 /* 2645 * Bail out if this CPU isn't an AMD CPU, or if it's 2646 * a legacy (32-bit) AMD CPU. 2647 */ 2648 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2649 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2650 cpi->cpi_family == 6) 2651 2652 return (0); 2653 2654 eax = cpi->cpi_std[1].cp_eax; 2655 2656 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2657 #define SH_B3(eax) (eax == 0xf51) 2658 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2659 2660 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2661 2662 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2663 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2664 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2665 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2666 2667 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2668 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2669 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2670 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2671 2672 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2673 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2674 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2675 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2676 #define BH_E4(eax) (eax == 0x20fb1) 2677 #define SH_E5(eax) (eax == 0x20f42) 2678 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2679 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2680 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2681 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2682 DH_E6(eax) || JH_E6(eax)) 2683 2684 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2685 #define DR_B0(eax) (eax == 0x100f20) 2686 #define DR_B1(eax) (eax == 0x100f21) 2687 #define DR_BA(eax) (eax == 0x100f2a) 2688 #define DR_B2(eax) (eax == 0x100f22) 2689 #define DR_B3(eax) (eax == 0x100f23) 2690 #define RB_C0(eax) (eax == 0x100f40) 2691 2692 switch (erratum) { 2693 case 1: 2694 return (cpi->cpi_family < 0x10); 2695 case 51: /* what does the asterisk mean? */ 2696 return (B(eax) || SH_C0(eax) || CG(eax)); 2697 case 52: 2698 return (B(eax)); 2699 case 57: 2700 return (cpi->cpi_family <= 0x11); 2701 case 58: 2702 return (B(eax)); 2703 case 60: 2704 return (cpi->cpi_family <= 0x11); 2705 case 61: 2706 case 62: 2707 case 63: 2708 case 64: 2709 case 65: 2710 case 66: 2711 case 68: 2712 case 69: 2713 case 70: 2714 case 71: 2715 return (B(eax)); 2716 case 72: 2717 return (SH_B0(eax)); 2718 case 74: 2719 return (B(eax)); 2720 case 75: 2721 return (cpi->cpi_family < 0x10); 2722 case 76: 2723 return (B(eax)); 2724 case 77: 2725 return (cpi->cpi_family <= 0x11); 2726 case 78: 2727 return (B(eax) || SH_C0(eax)); 2728 case 79: 2729 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2730 case 80: 2731 case 81: 2732 case 82: 2733 return (B(eax)); 2734 case 83: 2735 return (B(eax) || SH_C0(eax) || CG(eax)); 2736 case 85: 2737 return (cpi->cpi_family < 0x10); 2738 case 86: 2739 return (SH_C0(eax) || CG(eax)); 2740 case 88: 2741 #if !defined(__amd64) 2742 return (0); 2743 #else 2744 return (B(eax) || SH_C0(eax)); 2745 #endif 2746 case 89: 2747 return (cpi->cpi_family < 0x10); 2748 case 90: 2749 return (B(eax) || SH_C0(eax) || CG(eax)); 2750 case 91: 2751 case 92: 2752 return (B(eax) || SH_C0(eax)); 2753 case 93: 2754 return (SH_C0(eax)); 2755 case 94: 2756 return (B(eax) || SH_C0(eax) || CG(eax)); 2757 case 95: 2758 #if !defined(__amd64) 2759 return (0); 2760 #else 2761 return (B(eax) || SH_C0(eax)); 2762 #endif 2763 case 96: 2764 return (B(eax) || SH_C0(eax) || CG(eax)); 2765 case 97: 2766 case 98: 2767 return (SH_C0(eax) || CG(eax)); 2768 case 99: 2769 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2770 case 100: 2771 return (B(eax) || SH_C0(eax)); 2772 case 101: 2773 case 103: 2774 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2775 case 104: 2776 return (SH_C0(eax) || CG(eax) || D0(eax)); 2777 case 105: 2778 case 106: 2779 case 107: 2780 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2781 case 108: 2782 return (DH_CG(eax)); 2783 case 109: 2784 return (SH_C0(eax) || CG(eax) || D0(eax)); 2785 case 110: 2786 return (D0(eax) || EX(eax)); 2787 case 111: 2788 return (CG(eax)); 2789 case 112: 2790 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2791 case 113: 2792 return (eax == 0x20fc0); 2793 case 114: 2794 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2795 case 115: 2796 return (SH_E0(eax) || JH_E1(eax)); 2797 case 116: 2798 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2799 case 117: 2800 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2801 case 118: 2802 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2803 JH_E6(eax)); 2804 case 121: 2805 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2806 case 122: 2807 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2808 case 123: 2809 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2810 case 131: 2811 return (cpi->cpi_family < 0x10); 2812 case 6336786: 2813 /* 2814 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2815 * if this is a K8 family or newer processor 2816 */ 2817 if (CPI_FAMILY(cpi) == 0xf) { 2818 struct cpuid_regs regs; 2819 regs.cp_eax = 0x80000007; 2820 (void) __cpuid_insn(®s); 2821 return (!(regs.cp_edx & 0x100)); 2822 } 2823 return (0); 2824 case 6323525: 2825 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2826 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2827 2828 case 6671130: 2829 /* 2830 * check for processors (pre-Shanghai) that do not provide 2831 * optimal management of 1gb ptes in its tlb. 2832 */ 2833 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 2834 2835 case 298: 2836 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 2837 DR_B2(eax) || RB_C0(eax)); 2838 2839 default: 2840 return (-1); 2841 2842 } 2843 } 2844 2845 /* 2846 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 2847 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 2848 */ 2849 int 2850 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 2851 { 2852 struct cpuid_info *cpi; 2853 uint_t osvwid; 2854 static int osvwfeature = -1; 2855 uint64_t osvwlength; 2856 2857 2858 cpi = cpu->cpu_m.mcpu_cpi; 2859 2860 /* confirm OSVW supported */ 2861 if (osvwfeature == -1) { 2862 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 2863 } else { 2864 /* assert that osvw feature setting is consistent on all cpus */ 2865 ASSERT(osvwfeature == 2866 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 2867 } 2868 if (!osvwfeature) 2869 return (-1); 2870 2871 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 2872 2873 switch (erratum) { 2874 case 298: /* osvwid is 0 */ 2875 osvwid = 0; 2876 if (osvwlength <= (uint64_t)osvwid) { 2877 /* osvwid 0 is unknown */ 2878 return (-1); 2879 } 2880 2881 /* 2882 * Check the OSVW STATUS MSR to determine the state 2883 * of the erratum where: 2884 * 0 - fixed by HW 2885 * 1 - BIOS has applied the workaround when BIOS 2886 * workaround is available. (Or for other errata, 2887 * OS workaround is required.) 2888 * For a value of 1, caller will confirm that the 2889 * erratum 298 workaround has indeed been applied by BIOS. 2890 * 2891 * A 1 may be set in cpus that have a HW fix 2892 * in a mixed cpu system. Regarding erratum 298: 2893 * In a multiprocessor platform, the workaround above 2894 * should be applied to all processors regardless of 2895 * silicon revision when an affected processor is 2896 * present. 2897 */ 2898 2899 return (rdmsr(MSR_AMD_OSVW_STATUS + 2900 (osvwid / OSVW_ID_CNT_PER_MSR)) & 2901 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 2902 2903 default: 2904 return (-1); 2905 } 2906 } 2907 2908 static const char assoc_str[] = "associativity"; 2909 static const char line_str[] = "line-size"; 2910 static const char size_str[] = "size"; 2911 2912 static void 2913 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2914 uint32_t val) 2915 { 2916 char buf[128]; 2917 2918 /* 2919 * ndi_prop_update_int() is used because it is desirable for 2920 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2921 */ 2922 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2923 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2924 } 2925 2926 /* 2927 * Intel-style cache/tlb description 2928 * 2929 * Standard cpuid level 2 gives a randomly ordered 2930 * selection of tags that index into a table that describes 2931 * cache and tlb properties. 2932 */ 2933 2934 static const char l1_icache_str[] = "l1-icache"; 2935 static const char l1_dcache_str[] = "l1-dcache"; 2936 static const char l2_cache_str[] = "l2-cache"; 2937 static const char l3_cache_str[] = "l3-cache"; 2938 static const char itlb4k_str[] = "itlb-4K"; 2939 static const char dtlb4k_str[] = "dtlb-4K"; 2940 static const char itlb2M_str[] = "itlb-2M"; 2941 static const char itlb4M_str[] = "itlb-4M"; 2942 static const char dtlb4M_str[] = "dtlb-4M"; 2943 static const char dtlb24_str[] = "dtlb0-2M-4M"; 2944 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2945 static const char itlb24_str[] = "itlb-2M-4M"; 2946 static const char dtlb44_str[] = "dtlb-4K-4M"; 2947 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2948 static const char sl2_cache_str[] = "sectored-l2-cache"; 2949 static const char itrace_str[] = "itrace-cache"; 2950 static const char sl3_cache_str[] = "sectored-l3-cache"; 2951 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 2952 2953 static const struct cachetab { 2954 uint8_t ct_code; 2955 uint8_t ct_assoc; 2956 uint16_t ct_line_size; 2957 size_t ct_size; 2958 const char *ct_label; 2959 } intel_ctab[] = { 2960 /* 2961 * maintain descending order! 2962 * 2963 * Codes ignored - Reason 2964 * ---------------------- 2965 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 2966 * f0H/f1H - Currently we do not interpret prefetch size by design 2967 */ 2968 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 2969 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 2970 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 2971 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 2972 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 2973 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 2974 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 2975 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 2976 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 2977 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 2978 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 2979 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 2980 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 2981 { 0xc0, 4, 0, 8, dtlb44_str }, 2982 { 0xba, 4, 0, 64, dtlb4k_str }, 2983 { 0xb4, 4, 0, 256, dtlb4k_str }, 2984 { 0xb3, 4, 0, 128, dtlb4k_str }, 2985 { 0xb2, 4, 0, 64, itlb4k_str }, 2986 { 0xb0, 4, 0, 128, itlb4k_str }, 2987 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2988 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2989 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2990 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2991 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2992 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2993 { 0x80, 8, 64, 512*1024, l2_cache_str}, 2994 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2995 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2996 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2997 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2998 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2999 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3000 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3001 { 0x73, 8, 0, 64*1024, itrace_str}, 3002 { 0x72, 8, 0, 32*1024, itrace_str}, 3003 { 0x71, 8, 0, 16*1024, itrace_str}, 3004 { 0x70, 8, 0, 12*1024, itrace_str}, 3005 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3006 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3007 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3008 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3009 { 0x5d, 0, 0, 256, dtlb44_str}, 3010 { 0x5c, 0, 0, 128, dtlb44_str}, 3011 { 0x5b, 0, 0, 64, dtlb44_str}, 3012 { 0x5a, 4, 0, 32, dtlb24_str}, 3013 { 0x59, 0, 0, 16, dtlb4k_str}, 3014 { 0x57, 4, 0, 16, dtlb4k_str}, 3015 { 0x56, 4, 0, 16, dtlb4M_str}, 3016 { 0x55, 0, 0, 7, itlb24_str}, 3017 { 0x52, 0, 0, 256, itlb424_str}, 3018 { 0x51, 0, 0, 128, itlb424_str}, 3019 { 0x50, 0, 0, 64, itlb424_str}, 3020 { 0x4f, 0, 0, 32, itlb4k_str}, 3021 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3022 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3023 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3024 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3025 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3026 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3027 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3028 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3029 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3030 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3031 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3032 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3033 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3034 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3035 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3036 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3037 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3038 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3039 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3040 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3041 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3042 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3043 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3044 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3045 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3046 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3047 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3048 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3049 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3050 { 0x0b, 4, 0, 4, itlb4M_str}, 3051 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3052 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3053 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3054 { 0x05, 4, 0, 32, dtlb4M_str}, 3055 { 0x04, 4, 0, 8, dtlb4M_str}, 3056 { 0x03, 4, 0, 64, dtlb4k_str}, 3057 { 0x02, 4, 0, 2, itlb4M_str}, 3058 { 0x01, 4, 0, 32, itlb4k_str}, 3059 { 0 } 3060 }; 3061 3062 static const struct cachetab cyrix_ctab[] = { 3063 { 0x70, 4, 0, 32, "tlb-4K" }, 3064 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3065 { 0 } 3066 }; 3067 3068 /* 3069 * Search a cache table for a matching entry 3070 */ 3071 static const struct cachetab * 3072 find_cacheent(const struct cachetab *ct, uint_t code) 3073 { 3074 if (code != 0) { 3075 for (; ct->ct_code != 0; ct++) 3076 if (ct->ct_code <= code) 3077 break; 3078 if (ct->ct_code == code) 3079 return (ct); 3080 } 3081 return (NULL); 3082 } 3083 3084 /* 3085 * Populate cachetab entry with L2 or L3 cache-information using 3086 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3087 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3088 * information is found. 3089 */ 3090 static int 3091 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3092 { 3093 uint32_t level, i; 3094 int ret = 0; 3095 3096 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3097 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3098 3099 if (level == 2 || level == 3) { 3100 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3101 ct->ct_line_size = 3102 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3103 ct->ct_size = ct->ct_assoc * 3104 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3105 ct->ct_line_size * 3106 (cpi->cpi_std_4[i]->cp_ecx + 1); 3107 3108 if (level == 2) { 3109 ct->ct_label = l2_cache_str; 3110 } else if (level == 3) { 3111 ct->ct_label = l3_cache_str; 3112 } 3113 ret = 1; 3114 } 3115 } 3116 3117 return (ret); 3118 } 3119 3120 /* 3121 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3122 * The walk is terminated if the walker returns non-zero. 3123 */ 3124 static void 3125 intel_walk_cacheinfo(struct cpuid_info *cpi, 3126 void *arg, int (*func)(void *, const struct cachetab *)) 3127 { 3128 const struct cachetab *ct; 3129 struct cachetab des_49_ct, des_b1_ct; 3130 uint8_t *dp; 3131 int i; 3132 3133 if ((dp = cpi->cpi_cacheinfo) == NULL) 3134 return; 3135 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3136 /* 3137 * For overloaded descriptor 0x49 we use cpuid function 4 3138 * if supported by the current processor, to create 3139 * cache information. 3140 * For overloaded descriptor 0xb1 we use X86_PAE flag 3141 * to disambiguate the cache information. 3142 */ 3143 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3144 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3145 ct = &des_49_ct; 3146 } else if (*dp == 0xb1) { 3147 des_b1_ct.ct_code = 0xb1; 3148 des_b1_ct.ct_assoc = 4; 3149 des_b1_ct.ct_line_size = 0; 3150 if (x86_feature & X86_PAE) { 3151 des_b1_ct.ct_size = 8; 3152 des_b1_ct.ct_label = itlb2M_str; 3153 } else { 3154 des_b1_ct.ct_size = 4; 3155 des_b1_ct.ct_label = itlb4M_str; 3156 } 3157 ct = &des_b1_ct; 3158 } else { 3159 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3160 continue; 3161 } 3162 } 3163 3164 if (func(arg, ct) != 0) { 3165 break; 3166 } 3167 } 3168 } 3169 3170 /* 3171 * (Like the Intel one, except for Cyrix CPUs) 3172 */ 3173 static void 3174 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3175 void *arg, int (*func)(void *, const struct cachetab *)) 3176 { 3177 const struct cachetab *ct; 3178 uint8_t *dp; 3179 int i; 3180 3181 if ((dp = cpi->cpi_cacheinfo) == NULL) 3182 return; 3183 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3184 /* 3185 * Search Cyrix-specific descriptor table first .. 3186 */ 3187 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3188 if (func(arg, ct) != 0) 3189 break; 3190 continue; 3191 } 3192 /* 3193 * .. else fall back to the Intel one 3194 */ 3195 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3196 if (func(arg, ct) != 0) 3197 break; 3198 continue; 3199 } 3200 } 3201 } 3202 3203 /* 3204 * A cacheinfo walker that adds associativity, line-size, and size properties 3205 * to the devinfo node it is passed as an argument. 3206 */ 3207 static int 3208 add_cacheent_props(void *arg, const struct cachetab *ct) 3209 { 3210 dev_info_t *devi = arg; 3211 3212 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3213 if (ct->ct_line_size != 0) 3214 add_cache_prop(devi, ct->ct_label, line_str, 3215 ct->ct_line_size); 3216 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3217 return (0); 3218 } 3219 3220 3221 static const char fully_assoc[] = "fully-associative?"; 3222 3223 /* 3224 * AMD style cache/tlb description 3225 * 3226 * Extended functions 5 and 6 directly describe properties of 3227 * tlbs and various cache levels. 3228 */ 3229 static void 3230 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3231 { 3232 switch (assoc) { 3233 case 0: /* reserved; ignore */ 3234 break; 3235 default: 3236 add_cache_prop(devi, label, assoc_str, assoc); 3237 break; 3238 case 0xff: 3239 add_cache_prop(devi, label, fully_assoc, 1); 3240 break; 3241 } 3242 } 3243 3244 static void 3245 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3246 { 3247 if (size == 0) 3248 return; 3249 add_cache_prop(devi, label, size_str, size); 3250 add_amd_assoc(devi, label, assoc); 3251 } 3252 3253 static void 3254 add_amd_cache(dev_info_t *devi, const char *label, 3255 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3256 { 3257 if (size == 0 || line_size == 0) 3258 return; 3259 add_amd_assoc(devi, label, assoc); 3260 /* 3261 * Most AMD parts have a sectored cache. Multiple cache lines are 3262 * associated with each tag. A sector consists of all cache lines 3263 * associated with a tag. For example, the AMD K6-III has a sector 3264 * size of 2 cache lines per tag. 3265 */ 3266 if (lines_per_tag != 0) 3267 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3268 add_cache_prop(devi, label, line_str, line_size); 3269 add_cache_prop(devi, label, size_str, size * 1024); 3270 } 3271 3272 static void 3273 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3274 { 3275 switch (assoc) { 3276 case 0: /* off */ 3277 break; 3278 case 1: 3279 case 2: 3280 case 4: 3281 add_cache_prop(devi, label, assoc_str, assoc); 3282 break; 3283 case 6: 3284 add_cache_prop(devi, label, assoc_str, 8); 3285 break; 3286 case 8: 3287 add_cache_prop(devi, label, assoc_str, 16); 3288 break; 3289 case 0xf: 3290 add_cache_prop(devi, label, fully_assoc, 1); 3291 break; 3292 default: /* reserved; ignore */ 3293 break; 3294 } 3295 } 3296 3297 static void 3298 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3299 { 3300 if (size == 0 || assoc == 0) 3301 return; 3302 add_amd_l2_assoc(devi, label, assoc); 3303 add_cache_prop(devi, label, size_str, size); 3304 } 3305 3306 static void 3307 add_amd_l2_cache(dev_info_t *devi, const char *label, 3308 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3309 { 3310 if (size == 0 || assoc == 0 || line_size == 0) 3311 return; 3312 add_amd_l2_assoc(devi, label, assoc); 3313 if (lines_per_tag != 0) 3314 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3315 add_cache_prop(devi, label, line_str, line_size); 3316 add_cache_prop(devi, label, size_str, size * 1024); 3317 } 3318 3319 static void 3320 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3321 { 3322 struct cpuid_regs *cp; 3323 3324 if (cpi->cpi_xmaxeax < 0x80000005) 3325 return; 3326 cp = &cpi->cpi_extd[5]; 3327 3328 /* 3329 * 4M/2M L1 TLB configuration 3330 * 3331 * We report the size for 2M pages because AMD uses two 3332 * TLB entries for one 4M page. 3333 */ 3334 add_amd_tlb(devi, "dtlb-2M", 3335 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3336 add_amd_tlb(devi, "itlb-2M", 3337 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3338 3339 /* 3340 * 4K L1 TLB configuration 3341 */ 3342 3343 switch (cpi->cpi_vendor) { 3344 uint_t nentries; 3345 case X86_VENDOR_TM: 3346 if (cpi->cpi_family >= 5) { 3347 /* 3348 * Crusoe processors have 256 TLB entries, but 3349 * cpuid data format constrains them to only 3350 * reporting 255 of them. 3351 */ 3352 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3353 nentries = 256; 3354 /* 3355 * Crusoe processors also have a unified TLB 3356 */ 3357 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3358 nentries); 3359 break; 3360 } 3361 /*FALLTHROUGH*/ 3362 default: 3363 add_amd_tlb(devi, itlb4k_str, 3364 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3365 add_amd_tlb(devi, dtlb4k_str, 3366 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3367 break; 3368 } 3369 3370 /* 3371 * data L1 cache configuration 3372 */ 3373 3374 add_amd_cache(devi, l1_dcache_str, 3375 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3376 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3377 3378 /* 3379 * code L1 cache configuration 3380 */ 3381 3382 add_amd_cache(devi, l1_icache_str, 3383 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3384 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3385 3386 if (cpi->cpi_xmaxeax < 0x80000006) 3387 return; 3388 cp = &cpi->cpi_extd[6]; 3389 3390 /* Check for a unified L2 TLB for large pages */ 3391 3392 if (BITX(cp->cp_eax, 31, 16) == 0) 3393 add_amd_l2_tlb(devi, "l2-tlb-2M", 3394 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3395 else { 3396 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3397 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3398 add_amd_l2_tlb(devi, "l2-itlb-2M", 3399 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3400 } 3401 3402 /* Check for a unified L2 TLB for 4K pages */ 3403 3404 if (BITX(cp->cp_ebx, 31, 16) == 0) { 3405 add_amd_l2_tlb(devi, "l2-tlb-4K", 3406 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3407 } else { 3408 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3409 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3410 add_amd_l2_tlb(devi, "l2-itlb-4K", 3411 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3412 } 3413 3414 add_amd_l2_cache(devi, l2_cache_str, 3415 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3416 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3417 } 3418 3419 /* 3420 * There are two basic ways that the x86 world describes it cache 3421 * and tlb architecture - Intel's way and AMD's way. 3422 * 3423 * Return which flavor of cache architecture we should use 3424 */ 3425 static int 3426 x86_which_cacheinfo(struct cpuid_info *cpi) 3427 { 3428 switch (cpi->cpi_vendor) { 3429 case X86_VENDOR_Intel: 3430 if (cpi->cpi_maxeax >= 2) 3431 return (X86_VENDOR_Intel); 3432 break; 3433 case X86_VENDOR_AMD: 3434 /* 3435 * The K5 model 1 was the first part from AMD that reported 3436 * cache sizes via extended cpuid functions. 3437 */ 3438 if (cpi->cpi_family > 5 || 3439 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3440 return (X86_VENDOR_AMD); 3441 break; 3442 case X86_VENDOR_TM: 3443 if (cpi->cpi_family >= 5) 3444 return (X86_VENDOR_AMD); 3445 /*FALLTHROUGH*/ 3446 default: 3447 /* 3448 * If they have extended CPU data for 0x80000005 3449 * then we assume they have AMD-format cache 3450 * information. 3451 * 3452 * If not, and the vendor happens to be Cyrix, 3453 * then try our-Cyrix specific handler. 3454 * 3455 * If we're not Cyrix, then assume we're using Intel's 3456 * table-driven format instead. 3457 */ 3458 if (cpi->cpi_xmaxeax >= 0x80000005) 3459 return (X86_VENDOR_AMD); 3460 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3461 return (X86_VENDOR_Cyrix); 3462 else if (cpi->cpi_maxeax >= 2) 3463 return (X86_VENDOR_Intel); 3464 break; 3465 } 3466 return (-1); 3467 } 3468 3469 /* 3470 * create a node for the given cpu under the prom root node. 3471 * Also, create a cpu node in the device tree. 3472 */ 3473 static dev_info_t *cpu_nex_devi = NULL; 3474 static kmutex_t cpu_node_lock; 3475 3476 /* 3477 * Called from post_startup() and mp_startup() 3478 */ 3479 void 3480 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3481 { 3482 dev_info_t *cpu_devi; 3483 int create; 3484 3485 mutex_enter(&cpu_node_lock); 3486 3487 /* 3488 * create a nexus node for all cpus identified as 'cpu_id' under 3489 * the root node. 3490 */ 3491 if (cpu_nex_devi == NULL) { 3492 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3493 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3494 mutex_exit(&cpu_node_lock); 3495 return; 3496 } 3497 (void) ndi_devi_online(cpu_nex_devi, 0); 3498 } 3499 3500 /* 3501 * create a child node for cpu identified as 'cpu_id' 3502 */ 3503 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3504 cpu_id); 3505 if (cpu_devi == NULL) { 3506 mutex_exit(&cpu_node_lock); 3507 return; 3508 } 3509 3510 /* device_type */ 3511 3512 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3513 "device_type", "cpu"); 3514 3515 /* reg */ 3516 3517 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3518 "reg", cpu_id); 3519 3520 /* cpu-mhz, and clock-frequency */ 3521 3522 if (cpu_freq > 0) { 3523 long long mul; 3524 3525 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3526 "cpu-mhz", cpu_freq); 3527 3528 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3529 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3530 "clock-frequency", (int)mul); 3531 } 3532 3533 (void) ndi_devi_online(cpu_devi, 0); 3534 3535 if ((x86_feature & X86_CPUID) == 0) { 3536 mutex_exit(&cpu_node_lock); 3537 return; 3538 } 3539 3540 /* vendor-id */ 3541 3542 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3543 "vendor-id", cpi->cpi_vendorstr); 3544 3545 if (cpi->cpi_maxeax == 0) { 3546 mutex_exit(&cpu_node_lock); 3547 return; 3548 } 3549 3550 /* 3551 * family, model, and step 3552 */ 3553 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3554 "family", CPI_FAMILY(cpi)); 3555 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3556 "cpu-model", CPI_MODEL(cpi)); 3557 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3558 "stepping-id", CPI_STEP(cpi)); 3559 3560 /* type */ 3561 3562 switch (cpi->cpi_vendor) { 3563 case X86_VENDOR_Intel: 3564 create = 1; 3565 break; 3566 default: 3567 create = 0; 3568 break; 3569 } 3570 if (create) 3571 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3572 "type", CPI_TYPE(cpi)); 3573 3574 /* ext-family */ 3575 3576 switch (cpi->cpi_vendor) { 3577 case X86_VENDOR_Intel: 3578 case X86_VENDOR_AMD: 3579 create = cpi->cpi_family >= 0xf; 3580 break; 3581 default: 3582 create = 0; 3583 break; 3584 } 3585 if (create) 3586 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3587 "ext-family", CPI_FAMILY_XTD(cpi)); 3588 3589 /* ext-model */ 3590 3591 switch (cpi->cpi_vendor) { 3592 case X86_VENDOR_Intel: 3593 create = IS_EXTENDED_MODEL_INTEL(cpi); 3594 break; 3595 case X86_VENDOR_AMD: 3596 create = CPI_FAMILY(cpi) == 0xf; 3597 break; 3598 default: 3599 create = 0; 3600 break; 3601 } 3602 if (create) 3603 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3604 "ext-model", CPI_MODEL_XTD(cpi)); 3605 3606 /* generation */ 3607 3608 switch (cpi->cpi_vendor) { 3609 case X86_VENDOR_AMD: 3610 /* 3611 * AMD K5 model 1 was the first part to support this 3612 */ 3613 create = cpi->cpi_xmaxeax >= 0x80000001; 3614 break; 3615 default: 3616 create = 0; 3617 break; 3618 } 3619 if (create) 3620 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3621 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3622 3623 /* brand-id */ 3624 3625 switch (cpi->cpi_vendor) { 3626 case X86_VENDOR_Intel: 3627 /* 3628 * brand id first appeared on Pentium III Xeon model 8, 3629 * and Celeron model 8 processors and Opteron 3630 */ 3631 create = cpi->cpi_family > 6 || 3632 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3633 break; 3634 case X86_VENDOR_AMD: 3635 create = cpi->cpi_family >= 0xf; 3636 break; 3637 default: 3638 create = 0; 3639 break; 3640 } 3641 if (create && cpi->cpi_brandid != 0) { 3642 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3643 "brand-id", cpi->cpi_brandid); 3644 } 3645 3646 /* chunks, and apic-id */ 3647 3648 switch (cpi->cpi_vendor) { 3649 /* 3650 * first available on Pentium IV and Opteron (K8) 3651 */ 3652 case X86_VENDOR_Intel: 3653 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3654 break; 3655 case X86_VENDOR_AMD: 3656 create = cpi->cpi_family >= 0xf; 3657 break; 3658 default: 3659 create = 0; 3660 break; 3661 } 3662 if (create) { 3663 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3664 "chunks", CPI_CHUNKS(cpi)); 3665 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3666 "apic-id", cpi->cpi_apicid); 3667 if (cpi->cpi_chipid >= 0) { 3668 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3669 "chip#", cpi->cpi_chipid); 3670 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3671 "clog#", cpi->cpi_clogid); 3672 } 3673 } 3674 3675 /* cpuid-features */ 3676 3677 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3678 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3679 3680 3681 /* cpuid-features-ecx */ 3682 3683 switch (cpi->cpi_vendor) { 3684 case X86_VENDOR_Intel: 3685 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3686 break; 3687 default: 3688 create = 0; 3689 break; 3690 } 3691 if (create) 3692 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3693 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3694 3695 /* ext-cpuid-features */ 3696 3697 switch (cpi->cpi_vendor) { 3698 case X86_VENDOR_Intel: 3699 case X86_VENDOR_AMD: 3700 case X86_VENDOR_Cyrix: 3701 case X86_VENDOR_TM: 3702 case X86_VENDOR_Centaur: 3703 create = cpi->cpi_xmaxeax >= 0x80000001; 3704 break; 3705 default: 3706 create = 0; 3707 break; 3708 } 3709 if (create) { 3710 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3711 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3712 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3713 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3714 } 3715 3716 /* 3717 * Brand String first appeared in Intel Pentium IV, AMD K5 3718 * model 1, and Cyrix GXm. On earlier models we try and 3719 * simulate something similar .. so this string should always 3720 * same -something- about the processor, however lame. 3721 */ 3722 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3723 "brand-string", cpi->cpi_brandstr); 3724 3725 /* 3726 * Finally, cache and tlb information 3727 */ 3728 switch (x86_which_cacheinfo(cpi)) { 3729 case X86_VENDOR_Intel: 3730 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3731 break; 3732 case X86_VENDOR_Cyrix: 3733 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3734 break; 3735 case X86_VENDOR_AMD: 3736 amd_cache_info(cpi, cpu_devi); 3737 break; 3738 default: 3739 break; 3740 } 3741 3742 mutex_exit(&cpu_node_lock); 3743 } 3744 3745 struct l2info { 3746 int *l2i_csz; 3747 int *l2i_lsz; 3748 int *l2i_assoc; 3749 int l2i_ret; 3750 }; 3751 3752 /* 3753 * A cacheinfo walker that fetches the size, line-size and associativity 3754 * of the L2 cache 3755 */ 3756 static int 3757 intel_l2cinfo(void *arg, const struct cachetab *ct) 3758 { 3759 struct l2info *l2i = arg; 3760 int *ip; 3761 3762 if (ct->ct_label != l2_cache_str && 3763 ct->ct_label != sl2_cache_str) 3764 return (0); /* not an L2 -- keep walking */ 3765 3766 if ((ip = l2i->l2i_csz) != NULL) 3767 *ip = ct->ct_size; 3768 if ((ip = l2i->l2i_lsz) != NULL) 3769 *ip = ct->ct_line_size; 3770 if ((ip = l2i->l2i_assoc) != NULL) 3771 *ip = ct->ct_assoc; 3772 l2i->l2i_ret = ct->ct_size; 3773 return (1); /* was an L2 -- terminate walk */ 3774 } 3775 3776 /* 3777 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3778 * 3779 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3780 * value is the associativity, the associativity for the L2 cache and 3781 * tlb is encoded in the following table. The 4 bit L2 value serves as 3782 * an index into the amd_afd[] array to determine the associativity. 3783 * -1 is undefined. 0 is fully associative. 3784 */ 3785 3786 static int amd_afd[] = 3787 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3788 3789 static void 3790 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3791 { 3792 struct cpuid_regs *cp; 3793 uint_t size, assoc; 3794 int i; 3795 int *ip; 3796 3797 if (cpi->cpi_xmaxeax < 0x80000006) 3798 return; 3799 cp = &cpi->cpi_extd[6]; 3800 3801 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3802 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3803 uint_t cachesz = size * 1024; 3804 assoc = amd_afd[i]; 3805 3806 ASSERT(assoc != -1); 3807 3808 if ((ip = l2i->l2i_csz) != NULL) 3809 *ip = cachesz; 3810 if ((ip = l2i->l2i_lsz) != NULL) 3811 *ip = BITX(cp->cp_ecx, 7, 0); 3812 if ((ip = l2i->l2i_assoc) != NULL) 3813 *ip = assoc; 3814 l2i->l2i_ret = cachesz; 3815 } 3816 } 3817 3818 int 3819 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3820 { 3821 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3822 struct l2info __l2info, *l2i = &__l2info; 3823 3824 l2i->l2i_csz = csz; 3825 l2i->l2i_lsz = lsz; 3826 l2i->l2i_assoc = assoc; 3827 l2i->l2i_ret = -1; 3828 3829 switch (x86_which_cacheinfo(cpi)) { 3830 case X86_VENDOR_Intel: 3831 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3832 break; 3833 case X86_VENDOR_Cyrix: 3834 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3835 break; 3836 case X86_VENDOR_AMD: 3837 amd_l2cacheinfo(cpi, l2i); 3838 break; 3839 default: 3840 break; 3841 } 3842 return (l2i->l2i_ret); 3843 } 3844 3845 #if !defined(__xpv) 3846 3847 uint32_t * 3848 cpuid_mwait_alloc(cpu_t *cpu) 3849 { 3850 uint32_t *ret; 3851 size_t mwait_size; 3852 3853 ASSERT(cpuid_checkpass(cpu, 2)); 3854 3855 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3856 if (mwait_size == 0) 3857 return (NULL); 3858 3859 /* 3860 * kmem_alloc() returns cache line size aligned data for mwait_size 3861 * allocations. mwait_size is currently cache line sized. Neither 3862 * of these implementation details are guarantied to be true in the 3863 * future. 3864 * 3865 * First try allocating mwait_size as kmem_alloc() currently returns 3866 * correctly aligned memory. If kmem_alloc() does not return 3867 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3868 * 3869 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3870 * decide to free this memory. 3871 */ 3872 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3873 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3874 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3875 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3876 *ret = MWAIT_RUNNING; 3877 return (ret); 3878 } else { 3879 kmem_free(ret, mwait_size); 3880 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3881 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3882 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3883 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3884 *ret = MWAIT_RUNNING; 3885 return (ret); 3886 } 3887 } 3888 3889 void 3890 cpuid_mwait_free(cpu_t *cpu) 3891 { 3892 ASSERT(cpuid_checkpass(cpu, 2)); 3893 3894 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3895 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3896 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3897 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3898 } 3899 3900 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3901 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3902 } 3903 3904 void 3905 patch_tsc_read(int flag) 3906 { 3907 size_t cnt; 3908 3909 switch (flag) { 3910 case X86_NO_TSC: 3911 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3912 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3913 break; 3914 case X86_HAVE_TSCP: 3915 cnt = &_tscp_end - &_tscp_start; 3916 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3917 break; 3918 case X86_TSC_MFENCE: 3919 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 3920 (void) memcpy((void *)tsc_read, 3921 (void *)&_tsc_mfence_start, cnt); 3922 break; 3923 case X86_TSC_LFENCE: 3924 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 3925 (void) memcpy((void *)tsc_read, 3926 (void *)&_tsc_lfence_start, cnt); 3927 break; 3928 default: 3929 break; 3930 } 3931 } 3932 3933 int 3934 cpuid_deep_cstates_supported(void) 3935 { 3936 struct cpuid_info *cpi; 3937 struct cpuid_regs regs; 3938 3939 ASSERT(cpuid_checkpass(CPU, 1)); 3940 3941 cpi = CPU->cpu_m.mcpu_cpi; 3942 3943 if (!(x86_feature & X86_CPUID)) 3944 return (0); 3945 3946 switch (cpi->cpi_vendor) { 3947 case X86_VENDOR_Intel: 3948 if (cpi->cpi_xmaxeax < 0x80000007) 3949 return (0); 3950 3951 /* 3952 * TSC run at a constant rate in all ACPI C-states? 3953 */ 3954 regs.cp_eax = 0x80000007; 3955 (void) __cpuid_insn(®s); 3956 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 3957 3958 default: 3959 return (0); 3960 } 3961 } 3962 3963 #endif /* !__xpv */ 3964 3965 void 3966 post_startup_cpu_fixups(void) 3967 { 3968 #ifndef __xpv 3969 /* 3970 * Some AMD processors support C1E state. Entering this state will 3971 * cause the local APIC timer to stop, which we can't deal with at 3972 * this time. 3973 */ 3974 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 3975 on_trap_data_t otd; 3976 uint64_t reg; 3977 3978 if (!on_trap(&otd, OT_DATA_ACCESS)) { 3979 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 3980 /* Disable C1E state if it is enabled by BIOS */ 3981 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 3982 AMD_ACTONCMPHALT_MASK) { 3983 reg &= ~(AMD_ACTONCMPHALT_MASK << 3984 AMD_ACTONCMPHALT_SHIFT); 3985 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 3986 } 3987 } 3988 no_trap(); 3989 } 3990 #endif /* !__xpv */ 3991 } 3992 3993 /* 3994 * Starting with the Westmere processor the local 3995 * APIC timer will continue running in all C-states, 3996 * including the deepest C-states. 3997 */ 3998 int 3999 cpuid_arat_supported(void) 4000 { 4001 struct cpuid_info *cpi; 4002 struct cpuid_regs regs; 4003 4004 ASSERT(cpuid_checkpass(CPU, 1)); 4005 ASSERT(x86_feature & X86_CPUID); 4006 4007 cpi = CPU->cpu_m.mcpu_cpi; 4008 4009 switch (cpi->cpi_vendor) { 4010 case X86_VENDOR_Intel: 4011 /* 4012 * Always-running Local APIC Timer is 4013 * indicated by CPUID.6.EAX[2]. 4014 */ 4015 if (cpi->cpi_maxeax >= 6) { 4016 regs.cp_eax = 6; 4017 (void) cpuid_insn(NULL, ®s); 4018 return (regs.cp_eax & CPUID_CSTATE_ARAT); 4019 } else { 4020 return (0); 4021 } 4022 default: 4023 return (0); 4024 } 4025 } 4026 4027 #if defined(__amd64) && !defined(__xpv) 4028 /* 4029 * Patch in versions of bcopy for high performance Intel Nhm processors 4030 * and later... 4031 */ 4032 void 4033 patch_memops(uint_t vendor) 4034 { 4035 size_t cnt, i; 4036 caddr_t to, from; 4037 4038 if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) { 4039 cnt = &bcopy_patch_end - &bcopy_patch_start; 4040 to = &bcopy_ck_size; 4041 from = &bcopy_patch_start; 4042 for (i = 0; i < cnt; i++) { 4043 *to++ = *from++; 4044 } 4045 } 4046 } 4047 #endif /* __amd64 && !__xpv */ 4048