1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 /* 31 * Various routines to handle identification 32 * and classification of x86 processors. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/archsystm.h> 37 #include <sys/x86_archext.h> 38 #include <sys/kmem.h> 39 #include <sys/systm.h> 40 #include <sys/cmn_err.h> 41 #include <sys/sunddi.h> 42 #include <sys/sunndi.h> 43 #include <sys/cpuvar.h> 44 #include <sys/processor.h> 45 #include <sys/sysmacros.h> 46 #include <sys/pg.h> 47 #include <sys/fp.h> 48 #include <sys/controlregs.h> 49 #include <sys/auxv_386.h> 50 #include <sys/bitmap.h> 51 #include <sys/memnode.h> 52 53 #ifdef __xpv 54 #include <sys/hypervisor.h> 55 #else 56 #include <sys/ontrap.h> 57 #endif 58 59 /* 60 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 61 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 62 * them accordingly. For most modern processors, feature detection occurs here 63 * in pass 1. 
64 * 65 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 66 * for the boot CPU and does the basic analysis that the early kernel needs. 67 * x86_feature is set based on the return value of cpuid_pass1() of the boot 68 * CPU. 69 * 70 * Pass 1 includes: 71 * 72 * o Determining vendor/model/family/stepping and setting x86_type and 73 * x86_vendor accordingly. 74 * o Processing the feature flags returned by the cpuid instruction while 75 * applying any workarounds or tricks for the specific processor. 76 * o Mapping the feature flags into Solaris feature bits (X86_*). 77 * o Processing extended feature flags if supported by the processor, 78 * again while applying specific processor knowledge. 79 * o Determining the CMT characteristics of the system. 80 * 81 * Pass 1 is done on non-boot CPUs during their initialization and the results 82 * are used only as a meager attempt at ensuring that all processors within the 83 * system support the same features. 84 * 85 * Pass 2 of cpuid feature analysis happens just at the beginning 86 * of startup(). It just copies in and corrects the remainder 87 * of the cpuid data we depend on: standard cpuid functions that we didn't 88 * need for pass1 feature analysis, and extended cpuid functions beyond the 89 * simple feature processing done in pass1. 90 * 91 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 92 * particular kernel memory allocation has been made available. It creates a 93 * readable brand string based on the data collected in the first two passes. 94 * 95 * Pass 4 of cpuid analysis is invoked after post_startup() when all 96 * the support infrastructure for various hardware features has been 97 * initialized. It determines which processor features will be reported 98 * to userland via the aux vector. 99 * 100 * All passes are executed on all CPUs, but only the boot CPU determines what 101 * features the kernel will use. 
102 * 103 * Much of the worst junk in this file is for the support of processors 104 * that didn't really implement the cpuid instruction properly. 105 * 106 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 107 * the pass numbers. Accordingly, changes to the pass code may require changes 108 * to the accessor code. 109 */ 110 111 uint_t x86_feature = 0; 112 uint_t x86_vendor = X86_VENDOR_IntelClone; 113 uint_t x86_type = X86_TYPE_OTHER; 114 uint_t x86_clflush_size = 0; 115 116 uint_t pentiumpro_bug4046376; 117 uint_t pentiumpro_bug4064495; 118 119 uint_t enable486; 120 /* 121 * This is set to the platform type Solaris is running on. 122 */ 123 static int platform_type = HW_NATIVE; 124 125 /* 126 * monitor/mwait info. 127 * 128 * size_actual and buf_actual are the real address and size allocated to get 129 * proper mwait_buf alignment. buf_actual and size_actual should be passed 130 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 131 * processor cache-line alignment, but this is not guaranteed in the future. 132 */ 133 struct mwait_info { 134 size_t mon_min; /* min size to avoid missed wakeups */ 135 size_t mon_max; /* size to avoid false wakeups */ 136 size_t size_actual; /* size actually allocated */ 137 void *buf_actual; /* memory actually allocated */ 138 uint32_t support; /* processor support of monitor/mwait */ 139 }; 140 141 /* 142 * These constants determine how many of the elements of the 143 * cpuid we cache in the cpuid_info data structure; the 144 * remaining elements are accessible via the cpuid instruction. 145 */ 146 147 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 148 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 
0x80000008 */ 149 /* Per-CPU cache of raw cpuid results plus values derived from them; cpi_pass records the last analysis pass completed for this CPU. */ 150 struct cpuid_info { 151 uint_t cpi_pass; /* last pass completed */ 152 /* 153 * standard function information 154 */ 155 uint_t cpi_maxeax; /* fn 0: %eax */ 156 char cpi_vendorstr[13]; /* fn 0: %ebx:%edx:%ecx, NUL-terminated */ 157 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 158 159 uint_t cpi_family; /* fn 1: extended family */ 160 uint_t cpi_model; /* fn 1: extended model */ 161 uint_t cpi_step; /* fn 1: stepping */ 162 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 163 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 164 int cpi_clogid; /* fn 1: %ebx: thread # */ 165 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 166 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 167 uint_t cpi_ncache; /* fn 2: number of elements */ 168 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 169 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 170 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 171 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 172 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 173 /* 174 * extended function information 175 */ 176 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 177 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 178 uint8_t cpi_pabits; /* fn 0x80000008: %eax[7-0], physical address bits */ 179 uint8_t cpi_vabits; /* fn 0x80000008: %eax[15-8], virtual address bits */ 180 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 181 id_t cpi_coreid; /* same coreid => strands share core */ 182 int cpi_pkgcoreid; /* core number within single package */ 183 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 184 /* Intel: fn 4: %eax[31-26] */ 185 /* 186 * supported feature information 187 */ 188 uint32_t cpi_support[5]; 189 #define STD_EDX_FEATURES 0 190 #define AMD_EDX_FEATURES 1 191 #define TM_EDX_FEATURES 2 192 #define STD_ECX_FEATURES 3 193 #define AMD_ECX_FEATURES 4 194 /* 195 * Synthesized information, where known. 
196 */ 197 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 198 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 199 uint32_t cpi_socket; /* Chip package/socket type */ 200 201 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 202 uint32_t cpi_apicid; 203 }; 204 205 206 static struct cpuid_info cpuid_info0; 207 208 /* 209 * These bit fields are defined by the Intel Application Note AP-485 210 * "Intel Processor Identification and the CPUID Instruction" 211 */ 212 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 213 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 214 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 215 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 216 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 217 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 218 219 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 220 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 221 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 222 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 223 224 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 225 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 8) /* clflush line size in 8-byte chunks; must not overlap the brand ID in bits 7-0 */ 226 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 227 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 228 229 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 230 #define CPI_XMAXEAX_MAX 0x80000100 231 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 232 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 233 234 /* 235 * Function 4 (Deterministic Cache Parameters) macros 236 * Defined by Intel Application Note AP-485 237 */ 238 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 239 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 240 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 241 #define 
CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 242 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 243 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 244 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 245 246 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 247 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 248 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 249 250 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 251 252 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 253 254 255 /* 256 * A couple of shorthand macros to identify "later" P6-family chips 257 * like the Pentium M and Core. First, the "older" P6-based stuff 258 * (loosely defined as "pre-Pentium-4"): 259 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 260 * The macro argument is parenthesized so any pointer expression may be passed. */ 261 262 #define IS_LEGACY_P6(cpi) ( \ 263 (cpi)->cpi_family == 6 && \ 264 ((cpi)->cpi_model == 1 || \ 265 (cpi)->cpi_model == 3 || \ 266 (cpi)->cpi_model == 5 || \ 267 (cpi)->cpi_model == 6 || \ 268 (cpi)->cpi_model == 7 || \ 269 (cpi)->cpi_model == 8 || \ 270 (cpi)->cpi_model == 0xA || \ 271 (cpi)->cpi_model == 0xB) \ 272 ) 273 274 /* A "new F6" is everything with family 6 that's not the above */ 275 #define IS_NEW_F6(cpi) (((cpi)->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 276 277 /* Extended family/model support */ 278 #define IS_EXTENDED_MODEL_INTEL(cpi) ((cpi)->cpi_family == 0x6 || \ 279 (cpi)->cpi_family >= 0xf) 280 281 /* 282 * Info for monitor/mwait idle loop. 283 * 284 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 285 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 286 * 2006. 287 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 288 * Documentation Updates" #33633, Rev 2.05, December 2006. 
289 */ 290 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 291 #define MWAIT_EXTENSIONS (0x00000002) /* extension supported */ 292 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 293 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 294 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 295 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 296 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 297 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 298 /* 299 * Number of sub-cstates for a given c-state. 300 */ 301 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 302 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 303 304 /* 305 * Functions we consume from cpuid_subr.c; don't publish these in a header 306 * file to try and keep people using the expected cpuid_* interfaces. 307 */ 308 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 309 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 310 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 311 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 312 313 /* 314 * Apply various platform-dependent restrictions where the 315 * underlying platform restrictions mean the CPU can be marked 316 * as less capable than its cpuid instruction would imply. 317 */ 318 #if defined(__xpv) 319 static void 320 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 321 { 322 switch (eax) { 323 case 1: { 324 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 
325 0 : CPUID_INTC_EDX_MCA; 326 cp->cp_edx &= 327 ~(mcamask | 328 CPUID_INTC_EDX_PSE | 329 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 330 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 331 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 332 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 333 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 334 break; 335 } 336 337 case 0x80000001: 338 cp->cp_edx &= 339 ~(CPUID_AMD_EDX_PSE | 340 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 341 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 342 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 343 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 344 CPUID_AMD_EDX_TSCP); 345 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 346 break; 347 default: 348 break; 349 } 350 351 switch (vendor) { 352 case X86_VENDOR_Intel: 353 switch (eax) { 354 case 4: 355 /* 356 * Zero out the (ncores-per-chip - 1) field, i.e. all of 357 * %eax[31:26]; only %eax[25:0] is preserved. */ 358 cp->cp_eax &= 0x03ffffff; 359 break; 360 default: 361 break; 362 } 363 break; 364 case X86_VENDOR_AMD: 365 switch (eax) { 366 case 0x80000008: 367 /* 368 * Zero out the (ncores-per-chip - 1) field, %ecx[7:0] 369 */ 370 cp->cp_ecx &= 0xffffff00; 371 break; 372 default: 373 break; 374 } 375 break; 376 default: 377 break; 378 } 379 } 380 #else 381 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 382 #endif 383 384 /* 385 * Some undocumented ways of patching the results of the cpuid 386 * instruction to permit running Solaris 10 on future cpus that 387 * we don't currently support. Could be set to non-zero values 388 * via settings in eeprom. 389 */ 390 391 uint32_t cpuid_feature_ecx_include; 392 uint32_t cpuid_feature_ecx_exclude; 393 uint32_t cpuid_feature_edx_include; 394 uint32_t cpuid_feature_edx_exclude; 395 396 void 397 cpuid_alloc_space(cpu_t *cpu) 398 { 399 /* 400 * By convention, cpu0 is the boot cpu, which is set up 401 * before memory allocation is available. All other cpus get 402 * their cpuid_info struct allocated here. 
403 */ 404 ASSERT(cpu->cpu_id != 0); 405 cpu->cpu_m.mcpu_cpi = 406 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 407 } 408 /* Release the cpuid_info of a non-boot cpu, including any function 4 storage hanging off it, and clear the cpu's pointer to it. */ 409 void 410 cpuid_free_space(cpu_t *cpu) 411 { 412 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 413 int i; 414 415 ASSERT(cpu->cpu_id != 0); 416 417 /* 418 * Free up any function 4 related dynamic storage. Only slots 1 and up are freed individually (NOTE(review): presumably slot 0 is not a separate allocation - confirm against the code that fills cpi_std_4). 419 */ 420 for (i = 1; i < cpi->cpi_std_4_size; i++) 421 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 422 if (cpi->cpi_std_4_size > 0) 423 kmem_free(cpi->cpi_std_4, 424 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 425 426 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); /* do not leave a dangling pointer to the freed structure */ cpu->cpu_m.mcpu_cpi = NULL; 427 } 428 429 #if !defined(__xpv) 430 /* Query the hypervisor cpuid leaf 0x40000000 and record in platform_type whether we run as a Xen HVM guest or under VMware; platform_type is left untouched otherwise. */ 431 static void 432 determine_platform() 433 { 434 struct cpuid_regs cp; 435 char *xen_str; 436 uint32_t xen_signature[4]; 437 438 /* 439 * In a fully virtualized domain, Xen's pseudo-cpuid function 440 * 0x40000000 returns a string representing the Xen signature in 441 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 442 * function. 443 */ 444 cp.cp_eax = 0x40000000; 445 (void) __cpuid_insn(&cp); 446 xen_signature[0] = cp.cp_ebx; 447 xen_signature[1] = cp.cp_ecx; 448 xen_signature[2] = cp.cp_edx; 449 xen_signature[3] = 0; 450 xen_str = (char *)xen_signature; 451 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) { 452 platform_type = HW_XEN_HVM; 453 } else if (vmware_platform()) { /* running under vmware hypervisor? 
*/ 454 platform_type = HW_VMWARE; 455 } 456 } 457 /* Return the hardware environment recorded in platform_type. */ 458 int 459 get_hwenv(void) 460 { 461 return (platform_type); 462 } 463 /* Builds without __xpv never report a control domain: always returns 0. */ 464 int 465 is_controldom(void) 466 { 467 return (0); 468 } 469 470 #else 471 /* __xpv build: the hardware environment is always HW_XEN_PV. */ 472 int 473 get_hwenv(void) 474 { 475 return (HW_XEN_PV); 476 } 477 /* __xpv build: returns the value of DOMAIN_IS_INITDOMAIN(xen_info). */ 478 int 479 is_controldom(void) 480 { 481 return (DOMAIN_IS_INITDOMAIN(xen_info)); 482 } 483 484 #endif /* __xpv */ 485 486 uint_t 487 cpuid_pass1(cpu_t *cpu) 488 { 489 uint32_t mask_ecx, mask_edx; 490 uint_t feature = X86_CPUID; 491 struct cpuid_info *cpi; 492 struct cpuid_regs *cp; 493 int xcpuid; 494 #if !defined(__xpv) 495 extern int idle_cpu_prefer_mwait; 496 #endif 497 498 /* 499 * Space statically allocated for cpu0, ensure pointer is set 500 */ 501 if (cpu->cpu_id == 0) 502 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 503 cpi = cpu->cpu_m.mcpu_cpi; 504 ASSERT(cpi != NULL); 505 cp = &cpi->cpi_std[0]; 506 cp->cp_eax = 0; 507 cpi->cpi_maxeax = __cpuid_insn(cp); 508 { 509 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 510 *iptr++ = cp->cp_ebx; 511 *iptr++ = cp->cp_edx; 512 *iptr++ = cp->cp_ecx; 513 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 514 } 515 516 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 517 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 518 519 /* 520 * Limit the range in case of weird hardware 521 */ 522 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 523 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 524 if (cpi->cpi_maxeax < 1) 525 goto pass1_done; 526 527 cp = &cpi->cpi_std[1]; 528 cp->cp_eax = 1; 529 (void) __cpuid_insn(cp); 530 531 /* 532 * Extract identifying constants for easy access. 533 */ 534 cpi->cpi_model = CPI_MODEL(cpi); 535 cpi->cpi_family = CPI_FAMILY(cpi); 536 537 if (cpi->cpi_family == 0xf) 538 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 539 540 /* 541 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 542 * Intel, and presumably everyone else, uses model == 0xf, as 543 * one would expect (max value means possible overflow). Sigh. 
544 */ 545 546 switch (cpi->cpi_vendor) { 547 case X86_VENDOR_Intel: 548 if (IS_EXTENDED_MODEL_INTEL(cpi)) 549 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 550 break; 551 case X86_VENDOR_AMD: 552 if (CPI_FAMILY(cpi) == 0xf) 553 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 554 break; 555 default: 556 if (cpi->cpi_model == 0xf) 557 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 558 break; 559 } 560 561 cpi->cpi_step = CPI_STEP(cpi); 562 cpi->cpi_brandid = CPI_BRANDID(cpi); 563 564 /* 565 * *default* assumptions: 566 * - believe %edx feature word 567 * - ignore %ecx feature word 568 * - 32-bit virtual and physical addressing 569 */ 570 mask_edx = 0xffffffff; 571 mask_ecx = 0; 572 573 cpi->cpi_pabits = cpi->cpi_vabits = 32; 574 575 switch (cpi->cpi_vendor) { 576 case X86_VENDOR_Intel: 577 if (cpi->cpi_family == 5) 578 x86_type = X86_TYPE_P5; 579 else if (IS_LEGACY_P6(cpi)) { 580 x86_type = X86_TYPE_P6; 581 pentiumpro_bug4046376 = 1; 582 pentiumpro_bug4064495 = 1; 583 /* 584 * Clear the SEP bit when it was set erroneously 585 */ 586 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 587 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 588 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 589 x86_type = X86_TYPE_P4; 590 /* 591 * We don't currently depend on any of the %ecx 592 * features until Prescott, so we'll only check 593 * this from P4 onwards. We might want to revisit 594 * that idea later. 595 */ 596 mask_ecx = 0xffffffff; 597 } else if (cpi->cpi_family > 0xf) 598 mask_ecx = 0xffffffff; 599 /* 600 * We don't support MONITOR/MWAIT if leaf 5 is not available 601 * to obtain the monitor linesize. 
602 */ 603 if (cpi->cpi_maxeax < 5) 604 mask_ecx &= ~CPUID_INTC_ECX_MON; 605 break; 606 case X86_VENDOR_IntelClone: 607 default: 608 break; 609 case X86_VENDOR_AMD: 610 #if defined(OPTERON_ERRATUM_108) 611 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 612 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 613 cpi->cpi_model = 0xc; 614 } else 615 #endif 616 if (cpi->cpi_family == 5) { 617 /* 618 * AMD K5 and K6 619 * 620 * These CPUs have an incomplete implementation 621 * of MCA/MCE which we mask away. 622 */ 623 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 624 625 /* 626 * Model 0 uses the wrong (APIC) bit 627 * to indicate PGE. Fix it here. 628 */ 629 if (cpi->cpi_model == 0) { 630 if (cp->cp_edx & 0x200) { 631 cp->cp_edx &= ~0x200; 632 cp->cp_edx |= CPUID_INTC_EDX_PGE; 633 } 634 } 635 636 /* 637 * Early models had problems w/ MMX; disable. 638 */ 639 if (cpi->cpi_model < 6) 640 mask_edx &= ~CPUID_INTC_EDX_MMX; 641 } 642 643 /* 644 * For newer families, SSE3 and CX16, at least, are valid; 645 * enable all 646 */ 647 if (cpi->cpi_family >= 0xf) 648 mask_ecx = 0xffffffff; 649 /* 650 * We don't support MONITOR/MWAIT if leaf 5 is not available 651 * to obtain the monitor linesize. 652 */ 653 if (cpi->cpi_maxeax < 5) 654 mask_ecx &= ~CPUID_INTC_ECX_MON; 655 656 #if !defined(__xpv) 657 /* 658 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 659 * processors. AMD does not intend MWAIT to be used in the cpu 660 * idle loop on current and future processors. 10h and future 661 * AMD processors use more power in MWAIT than HLT. 662 * Pre-family-10h Opterons do not have the MWAIT instruction. 
663 */ 664 idle_cpu_prefer_mwait = 0; 665 #endif 666 667 break; 668 case X86_VENDOR_TM: 669 /* 670 * workaround the NT workaround in CMS 4.1 671 */ 672 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 673 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 674 cp->cp_edx |= CPUID_INTC_EDX_CX8; 675 break; 676 case X86_VENDOR_Centaur: 677 /* 678 * workaround the NT workarounds again 679 */ 680 if (cpi->cpi_family == 6) 681 cp->cp_edx |= CPUID_INTC_EDX_CX8; 682 break; 683 case X86_VENDOR_Cyrix: 684 /* 685 * We rely heavily on the probing in locore 686 * to actually figure out what parts, if any, 687 * of the Cyrix cpuid instruction to believe. 688 */ 689 switch (x86_type) { 690 case X86_TYPE_CYRIX_486: 691 mask_edx = 0; 692 break; 693 case X86_TYPE_CYRIX_6x86: 694 mask_edx = 0; 695 break; 696 case X86_TYPE_CYRIX_6x86L: 697 mask_edx = 698 CPUID_INTC_EDX_DE | 699 CPUID_INTC_EDX_CX8; 700 break; 701 case X86_TYPE_CYRIX_6x86MX: 702 mask_edx = 703 CPUID_INTC_EDX_DE | 704 CPUID_INTC_EDX_MSR | 705 CPUID_INTC_EDX_CX8 | 706 CPUID_INTC_EDX_PGE | 707 CPUID_INTC_EDX_CMOV | 708 CPUID_INTC_EDX_MMX; 709 break; 710 case X86_TYPE_CYRIX_GXm: 711 mask_edx = 712 CPUID_INTC_EDX_MSR | 713 CPUID_INTC_EDX_CX8 | 714 CPUID_INTC_EDX_CMOV | 715 CPUID_INTC_EDX_MMX; 716 break; 717 case X86_TYPE_CYRIX_MediaGX: 718 break; 719 case X86_TYPE_CYRIX_MII: 720 case X86_TYPE_VIA_CYRIX_III: 721 mask_edx = 722 CPUID_INTC_EDX_DE | 723 CPUID_INTC_EDX_TSC | 724 CPUID_INTC_EDX_MSR | 725 CPUID_INTC_EDX_CX8 | 726 CPUID_INTC_EDX_PGE | 727 CPUID_INTC_EDX_CMOV | 728 CPUID_INTC_EDX_MMX; 729 break; 730 default: 731 break; 732 } 733 break; 734 } 735 736 #if defined(__xpv) 737 /* 738 * Do not support MONITOR/MWAIT under a hypervisor 739 */ 740 mask_ecx &= ~CPUID_INTC_ECX_MON; 741 #endif /* __xpv */ 742 743 /* 744 * Now we've figured out the masks that determine 745 * which bits we choose to believe, apply the masks 746 * to the feature words, then map the kernel's view 747 * of these feature words into its feature word. 
748 */ 749 cp->cp_edx &= mask_edx; 750 cp->cp_ecx &= mask_ecx; 751 752 /* 753 * apply any platform restrictions (we don't call this 754 * immediately after __cpuid_insn here, because we need the 755 * workarounds applied above first) 756 */ 757 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 758 759 /* 760 * fold in overrides from the "eeprom" mechanism 761 */ 762 cp->cp_edx |= cpuid_feature_edx_include; 763 cp->cp_edx &= ~cpuid_feature_edx_exclude; 764 765 cp->cp_ecx |= cpuid_feature_ecx_include; 766 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 767 768 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 769 feature |= X86_LARGEPAGE; 770 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 771 feature |= X86_TSC; 772 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 773 feature |= X86_MSR; 774 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 775 feature |= X86_MTRR; 776 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 777 feature |= X86_PGE; 778 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 779 feature |= X86_CMOV; 780 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 781 feature |= X86_MMX; 782 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 783 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 784 feature |= X86_MCA; 785 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 786 feature |= X86_PAE; 787 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 788 feature |= X86_CX8; 789 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 790 feature |= X86_CX16; 791 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 792 feature |= X86_PAT; 793 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 794 feature |= X86_SEP; 795 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 796 /* 797 * In our implementation, fxsave/fxrstor 798 * are prerequisites before we'll even 799 * try and do SSE things. 
800 */ 801 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 802 feature |= X86_SSE; 803 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 804 feature |= X86_SSE2; 805 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 806 feature |= X86_SSE3; 807 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 808 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 809 feature |= X86_SSSE3; 810 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 811 feature |= X86_SSE4_1; 812 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 813 feature |= X86_SSE4_2; 814 if (cp->cp_ecx & CPUID_INTC_ECX_AES) 815 feature |= X86_AES; 816 } 817 } 818 if (cp->cp_edx & CPUID_INTC_EDX_DE) 819 feature |= X86_DE; 820 #if !defined(__xpv) 821 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 822 823 /* 824 * We require the CLFLUSH instruction for erratum workaround 825 * to use MONITOR/MWAIT. 826 */ 827 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 828 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 829 feature |= X86_MWAIT; 830 } else { 831 extern int idle_cpu_assert_cflush_monitor; 832 833 /* 834 * All processors we are aware of which have 835 * MONITOR/MWAIT also have CLFLUSH. 836 */ 837 if (idle_cpu_assert_cflush_monitor) { 838 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 839 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 840 } 841 } 842 } 843 #endif /* __xpv */ 844 845 /* 846 * Only need it first time, rest of the cpus would follow suite. 847 * we only capture this for the bootcpu. 848 */ 849 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 850 feature |= X86_CLFSH; 851 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 852 } 853 854 if (feature & X86_PAE) 855 cpi->cpi_pabits = 36; 856 857 /* 858 * Hyperthreading configuration is slightly tricky on Intel 859 * and pure clones, and even trickier on AMD. 860 * 861 * (AMD chose to set the HTT bit on their CMP processors, 862 * even though they're not actually hyperthreaded. Thus it 863 * takes a bit more work to figure out what's really going 864 * on ... 
see the handling of the CMP_LGCY bit below) 865 */ 866 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 867 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 868 if (cpi->cpi_ncpu_per_chip > 1) 869 feature |= X86_HTT; 870 } else { 871 cpi->cpi_ncpu_per_chip = 1; 872 } 873 874 /* 875 * Work on the "extended" feature information, doing 876 * some basic initialization for cpuid_pass2() 877 */ 878 xcpuid = 0; 879 switch (cpi->cpi_vendor) { 880 case X86_VENDOR_Intel: 881 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 882 xcpuid++; 883 break; 884 case X86_VENDOR_AMD: 885 if (cpi->cpi_family > 5 || 886 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 887 xcpuid++; 888 break; 889 case X86_VENDOR_Cyrix: 890 /* 891 * Only these Cyrix CPUs are -known- to support 892 * extended cpuid operations. 893 */ 894 if (x86_type == X86_TYPE_VIA_CYRIX_III || 895 x86_type == X86_TYPE_CYRIX_GXm) 896 xcpuid++; 897 break; 898 case X86_VENDOR_Centaur: 899 case X86_VENDOR_TM: 900 default: 901 xcpuid++; 902 break; 903 } 904 905 if (xcpuid) { 906 cp = &cpi->cpi_extd[0]; 907 cp->cp_eax = 0x80000000; 908 cpi->cpi_xmaxeax = __cpuid_insn(cp); 909 } 910 911 if (cpi->cpi_xmaxeax & 0x80000000) { 912 913 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 914 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 915 916 switch (cpi->cpi_vendor) { 917 case X86_VENDOR_Intel: 918 case X86_VENDOR_AMD: 919 if (cpi->cpi_xmaxeax < 0x80000001) 920 break; 921 cp = &cpi->cpi_extd[1]; 922 cp->cp_eax = 0x80000001; 923 (void) __cpuid_insn(cp); 924 925 if (cpi->cpi_vendor == X86_VENDOR_AMD && 926 cpi->cpi_family == 5 && 927 cpi->cpi_model == 6 && 928 cpi->cpi_step == 6) { 929 /* 930 * K6 model 6 uses bit 10 to indicate SYSC 931 * Later models use bit 11. Fix it here. 932 */ 933 if (cp->cp_edx & 0x400) { 934 cp->cp_edx &= ~0x400; 935 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 936 } 937 } 938 939 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 940 941 /* 942 * Compute the additions to the kernel's feature word. 
943 */ 944 if (cp->cp_edx & CPUID_AMD_EDX_NX) 945 feature |= X86_NX; 946 947 /* 948 * Regardless whether or not we boot 64-bit, 949 * we should have a way to identify whether 950 * the CPU is capable of running 64-bit. 951 */ 952 if (cp->cp_edx & CPUID_AMD_EDX_LM) 953 feature |= X86_64; 954 955 #if defined(__amd64) 956 /* 1 GB large page - enable only for 64 bit kernel */ 957 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 958 feature |= X86_1GPG; 959 #endif 960 961 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 962 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 963 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 964 feature |= X86_SSE4A; 965 966 /* 967 * If both the HTT and CMP_LGCY bits are set, 968 * then we're not actually HyperThreaded. Read 969 * "AMD CPUID Specification" for more details. 970 */ 971 if (cpi->cpi_vendor == X86_VENDOR_AMD && 972 (feature & X86_HTT) && 973 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 974 feature &= ~X86_HTT; 975 feature |= X86_CMP; 976 } 977 #if defined(__amd64) 978 /* 979 * It's really tricky to support syscall/sysret in 980 * the i386 kernel; we rely on sysenter/sysexit 981 * instead. In the amd64 kernel, things are -way- 982 * better. 983 */ 984 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 985 feature |= X86_ASYSC; 986 987 /* 988 * While we're thinking about system calls, note 989 * that AMD processors don't support sysenter 990 * in long mode at all, so don't try to program them. 991 */ 992 if (x86_vendor == X86_VENDOR_AMD) 993 feature &= ~X86_SEP; 994 #endif 995 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 996 feature |= X86_TSCP; 997 break; 998 default: 999 break; 1000 } 1001 1002 /* 1003 * Get CPUID data about processor cores and hyperthreads. 
1004 */ 1005 switch (cpi->cpi_vendor) { 1006 case X86_VENDOR_Intel: 1007 if (cpi->cpi_maxeax >= 4) { 1008 cp = &cpi->cpi_std[4]; 1009 cp->cp_eax = 4; 1010 cp->cp_ecx = 0; 1011 (void) __cpuid_insn(cp); 1012 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1013 } 1014 /*FALLTHROUGH*/ 1015 case X86_VENDOR_AMD: 1016 if (cpi->cpi_xmaxeax < 0x80000008) 1017 break; 1018 cp = &cpi->cpi_extd[8]; 1019 cp->cp_eax = 0x80000008; 1020 (void) __cpuid_insn(cp); 1021 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1022 1023 /* 1024 * Virtual and physical address limits from 1025 * cpuid override previously guessed values. 1026 */ 1027 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1028 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1029 break; 1030 default: 1031 break; 1032 } 1033 1034 /* 1035 * Derive the number of cores per chip 1036 */ 1037 switch (cpi->cpi_vendor) { 1038 case X86_VENDOR_Intel: 1039 if (cpi->cpi_maxeax < 4) { 1040 cpi->cpi_ncore_per_chip = 1; 1041 break; 1042 } else { 1043 cpi->cpi_ncore_per_chip = 1044 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1045 } 1046 break; 1047 case X86_VENDOR_AMD: 1048 if (cpi->cpi_xmaxeax < 0x80000008) { 1049 cpi->cpi_ncore_per_chip = 1; 1050 break; 1051 } else { 1052 /* 1053 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1054 * 1 less than the number of physical cores on 1055 * the chip. In family 0x10 this value can 1056 * be affected by "downcoring" - it reflects 1057 * 1 less than the number of cores actually 1058 * enabled on this node. 1059 */ 1060 cpi->cpi_ncore_per_chip = 1061 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1062 } 1063 break; 1064 default: 1065 cpi->cpi_ncore_per_chip = 1; 1066 break; 1067 } 1068 1069 /* 1070 * Get CPUID data about TSC Invariance in Deep C-State. 
1071 */ 1072 switch (cpi->cpi_vendor) { 1073 case X86_VENDOR_Intel: 1074 if (cpi->cpi_maxeax >= 7) { 1075 cp = &cpi->cpi_extd[7]; 1076 cp->cp_eax = 0x80000007; 1077 cp->cp_ecx = 0; 1078 (void) __cpuid_insn(cp); 1079 } 1080 break; 1081 default: 1082 break; 1083 } 1084 } else { 1085 cpi->cpi_ncore_per_chip = 1; 1086 } 1087 1088 /* 1089 * If more than one core, then this processor is CMP. 1090 */ 1091 if (cpi->cpi_ncore_per_chip > 1) 1092 feature |= X86_CMP; 1093 1094 /* 1095 * If the number of cores is the same as the number 1096 * of CPUs, then we cannot have HyperThreading. 1097 */ 1098 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1099 feature &= ~X86_HTT; 1100 1101 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1102 /* 1103 * Single-core single-threaded processors. 1104 */ 1105 cpi->cpi_chipid = -1; 1106 cpi->cpi_clogid = 0; 1107 cpi->cpi_coreid = cpu->cpu_id; 1108 cpi->cpi_pkgcoreid = 0; 1109 } else if (cpi->cpi_ncpu_per_chip > 1) { 1110 uint_t i; 1111 uint_t chipid_shift = 0; 1112 uint_t coreid_shift = 0; 1113 uint_t apic_id = CPI_APIC_ID(cpi); 1114 1115 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1116 chipid_shift++; 1117 cpi->cpi_chipid = apic_id >> chipid_shift; 1118 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1119 1120 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1121 if (feature & X86_CMP) { 1122 /* 1123 * Multi-core (and possibly multi-threaded) 1124 * processors. 
1125 */ 1126 uint_t ncpu_per_core; 1127 if (cpi->cpi_ncore_per_chip == 1) 1128 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1129 else if (cpi->cpi_ncore_per_chip > 1) 1130 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1131 cpi->cpi_ncore_per_chip; 1132 /* 1133 * 8bit APIC IDs on dual core Pentiums 1134 * look like this: 1135 * 1136 * +-----------------------+------+------+ 1137 * | Physical Package ID | MC | HT | 1138 * +-----------------------+------+------+ 1139 * <------- chipid --------> 1140 * <------- coreid ---------------> 1141 * <--- clogid --> 1142 * <------> 1143 * pkgcoreid 1144 * 1145 * Where the number of bits necessary to 1146 * represent MC and HT fields together equals 1147 * to the minimum number of bits necessary to 1148 * store the value of cpi->cpi_ncpu_per_chip. 1149 * Of those bits, the MC part uses the number 1150 * of bits necessary to store the value of 1151 * cpi->cpi_ncore_per_chip. 1152 */ 1153 for (i = 1; i < ncpu_per_core; i <<= 1) 1154 coreid_shift++; 1155 cpi->cpi_coreid = apic_id >> coreid_shift; 1156 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1157 coreid_shift; 1158 } else if (feature & X86_HTT) { 1159 /* 1160 * Single-core multi-threaded processors. 1161 */ 1162 cpi->cpi_coreid = cpi->cpi_chipid; 1163 cpi->cpi_pkgcoreid = 0; 1164 } 1165 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1166 /* 1167 * AMD CMP chips currently have a single thread per 1168 * core, with 2 cores on family 0xf and 2, 3 or 4 1169 * cores on family 0x10. 1170 * 1171 * Since no two cpus share a core we must assign a 1172 * distinct coreid per cpu, and we do this by using 1173 * the cpu_id. This scheme does not, however, 1174 * guarantee that sibling cores of a chip will have 1175 * sequential coreids starting at a multiple of the 1176 * number of cores per chip - that is usually the 1177 * case, but if the ACPI MADT table is presented 1178 * in a different order then we need to perform a 1179 * few more gymnastics for the pkgcoreid. 
1180 * 1181 * In family 0xf CMPs there are 2 cores on all nodes 1182 * present - no mixing of single and dual core parts. 1183 * 1184 * In family 0x10 CMPs cpuid fn 2 ECX[15:12] 1185 * "ApicIdCoreIdSize[3:0]" tells us how 1186 * many least-significant bits in the ApicId 1187 * are used to represent the core number 1188 * within the node. Cores are always 1189 * numbered sequentially from 0 regardless 1190 * of how many or which are disabled, and 1191 * there seems to be no way to discover the 1192 * real core id when some are disabled. 1193 */ 1194 cpi->cpi_coreid = cpu->cpu_id; 1195 1196 if (cpi->cpi_family == 0x10 && 1197 cpi->cpi_xmaxeax >= 0x80000008) { 1198 int coreidsz = 1199 BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 1200 1201 cpi->cpi_pkgcoreid = 1202 apic_id & ((1 << coreidsz) - 1); 1203 } else { 1204 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 1205 } 1206 } else { 1207 /* 1208 * All other processors are currently 1209 * assumed to have single cores. 1210 */ 1211 cpi->cpi_coreid = cpi->cpi_chipid; 1212 cpi->cpi_pkgcoreid = 0; 1213 } 1214 } 1215 1216 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1217 1218 /* 1219 * Synthesize chip "revision" and socket type 1220 */ 1221 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1222 cpi->cpi_model, cpi->cpi_step); 1223 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1224 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1225 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1226 cpi->cpi_model, cpi->cpi_step); 1227 1228 pass1_done: 1229 #if !defined(__xpv) 1230 determine_platform(); 1231 #endif 1232 cpi->cpi_pass = 1; 1233 return (feature); 1234 } 1235 1236 /* 1237 * Make copies of the cpuid table entries we depend on, in 1238 * part for ease of parsing now, in part so that we have only 1239 * one place to correct any of it, in part for ease of 1240 * later export to userland, and in part so we can look at 1241 * this stuff in a crash dump. 
1242 */ 1243 1244 /*ARGSUSED*/ 1245 void 1246 cpuid_pass2(cpu_t *cpu) 1247 { 1248 uint_t n, nmax; 1249 int i; 1250 struct cpuid_regs *cp; 1251 uint8_t *dp; 1252 uint32_t *iptr; 1253 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1254 1255 ASSERT(cpi->cpi_pass == 1); 1256 1257 if (cpi->cpi_maxeax < 1) 1258 goto pass2_done; 1259 1260 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1261 nmax = NMAX_CPI_STD; 1262 /* 1263 * (We already handled n == 0 and n == 1 in pass 1) 1264 */ 1265 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1266 cp->cp_eax = n; 1267 1268 /* 1269 * CPUID function 4 expects %ecx to be initialized 1270 * with an index which indicates which cache to return 1271 * information about. The OS is expected to call function 4 1272 * with %ecx set to 0, 1, 2, ... until it returns with 1273 * EAX[4:0] set to 0, which indicates there are no more 1274 * caches. 1275 * 1276 * Here, populate cpi_std[4] with the information returned by 1277 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1278 * when dynamic memory allocation becomes available. 1279 * 1280 * Note: we need to explicitly initialize %ecx here, since 1281 * function 4 may have been previously invoked. 1282 */ 1283 if (n == 4) 1284 cp->cp_ecx = 0; 1285 1286 (void) __cpuid_insn(cp); 1287 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1288 switch (n) { 1289 case 2: 1290 /* 1291 * "the lower 8 bits of the %eax register 1292 * contain a value that identifies the number 1293 * of times the cpuid [instruction] has to be 1294 * executed to obtain a complete image of the 1295 * processor's caching systems." 1296 * 1297 * How *do* they make this stuff up? 1298 */ 1299 cpi->cpi_ncache = sizeof (*cp) * 1300 BITX(cp->cp_eax, 7, 0); 1301 if (cpi->cpi_ncache == 0) 1302 break; 1303 cpi->cpi_ncache--; /* skip count byte */ 1304 1305 /* 1306 * Well, for now, rather than attempt to implement 1307 * this slightly dubious algorithm, we just look 1308 * at the first 15 .. 
1309 */ 1310 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1311 cpi->cpi_ncache = sizeof (*cp) - 1; 1312 1313 dp = cpi->cpi_cacheinfo; 1314 if (BITX(cp->cp_eax, 31, 31) == 0) { 1315 uint8_t *p = (void *)&cp->cp_eax; 1316 for (i = 1; i < 4; i++) 1317 if (p[i] != 0) 1318 *dp++ = p[i]; 1319 } 1320 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1321 uint8_t *p = (void *)&cp->cp_ebx; 1322 for (i = 0; i < 4; i++) 1323 if (p[i] != 0) 1324 *dp++ = p[i]; 1325 } 1326 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1327 uint8_t *p = (void *)&cp->cp_ecx; 1328 for (i = 0; i < 4; i++) 1329 if (p[i] != 0) 1330 *dp++ = p[i]; 1331 } 1332 if (BITX(cp->cp_edx, 31, 31) == 0) { 1333 uint8_t *p = (void *)&cp->cp_edx; 1334 for (i = 0; i < 4; i++) 1335 if (p[i] != 0) 1336 *dp++ = p[i]; 1337 } 1338 break; 1339 1340 case 3: /* Processor serial number, if PSN supported */ 1341 break; 1342 1343 case 4: /* Deterministic cache parameters */ 1344 break; 1345 1346 case 5: /* Monitor/Mwait parameters */ 1347 { 1348 size_t mwait_size; 1349 1350 /* 1351 * check cpi_mwait.support which was set in cpuid_pass1 1352 */ 1353 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1354 break; 1355 1356 /* 1357 * Protect ourself from insane mwait line size. 1358 * Workaround for incomplete hardware emulator(s). 
1359 */ 1360 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1361 if (mwait_size < sizeof (uint32_t) || 1362 !ISP2(mwait_size)) { 1363 #if DEBUG 1364 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1365 "size %ld", cpu->cpu_id, (long)mwait_size); 1366 #endif 1367 break; 1368 } 1369 1370 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1371 cpi->cpi_mwait.mon_max = mwait_size; 1372 if (MWAIT_EXTENSION(cpi)) { 1373 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1374 if (MWAIT_INT_ENABLE(cpi)) 1375 cpi->cpi_mwait.support |= 1376 MWAIT_ECX_INT_ENABLE; 1377 } 1378 break; 1379 } 1380 default: 1381 break; 1382 } 1383 } 1384 1385 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1386 struct cpuid_regs regs; 1387 1388 cp = ®s; 1389 cp->cp_eax = 0xB; 1390 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1391 1392 (void) __cpuid_insn(cp); 1393 1394 /* 1395 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1396 * indicates that the extended topology enumeration leaf is 1397 * available. 1398 */ 1399 if (cp->cp_ebx) { 1400 uint32_t x2apic_id; 1401 uint_t coreid_shift = 0; 1402 uint_t ncpu_per_core = 1; 1403 uint_t chipid_shift = 0; 1404 uint_t ncpu_per_chip = 1; 1405 uint_t i; 1406 uint_t level; 1407 1408 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1409 cp->cp_eax = 0xB; 1410 cp->cp_ecx = i; 1411 1412 (void) __cpuid_insn(cp); 1413 level = CPI_CPU_LEVEL_TYPE(cp); 1414 1415 if (level == 1) { 1416 x2apic_id = cp->cp_edx; 1417 coreid_shift = BITX(cp->cp_eax, 4, 0); 1418 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1419 } else if (level == 2) { 1420 x2apic_id = cp->cp_edx; 1421 chipid_shift = BITX(cp->cp_eax, 4, 0); 1422 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1423 } 1424 } 1425 1426 cpi->cpi_apicid = x2apic_id; 1427 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1428 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1429 ncpu_per_core; 1430 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1431 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1432 cpi->cpi_coreid = x2apic_id >> 
coreid_shift; 1433 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1434 } 1435 1436 /* Make cp NULL so that we don't stumble on others */ 1437 cp = NULL; 1438 } 1439 1440 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1441 goto pass2_done; 1442 1443 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1444 nmax = NMAX_CPI_EXTD; 1445 /* 1446 * Copy the extended properties, fixing them as we go. 1447 * (We already handled n == 0 and n == 1 in pass 1) 1448 */ 1449 iptr = (void *)cpi->cpi_brandstr; 1450 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1451 cp->cp_eax = 0x80000000 + n; 1452 (void) __cpuid_insn(cp); 1453 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1454 switch (n) { 1455 case 2: 1456 case 3: 1457 case 4: 1458 /* 1459 * Extract the brand string 1460 */ 1461 *iptr++ = cp->cp_eax; 1462 *iptr++ = cp->cp_ebx; 1463 *iptr++ = cp->cp_ecx; 1464 *iptr++ = cp->cp_edx; 1465 break; 1466 case 5: 1467 switch (cpi->cpi_vendor) { 1468 case X86_VENDOR_AMD: 1469 /* 1470 * The Athlon and Duron were the first 1471 * parts to report the sizes of the 1472 * TLB for large pages. Before then, 1473 * we don't trust the data. 1474 */ 1475 if (cpi->cpi_family < 6 || 1476 (cpi->cpi_family == 6 && 1477 cpi->cpi_model < 1)) 1478 cp->cp_eax = 0; 1479 break; 1480 default: 1481 break; 1482 } 1483 break; 1484 case 6: 1485 switch (cpi->cpi_vendor) { 1486 case X86_VENDOR_AMD: 1487 /* 1488 * The Athlon and Duron were the first 1489 * AMD parts with L2 TLB's. 1490 * Before then, don't trust the data. 
1491 */ 1492 if (cpi->cpi_family < 6 || 1493 cpi->cpi_family == 6 && 1494 cpi->cpi_model < 1) 1495 cp->cp_eax = cp->cp_ebx = 0; 1496 /* 1497 * AMD Duron rev A0 reports L2 1498 * cache size incorrectly as 1K 1499 * when it is really 64K 1500 */ 1501 if (cpi->cpi_family == 6 && 1502 cpi->cpi_model == 3 && 1503 cpi->cpi_step == 0) { 1504 cp->cp_ecx &= 0xffff; 1505 cp->cp_ecx |= 0x400000; 1506 } 1507 break; 1508 case X86_VENDOR_Cyrix: /* VIA C3 */ 1509 /* 1510 * VIA C3 processors are a bit messed 1511 * up w.r.t. encoding cache sizes in %ecx 1512 */ 1513 if (cpi->cpi_family != 6) 1514 break; 1515 /* 1516 * model 7 and 8 were incorrectly encoded 1517 * 1518 * xxx is model 8 really broken? 1519 */ 1520 if (cpi->cpi_model == 7 || 1521 cpi->cpi_model == 8) 1522 cp->cp_ecx = 1523 BITX(cp->cp_ecx, 31, 24) << 16 | 1524 BITX(cp->cp_ecx, 23, 16) << 12 | 1525 BITX(cp->cp_ecx, 15, 8) << 8 | 1526 BITX(cp->cp_ecx, 7, 0); 1527 /* 1528 * model 9 stepping 1 has wrong associativity 1529 */ 1530 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1531 cp->cp_ecx |= 8 << 12; 1532 break; 1533 case X86_VENDOR_Intel: 1534 /* 1535 * Extended L2 Cache features function. 1536 * First appeared on Prescott. 
1537 */ 1538 default: 1539 break; 1540 } 1541 break; 1542 default: 1543 break; 1544 } 1545 } 1546 1547 pass2_done: 1548 cpi->cpi_pass = 2; 1549 } 1550 1551 static const char * 1552 intel_cpubrand(const struct cpuid_info *cpi) 1553 { 1554 int i; 1555 1556 if ((x86_feature & X86_CPUID) == 0 || 1557 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1558 return ("i486"); 1559 1560 switch (cpi->cpi_family) { 1561 case 5: 1562 return ("Intel Pentium(r)"); 1563 case 6: 1564 switch (cpi->cpi_model) { 1565 uint_t celeron, xeon; 1566 const struct cpuid_regs *cp; 1567 case 0: 1568 case 1: 1569 case 2: 1570 return ("Intel Pentium(r) Pro"); 1571 case 3: 1572 case 4: 1573 return ("Intel Pentium(r) II"); 1574 case 6: 1575 return ("Intel Celeron(r)"); 1576 case 5: 1577 case 7: 1578 celeron = xeon = 0; 1579 cp = &cpi->cpi_std[2]; /* cache info */ 1580 1581 for (i = 1; i < 4; i++) { 1582 uint_t tmp; 1583 1584 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1585 if (tmp == 0x40) 1586 celeron++; 1587 if (tmp >= 0x44 && tmp <= 0x45) 1588 xeon++; 1589 } 1590 1591 for (i = 0; i < 2; i++) { 1592 uint_t tmp; 1593 1594 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1595 if (tmp == 0x40) 1596 celeron++; 1597 else if (tmp >= 0x44 && tmp <= 0x45) 1598 xeon++; 1599 } 1600 1601 for (i = 0; i < 4; i++) { 1602 uint_t tmp; 1603 1604 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1605 if (tmp == 0x40) 1606 celeron++; 1607 else if (tmp >= 0x44 && tmp <= 0x45) 1608 xeon++; 1609 } 1610 1611 for (i = 0; i < 4; i++) { 1612 uint_t tmp; 1613 1614 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1615 if (tmp == 0x40) 1616 celeron++; 1617 else if (tmp >= 0x44 && tmp <= 0x45) 1618 xeon++; 1619 } 1620 1621 if (celeron) 1622 return ("Intel Celeron(r)"); 1623 if (xeon) 1624 return (cpi->cpi_model == 5 ? 1625 "Intel Pentium(r) II Xeon(tm)" : 1626 "Intel Pentium(r) III Xeon(tm)"); 1627 return (cpi->cpi_model == 5 ? 
1628 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1629 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1630 default: 1631 break; 1632 } 1633 default: 1634 break; 1635 } 1636 1637 /* BrandID is present if the field is nonzero */ 1638 if (cpi->cpi_brandid != 0) { 1639 static const struct { 1640 uint_t bt_bid; 1641 const char *bt_str; 1642 } brand_tbl[] = { 1643 { 0x1, "Intel(r) Celeron(r)" }, 1644 { 0x2, "Intel(r) Pentium(r) III" }, 1645 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1646 { 0x4, "Intel(r) Pentium(r) III" }, 1647 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1648 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1649 { 0x8, "Intel(r) Pentium(r) 4" }, 1650 { 0x9, "Intel(r) Pentium(r) 4" }, 1651 { 0xa, "Intel(r) Celeron(r)" }, 1652 { 0xb, "Intel(r) Xeon(tm)" }, 1653 { 0xc, "Intel(r) Xeon(tm) MP" }, 1654 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1655 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1656 { 0x11, "Mobile Genuine Intel(r)" }, 1657 { 0x12, "Intel(r) Celeron(r) M" }, 1658 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1659 { 0x14, "Intel(r) Celeron(r)" }, 1660 { 0x15, "Mobile Genuine Intel(r)" }, 1661 { 0x16, "Intel(r) Pentium(r) M" }, 1662 { 0x17, "Mobile Intel(r) Celeron(r)" } 1663 }; 1664 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1665 uint_t sgn; 1666 1667 sgn = (cpi->cpi_family << 8) | 1668 (cpi->cpi_model << 4) | cpi->cpi_step; 1669 1670 for (i = 0; i < btblmax; i++) 1671 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1672 break; 1673 if (i < btblmax) { 1674 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1675 return ("Intel(r) Celeron(r)"); 1676 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1677 return ("Intel(r) Xeon(tm) MP"); 1678 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1679 return ("Intel(r) Xeon(tm)"); 1680 return (brand_tbl[i].bt_str); 1681 } 1682 } 1683 1684 return (NULL); 1685 } 1686 1687 static const char * 1688 amd_cpubrand(const struct cpuid_info *cpi) 1689 { 1690 if ((x86_feature & X86_CPUID) == 0 || 1691 cpi->cpi_maxeax < 1 || 
cpi->cpi_family < 5) 1692 return ("i486 compatible"); 1693 1694 switch (cpi->cpi_family) { 1695 case 5: 1696 switch (cpi->cpi_model) { 1697 case 0: 1698 case 1: 1699 case 2: 1700 case 3: 1701 case 4: 1702 case 5: 1703 return ("AMD-K5(r)"); 1704 case 6: 1705 case 7: 1706 return ("AMD-K6(r)"); 1707 case 8: 1708 return ("AMD-K6(r)-2"); 1709 case 9: 1710 return ("AMD-K6(r)-III"); 1711 default: 1712 return ("AMD (family 5)"); 1713 } 1714 case 6: 1715 switch (cpi->cpi_model) { 1716 case 1: 1717 return ("AMD-K7(tm)"); 1718 case 0: 1719 case 2: 1720 case 4: 1721 return ("AMD Athlon(tm)"); 1722 case 3: 1723 case 7: 1724 return ("AMD Duron(tm)"); 1725 case 6: 1726 case 8: 1727 case 10: 1728 /* 1729 * Use the L2 cache size to distinguish 1730 */ 1731 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1732 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1733 default: 1734 return ("AMD (family 6)"); 1735 } 1736 default: 1737 break; 1738 } 1739 1740 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1741 cpi->cpi_brandid != 0) { 1742 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1743 case 3: 1744 return ("AMD Opteron(tm) UP 1xx"); 1745 case 4: 1746 return ("AMD Opteron(tm) DP 2xx"); 1747 case 5: 1748 return ("AMD Opteron(tm) MP 8xx"); 1749 default: 1750 return ("AMD Opteron(tm)"); 1751 } 1752 } 1753 1754 return (NULL); 1755 } 1756 1757 static const char * 1758 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1759 { 1760 if ((x86_feature & X86_CPUID) == 0 || 1761 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1762 type == X86_TYPE_CYRIX_486) 1763 return ("i486 compatible"); 1764 1765 switch (type) { 1766 case X86_TYPE_CYRIX_6x86: 1767 return ("Cyrix 6x86"); 1768 case X86_TYPE_CYRIX_6x86L: 1769 return ("Cyrix 6x86L"); 1770 case X86_TYPE_CYRIX_6x86MX: 1771 return ("Cyrix 6x86MX"); 1772 case X86_TYPE_CYRIX_GXm: 1773 return ("Cyrix GXm"); 1774 case X86_TYPE_CYRIX_MediaGX: 1775 return ("Cyrix MediaGX"); 1776 case X86_TYPE_CYRIX_MII: 1777 return ("Cyrix M2"); 1778 case X86_TYPE_VIA_CYRIX_III: 1779 
return ("VIA Cyrix M3"); 1780 default: 1781 /* 1782 * Have another wild guess .. 1783 */ 1784 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1785 return ("Cyrix 5x86"); 1786 else if (cpi->cpi_family == 5) { 1787 switch (cpi->cpi_model) { 1788 case 2: 1789 return ("Cyrix 6x86"); /* Cyrix M1 */ 1790 case 4: 1791 return ("Cyrix MediaGX"); 1792 default: 1793 break; 1794 } 1795 } else if (cpi->cpi_family == 6) { 1796 switch (cpi->cpi_model) { 1797 case 0: 1798 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1799 case 5: 1800 case 6: 1801 case 7: 1802 case 8: 1803 case 9: 1804 return ("VIA C3"); 1805 default: 1806 break; 1807 } 1808 } 1809 break; 1810 } 1811 return (NULL); 1812 } 1813 1814 /* 1815 * This only gets called in the case that the CPU extended 1816 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1817 * aren't available, or contain null bytes for some reason. 1818 */ 1819 static void 1820 fabricate_brandstr(struct cpuid_info *cpi) 1821 { 1822 const char *brand = NULL; 1823 1824 switch (cpi->cpi_vendor) { 1825 case X86_VENDOR_Intel: 1826 brand = intel_cpubrand(cpi); 1827 break; 1828 case X86_VENDOR_AMD: 1829 brand = amd_cpubrand(cpi); 1830 break; 1831 case X86_VENDOR_Cyrix: 1832 brand = cyrix_cpubrand(cpi, x86_type); 1833 break; 1834 case X86_VENDOR_NexGen: 1835 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1836 brand = "NexGen Nx586"; 1837 break; 1838 case X86_VENDOR_Centaur: 1839 if (cpi->cpi_family == 5) 1840 switch (cpi->cpi_model) { 1841 case 4: 1842 brand = "Centaur C6"; 1843 break; 1844 case 8: 1845 brand = "Centaur C2"; 1846 break; 1847 case 9: 1848 brand = "Centaur C3"; 1849 break; 1850 default: 1851 break; 1852 } 1853 break; 1854 case X86_VENDOR_Rise: 1855 if (cpi->cpi_family == 5 && 1856 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1857 brand = "Rise mP6"; 1858 break; 1859 case X86_VENDOR_SiS: 1860 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1861 brand = "SiS 55x"; 1862 break; 1863 case X86_VENDOR_TM: 1864 if (cpi->cpi_family == 5 && 
cpi->cpi_model == 4) 1865 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1866 break; 1867 case X86_VENDOR_NSC: 1868 case X86_VENDOR_UMC: 1869 default: 1870 break; 1871 } 1872 if (brand) { 1873 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1874 return; 1875 } 1876 1877 /* 1878 * If all else fails ... 1879 */ 1880 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1881 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1882 cpi->cpi_model, cpi->cpi_step); 1883 } 1884 1885 /* 1886 * This routine is called just after kernel memory allocation 1887 * becomes available on cpu0, and as part of mp_startup() on 1888 * the other cpus. 1889 * 1890 * Fixup the brand string, and collect any information from cpuid 1891 * that requires dynamicically allocated storage to represent. 1892 */ 1893 /*ARGSUSED*/ 1894 void 1895 cpuid_pass3(cpu_t *cpu) 1896 { 1897 int i, max, shft, level, size; 1898 struct cpuid_regs regs; 1899 struct cpuid_regs *cp; 1900 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1901 1902 ASSERT(cpi->cpi_pass == 2); 1903 1904 /* 1905 * Function 4: Deterministic cache parameters 1906 * 1907 * Take this opportunity to detect the number of threads 1908 * sharing the last level cache, and construct a corresponding 1909 * cache id. The respective cpuid_info members are initialized 1910 * to the default case of "no last level cache sharing". 1911 */ 1912 cpi->cpi_ncpu_shr_last_cache = 1; 1913 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1914 1915 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1916 1917 /* 1918 * Find the # of elements (size) returned by fn 4, and along 1919 * the way detect last level cache sharing details. 
1920 */ 1921 bzero(®s, sizeof (regs)); 1922 cp = ®s; 1923 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1924 cp->cp_eax = 4; 1925 cp->cp_ecx = i; 1926 1927 (void) __cpuid_insn(cp); 1928 1929 if (CPI_CACHE_TYPE(cp) == 0) 1930 break; 1931 level = CPI_CACHE_LVL(cp); 1932 if (level > max) { 1933 max = level; 1934 cpi->cpi_ncpu_shr_last_cache = 1935 CPI_NTHR_SHR_CACHE(cp) + 1; 1936 } 1937 } 1938 cpi->cpi_std_4_size = size = i; 1939 1940 /* 1941 * Allocate the cpi_std_4 array. The first element 1942 * references the regs for fn 4, %ecx == 0, which 1943 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1944 */ 1945 if (size > 0) { 1946 cpi->cpi_std_4 = 1947 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1948 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1949 1950 /* 1951 * Allocate storage to hold the additional regs 1952 * for function 4, %ecx == 1 .. cpi_std_4_size. 1953 * 1954 * The regs for fn 4, %ecx == 0 has already 1955 * been allocated as indicated above. 1956 */ 1957 for (i = 1; i < size; i++) { 1958 cp = cpi->cpi_std_4[i] = 1959 kmem_zalloc(sizeof (regs), KM_SLEEP); 1960 cp->cp_eax = 4; 1961 cp->cp_ecx = i; 1962 1963 (void) __cpuid_insn(cp); 1964 } 1965 } 1966 /* 1967 * Determine the number of bits needed to represent 1968 * the number of CPUs sharing the last level cache. 1969 * 1970 * Shift off that number of bits from the APIC id to 1971 * derive the cache id. 1972 */ 1973 shft = 0; 1974 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1975 shft++; 1976 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 1977 } 1978 1979 /* 1980 * Now fixup the brand string 1981 */ 1982 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1983 fabricate_brandstr(cpi); 1984 } else { 1985 1986 /* 1987 * If we successfully extracted a brand string from the cpuid 1988 * instruction, clean it up by removing leading spaces and 1989 * similar junk. 
1990 */ 1991 if (cpi->cpi_brandstr[0]) { 1992 size_t maxlen = sizeof (cpi->cpi_brandstr); 1993 char *src, *dst; 1994 1995 dst = src = (char *)cpi->cpi_brandstr; 1996 src[maxlen - 1] = '\0'; 1997 /* 1998 * strip leading spaces 1999 */ 2000 while (*src == ' ') 2001 src++; 2002 /* 2003 * Remove any 'Genuine' or "Authentic" prefixes 2004 */ 2005 if (strncmp(src, "Genuine ", 8) == 0) 2006 src += 8; 2007 if (strncmp(src, "Authentic ", 10) == 0) 2008 src += 10; 2009 2010 /* 2011 * Now do an in-place copy. 2012 * Map (R) to (r) and (TM) to (tm). 2013 * The era of teletypes is long gone, and there's 2014 * -really- no need to shout. 2015 */ 2016 while (*src != '\0') { 2017 if (src[0] == '(') { 2018 if (strncmp(src + 1, "R)", 2) == 0) { 2019 (void) strncpy(dst, "(r)", 3); 2020 src += 3; 2021 dst += 3; 2022 continue; 2023 } 2024 if (strncmp(src + 1, "TM)", 3) == 0) { 2025 (void) strncpy(dst, "(tm)", 4); 2026 src += 4; 2027 dst += 4; 2028 continue; 2029 } 2030 } 2031 *dst++ = *src++; 2032 } 2033 *dst = '\0'; 2034 2035 /* 2036 * Finally, remove any trailing spaces 2037 */ 2038 while (--dst > cpi->cpi_brandstr) 2039 if (*dst == ' ') 2040 *dst = '\0'; 2041 else 2042 break; 2043 } else 2044 fabricate_brandstr(cpi); 2045 } 2046 cpi->cpi_pass = 3; 2047 } 2048 2049 /* 2050 * This routine is called out of bind_hwcap() much later in the life 2051 * of the kernel (post_startup()). The job of this routine is to resolve 2052 * the hardware feature support and kernel support for those features into 2053 * what we're actually going to tell applications via the aux vector. 
2054 */ 2055 uint_t 2056 cpuid_pass4(cpu_t *cpu) 2057 { 2058 struct cpuid_info *cpi; 2059 uint_t hwcap_flags = 0; 2060 2061 if (cpu == NULL) 2062 cpu = CPU; 2063 cpi = cpu->cpu_m.mcpu_cpi; 2064 2065 ASSERT(cpi->cpi_pass == 3); 2066 2067 if (cpi->cpi_maxeax >= 1) { 2068 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2069 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2070 2071 *edx = CPI_FEATURES_EDX(cpi); 2072 *ecx = CPI_FEATURES_ECX(cpi); 2073 2074 /* 2075 * [these require explicit kernel support] 2076 */ 2077 if ((x86_feature & X86_SEP) == 0) 2078 *edx &= ~CPUID_INTC_EDX_SEP; 2079 2080 if ((x86_feature & X86_SSE) == 0) 2081 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2082 if ((x86_feature & X86_SSE2) == 0) 2083 *edx &= ~CPUID_INTC_EDX_SSE2; 2084 2085 if ((x86_feature & X86_HTT) == 0) 2086 *edx &= ~CPUID_INTC_EDX_HTT; 2087 2088 if ((x86_feature & X86_SSE3) == 0) 2089 *ecx &= ~CPUID_INTC_ECX_SSE3; 2090 2091 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2092 if ((x86_feature & X86_SSSE3) == 0) 2093 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2094 if ((x86_feature & X86_SSE4_1) == 0) 2095 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2096 if ((x86_feature & X86_SSE4_2) == 0) 2097 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2098 if ((x86_feature & X86_AES) == 0) 2099 *ecx &= ~CPUID_INTC_ECX_AES; 2100 } 2101 2102 /* 2103 * [no explicit support required beyond x87 fp context] 2104 */ 2105 if (!fpu_exists) 2106 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2107 2108 /* 2109 * Now map the supported feature vector to things that we 2110 * think userland will care about. 
2111 */ 2112 if (*edx & CPUID_INTC_EDX_SEP) 2113 hwcap_flags |= AV_386_SEP; 2114 if (*edx & CPUID_INTC_EDX_SSE) 2115 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2116 if (*edx & CPUID_INTC_EDX_SSE2) 2117 hwcap_flags |= AV_386_SSE2; 2118 if (*ecx & CPUID_INTC_ECX_SSE3) 2119 hwcap_flags |= AV_386_SSE3; 2120 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2121 if (*ecx & CPUID_INTC_ECX_SSSE3) 2122 hwcap_flags |= AV_386_SSSE3; 2123 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2124 hwcap_flags |= AV_386_SSE4_1; 2125 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2126 hwcap_flags |= AV_386_SSE4_2; 2127 if (*ecx & CPUID_INTC_ECX_MOVBE) 2128 hwcap_flags |= AV_386_MOVBE; 2129 if (*ecx & CPUID_INTC_ECX_AES) 2130 hwcap_flags |= AV_386_AES; 2131 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 2132 hwcap_flags |= AV_386_PCLMULQDQ; 2133 } 2134 if (*ecx & CPUID_INTC_ECX_POPCNT) 2135 hwcap_flags |= AV_386_POPCNT; 2136 if (*edx & CPUID_INTC_EDX_FPU) 2137 hwcap_flags |= AV_386_FPU; 2138 if (*edx & CPUID_INTC_EDX_MMX) 2139 hwcap_flags |= AV_386_MMX; 2140 2141 if (*edx & CPUID_INTC_EDX_TSC) 2142 hwcap_flags |= AV_386_TSC; 2143 if (*edx & CPUID_INTC_EDX_CX8) 2144 hwcap_flags |= AV_386_CX8; 2145 if (*edx & CPUID_INTC_EDX_CMOV) 2146 hwcap_flags |= AV_386_CMOV; 2147 if (*ecx & CPUID_INTC_ECX_MON) 2148 hwcap_flags |= AV_386_MON; 2149 if (*ecx & CPUID_INTC_ECX_CX16) 2150 hwcap_flags |= AV_386_CX16; 2151 } 2152 2153 if (x86_feature & X86_HTT) 2154 hwcap_flags |= AV_386_PAUSE; 2155 2156 if (cpi->cpi_xmaxeax < 0x80000001) 2157 goto pass4_done; 2158 2159 switch (cpi->cpi_vendor) { 2160 struct cpuid_regs cp; 2161 uint32_t *edx, *ecx; 2162 2163 case X86_VENDOR_Intel: 2164 /* 2165 * Seems like Intel duplicated what we necessary 2166 * here to make the initial crop of 64-bit OS's work. 2167 * Hopefully, those are the only "extended" bits 2168 * they'll add. 
2169 */ 2170 /*FALLTHROUGH*/ 2171 2172 case X86_VENDOR_AMD: 2173 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2174 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2175 2176 *edx = CPI_FEATURES_XTD_EDX(cpi); 2177 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2178 2179 /* 2180 * [these features require explicit kernel support] 2181 */ 2182 switch (cpi->cpi_vendor) { 2183 case X86_VENDOR_Intel: 2184 if ((x86_feature & X86_TSCP) == 0) 2185 *edx &= ~CPUID_AMD_EDX_TSCP; 2186 break; 2187 2188 case X86_VENDOR_AMD: 2189 if ((x86_feature & X86_TSCP) == 0) 2190 *edx &= ~CPUID_AMD_EDX_TSCP; 2191 if ((x86_feature & X86_SSE4A) == 0) 2192 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2193 break; 2194 2195 default: 2196 break; 2197 } 2198 2199 /* 2200 * [no explicit support required beyond 2201 * x87 fp context and exception handlers] 2202 */ 2203 if (!fpu_exists) 2204 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2205 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2206 2207 if ((x86_feature & X86_NX) == 0) 2208 *edx &= ~CPUID_AMD_EDX_NX; 2209 #if !defined(__amd64) 2210 *edx &= ~CPUID_AMD_EDX_LM; 2211 #endif 2212 /* 2213 * Now map the supported feature vector to 2214 * things that we think userland will care about. 
2215 */ 2216 #if defined(__amd64) 2217 if (*edx & CPUID_AMD_EDX_SYSC) 2218 hwcap_flags |= AV_386_AMD_SYSC; 2219 #endif 2220 if (*edx & CPUID_AMD_EDX_MMXamd) 2221 hwcap_flags |= AV_386_AMD_MMX; 2222 if (*edx & CPUID_AMD_EDX_3DNow) 2223 hwcap_flags |= AV_386_AMD_3DNow; 2224 if (*edx & CPUID_AMD_EDX_3DNowx) 2225 hwcap_flags |= AV_386_AMD_3DNowx; 2226 2227 switch (cpi->cpi_vendor) { 2228 case X86_VENDOR_AMD: 2229 if (*edx & CPUID_AMD_EDX_TSCP) 2230 hwcap_flags |= AV_386_TSCP; 2231 if (*ecx & CPUID_AMD_ECX_AHF64) 2232 hwcap_flags |= AV_386_AHF; 2233 if (*ecx & CPUID_AMD_ECX_SSE4A) 2234 hwcap_flags |= AV_386_AMD_SSE4A; 2235 if (*ecx & CPUID_AMD_ECX_LZCNT) 2236 hwcap_flags |= AV_386_AMD_LZCNT; 2237 break; 2238 2239 case X86_VENDOR_Intel: 2240 if (*edx & CPUID_AMD_EDX_TSCP) 2241 hwcap_flags |= AV_386_TSCP; 2242 /* 2243 * Aarrgh. 2244 * Intel uses a different bit in the same word. 2245 */ 2246 if (*ecx & CPUID_INTC_ECX_AHF64) 2247 hwcap_flags |= AV_386_AHF; 2248 break; 2249 2250 default: 2251 break; 2252 } 2253 break; 2254 2255 case X86_VENDOR_TM: 2256 cp.cp_eax = 0x80860001; 2257 (void) __cpuid_insn(&cp); 2258 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2259 break; 2260 2261 default: 2262 break; 2263 } 2264 2265 pass4_done: 2266 cpi->cpi_pass = 4; 2267 return (hwcap_flags); 2268 } 2269 2270 2271 /* 2272 * Simulate the cpuid instruction using the data we previously 2273 * captured about this CPU. We try our best to return the truth 2274 * about the hardware, independently of kernel support. 2275 */ 2276 uint32_t 2277 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2278 { 2279 struct cpuid_info *cpi; 2280 struct cpuid_regs *xcp; 2281 2282 if (cpu == NULL) 2283 cpu = CPU; 2284 cpi = cpu->cpu_m.mcpu_cpi; 2285 2286 ASSERT(cpuid_checkpass(cpu, 3)); 2287 2288 /* 2289 * CPUID data is cached in two separate places: cpi_std for standard 2290 * CPUID functions, and cpi_extd for extended CPUID functions. 
2291 */ 2292 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2293 xcp = &cpi->cpi_std[cp->cp_eax]; 2294 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2295 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2296 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2297 else 2298 /* 2299 * The caller is asking for data from an input parameter which 2300 * the kernel has not cached. In this case we go fetch from 2301 * the hardware and return the data directly to the user. 2302 */ 2303 return (__cpuid_insn(cp)); 2304 2305 cp->cp_eax = xcp->cp_eax; 2306 cp->cp_ebx = xcp->cp_ebx; 2307 cp->cp_ecx = xcp->cp_ecx; 2308 cp->cp_edx = xcp->cp_edx; 2309 return (cp->cp_eax); 2310 } 2311 2312 int 2313 cpuid_checkpass(cpu_t *cpu, int pass) 2314 { 2315 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2316 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2317 } 2318 2319 int 2320 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2321 { 2322 ASSERT(cpuid_checkpass(cpu, 3)); 2323 2324 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2325 } 2326 2327 int 2328 cpuid_is_cmt(cpu_t *cpu) 2329 { 2330 if (cpu == NULL) 2331 cpu = CPU; 2332 2333 ASSERT(cpuid_checkpass(cpu, 1)); 2334 2335 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2336 } 2337 2338 /* 2339 * AMD and Intel both implement the 64-bit variant of the syscall 2340 * instruction (syscallq), so if there's -any- support for syscall, 2341 * cpuid currently says "yes, we support this". 2342 * 2343 * However, Intel decided to -not- implement the 32-bit variant of the 2344 * syscall instruction, so we provide a predicate to allow our caller 2345 * to test that subtlety here. 2346 * 2347 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2348 * even in the case where the hardware would in fact support it. 2349 */ 2350 /*ARGSUSED*/ 2351 int 2352 cpuid_syscall32_insn(cpu_t *cpu) 2353 { 2354 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 2355 2356 #if !defined(__xpv) 2357 if (cpu == NULL) 2358 cpu = CPU; 2359 2360 /*CSTYLED*/ 2361 { 2362 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2363 2364 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2365 cpi->cpi_xmaxeax >= 0x80000001 && 2366 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2367 return (1); 2368 } 2369 #endif 2370 return (0); 2371 } 2372 2373 int 2374 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2375 { 2376 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2377 2378 static const char fmt[] = 2379 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2380 static const char fmt_ht[] = 2381 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2382 2383 ASSERT(cpuid_checkpass(cpu, 1)); 2384 2385 if (cpuid_is_cmt(cpu)) 2386 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2387 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2388 cpi->cpi_family, cpi->cpi_model, 2389 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2390 return (snprintf(s, n, fmt, 2391 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2392 cpi->cpi_family, cpi->cpi_model, 2393 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2394 } 2395 2396 const char * 2397 cpuid_getvendorstr(cpu_t *cpu) 2398 { 2399 ASSERT(cpuid_checkpass(cpu, 1)); 2400 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2401 } 2402 2403 uint_t 2404 cpuid_getvendor(cpu_t *cpu) 2405 { 2406 ASSERT(cpuid_checkpass(cpu, 1)); 2407 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2408 } 2409 2410 uint_t 2411 cpuid_getfamily(cpu_t *cpu) 2412 { 2413 ASSERT(cpuid_checkpass(cpu, 1)); 2414 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2415 } 2416 2417 uint_t 2418 cpuid_getmodel(cpu_t *cpu) 2419 { 2420 ASSERT(cpuid_checkpass(cpu, 1)); 2421 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2422 } 2423 2424 uint_t 2425 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2426 { 2427 ASSERT(cpuid_checkpass(cpu, 1)); 2428 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2429 } 2430 2431 uint_t 2432 cpuid_get_ncore_per_chip(cpu_t *cpu) 2433 { 2434 
ASSERT(cpuid_checkpass(cpu, 1)); 2435 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2436 } 2437 2438 uint_t 2439 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2440 { 2441 ASSERT(cpuid_checkpass(cpu, 2)); 2442 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2443 } 2444 2445 id_t 2446 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2447 { 2448 ASSERT(cpuid_checkpass(cpu, 2)); 2449 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2450 } 2451 2452 uint_t 2453 cpuid_getstep(cpu_t *cpu) 2454 { 2455 ASSERT(cpuid_checkpass(cpu, 1)); 2456 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2457 } 2458 2459 uint_t 2460 cpuid_getsig(struct cpu *cpu) 2461 { 2462 ASSERT(cpuid_checkpass(cpu, 1)); 2463 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2464 } 2465 2466 uint32_t 2467 cpuid_getchiprev(struct cpu *cpu) 2468 { 2469 ASSERT(cpuid_checkpass(cpu, 1)); 2470 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2471 } 2472 2473 const char * 2474 cpuid_getchiprevstr(struct cpu *cpu) 2475 { 2476 ASSERT(cpuid_checkpass(cpu, 1)); 2477 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2478 } 2479 2480 uint32_t 2481 cpuid_getsockettype(struct cpu *cpu) 2482 { 2483 ASSERT(cpuid_checkpass(cpu, 1)); 2484 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2485 } 2486 2487 int 2488 cpuid_get_chipid(cpu_t *cpu) 2489 { 2490 ASSERT(cpuid_checkpass(cpu, 1)); 2491 2492 if (cpuid_is_cmt(cpu)) 2493 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2494 return (cpu->cpu_id); 2495 } 2496 2497 id_t 2498 cpuid_get_coreid(cpu_t *cpu) 2499 { 2500 ASSERT(cpuid_checkpass(cpu, 1)); 2501 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2502 } 2503 2504 int 2505 cpuid_get_pkgcoreid(cpu_t *cpu) 2506 { 2507 ASSERT(cpuid_checkpass(cpu, 1)); 2508 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2509 } 2510 2511 int 2512 cpuid_get_clogid(cpu_t *cpu) 2513 { 2514 ASSERT(cpuid_checkpass(cpu, 1)); 2515 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2516 } 2517 2518 void 2519 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2520 { 2521 struct cpuid_info 
*cpi; 2522 2523 if (cpu == NULL) 2524 cpu = CPU; 2525 cpi = cpu->cpu_m.mcpu_cpi; 2526 2527 ASSERT(cpuid_checkpass(cpu, 1)); 2528 2529 if (pabits) 2530 *pabits = cpi->cpi_pabits; 2531 if (vabits) 2532 *vabits = cpi->cpi_vabits; 2533 } 2534 2535 /* 2536 * Returns the number of data TLB entries for a corresponding 2537 * pagesize. If it can't be computed, or isn't known, the 2538 * routine returns zero. If you ask about an architecturally 2539 * impossible pagesize, the routine will panic (so that the 2540 * hat implementor knows that things are inconsistent.) 2541 */ 2542 uint_t 2543 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2544 { 2545 struct cpuid_info *cpi; 2546 uint_t dtlb_nent = 0; 2547 2548 if (cpu == NULL) 2549 cpu = CPU; 2550 cpi = cpu->cpu_m.mcpu_cpi; 2551 2552 ASSERT(cpuid_checkpass(cpu, 1)); 2553 2554 /* 2555 * Check the L2 TLB info 2556 */ 2557 if (cpi->cpi_xmaxeax >= 0x80000006) { 2558 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2559 2560 switch (pagesize) { 2561 2562 case 4 * 1024: 2563 /* 2564 * All zero in the top 16 bits of the register 2565 * indicates a unified TLB. Size is in low 16 bits. 2566 */ 2567 if ((cp->cp_ebx & 0xffff0000) == 0) 2568 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2569 else 2570 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2571 break; 2572 2573 case 2 * 1024 * 1024: 2574 if ((cp->cp_eax & 0xffff0000) == 0) 2575 dtlb_nent = cp->cp_eax & 0x0000ffff; 2576 else 2577 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2578 break; 2579 2580 default: 2581 panic("unknown L2 pagesize"); 2582 /*NOTREACHED*/ 2583 } 2584 } 2585 2586 if (dtlb_nent != 0) 2587 return (dtlb_nent); 2588 2589 /* 2590 * No L2 TLB support for this size, try L1. 
2591 */ 2592 if (cpi->cpi_xmaxeax >= 0x80000005) { 2593 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2594 2595 switch (pagesize) { 2596 case 4 * 1024: 2597 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2598 break; 2599 case 2 * 1024 * 1024: 2600 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2601 break; 2602 default: 2603 panic("unknown L1 d-TLB pagesize"); 2604 /*NOTREACHED*/ 2605 } 2606 } 2607 2608 return (dtlb_nent); 2609 } 2610 2611 /* 2612 * Return 0 if the erratum is not present or not applicable, positive 2613 * if it is, and negative if the status of the erratum is unknown. 2614 * 2615 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2616 * Processors" #25759, Rev 3.57, August 2005 2617 */ 2618 int 2619 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2620 { 2621 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2622 uint_t eax; 2623 2624 /* 2625 * Bail out if this CPU isn't an AMD CPU, or if it's 2626 * a legacy (32-bit) AMD CPU. 2627 */ 2628 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2629 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2630 cpi->cpi_family == 6) 2631 2632 return (0); 2633 2634 eax = cpi->cpi_std[1].cp_eax; 2635 2636 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2637 #define SH_B3(eax) (eax == 0xf51) 2638 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2639 2640 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2641 2642 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2643 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2644 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2645 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2646 2647 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2648 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2649 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2650 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2651 2652 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2653 #define JH_E1(eax) (eax == 0x20f10) /* 
JH8_E0 had 0x20f30 */ 2654 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2655 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2656 #define BH_E4(eax) (eax == 0x20fb1) 2657 #define SH_E5(eax) (eax == 0x20f42) 2658 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2659 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2660 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2661 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2662 DH_E6(eax) || JH_E6(eax)) 2663 2664 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2665 #define DR_B0(eax) (eax == 0x100f20) 2666 #define DR_B1(eax) (eax == 0x100f21) 2667 #define DR_BA(eax) (eax == 0x100f2a) 2668 #define DR_B2(eax) (eax == 0x100f22) 2669 #define DR_B3(eax) (eax == 0x100f23) 2670 #define RB_C0(eax) (eax == 0x100f40) 2671 2672 switch (erratum) { 2673 case 1: 2674 return (cpi->cpi_family < 0x10); 2675 case 51: /* what does the asterisk mean? */ 2676 return (B(eax) || SH_C0(eax) || CG(eax)); 2677 case 52: 2678 return (B(eax)); 2679 case 57: 2680 return (cpi->cpi_family <= 0x11); 2681 case 58: 2682 return (B(eax)); 2683 case 60: 2684 return (cpi->cpi_family <= 0x11); 2685 case 61: 2686 case 62: 2687 case 63: 2688 case 64: 2689 case 65: 2690 case 66: 2691 case 68: 2692 case 69: 2693 case 70: 2694 case 71: 2695 return (B(eax)); 2696 case 72: 2697 return (SH_B0(eax)); 2698 case 74: 2699 return (B(eax)); 2700 case 75: 2701 return (cpi->cpi_family < 0x10); 2702 case 76: 2703 return (B(eax)); 2704 case 77: 2705 return (cpi->cpi_family <= 0x11); 2706 case 78: 2707 return (B(eax) || SH_C0(eax)); 2708 case 79: 2709 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2710 case 80: 2711 case 81: 2712 case 82: 2713 return (B(eax)); 2714 case 83: 2715 return (B(eax) || SH_C0(eax) || CG(eax)); 2716 case 85: 2717 return (cpi->cpi_family < 0x10); 2718 case 86: 2719 return (SH_C0(eax) || CG(eax)); 2720 case 88: 2721 #if !defined(__amd64) 2722 return (0); 2723 #else 2724 return 
(B(eax) || SH_C0(eax)); 2725 #endif 2726 case 89: 2727 return (cpi->cpi_family < 0x10); 2728 case 90: 2729 return (B(eax) || SH_C0(eax) || CG(eax)); 2730 case 91: 2731 case 92: 2732 return (B(eax) || SH_C0(eax)); 2733 case 93: 2734 return (SH_C0(eax)); 2735 case 94: 2736 return (B(eax) || SH_C0(eax) || CG(eax)); 2737 case 95: 2738 #if !defined(__amd64) 2739 return (0); 2740 #else 2741 return (B(eax) || SH_C0(eax)); 2742 #endif 2743 case 96: 2744 return (B(eax) || SH_C0(eax) || CG(eax)); 2745 case 97: 2746 case 98: 2747 return (SH_C0(eax) || CG(eax)); 2748 case 99: 2749 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2750 case 100: 2751 return (B(eax) || SH_C0(eax)); 2752 case 101: 2753 case 103: 2754 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2755 case 104: 2756 return (SH_C0(eax) || CG(eax) || D0(eax)); 2757 case 105: 2758 case 106: 2759 case 107: 2760 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2761 case 108: 2762 return (DH_CG(eax)); 2763 case 109: 2764 return (SH_C0(eax) || CG(eax) || D0(eax)); 2765 case 110: 2766 return (D0(eax) || EX(eax)); 2767 case 111: 2768 return (CG(eax)); 2769 case 112: 2770 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2771 case 113: 2772 return (eax == 0x20fc0); 2773 case 114: 2774 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2775 case 115: 2776 return (SH_E0(eax) || JH_E1(eax)); 2777 case 116: 2778 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2779 case 117: 2780 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2781 case 118: 2782 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2783 JH_E6(eax)); 2784 case 121: 2785 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2786 case 122: 2787 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2788 case 123: 2789 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2790 case 131: 2791 return (cpi->cpi_family < 0x10); 2792 case 6336786: 2793 /* 2794 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2795 * if 
this is a K8 family or newer processor 2796 */ 2797 if (CPI_FAMILY(cpi) == 0xf) { 2798 struct cpuid_regs regs; 2799 regs.cp_eax = 0x80000007; 2800 (void) __cpuid_insn(®s); 2801 return (!(regs.cp_edx & 0x100)); 2802 } 2803 return (0); 2804 case 6323525: 2805 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2806 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2807 2808 case 6671130: 2809 /* 2810 * check for processors (pre-Shanghai) that do not provide 2811 * optimal management of 1gb ptes in its tlb. 2812 */ 2813 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 2814 2815 case 298: 2816 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 2817 DR_B2(eax) || RB_C0(eax)); 2818 2819 default: 2820 return (-1); 2821 2822 } 2823 } 2824 2825 /* 2826 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 2827 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 2828 */ 2829 int 2830 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 2831 { 2832 struct cpuid_info *cpi; 2833 uint_t osvwid; 2834 static int osvwfeature = -1; 2835 uint64_t osvwlength; 2836 2837 2838 cpi = cpu->cpu_m.mcpu_cpi; 2839 2840 /* confirm OSVW supported */ 2841 if (osvwfeature == -1) { 2842 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 2843 } else { 2844 /* assert that osvw feature setting is consistent on all cpus */ 2845 ASSERT(osvwfeature == 2846 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 2847 } 2848 if (!osvwfeature) 2849 return (-1); 2850 2851 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 2852 2853 switch (erratum) { 2854 case 298: /* osvwid is 0 */ 2855 osvwid = 0; 2856 if (osvwlength <= (uint64_t)osvwid) { 2857 /* osvwid 0 is unknown */ 2858 return (-1); 2859 } 2860 2861 /* 2862 * Check the OSVW STATUS MSR to determine the state 2863 * of the erratum where: 2864 * 0 - fixed by HW 2865 * 1 - BIOS has applied the workaround when BIOS 2866 * workaround is available. 
(Or for other errata, 2867 * OS workaround is required.) 2868 * For a value of 1, caller will confirm that the 2869 * erratum 298 workaround has indeed been applied by BIOS. 2870 * 2871 * A 1 may be set in cpus that have a HW fix 2872 * in a mixed cpu system. Regarding erratum 298: 2873 * In a multiprocessor platform, the workaround above 2874 * should be applied to all processors regardless of 2875 * silicon revision when an affected processor is 2876 * present. 2877 */ 2878 2879 return (rdmsr(MSR_AMD_OSVW_STATUS + 2880 (osvwid / OSVW_ID_CNT_PER_MSR)) & 2881 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 2882 2883 default: 2884 return (-1); 2885 } 2886 } 2887 2888 static const char assoc_str[] = "associativity"; 2889 static const char line_str[] = "line-size"; 2890 static const char size_str[] = "size"; 2891 2892 static void 2893 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2894 uint32_t val) 2895 { 2896 char buf[128]; 2897 2898 /* 2899 * ndi_prop_update_int() is used because it is desirable for 2900 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2901 */ 2902 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2903 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2904 } 2905 2906 /* 2907 * Intel-style cache/tlb description 2908 * 2909 * Standard cpuid level 2 gives a randomly ordered 2910 * selection of tags that index into a table that describes 2911 * cache and tlb properties. 
2912 */ 2913 2914 static const char l1_icache_str[] = "l1-icache"; 2915 static const char l1_dcache_str[] = "l1-dcache"; 2916 static const char l2_cache_str[] = "l2-cache"; 2917 static const char l3_cache_str[] = "l3-cache"; 2918 static const char itlb4k_str[] = "itlb-4K"; 2919 static const char dtlb4k_str[] = "dtlb-4K"; 2920 static const char itlb2M_str[] = "itlb-2M"; 2921 static const char itlb4M_str[] = "itlb-4M"; 2922 static const char dtlb4M_str[] = "dtlb-4M"; 2923 static const char dtlb24_str[] = "dtlb0-2M-4M"; 2924 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2925 static const char itlb24_str[] = "itlb-2M-4M"; 2926 static const char dtlb44_str[] = "dtlb-4K-4M"; 2927 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2928 static const char sl2_cache_str[] = "sectored-l2-cache"; 2929 static const char itrace_str[] = "itrace-cache"; 2930 static const char sl3_cache_str[] = "sectored-l3-cache"; 2931 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 2932 2933 static const struct cachetab { 2934 uint8_t ct_code; 2935 uint8_t ct_assoc; 2936 uint16_t ct_line_size; 2937 size_t ct_size; 2938 const char *ct_label; 2939 } intel_ctab[] = { 2940 /* 2941 * maintain descending order! 
2942 * 2943 * Codes ignored - Reason 2944 * ---------------------- 2945 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 2946 * f0H/f1H - Currently we do not interpret prefetch size by design 2947 */ 2948 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 2949 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 2950 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 2951 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 2952 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 2953 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 2954 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 2955 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 2956 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 2957 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 2958 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 2959 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 2960 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 2961 { 0xc0, 4, 0, 8, dtlb44_str }, 2962 { 0xba, 4, 0, 64, dtlb4k_str }, 2963 { 0xb4, 4, 0, 256, dtlb4k_str }, 2964 { 0xb3, 4, 0, 128, dtlb4k_str }, 2965 { 0xb2, 4, 0, 64, itlb4k_str }, 2966 { 0xb0, 4, 0, 128, itlb4k_str }, 2967 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2968 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2969 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2970 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2971 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2972 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2973 { 0x80, 8, 64, 512*1024, l2_cache_str}, 2974 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2975 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2976 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2977 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2978 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2979 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 2980 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 2981 { 0x73, 8, 0, 64*1024, itrace_str}, 2982 { 0x72, 8, 0, 32*1024, itrace_str}, 2983 { 0x71, 8, 0, 16*1024, itrace_str}, 2984 { 0x70, 8, 0, 12*1024, itrace_str}, 2985 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 2986 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 2987 { 0x66, 4, 64, 
8*1024, sl1_dcache_str}, 2988 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 2989 { 0x5d, 0, 0, 256, dtlb44_str}, 2990 { 0x5c, 0, 0, 128, dtlb44_str}, 2991 { 0x5b, 0, 0, 64, dtlb44_str}, 2992 { 0x5a, 4, 0, 32, dtlb24_str}, 2993 { 0x59, 0, 0, 16, dtlb4k_str}, 2994 { 0x57, 4, 0, 16, dtlb4k_str}, 2995 { 0x56, 4, 0, 16, dtlb4M_str}, 2996 { 0x55, 0, 0, 7, itlb24_str}, 2997 { 0x52, 0, 0, 256, itlb424_str}, 2998 { 0x51, 0, 0, 128, itlb424_str}, 2999 { 0x50, 0, 0, 64, itlb424_str}, 3000 { 0x4f, 0, 0, 32, itlb4k_str}, 3001 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3002 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3003 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3004 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3005 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3006 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3007 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3008 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3009 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3010 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3011 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3012 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3013 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3014 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3015 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3016 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3017 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3018 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3019 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3020 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3021 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3022 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3023 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3024 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3025 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3026 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3027 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3028 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3029 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3030 { 0x0b, 4, 0, 4, itlb4M_str}, 3031 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3032 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3033 { 
0x06, 4, 32, 8*1024, l1_icache_str}, 3034 { 0x05, 4, 0, 32, dtlb4M_str}, 3035 { 0x04, 4, 0, 8, dtlb4M_str}, 3036 { 0x03, 4, 0, 64, dtlb4k_str}, 3037 { 0x02, 4, 0, 2, itlb4M_str}, 3038 { 0x01, 4, 0, 32, itlb4k_str}, 3039 { 0 } 3040 }; 3041 3042 static const struct cachetab cyrix_ctab[] = { 3043 { 0x70, 4, 0, 32, "tlb-4K" }, 3044 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3045 { 0 } 3046 }; 3047 3048 /* 3049 * Search a cache table for a matching entry 3050 */ 3051 static const struct cachetab * 3052 find_cacheent(const struct cachetab *ct, uint_t code) 3053 { 3054 if (code != 0) { 3055 for (; ct->ct_code != 0; ct++) 3056 if (ct->ct_code <= code) 3057 break; 3058 if (ct->ct_code == code) 3059 return (ct); 3060 } 3061 return (NULL); 3062 } 3063 3064 /* 3065 * Populate cachetab entry with L2 or L3 cache-information using 3066 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3067 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3068 * information is found. 3069 */ 3070 static int 3071 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3072 { 3073 uint32_t level, i; 3074 int ret = 0; 3075 3076 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3077 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3078 3079 if (level == 2 || level == 3) { 3080 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3081 ct->ct_line_size = 3082 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3083 ct->ct_size = ct->ct_assoc * 3084 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3085 ct->ct_line_size * 3086 (cpi->cpi_std_4[i]->cp_ecx + 1); 3087 3088 if (level == 2) { 3089 ct->ct_label = l2_cache_str; 3090 } else if (level == 3) { 3091 ct->ct_label = l3_cache_str; 3092 } 3093 ret = 1; 3094 } 3095 } 3096 3097 return (ret); 3098 } 3099 3100 /* 3101 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3102 * The walk is terminated if the walker returns non-zero. 
3103 */ 3104 static void 3105 intel_walk_cacheinfo(struct cpuid_info *cpi, 3106 void *arg, int (*func)(void *, const struct cachetab *)) 3107 { 3108 const struct cachetab *ct; 3109 struct cachetab des_49_ct, des_b1_ct; 3110 uint8_t *dp; 3111 int i; 3112 3113 if ((dp = cpi->cpi_cacheinfo) == NULL) 3114 return; 3115 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3116 /* 3117 * For overloaded descriptor 0x49 we use cpuid function 4 3118 * if supported by the current processor, to create 3119 * cache information. 3120 * For overloaded descriptor 0xb1 we use X86_PAE flag 3121 * to disambiguate the cache information. 3122 */ 3123 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3124 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3125 ct = &des_49_ct; 3126 } else if (*dp == 0xb1) { 3127 des_b1_ct.ct_code = 0xb1; 3128 des_b1_ct.ct_assoc = 4; 3129 des_b1_ct.ct_line_size = 0; 3130 if (x86_feature & X86_PAE) { 3131 des_b1_ct.ct_size = 8; 3132 des_b1_ct.ct_label = itlb2M_str; 3133 } else { 3134 des_b1_ct.ct_size = 4; 3135 des_b1_ct.ct_label = itlb4M_str; 3136 } 3137 ct = &des_b1_ct; 3138 } else { 3139 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3140 continue; 3141 } 3142 } 3143 3144 if (func(arg, ct) != 0) { 3145 break; 3146 } 3147 } 3148 } 3149 3150 /* 3151 * (Like the Intel one, except for Cyrix CPUs) 3152 */ 3153 static void 3154 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3155 void *arg, int (*func)(void *, const struct cachetab *)) 3156 { 3157 const struct cachetab *ct; 3158 uint8_t *dp; 3159 int i; 3160 3161 if ((dp = cpi->cpi_cacheinfo) == NULL) 3162 return; 3163 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3164 /* 3165 * Search Cyrix-specific descriptor table first .. 3166 */ 3167 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3168 if (func(arg, ct) != 0) 3169 break; 3170 continue; 3171 } 3172 /* 3173 * .. 
else fall back to the Intel one 3174 */ 3175 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3176 if (func(arg, ct) != 0) 3177 break; 3178 continue; 3179 } 3180 } 3181 } 3182 3183 /* 3184 * A cacheinfo walker that adds associativity, line-size, and size properties 3185 * to the devinfo node it is passed as an argument. 3186 */ 3187 static int 3188 add_cacheent_props(void *arg, const struct cachetab *ct) 3189 { 3190 dev_info_t *devi = arg; 3191 3192 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3193 if (ct->ct_line_size != 0) 3194 add_cache_prop(devi, ct->ct_label, line_str, 3195 ct->ct_line_size); 3196 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3197 return (0); 3198 } 3199 3200 3201 static const char fully_assoc[] = "fully-associative?"; 3202 3203 /* 3204 * AMD style cache/tlb description 3205 * 3206 * Extended functions 5 and 6 directly describe properties of 3207 * tlbs and various cache levels. 3208 */ 3209 static void 3210 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3211 { 3212 switch (assoc) { 3213 case 0: /* reserved; ignore */ 3214 break; 3215 default: 3216 add_cache_prop(devi, label, assoc_str, assoc); 3217 break; 3218 case 0xff: 3219 add_cache_prop(devi, label, fully_assoc, 1); 3220 break; 3221 } 3222 } 3223 3224 static void 3225 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3226 { 3227 if (size == 0) 3228 return; 3229 add_cache_prop(devi, label, size_str, size); 3230 add_amd_assoc(devi, label, assoc); 3231 } 3232 3233 static void 3234 add_amd_cache(dev_info_t *devi, const char *label, 3235 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3236 { 3237 if (size == 0 || line_size == 0) 3238 return; 3239 add_amd_assoc(devi, label, assoc); 3240 /* 3241 * Most AMD parts have a sectored cache. Multiple cache lines are 3242 * associated with each tag. A sector consists of all cache lines 3243 * associated with a tag. 
For example, the AMD K6-III has a sector 3244 * size of 2 cache lines per tag. 3245 */ 3246 if (lines_per_tag != 0) 3247 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3248 add_cache_prop(devi, label, line_str, line_size); 3249 add_cache_prop(devi, label, size_str, size * 1024); 3250 } 3251 3252 static void 3253 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3254 { 3255 switch (assoc) { 3256 case 0: /* off */ 3257 break; 3258 case 1: 3259 case 2: 3260 case 4: 3261 add_cache_prop(devi, label, assoc_str, assoc); 3262 break; 3263 case 6: 3264 add_cache_prop(devi, label, assoc_str, 8); 3265 break; 3266 case 8: 3267 add_cache_prop(devi, label, assoc_str, 16); 3268 break; 3269 case 0xf: 3270 add_cache_prop(devi, label, fully_assoc, 1); 3271 break; 3272 default: /* reserved; ignore */ 3273 break; 3274 } 3275 } 3276 3277 static void 3278 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3279 { 3280 if (size == 0 || assoc == 0) 3281 return; 3282 add_amd_l2_assoc(devi, label, assoc); 3283 add_cache_prop(devi, label, size_str, size); 3284 } 3285 3286 static void 3287 add_amd_l2_cache(dev_info_t *devi, const char *label, 3288 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3289 { 3290 if (size == 0 || assoc == 0 || line_size == 0) 3291 return; 3292 add_amd_l2_assoc(devi, label, assoc); 3293 if (lines_per_tag != 0) 3294 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3295 add_cache_prop(devi, label, line_str, line_size); 3296 add_cache_prop(devi, label, size_str, size * 1024); 3297 } 3298 3299 static void 3300 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3301 { 3302 struct cpuid_regs *cp; 3303 3304 if (cpi->cpi_xmaxeax < 0x80000005) 3305 return; 3306 cp = &cpi->cpi_extd[5]; 3307 3308 /* 3309 * 4M/2M L1 TLB configuration 3310 * 3311 * We report the size for 2M pages because AMD uses two 3312 * TLB entries for one 4M page. 
3313 */ 3314 add_amd_tlb(devi, "dtlb-2M", 3315 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3316 add_amd_tlb(devi, "itlb-2M", 3317 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3318 3319 /* 3320 * 4K L1 TLB configuration 3321 */ 3322 3323 switch (cpi->cpi_vendor) { 3324 uint_t nentries; 3325 case X86_VENDOR_TM: 3326 if (cpi->cpi_family >= 5) { 3327 /* 3328 * Crusoe processors have 256 TLB entries, but 3329 * cpuid data format constrains them to only 3330 * reporting 255 of them. 3331 */ 3332 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3333 nentries = 256; 3334 /* 3335 * Crusoe processors also have a unified TLB 3336 */ 3337 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3338 nentries); 3339 break; 3340 } 3341 /*FALLTHROUGH*/ 3342 default: 3343 add_amd_tlb(devi, itlb4k_str, 3344 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3345 add_amd_tlb(devi, dtlb4k_str, 3346 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3347 break; 3348 } 3349 3350 /* 3351 * data L1 cache configuration 3352 */ 3353 3354 add_amd_cache(devi, l1_dcache_str, 3355 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3356 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3357 3358 /* 3359 * code L1 cache configuration 3360 */ 3361 3362 add_amd_cache(devi, l1_icache_str, 3363 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3364 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3365 3366 if (cpi->cpi_xmaxeax < 0x80000006) 3367 return; 3368 cp = &cpi->cpi_extd[6]; 3369 3370 /* Check for a unified L2 TLB for large pages */ 3371 3372 if (BITX(cp->cp_eax, 31, 16) == 0) 3373 add_amd_l2_tlb(devi, "l2-tlb-2M", 3374 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3375 else { 3376 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3377 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3378 add_amd_l2_tlb(devi, "l2-itlb-2M", 3379 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3380 } 3381 3382 /* Check for a unified L2 TLB for 4K pages */ 3383 3384 if (BITX(cp->cp_ebx, 31, 16) 
== 0) { 3385 add_amd_l2_tlb(devi, "l2-tlb-4K", 3386 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3387 } else { 3388 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3389 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3390 add_amd_l2_tlb(devi, "l2-itlb-4K", 3391 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3392 } 3393 3394 add_amd_l2_cache(devi, l2_cache_str, 3395 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3396 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3397 } 3398 3399 /* 3400 * There are two basic ways that the x86 world describes it cache 3401 * and tlb architecture - Intel's way and AMD's way. 3402 * 3403 * Return which flavor of cache architecture we should use 3404 */ 3405 static int 3406 x86_which_cacheinfo(struct cpuid_info *cpi) 3407 { 3408 switch (cpi->cpi_vendor) { 3409 case X86_VENDOR_Intel: 3410 if (cpi->cpi_maxeax >= 2) 3411 return (X86_VENDOR_Intel); 3412 break; 3413 case X86_VENDOR_AMD: 3414 /* 3415 * The K5 model 1 was the first part from AMD that reported 3416 * cache sizes via extended cpuid functions. 3417 */ 3418 if (cpi->cpi_family > 5 || 3419 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3420 return (X86_VENDOR_AMD); 3421 break; 3422 case X86_VENDOR_TM: 3423 if (cpi->cpi_family >= 5) 3424 return (X86_VENDOR_AMD); 3425 /*FALLTHROUGH*/ 3426 default: 3427 /* 3428 * If they have extended CPU data for 0x80000005 3429 * then we assume they have AMD-format cache 3430 * information. 3431 * 3432 * If not, and the vendor happens to be Cyrix, 3433 * then try our-Cyrix specific handler. 3434 * 3435 * If we're not Cyrix, then assume we're using Intel's 3436 * table-driven format instead. 3437 */ 3438 if (cpi->cpi_xmaxeax >= 0x80000005) 3439 return (X86_VENDOR_AMD); 3440 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3441 return (X86_VENDOR_Cyrix); 3442 else if (cpi->cpi_maxeax >= 2) 3443 return (X86_VENDOR_Intel); 3444 break; 3445 } 3446 return (-1); 3447 } 3448 3449 /* 3450 * create a node for the given cpu under the prom root node. 
3451 * Also, create a cpu node in the device tree. 3452 */ 3453 static dev_info_t *cpu_nex_devi = NULL; 3454 static kmutex_t cpu_node_lock; 3455 3456 /* 3457 * Called from post_startup() and mp_startup() 3458 */ 3459 void 3460 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3461 { 3462 dev_info_t *cpu_devi; 3463 int create; 3464 3465 mutex_enter(&cpu_node_lock); 3466 3467 /* 3468 * create a nexus node for all cpus identified as 'cpu_id' under 3469 * the root node. 3470 */ 3471 if (cpu_nex_devi == NULL) { 3472 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3473 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3474 mutex_exit(&cpu_node_lock); 3475 return; 3476 } 3477 (void) ndi_devi_online(cpu_nex_devi, 0); 3478 } 3479 3480 /* 3481 * create a child node for cpu identified as 'cpu_id' 3482 */ 3483 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3484 cpu_id); 3485 if (cpu_devi == NULL) { 3486 mutex_exit(&cpu_node_lock); 3487 return; 3488 } 3489 3490 /* device_type */ 3491 3492 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3493 "device_type", "cpu"); 3494 3495 /* reg */ 3496 3497 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3498 "reg", cpu_id); 3499 3500 /* cpu-mhz, and clock-frequency */ 3501 3502 if (cpu_freq > 0) { 3503 long long mul; 3504 3505 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3506 "cpu-mhz", cpu_freq); 3507 3508 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3509 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3510 "clock-frequency", (int)mul); 3511 } 3512 3513 (void) ndi_devi_online(cpu_devi, 0); 3514 3515 if ((x86_feature & X86_CPUID) == 0) { 3516 mutex_exit(&cpu_node_lock); 3517 return; 3518 } 3519 3520 /* vendor-id */ 3521 3522 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3523 "vendor-id", cpi->cpi_vendorstr); 3524 3525 if (cpi->cpi_maxeax == 0) { 3526 mutex_exit(&cpu_node_lock); 3527 return; 3528 } 3529 3530 /* 3531 * family, model, and step 3532 */ 3533 (void) 
ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3534 "family", CPI_FAMILY(cpi)); 3535 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3536 "cpu-model", CPI_MODEL(cpi)); 3537 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3538 "stepping-id", CPI_STEP(cpi)); 3539 3540 /* type */ 3541 3542 switch (cpi->cpi_vendor) { 3543 case X86_VENDOR_Intel: 3544 create = 1; 3545 break; 3546 default: 3547 create = 0; 3548 break; 3549 } 3550 if (create) 3551 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3552 "type", CPI_TYPE(cpi)); 3553 3554 /* ext-family */ 3555 3556 switch (cpi->cpi_vendor) { 3557 case X86_VENDOR_Intel: 3558 case X86_VENDOR_AMD: 3559 create = cpi->cpi_family >= 0xf; 3560 break; 3561 default: 3562 create = 0; 3563 break; 3564 } 3565 if (create) 3566 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3567 "ext-family", CPI_FAMILY_XTD(cpi)); 3568 3569 /* ext-model */ 3570 3571 switch (cpi->cpi_vendor) { 3572 case X86_VENDOR_Intel: 3573 create = IS_EXTENDED_MODEL_INTEL(cpi); 3574 break; 3575 case X86_VENDOR_AMD: 3576 create = CPI_FAMILY(cpi) == 0xf; 3577 break; 3578 default: 3579 create = 0; 3580 break; 3581 } 3582 if (create) 3583 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3584 "ext-model", CPI_MODEL_XTD(cpi)); 3585 3586 /* generation */ 3587 3588 switch (cpi->cpi_vendor) { 3589 case X86_VENDOR_AMD: 3590 /* 3591 * AMD K5 model 1 was the first part to support this 3592 */ 3593 create = cpi->cpi_xmaxeax >= 0x80000001; 3594 break; 3595 default: 3596 create = 0; 3597 break; 3598 } 3599 if (create) 3600 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3601 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3602 3603 /* brand-id */ 3604 3605 switch (cpi->cpi_vendor) { 3606 case X86_VENDOR_Intel: 3607 /* 3608 * brand id first appeared on Pentium III Xeon model 8, 3609 * and Celeron model 8 processors and Opteron 3610 */ 3611 create = cpi->cpi_family > 6 || 3612 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3613 break; 3614 case 
X86_VENDOR_AMD: 3615 create = cpi->cpi_family >= 0xf; 3616 break; 3617 default: 3618 create = 0; 3619 break; 3620 } 3621 if (create && cpi->cpi_brandid != 0) { 3622 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3623 "brand-id", cpi->cpi_brandid); 3624 } 3625 3626 /* chunks, and apic-id */ 3627 3628 switch (cpi->cpi_vendor) { 3629 /* 3630 * first available on Pentium IV and Opteron (K8) 3631 */ 3632 case X86_VENDOR_Intel: 3633 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3634 break; 3635 case X86_VENDOR_AMD: 3636 create = cpi->cpi_family >= 0xf; 3637 break; 3638 default: 3639 create = 0; 3640 break; 3641 } 3642 if (create) { 3643 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3644 "chunks", CPI_CHUNKS(cpi)); 3645 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3646 "apic-id", cpi->cpi_apicid); 3647 if (cpi->cpi_chipid >= 0) { 3648 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3649 "chip#", cpi->cpi_chipid); 3650 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3651 "clog#", cpi->cpi_clogid); 3652 } 3653 } 3654 3655 /* cpuid-features */ 3656 3657 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3658 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3659 3660 3661 /* cpuid-features-ecx */ 3662 3663 switch (cpi->cpi_vendor) { 3664 case X86_VENDOR_Intel: 3665 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3666 break; 3667 default: 3668 create = 0; 3669 break; 3670 } 3671 if (create) 3672 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3673 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3674 3675 /* ext-cpuid-features */ 3676 3677 switch (cpi->cpi_vendor) { 3678 case X86_VENDOR_Intel: 3679 case X86_VENDOR_AMD: 3680 case X86_VENDOR_Cyrix: 3681 case X86_VENDOR_TM: 3682 case X86_VENDOR_Centaur: 3683 create = cpi->cpi_xmaxeax >= 0x80000001; 3684 break; 3685 default: 3686 create = 0; 3687 break; 3688 } 3689 if (create) { 3690 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3691 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3692 
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3693 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3694 } 3695 3696 /* 3697 * Brand String first appeared in Intel Pentium IV, AMD K5 3698 * model 1, and Cyrix GXm. On earlier models we try and 3699 * simulate something similar .. so this string should always 3700 * same -something- about the processor, however lame. 3701 */ 3702 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3703 "brand-string", cpi->cpi_brandstr); 3704 3705 /* 3706 * Finally, cache and tlb information 3707 */ 3708 switch (x86_which_cacheinfo(cpi)) { 3709 case X86_VENDOR_Intel: 3710 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3711 break; 3712 case X86_VENDOR_Cyrix: 3713 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3714 break; 3715 case X86_VENDOR_AMD: 3716 amd_cache_info(cpi, cpu_devi); 3717 break; 3718 default: 3719 break; 3720 } 3721 3722 mutex_exit(&cpu_node_lock); 3723 } 3724 3725 struct l2info { 3726 int *l2i_csz; 3727 int *l2i_lsz; 3728 int *l2i_assoc; 3729 int l2i_ret; 3730 }; 3731 3732 /* 3733 * A cacheinfo walker that fetches the size, line-size and associativity 3734 * of the L2 cache 3735 */ 3736 static int 3737 intel_l2cinfo(void *arg, const struct cachetab *ct) 3738 { 3739 struct l2info *l2i = arg; 3740 int *ip; 3741 3742 if (ct->ct_label != l2_cache_str && 3743 ct->ct_label != sl2_cache_str) 3744 return (0); /* not an L2 -- keep walking */ 3745 3746 if ((ip = l2i->l2i_csz) != NULL) 3747 *ip = ct->ct_size; 3748 if ((ip = l2i->l2i_lsz) != NULL) 3749 *ip = ct->ct_line_size; 3750 if ((ip = l2i->l2i_assoc) != NULL) 3751 *ip = ct->ct_assoc; 3752 l2i->l2i_ret = ct->ct_size; 3753 return (1); /* was an L2 -- terminate walk */ 3754 } 3755 3756 /* 3757 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3758 * 3759 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3760 * value is the associativity, the associativity for the L2 cache and 3761 * tlb is encoded in the following 
table. The 4 bit L2 value serves as 3762 * an index into the amd_afd[] array to determine the associativity. 3763 * -1 is undefined. 0 is fully associative. 3764 */ 3765 3766 static int amd_afd[] = 3767 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3768 3769 static void 3770 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3771 { 3772 struct cpuid_regs *cp; 3773 uint_t size, assoc; 3774 int i; 3775 int *ip; 3776 3777 if (cpi->cpi_xmaxeax < 0x80000006) 3778 return; 3779 cp = &cpi->cpi_extd[6]; 3780 3781 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3782 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3783 uint_t cachesz = size * 1024; 3784 assoc = amd_afd[i]; 3785 3786 ASSERT(assoc != -1); 3787 3788 if ((ip = l2i->l2i_csz) != NULL) 3789 *ip = cachesz; 3790 if ((ip = l2i->l2i_lsz) != NULL) 3791 *ip = BITX(cp->cp_ecx, 7, 0); 3792 if ((ip = l2i->l2i_assoc) != NULL) 3793 *ip = assoc; 3794 l2i->l2i_ret = cachesz; 3795 } 3796 } 3797 3798 int 3799 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3800 { 3801 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3802 struct l2info __l2info, *l2i = &__l2info; 3803 3804 l2i->l2i_csz = csz; 3805 l2i->l2i_lsz = lsz; 3806 l2i->l2i_assoc = assoc; 3807 l2i->l2i_ret = -1; 3808 3809 switch (x86_which_cacheinfo(cpi)) { 3810 case X86_VENDOR_Intel: 3811 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3812 break; 3813 case X86_VENDOR_Cyrix: 3814 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3815 break; 3816 case X86_VENDOR_AMD: 3817 amd_l2cacheinfo(cpi, l2i); 3818 break; 3819 default: 3820 break; 3821 } 3822 return (l2i->l2i_ret); 3823 } 3824 3825 #if !defined(__xpv) 3826 3827 uint32_t * 3828 cpuid_mwait_alloc(cpu_t *cpu) 3829 { 3830 uint32_t *ret; 3831 size_t mwait_size; 3832 3833 ASSERT(cpuid_checkpass(cpu, 2)); 3834 3835 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3836 if (mwait_size == 0) 3837 return (NULL); 3838 3839 /* 3840 * kmem_alloc() returns cache line size aligned data for mwait_size 3841 * 
allocations. mwait_size is currently cache line sized. Neither 3842 * of these implementation details are guarantied to be true in the 3843 * future. 3844 * 3845 * First try allocating mwait_size as kmem_alloc() currently returns 3846 * correctly aligned memory. If kmem_alloc() does not return 3847 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3848 * 3849 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3850 * decide to free this memory. 3851 */ 3852 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3853 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3854 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3855 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3856 *ret = MWAIT_RUNNING; 3857 return (ret); 3858 } else { 3859 kmem_free(ret, mwait_size); 3860 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3861 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3862 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3863 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3864 *ret = MWAIT_RUNNING; 3865 return (ret); 3866 } 3867 } 3868 3869 void 3870 cpuid_mwait_free(cpu_t *cpu) 3871 { 3872 ASSERT(cpuid_checkpass(cpu, 2)); 3873 3874 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3875 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3876 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3877 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3878 } 3879 3880 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3881 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3882 } 3883 3884 void 3885 patch_tsc_read(int flag) 3886 { 3887 size_t cnt; 3888 3889 switch (flag) { 3890 case X86_NO_TSC: 3891 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3892 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3893 break; 3894 case X86_HAVE_TSCP: 3895 cnt = &_tscp_end - &_tscp_start; 3896 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3897 break; 3898 case X86_TSC_MFENCE: 3899 cnt = &_tsc_mfence_end - 
&_tsc_mfence_start; 3900 (void) memcpy((void *)tsc_read, 3901 (void *)&_tsc_mfence_start, cnt); 3902 break; 3903 case X86_TSC_LFENCE: 3904 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 3905 (void) memcpy((void *)tsc_read, 3906 (void *)&_tsc_lfence_start, cnt); 3907 break; 3908 default: 3909 break; 3910 } 3911 } 3912 3913 int 3914 cpuid_deep_cstates_supported(void) 3915 { 3916 struct cpuid_info *cpi; 3917 struct cpuid_regs regs; 3918 3919 ASSERT(cpuid_checkpass(CPU, 1)); 3920 3921 cpi = CPU->cpu_m.mcpu_cpi; 3922 3923 if (!(x86_feature & X86_CPUID)) 3924 return (0); 3925 3926 switch (cpi->cpi_vendor) { 3927 case X86_VENDOR_Intel: 3928 if (cpi->cpi_xmaxeax < 0x80000007) 3929 return (0); 3930 3931 /* 3932 * TSC run at a constant rate in all ACPI C-states? 3933 */ 3934 regs.cp_eax = 0x80000007; 3935 (void) __cpuid_insn(®s); 3936 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 3937 3938 default: 3939 return (0); 3940 } 3941 } 3942 3943 #endif /* !__xpv */ 3944 3945 void 3946 post_startup_cpu_fixups(void) 3947 { 3948 #ifndef __xpv 3949 /* 3950 * Some AMD processors support C1E state. Entering this state will 3951 * cause the local APIC timer to stop, which we can't deal with at 3952 * this time. 3953 */ 3954 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 3955 on_trap_data_t otd; 3956 uint64_t reg; 3957 3958 if (!on_trap(&otd, OT_DATA_ACCESS)) { 3959 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 3960 /* Disable C1E state if it is enabled by BIOS */ 3961 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 3962 AMD_ACTONCMPHALT_MASK) { 3963 reg &= ~(AMD_ACTONCMPHALT_MASK << 3964 AMD_ACTONCMPHALT_SHIFT); 3965 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 3966 } 3967 } 3968 no_trap(); 3969 } 3970 #endif /* !__xpv */ 3971 } 3972 3973 /* 3974 * Starting with the Westmere processor the local 3975 * APIC timer will continue running in all C-states, 3976 * including the deepest C-states. 
3977 */ 3978 int 3979 cpuid_arat_supported(void) 3980 { 3981 struct cpuid_info *cpi; 3982 struct cpuid_regs regs; 3983 3984 ASSERT(cpuid_checkpass(CPU, 1)); 3985 ASSERT(x86_feature & X86_CPUID); 3986 3987 cpi = CPU->cpu_m.mcpu_cpi; 3988 3989 switch (cpi->cpi_vendor) { 3990 case X86_VENDOR_Intel: 3991 /* 3992 * Always-running Local APIC Timer is 3993 * indicated by CPUID.6.EAX[2]. 3994 */ 3995 if (cpi->cpi_maxeax >= 6) { 3996 regs.cp_eax = 6; 3997 (void) cpuid_insn(NULL, ®s); 3998 return (regs.cp_eax & CPUID_CSTATE_ARAT); 3999 } else { 4000 return (0); 4001 } 4002 default: 4003 return (0); 4004 } 4005 } 4006 4007 #if defined(__amd64) && !defined(__xpv) 4008 /* 4009 * Patch in versions of bcopy for high performance Intel Nhm processors 4010 * and later... 4011 */ 4012 void 4013 patch_memops(uint_t vendor) 4014 { 4015 size_t cnt, i; 4016 caddr_t to, from; 4017 4018 if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) { 4019 cnt = &bcopy_patch_end - &bcopy_patch_start; 4020 to = &bcopy_ck_size; 4021 from = &bcopy_patch_start; 4022 for (i = 0; i < cnt; i++) { 4023 *to++ = *from++; 4024 } 4025 } 4026 } 4027 #endif /* __amd64 && !__xpv */ 4028