1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Various routines to handle identification 28 * and classification of x86 processors. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/archsystm.h> 33 #include <sys/x86_archext.h> 34 #include <sys/kmem.h> 35 #include <sys/systm.h> 36 #include <sys/cmn_err.h> 37 #include <sys/sunddi.h> 38 #include <sys/sunndi.h> 39 #include <sys/cpuvar.h> 40 #include <sys/processor.h> 41 #include <sys/sysmacros.h> 42 #include <sys/pg.h> 43 #include <sys/fp.h> 44 #include <sys/controlregs.h> 45 #include <sys/auxv_386.h> 46 #include <sys/bitmap.h> 47 #include <sys/memnode.h> 48 49 #ifdef __xpv 50 #include <sys/hypervisor.h> 51 #endif 52 53 /* 54 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 55 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 56 * them accordingly. For most modern processors, feature detection occurs here 57 * in pass 1. 58 * 59 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 60 * for the boot CPU and does the basic analysis that the early kernel needs. 61 * x86_feature is set based on the return value of cpuid_pass1() of the boot 62 * CPU. 63 * 64 * Pass 1 includes: 65 * 66 * o Determining vendor/model/family/stepping and setting x86_type and 67 * x86_vendor accordingly. 68 * o Processing the feature flags returned by the cpuid instruction while 69 * applying any workarounds or tricks for the specific processor. 70 * o Mapping the feature flags into Solaris feature bits (X86_*). 71 * o Processing extended feature flags if supported by the processor, 72 * again while applying specific processor knowledge. 73 * o Determining the CMT characteristics of the system. 74 * 75 * Pass 1 is done on non-boot CPUs during their initialization and the results 76 * are used only as a meager attempt at ensuring that all processors within the 77 * system support the same features. 78 * 79 * Pass 2 of cpuid feature analysis happens just at the beginning 80 * of startup(). It just copies in and corrects the remainder 81 * of the cpuid data we depend on: standard cpuid functions that we didn't 82 * need for pass1 feature analysis, and extended cpuid functions beyond the 83 * simple feature processing done in pass1. 84 * 85 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 86 * particular kernel memory allocation has been made available. It creates a 87 * readable brand string based on the data collected in the first two passes. 88 * 89 * Pass 4 of cpuid analysis is invoked after post_startup() when all 90 * the support infrastructure for various hardware features has been 91 * initialized. It determines which processor features will be reported 92 * to userland via the aux vector. 93 * 94 * All passes are executed on all CPUs, but only the boot CPU determines what 95 * features the kernel will use. 96 * 97 * Much of the worst junk in this file is for the support of processors 98 * that didn't really implement the cpuid instruction properly. 99 * 100 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 101 * the pass numbers. Accordingly, changes to the pass code may require changes 102 * to the accessor code. 103 */ 104 105 uint_t x86_feature = 0; 106 uint_t x86_vendor = X86_VENDOR_IntelClone; 107 uint_t x86_type = X86_TYPE_OTHER; 108 uint_t x86_clflush_size = 0; 109 110 uint_t pentiumpro_bug4046376; 111 uint_t pentiumpro_bug4064495; 112 113 uint_t enable486; 114 115 /* 116 * monitor/mwait info. 117 * 118 * size_actual and buf_actual are the real address and size allocated to get 119 * proper mwait_buf alignement. buf_actual and size_actual should be passed 120 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 121 * processor cache-line alignment, but this is not guarantied in the furture. 122 */ 123 struct mwait_info { 124 size_t mon_min; /* min size to avoid missed wakeups */ 125 size_t mon_max; /* size to avoid false wakeups */ 126 size_t size_actual; /* size actually allocated */ 127 void *buf_actual; /* memory actually allocated */ 128 uint32_t support; /* processor support of monitor/mwait */ 129 }; 130 131 /* 132 * These constants determine how many of the elements of the 133 * cpuid we cache in the cpuid_info data structure; the 134 * remaining elements are accessible via the cpuid instruction. 135 */ 136 137 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 138 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 0x80000008 */ 139 140 struct cpuid_info { 141 uint_t cpi_pass; /* last pass completed */ 142 /* 143 * standard function information 144 */ 145 uint_t cpi_maxeax; /* fn 0: %eax */ 146 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 147 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 148 149 uint_t cpi_family; /* fn 1: extended family */ 150 uint_t cpi_model; /* fn 1: extended model */ 151 uint_t cpi_step; /* fn 1: stepping */ 152 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 153 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 154 int cpi_clogid; /* fn 1: %ebx: thread # */ 155 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 156 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 157 uint_t cpi_ncache; /* fn 2: number of elements */ 158 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 159 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 160 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 161 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 162 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 163 /* 164 * extended function information 165 */ 166 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 167 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 168 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 169 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 170 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 171 id_t cpi_coreid; /* same coreid => strands share core */ 172 int cpi_pkgcoreid; /* core number within single package */ 173 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 174 /* Intel: fn 4: %eax[31-26] */ 175 /* 176 * supported feature information 177 */ 178 uint32_t cpi_support[5]; 179 #define STD_EDX_FEATURES 0 180 #define AMD_EDX_FEATURES 1 181 #define TM_EDX_FEATURES 2 182 #define STD_ECX_FEATURES 3 183 #define AMD_ECX_FEATURES 4 184 /* 185 * Synthesized information, where known. 186 */ 187 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 188 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 189 uint32_t cpi_socket; /* Chip package/socket type */ 190 191 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 192 uint32_t cpi_apicid; 193 }; 194 195 196 static struct cpuid_info cpuid_info0; 197 198 /* 199 * These bit fields are defined by the Intel Application Note AP-485 200 * "Intel Processor Identification and the CPUID Instruction" 201 */ 202 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 203 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 204 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 205 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 206 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 207 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 208 209 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 210 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 211 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 212 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 213 214 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 215 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 216 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 217 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 218 219 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 220 #define CPI_XMAXEAX_MAX 0x80000100 221 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 222 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 223 224 /* 225 * Function 4 (Deterministic Cache Parameters) macros 226 * Defined by Intel Application Note AP-485 227 */ 228 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 229 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 230 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 231 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 232 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 233 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 234 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 235 236 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 237 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 238 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 239 240 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 241 242 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 243 244 245 /* 246 * A couple of shorthand macros to identify "later" P6-family chips 247 * like the Pentium M and Core. First, the "older" P6-based stuff 248 * (loosely defined as "pre-Pentium-4"): 249 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 250 */ 251 252 #define IS_LEGACY_P6(cpi) ( \ 253 cpi->cpi_family == 6 && \ 254 (cpi->cpi_model == 1 || \ 255 cpi->cpi_model == 3 || \ 256 cpi->cpi_model == 5 || \ 257 cpi->cpi_model == 6 || \ 258 cpi->cpi_model == 7 || \ 259 cpi->cpi_model == 8 || \ 260 cpi->cpi_model == 0xA || \ 261 cpi->cpi_model == 0xB) \ 262 ) 263 264 /* A "new F6" is everything with family 6 that's not the above */ 265 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 266 267 /* Extended family/model support */ 268 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 269 cpi->cpi_family >= 0xf) 270 271 /* 272 * Info for monitor/mwait idle loop. 273 * 274 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 275 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 276 * 2006. 277 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 278 * Documentation Updates" #33633, Rev 2.05, December 2006. 279 */ 280 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 281 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 282 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 283 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 284 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 285 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 286 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 287 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 288 /* 289 * Number of sub-cstates for a given c-state. 290 */ 291 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 292 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 293 294 /* 295 * Functions we consune from cpuid_subr.c; don't publish these in a header 296 * file to try and keep people using the expected cpuid_* interfaces. 297 */ 298 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 299 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 300 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 301 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 302 303 /* 304 * Apply up various platform-dependent restrictions where the 305 * underlying platform restrictions mean the CPU can be marked 306 * as less capable than its cpuid instruction would imply. 307 */ 308 #if defined(__xpv) 309 static void 310 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 311 { 312 switch (eax) { 313 case 1: { 314 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 315 0 : CPUID_INTC_EDX_MCA; 316 cp->cp_edx &= 317 ~(mcamask | 318 CPUID_INTC_EDX_PSE | 319 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 320 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 321 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 322 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 323 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 324 break; 325 } 326 327 case 0x80000001: 328 cp->cp_edx &= 329 ~(CPUID_AMD_EDX_PSE | 330 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 331 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 332 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 333 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 334 CPUID_AMD_EDX_TSCP); 335 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 336 break; 337 default: 338 break; 339 } 340 341 switch (vendor) { 342 case X86_VENDOR_Intel: 343 switch (eax) { 344 case 4: 345 /* 346 * Zero out the (ncores-per-chip - 1) field 347 */ 348 cp->cp_eax &= 0x03fffffff; 349 break; 350 default: 351 break; 352 } 353 break; 354 case X86_VENDOR_AMD: 355 switch (eax) { 356 case 0x80000008: 357 /* 358 * Zero out the (ncores-per-chip - 1) field 359 */ 360 cp->cp_ecx &= 0xffffff00; 361 break; 362 default: 363 break; 364 } 365 break; 366 default: 367 break; 368 } 369 } 370 #else 371 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 372 #endif 373 374 /* 375 * Some undocumented ways of patching the results of the cpuid 376 * instruction to permit running Solaris 10 on future cpus that 377 * we don't currently support. Could be set to non-zero values 378 * via settings in eeprom. 379 */ 380 381 uint32_t cpuid_feature_ecx_include; 382 uint32_t cpuid_feature_ecx_exclude; 383 uint32_t cpuid_feature_edx_include; 384 uint32_t cpuid_feature_edx_exclude; 385 386 void 387 cpuid_alloc_space(cpu_t *cpu) 388 { 389 /* 390 * By convention, cpu0 is the boot cpu, which is set up 391 * before memory allocation is available. All other cpus get 392 * their cpuid_info struct allocated here. 393 */ 394 ASSERT(cpu->cpu_id != 0); 395 cpu->cpu_m.mcpu_cpi = 396 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 397 } 398 399 void 400 cpuid_free_space(cpu_t *cpu) 401 { 402 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 403 int i; 404 405 ASSERT(cpu->cpu_id != 0); 406 407 /* 408 * Free up any function 4 related dynamic storage 409 */ 410 for (i = 1; i < cpi->cpi_std_4_size; i++) 411 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 412 if (cpi->cpi_std_4_size > 0) 413 kmem_free(cpi->cpi_std_4, 414 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 415 416 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 417 } 418 419 #if !defined(__xpv) 420 421 static void 422 check_for_hvm() 423 { 424 struct cpuid_regs cp; 425 char *xen_str; 426 uint32_t xen_signature[4]; 427 extern int xpv_is_hvm; 428 429 /* 430 * In a fully virtualized domain, Xen's pseudo-cpuid function 431 * 0x40000000 returns a string representing the Xen signature in 432 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 433 * function. 434 */ 435 cp.cp_eax = 0x40000000; 436 (void) __cpuid_insn(&cp); 437 xen_signature[0] = cp.cp_ebx; 438 xen_signature[1] = cp.cp_ecx; 439 xen_signature[2] = cp.cp_edx; 440 xen_signature[3] = 0; 441 xen_str = (char *)xen_signature; 442 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) 443 xpv_is_hvm = 1; 444 } 445 #endif /* __xpv */ 446 447 uint_t 448 cpuid_pass1(cpu_t *cpu) 449 { 450 uint32_t mask_ecx, mask_edx; 451 uint_t feature = X86_CPUID; 452 struct cpuid_info *cpi; 453 struct cpuid_regs *cp; 454 int xcpuid; 455 #if !defined(__xpv) 456 extern int idle_cpu_prefer_mwait; 457 #endif 458 459 /* 460 * Space statically allocated for cpu0, ensure pointer is set 461 */ 462 if (cpu->cpu_id == 0) 463 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 464 cpi = cpu->cpu_m.mcpu_cpi; 465 ASSERT(cpi != NULL); 466 cp = &cpi->cpi_std[0]; 467 cp->cp_eax = 0; 468 cpi->cpi_maxeax = __cpuid_insn(cp); 469 { 470 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 471 *iptr++ = cp->cp_ebx; 472 *iptr++ = cp->cp_edx; 473 *iptr++ = cp->cp_ecx; 474 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 475 } 476 477 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 478 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 479 480 /* 481 * Limit the range in case of weird hardware 482 */ 483 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 484 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 485 if (cpi->cpi_maxeax < 1) 486 goto pass1_done; 487 488 cp = &cpi->cpi_std[1]; 489 cp->cp_eax = 1; 490 (void) __cpuid_insn(cp); 491 492 /* 493 * Extract identifying constants for easy access. 494 */ 495 cpi->cpi_model = CPI_MODEL(cpi); 496 cpi->cpi_family = CPI_FAMILY(cpi); 497 498 if (cpi->cpi_family == 0xf) 499 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 500 501 /* 502 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 503 * Intel, and presumably everyone else, uses model == 0xf, as 504 * one would expect (max value means possible overflow). Sigh. 505 */ 506 507 switch (cpi->cpi_vendor) { 508 case X86_VENDOR_Intel: 509 if (IS_EXTENDED_MODEL_INTEL(cpi)) 510 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 511 break; 512 case X86_VENDOR_AMD: 513 if (CPI_FAMILY(cpi) == 0xf) 514 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 515 break; 516 default: 517 if (cpi->cpi_model == 0xf) 518 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 519 break; 520 } 521 522 cpi->cpi_step = CPI_STEP(cpi); 523 cpi->cpi_brandid = CPI_BRANDID(cpi); 524 525 /* 526 * *default* assumptions: 527 * - believe %edx feature word 528 * - ignore %ecx feature word 529 * - 32-bit virtual and physical addressing 530 */ 531 mask_edx = 0xffffffff; 532 mask_ecx = 0; 533 534 cpi->cpi_pabits = cpi->cpi_vabits = 32; 535 536 switch (cpi->cpi_vendor) { 537 case X86_VENDOR_Intel: 538 if (cpi->cpi_family == 5) 539 x86_type = X86_TYPE_P5; 540 else if (IS_LEGACY_P6(cpi)) { 541 x86_type = X86_TYPE_P6; 542 pentiumpro_bug4046376 = 1; 543 pentiumpro_bug4064495 = 1; 544 /* 545 * Clear the SEP bit when it was set erroneously 546 */ 547 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 548 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 549 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 550 x86_type = X86_TYPE_P4; 551 /* 552 * We don't currently depend on any of the %ecx 553 * features until Prescott, so we'll only check 554 * this from P4 onwards. We might want to revisit 555 * that idea later. 556 */ 557 mask_ecx = 0xffffffff; 558 } else if (cpi->cpi_family > 0xf) 559 mask_ecx = 0xffffffff; 560 /* 561 * We don't support MONITOR/MWAIT if leaf 5 is not available 562 * to obtain the monitor linesize. 563 */ 564 if (cpi->cpi_maxeax < 5) 565 mask_ecx &= ~CPUID_INTC_ECX_MON; 566 break; 567 case X86_VENDOR_IntelClone: 568 default: 569 break; 570 case X86_VENDOR_AMD: 571 #if defined(OPTERON_ERRATUM_108) 572 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 573 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 574 cpi->cpi_model = 0xc; 575 } else 576 #endif 577 if (cpi->cpi_family == 5) { 578 /* 579 * AMD K5 and K6 580 * 581 * These CPUs have an incomplete implementation 582 * of MCA/MCE which we mask away. 583 */ 584 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 585 586 /* 587 * Model 0 uses the wrong (APIC) bit 588 * to indicate PGE. Fix it here. 589 */ 590 if (cpi->cpi_model == 0) { 591 if (cp->cp_edx & 0x200) { 592 cp->cp_edx &= ~0x200; 593 cp->cp_edx |= CPUID_INTC_EDX_PGE; 594 } 595 } 596 597 /* 598 * Early models had problems w/ MMX; disable. 599 */ 600 if (cpi->cpi_model < 6) 601 mask_edx &= ~CPUID_INTC_EDX_MMX; 602 } 603 604 /* 605 * For newer families, SSE3 and CX16, at least, are valid; 606 * enable all 607 */ 608 if (cpi->cpi_family >= 0xf) 609 mask_ecx = 0xffffffff; 610 /* 611 * We don't support MONITOR/MWAIT if leaf 5 is not available 612 * to obtain the monitor linesize. 613 */ 614 if (cpi->cpi_maxeax < 5) 615 mask_ecx &= ~CPUID_INTC_ECX_MON; 616 617 #if !defined(__xpv) 618 /* 619 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 620 * processors. AMD does not intend MWAIT to be used in the cpu 621 * idle loop on current and future processors. 10h and future 622 * AMD processors use more power in MWAIT than HLT. 623 * Pre-family-10h Opterons do not have the MWAIT instruction. 624 */ 625 idle_cpu_prefer_mwait = 0; 626 #endif 627 628 break; 629 case X86_VENDOR_TM: 630 /* 631 * workaround the NT workaround in CMS 4.1 632 */ 633 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 634 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 635 cp->cp_edx |= CPUID_INTC_EDX_CX8; 636 break; 637 case X86_VENDOR_Centaur: 638 /* 639 * workaround the NT workarounds again 640 */ 641 if (cpi->cpi_family == 6) 642 cp->cp_edx |= CPUID_INTC_EDX_CX8; 643 break; 644 case X86_VENDOR_Cyrix: 645 /* 646 * We rely heavily on the probing in locore 647 * to actually figure out what parts, if any, 648 * of the Cyrix cpuid instruction to believe. 649 */ 650 switch (x86_type) { 651 case X86_TYPE_CYRIX_486: 652 mask_edx = 0; 653 break; 654 case X86_TYPE_CYRIX_6x86: 655 mask_edx = 0; 656 break; 657 case X86_TYPE_CYRIX_6x86L: 658 mask_edx = 659 CPUID_INTC_EDX_DE | 660 CPUID_INTC_EDX_CX8; 661 break; 662 case X86_TYPE_CYRIX_6x86MX: 663 mask_edx = 664 CPUID_INTC_EDX_DE | 665 CPUID_INTC_EDX_MSR | 666 CPUID_INTC_EDX_CX8 | 667 CPUID_INTC_EDX_PGE | 668 CPUID_INTC_EDX_CMOV | 669 CPUID_INTC_EDX_MMX; 670 break; 671 case X86_TYPE_CYRIX_GXm: 672 mask_edx = 673 CPUID_INTC_EDX_MSR | 674 CPUID_INTC_EDX_CX8 | 675 CPUID_INTC_EDX_CMOV | 676 CPUID_INTC_EDX_MMX; 677 break; 678 case X86_TYPE_CYRIX_MediaGX: 679 break; 680 case X86_TYPE_CYRIX_MII: 681 case X86_TYPE_VIA_CYRIX_III: 682 mask_edx = 683 CPUID_INTC_EDX_DE | 684 CPUID_INTC_EDX_TSC | 685 CPUID_INTC_EDX_MSR | 686 CPUID_INTC_EDX_CX8 | 687 CPUID_INTC_EDX_PGE | 688 CPUID_INTC_EDX_CMOV | 689 CPUID_INTC_EDX_MMX; 690 break; 691 default: 692 break; 693 } 694 break; 695 } 696 697 #if defined(__xpv) 698 /* 699 * Do not support MONITOR/MWAIT under a hypervisor 700 */ 701 mask_ecx &= ~CPUID_INTC_ECX_MON; 702 #endif /* __xpv */ 703 704 /* 705 * Now we've figured out the masks that determine 706 * which bits we choose to believe, apply the masks 707 * to the feature words, then map the kernel's view 708 * of these feature words into its feature word. 709 */ 710 cp->cp_edx &= mask_edx; 711 cp->cp_ecx &= mask_ecx; 712 713 /* 714 * apply any platform restrictions (we don't call this 715 * immediately after __cpuid_insn here, because we need the 716 * workarounds applied above first) 717 */ 718 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 719 720 /* 721 * fold in overrides from the "eeprom" mechanism 722 */ 723 cp->cp_edx |= cpuid_feature_edx_include; 724 cp->cp_edx &= ~cpuid_feature_edx_exclude; 725 726 cp->cp_ecx |= cpuid_feature_ecx_include; 727 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 728 729 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 730 feature |= X86_LARGEPAGE; 731 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 732 feature |= X86_TSC; 733 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 734 feature |= X86_MSR; 735 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 736 feature |= X86_MTRR; 737 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 738 feature |= X86_PGE; 739 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 740 feature |= X86_CMOV; 741 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 742 feature |= X86_MMX; 743 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 744 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 745 feature |= X86_MCA; 746 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 747 feature |= X86_PAE; 748 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 749 feature |= X86_CX8; 750 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 751 feature |= X86_CX16; 752 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 753 feature |= X86_PAT; 754 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 755 feature |= X86_SEP; 756 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 757 /* 758 * In our implementation, fxsave/fxrstor 759 * are prerequisites before we'll even 760 * try and do SSE things. 761 */ 762 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 763 feature |= X86_SSE; 764 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 765 feature |= X86_SSE2; 766 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 767 feature |= X86_SSE3; 768 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 769 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 770 feature |= X86_SSSE3; 771 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 772 feature |= X86_SSE4_1; 773 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 774 feature |= X86_SSE4_2; 775 } 776 } 777 if (cp->cp_edx & CPUID_INTC_EDX_DE) 778 feature |= X86_DE; 779 #if !defined(__xpv) 780 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 781 782 /* 783 * We require the CLFLUSH instruction for erratum workaround 784 * to use MONITOR/MWAIT. 785 */ 786 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 787 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 788 feature |= X86_MWAIT; 789 } else { 790 extern int idle_cpu_assert_cflush_monitor; 791 792 /* 793 * All processors we are aware of which have 794 * MONITOR/MWAIT also have CLFLUSH. 795 */ 796 if (idle_cpu_assert_cflush_monitor) { 797 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 798 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 799 } 800 } 801 } 802 #endif /* __xpv */ 803 804 /* 805 * Only need it first time, rest of the cpus would follow suite. 806 * we only capture this for the bootcpu. 807 */ 808 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 809 feature |= X86_CLFSH; 810 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 811 } 812 813 if (feature & X86_PAE) 814 cpi->cpi_pabits = 36; 815 816 /* 817 * Hyperthreading configuration is slightly tricky on Intel 818 * and pure clones, and even trickier on AMD. 819 * 820 * (AMD chose to set the HTT bit on their CMP processors, 821 * even though they're not actually hyperthreaded. Thus it 822 * takes a bit more work to figure out what's really going 823 * on ... see the handling of the CMP_LGCY bit below) 824 */ 825 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 826 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 827 if (cpi->cpi_ncpu_per_chip > 1) 828 feature |= X86_HTT; 829 } else { 830 cpi->cpi_ncpu_per_chip = 1; 831 } 832 833 /* 834 * Work on the "extended" feature information, doing 835 * some basic initialization for cpuid_pass2() 836 */ 837 xcpuid = 0; 838 switch (cpi->cpi_vendor) { 839 case X86_VENDOR_Intel: 840 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 841 xcpuid++; 842 break; 843 case X86_VENDOR_AMD: 844 if (cpi->cpi_family > 5 || 845 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 846 xcpuid++; 847 break; 848 case X86_VENDOR_Cyrix: 849 /* 850 * Only these Cyrix CPUs are -known- to support 851 * extended cpuid operations. 852 */ 853 if (x86_type == X86_TYPE_VIA_CYRIX_III || 854 x86_type == X86_TYPE_CYRIX_GXm) 855 xcpuid++; 856 break; 857 case X86_VENDOR_Centaur: 858 case X86_VENDOR_TM: 859 default: 860 xcpuid++; 861 break; 862 } 863 864 if (xcpuid) { 865 cp = &cpi->cpi_extd[0]; 866 cp->cp_eax = 0x80000000; 867 cpi->cpi_xmaxeax = __cpuid_insn(cp); 868 } 869 870 if (cpi->cpi_xmaxeax & 0x80000000) { 871 872 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 873 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 874 875 switch (cpi->cpi_vendor) { 876 case X86_VENDOR_Intel: 877 case X86_VENDOR_AMD: 878 if (cpi->cpi_xmaxeax < 0x80000001) 879 break; 880 cp = &cpi->cpi_extd[1]; 881 cp->cp_eax = 0x80000001; 882 (void) __cpuid_insn(cp); 883 884 if (cpi->cpi_vendor == X86_VENDOR_AMD && 885 cpi->cpi_family == 5 && 886 cpi->cpi_model == 6 && 887 cpi->cpi_step == 6) { 888 /* 889 * K6 model 6 uses bit 10 to indicate SYSC 890 * Later models use bit 11. Fix it here. 891 */ 892 if (cp->cp_edx & 0x400) { 893 cp->cp_edx &= ~0x400; 894 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 895 } 896 } 897 898 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 899 900 /* 901 * Compute the additions to the kernel's feature word. 902 */ 903 if (cp->cp_edx & CPUID_AMD_EDX_NX) 904 feature |= X86_NX; 905 906 /* 907 * Regardless whether or not we boot 64-bit, 908 * we should have a way to identify whether 909 * the CPU is capable of running 64-bit. 910 */ 911 if (cp->cp_edx & CPUID_AMD_EDX_LM) 912 feature |= X86_64; 913 914 #if defined(__amd64) 915 /* 1 GB large page - enable only for 64 bit kernel */ 916 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 917 feature |= X86_1GPG; 918 #endif 919 920 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 921 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 922 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 923 feature |= X86_SSE4A; 924 925 /* 926 * If both the HTT and CMP_LGCY bits are set, 927 * then we're not actually HyperThreaded. Read 928 * "AMD CPUID Specification" for more details. 929 */ 930 if (cpi->cpi_vendor == X86_VENDOR_AMD && 931 (feature & X86_HTT) && 932 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 933 feature &= ~X86_HTT; 934 feature |= X86_CMP; 935 } 936 #if defined(__amd64) 937 /* 938 * It's really tricky to support syscall/sysret in 939 * the i386 kernel; we rely on sysenter/sysexit 940 * instead. In the amd64 kernel, things are -way- 941 * better. 942 */ 943 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 944 feature |= X86_ASYSC; 945 946 /* 947 * While we're thinking about system calls, note 948 * that AMD processors don't support sysenter 949 * in long mode at all, so don't try to program them. 950 */ 951 if (x86_vendor == X86_VENDOR_AMD) 952 feature &= ~X86_SEP; 953 #endif 954 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 955 feature |= X86_TSCP; 956 break; 957 default: 958 break; 959 } 960 961 /* 962 * Get CPUID data about processor cores and hyperthreads. 963 */ 964 switch (cpi->cpi_vendor) { 965 case X86_VENDOR_Intel: 966 if (cpi->cpi_maxeax >= 4) { 967 cp = &cpi->cpi_std[4]; 968 cp->cp_eax = 4; 969 cp->cp_ecx = 0; 970 (void) __cpuid_insn(cp); 971 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 972 } 973 /*FALLTHROUGH*/ 974 case X86_VENDOR_AMD: 975 if (cpi->cpi_xmaxeax < 0x80000008) 976 break; 977 cp = &cpi->cpi_extd[8]; 978 cp->cp_eax = 0x80000008; 979 (void) __cpuid_insn(cp); 980 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 981 982 /* 983 * Virtual and physical address limits from 984 * cpuid override previously guessed values. 985 */ 986 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 987 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 988 break; 989 default: 990 break; 991 } 992 993 /* 994 * Derive the number of cores per chip 995 */ 996 switch (cpi->cpi_vendor) { 997 case X86_VENDOR_Intel: 998 if (cpi->cpi_maxeax < 4) { 999 cpi->cpi_ncore_per_chip = 1; 1000 break; 1001 } else { 1002 cpi->cpi_ncore_per_chip = 1003 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1004 } 1005 break; 1006 case X86_VENDOR_AMD: 1007 if (cpi->cpi_xmaxeax < 0x80000008) { 1008 cpi->cpi_ncore_per_chip = 1; 1009 break; 1010 } else { 1011 /* 1012 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1013 * 1 less than the number of physical cores on 1014 * the chip. In family 0x10 this value can 1015 * be affected by "downcoring" - it reflects 1016 * 1 less than the number of cores actually 1017 * enabled on this node. 1018 */ 1019 cpi->cpi_ncore_per_chip = 1020 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1021 } 1022 break; 1023 default: 1024 cpi->cpi_ncore_per_chip = 1; 1025 break; 1026 } 1027 1028 /* 1029 * Get CPUID data about TSC Invariance in Deep C-State. 1030 */ 1031 switch (cpi->cpi_vendor) { 1032 case X86_VENDOR_Intel: 1033 if (cpi->cpi_maxeax >= 7) { 1034 cp = &cpi->cpi_extd[7]; 1035 cp->cp_eax = 0x80000007; 1036 cp->cp_ecx = 0; 1037 (void) __cpuid_insn(cp); 1038 } 1039 break; 1040 default: 1041 break; 1042 } 1043 } else { 1044 cpi->cpi_ncore_per_chip = 1; 1045 } 1046 1047 /* 1048 * If more than one core, then this processor is CMP. 1049 */ 1050 if (cpi->cpi_ncore_per_chip > 1) 1051 feature |= X86_CMP; 1052 1053 /* 1054 * If the number of cores is the same as the number 1055 * of CPUs, then we cannot have HyperThreading. 1056 */ 1057 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1058 feature &= ~X86_HTT; 1059 1060 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1061 /* 1062 * Single-core single-threaded processors. 1063 */ 1064 cpi->cpi_chipid = -1; 1065 cpi->cpi_clogid = 0; 1066 cpi->cpi_coreid = cpu->cpu_id; 1067 cpi->cpi_pkgcoreid = 0; 1068 } else if (cpi->cpi_ncpu_per_chip > 1) { 1069 uint_t i; 1070 uint_t chipid_shift = 0; 1071 uint_t coreid_shift = 0; 1072 uint_t apic_id = CPI_APIC_ID(cpi); 1073 1074 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1075 chipid_shift++; 1076 cpi->cpi_chipid = apic_id >> chipid_shift; 1077 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1078 1079 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1080 if (feature & X86_CMP) { 1081 /* 1082 * Multi-core (and possibly multi-threaded) 1083 * processors. 1084 */ 1085 uint_t ncpu_per_core; 1086 if (cpi->cpi_ncore_per_chip == 1) 1087 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1088 else if (cpi->cpi_ncore_per_chip > 1) 1089 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1090 cpi->cpi_ncore_per_chip; 1091 /* 1092 * 8bit APIC IDs on dual core Pentiums 1093 * look like this: 1094 * 1095 * +-----------------------+------+------+ 1096 * | Physical Package ID | MC | HT | 1097 * +-----------------------+------+------+ 1098 * <------- chipid --------> 1099 * <------- coreid ---------------> 1100 * <--- clogid --> 1101 * <------> 1102 * pkgcoreid 1103 * 1104 * Where the number of bits necessary to 1105 * represent MC and HT fields together equals 1106 * to the minimum number of bits necessary to 1107 * store the value of cpi->cpi_ncpu_per_chip. 1108 * Of those bits, the MC part uses the number 1109 * of bits necessary to store the value of 1110 * cpi->cpi_ncore_per_chip. 1111 */ 1112 for (i = 1; i < ncpu_per_core; i <<= 1) 1113 coreid_shift++; 1114 cpi->cpi_coreid = apic_id >> coreid_shift; 1115 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1116 coreid_shift; 1117 } else if (feature & X86_HTT) { 1118 /* 1119 * Single-core multi-threaded processors. 1120 */ 1121 cpi->cpi_coreid = cpi->cpi_chipid; 1122 cpi->cpi_pkgcoreid = 0; 1123 } 1124 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1125 /* 1126 * AMD CMP chips currently have a single thread per 1127 * core, with 2 cores on family 0xf and 2, 3 or 4 1128 * cores on family 0x10. 1129 * 1130 * Since no two cpus share a core we must assign a 1131 * distinct coreid per cpu, and we do this by using 1132 * the cpu_id. This scheme does not, however, 1133 * guarantee that sibling cores of a chip will have 1134 * sequential coreids starting at a multiple of the 1135 * number of cores per chip - that is usually the 1136 * case, but if the ACPI MADT table is presented 1137 * in a different order then we need to perform a 1138 * few more gymnastics for the pkgcoreid. 1139 * 1140 * In family 0xf CMPs there are 2 cores on all nodes 1141 * present - no mixing of single and dual core parts. 1142 * 1143 * In family 0x10 CMPs cpuid fn 2 ECX[15:12] 1144 * "ApicIdCoreIdSize[3:0]" tells us how 1145 * many least-significant bits in the ApicId 1146 * are used to represent the core number 1147 * within the node. Cores are always 1148 * numbered sequentially from 0 regardless 1149 * of how many or which are disabled, and 1150 * there seems to be no way to discover the 1151 * real core id when some are disabled. 1152 */ 1153 cpi->cpi_coreid = cpu->cpu_id; 1154 1155 if (cpi->cpi_family == 0x10 && 1156 cpi->cpi_xmaxeax >= 0x80000008) { 1157 int coreidsz = 1158 BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 1159 1160 cpi->cpi_pkgcoreid = 1161 apic_id & ((1 << coreidsz) - 1); 1162 } else { 1163 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 1164 } 1165 } else { 1166 /* 1167 * All other processors are currently 1168 * assumed to have single cores. 1169 */ 1170 cpi->cpi_coreid = cpi->cpi_chipid; 1171 cpi->cpi_pkgcoreid = 0; 1172 } 1173 } 1174 1175 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1176 1177 /* 1178 * Synthesize chip "revision" and socket type 1179 */ 1180 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1181 cpi->cpi_model, cpi->cpi_step); 1182 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1183 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1184 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1185 cpi->cpi_model, cpi->cpi_step); 1186 1187 pass1_done: 1188 #if !defined(__xpv) 1189 check_for_hvm(); 1190 #endif 1191 cpi->cpi_pass = 1; 1192 return (feature); 1193 } 1194 1195 /* 1196 * Make copies of the cpuid table entries we depend on, in 1197 * part for ease of parsing now, in part so that we have only 1198 * one place to correct any of it, in part for ease of 1199 * later export to userland, and in part so we can look at 1200 * this stuff in a crash dump. 1201 */ 1202 1203 /*ARGSUSED*/ 1204 void 1205 cpuid_pass2(cpu_t *cpu) 1206 { 1207 uint_t n, nmax; 1208 int i; 1209 struct cpuid_regs *cp; 1210 uint8_t *dp; 1211 uint32_t *iptr; 1212 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1213 1214 ASSERT(cpi->cpi_pass == 1); 1215 1216 if (cpi->cpi_maxeax < 1) 1217 goto pass2_done; 1218 1219 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1220 nmax = NMAX_CPI_STD; 1221 /* 1222 * (We already handled n == 0 and n == 1 in pass 1) 1223 */ 1224 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1225 cp->cp_eax = n; 1226 1227 /* 1228 * CPUID function 4 expects %ecx to be initialized 1229 * with an index which indicates which cache to return 1230 * information about. The OS is expected to call function 4 1231 * with %ecx set to 0, 1, 2, ... until it returns with 1232 * EAX[4:0] set to 0, which indicates there are no more 1233 * caches. 1234 * 1235 * Here, populate cpi_std[4] with the information returned by 1236 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1237 * when dynamic memory allocation becomes available. 1238 * 1239 * Note: we need to explicitly initialize %ecx here, since 1240 * function 4 may have been previously invoked. 1241 */ 1242 if (n == 4) 1243 cp->cp_ecx = 0; 1244 1245 (void) __cpuid_insn(cp); 1246 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1247 switch (n) { 1248 case 2: 1249 /* 1250 * "the lower 8 bits of the %eax register 1251 * contain a value that identifies the number 1252 * of times the cpuid [instruction] has to be 1253 * executed to obtain a complete image of the 1254 * processor's caching systems." 1255 * 1256 * How *do* they make this stuff up? 1257 */ 1258 cpi->cpi_ncache = sizeof (*cp) * 1259 BITX(cp->cp_eax, 7, 0); 1260 if (cpi->cpi_ncache == 0) 1261 break; 1262 cpi->cpi_ncache--; /* skip count byte */ 1263 1264 /* 1265 * Well, for now, rather than attempt to implement 1266 * this slightly dubious algorithm, we just look 1267 * at the first 15 .. 1268 */ 1269 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1270 cpi->cpi_ncache = sizeof (*cp) - 1; 1271 1272 dp = cpi->cpi_cacheinfo; 1273 if (BITX(cp->cp_eax, 31, 31) == 0) { 1274 uint8_t *p = (void *)&cp->cp_eax; 1275 for (i = 1; i < 4; i++) 1276 if (p[i] != 0) 1277 *dp++ = p[i]; 1278 } 1279 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1280 uint8_t *p = (void *)&cp->cp_ebx; 1281 for (i = 0; i < 4; i++) 1282 if (p[i] != 0) 1283 *dp++ = p[i]; 1284 } 1285 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1286 uint8_t *p = (void *)&cp->cp_ecx; 1287 for (i = 0; i < 4; i++) 1288 if (p[i] != 0) 1289 *dp++ = p[i]; 1290 } 1291 if (BITX(cp->cp_edx, 31, 31) == 0) { 1292 uint8_t *p = (void *)&cp->cp_edx; 1293 for (i = 0; i < 4; i++) 1294 if (p[i] != 0) 1295 *dp++ = p[i]; 1296 } 1297 break; 1298 1299 case 3: /* Processor serial number, if PSN supported */ 1300 break; 1301 1302 case 4: /* Deterministic cache parameters */ 1303 break; 1304 1305 case 5: /* Monitor/Mwait parameters */ 1306 { 1307 size_t mwait_size; 1308 1309 /* 1310 * check cpi_mwait.support which was set in cpuid_pass1 1311 */ 1312 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1313 break; 1314 1315 /* 1316 * Protect ourself from insane mwait line size. 1317 * Workaround for incomplete hardware emulator(s). 1318 */ 1319 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1320 if (mwait_size < sizeof (uint32_t) || 1321 !ISP2(mwait_size)) { 1322 #if DEBUG 1323 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1324 "size %ld", cpu->cpu_id, (long)mwait_size); 1325 #endif 1326 break; 1327 } 1328 1329 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1330 cpi->cpi_mwait.mon_max = mwait_size; 1331 if (MWAIT_EXTENSION(cpi)) { 1332 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1333 if (MWAIT_INT_ENABLE(cpi)) 1334 cpi->cpi_mwait.support |= 1335 MWAIT_ECX_INT_ENABLE; 1336 } 1337 break; 1338 } 1339 default: 1340 break; 1341 } 1342 } 1343 1344 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1345 struct cpuid_regs regs; 1346 1347 cp = ®s; 1348 cp->cp_eax = 0xB; 1349 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1350 1351 (void) __cpuid_insn(cp); 1352 1353 /* 1354 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1355 * indicates that the extended topology enumeration leaf is 1356 * available. 1357 */ 1358 if (cp->cp_ebx) { 1359 uint32_t x2apic_id; 1360 uint_t coreid_shift = 0; 1361 uint_t ncpu_per_core = 1; 1362 uint_t chipid_shift = 0; 1363 uint_t ncpu_per_chip = 1; 1364 uint_t i; 1365 uint_t level; 1366 1367 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1368 cp->cp_eax = 0xB; 1369 cp->cp_ecx = i; 1370 1371 (void) __cpuid_insn(cp); 1372 level = CPI_CPU_LEVEL_TYPE(cp); 1373 1374 if (level == 1) { 1375 x2apic_id = cp->cp_edx; 1376 coreid_shift = BITX(cp->cp_eax, 4, 0); 1377 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1378 } else if (level == 2) { 1379 x2apic_id = cp->cp_edx; 1380 chipid_shift = BITX(cp->cp_eax, 4, 0); 1381 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1382 } 1383 } 1384 1385 cpi->cpi_apicid = x2apic_id; 1386 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1387 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1388 ncpu_per_core; 1389 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1390 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1391 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1392 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1393 } 1394 1395 /* Make cp NULL so that we don't stumble on others */ 1396 cp = NULL; 1397 } 1398 1399 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1400 goto pass2_done; 1401 1402 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1403 nmax = NMAX_CPI_EXTD; 1404 /* 1405 * Copy the extended properties, fixing them as we go. 1406 * (We already handled n == 0 and n == 1 in pass 1) 1407 */ 1408 iptr = (void *)cpi->cpi_brandstr; 1409 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1410 cp->cp_eax = 0x80000000 + n; 1411 (void) __cpuid_insn(cp); 1412 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1413 switch (n) { 1414 case 2: 1415 case 3: 1416 case 4: 1417 /* 1418 * Extract the brand string 1419 */ 1420 *iptr++ = cp->cp_eax; 1421 *iptr++ = cp->cp_ebx; 1422 *iptr++ = cp->cp_ecx; 1423 *iptr++ = cp->cp_edx; 1424 break; 1425 case 5: 1426 switch (cpi->cpi_vendor) { 1427 case X86_VENDOR_AMD: 1428 /* 1429 * The Athlon and Duron were the first 1430 * parts to report the sizes of the 1431 * TLB for large pages. Before then, 1432 * we don't trust the data. 1433 */ 1434 if (cpi->cpi_family < 6 || 1435 (cpi->cpi_family == 6 && 1436 cpi->cpi_model < 1)) 1437 cp->cp_eax = 0; 1438 break; 1439 default: 1440 break; 1441 } 1442 break; 1443 case 6: 1444 switch (cpi->cpi_vendor) { 1445 case X86_VENDOR_AMD: 1446 /* 1447 * The Athlon and Duron were the first 1448 * AMD parts with L2 TLB's. 1449 * Before then, don't trust the data. 1450 */ 1451 if (cpi->cpi_family < 6 || 1452 cpi->cpi_family == 6 && 1453 cpi->cpi_model < 1) 1454 cp->cp_eax = cp->cp_ebx = 0; 1455 /* 1456 * AMD Duron rev A0 reports L2 1457 * cache size incorrectly as 1K 1458 * when it is really 64K 1459 */ 1460 if (cpi->cpi_family == 6 && 1461 cpi->cpi_model == 3 && 1462 cpi->cpi_step == 0) { 1463 cp->cp_ecx &= 0xffff; 1464 cp->cp_ecx |= 0x400000; 1465 } 1466 break; 1467 case X86_VENDOR_Cyrix: /* VIA C3 */ 1468 /* 1469 * VIA C3 processors are a bit messed 1470 * up w.r.t. encoding cache sizes in %ecx 1471 */ 1472 if (cpi->cpi_family != 6) 1473 break; 1474 /* 1475 * model 7 and 8 were incorrectly encoded 1476 * 1477 * xxx is model 8 really broken? 1478 */ 1479 if (cpi->cpi_model == 7 || 1480 cpi->cpi_model == 8) 1481 cp->cp_ecx = 1482 BITX(cp->cp_ecx, 31, 24) << 16 | 1483 BITX(cp->cp_ecx, 23, 16) << 12 | 1484 BITX(cp->cp_ecx, 15, 8) << 8 | 1485 BITX(cp->cp_ecx, 7, 0); 1486 /* 1487 * model 9 stepping 1 has wrong associativity 1488 */ 1489 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1490 cp->cp_ecx |= 8 << 12; 1491 break; 1492 case X86_VENDOR_Intel: 1493 /* 1494 * Extended L2 Cache features function. 1495 * First appeared on Prescott. 1496 */ 1497 default: 1498 break; 1499 } 1500 break; 1501 default: 1502 break; 1503 } 1504 } 1505 1506 pass2_done: 1507 cpi->cpi_pass = 2; 1508 } 1509 1510 static const char * 1511 intel_cpubrand(const struct cpuid_info *cpi) 1512 { 1513 int i; 1514 1515 if ((x86_feature & X86_CPUID) == 0 || 1516 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1517 return ("i486"); 1518 1519 switch (cpi->cpi_family) { 1520 case 5: 1521 return ("Intel Pentium(r)"); 1522 case 6: 1523 switch (cpi->cpi_model) { 1524 uint_t celeron, xeon; 1525 const struct cpuid_regs *cp; 1526 case 0: 1527 case 1: 1528 case 2: 1529 return ("Intel Pentium(r) Pro"); 1530 case 3: 1531 case 4: 1532 return ("Intel Pentium(r) II"); 1533 case 6: 1534 return ("Intel Celeron(r)"); 1535 case 5: 1536 case 7: 1537 celeron = xeon = 0; 1538 cp = &cpi->cpi_std[2]; /* cache info */ 1539 1540 for (i = 1; i < 4; i++) { 1541 uint_t tmp; 1542 1543 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1544 if (tmp == 0x40) 1545 celeron++; 1546 if (tmp >= 0x44 && tmp <= 0x45) 1547 xeon++; 1548 } 1549 1550 for (i = 0; i < 2; i++) { 1551 uint_t tmp; 1552 1553 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1554 if (tmp == 0x40) 1555 celeron++; 1556 else if (tmp >= 0x44 && tmp <= 0x45) 1557 xeon++; 1558 } 1559 1560 for (i = 0; i < 4; i++) { 1561 uint_t tmp; 1562 1563 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1564 if (tmp == 0x40) 1565 celeron++; 1566 else if (tmp >= 0x44 && tmp <= 0x45) 1567 xeon++; 1568 } 1569 1570 for (i = 0; i < 4; i++) { 1571 uint_t tmp; 1572 1573 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1574 if (tmp == 0x40) 1575 celeron++; 1576 else if (tmp >= 0x44 && tmp <= 0x45) 1577 xeon++; 1578 } 1579 1580 if (celeron) 1581 return ("Intel Celeron(r)"); 1582 if (xeon) 1583 return (cpi->cpi_model == 5 ? 1584 "Intel Pentium(r) II Xeon(tm)" : 1585 "Intel Pentium(r) III Xeon(tm)"); 1586 return (cpi->cpi_model == 5 ? 1587 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1588 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1589 default: 1590 break; 1591 } 1592 default: 1593 break; 1594 } 1595 1596 /* BrandID is present if the field is nonzero */ 1597 if (cpi->cpi_brandid != 0) { 1598 static const struct { 1599 uint_t bt_bid; 1600 const char *bt_str; 1601 } brand_tbl[] = { 1602 { 0x1, "Intel(r) Celeron(r)" }, 1603 { 0x2, "Intel(r) Pentium(r) III" }, 1604 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1605 { 0x4, "Intel(r) Pentium(r) III" }, 1606 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1607 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1608 { 0x8, "Intel(r) Pentium(r) 4" }, 1609 { 0x9, "Intel(r) Pentium(r) 4" }, 1610 { 0xa, "Intel(r) Celeron(r)" }, 1611 { 0xb, "Intel(r) Xeon(tm)" }, 1612 { 0xc, "Intel(r) Xeon(tm) MP" }, 1613 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1614 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1615 { 0x11, "Mobile Genuine Intel(r)" }, 1616 { 0x12, "Intel(r) Celeron(r) M" }, 1617 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1618 { 0x14, "Intel(r) Celeron(r)" }, 1619 { 0x15, "Mobile Genuine Intel(r)" }, 1620 { 0x16, "Intel(r) Pentium(r) M" }, 1621 { 0x17, "Mobile Intel(r) Celeron(r)" } 1622 }; 1623 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1624 uint_t sgn; 1625 1626 sgn = (cpi->cpi_family << 8) | 1627 (cpi->cpi_model << 4) | cpi->cpi_step; 1628 1629 for (i = 0; i < btblmax; i++) 1630 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1631 break; 1632 if (i < btblmax) { 1633 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1634 return ("Intel(r) Celeron(r)"); 1635 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1636 return ("Intel(r) Xeon(tm) MP"); 1637 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1638 return ("Intel(r) Xeon(tm)"); 1639 return (brand_tbl[i].bt_str); 1640 } 1641 } 1642 1643 return (NULL); 1644 } 1645 1646 static const char * 1647 amd_cpubrand(const struct cpuid_info *cpi) 1648 { 1649 if ((x86_feature & X86_CPUID) == 0 || 1650 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1651 return ("i486 compatible"); 1652 1653 switch (cpi->cpi_family) { 1654 case 5: 1655 switch (cpi->cpi_model) { 1656 case 0: 1657 case 1: 1658 case 2: 1659 case 3: 1660 case 4: 1661 case 5: 1662 return ("AMD-K5(r)"); 1663 case 6: 1664 case 7: 1665 return ("AMD-K6(r)"); 1666 case 8: 1667 return ("AMD-K6(r)-2"); 1668 case 9: 1669 return ("AMD-K6(r)-III"); 1670 default: 1671 return ("AMD (family 5)"); 1672 } 1673 case 6: 1674 switch (cpi->cpi_model) { 1675 case 1: 1676 return ("AMD-K7(tm)"); 1677 case 0: 1678 case 2: 1679 case 4: 1680 return ("AMD Athlon(tm)"); 1681 case 3: 1682 case 7: 1683 return ("AMD Duron(tm)"); 1684 case 6: 1685 case 8: 1686 case 10: 1687 /* 1688 * Use the L2 cache size to distinguish 1689 */ 1690 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1691 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1692 default: 1693 return ("AMD (family 6)"); 1694 } 1695 default: 1696 break; 1697 } 1698 1699 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1700 cpi->cpi_brandid != 0) { 1701 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1702 case 3: 1703 return ("AMD Opteron(tm) UP 1xx"); 1704 case 4: 1705 return ("AMD Opteron(tm) DP 2xx"); 1706 case 5: 1707 return ("AMD Opteron(tm) MP 8xx"); 1708 default: 1709 return ("AMD Opteron(tm)"); 1710 } 1711 } 1712 1713 return (NULL); 1714 } 1715 1716 static const char * 1717 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1718 { 1719 if ((x86_feature & X86_CPUID) == 0 || 1720 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1721 type == X86_TYPE_CYRIX_486) 1722 return ("i486 compatible"); 1723 1724 switch (type) { 1725 case X86_TYPE_CYRIX_6x86: 1726 return ("Cyrix 6x86"); 1727 case X86_TYPE_CYRIX_6x86L: 1728 return ("Cyrix 6x86L"); 1729 case X86_TYPE_CYRIX_6x86MX: 1730 return ("Cyrix 6x86MX"); 1731 case X86_TYPE_CYRIX_GXm: 1732 return ("Cyrix GXm"); 1733 case X86_TYPE_CYRIX_MediaGX: 1734 return ("Cyrix MediaGX"); 1735 case X86_TYPE_CYRIX_MII: 1736 return ("Cyrix M2"); 1737 case X86_TYPE_VIA_CYRIX_III: 1738 return ("VIA Cyrix M3"); 1739 default: 1740 /* 1741 * Have another wild guess .. 1742 */ 1743 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1744 return ("Cyrix 5x86"); 1745 else if (cpi->cpi_family == 5) { 1746 switch (cpi->cpi_model) { 1747 case 2: 1748 return ("Cyrix 6x86"); /* Cyrix M1 */ 1749 case 4: 1750 return ("Cyrix MediaGX"); 1751 default: 1752 break; 1753 } 1754 } else if (cpi->cpi_family == 6) { 1755 switch (cpi->cpi_model) { 1756 case 0: 1757 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1758 case 5: 1759 case 6: 1760 case 7: 1761 case 8: 1762 case 9: 1763 return ("VIA C3"); 1764 default: 1765 break; 1766 } 1767 } 1768 break; 1769 } 1770 return (NULL); 1771 } 1772 1773 /* 1774 * This only gets called in the case that the CPU extended 1775 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1776 * aren't available, or contain null bytes for some reason. 1777 */ 1778 static void 1779 fabricate_brandstr(struct cpuid_info *cpi) 1780 { 1781 const char *brand = NULL; 1782 1783 switch (cpi->cpi_vendor) { 1784 case X86_VENDOR_Intel: 1785 brand = intel_cpubrand(cpi); 1786 break; 1787 case X86_VENDOR_AMD: 1788 brand = amd_cpubrand(cpi); 1789 break; 1790 case X86_VENDOR_Cyrix: 1791 brand = cyrix_cpubrand(cpi, x86_type); 1792 break; 1793 case X86_VENDOR_NexGen: 1794 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1795 brand = "NexGen Nx586"; 1796 break; 1797 case X86_VENDOR_Centaur: 1798 if (cpi->cpi_family == 5) 1799 switch (cpi->cpi_model) { 1800 case 4: 1801 brand = "Centaur C6"; 1802 break; 1803 case 8: 1804 brand = "Centaur C2"; 1805 break; 1806 case 9: 1807 brand = "Centaur C3"; 1808 break; 1809 default: 1810 break; 1811 } 1812 break; 1813 case X86_VENDOR_Rise: 1814 if (cpi->cpi_family == 5 && 1815 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1816 brand = "Rise mP6"; 1817 break; 1818 case X86_VENDOR_SiS: 1819 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1820 brand = "SiS 55x"; 1821 break; 1822 case X86_VENDOR_TM: 1823 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 1824 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1825 break; 1826 case X86_VENDOR_NSC: 1827 case X86_VENDOR_UMC: 1828 default: 1829 break; 1830 } 1831 if (brand) { 1832 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1833 return; 1834 } 1835 1836 /* 1837 * If all else fails ... 1838 */ 1839 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1840 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1841 cpi->cpi_model, cpi->cpi_step); 1842 } 1843 1844 /* 1845 * This routine is called just after kernel memory allocation 1846 * becomes available on cpu0, and as part of mp_startup() on 1847 * the other cpus. 1848 * 1849 * Fixup the brand string, and collect any information from cpuid 1850 * that requires dynamicically allocated storage to represent. 1851 */ 1852 /*ARGSUSED*/ 1853 void 1854 cpuid_pass3(cpu_t *cpu) 1855 { 1856 int i, max, shft, level, size; 1857 struct cpuid_regs regs; 1858 struct cpuid_regs *cp; 1859 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1860 1861 ASSERT(cpi->cpi_pass == 2); 1862 1863 /* 1864 * Function 4: Deterministic cache parameters 1865 * 1866 * Take this opportunity to detect the number of threads 1867 * sharing the last level cache, and construct a corresponding 1868 * cache id. The respective cpuid_info members are initialized 1869 * to the default case of "no last level cache sharing". 1870 */ 1871 cpi->cpi_ncpu_shr_last_cache = 1; 1872 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1873 1874 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1875 1876 /* 1877 * Find the # of elements (size) returned by fn 4, and along 1878 * the way detect last level cache sharing details. 1879 */ 1880 bzero(®s, sizeof (regs)); 1881 cp = ®s; 1882 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1883 cp->cp_eax = 4; 1884 cp->cp_ecx = i; 1885 1886 (void) __cpuid_insn(cp); 1887 1888 if (CPI_CACHE_TYPE(cp) == 0) 1889 break; 1890 level = CPI_CACHE_LVL(cp); 1891 if (level > max) { 1892 max = level; 1893 cpi->cpi_ncpu_shr_last_cache = 1894 CPI_NTHR_SHR_CACHE(cp) + 1; 1895 } 1896 } 1897 cpi->cpi_std_4_size = size = i; 1898 1899 /* 1900 * Allocate the cpi_std_4 array. The first element 1901 * references the regs for fn 4, %ecx == 0, which 1902 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1903 */ 1904 if (size > 0) { 1905 cpi->cpi_std_4 = 1906 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1907 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1908 1909 /* 1910 * Allocate storage to hold the additional regs 1911 * for function 4, %ecx == 1 .. cpi_std_4_size. 1912 * 1913 * The regs for fn 4, %ecx == 0 has already 1914 * been allocated as indicated above. 1915 */ 1916 for (i = 1; i < size; i++) { 1917 cp = cpi->cpi_std_4[i] = 1918 kmem_zalloc(sizeof (regs), KM_SLEEP); 1919 cp->cp_eax = 4; 1920 cp->cp_ecx = i; 1921 1922 (void) __cpuid_insn(cp); 1923 } 1924 } 1925 /* 1926 * Determine the number of bits needed to represent 1927 * the number of CPUs sharing the last level cache. 1928 * 1929 * Shift off that number of bits from the APIC id to 1930 * derive the cache id. 1931 */ 1932 shft = 0; 1933 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1934 shft++; 1935 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 1936 } 1937 1938 /* 1939 * Now fixup the brand string 1940 */ 1941 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1942 fabricate_brandstr(cpi); 1943 } else { 1944 1945 /* 1946 * If we successfully extracted a brand string from the cpuid 1947 * instruction, clean it up by removing leading spaces and 1948 * similar junk. 1949 */ 1950 if (cpi->cpi_brandstr[0]) { 1951 size_t maxlen = sizeof (cpi->cpi_brandstr); 1952 char *src, *dst; 1953 1954 dst = src = (char *)cpi->cpi_brandstr; 1955 src[maxlen - 1] = '\0'; 1956 /* 1957 * strip leading spaces 1958 */ 1959 while (*src == ' ') 1960 src++; 1961 /* 1962 * Remove any 'Genuine' or "Authentic" prefixes 1963 */ 1964 if (strncmp(src, "Genuine ", 8) == 0) 1965 src += 8; 1966 if (strncmp(src, "Authentic ", 10) == 0) 1967 src += 10; 1968 1969 /* 1970 * Now do an in-place copy. 1971 * Map (R) to (r) and (TM) to (tm). 1972 * The era of teletypes is long gone, and there's 1973 * -really- no need to shout. 1974 */ 1975 while (*src != '\0') { 1976 if (src[0] == '(') { 1977 if (strncmp(src + 1, "R)", 2) == 0) { 1978 (void) strncpy(dst, "(r)", 3); 1979 src += 3; 1980 dst += 3; 1981 continue; 1982 } 1983 if (strncmp(src + 1, "TM)", 3) == 0) { 1984 (void) strncpy(dst, "(tm)", 4); 1985 src += 4; 1986 dst += 4; 1987 continue; 1988 } 1989 } 1990 *dst++ = *src++; 1991 } 1992 *dst = '\0'; 1993 1994 /* 1995 * Finally, remove any trailing spaces 1996 */ 1997 while (--dst > cpi->cpi_brandstr) 1998 if (*dst == ' ') 1999 *dst = '\0'; 2000 else 2001 break; 2002 } else 2003 fabricate_brandstr(cpi); 2004 } 2005 cpi->cpi_pass = 3; 2006 } 2007 2008 /* 2009 * This routine is called out of bind_hwcap() much later in the life 2010 * of the kernel (post_startup()). The job of this routine is to resolve 2011 * the hardware feature support and kernel support for those features into 2012 * what we're actually going to tell applications via the aux vector. 2013 */ 2014 uint_t 2015 cpuid_pass4(cpu_t *cpu) 2016 { 2017 struct cpuid_info *cpi; 2018 uint_t hwcap_flags = 0; 2019 2020 if (cpu == NULL) 2021 cpu = CPU; 2022 cpi = cpu->cpu_m.mcpu_cpi; 2023 2024 ASSERT(cpi->cpi_pass == 3); 2025 2026 if (cpi->cpi_maxeax >= 1) { 2027 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2028 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2029 2030 *edx = CPI_FEATURES_EDX(cpi); 2031 *ecx = CPI_FEATURES_ECX(cpi); 2032 2033 /* 2034 * [these require explicit kernel support] 2035 */ 2036 if ((x86_feature & X86_SEP) == 0) 2037 *edx &= ~CPUID_INTC_EDX_SEP; 2038 2039 if ((x86_feature & X86_SSE) == 0) 2040 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2041 if ((x86_feature & X86_SSE2) == 0) 2042 *edx &= ~CPUID_INTC_EDX_SSE2; 2043 2044 if ((x86_feature & X86_HTT) == 0) 2045 *edx &= ~CPUID_INTC_EDX_HTT; 2046 2047 if ((x86_feature & X86_SSE3) == 0) 2048 *ecx &= ~CPUID_INTC_ECX_SSE3; 2049 2050 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2051 if ((x86_feature & X86_SSSE3) == 0) 2052 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2053 if ((x86_feature & X86_SSE4_1) == 0) 2054 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2055 if ((x86_feature & X86_SSE4_2) == 0) 2056 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2057 } 2058 2059 /* 2060 * [no explicit support required beyond x87 fp context] 2061 */ 2062 if (!fpu_exists) 2063 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2064 2065 /* 2066 * Now map the supported feature vector to things that we 2067 * think userland will care about. 2068 */ 2069 if (*edx & CPUID_INTC_EDX_SEP) 2070 hwcap_flags |= AV_386_SEP; 2071 if (*edx & CPUID_INTC_EDX_SSE) 2072 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2073 if (*edx & CPUID_INTC_EDX_SSE2) 2074 hwcap_flags |= AV_386_SSE2; 2075 if (*ecx & CPUID_INTC_ECX_SSE3) 2076 hwcap_flags |= AV_386_SSE3; 2077 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2078 if (*ecx & CPUID_INTC_ECX_SSSE3) 2079 hwcap_flags |= AV_386_SSSE3; 2080 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2081 hwcap_flags |= AV_386_SSE4_1; 2082 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2083 hwcap_flags |= AV_386_SSE4_2; 2084 if (*ecx & CPUID_INTC_ECX_MOVBE) 2085 hwcap_flags |= AV_386_MOVBE; 2086 } 2087 if (*ecx & CPUID_INTC_ECX_POPCNT) 2088 hwcap_flags |= AV_386_POPCNT; 2089 if (*edx & CPUID_INTC_EDX_FPU) 2090 hwcap_flags |= AV_386_FPU; 2091 if (*edx & CPUID_INTC_EDX_MMX) 2092 hwcap_flags |= AV_386_MMX; 2093 2094 if (*edx & CPUID_INTC_EDX_TSC) 2095 hwcap_flags |= AV_386_TSC; 2096 if (*edx & CPUID_INTC_EDX_CX8) 2097 hwcap_flags |= AV_386_CX8; 2098 if (*edx & CPUID_INTC_EDX_CMOV) 2099 hwcap_flags |= AV_386_CMOV; 2100 if (*ecx & CPUID_INTC_ECX_MON) 2101 hwcap_flags |= AV_386_MON; 2102 if (*ecx & CPUID_INTC_ECX_CX16) 2103 hwcap_flags |= AV_386_CX16; 2104 } 2105 2106 if (x86_feature & X86_HTT) 2107 hwcap_flags |= AV_386_PAUSE; 2108 2109 if (cpi->cpi_xmaxeax < 0x80000001) 2110 goto pass4_done; 2111 2112 switch (cpi->cpi_vendor) { 2113 struct cpuid_regs cp; 2114 uint32_t *edx, *ecx; 2115 2116 case X86_VENDOR_Intel: 2117 /* 2118 * Seems like Intel duplicated what we necessary 2119 * here to make the initial crop of 64-bit OS's work. 2120 * Hopefully, those are the only "extended" bits 2121 * they'll add. 2122 */ 2123 /*FALLTHROUGH*/ 2124 2125 case X86_VENDOR_AMD: 2126 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2127 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2128 2129 *edx = CPI_FEATURES_XTD_EDX(cpi); 2130 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2131 2132 /* 2133 * [these features require explicit kernel support] 2134 */ 2135 switch (cpi->cpi_vendor) { 2136 case X86_VENDOR_Intel: 2137 if ((x86_feature & X86_TSCP) == 0) 2138 *edx &= ~CPUID_AMD_EDX_TSCP; 2139 break; 2140 2141 case X86_VENDOR_AMD: 2142 if ((x86_feature & X86_TSCP) == 0) 2143 *edx &= ~CPUID_AMD_EDX_TSCP; 2144 if ((x86_feature & X86_SSE4A) == 0) 2145 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2146 break; 2147 2148 default: 2149 break; 2150 } 2151 2152 /* 2153 * [no explicit support required beyond 2154 * x87 fp context and exception handlers] 2155 */ 2156 if (!fpu_exists) 2157 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2158 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2159 2160 if ((x86_feature & X86_NX) == 0) 2161 *edx &= ~CPUID_AMD_EDX_NX; 2162 #if !defined(__amd64) 2163 *edx &= ~CPUID_AMD_EDX_LM; 2164 #endif 2165 /* 2166 * Now map the supported feature vector to 2167 * things that we think userland will care about. 2168 */ 2169 #if defined(__amd64) 2170 if (*edx & CPUID_AMD_EDX_SYSC) 2171 hwcap_flags |= AV_386_AMD_SYSC; 2172 #endif 2173 if (*edx & CPUID_AMD_EDX_MMXamd) 2174 hwcap_flags |= AV_386_AMD_MMX; 2175 if (*edx & CPUID_AMD_EDX_3DNow) 2176 hwcap_flags |= AV_386_AMD_3DNow; 2177 if (*edx & CPUID_AMD_EDX_3DNowx) 2178 hwcap_flags |= AV_386_AMD_3DNowx; 2179 2180 switch (cpi->cpi_vendor) { 2181 case X86_VENDOR_AMD: 2182 if (*edx & CPUID_AMD_EDX_TSCP) 2183 hwcap_flags |= AV_386_TSCP; 2184 if (*ecx & CPUID_AMD_ECX_AHF64) 2185 hwcap_flags |= AV_386_AHF; 2186 if (*ecx & CPUID_AMD_ECX_SSE4A) 2187 hwcap_flags |= AV_386_AMD_SSE4A; 2188 if (*ecx & CPUID_AMD_ECX_LZCNT) 2189 hwcap_flags |= AV_386_AMD_LZCNT; 2190 break; 2191 2192 case X86_VENDOR_Intel: 2193 if (*edx & CPUID_AMD_EDX_TSCP) 2194 hwcap_flags |= AV_386_TSCP; 2195 /* 2196 * Aarrgh. 2197 * Intel uses a different bit in the same word. 2198 */ 2199 if (*ecx & CPUID_INTC_ECX_AHF64) 2200 hwcap_flags |= AV_386_AHF; 2201 break; 2202 2203 default: 2204 break; 2205 } 2206 break; 2207 2208 case X86_VENDOR_TM: 2209 cp.cp_eax = 0x80860001; 2210 (void) __cpuid_insn(&cp); 2211 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2212 break; 2213 2214 default: 2215 break; 2216 } 2217 2218 pass4_done: 2219 cpi->cpi_pass = 4; 2220 return (hwcap_flags); 2221 } 2222 2223 2224 /* 2225 * Simulate the cpuid instruction using the data we previously 2226 * captured about this CPU. We try our best to return the truth 2227 * about the hardware, independently of kernel support. 2228 */ 2229 uint32_t 2230 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2231 { 2232 struct cpuid_info *cpi; 2233 struct cpuid_regs *xcp; 2234 2235 if (cpu == NULL) 2236 cpu = CPU; 2237 cpi = cpu->cpu_m.mcpu_cpi; 2238 2239 ASSERT(cpuid_checkpass(cpu, 3)); 2240 2241 /* 2242 * CPUID data is cached in two separate places: cpi_std for standard 2243 * CPUID functions, and cpi_extd for extended CPUID functions. 2244 */ 2245 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2246 xcp = &cpi->cpi_std[cp->cp_eax]; 2247 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2248 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2249 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2250 else 2251 /* 2252 * The caller is asking for data from an input parameter which 2253 * the kernel has not cached. In this case we go fetch from 2254 * the hardware and return the data directly to the user. 2255 */ 2256 return (__cpuid_insn(cp)); 2257 2258 cp->cp_eax = xcp->cp_eax; 2259 cp->cp_ebx = xcp->cp_ebx; 2260 cp->cp_ecx = xcp->cp_ecx; 2261 cp->cp_edx = xcp->cp_edx; 2262 return (cp->cp_eax); 2263 } 2264 2265 int 2266 cpuid_checkpass(cpu_t *cpu, int pass) 2267 { 2268 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2269 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2270 } 2271 2272 int 2273 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2274 { 2275 ASSERT(cpuid_checkpass(cpu, 3)); 2276 2277 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2278 } 2279 2280 int 2281 cpuid_is_cmt(cpu_t *cpu) 2282 { 2283 if (cpu == NULL) 2284 cpu = CPU; 2285 2286 ASSERT(cpuid_checkpass(cpu, 1)); 2287 2288 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2289 } 2290 2291 /* 2292 * AMD and Intel both implement the 64-bit variant of the syscall 2293 * instruction (syscallq), so if there's -any- support for syscall, 2294 * cpuid currently says "yes, we support this". 2295 * 2296 * However, Intel decided to -not- implement the 32-bit variant of the 2297 * syscall instruction, so we provide a predicate to allow our caller 2298 * to test that subtlety here. 2299 * 2300 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2301 * even in the case where the hardware would in fact support it. 2302 */ 2303 /*ARGSUSED*/ 2304 int 2305 cpuid_syscall32_insn(cpu_t *cpu) 2306 { 2307 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2308 2309 #if !defined(__xpv) 2310 if (cpu == NULL) 2311 cpu = CPU; 2312 2313 /*CSTYLED*/ 2314 { 2315 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2316 2317 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2318 cpi->cpi_xmaxeax >= 0x80000001 && 2319 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2320 return (1); 2321 } 2322 #endif 2323 return (0); 2324 } 2325 2326 int 2327 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2328 { 2329 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2330 2331 static const char fmt[] = 2332 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2333 static const char fmt_ht[] = 2334 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2335 2336 ASSERT(cpuid_checkpass(cpu, 1)); 2337 2338 if (cpuid_is_cmt(cpu)) 2339 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2340 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2341 cpi->cpi_family, cpi->cpi_model, 2342 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2343 return (snprintf(s, n, fmt, 2344 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2345 cpi->cpi_family, cpi->cpi_model, 2346 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2347 } 2348 2349 const char * 2350 cpuid_getvendorstr(cpu_t *cpu) 2351 { 2352 ASSERT(cpuid_checkpass(cpu, 1)); 2353 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2354 } 2355 2356 uint_t 2357 cpuid_getvendor(cpu_t *cpu) 2358 { 2359 ASSERT(cpuid_checkpass(cpu, 1)); 2360 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2361 } 2362 2363 uint_t 2364 cpuid_getfamily(cpu_t *cpu) 2365 { 2366 ASSERT(cpuid_checkpass(cpu, 1)); 2367 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2368 } 2369 2370 uint_t 2371 cpuid_getmodel(cpu_t *cpu) 2372 { 2373 ASSERT(cpuid_checkpass(cpu, 1)); 2374 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2375 } 2376 2377 uint_t 2378 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2379 { 2380 ASSERT(cpuid_checkpass(cpu, 1)); 2381 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2382 } 2383 2384 uint_t 2385 cpuid_get_ncore_per_chip(cpu_t *cpu) 2386 { 2387 ASSERT(cpuid_checkpass(cpu, 1)); 2388 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2389 } 2390 2391 uint_t 2392 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2393 { 2394 ASSERT(cpuid_checkpass(cpu, 2)); 2395 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2396 } 2397 2398 id_t 2399 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2400 { 2401 ASSERT(cpuid_checkpass(cpu, 2)); 2402 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2403 } 2404 2405 uint_t 2406 cpuid_getstep(cpu_t *cpu) 2407 { 2408 ASSERT(cpuid_checkpass(cpu, 1)); 2409 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2410 } 2411 2412 uint_t 2413 cpuid_getsig(struct cpu *cpu) 2414 { 2415 ASSERT(cpuid_checkpass(cpu, 1)); 2416 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2417 } 2418 2419 uint32_t 2420 cpuid_getchiprev(struct cpu *cpu) 2421 { 2422 ASSERT(cpuid_checkpass(cpu, 1)); 2423 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2424 } 2425 2426 const char * 2427 cpuid_getchiprevstr(struct cpu *cpu) 2428 { 2429 ASSERT(cpuid_checkpass(cpu, 1)); 2430 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2431 } 2432 2433 uint32_t 2434 cpuid_getsockettype(struct cpu *cpu) 2435 { 2436 ASSERT(cpuid_checkpass(cpu, 1)); 2437 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2438 } 2439 2440 int 2441 cpuid_get_chipid(cpu_t *cpu) 2442 { 2443 ASSERT(cpuid_checkpass(cpu, 1)); 2444 2445 if (cpuid_is_cmt(cpu)) 2446 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2447 return (cpu->cpu_id); 2448 } 2449 2450 id_t 2451 cpuid_get_coreid(cpu_t *cpu) 2452 { 2453 ASSERT(cpuid_checkpass(cpu, 1)); 2454 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2455 } 2456 2457 int 2458 cpuid_get_pkgcoreid(cpu_t *cpu) 2459 { 2460 ASSERT(cpuid_checkpass(cpu, 1)); 2461 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2462 } 2463 2464 int 2465 cpuid_get_clogid(cpu_t *cpu) 2466 { 2467 ASSERT(cpuid_checkpass(cpu, 1)); 2468 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2469 } 2470 2471 void 2472 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2473 { 2474 struct cpuid_info *cpi; 2475 2476 if (cpu == NULL) 2477 cpu = CPU; 2478 cpi = cpu->cpu_m.mcpu_cpi; 2479 2480 ASSERT(cpuid_checkpass(cpu, 1)); 2481 2482 if (pabits) 2483 *pabits = cpi->cpi_pabits; 2484 if (vabits) 2485 *vabits = cpi->cpi_vabits; 2486 } 2487 2488 /* 2489 * Returns the number of data TLB entries for a corresponding 2490 * pagesize. If it can't be computed, or isn't known, the 2491 * routine returns zero. If you ask about an architecturally 2492 * impossible pagesize, the routine will panic (so that the 2493 * hat implementor knows that things are inconsistent.) 2494 */ 2495 uint_t 2496 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2497 { 2498 struct cpuid_info *cpi; 2499 uint_t dtlb_nent = 0; 2500 2501 if (cpu == NULL) 2502 cpu = CPU; 2503 cpi = cpu->cpu_m.mcpu_cpi; 2504 2505 ASSERT(cpuid_checkpass(cpu, 1)); 2506 2507 /* 2508 * Check the L2 TLB info 2509 */ 2510 if (cpi->cpi_xmaxeax >= 0x80000006) { 2511 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2512 2513 switch (pagesize) { 2514 2515 case 4 * 1024: 2516 /* 2517 * All zero in the top 16 bits of the register 2518 * indicates a unified TLB. Size is in low 16 bits. 2519 */ 2520 if ((cp->cp_ebx & 0xffff0000) == 0) 2521 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2522 else 2523 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2524 break; 2525 2526 case 2 * 1024 * 1024: 2527 if ((cp->cp_eax & 0xffff0000) == 0) 2528 dtlb_nent = cp->cp_eax & 0x0000ffff; 2529 else 2530 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2531 break; 2532 2533 default: 2534 panic("unknown L2 pagesize"); 2535 /*NOTREACHED*/ 2536 } 2537 } 2538 2539 if (dtlb_nent != 0) 2540 return (dtlb_nent); 2541 2542 /* 2543 * No L2 TLB support for this size, try L1. 2544 */ 2545 if (cpi->cpi_xmaxeax >= 0x80000005) { 2546 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2547 2548 switch (pagesize) { 2549 case 4 * 1024: 2550 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2551 break; 2552 case 2 * 1024 * 1024: 2553 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2554 break; 2555 default: 2556 panic("unknown L1 d-TLB pagesize"); 2557 /*NOTREACHED*/ 2558 } 2559 } 2560 2561 return (dtlb_nent); 2562 } 2563 2564 /* 2565 * Return 0 if the erratum is not present or not applicable, positive 2566 * if it is, and negative if the status of the erratum is unknown. 2567 * 2568 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2569 * Processors" #25759, Rev 3.57, August 2005 2570 */ 2571 int 2572 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2573 { 2574 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2575 uint_t eax; 2576 2577 /* 2578 * Bail out if this CPU isn't an AMD CPU, or if it's 2579 * a legacy (32-bit) AMD CPU. 2580 */ 2581 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2582 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2583 cpi->cpi_family == 6) 2584 2585 return (0); 2586 2587 eax = cpi->cpi_std[1].cp_eax; 2588 2589 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2590 #define SH_B3(eax) (eax == 0xf51) 2591 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2592 2593 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2594 2595 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2596 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2597 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2598 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2599 2600 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2601 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2602 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2603 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2604 2605 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2606 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2607 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2608 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2609 #define BH_E4(eax) (eax == 0x20fb1) 2610 #define SH_E5(eax) (eax == 0x20f42) 2611 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2612 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2613 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2614 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2615 DH_E6(eax) || JH_E6(eax)) 2616 2617 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2618 #define DR_B0(eax) (eax == 0x100f20) 2619 #define DR_B1(eax) (eax == 0x100f21) 2620 #define DR_BA(eax) (eax == 0x100f2a) 2621 #define DR_B2(eax) (eax == 0x100f22) 2622 #define DR_B3(eax) (eax == 0x100f23) 2623 #define RB_C0(eax) (eax == 0x100f40) 2624 2625 switch (erratum) { 2626 case 1: 2627 return (cpi->cpi_family < 0x10); 2628 case 51: /* what does the asterisk mean? */ 2629 return (B(eax) || SH_C0(eax) || CG(eax)); 2630 case 52: 2631 return (B(eax)); 2632 case 57: 2633 return (cpi->cpi_family <= 0x11); 2634 case 58: 2635 return (B(eax)); 2636 case 60: 2637 return (cpi->cpi_family <= 0x11); 2638 case 61: 2639 case 62: 2640 case 63: 2641 case 64: 2642 case 65: 2643 case 66: 2644 case 68: 2645 case 69: 2646 case 70: 2647 case 71: 2648 return (B(eax)); 2649 case 72: 2650 return (SH_B0(eax)); 2651 case 74: 2652 return (B(eax)); 2653 case 75: 2654 return (cpi->cpi_family < 0x10); 2655 case 76: 2656 return (B(eax)); 2657 case 77: 2658 return (cpi->cpi_family <= 0x11); 2659 case 78: 2660 return (B(eax) || SH_C0(eax)); 2661 case 79: 2662 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2663 case 80: 2664 case 81: 2665 case 82: 2666 return (B(eax)); 2667 case 83: 2668 return (B(eax) || SH_C0(eax) || CG(eax)); 2669 case 85: 2670 return (cpi->cpi_family < 0x10); 2671 case 86: 2672 return (SH_C0(eax) || CG(eax)); 2673 case 88: 2674 #if !defined(__amd64) 2675 return (0); 2676 #else 2677 return (B(eax) || SH_C0(eax)); 2678 #endif 2679 case 89: 2680 return (cpi->cpi_family < 0x10); 2681 case 90: 2682 return (B(eax) || SH_C0(eax) || CG(eax)); 2683 case 91: 2684 case 92: 2685 return (B(eax) || SH_C0(eax)); 2686 case 93: 2687 return (SH_C0(eax)); 2688 case 94: 2689 return (B(eax) || SH_C0(eax) || CG(eax)); 2690 case 95: 2691 #if !defined(__amd64) 2692 return (0); 2693 #else 2694 return (B(eax) || SH_C0(eax)); 2695 #endif 2696 case 96: 2697 return (B(eax) || SH_C0(eax) || CG(eax)); 2698 case 97: 2699 case 98: 2700 return (SH_C0(eax) || CG(eax)); 2701 case 99: 2702 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2703 case 100: 2704 return (B(eax) || SH_C0(eax)); 2705 case 101: 2706 case 103: 2707 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2708 case 104: 2709 return (SH_C0(eax) || CG(eax) || D0(eax)); 2710 case 105: 2711 case 106: 2712 case 107: 2713 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2714 case 108: 2715 return (DH_CG(eax)); 2716 case 109: 2717 return (SH_C0(eax) || CG(eax) || D0(eax)); 2718 case 110: 2719 return (D0(eax) || EX(eax)); 2720 case 111: 2721 return (CG(eax)); 2722 case 112: 2723 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2724 case 113: 2725 return (eax == 0x20fc0); 2726 case 114: 2727 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2728 case 115: 2729 return (SH_E0(eax) || JH_E1(eax)); 2730 case 116: 2731 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2732 case 117: 2733 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2734 case 118: 2735 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2736 JH_E6(eax)); 2737 case 121: 2738 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2739 case 122: 2740 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2741 case 123: 2742 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2743 case 131: 2744 return (cpi->cpi_family < 0x10); 2745 case 6336786: 2746 /* 2747 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2748 * if this is a K8 family or newer processor 2749 */ 2750 if (CPI_FAMILY(cpi) == 0xf) { 2751 struct cpuid_regs regs; 2752 regs.cp_eax = 0x80000007; 2753 (void) __cpuid_insn(®s); 2754 return (!(regs.cp_edx & 0x100)); 2755 } 2756 return (0); 2757 case 6323525: 2758 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2759 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2760 2761 case 6671130: 2762 /* 2763 * check for processors (pre-Shanghai) that do not provide 2764 * optimal management of 1gb ptes in its tlb. 2765 */ 2766 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 2767 2768 case 298: 2769 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 2770 DR_B2(eax) || RB_C0(eax)); 2771 2772 default: 2773 return (-1); 2774 2775 } 2776 } 2777 2778 /* 2779 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 2780 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 2781 */ 2782 int 2783 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 2784 { 2785 struct cpuid_info *cpi; 2786 uint_t osvwid; 2787 static int osvwfeature = -1; 2788 uint64_t osvwlength; 2789 2790 2791 cpi = cpu->cpu_m.mcpu_cpi; 2792 2793 /* confirm OSVW supported */ 2794 if (osvwfeature == -1) { 2795 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 2796 } else { 2797 /* assert that osvw feature setting is consistent on all cpus */ 2798 ASSERT(osvwfeature == 2799 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 2800 } 2801 if (!osvwfeature) 2802 return (-1); 2803 2804 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 2805 2806 switch (erratum) { 2807 case 298: /* osvwid is 0 */ 2808 osvwid = 0; 2809 if (osvwlength <= (uint64_t)osvwid) { 2810 /* osvwid 0 is unknown */ 2811 return (-1); 2812 } 2813 2814 /* 2815 * Check the OSVW STATUS MSR to determine the state 2816 * of the erratum where: 2817 * 0 - fixed by HW 2818 * 1 - BIOS has applied the workaround when BIOS 2819 * workaround is available. (Or for other errata, 2820 * OS workaround is required.) 2821 * For a value of 1, caller will confirm that the 2822 * erratum 298 workaround has indeed been applied by BIOS. 2823 * 2824 * A 1 may be set in cpus that have a HW fix 2825 * in a mixed cpu system. Regarding erratum 298: 2826 * In a multiprocessor platform, the workaround above 2827 * should be applied to all processors regardless of 2828 * silicon revision when an affected processor is 2829 * present. 2830 */ 2831 2832 return (rdmsr(MSR_AMD_OSVW_STATUS + 2833 (osvwid / OSVW_ID_CNT_PER_MSR)) & 2834 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 2835 2836 default: 2837 return (-1); 2838 } 2839 } 2840 2841 static const char assoc_str[] = "associativity"; 2842 static const char line_str[] = "line-size"; 2843 static const char size_str[] = "size"; 2844 2845 static void 2846 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2847 uint32_t val) 2848 { 2849 char buf[128]; 2850 2851 /* 2852 * ndi_prop_update_int() is used because it is desirable for 2853 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2854 */ 2855 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2856 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2857 } 2858 2859 /* 2860 * Intel-style cache/tlb description 2861 * 2862 * Standard cpuid level 2 gives a randomly ordered 2863 * selection of tags that index into a table that describes 2864 * cache and tlb properties. 2865 */ 2866 2867 static const char l1_icache_str[] = "l1-icache"; 2868 static const char l1_dcache_str[] = "l1-dcache"; 2869 static const char l2_cache_str[] = "l2-cache"; 2870 static const char l3_cache_str[] = "l3-cache"; 2871 static const char itlb4k_str[] = "itlb-4K"; 2872 static const char dtlb4k_str[] = "dtlb-4K"; 2873 static const char itlb2M_str[] = "itlb-2M"; 2874 static const char itlb4M_str[] = "itlb-4M"; 2875 static const char dtlb4M_str[] = "dtlb-4M"; 2876 static const char dtlb24_str[] = "dtlb0-2M-4M"; 2877 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2878 static const char itlb24_str[] = "itlb-2M-4M"; 2879 static const char dtlb44_str[] = "dtlb-4K-4M"; 2880 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2881 static const char sl2_cache_str[] = "sectored-l2-cache"; 2882 static const char itrace_str[] = "itrace-cache"; 2883 static const char sl3_cache_str[] = "sectored-l3-cache"; 2884 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 2885 2886 static const struct cachetab { 2887 uint8_t ct_code; 2888 uint8_t ct_assoc; 2889 uint16_t ct_line_size; 2890 size_t ct_size; 2891 const char *ct_label; 2892 } intel_ctab[] = { 2893 /* 2894 * maintain descending order! 2895 * 2896 * Codes ignored - Reason 2897 * ---------------------- 2898 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 2899 * f0H/f1H - Currently we do not interpret prefetch size by design 2900 */ 2901 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 2902 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 2903 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 2904 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 2905 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 2906 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 2907 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 2908 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 2909 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 2910 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 2911 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 2912 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 2913 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 2914 { 0xc0, 4, 0, 8, dtlb44_str }, 2915 { 0xba, 4, 0, 64, dtlb4k_str }, 2916 { 0xb4, 4, 0, 256, dtlb4k_str }, 2917 { 0xb3, 4, 0, 128, dtlb4k_str }, 2918 { 0xb2, 4, 0, 64, itlb4k_str }, 2919 { 0xb0, 4, 0, 128, itlb4k_str }, 2920 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2921 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2922 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2923 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2924 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2925 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2926 { 0x80, 8, 64, 512*1024, l2_cache_str}, 2927 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2928 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2929 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2930 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2931 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2932 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 2933 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 2934 { 0x73, 8, 0, 64*1024, itrace_str}, 2935 { 0x72, 8, 0, 32*1024, itrace_str}, 2936 { 0x71, 8, 0, 16*1024, itrace_str}, 2937 { 0x70, 8, 0, 12*1024, itrace_str}, 2938 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 2939 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 2940 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 2941 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 2942 { 0x5d, 0, 0, 256, dtlb44_str}, 2943 { 0x5c, 0, 0, 128, dtlb44_str}, 2944 { 0x5b, 0, 0, 64, dtlb44_str}, 2945 { 0x5a, 4, 0, 32, dtlb24_str}, 2946 { 0x59, 0, 0, 16, dtlb4k_str}, 2947 { 0x57, 4, 0, 16, dtlb4k_str}, 2948 { 0x56, 4, 0, 16, dtlb4M_str}, 2949 { 0x55, 0, 0, 7, itlb24_str}, 2950 { 0x52, 0, 0, 256, itlb424_str}, 2951 { 0x51, 0, 0, 128, itlb424_str}, 2952 { 0x50, 0, 0, 64, itlb424_str}, 2953 { 0x4f, 0, 0, 32, itlb4k_str}, 2954 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 2955 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 2956 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 2957 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 2958 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 2959 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 2960 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 2961 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 2962 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 2963 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 2964 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 2965 { 0x43, 4, 32, 512*1024, l2_cache_str}, 2966 { 0x42, 4, 32, 256*1024, l2_cache_str}, 2967 { 0x41, 4, 32, 128*1024, l2_cache_str}, 2968 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 2969 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 2970 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 2971 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 2972 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 2973 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 2974 { 0x30, 8, 64, 32*1024, l1_icache_str}, 2975 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 2976 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 2977 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 2978 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 2979 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 2980 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 2981 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 2982 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 2983 { 0x0b, 4, 0, 4, itlb4M_str}, 2984 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 2985 { 0x08, 4, 32, 16*1024, l1_icache_str}, 2986 { 0x06, 4, 32, 8*1024, l1_icache_str}, 2987 { 0x05, 4, 0, 32, dtlb4M_str}, 2988 { 0x04, 4, 0, 8, dtlb4M_str}, 2989 { 0x03, 4, 0, 64, dtlb4k_str}, 2990 { 0x02, 4, 0, 2, itlb4M_str}, 2991 { 0x01, 4, 0, 32, itlb4k_str}, 2992 { 0 } 2993 }; 2994 2995 static const struct cachetab cyrix_ctab[] = { 2996 { 0x70, 4, 0, 32, "tlb-4K" }, 2997 { 0x80, 4, 16, 16*1024, "l1-cache" }, 2998 { 0 } 2999 }; 3000 3001 /* 3002 * Search a cache table for a matching entry 3003 */ 3004 static const struct cachetab * 3005 find_cacheent(const struct cachetab *ct, uint_t code) 3006 { 3007 if (code != 0) { 3008 for (; ct->ct_code != 0; ct++) 3009 if (ct->ct_code <= code) 3010 break; 3011 if (ct->ct_code == code) 3012 return (ct); 3013 } 3014 return (NULL); 3015 } 3016 3017 /* 3018 * Populate cachetab entry with L2 or L3 cache-information using 3019 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3020 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3021 * information is found. 3022 */ 3023 static int 3024 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3025 { 3026 uint32_t level, i; 3027 int ret = 0; 3028 3029 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3030 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3031 3032 if (level == 2 || level == 3) { 3033 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3034 ct->ct_line_size = 3035 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3036 ct->ct_size = ct->ct_assoc * 3037 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3038 ct->ct_line_size * 3039 (cpi->cpi_std_4[i]->cp_ecx + 1); 3040 3041 if (level == 2) { 3042 ct->ct_label = l2_cache_str; 3043 } else if (level == 3) { 3044 ct->ct_label = l3_cache_str; 3045 } 3046 ret = 1; 3047 } 3048 } 3049 3050 return (ret); 3051 } 3052 3053 /* 3054 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3055 * The walk is terminated if the walker returns non-zero. 3056 */ 3057 static void 3058 intel_walk_cacheinfo(struct cpuid_info *cpi, 3059 void *arg, int (*func)(void *, const struct cachetab *)) 3060 { 3061 const struct cachetab *ct; 3062 struct cachetab des_49_ct, des_b1_ct; 3063 uint8_t *dp; 3064 int i; 3065 3066 if ((dp = cpi->cpi_cacheinfo) == NULL) 3067 return; 3068 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3069 /* 3070 * For overloaded descriptor 0x49 we use cpuid function 4 3071 * if supported by the current processor, to create 3072 * cache information. 3073 * For overloaded descriptor 0xb1 we use X86_PAE flag 3074 * to disambiguate the cache information. 3075 */ 3076 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3077 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3078 ct = &des_49_ct; 3079 } else if (*dp == 0xb1) { 3080 des_b1_ct.ct_code = 0xb1; 3081 des_b1_ct.ct_assoc = 4; 3082 des_b1_ct.ct_line_size = 0; 3083 if (x86_feature & X86_PAE) { 3084 des_b1_ct.ct_size = 8; 3085 des_b1_ct.ct_label = itlb2M_str; 3086 } else { 3087 des_b1_ct.ct_size = 4; 3088 des_b1_ct.ct_label = itlb4M_str; 3089 } 3090 ct = &des_b1_ct; 3091 } else { 3092 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3093 continue; 3094 } 3095 } 3096 3097 if (func(arg, ct) != 0) { 3098 break; 3099 } 3100 } 3101 } 3102 3103 /* 3104 * (Like the Intel one, except for Cyrix CPUs) 3105 */ 3106 static void 3107 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3108 void *arg, int (*func)(void *, const struct cachetab *)) 3109 { 3110 const struct cachetab *ct; 3111 uint8_t *dp; 3112 int i; 3113 3114 if ((dp = cpi->cpi_cacheinfo) == NULL) 3115 return; 3116 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3117 /* 3118 * Search Cyrix-specific descriptor table first .. 3119 */ 3120 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3121 if (func(arg, ct) != 0) 3122 break; 3123 continue; 3124 } 3125 /* 3126 * .. else fall back to the Intel one 3127 */ 3128 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3129 if (func(arg, ct) != 0) 3130 break; 3131 continue; 3132 } 3133 } 3134 } 3135 3136 /* 3137 * A cacheinfo walker that adds associativity, line-size, and size properties 3138 * to the devinfo node it is passed as an argument. 3139 */ 3140 static int 3141 add_cacheent_props(void *arg, const struct cachetab *ct) 3142 { 3143 dev_info_t *devi = arg; 3144 3145 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3146 if (ct->ct_line_size != 0) 3147 add_cache_prop(devi, ct->ct_label, line_str, 3148 ct->ct_line_size); 3149 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3150 return (0); 3151 } 3152 3153 3154 static const char fully_assoc[] = "fully-associative?"; 3155 3156 /* 3157 * AMD style cache/tlb description 3158 * 3159 * Extended functions 5 and 6 directly describe properties of 3160 * tlbs and various cache levels. 3161 */ 3162 static void 3163 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3164 { 3165 switch (assoc) { 3166 case 0: /* reserved; ignore */ 3167 break; 3168 default: 3169 add_cache_prop(devi, label, assoc_str, assoc); 3170 break; 3171 case 0xff: 3172 add_cache_prop(devi, label, fully_assoc, 1); 3173 break; 3174 } 3175 } 3176 3177 static void 3178 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3179 { 3180 if (size == 0) 3181 return; 3182 add_cache_prop(devi, label, size_str, size); 3183 add_amd_assoc(devi, label, assoc); 3184 } 3185 3186 static void 3187 add_amd_cache(dev_info_t *devi, const char *label, 3188 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3189 { 3190 if (size == 0 || line_size == 0) 3191 return; 3192 add_amd_assoc(devi, label, assoc); 3193 /* 3194 * Most AMD parts have a sectored cache. Multiple cache lines are 3195 * associated with each tag. A sector consists of all cache lines 3196 * associated with a tag. For example, the AMD K6-III has a sector 3197 * size of 2 cache lines per tag. 3198 */ 3199 if (lines_per_tag != 0) 3200 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3201 add_cache_prop(devi, label, line_str, line_size); 3202 add_cache_prop(devi, label, size_str, size * 1024); 3203 } 3204 3205 static void 3206 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3207 { 3208 switch (assoc) { 3209 case 0: /* off */ 3210 break; 3211 case 1: 3212 case 2: 3213 case 4: 3214 add_cache_prop(devi, label, assoc_str, assoc); 3215 break; 3216 case 6: 3217 add_cache_prop(devi, label, assoc_str, 8); 3218 break; 3219 case 8: 3220 add_cache_prop(devi, label, assoc_str, 16); 3221 break; 3222 case 0xf: 3223 add_cache_prop(devi, label, fully_assoc, 1); 3224 break; 3225 default: /* reserved; ignore */ 3226 break; 3227 } 3228 } 3229 3230 static void 3231 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3232 { 3233 if (size == 0 || assoc == 0) 3234 return; 3235 add_amd_l2_assoc(devi, label, assoc); 3236 add_cache_prop(devi, label, size_str, size); 3237 } 3238 3239 static void 3240 add_amd_l2_cache(dev_info_t *devi, const char *label, 3241 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3242 { 3243 if (size == 0 || assoc == 0 || line_size == 0) 3244 return; 3245 add_amd_l2_assoc(devi, label, assoc); 3246 if (lines_per_tag != 0) 3247 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3248 add_cache_prop(devi, label, line_str, line_size); 3249 add_cache_prop(devi, label, size_str, size * 1024); 3250 } 3251 3252 static void 3253 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3254 { 3255 struct cpuid_regs *cp; 3256 3257 if (cpi->cpi_xmaxeax < 0x80000005) 3258 return; 3259 cp = &cpi->cpi_extd[5]; 3260 3261 /* 3262 * 4M/2M L1 TLB configuration 3263 * 3264 * We report the size for 2M pages because AMD uses two 3265 * TLB entries for one 4M page. 3266 */ 3267 add_amd_tlb(devi, "dtlb-2M", 3268 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3269 add_amd_tlb(devi, "itlb-2M", 3270 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3271 3272 /* 3273 * 4K L1 TLB configuration 3274 */ 3275 3276 switch (cpi->cpi_vendor) { 3277 uint_t nentries; 3278 case X86_VENDOR_TM: 3279 if (cpi->cpi_family >= 5) { 3280 /* 3281 * Crusoe processors have 256 TLB entries, but 3282 * cpuid data format constrains them to only 3283 * reporting 255 of them. 3284 */ 3285 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3286 nentries = 256; 3287 /* 3288 * Crusoe processors also have a unified TLB 3289 */ 3290 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3291 nentries); 3292 break; 3293 } 3294 /*FALLTHROUGH*/ 3295 default: 3296 add_amd_tlb(devi, itlb4k_str, 3297 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3298 add_amd_tlb(devi, dtlb4k_str, 3299 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3300 break; 3301 } 3302 3303 /* 3304 * data L1 cache configuration 3305 */ 3306 3307 add_amd_cache(devi, l1_dcache_str, 3308 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3309 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3310 3311 /* 3312 * code L1 cache configuration 3313 */ 3314 3315 add_amd_cache(devi, l1_icache_str, 3316 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3317 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3318 3319 if (cpi->cpi_xmaxeax < 0x80000006) 3320 return; 3321 cp = &cpi->cpi_extd[6]; 3322 3323 /* Check for a unified L2 TLB for large pages */ 3324 3325 if (BITX(cp->cp_eax, 31, 16) == 0) 3326 add_amd_l2_tlb(devi, "l2-tlb-2M", 3327 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3328 else { 3329 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3330 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3331 add_amd_l2_tlb(devi, "l2-itlb-2M", 3332 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3333 } 3334 3335 /* Check for a unified L2 TLB for 4K pages */ 3336 3337 if (BITX(cp->cp_ebx, 31, 16) == 0) { 3338 add_amd_l2_tlb(devi, "l2-tlb-4K", 3339 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3340 } else { 3341 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3342 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3343 add_amd_l2_tlb(devi, "l2-itlb-4K", 3344 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3345 } 3346 3347 add_amd_l2_cache(devi, l2_cache_str, 3348 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3349 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3350 } 3351 3352 /* 3353 * There are two basic ways that the x86 world describes it cache 3354 * and tlb architecture - Intel's way and AMD's way. 3355 * 3356 * Return which flavor of cache architecture we should use 3357 */ 3358 static int 3359 x86_which_cacheinfo(struct cpuid_info *cpi) 3360 { 3361 switch (cpi->cpi_vendor) { 3362 case X86_VENDOR_Intel: 3363 if (cpi->cpi_maxeax >= 2) 3364 return (X86_VENDOR_Intel); 3365 break; 3366 case X86_VENDOR_AMD: 3367 /* 3368 * The K5 model 1 was the first part from AMD that reported 3369 * cache sizes via extended cpuid functions. 3370 */ 3371 if (cpi->cpi_family > 5 || 3372 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3373 return (X86_VENDOR_AMD); 3374 break; 3375 case X86_VENDOR_TM: 3376 if (cpi->cpi_family >= 5) 3377 return (X86_VENDOR_AMD); 3378 /*FALLTHROUGH*/ 3379 default: 3380 /* 3381 * If they have extended CPU data for 0x80000005 3382 * then we assume they have AMD-format cache 3383 * information. 3384 * 3385 * If not, and the vendor happens to be Cyrix, 3386 * then try our-Cyrix specific handler. 3387 * 3388 * If we're not Cyrix, then assume we're using Intel's 3389 * table-driven format instead. 3390 */ 3391 if (cpi->cpi_xmaxeax >= 0x80000005) 3392 return (X86_VENDOR_AMD); 3393 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3394 return (X86_VENDOR_Cyrix); 3395 else if (cpi->cpi_maxeax >= 2) 3396 return (X86_VENDOR_Intel); 3397 break; 3398 } 3399 return (-1); 3400 } 3401 3402 /* 3403 * create a node for the given cpu under the prom root node. 3404 * Also, create a cpu node in the device tree. 3405 */ 3406 static dev_info_t *cpu_nex_devi = NULL; 3407 static kmutex_t cpu_node_lock; 3408 3409 /* 3410 * Called from post_startup() and mp_startup() 3411 */ 3412 void 3413 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3414 { 3415 dev_info_t *cpu_devi; 3416 int create; 3417 3418 mutex_enter(&cpu_node_lock); 3419 3420 /* 3421 * create a nexus node for all cpus identified as 'cpu_id' under 3422 * the root node. 3423 */ 3424 if (cpu_nex_devi == NULL) { 3425 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3426 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3427 mutex_exit(&cpu_node_lock); 3428 return; 3429 } 3430 (void) ndi_devi_online(cpu_nex_devi, 0); 3431 } 3432 3433 /* 3434 * create a child node for cpu identified as 'cpu_id' 3435 */ 3436 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3437 cpu_id); 3438 if (cpu_devi == NULL) { 3439 mutex_exit(&cpu_node_lock); 3440 return; 3441 } 3442 3443 /* device_type */ 3444 3445 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3446 "device_type", "cpu"); 3447 3448 /* reg */ 3449 3450 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3451 "reg", cpu_id); 3452 3453 /* cpu-mhz, and clock-frequency */ 3454 3455 if (cpu_freq > 0) { 3456 long long mul; 3457 3458 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3459 "cpu-mhz", cpu_freq); 3460 3461 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3462 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3463 "clock-frequency", (int)mul); 3464 } 3465 3466 (void) ndi_devi_online(cpu_devi, 0); 3467 3468 if ((x86_feature & X86_CPUID) == 0) { 3469 mutex_exit(&cpu_node_lock); 3470 return; 3471 } 3472 3473 /* vendor-id */ 3474 3475 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3476 "vendor-id", cpi->cpi_vendorstr); 3477 3478 if (cpi->cpi_maxeax == 0) { 3479 mutex_exit(&cpu_node_lock); 3480 return; 3481 } 3482 3483 /* 3484 * family, model, and step 3485 */ 3486 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3487 "family", CPI_FAMILY(cpi)); 3488 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3489 "cpu-model", CPI_MODEL(cpi)); 3490 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3491 "stepping-id", CPI_STEP(cpi)); 3492 3493 /* type */ 3494 3495 switch (cpi->cpi_vendor) { 3496 case X86_VENDOR_Intel: 3497 create = 1; 3498 break; 3499 default: 3500 create = 0; 3501 break; 3502 } 3503 if (create) 3504 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3505 "type", CPI_TYPE(cpi)); 3506 3507 /* ext-family */ 3508 3509 switch (cpi->cpi_vendor) { 3510 case X86_VENDOR_Intel: 3511 case X86_VENDOR_AMD: 3512 create = cpi->cpi_family >= 0xf; 3513 break; 3514 default: 3515 create = 0; 3516 break; 3517 } 3518 if (create) 3519 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3520 "ext-family", CPI_FAMILY_XTD(cpi)); 3521 3522 /* ext-model */ 3523 3524 switch (cpi->cpi_vendor) { 3525 case X86_VENDOR_Intel: 3526 create = IS_EXTENDED_MODEL_INTEL(cpi); 3527 break; 3528 case X86_VENDOR_AMD: 3529 create = CPI_FAMILY(cpi) == 0xf; 3530 break; 3531 default: 3532 create = 0; 3533 break; 3534 } 3535 if (create) 3536 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3537 "ext-model", CPI_MODEL_XTD(cpi)); 3538 3539 /* generation */ 3540 3541 switch (cpi->cpi_vendor) { 3542 case X86_VENDOR_AMD: 3543 /* 3544 * AMD K5 model 1 was the first part to support this 3545 */ 3546 create = cpi->cpi_xmaxeax >= 0x80000001; 3547 break; 3548 default: 3549 create = 0; 3550 break; 3551 } 3552 if (create) 3553 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3554 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3555 3556 /* brand-id */ 3557 3558 switch (cpi->cpi_vendor) { 3559 case X86_VENDOR_Intel: 3560 /* 3561 * brand id first appeared on Pentium III Xeon model 8, 3562 * and Celeron model 8 processors and Opteron 3563 */ 3564 create = cpi->cpi_family > 6 || 3565 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3566 break; 3567 case X86_VENDOR_AMD: 3568 create = cpi->cpi_family >= 0xf; 3569 break; 3570 default: 3571 create = 0; 3572 break; 3573 } 3574 if (create && cpi->cpi_brandid != 0) { 3575 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3576 "brand-id", cpi->cpi_brandid); 3577 } 3578 3579 /* chunks, and apic-id */ 3580 3581 switch (cpi->cpi_vendor) { 3582 /* 3583 * first available on Pentium IV and Opteron (K8) 3584 */ 3585 case X86_VENDOR_Intel: 3586 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3587 break; 3588 case X86_VENDOR_AMD: 3589 create = cpi->cpi_family >= 0xf; 3590 break; 3591 default: 3592 create = 0; 3593 break; 3594 } 3595 if (create) { 3596 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3597 "chunks", CPI_CHUNKS(cpi)); 3598 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3599 "apic-id", cpi->cpi_apicid); 3600 if (cpi->cpi_chipid >= 0) { 3601 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3602 "chip#", cpi->cpi_chipid); 3603 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3604 "clog#", cpi->cpi_clogid); 3605 } 3606 } 3607 3608 /* cpuid-features */ 3609 3610 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3611 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3612 3613 3614 /* cpuid-features-ecx */ 3615 3616 switch (cpi->cpi_vendor) { 3617 case X86_VENDOR_Intel: 3618 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3619 break; 3620 default: 3621 create = 0; 3622 break; 3623 } 3624 if (create) 3625 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3626 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3627 3628 /* ext-cpuid-features */ 3629 3630 switch (cpi->cpi_vendor) { 3631 case X86_VENDOR_Intel: 3632 case X86_VENDOR_AMD: 3633 case X86_VENDOR_Cyrix: 3634 case X86_VENDOR_TM: 3635 case X86_VENDOR_Centaur: 3636 create = cpi->cpi_xmaxeax >= 0x80000001; 3637 break; 3638 default: 3639 create = 0; 3640 break; 3641 } 3642 if (create) { 3643 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3644 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3645 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3646 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3647 } 3648 3649 /* 3650 * Brand String first appeared in Intel Pentium IV, AMD K5 3651 * model 1, and Cyrix GXm. On earlier models we try and 3652 * simulate something similar .. so this string should always 3653 * same -something- about the processor, however lame. 3654 */ 3655 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3656 "brand-string", cpi->cpi_brandstr); 3657 3658 /* 3659 * Finally, cache and tlb information 3660 */ 3661 switch (x86_which_cacheinfo(cpi)) { 3662 case X86_VENDOR_Intel: 3663 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3664 break; 3665 case X86_VENDOR_Cyrix: 3666 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3667 break; 3668 case X86_VENDOR_AMD: 3669 amd_cache_info(cpi, cpu_devi); 3670 break; 3671 default: 3672 break; 3673 } 3674 3675 mutex_exit(&cpu_node_lock); 3676 } 3677 3678 struct l2info { 3679 int *l2i_csz; 3680 int *l2i_lsz; 3681 int *l2i_assoc; 3682 int l2i_ret; 3683 }; 3684 3685 /* 3686 * A cacheinfo walker that fetches the size, line-size and associativity 3687 * of the L2 cache 3688 */ 3689 static int 3690 intel_l2cinfo(void *arg, const struct cachetab *ct) 3691 { 3692 struct l2info *l2i = arg; 3693 int *ip; 3694 3695 if (ct->ct_label != l2_cache_str && 3696 ct->ct_label != sl2_cache_str) 3697 return (0); /* not an L2 -- keep walking */ 3698 3699 if ((ip = l2i->l2i_csz) != NULL) 3700 *ip = ct->ct_size; 3701 if ((ip = l2i->l2i_lsz) != NULL) 3702 *ip = ct->ct_line_size; 3703 if ((ip = l2i->l2i_assoc) != NULL) 3704 *ip = ct->ct_assoc; 3705 l2i->l2i_ret = ct->ct_size; 3706 return (1); /* was an L2 -- terminate walk */ 3707 } 3708 3709 /* 3710 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3711 * 3712 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3713 * value is the associativity, the associativity for the L2 cache and 3714 * tlb is encoded in the following table. The 4 bit L2 value serves as 3715 * an index into the amd_afd[] array to determine the associativity. 3716 * -1 is undefined. 0 is fully associative. 3717 */ 3718 3719 static int amd_afd[] = 3720 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3721 3722 static void 3723 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3724 { 3725 struct cpuid_regs *cp; 3726 uint_t size, assoc; 3727 int i; 3728 int *ip; 3729 3730 if (cpi->cpi_xmaxeax < 0x80000006) 3731 return; 3732 cp = &cpi->cpi_extd[6]; 3733 3734 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3735 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3736 uint_t cachesz = size * 1024; 3737 assoc = amd_afd[i]; 3738 3739 ASSERT(assoc != -1); 3740 3741 if ((ip = l2i->l2i_csz) != NULL) 3742 *ip = cachesz; 3743 if ((ip = l2i->l2i_lsz) != NULL) 3744 *ip = BITX(cp->cp_ecx, 7, 0); 3745 if ((ip = l2i->l2i_assoc) != NULL) 3746 *ip = assoc; 3747 l2i->l2i_ret = cachesz; 3748 } 3749 } 3750 3751 int 3752 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3753 { 3754 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3755 struct l2info __l2info, *l2i = &__l2info; 3756 3757 l2i->l2i_csz = csz; 3758 l2i->l2i_lsz = lsz; 3759 l2i->l2i_assoc = assoc; 3760 l2i->l2i_ret = -1; 3761 3762 switch (x86_which_cacheinfo(cpi)) { 3763 case X86_VENDOR_Intel: 3764 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3765 break; 3766 case X86_VENDOR_Cyrix: 3767 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3768 break; 3769 case X86_VENDOR_AMD: 3770 amd_l2cacheinfo(cpi, l2i); 3771 break; 3772 default: 3773 break; 3774 } 3775 return (l2i->l2i_ret); 3776 } 3777 3778 #if !defined(__xpv) 3779 3780 uint32_t * 3781 cpuid_mwait_alloc(cpu_t *cpu) 3782 { 3783 uint32_t *ret; 3784 size_t mwait_size; 3785 3786 ASSERT(cpuid_checkpass(cpu, 2)); 3787 3788 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3789 if (mwait_size == 0) 3790 return (NULL); 3791 3792 /* 3793 * kmem_alloc() returns cache line size aligned data for mwait_size 3794 * allocations. mwait_size is currently cache line sized. Neither 3795 * of these implementation details are guarantied to be true in the 3796 * future. 3797 * 3798 * First try allocating mwait_size as kmem_alloc() currently returns 3799 * correctly aligned memory. If kmem_alloc() does not return 3800 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3801 * 3802 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3803 * decide to free this memory. 3804 */ 3805 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3806 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3807 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3808 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3809 *ret = MWAIT_RUNNING; 3810 return (ret); 3811 } else { 3812 kmem_free(ret, mwait_size); 3813 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3814 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3815 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3816 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3817 *ret = MWAIT_RUNNING; 3818 return (ret); 3819 } 3820 } 3821 3822 void 3823 cpuid_mwait_free(cpu_t *cpu) 3824 { 3825 ASSERT(cpuid_checkpass(cpu, 2)); 3826 3827 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3828 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3829 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3830 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3831 } 3832 3833 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3834 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3835 } 3836 3837 void 3838 patch_tsc_read(int flag) 3839 { 3840 size_t cnt; 3841 3842 switch (flag) { 3843 case X86_NO_TSC: 3844 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3845 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3846 break; 3847 case X86_HAVE_TSCP: 3848 cnt = &_tscp_end - &_tscp_start; 3849 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3850 break; 3851 case X86_TSC_MFENCE: 3852 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 3853 (void) memcpy((void *)tsc_read, 3854 (void *)&_tsc_mfence_start, cnt); 3855 break; 3856 case X86_TSC_LFENCE: 3857 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 3858 (void) memcpy((void *)tsc_read, 3859 (void *)&_tsc_lfence_start, cnt); 3860 break; 3861 default: 3862 break; 3863 } 3864 } 3865 3866 int 3867 cpuid_deep_cstates_supported(void) 3868 { 3869 struct cpuid_info *cpi; 3870 struct cpuid_regs regs; 3871 3872 ASSERT(cpuid_checkpass(CPU, 1)); 3873 3874 cpi = CPU->cpu_m.mcpu_cpi; 3875 3876 if (!(x86_feature & X86_CPUID)) 3877 return (0); 3878 3879 switch (cpi->cpi_vendor) { 3880 case X86_VENDOR_Intel: 3881 if (cpi->cpi_xmaxeax < 0x80000007) 3882 return (0); 3883 3884 /* 3885 * TSC run at a constant rate in all ACPI C-states? 3886 */ 3887 regs.cp_eax = 0x80000007; 3888 (void) __cpuid_insn(®s); 3889 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 3890 3891 default: 3892 return (0); 3893 } 3894 } 3895 3896 #if defined(__amd64) && !defined(__xpv) 3897 /* 3898 * Patch in versions of bcopy for high performance Intel Nhm processors 3899 * and later... 3900 */ 3901 void 3902 patch_memops(uint_t vendor) 3903 { 3904 size_t cnt, i; 3905 caddr_t to, from; 3906 3907 if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) { 3908 cnt = &bcopy_patch_end - &bcopy_patch_start; 3909 to = &bcopy_ck_size; 3910 from = &bcopy_patch_start; 3911 for (i = 0; i < cnt; i++) { 3912 *to++ = *from++; 3913 } 3914 } 3915 } 3916 #endif /* __amd64 && !__xpv */ 3917 3918 #endif /* !__xpv */ 3919