1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Various routines to handle identification 28 * and classification of x86 processors. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/archsystm.h> 33 #include <sys/x86_archext.h> 34 #include <sys/kmem.h> 35 #include <sys/systm.h> 36 #include <sys/cmn_err.h> 37 #include <sys/sunddi.h> 38 #include <sys/sunndi.h> 39 #include <sys/cpuvar.h> 40 #include <sys/processor.h> 41 #include <sys/sysmacros.h> 42 #include <sys/pg.h> 43 #include <sys/fp.h> 44 #include <sys/controlregs.h> 45 #include <sys/auxv_386.h> 46 #include <sys/bitmap.h> 47 #include <sys/memnode.h> 48 49 #ifdef __xpv 50 #include <sys/hypervisor.h> 51 #endif 52 53 /* 54 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 55 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 56 * them accordingly. For most modern processors, feature detection occurs here 57 * in pass 1. 
58 * 59 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 60 * for the boot CPU and does the basic analysis that the early kernel needs. 61 * x86_feature is set based on the return value of cpuid_pass1() of the boot 62 * CPU. 63 * 64 * Pass 1 includes: 65 * 66 * o Determining vendor/model/family/stepping and setting x86_type and 67 * x86_vendor accordingly. 68 * o Processing the feature flags returned by the cpuid instruction while 69 * applying any workarounds or tricks for the specific processor. 70 * o Mapping the feature flags into Solaris feature bits (X86_*). 71 * o Processing extended feature flags if supported by the processor, 72 * again while applying specific processor knowledge. 73 * o Determining the CMT characteristics of the system. 74 * 75 * Pass 1 is done on non-boot CPUs during their initialization and the results 76 * are used only as a meager attempt at ensuring that all processors within the 77 * system support the same features. 78 * 79 * Pass 2 of cpuid feature analysis happens just at the beginning 80 * of startup(). It just copies in and corrects the remainder 81 * of the cpuid data we depend on: standard cpuid functions that we didn't 82 * need for pass1 feature analysis, and extended cpuid functions beyond the 83 * simple feature processing done in pass1. 84 * 85 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 86 * particular kernel memory allocation has been made available. It creates a 87 * readable brand string based on the data collected in the first two passes. 88 * 89 * Pass 4 of cpuid analysis is invoked after post_startup() when all 90 * the support infrastructure for various hardware features has been 91 * initialized. It determines which processor features will be reported 92 * to userland via the aux vector. 93 * 94 * All passes are executed on all CPUs, but only the boot CPU determines what 95 * features the kernel will use. 
96 * 97 * Much of the worst junk in this file is for the support of processors 98 * that didn't really implement the cpuid instruction properly. 99 * 100 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 101 * the pass numbers. Accordingly, changes to the pass code may require changes 102 * to the accessor code. 103 */ 104 105 uint_t x86_feature = 0; 106 uint_t x86_vendor = X86_VENDOR_IntelClone; 107 uint_t x86_type = X86_TYPE_OTHER; 108 109 uint_t pentiumpro_bug4046376; 110 uint_t pentiumpro_bug4064495; 111 112 uint_t enable486; 113 114 /* 115 * monitor/mwait info. 116 * 117 * size_actual and buf_actual are the real address and size allocated to get 118 * proper mwait_buf alignement. buf_actual and size_actual should be passed 119 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 120 * processor cache-line alignment, but this is not guarantied in the furture. 121 */ 122 struct mwait_info { 123 size_t mon_min; /* min size to avoid missed wakeups */ 124 size_t mon_max; /* size to avoid false wakeups */ 125 size_t size_actual; /* size actually allocated */ 126 void *buf_actual; /* memory actually allocated */ 127 uint32_t support; /* processor support of monitor/mwait */ 128 }; 129 130 /* 131 * These constants determine how many of the elements of the 132 * cpuid we cache in the cpuid_info data structure; the 133 * remaining elements are accessible via the cpuid instruction. 134 */ 135 136 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 137 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 
0x80000008 */ 138 139 struct cpuid_info { 140 uint_t cpi_pass; /* last pass completed */ 141 /* 142 * standard function information 143 */ 144 uint_t cpi_maxeax; /* fn 0: %eax */ 145 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 146 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 147 148 uint_t cpi_family; /* fn 1: extended family */ 149 uint_t cpi_model; /* fn 1: extended model */ 150 uint_t cpi_step; /* fn 1: stepping */ 151 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 152 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 153 int cpi_clogid; /* fn 1: %ebx: thread # */ 154 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 155 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 156 uint_t cpi_ncache; /* fn 2: number of elements */ 157 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 158 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 159 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 160 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 161 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 162 /* 163 * extended function information 164 */ 165 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 166 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 167 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 168 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 169 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 170 id_t cpi_coreid; /* same coreid => strands share core */ 171 int cpi_pkgcoreid; /* core number within single package */ 172 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 173 /* Intel: fn 4: %eax[31-26] */ 174 /* 175 * supported feature information 176 */ 177 uint32_t cpi_support[5]; 178 #define STD_EDX_FEATURES 0 179 #define AMD_EDX_FEATURES 1 180 #define TM_EDX_FEATURES 2 181 #define STD_ECX_FEATURES 3 182 #define AMD_ECX_FEATURES 4 183 /* 184 * Synthesized information, where known. 
185 */ 186 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 187 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 188 uint32_t cpi_socket; /* Chip package/socket type */ 189 190 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 191 uint32_t cpi_apicid; 192 }; 193 194 195 static struct cpuid_info cpuid_info0; 196 197 /* 198 * These bit fields are defined by the Intel Application Note AP-485 199 * "Intel Processor Identification and the CPUID Instruction" 200 */ 201 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 202 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 203 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 204 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 205 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 206 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 207 208 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 209 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 210 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 211 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 212 213 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 214 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 215 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 216 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 217 218 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 219 #define CPI_XMAXEAX_MAX 0x80000100 220 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 221 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 222 223 /* 224 * Function 4 (Deterministic Cache Parameters) macros 225 * Defined by Intel Application Note AP-485 226 */ 227 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 228 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 229 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 230 #define 
CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 231 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 232 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 233 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 234 235 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 236 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 237 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 238 239 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 240 241 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 242 243 244 /* 245 * A couple of shorthand macros to identify "later" P6-family chips 246 * like the Pentium M and Core. First, the "older" P6-based stuff 247 * (loosely defined as "pre-Pentium-4"): 248 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 249 */ 250 251 #define IS_LEGACY_P6(cpi) ( \ 252 cpi->cpi_family == 6 && \ 253 (cpi->cpi_model == 1 || \ 254 cpi->cpi_model == 3 || \ 255 cpi->cpi_model == 5 || \ 256 cpi->cpi_model == 6 || \ 257 cpi->cpi_model == 7 || \ 258 cpi->cpi_model == 8 || \ 259 cpi->cpi_model == 0xA || \ 260 cpi->cpi_model == 0xB) \ 261 ) 262 263 /* A "new F6" is everything with family 6 that's not the above */ 264 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 265 266 /* Extended family/model support */ 267 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 268 cpi->cpi_family >= 0xf) 269 270 /* 271 * Info for monitor/mwait idle loop. 272 * 273 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 274 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 275 * 2006. 276 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 277 * Documentation Updates" #33633, Rev 2.05, December 2006. 
278 */ 279 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 280 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 281 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 282 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 283 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 284 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 285 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 286 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 287 /* 288 * Number of sub-cstates for a given c-state. 289 */ 290 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 291 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 292 293 /* 294 * Functions we consune from cpuid_subr.c; don't publish these in a header 295 * file to try and keep people using the expected cpuid_* interfaces. 296 */ 297 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 298 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 299 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 300 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 301 302 /* 303 * Apply up various platform-dependent restrictions where the 304 * underlying platform restrictions mean the CPU can be marked 305 * as less capable than its cpuid instruction would imply. 306 */ 307 #if defined(__xpv) 308 static void 309 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 310 { 311 switch (eax) { 312 case 1: { 313 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ? 
314 0 : CPUID_INTC_EDX_MCA; 315 cp->cp_edx &= 316 ~(mcamask | 317 CPUID_INTC_EDX_PSE | 318 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 319 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 320 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 321 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 322 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 323 break; 324 } 325 326 case 0x80000001: 327 cp->cp_edx &= 328 ~(CPUID_AMD_EDX_PSE | 329 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 330 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 331 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 332 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 333 CPUID_AMD_EDX_TSCP); 334 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 335 break; 336 default: 337 break; 338 } 339 340 switch (vendor) { 341 case X86_VENDOR_Intel: 342 switch (eax) { 343 case 4: 344 /* 345 * Zero out the (ncores-per-chip - 1) field 346 */ 347 cp->cp_eax &= 0x03fffffff; 348 break; 349 default: 350 break; 351 } 352 break; 353 case X86_VENDOR_AMD: 354 switch (eax) { 355 case 0x80000008: 356 /* 357 * Zero out the (ncores-per-chip - 1) field 358 */ 359 cp->cp_ecx &= 0xffffff00; 360 break; 361 default: 362 break; 363 } 364 break; 365 default: 366 break; 367 } 368 } 369 #else 370 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 371 #endif 372 373 /* 374 * Some undocumented ways of patching the results of the cpuid 375 * instruction to permit running Solaris 10 on future cpus that 376 * we don't currently support. Could be set to non-zero values 377 * via settings in eeprom. 378 */ 379 380 uint32_t cpuid_feature_ecx_include; 381 uint32_t cpuid_feature_ecx_exclude; 382 uint32_t cpuid_feature_edx_include; 383 uint32_t cpuid_feature_edx_exclude; 384 385 void 386 cpuid_alloc_space(cpu_t *cpu) 387 { 388 /* 389 * By convention, cpu0 is the boot cpu, which is set up 390 * before memory allocation is available. All other cpus get 391 * their cpuid_info struct allocated here. 
392 */ 393 ASSERT(cpu->cpu_id != 0); 394 cpu->cpu_m.mcpu_cpi = 395 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 396 } 397 398 void 399 cpuid_free_space(cpu_t *cpu) 400 { 401 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 402 int i; 403 404 ASSERT(cpu->cpu_id != 0); 405 406 /* 407 * Free up any function 4 related dynamic storage 408 */ 409 for (i = 1; i < cpi->cpi_std_4_size; i++) 410 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 411 if (cpi->cpi_std_4_size > 0) 412 kmem_free(cpi->cpi_std_4, 413 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 414 415 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 416 } 417 418 #if !defined(__xpv) 419 420 static void 421 check_for_hvm() 422 { 423 struct cpuid_regs cp; 424 char *xen_str; 425 uint32_t xen_signature[4]; 426 extern int xpv_is_hvm; 427 428 /* 429 * In a fully virtualized domain, Xen's pseudo-cpuid function 430 * 0x40000000 returns a string representing the Xen signature in 431 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 432 * function. 
433 */ 434 cp.cp_eax = 0x40000000; 435 (void) __cpuid_insn(&cp); 436 xen_signature[0] = cp.cp_ebx; 437 xen_signature[1] = cp.cp_ecx; 438 xen_signature[2] = cp.cp_edx; 439 xen_signature[3] = 0; 440 xen_str = (char *)xen_signature; 441 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) 442 xpv_is_hvm = 1; 443 } 444 #endif /* __xpv */ 445 446 uint_t 447 cpuid_pass1(cpu_t *cpu) 448 { 449 uint32_t mask_ecx, mask_edx; 450 uint_t feature = X86_CPUID; 451 struct cpuid_info *cpi; 452 struct cpuid_regs *cp; 453 int xcpuid; 454 #if !defined(__xpv) 455 extern int idle_cpu_prefer_mwait; 456 #endif 457 458 /* 459 * Space statically allocated for cpu0, ensure pointer is set 460 */ 461 if (cpu->cpu_id == 0) 462 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 463 cpi = cpu->cpu_m.mcpu_cpi; 464 ASSERT(cpi != NULL); 465 cp = &cpi->cpi_std[0]; 466 cp->cp_eax = 0; 467 cpi->cpi_maxeax = __cpuid_insn(cp); 468 { 469 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 470 *iptr++ = cp->cp_ebx; 471 *iptr++ = cp->cp_edx; 472 *iptr++ = cp->cp_ecx; 473 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 474 } 475 476 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 477 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 478 479 /* 480 * Limit the range in case of weird hardware 481 */ 482 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 483 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 484 if (cpi->cpi_maxeax < 1) 485 goto pass1_done; 486 487 cp = &cpi->cpi_std[1]; 488 cp->cp_eax = 1; 489 (void) __cpuid_insn(cp); 490 491 /* 492 * Extract identifying constants for easy access. 493 */ 494 cpi->cpi_model = CPI_MODEL(cpi); 495 cpi->cpi_family = CPI_FAMILY(cpi); 496 497 if (cpi->cpi_family == 0xf) 498 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 499 500 /* 501 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 502 * Intel, and presumably everyone else, uses model == 0xf, as 503 * one would expect (max value means possible overflow). Sigh. 
504 */ 505 506 switch (cpi->cpi_vendor) { 507 case X86_VENDOR_Intel: 508 if (IS_EXTENDED_MODEL_INTEL(cpi)) 509 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 510 break; 511 case X86_VENDOR_AMD: 512 if (CPI_FAMILY(cpi) == 0xf) 513 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 514 break; 515 default: 516 if (cpi->cpi_model == 0xf) 517 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 518 break; 519 } 520 521 cpi->cpi_step = CPI_STEP(cpi); 522 cpi->cpi_brandid = CPI_BRANDID(cpi); 523 524 /* 525 * *default* assumptions: 526 * - believe %edx feature word 527 * - ignore %ecx feature word 528 * - 32-bit virtual and physical addressing 529 */ 530 mask_edx = 0xffffffff; 531 mask_ecx = 0; 532 533 cpi->cpi_pabits = cpi->cpi_vabits = 32; 534 535 switch (cpi->cpi_vendor) { 536 case X86_VENDOR_Intel: 537 if (cpi->cpi_family == 5) 538 x86_type = X86_TYPE_P5; 539 else if (IS_LEGACY_P6(cpi)) { 540 x86_type = X86_TYPE_P6; 541 pentiumpro_bug4046376 = 1; 542 pentiumpro_bug4064495 = 1; 543 /* 544 * Clear the SEP bit when it was set erroneously 545 */ 546 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 547 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 548 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 549 x86_type = X86_TYPE_P4; 550 /* 551 * We don't currently depend on any of the %ecx 552 * features until Prescott, so we'll only check 553 * this from P4 onwards. We might want to revisit 554 * that idea later. 555 */ 556 mask_ecx = 0xffffffff; 557 } else if (cpi->cpi_family > 0xf) 558 mask_ecx = 0xffffffff; 559 /* 560 * We don't support MONITOR/MWAIT if leaf 5 is not available 561 * to obtain the monitor linesize. 
562 */ 563 if (cpi->cpi_maxeax < 5) 564 mask_ecx &= ~CPUID_INTC_ECX_MON; 565 break; 566 case X86_VENDOR_IntelClone: 567 default: 568 break; 569 case X86_VENDOR_AMD: 570 #if defined(OPTERON_ERRATUM_108) 571 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 572 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 573 cpi->cpi_model = 0xc; 574 } else 575 #endif 576 if (cpi->cpi_family == 5) { 577 /* 578 * AMD K5 and K6 579 * 580 * These CPUs have an incomplete implementation 581 * of MCA/MCE which we mask away. 582 */ 583 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 584 585 /* 586 * Model 0 uses the wrong (APIC) bit 587 * to indicate PGE. Fix it here. 588 */ 589 if (cpi->cpi_model == 0) { 590 if (cp->cp_edx & 0x200) { 591 cp->cp_edx &= ~0x200; 592 cp->cp_edx |= CPUID_INTC_EDX_PGE; 593 } 594 } 595 596 /* 597 * Early models had problems w/ MMX; disable. 598 */ 599 if (cpi->cpi_model < 6) 600 mask_edx &= ~CPUID_INTC_EDX_MMX; 601 } 602 603 /* 604 * For newer families, SSE3 and CX16, at least, are valid; 605 * enable all 606 */ 607 if (cpi->cpi_family >= 0xf) 608 mask_ecx = 0xffffffff; 609 /* 610 * We don't support MONITOR/MWAIT if leaf 5 is not available 611 * to obtain the monitor linesize. 612 */ 613 if (cpi->cpi_maxeax < 5) 614 mask_ecx &= ~CPUID_INTC_ECX_MON; 615 616 #if !defined(__xpv) 617 /* 618 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 619 * processors. AMD does not intend MWAIT to be used in the cpu 620 * idle loop on current and future processors. 10h and future 621 * AMD processors use more power in MWAIT than HLT. 622 * Pre-family-10h Opterons do not have the MWAIT instruction. 
623 */ 624 idle_cpu_prefer_mwait = 0; 625 #endif 626 627 break; 628 case X86_VENDOR_TM: 629 /* 630 * workaround the NT workaround in CMS 4.1 631 */ 632 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 633 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 634 cp->cp_edx |= CPUID_INTC_EDX_CX8; 635 break; 636 case X86_VENDOR_Centaur: 637 /* 638 * workaround the NT workarounds again 639 */ 640 if (cpi->cpi_family == 6) 641 cp->cp_edx |= CPUID_INTC_EDX_CX8; 642 break; 643 case X86_VENDOR_Cyrix: 644 /* 645 * We rely heavily on the probing in locore 646 * to actually figure out what parts, if any, 647 * of the Cyrix cpuid instruction to believe. 648 */ 649 switch (x86_type) { 650 case X86_TYPE_CYRIX_486: 651 mask_edx = 0; 652 break; 653 case X86_TYPE_CYRIX_6x86: 654 mask_edx = 0; 655 break; 656 case X86_TYPE_CYRIX_6x86L: 657 mask_edx = 658 CPUID_INTC_EDX_DE | 659 CPUID_INTC_EDX_CX8; 660 break; 661 case X86_TYPE_CYRIX_6x86MX: 662 mask_edx = 663 CPUID_INTC_EDX_DE | 664 CPUID_INTC_EDX_MSR | 665 CPUID_INTC_EDX_CX8 | 666 CPUID_INTC_EDX_PGE | 667 CPUID_INTC_EDX_CMOV | 668 CPUID_INTC_EDX_MMX; 669 break; 670 case X86_TYPE_CYRIX_GXm: 671 mask_edx = 672 CPUID_INTC_EDX_MSR | 673 CPUID_INTC_EDX_CX8 | 674 CPUID_INTC_EDX_CMOV | 675 CPUID_INTC_EDX_MMX; 676 break; 677 case X86_TYPE_CYRIX_MediaGX: 678 break; 679 case X86_TYPE_CYRIX_MII: 680 case X86_TYPE_VIA_CYRIX_III: 681 mask_edx = 682 CPUID_INTC_EDX_DE | 683 CPUID_INTC_EDX_TSC | 684 CPUID_INTC_EDX_MSR | 685 CPUID_INTC_EDX_CX8 | 686 CPUID_INTC_EDX_PGE | 687 CPUID_INTC_EDX_CMOV | 688 CPUID_INTC_EDX_MMX; 689 break; 690 default: 691 break; 692 } 693 break; 694 } 695 696 #if defined(__xpv) 697 /* 698 * Do not support MONITOR/MWAIT under a hypervisor 699 */ 700 mask_ecx &= ~CPUID_INTC_ECX_MON; 701 #endif /* __xpv */ 702 703 /* 704 * Now we've figured out the masks that determine 705 * which bits we choose to believe, apply the masks 706 * to the feature words, then map the kernel's view 707 * of these feature words into its feature word. 
708 */ 709 cp->cp_edx &= mask_edx; 710 cp->cp_ecx &= mask_ecx; 711 712 /* 713 * apply any platform restrictions (we don't call this 714 * immediately after __cpuid_insn here, because we need the 715 * workarounds applied above first) 716 */ 717 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 718 719 /* 720 * fold in overrides from the "eeprom" mechanism 721 */ 722 cp->cp_edx |= cpuid_feature_edx_include; 723 cp->cp_edx &= ~cpuid_feature_edx_exclude; 724 725 cp->cp_ecx |= cpuid_feature_ecx_include; 726 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 727 728 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 729 feature |= X86_LARGEPAGE; 730 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 731 feature |= X86_TSC; 732 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 733 feature |= X86_MSR; 734 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 735 feature |= X86_MTRR; 736 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 737 feature |= X86_PGE; 738 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 739 feature |= X86_CMOV; 740 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 741 feature |= X86_MMX; 742 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 743 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 744 feature |= X86_MCA; 745 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 746 feature |= X86_PAE; 747 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 748 feature |= X86_CX8; 749 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 750 feature |= X86_CX16; 751 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 752 feature |= X86_PAT; 753 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 754 feature |= X86_SEP; 755 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 756 /* 757 * In our implementation, fxsave/fxrstor 758 * are prerequisites before we'll even 759 * try and do SSE things. 
760 */ 761 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 762 feature |= X86_SSE; 763 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 764 feature |= X86_SSE2; 765 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 766 feature |= X86_SSE3; 767 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 768 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 769 feature |= X86_SSSE3; 770 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 771 feature |= X86_SSE4_1; 772 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 773 feature |= X86_SSE4_2; 774 } 775 } 776 if (cp->cp_edx & CPUID_INTC_EDX_DE) 777 feature |= X86_DE; 778 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 779 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 780 feature |= X86_MWAIT; 781 } 782 783 if (feature & X86_PAE) 784 cpi->cpi_pabits = 36; 785 786 /* 787 * Hyperthreading configuration is slightly tricky on Intel 788 * and pure clones, and even trickier on AMD. 789 * 790 * (AMD chose to set the HTT bit on their CMP processors, 791 * even though they're not actually hyperthreaded. Thus it 792 * takes a bit more work to figure out what's really going 793 * on ... see the handling of the CMP_LGCY bit below) 794 */ 795 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 796 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 797 if (cpi->cpi_ncpu_per_chip > 1) 798 feature |= X86_HTT; 799 } else { 800 cpi->cpi_ncpu_per_chip = 1; 801 } 802 803 /* 804 * Work on the "extended" feature information, doing 805 * some basic initialization for cpuid_pass2() 806 */ 807 xcpuid = 0; 808 switch (cpi->cpi_vendor) { 809 case X86_VENDOR_Intel: 810 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 811 xcpuid++; 812 break; 813 case X86_VENDOR_AMD: 814 if (cpi->cpi_family > 5 || 815 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 816 xcpuid++; 817 break; 818 case X86_VENDOR_Cyrix: 819 /* 820 * Only these Cyrix CPUs are -known- to support 821 * extended cpuid operations. 
822 */ 823 if (x86_type == X86_TYPE_VIA_CYRIX_III || 824 x86_type == X86_TYPE_CYRIX_GXm) 825 xcpuid++; 826 break; 827 case X86_VENDOR_Centaur: 828 case X86_VENDOR_TM: 829 default: 830 xcpuid++; 831 break; 832 } 833 834 if (xcpuid) { 835 cp = &cpi->cpi_extd[0]; 836 cp->cp_eax = 0x80000000; 837 cpi->cpi_xmaxeax = __cpuid_insn(cp); 838 } 839 840 if (cpi->cpi_xmaxeax & 0x80000000) { 841 842 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 843 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 844 845 switch (cpi->cpi_vendor) { 846 case X86_VENDOR_Intel: 847 case X86_VENDOR_AMD: 848 if (cpi->cpi_xmaxeax < 0x80000001) 849 break; 850 cp = &cpi->cpi_extd[1]; 851 cp->cp_eax = 0x80000001; 852 (void) __cpuid_insn(cp); 853 854 if (cpi->cpi_vendor == X86_VENDOR_AMD && 855 cpi->cpi_family == 5 && 856 cpi->cpi_model == 6 && 857 cpi->cpi_step == 6) { 858 /* 859 * K6 model 6 uses bit 10 to indicate SYSC 860 * Later models use bit 11. Fix it here. 861 */ 862 if (cp->cp_edx & 0x400) { 863 cp->cp_edx &= ~0x400; 864 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 865 } 866 } 867 868 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 869 870 /* 871 * Compute the additions to the kernel's feature word. 872 */ 873 if (cp->cp_edx & CPUID_AMD_EDX_NX) 874 feature |= X86_NX; 875 876 #if defined(__amd64) 877 /* 1 GB large page - enable only for 64 bit kernel */ 878 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 879 feature |= X86_1GPG; 880 #endif 881 882 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 883 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 884 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 885 feature |= X86_SSE4A; 886 887 /* 888 * If both the HTT and CMP_LGCY bits are set, 889 * then we're not actually HyperThreaded. Read 890 * "AMD CPUID Specification" for more details. 
891 */ 892 if (cpi->cpi_vendor == X86_VENDOR_AMD && 893 (feature & X86_HTT) && 894 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 895 feature &= ~X86_HTT; 896 feature |= X86_CMP; 897 } 898 #if defined(__amd64) 899 /* 900 * It's really tricky to support syscall/sysret in 901 * the i386 kernel; we rely on sysenter/sysexit 902 * instead. In the amd64 kernel, things are -way- 903 * better. 904 */ 905 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 906 feature |= X86_ASYSC; 907 908 /* 909 * While we're thinking about system calls, note 910 * that AMD processors don't support sysenter 911 * in long mode at all, so don't try to program them. 912 */ 913 if (x86_vendor == X86_VENDOR_AMD) 914 feature &= ~X86_SEP; 915 #endif 916 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 917 feature |= X86_TSCP; 918 break; 919 default: 920 break; 921 } 922 923 /* 924 * Get CPUID data about processor cores and hyperthreads. 925 */ 926 switch (cpi->cpi_vendor) { 927 case X86_VENDOR_Intel: 928 if (cpi->cpi_maxeax >= 4) { 929 cp = &cpi->cpi_std[4]; 930 cp->cp_eax = 4; 931 cp->cp_ecx = 0; 932 (void) __cpuid_insn(cp); 933 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 934 } 935 /*FALLTHROUGH*/ 936 case X86_VENDOR_AMD: 937 if (cpi->cpi_xmaxeax < 0x80000008) 938 break; 939 cp = &cpi->cpi_extd[8]; 940 cp->cp_eax = 0x80000008; 941 (void) __cpuid_insn(cp); 942 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 943 944 /* 945 * Virtual and physical address limits from 946 * cpuid override previously guessed values. 
947 */ 948 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 949 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 950 break; 951 default: 952 break; 953 } 954 955 /* 956 * Derive the number of cores per chip 957 */ 958 switch (cpi->cpi_vendor) { 959 case X86_VENDOR_Intel: 960 if (cpi->cpi_maxeax < 4) { 961 cpi->cpi_ncore_per_chip = 1; 962 break; 963 } else { 964 cpi->cpi_ncore_per_chip = 965 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 966 } 967 break; 968 case X86_VENDOR_AMD: 969 if (cpi->cpi_xmaxeax < 0x80000008) { 970 cpi->cpi_ncore_per_chip = 1; 971 break; 972 } else { 973 /* 974 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 975 * 1 less than the number of physical cores on 976 * the chip. In family 0x10 this value can 977 * be affected by "downcoring" - it reflects 978 * 1 less than the number of cores actually 979 * enabled on this node. 980 */ 981 cpi->cpi_ncore_per_chip = 982 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 983 } 984 break; 985 default: 986 cpi->cpi_ncore_per_chip = 1; 987 break; 988 } 989 } else { 990 cpi->cpi_ncore_per_chip = 1; 991 } 992 993 /* 994 * If more than one core, then this processor is CMP. 995 */ 996 if (cpi->cpi_ncore_per_chip > 1) 997 feature |= X86_CMP; 998 999 /* 1000 * If the number of cores is the same as the number 1001 * of CPUs, then we cannot have HyperThreading. 1002 */ 1003 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1004 feature &= ~X86_HTT; 1005 1006 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1007 /* 1008 * Single-core single-threaded processors. 
1009 */ 1010 cpi->cpi_chipid = -1; 1011 cpi->cpi_clogid = 0; 1012 cpi->cpi_coreid = cpu->cpu_id; 1013 cpi->cpi_pkgcoreid = 0; 1014 } else if (cpi->cpi_ncpu_per_chip > 1) { 1015 uint_t i; 1016 uint_t chipid_shift = 0; 1017 uint_t coreid_shift = 0; 1018 uint_t apic_id = CPI_APIC_ID(cpi); 1019 1020 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1021 chipid_shift++; 1022 cpi->cpi_chipid = apic_id >> chipid_shift; 1023 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1024 1025 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1026 if (feature & X86_CMP) { 1027 /* 1028 * Multi-core (and possibly multi-threaded) 1029 * processors. 1030 */ 1031 uint_t ncpu_per_core; 1032 if (cpi->cpi_ncore_per_chip == 1) 1033 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1034 else if (cpi->cpi_ncore_per_chip > 1) 1035 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1036 cpi->cpi_ncore_per_chip; 1037 /* 1038 * 8bit APIC IDs on dual core Pentiums 1039 * look like this: 1040 * 1041 * +-----------------------+------+------+ 1042 * | Physical Package ID | MC | HT | 1043 * +-----------------------+------+------+ 1044 * <------- chipid --------> 1045 * <------- coreid ---------------> 1046 * <--- clogid --> 1047 * <------> 1048 * pkgcoreid 1049 * 1050 * Where the number of bits necessary to 1051 * represent MC and HT fields together equals 1052 * to the minimum number of bits necessary to 1053 * store the value of cpi->cpi_ncpu_per_chip. 1054 * Of those bits, the MC part uses the number 1055 * of bits necessary to store the value of 1056 * cpi->cpi_ncore_per_chip. 1057 */ 1058 for (i = 1; i < ncpu_per_core; i <<= 1) 1059 coreid_shift++; 1060 cpi->cpi_coreid = apic_id >> coreid_shift; 1061 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1062 coreid_shift; 1063 } else if (feature & X86_HTT) { 1064 /* 1065 * Single-core multi-threaded processors. 
1066 */ 1067 cpi->cpi_coreid = cpi->cpi_chipid; 1068 cpi->cpi_pkgcoreid = 0; 1069 } 1070 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1071 /* 1072 * AMD CMP chips currently have a single thread per 1073 * core, with 2 cores on family 0xf and 2, 3 or 4 1074 * cores on family 0x10. 1075 * 1076 * Since no two cpus share a core we must assign a 1077 * distinct coreid per cpu, and we do this by using 1078 * the cpu_id. This scheme does not, however, 1079 * guarantee that sibling cores of a chip will have 1080 * sequential coreids starting at a multiple of the 1081 * number of cores per chip - that is usually the 1082 * case, but if the ACPI MADT table is presented 1083 * in a different order then we need to perform a 1084 * few more gymnastics for the pkgcoreid. 1085 * 1086 * In family 0xf CMPs there are 2 cores on all nodes 1087 * present - no mixing of single and dual core parts. 1088 * 1089 * In family 0x10 CMPs cpuid fn 2 ECX[15:12] 1090 * "ApicIdCoreIdSize[3:0]" tells us how 1091 * many least-significant bits in the ApicId 1092 * are used to represent the core number 1093 * within the node. Cores are always 1094 * numbered sequentially from 0 regardless 1095 * of how many or which are disabled, and 1096 * there seems to be no way to discover the 1097 * real core id when some are disabled. 1098 */ 1099 cpi->cpi_coreid = cpu->cpu_id; 1100 1101 if (cpi->cpi_family == 0x10 && 1102 cpi->cpi_xmaxeax >= 0x80000008) { 1103 int coreidsz = 1104 BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 1105 1106 cpi->cpi_pkgcoreid = 1107 apic_id & ((1 << coreidsz) - 1); 1108 } else { 1109 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 1110 } 1111 } else { 1112 /* 1113 * All other processors are currently 1114 * assumed to have single cores. 
1115 */ 1116 cpi->cpi_coreid = cpi->cpi_chipid; 1117 cpi->cpi_pkgcoreid = 0; 1118 } 1119 } 1120 1121 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1122 1123 /* 1124 * Synthesize chip "revision" and socket type 1125 */ 1126 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 1127 cpi->cpi_model, cpi->cpi_step); 1128 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 1129 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 1130 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 1131 cpi->cpi_model, cpi->cpi_step); 1132 1133 pass1_done: 1134 #if !defined(__xpv) 1135 check_for_hvm(); 1136 #endif 1137 cpi->cpi_pass = 1; 1138 return (feature); 1139 } 1140 1141 /* 1142 * Make copies of the cpuid table entries we depend on, in 1143 * part for ease of parsing now, in part so that we have only 1144 * one place to correct any of it, in part for ease of 1145 * later export to userland, and in part so we can look at 1146 * this stuff in a crash dump. 1147 */ 1148 1149 /*ARGSUSED*/ 1150 void 1151 cpuid_pass2(cpu_t *cpu) 1152 { 1153 uint_t n, nmax; 1154 int i; 1155 struct cpuid_regs *cp; 1156 uint8_t *dp; 1157 uint32_t *iptr; 1158 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1159 1160 ASSERT(cpi->cpi_pass == 1); 1161 1162 if (cpi->cpi_maxeax < 1) 1163 goto pass2_done; 1164 1165 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1166 nmax = NMAX_CPI_STD; 1167 /* 1168 * (We already handled n == 0 and n == 1 in pass 1) 1169 */ 1170 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1171 cp->cp_eax = n; 1172 1173 /* 1174 * CPUID function 4 expects %ecx to be initialized 1175 * with an index which indicates which cache to return 1176 * information about. The OS is expected to call function 4 1177 * with %ecx set to 0, 1, 2, ... until it returns with 1178 * EAX[4:0] set to 0, which indicates there are no more 1179 * caches. 
1180 * 1181 * Here, populate cpi_std[4] with the information returned by 1182 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1183 * when dynamic memory allocation becomes available. 1184 * 1185 * Note: we need to explicitly initialize %ecx here, since 1186 * function 4 may have been previously invoked. 1187 */ 1188 if (n == 4) 1189 cp->cp_ecx = 0; 1190 1191 (void) __cpuid_insn(cp); 1192 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1193 switch (n) { 1194 case 2: 1195 /* 1196 * "the lower 8 bits of the %eax register 1197 * contain a value that identifies the number 1198 * of times the cpuid [instruction] has to be 1199 * executed to obtain a complete image of the 1200 * processor's caching systems." 1201 * 1202 * How *do* they make this stuff up? 1203 */ 1204 cpi->cpi_ncache = sizeof (*cp) * 1205 BITX(cp->cp_eax, 7, 0); 1206 if (cpi->cpi_ncache == 0) 1207 break; 1208 cpi->cpi_ncache--; /* skip count byte */ 1209 1210 /* 1211 * Well, for now, rather than attempt to implement 1212 * this slightly dubious algorithm, we just look 1213 * at the first 15 .. 
1214 */ 1215 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1216 cpi->cpi_ncache = sizeof (*cp) - 1; 1217 1218 dp = cpi->cpi_cacheinfo; 1219 if (BITX(cp->cp_eax, 31, 31) == 0) { 1220 uint8_t *p = (void *)&cp->cp_eax; 1221 for (i = 1; i < 4; i++) 1222 if (p[i] != 0) 1223 *dp++ = p[i]; 1224 } 1225 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1226 uint8_t *p = (void *)&cp->cp_ebx; 1227 for (i = 0; i < 4; i++) 1228 if (p[i] != 0) 1229 *dp++ = p[i]; 1230 } 1231 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1232 uint8_t *p = (void *)&cp->cp_ecx; 1233 for (i = 0; i < 4; i++) 1234 if (p[i] != 0) 1235 *dp++ = p[i]; 1236 } 1237 if (BITX(cp->cp_edx, 31, 31) == 0) { 1238 uint8_t *p = (void *)&cp->cp_edx; 1239 for (i = 0; i < 4; i++) 1240 if (p[i] != 0) 1241 *dp++ = p[i]; 1242 } 1243 break; 1244 1245 case 3: /* Processor serial number, if PSN supported */ 1246 break; 1247 1248 case 4: /* Deterministic cache parameters */ 1249 break; 1250 1251 case 5: /* Monitor/Mwait parameters */ 1252 { 1253 size_t mwait_size; 1254 1255 /* 1256 * check cpi_mwait.support which was set in cpuid_pass1 1257 */ 1258 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1259 break; 1260 1261 /* 1262 * Protect ourself from insane mwait line size. 1263 * Workaround for incomplete hardware emulator(s). 
1264 */ 1265 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1266 if (mwait_size < sizeof (uint32_t) || 1267 !ISP2(mwait_size)) { 1268 #if DEBUG 1269 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1270 "size %ld", 1271 cpu->cpu_id, (long)mwait_size); 1272 #endif 1273 break; 1274 } 1275 1276 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1277 cpi->cpi_mwait.mon_max = mwait_size; 1278 if (MWAIT_EXTENSION(cpi)) { 1279 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1280 if (MWAIT_INT_ENABLE(cpi)) 1281 cpi->cpi_mwait.support |= 1282 MWAIT_ECX_INT_ENABLE; 1283 } 1284 break; 1285 } 1286 default: 1287 break; 1288 } 1289 } 1290 1291 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1292 cp->cp_eax = 0xB; 1293 cp->cp_ecx = 0; 1294 1295 (void) __cpuid_insn(cp); 1296 1297 /* 1298 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1299 * indicates that the extended topology enumeration leaf is 1300 * available. 1301 */ 1302 if (cp->cp_ebx) { 1303 uint32_t x2apic_id; 1304 uint_t coreid_shift = 0; 1305 uint_t ncpu_per_core = 1; 1306 uint_t chipid_shift = 0; 1307 uint_t ncpu_per_chip = 1; 1308 uint_t i; 1309 uint_t level; 1310 1311 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1312 cp->cp_eax = 0xB; 1313 cp->cp_ecx = i; 1314 1315 (void) __cpuid_insn(cp); 1316 level = CPI_CPU_LEVEL_TYPE(cp); 1317 1318 if (level == 1) { 1319 x2apic_id = cp->cp_edx; 1320 coreid_shift = BITX(cp->cp_eax, 4, 0); 1321 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1322 } else if (level == 2) { 1323 x2apic_id = cp->cp_edx; 1324 chipid_shift = BITX(cp->cp_eax, 4, 0); 1325 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1326 } 1327 } 1328 1329 cpi->cpi_apicid = x2apic_id; 1330 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1331 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1332 ncpu_per_core; 1333 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1334 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1335 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1336 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1337 } 
1338 } 1339 1340 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1341 goto pass2_done; 1342 1343 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1344 nmax = NMAX_CPI_EXTD; 1345 /* 1346 * Copy the extended properties, fixing them as we go. 1347 * (We already handled n == 0 and n == 1 in pass 1) 1348 */ 1349 iptr = (void *)cpi->cpi_brandstr; 1350 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1351 cp->cp_eax = 0x80000000 + n; 1352 (void) __cpuid_insn(cp); 1353 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1354 switch (n) { 1355 case 2: 1356 case 3: 1357 case 4: 1358 /* 1359 * Extract the brand string 1360 */ 1361 *iptr++ = cp->cp_eax; 1362 *iptr++ = cp->cp_ebx; 1363 *iptr++ = cp->cp_ecx; 1364 *iptr++ = cp->cp_edx; 1365 break; 1366 case 5: 1367 switch (cpi->cpi_vendor) { 1368 case X86_VENDOR_AMD: 1369 /* 1370 * The Athlon and Duron were the first 1371 * parts to report the sizes of the 1372 * TLB for large pages. Before then, 1373 * we don't trust the data. 1374 */ 1375 if (cpi->cpi_family < 6 || 1376 (cpi->cpi_family == 6 && 1377 cpi->cpi_model < 1)) 1378 cp->cp_eax = 0; 1379 break; 1380 default: 1381 break; 1382 } 1383 break; 1384 case 6: 1385 switch (cpi->cpi_vendor) { 1386 case X86_VENDOR_AMD: 1387 /* 1388 * The Athlon and Duron were the first 1389 * AMD parts with L2 TLB's. 1390 * Before then, don't trust the data. 1391 */ 1392 if (cpi->cpi_family < 6 || 1393 cpi->cpi_family == 6 && 1394 cpi->cpi_model < 1) 1395 cp->cp_eax = cp->cp_ebx = 0; 1396 /* 1397 * AMD Duron rev A0 reports L2 1398 * cache size incorrectly as 1K 1399 * when it is really 64K 1400 */ 1401 if (cpi->cpi_family == 6 && 1402 cpi->cpi_model == 3 && 1403 cpi->cpi_step == 0) { 1404 cp->cp_ecx &= 0xffff; 1405 cp->cp_ecx |= 0x400000; 1406 } 1407 break; 1408 case X86_VENDOR_Cyrix: /* VIA C3 */ 1409 /* 1410 * VIA C3 processors are a bit messed 1411 * up w.r.t. 
encoding cache sizes in %ecx 1412 */ 1413 if (cpi->cpi_family != 6) 1414 break; 1415 /* 1416 * model 7 and 8 were incorrectly encoded 1417 * 1418 * xxx is model 8 really broken? 1419 */ 1420 if (cpi->cpi_model == 7 || 1421 cpi->cpi_model == 8) 1422 cp->cp_ecx = 1423 BITX(cp->cp_ecx, 31, 24) << 16 | 1424 BITX(cp->cp_ecx, 23, 16) << 12 | 1425 BITX(cp->cp_ecx, 15, 8) << 8 | 1426 BITX(cp->cp_ecx, 7, 0); 1427 /* 1428 * model 9 stepping 1 has wrong associativity 1429 */ 1430 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1431 cp->cp_ecx |= 8 << 12; 1432 break; 1433 case X86_VENDOR_Intel: 1434 /* 1435 * Extended L2 Cache features function. 1436 * First appeared on Prescott. 1437 */ 1438 default: 1439 break; 1440 } 1441 break; 1442 default: 1443 break; 1444 } 1445 } 1446 1447 pass2_done: 1448 cpi->cpi_pass = 2; 1449 } 1450 1451 static const char * 1452 intel_cpubrand(const struct cpuid_info *cpi) 1453 { 1454 int i; 1455 1456 if ((x86_feature & X86_CPUID) == 0 || 1457 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1458 return ("i486"); 1459 1460 switch (cpi->cpi_family) { 1461 case 5: 1462 return ("Intel Pentium(r)"); 1463 case 6: 1464 switch (cpi->cpi_model) { 1465 uint_t celeron, xeon; 1466 const struct cpuid_regs *cp; 1467 case 0: 1468 case 1: 1469 case 2: 1470 return ("Intel Pentium(r) Pro"); 1471 case 3: 1472 case 4: 1473 return ("Intel Pentium(r) II"); 1474 case 6: 1475 return ("Intel Celeron(r)"); 1476 case 5: 1477 case 7: 1478 celeron = xeon = 0; 1479 cp = &cpi->cpi_std[2]; /* cache info */ 1480 1481 for (i = 1; i < 4; i++) { 1482 uint_t tmp; 1483 1484 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1485 if (tmp == 0x40) 1486 celeron++; 1487 if (tmp >= 0x44 && tmp <= 0x45) 1488 xeon++; 1489 } 1490 1491 for (i = 0; i < 2; i++) { 1492 uint_t tmp; 1493 1494 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1495 if (tmp == 0x40) 1496 celeron++; 1497 else if (tmp >= 0x44 && tmp <= 0x45) 1498 xeon++; 1499 } 1500 1501 for (i = 0; i < 4; i++) { 1502 uint_t tmp; 1503 1504 tmp = (cp->cp_ecx >> (8 * 
i)) & 0xff; 1505 if (tmp == 0x40) 1506 celeron++; 1507 else if (tmp >= 0x44 && tmp <= 0x45) 1508 xeon++; 1509 } 1510 1511 for (i = 0; i < 4; i++) { 1512 uint_t tmp; 1513 1514 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1515 if (tmp == 0x40) 1516 celeron++; 1517 else if (tmp >= 0x44 && tmp <= 0x45) 1518 xeon++; 1519 } 1520 1521 if (celeron) 1522 return ("Intel Celeron(r)"); 1523 if (xeon) 1524 return (cpi->cpi_model == 5 ? 1525 "Intel Pentium(r) II Xeon(tm)" : 1526 "Intel Pentium(r) III Xeon(tm)"); 1527 return (cpi->cpi_model == 5 ? 1528 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1529 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1530 default: 1531 break; 1532 } 1533 default: 1534 break; 1535 } 1536 1537 /* BrandID is present if the field is nonzero */ 1538 if (cpi->cpi_brandid != 0) { 1539 static const struct { 1540 uint_t bt_bid; 1541 const char *bt_str; 1542 } brand_tbl[] = { 1543 { 0x1, "Intel(r) Celeron(r)" }, 1544 { 0x2, "Intel(r) Pentium(r) III" }, 1545 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1546 { 0x4, "Intel(r) Pentium(r) III" }, 1547 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1548 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1549 { 0x8, "Intel(r) Pentium(r) 4" }, 1550 { 0x9, "Intel(r) Pentium(r) 4" }, 1551 { 0xa, "Intel(r) Celeron(r)" }, 1552 { 0xb, "Intel(r) Xeon(tm)" }, 1553 { 0xc, "Intel(r) Xeon(tm) MP" }, 1554 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1555 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1556 { 0x11, "Mobile Genuine Intel(r)" }, 1557 { 0x12, "Intel(r) Celeron(r) M" }, 1558 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1559 { 0x14, "Intel(r) Celeron(r)" }, 1560 { 0x15, "Mobile Genuine Intel(r)" }, 1561 { 0x16, "Intel(r) Pentium(r) M" }, 1562 { 0x17, "Mobile Intel(r) Celeron(r)" } 1563 }; 1564 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1565 uint_t sgn; 1566 1567 sgn = (cpi->cpi_family << 8) | 1568 (cpi->cpi_model << 4) | cpi->cpi_step; 1569 1570 for (i = 0; i < btblmax; i++) 1571 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1572 
break;
		if (i < btblmax) {
			/*
			 * A few signature/brand id combinations are
			 * special-cased ahead of the table entry.
			 */
			if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
				return ("Intel(r) Celeron(r)");
			if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
				return ("Intel(r) Xeon(tm) MP");
			if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
				return ("Intel(r) Xeon(tm)");
			return (brand_tbl[i].bt_str);
		}
	}

	return (NULL);
}

/*
 * Return a constant brand name for an AMD processor based on its
 * family, model and (for family 0xf model 5) brand id, or NULL when
 * nothing matches.  Processors without cpuid, without fn 1, or with a
 * family below 5 are reported as "i486 compatible".
 */
static const char *
amd_cpubrand(const struct cpuid_info *cpi)
{
	if ((x86_feature & X86_CPUID) == 0 ||
	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
		return ("i486 compatible");

	switch (cpi->cpi_family) {
	case 5:
		switch (cpi->cpi_model) {
		case 0:
		case 1:
		case 2:
		case 3:
		case 4:
		case 5:
			return ("AMD-K5(r)");
		case 6:
		case 7:
			return ("AMD-K6(r)");
		case 8:
			return ("AMD-K6(r)-2");
		case 9:
			return ("AMD-K6(r)-III");
		default:
			return ("AMD (family 5)");
		}
	case 6:
		switch (cpi->cpi_model) {
		case 1:
			return ("AMD-K7(tm)");
		case 0:
		case 2:
		case 4:
			return ("AMD Athlon(tm)");
		case 3:
		case 7:
			return ("AMD Duron(tm)");
		case 6:
		case 8:
		case 10:
			/*
			 * Use the L2 cache size to distinguish
			 */
			return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
			    "AMD Athlon(tm)" : "AMD Duron(tm)");
		default:
			return ("AMD (family 6)");
		}
	default:
		break;
	}

	/* family 0xf model 5: bits 7:5 of the brand id pick the Opteron line */
	if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
	    cpi->cpi_brandid != 0) {
		switch (BITX(cpi->cpi_brandid, 7, 5)) {
		case 3:
			return ("AMD Opteron(tm) UP 1xx");
		case 4:
			return ("AMD Opteron(tm) DP 2xx");
		case 5:
			return ("AMD Opteron(tm) MP 8xx");
		default:
			return ("AMD Opteron(tm)");
		}
	}

	return (NULL);
}

/*
 * Return a constant brand name for a Cyrix/VIA processor, first from
 * the caller-supplied x86 type and otherwise from family/model, or
 * NULL when nothing matches.
 */
static const char *
cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
{
	if ((x86_feature & X86_CPUID) == 0 ||
	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
	    type == X86_TYPE_CYRIX_486)
		return ("i486 compatible");

	switch (type) {
	case X86_TYPE_CYRIX_6x86:
		return ("Cyrix 6x86");
	case X86_TYPE_CYRIX_6x86L:
		return ("Cyrix 6x86L");
	case X86_TYPE_CYRIX_6x86MX:
		return ("Cyrix 6x86MX");
	case X86_TYPE_CYRIX_GXm:
		return ("Cyrix GXm");
	case X86_TYPE_CYRIX_MediaGX:
		return ("Cyrix MediaGX");
	case X86_TYPE_CYRIX_MII:
		return ("Cyrix M2");
	case X86_TYPE_VIA_CYRIX_III:
		return ("VIA Cyrix M3");
	default:
		/*
		 * Have another wild guess ..
		 *
		 * NOTE(review): the family 4 test below cannot succeed,
		 * because families below 5 already returned above.
		 */
		if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
			return ("Cyrix 5x86");
		else if (cpi->cpi_family == 5) {
			switch (cpi->cpi_model) {
			case 2:
				return ("Cyrix 6x86");	/* Cyrix M1 */
			case 4:
				return ("Cyrix MediaGX");
			default:
				break;
			}
		} else if (cpi->cpi_family == 6) {
			switch (cpi->cpi_model) {
			case 0:
				return ("Cyrix 6x86MX"); /* Cyrix M2?
*/ 1699 case 5: 1700 case 6: 1701 case 7: 1702 case 8: 1703 case 9: 1704 return ("VIA C3"); 1705 default: 1706 break; 1707 } 1708 } 1709 break; 1710 } 1711 return (NULL); 1712 } 1713 1714 /* 1715 * This only gets called in the case that the CPU extended 1716 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1717 * aren't available, or contain null bytes for some reason. 1718 */ 1719 static void 1720 fabricate_brandstr(struct cpuid_info *cpi) 1721 { 1722 const char *brand = NULL; 1723 1724 switch (cpi->cpi_vendor) { 1725 case X86_VENDOR_Intel: 1726 brand = intel_cpubrand(cpi); 1727 break; 1728 case X86_VENDOR_AMD: 1729 brand = amd_cpubrand(cpi); 1730 break; 1731 case X86_VENDOR_Cyrix: 1732 brand = cyrix_cpubrand(cpi, x86_type); 1733 break; 1734 case X86_VENDOR_NexGen: 1735 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1736 brand = "NexGen Nx586"; 1737 break; 1738 case X86_VENDOR_Centaur: 1739 if (cpi->cpi_family == 5) 1740 switch (cpi->cpi_model) { 1741 case 4: 1742 brand = "Centaur C6"; 1743 break; 1744 case 8: 1745 brand = "Centaur C2"; 1746 break; 1747 case 9: 1748 brand = "Centaur C3"; 1749 break; 1750 default: 1751 break; 1752 } 1753 break; 1754 case X86_VENDOR_Rise: 1755 if (cpi->cpi_family == 5 && 1756 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1757 brand = "Rise mP6"; 1758 break; 1759 case X86_VENDOR_SiS: 1760 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1761 brand = "SiS 55x"; 1762 break; 1763 case X86_VENDOR_TM: 1764 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 1765 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1766 break; 1767 case X86_VENDOR_NSC: 1768 case X86_VENDOR_UMC: 1769 default: 1770 break; 1771 } 1772 if (brand) { 1773 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1774 return; 1775 } 1776 1777 /* 1778 * If all else fails ... 
1779 */ 1780 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1781 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1782 cpi->cpi_model, cpi->cpi_step); 1783 } 1784 1785 /* 1786 * This routine is called just after kernel memory allocation 1787 * becomes available on cpu0, and as part of mp_startup() on 1788 * the other cpus. 1789 * 1790 * Fixup the brand string, and collect any information from cpuid 1791 * that requires dynamicically allocated storage to represent. 1792 */ 1793 /*ARGSUSED*/ 1794 void 1795 cpuid_pass3(cpu_t *cpu) 1796 { 1797 int i, max, shft, level, size; 1798 struct cpuid_regs regs; 1799 struct cpuid_regs *cp; 1800 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1801 1802 ASSERT(cpi->cpi_pass == 2); 1803 1804 /* 1805 * Function 4: Deterministic cache parameters 1806 * 1807 * Take this opportunity to detect the number of threads 1808 * sharing the last level cache, and construct a corresponding 1809 * cache id. The respective cpuid_info members are initialized 1810 * to the default case of "no last level cache sharing". 1811 */ 1812 cpi->cpi_ncpu_shr_last_cache = 1; 1813 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1814 1815 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1816 1817 /* 1818 * Find the # of elements (size) returned by fn 4, and along 1819 * the way detect last level cache sharing details. 1820 */ 1821 bzero(®s, sizeof (regs)); 1822 cp = ®s; 1823 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1824 cp->cp_eax = 4; 1825 cp->cp_ecx = i; 1826 1827 (void) __cpuid_insn(cp); 1828 1829 if (CPI_CACHE_TYPE(cp) == 0) 1830 break; 1831 level = CPI_CACHE_LVL(cp); 1832 if (level > max) { 1833 max = level; 1834 cpi->cpi_ncpu_shr_last_cache = 1835 CPI_NTHR_SHR_CACHE(cp) + 1; 1836 } 1837 } 1838 cpi->cpi_std_4_size = size = i; 1839 1840 /* 1841 * Allocate the cpi_std_4 array. The first element 1842 * references the regs for fn 4, %ecx == 0, which 1843 * cpuid_pass2() stashed in cpi->cpi_std[4]. 
1844 */ 1845 if (size > 0) { 1846 cpi->cpi_std_4 = 1847 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1848 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1849 1850 /* 1851 * Allocate storage to hold the additional regs 1852 * for function 4, %ecx == 1 .. cpi_std_4_size. 1853 * 1854 * The regs for fn 4, %ecx == 0 has already 1855 * been allocated as indicated above. 1856 */ 1857 for (i = 1; i < size; i++) { 1858 cp = cpi->cpi_std_4[i] = 1859 kmem_zalloc(sizeof (regs), KM_SLEEP); 1860 cp->cp_eax = 4; 1861 cp->cp_ecx = i; 1862 1863 (void) __cpuid_insn(cp); 1864 } 1865 } 1866 /* 1867 * Determine the number of bits needed to represent 1868 * the number of CPUs sharing the last level cache. 1869 * 1870 * Shift off that number of bits from the APIC id to 1871 * derive the cache id. 1872 */ 1873 shft = 0; 1874 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1875 shft++; 1876 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 1877 } 1878 1879 /* 1880 * Now fixup the brand string 1881 */ 1882 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1883 fabricate_brandstr(cpi); 1884 } else { 1885 1886 /* 1887 * If we successfully extracted a brand string from the cpuid 1888 * instruction, clean it up by removing leading spaces and 1889 * similar junk. 1890 */ 1891 if (cpi->cpi_brandstr[0]) { 1892 size_t maxlen = sizeof (cpi->cpi_brandstr); 1893 char *src, *dst; 1894 1895 dst = src = (char *)cpi->cpi_brandstr; 1896 src[maxlen - 1] = '\0'; 1897 /* 1898 * strip leading spaces 1899 */ 1900 while (*src == ' ') 1901 src++; 1902 /* 1903 * Remove any 'Genuine' or "Authentic" prefixes 1904 */ 1905 if (strncmp(src, "Genuine ", 8) == 0) 1906 src += 8; 1907 if (strncmp(src, "Authentic ", 10) == 0) 1908 src += 10; 1909 1910 /* 1911 * Now do an in-place copy. 1912 * Map (R) to (r) and (TM) to (tm). 1913 * The era of teletypes is long gone, and there's 1914 * -really- no need to shout. 
*/
			while (*src != '\0') {
				if (src[0] == '(') {
					if (strncmp(src + 1, "R)", 2) == 0) {
						(void) strncpy(dst, "(r)", 3);
						src += 3;
						dst += 3;
						continue;
					}
					if (strncmp(src + 1, "TM)", 3) == 0) {
						(void) strncpy(dst, "(tm)", 4);
						src += 4;
						dst += 4;
						continue;
					}
				}
				*dst++ = *src++;
			}
			*dst = '\0';

			/*
			 * Finally, remove any trailing spaces
			 */
			while (--dst > cpi->cpi_brandstr)
				if (*dst == ' ')
					*dst = '\0';
				else
					break;
		} else
			fabricate_brandstr(cpi);
	}
	cpi->cpi_pass = 3;
}

/*
 * This routine is called out of bind_hwcap() much later in the life
 * of the kernel (post_startup()).  The job of this routine is to resolve
 * the hardware feature support and kernel support for those features into
 * what we're actually going to tell applications via the aux vector.
 *
 * A NULL cpu means the current CPU.  Requires cpi_pass == 3 and leaves
 * cpi_pass == 4.  The filtered feature words are stored in
 * cpi->cpi_support[]; the return value is the matching set of AV_386_*
 * hardware capability flags.
 */
uint_t
cpuid_pass4(cpu_t *cpu)
{
	struct cpuid_info *cpi;
	uint_t hwcap_flags = 0;

	if (cpu == NULL)
		cpu = CPU;
	cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpi->cpi_pass == 3);

	if (cpi->cpi_maxeax >= 1) {
		uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
		uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];

		*edx = CPI_FEATURES_EDX(cpi);
		*ecx = CPI_FEATURES_ECX(cpi);

		/*
		 * [these require explicit kernel support]
		 */
		if ((x86_feature & X86_SEP) == 0)
			*edx &= ~CPUID_INTC_EDX_SEP;

		if ((x86_feature & X86_SSE) == 0)
			*edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
		if ((x86_feature & X86_SSE2) == 0)
			*edx &= ~CPUID_INTC_EDX_SSE2;

		if ((x86_feature & X86_HTT) == 0)
			*edx &= ~CPUID_INTC_EDX_HTT;

		if ((x86_feature & X86_SSE3) == 0)
			*ecx &= ~CPUID_INTC_ECX_SSE3;

		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
			if ((x86_feature & X86_SSSE3) == 0)
				*ecx &= ~CPUID_INTC_ECX_SSSE3;
			if ((x86_feature & X86_SSE4_1) == 0)
				*ecx &= ~CPUID_INTC_ECX_SSE4_1;
			if ((x86_feature & X86_SSE4_2) == 0)
				*ecx &= ~CPUID_INTC_ECX_SSE4_2;
		}

		/*
		 * [no explicit support required beyond x87 fp context]
		 */
		if (!fpu_exists)
			*edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);

		/*
		 * Now map the supported feature vector to things that we
		 * think userland will care about.
		 */
		if (*edx & CPUID_INTC_EDX_SEP)
			hwcap_flags |= AV_386_SEP;
		if (*edx & CPUID_INTC_EDX_SSE)
			hwcap_flags |= AV_386_FXSR | AV_386_SSE;
		if (*edx & CPUID_INTC_EDX_SSE2)
			hwcap_flags |= AV_386_SSE2;
		if (*ecx & CPUID_INTC_ECX_SSE3)
			hwcap_flags |= AV_386_SSE3;
		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
			if (*ecx & CPUID_INTC_ECX_SSSE3)
				hwcap_flags |= AV_386_SSSE3;
			if (*ecx & CPUID_INTC_ECX_SSE4_1)
				hwcap_flags |= AV_386_SSE4_1;
			if (*ecx & CPUID_INTC_ECX_SSE4_2)
				hwcap_flags |= AV_386_SSE4_2;
		}
		if (*ecx & CPUID_INTC_ECX_POPCNT)
			hwcap_flags |= AV_386_POPCNT;
		if (*edx & CPUID_INTC_EDX_FPU)
			hwcap_flags |= AV_386_FPU;
		if (*edx & CPUID_INTC_EDX_MMX)
			hwcap_flags |= AV_386_MMX;

		if (*edx & CPUID_INTC_EDX_TSC)
			hwcap_flags |= AV_386_TSC;
		if (*edx & CPUID_INTC_EDX_CX8)
			hwcap_flags |= AV_386_CX8;
		if (*edx & CPUID_INTC_EDX_CMOV)
			hwcap_flags |= AV_386_CMOV;
		if (*ecx & CPUID_INTC_ECX_MON)
			hwcap_flags |= AV_386_MON;
		if (*ecx & CPUID_INTC_ECX_CX16)
			hwcap_flags |= AV_386_CX16;
	}

	/* AV_386_PAUSE is keyed off the kernel's X86_HTT feature bit */
	if (x86_feature & X86_HTT)
		hwcap_flags |= AV_386_PAUSE;

	if (cpi->cpi_xmaxeax < 0x80000001)
		goto pass4_done;

	switch (cpi->cpi_vendor) {
		struct cpuid_regs cp;
		uint32_t *edx, *ecx;

	case X86_VENDOR_Intel:
		/*
		 * Seems like Intel duplicated what was necessary
		 * here to make the initial crop of 64-bit OS's work.
		 * Hopefully, those are the only "extended" bits
		 * they'll add.
		 */
		/*FALLTHROUGH*/

	case X86_VENDOR_AMD:
		edx = &cpi->cpi_support[AMD_EDX_FEATURES];
		ecx = &cpi->cpi_support[AMD_ECX_FEATURES];

		*edx = CPI_FEATURES_XTD_EDX(cpi);
		*ecx = CPI_FEATURES_XTD_ECX(cpi);

		/*
		 * [these features require explicit kernel support]
		 */
		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
			if ((x86_feature & X86_TSCP) == 0)
				*edx &= ~CPUID_AMD_EDX_TSCP;
			break;

		case X86_VENDOR_AMD:
			if ((x86_feature & X86_TSCP) == 0)
				*edx &= ~CPUID_AMD_EDX_TSCP;
			if ((x86_feature & X86_SSE4A) == 0)
				*ecx &= ~CPUID_AMD_ECX_SSE4A;
			break;

		default:
			break;
		}

		/*
		 * [no explicit support required beyond
		 * x87 fp context and exception handlers]
		 */
		if (!fpu_exists)
			*edx &= ~(CPUID_AMD_EDX_MMXamd |
			    CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);

		if ((x86_feature & X86_NX) == 0)
			*edx &= ~CPUID_AMD_EDX_NX;
#if !defined(__amd64)
		*edx &= ~CPUID_AMD_EDX_LM;
#endif
		/*
		 * Now map the supported feature vector to
		 * things that we think userland will care about.
		 */
#if defined(__amd64)
		if (*edx & CPUID_AMD_EDX_SYSC)
			hwcap_flags |= AV_386_AMD_SYSC;
#endif
		if (*edx & CPUID_AMD_EDX_MMXamd)
			hwcap_flags |= AV_386_AMD_MMX;
		if (*edx & CPUID_AMD_EDX_3DNow)
			hwcap_flags |= AV_386_AMD_3DNow;
		if (*edx & CPUID_AMD_EDX_3DNowx)
			hwcap_flags |= AV_386_AMD_3DNowx;

		switch (cpi->cpi_vendor) {
		case X86_VENDOR_AMD:
			if (*edx & CPUID_AMD_EDX_TSCP)
				hwcap_flags |= AV_386_TSCP;
			if (*ecx & CPUID_AMD_ECX_AHF64)
				hwcap_flags |= AV_386_AHF;
			if (*ecx & CPUID_AMD_ECX_SSE4A)
				hwcap_flags |= AV_386_AMD_SSE4A;
			if (*ecx & CPUID_AMD_ECX_LZCNT)
				hwcap_flags |= AV_386_AMD_LZCNT;
			break;

		case X86_VENDOR_Intel:
			if (*edx & CPUID_AMD_EDX_TSCP)
				hwcap_flags |= AV_386_TSCP;
			/*
			 * Aarrgh.
			 * Intel uses a different bit in the same word.
			 */
			if (*ecx & CPUID_INTC_ECX_AHF64)
				hwcap_flags |= AV_386_AHF;
			break;

		default:
			break;
		}
		break;

	case X86_VENDOR_TM:
		/* Transmeta: record the %edx result of function 0x80860001 */
		cp.cp_eax = 0x80860001;
		(void) __cpuid_insn(&cp);
		cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
		break;

	default:
		break;
	}

pass4_done:
	cpi->cpi_pass = 4;
	return (hwcap_flags);
}


/*
 * Simulate the cpuid instruction using the data we previously
 * captured about this CPU.  We try our best to return the truth
 * about the hardware, independently of kernel support.
 *
 * A NULL cpu means the current CPU.  On entry cp->cp_eax selects the
 * function; on return cp holds the four result registers and the
 * function's return value is the resulting %eax.
 */
uint32_t
cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
{
	struct cpuid_info *cpi;
	struct cpuid_regs *xcp;

	if (cpu == NULL)
		cpu = CPU;
	cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpuid_checkpass(cpu, 3));

	/*
	 * CPUID data is cached in two separate places: cpi_std for standard
	 * CPUID functions, and cpi_extd for extended CPUID functions.
*/
	if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
		xcp = &cpi->cpi_std[cp->cp_eax];
	else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
	    cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
		xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
	else
		/*
		 * The caller is asking for data from an input parameter which
		 * the kernel has not cached.  In this case we go fetch from
		 * the hardware and return the data directly to the user.
		 */
		return (__cpuid_insn(cp));

	/* hand back the cached copy of the requested function */
	cp->cp_eax = xcp->cp_eax;
	cp->cp_ebx = xcp->cp_ebx;
	cp->cp_ecx = xcp->cp_ecx;
	cp->cp_edx = xcp->cp_edx;
	return (cp->cp_eax);
}

/*
 * Return non-zero if cpu is non-NULL, has cpuid information attached,
 * and that information has been through at least the given pass.
 */
int
cpuid_checkpass(cpu_t *cpu, int pass)
{
	return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
	    cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
}

/*
 * Format the (pass 3) brand string of cpu into s, writing at most n
 * bytes; returns whatever snprintf() returns.
 */
int
cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
{
	ASSERT(cpuid_checkpass(cpu, 3));

	return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
}

/*
 * Return non-zero if the cpu (NULL means the current CPU) has a
 * non-negative chip id.  Pass 1 stores a chip id of -1 for single-core,
 * single-threaded processors.
 */
int
cpuid_is_cmt(cpu_t *cpu)
{
	if (cpu == NULL)
		cpu = CPU;

	ASSERT(cpuid_checkpass(cpu, 1));

	return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
}

/*
 * AMD and Intel both implement the 64-bit variant of the syscall
 * instruction (syscallq), so if there's -any- support for syscall,
 * cpuid currently says "yes, we support this".
 *
 * However, Intel decided to -not- implement the 32-bit variant of the
 * syscall instruction, so we provide a predicate to allow our caller
 * to test that subtlety here.
 *
 * XXPV	Currently, 32-bit syscall instructions don't work via the hypervisor,
 *	even in the case where the hardware would in fact support it.
 */
/*ARGSUSED*/
int
cpuid_syscall32_insn(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass((cpu == NULL ?
CPU : cpu), 1));

#if !defined(__xpv)
	if (cpu == NULL)
		cpu = CPU;

	/*CSTYLED*/
	{
		struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

		/* only AMD parts advertising SYSC in extended fn 1 qualify */
		if (cpi->cpi_vendor == X86_VENDOR_AMD &&
		    cpi->cpi_xmaxeax >= 0x80000001 &&
		    (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
			return (1);
	}
#endif
	return (0);
}

/*
 * Format an identification string for cpu into s (at most n bytes):
 * vendor string, fn 1 %eax signature, family, model, stepping and clock,
 * prefixed with the chip id when cpuid_is_cmt() is true.  Returns
 * whatever snprintf() returns.
 */
int
cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
{
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

	static const char fmt[] =
	    "x86 (%s %X family %d model %d step %d clock %d MHz)";
	static const char fmt_ht[] =
	    "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";

	ASSERT(cpuid_checkpass(cpu, 1));

	if (cpuid_is_cmt(cpu))
		return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
		    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
		    cpi->cpi_family, cpi->cpi_model,
		    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
	return (snprintf(s, n, fmt,
	    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
	    cpi->cpi_family, cpi->cpi_model,
	    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
}

/*
 * The accessors below each return one field of the cpu's cpuid_info
 * and assert that pass 1 has completed for that cpu.
 */

/* Vendor string recorded for cpu. */
const char *
cpuid_getvendorstr(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
}

/* Vendor code (compared against X86_VENDOR_* elsewhere in this file). */
uint_t
cpuid_getvendor(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
}

/* Processor family. */
uint_t
cpuid_getfamily(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_family);
}

/* Processor model. */
uint_t
cpuid_getmodel(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_model);
}

/* Number of logical cpus per chip. */
uint_t
cpuid_get_ncpu_per_chip(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
}

/* Number of cores per chip. */
uint_t
cpuid_get_ncore_per_chip(cpu_t *cpu)
{
ASSERT(cpuid_checkpass(cpu, 1)); 2327 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2328 } 2329 2330 uint_t 2331 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2332 { 2333 ASSERT(cpuid_checkpass(cpu, 2)); 2334 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2335 } 2336 2337 id_t 2338 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2339 { 2340 ASSERT(cpuid_checkpass(cpu, 2)); 2341 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2342 } 2343 2344 uint_t 2345 cpuid_getstep(cpu_t *cpu) 2346 { 2347 ASSERT(cpuid_checkpass(cpu, 1)); 2348 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2349 } 2350 2351 uint_t 2352 cpuid_getsig(struct cpu *cpu) 2353 { 2354 ASSERT(cpuid_checkpass(cpu, 1)); 2355 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2356 } 2357 2358 uint32_t 2359 cpuid_getchiprev(struct cpu *cpu) 2360 { 2361 ASSERT(cpuid_checkpass(cpu, 1)); 2362 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2363 } 2364 2365 const char * 2366 cpuid_getchiprevstr(struct cpu *cpu) 2367 { 2368 ASSERT(cpuid_checkpass(cpu, 1)); 2369 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2370 } 2371 2372 uint32_t 2373 cpuid_getsockettype(struct cpu *cpu) 2374 { 2375 ASSERT(cpuid_checkpass(cpu, 1)); 2376 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2377 } 2378 2379 int 2380 cpuid_get_chipid(cpu_t *cpu) 2381 { 2382 ASSERT(cpuid_checkpass(cpu, 1)); 2383 2384 if (cpuid_is_cmt(cpu)) 2385 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2386 return (cpu->cpu_id); 2387 } 2388 2389 id_t 2390 cpuid_get_coreid(cpu_t *cpu) 2391 { 2392 ASSERT(cpuid_checkpass(cpu, 1)); 2393 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2394 } 2395 2396 int 2397 cpuid_get_pkgcoreid(cpu_t *cpu) 2398 { 2399 ASSERT(cpuid_checkpass(cpu, 1)); 2400 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2401 } 2402 2403 int 2404 cpuid_get_clogid(cpu_t *cpu) 2405 { 2406 ASSERT(cpuid_checkpass(cpu, 1)); 2407 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2408 } 2409 2410 void 2411 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2412 { 2413 struct cpuid_info 
*cpi; 2414 2415 if (cpu == NULL) 2416 cpu = CPU; 2417 cpi = cpu->cpu_m.mcpu_cpi; 2418 2419 ASSERT(cpuid_checkpass(cpu, 1)); 2420 2421 if (pabits) 2422 *pabits = cpi->cpi_pabits; 2423 if (vabits) 2424 *vabits = cpi->cpi_vabits; 2425 } 2426 2427 /* 2428 * Returns the number of data TLB entries for a corresponding 2429 * pagesize. If it can't be computed, or isn't known, the 2430 * routine returns zero. If you ask about an architecturally 2431 * impossible pagesize, the routine will panic (so that the 2432 * hat implementor knows that things are inconsistent.) 2433 */ 2434 uint_t 2435 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2436 { 2437 struct cpuid_info *cpi; 2438 uint_t dtlb_nent = 0; 2439 2440 if (cpu == NULL) 2441 cpu = CPU; 2442 cpi = cpu->cpu_m.mcpu_cpi; 2443 2444 ASSERT(cpuid_checkpass(cpu, 1)); 2445 2446 /* 2447 * Check the L2 TLB info 2448 */ 2449 if (cpi->cpi_xmaxeax >= 0x80000006) { 2450 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2451 2452 switch (pagesize) { 2453 2454 case 4 * 1024: 2455 /* 2456 * All zero in the top 16 bits of the register 2457 * indicates a unified TLB. Size is in low 16 bits. 2458 */ 2459 if ((cp->cp_ebx & 0xffff0000) == 0) 2460 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2461 else 2462 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2463 break; 2464 2465 case 2 * 1024 * 1024: 2466 if ((cp->cp_eax & 0xffff0000) == 0) 2467 dtlb_nent = cp->cp_eax & 0x0000ffff; 2468 else 2469 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2470 break; 2471 2472 default: 2473 panic("unknown L2 pagesize"); 2474 /*NOTREACHED*/ 2475 } 2476 } 2477 2478 if (dtlb_nent != 0) 2479 return (dtlb_nent); 2480 2481 /* 2482 * No L2 TLB support for this size, try L1. 
2483 */ 2484 if (cpi->cpi_xmaxeax >= 0x80000005) { 2485 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2486 2487 switch (pagesize) { 2488 case 4 * 1024: 2489 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2490 break; 2491 case 2 * 1024 * 1024: 2492 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2493 break; 2494 default: 2495 panic("unknown L1 d-TLB pagesize"); 2496 /*NOTREACHED*/ 2497 } 2498 } 2499 2500 return (dtlb_nent); 2501 } 2502 2503 /* 2504 * Return 0 if the erratum is not present or not applicable, positive 2505 * if it is, and negative if the status of the erratum is unknown. 2506 * 2507 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2508 * Processors" #25759, Rev 3.57, August 2005 2509 */ 2510 int 2511 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2512 { 2513 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2514 uint_t eax; 2515 2516 /* 2517 * Bail out if this CPU isn't an AMD CPU, or if it's 2518 * a legacy (32-bit) AMD CPU. 2519 */ 2520 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2521 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2522 cpi->cpi_family == 6) 2523 2524 return (0); 2525 2526 eax = cpi->cpi_std[1].cp_eax; 2527 2528 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2529 #define SH_B3(eax) (eax == 0xf51) 2530 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2531 2532 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2533 2534 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2535 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2536 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2537 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2538 2539 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2540 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2541 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2542 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2543 2544 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2545 #define JH_E1(eax) (eax == 0x20f10) /* 
JH8_E0 had 0x20f30 */ 2546 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2547 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2548 #define BH_E4(eax) (eax == 0x20fb1) 2549 #define SH_E5(eax) (eax == 0x20f42) 2550 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2551 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2552 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2553 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2554 DH_E6(eax) || JH_E6(eax)) 2555 2556 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2557 #define DR_B0(eax) (eax == 0x100f20) 2558 #define DR_B1(eax) (eax == 0x100f21) 2559 #define DR_BA(eax) (eax == 0x100f2a) 2560 #define DR_B2(eax) (eax == 0x100f22) 2561 #define DR_B3(eax) (eax == 0x100f23) 2562 #define RB_C0(eax) (eax == 0x100f40) 2563 2564 switch (erratum) { 2565 case 1: 2566 return (cpi->cpi_family < 0x10); 2567 case 51: /* what does the asterisk mean? */ 2568 return (B(eax) || SH_C0(eax) || CG(eax)); 2569 case 52: 2570 return (B(eax)); 2571 case 57: 2572 return (cpi->cpi_family <= 0x11); 2573 case 58: 2574 return (B(eax)); 2575 case 60: 2576 return (cpi->cpi_family <= 0x11); 2577 case 61: 2578 case 62: 2579 case 63: 2580 case 64: 2581 case 65: 2582 case 66: 2583 case 68: 2584 case 69: 2585 case 70: 2586 case 71: 2587 return (B(eax)); 2588 case 72: 2589 return (SH_B0(eax)); 2590 case 74: 2591 return (B(eax)); 2592 case 75: 2593 return (cpi->cpi_family < 0x10); 2594 case 76: 2595 return (B(eax)); 2596 case 77: 2597 return (cpi->cpi_family <= 0x11); 2598 case 78: 2599 return (B(eax) || SH_C0(eax)); 2600 case 79: 2601 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2602 case 80: 2603 case 81: 2604 case 82: 2605 return (B(eax)); 2606 case 83: 2607 return (B(eax) || SH_C0(eax) || CG(eax)); 2608 case 85: 2609 return (cpi->cpi_family < 0x10); 2610 case 86: 2611 return (SH_C0(eax) || CG(eax)); 2612 case 88: 2613 #if !defined(__amd64) 2614 return (0); 2615 #else 2616 return 
(B(eax) || SH_C0(eax)); 2617 #endif 2618 case 89: 2619 return (cpi->cpi_family < 0x10); 2620 case 90: 2621 return (B(eax) || SH_C0(eax) || CG(eax)); 2622 case 91: 2623 case 92: 2624 return (B(eax) || SH_C0(eax)); 2625 case 93: 2626 return (SH_C0(eax)); 2627 case 94: 2628 return (B(eax) || SH_C0(eax) || CG(eax)); 2629 case 95: 2630 #if !defined(__amd64) 2631 return (0); 2632 #else 2633 return (B(eax) || SH_C0(eax)); 2634 #endif 2635 case 96: 2636 return (B(eax) || SH_C0(eax) || CG(eax)); 2637 case 97: 2638 case 98: 2639 return (SH_C0(eax) || CG(eax)); 2640 case 99: 2641 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2642 case 100: 2643 return (B(eax) || SH_C0(eax)); 2644 case 101: 2645 case 103: 2646 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2647 case 104: 2648 return (SH_C0(eax) || CG(eax) || D0(eax)); 2649 case 105: 2650 case 106: 2651 case 107: 2652 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2653 case 108: 2654 return (DH_CG(eax)); 2655 case 109: 2656 return (SH_C0(eax) || CG(eax) || D0(eax)); 2657 case 110: 2658 return (D0(eax) || EX(eax)); 2659 case 111: 2660 return (CG(eax)); 2661 case 112: 2662 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2663 case 113: 2664 return (eax == 0x20fc0); 2665 case 114: 2666 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2667 case 115: 2668 return (SH_E0(eax) || JH_E1(eax)); 2669 case 116: 2670 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2671 case 117: 2672 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2673 case 118: 2674 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2675 JH_E6(eax)); 2676 case 121: 2677 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2678 case 122: 2679 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2680 case 123: 2681 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2682 case 131: 2683 return (cpi->cpi_family < 0x10); 2684 case 6336786: 2685 /* 2686 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2687 * if 
this is a K8 family or newer processor 2688 */ 2689 if (CPI_FAMILY(cpi) == 0xf) { 2690 struct cpuid_regs regs; 2691 regs.cp_eax = 0x80000007; 2692 (void) __cpuid_insn(®s); 2693 return (!(regs.cp_edx & 0x100)); 2694 } 2695 return (0); 2696 case 6323525: 2697 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2698 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2699 2700 case 6671130: 2701 /* 2702 * check for processors (pre-Shanghai) that do not provide 2703 * optimal management of 1gb ptes in its tlb. 2704 */ 2705 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 2706 2707 case 298: 2708 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 2709 DR_B2(eax) || RB_C0(eax)); 2710 2711 default: 2712 return (-1); 2713 2714 } 2715 } 2716 2717 /* 2718 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 2719 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 2720 */ 2721 int 2722 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 2723 { 2724 struct cpuid_info *cpi; 2725 uint_t osvwid; 2726 static int osvwfeature = -1; 2727 uint64_t osvwlength; 2728 2729 2730 cpi = cpu->cpu_m.mcpu_cpi; 2731 2732 /* confirm OSVW supported */ 2733 if (osvwfeature == -1) { 2734 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 2735 } else { 2736 /* assert that osvw feature setting is consistent on all cpus */ 2737 ASSERT(osvwfeature == 2738 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 2739 } 2740 if (!osvwfeature) 2741 return (-1); 2742 2743 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 2744 2745 switch (erratum) { 2746 case 298: /* osvwid is 0 */ 2747 osvwid = 0; 2748 if (osvwlength <= (uint64_t)osvwid) { 2749 /* osvwid 0 is unknown */ 2750 return (-1); 2751 } 2752 2753 /* 2754 * Check the OSVW STATUS MSR to determine the state 2755 * of the erratum where: 2756 * 0 - fixed by HW 2757 * 1 - BIOS has applied the workaround when BIOS 2758 * workaround is available. 
(Or for other errata, 2759 * OS workaround is required.) 2760 * For a value of 1, caller will confirm that the 2761 * erratum 298 workaround has indeed been applied by BIOS. 2762 * 2763 * A 1 may be set in cpus that have a HW fix 2764 * in a mixed cpu system. Regarding erratum 298: 2765 * In a multiprocessor platform, the workaround above 2766 * should be applied to all processors regardless of 2767 * silicon revision when an affected processor is 2768 * present. 2769 */ 2770 2771 return (rdmsr(MSR_AMD_OSVW_STATUS + 2772 (osvwid / OSVW_ID_CNT_PER_MSR)) & 2773 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 2774 2775 default: 2776 return (-1); 2777 } 2778 } 2779 2780 static const char assoc_str[] = "associativity"; 2781 static const char line_str[] = "line-size"; 2782 static const char size_str[] = "size"; 2783 2784 static void 2785 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2786 uint32_t val) 2787 { 2788 char buf[128]; 2789 2790 /* 2791 * ndi_prop_update_int() is used because it is desirable for 2792 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2793 */ 2794 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2795 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2796 } 2797 2798 /* 2799 * Intel-style cache/tlb description 2800 * 2801 * Standard cpuid level 2 gives a randomly ordered 2802 * selection of tags that index into a table that describes 2803 * cache and tlb properties. 
2804 */ 2805 2806 static const char l1_icache_str[] = "l1-icache"; 2807 static const char l1_dcache_str[] = "l1-dcache"; 2808 static const char l2_cache_str[] = "l2-cache"; 2809 static const char l3_cache_str[] = "l3-cache"; 2810 static const char itlb4k_str[] = "itlb-4K"; 2811 static const char dtlb4k_str[] = "dtlb-4K"; 2812 static const char itlb2M_str[] = "itlb-2M"; 2813 static const char itlb4M_str[] = "itlb-4M"; 2814 static const char dtlb4M_str[] = "dtlb-4M"; 2815 static const char dtlb24_str[] = "dtlb0-2M-4M"; 2816 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2817 static const char itlb24_str[] = "itlb-2M-4M"; 2818 static const char dtlb44_str[] = "dtlb-4K-4M"; 2819 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2820 static const char sl2_cache_str[] = "sectored-l2-cache"; 2821 static const char itrace_str[] = "itrace-cache"; 2822 static const char sl3_cache_str[] = "sectored-l3-cache"; 2823 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 2824 2825 static const struct cachetab { 2826 uint8_t ct_code; 2827 uint8_t ct_assoc; 2828 uint16_t ct_line_size; 2829 size_t ct_size; 2830 const char *ct_label; 2831 } intel_ctab[] = { 2832 /* 2833 * maintain descending order! 
2834 * 2835 * Codes ignored - Reason 2836 * ---------------------- 2837 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 2838 * f0H/f1H - Currently we do not interpret prefetch size by design 2839 */ 2840 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 2841 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 2842 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 2843 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 2844 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 2845 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 2846 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 2847 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 2848 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 2849 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 2850 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 2851 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 2852 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 2853 { 0xc0, 4, 0, 8, dtlb44_str }, 2854 { 0xba, 4, 0, 64, dtlb4k_str }, 2855 { 0xb4, 4, 0, 256, dtlb4k_str }, 2856 { 0xb3, 4, 0, 128, dtlb4k_str }, 2857 { 0xb2, 4, 0, 64, itlb4k_str }, 2858 { 0xb0, 4, 0, 128, itlb4k_str }, 2859 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2860 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2861 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2862 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2863 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2864 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2865 { 0x80, 8, 64, 512*1024, l2_cache_str}, 2866 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2867 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2868 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2869 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2870 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2871 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 2872 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 2873 { 0x73, 8, 0, 64*1024, itrace_str}, 2874 { 0x72, 8, 0, 32*1024, itrace_str}, 2875 { 0x71, 8, 0, 16*1024, itrace_str}, 2876 { 0x70, 8, 0, 12*1024, itrace_str}, 2877 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 2878 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 2879 { 0x66, 4, 64, 
8*1024, sl1_dcache_str}, 2880 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 2881 { 0x5d, 0, 0, 256, dtlb44_str}, 2882 { 0x5c, 0, 0, 128, dtlb44_str}, 2883 { 0x5b, 0, 0, 64, dtlb44_str}, 2884 { 0x5a, 4, 0, 32, dtlb24_str}, 2885 { 0x59, 0, 0, 16, dtlb4k_str}, 2886 { 0x57, 4, 0, 16, dtlb4k_str}, 2887 { 0x56, 4, 0, 16, dtlb4M_str}, 2888 { 0x55, 0, 0, 7, itlb24_str}, 2889 { 0x52, 0, 0, 256, itlb424_str}, 2890 { 0x51, 0, 0, 128, itlb424_str}, 2891 { 0x50, 0, 0, 64, itlb424_str}, 2892 { 0x4f, 0, 0, 32, itlb4k_str}, 2893 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 2894 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 2895 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 2896 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 2897 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 2898 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 2899 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 2900 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 2901 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 2902 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 2903 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 2904 { 0x43, 4, 32, 512*1024, l2_cache_str}, 2905 { 0x42, 4, 32, 256*1024, l2_cache_str}, 2906 { 0x41, 4, 32, 128*1024, l2_cache_str}, 2907 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 2908 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 2909 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 2910 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 2911 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 2912 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 2913 { 0x30, 8, 64, 32*1024, l1_icache_str}, 2914 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 2915 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 2916 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 2917 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 2918 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 2919 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 2920 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 2921 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 2922 { 0x0b, 4, 0, 4, itlb4M_str}, 2923 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 2924 { 0x08, 4, 32, 16*1024, l1_icache_str}, 2925 { 
0x06, 4, 32, 8*1024, l1_icache_str}, 2926 { 0x05, 4, 0, 32, dtlb4M_str}, 2927 { 0x04, 4, 0, 8, dtlb4M_str}, 2928 { 0x03, 4, 0, 64, dtlb4k_str}, 2929 { 0x02, 4, 0, 2, itlb4M_str}, 2930 { 0x01, 4, 0, 32, itlb4k_str}, 2931 { 0 } 2932 }; 2933 2934 static const struct cachetab cyrix_ctab[] = { 2935 { 0x70, 4, 0, 32, "tlb-4K" }, 2936 { 0x80, 4, 16, 16*1024, "l1-cache" }, 2937 { 0 } 2938 }; 2939 2940 /* 2941 * Search a cache table for a matching entry 2942 */ 2943 static const struct cachetab * 2944 find_cacheent(const struct cachetab *ct, uint_t code) 2945 { 2946 if (code != 0) { 2947 for (; ct->ct_code != 0; ct++) 2948 if (ct->ct_code <= code) 2949 break; 2950 if (ct->ct_code == code) 2951 return (ct); 2952 } 2953 return (NULL); 2954 } 2955 2956 /* 2957 * Populate cachetab entry with L2 or L3 cache-information using 2958 * cpuid function 4. This function is called from intel_walk_cacheinfo() 2959 * when descriptor 0x49 is encountered. It returns 0 if no such cache 2960 * information is found. 2961 */ 2962 static int 2963 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 2964 { 2965 uint32_t level, i; 2966 int ret = 0; 2967 2968 for (i = 0; i < cpi->cpi_std_4_size; i++) { 2969 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 2970 2971 if (level == 2 || level == 3) { 2972 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 2973 ct->ct_line_size = 2974 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 2975 ct->ct_size = ct->ct_assoc * 2976 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 2977 ct->ct_line_size * 2978 (cpi->cpi_std_4[i]->cp_ecx + 1); 2979 2980 if (level == 2) { 2981 ct->ct_label = l2_cache_str; 2982 } else if (level == 3) { 2983 ct->ct_label = l3_cache_str; 2984 } 2985 ret = 1; 2986 } 2987 } 2988 2989 return (ret); 2990 } 2991 2992 /* 2993 * Walk the cacheinfo descriptor, applying 'func' to every valid element 2994 * The walk is terminated if the walker returns non-zero. 
2995 */ 2996 static void 2997 intel_walk_cacheinfo(struct cpuid_info *cpi, 2998 void *arg, int (*func)(void *, const struct cachetab *)) 2999 { 3000 const struct cachetab *ct; 3001 struct cachetab des_49_ct, des_b1_ct; 3002 uint8_t *dp; 3003 int i; 3004 3005 if ((dp = cpi->cpi_cacheinfo) == NULL) 3006 return; 3007 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3008 /* 3009 * For overloaded descriptor 0x49 we use cpuid function 4 3010 * if supported by the current processor, to create 3011 * cache information. 3012 * For overloaded descriptor 0xb1 we use X86_PAE flag 3013 * to disambiguate the cache information. 3014 */ 3015 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3016 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3017 ct = &des_49_ct; 3018 } else if (*dp == 0xb1) { 3019 des_b1_ct.ct_code = 0xb1; 3020 des_b1_ct.ct_assoc = 4; 3021 des_b1_ct.ct_line_size = 0; 3022 if (x86_feature & X86_PAE) { 3023 des_b1_ct.ct_size = 8; 3024 des_b1_ct.ct_label = itlb2M_str; 3025 } else { 3026 des_b1_ct.ct_size = 4; 3027 des_b1_ct.ct_label = itlb4M_str; 3028 } 3029 ct = &des_b1_ct; 3030 } else { 3031 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3032 continue; 3033 } 3034 } 3035 3036 if (func(arg, ct) != 0) { 3037 break; 3038 } 3039 } 3040 } 3041 3042 /* 3043 * (Like the Intel one, except for Cyrix CPUs) 3044 */ 3045 static void 3046 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3047 void *arg, int (*func)(void *, const struct cachetab *)) 3048 { 3049 const struct cachetab *ct; 3050 uint8_t *dp; 3051 int i; 3052 3053 if ((dp = cpi->cpi_cacheinfo) == NULL) 3054 return; 3055 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3056 /* 3057 * Search Cyrix-specific descriptor table first .. 3058 */ 3059 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3060 if (func(arg, ct) != 0) 3061 break; 3062 continue; 3063 } 3064 /* 3065 * .. 
else fall back to the Intel one 3066 */ 3067 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3068 if (func(arg, ct) != 0) 3069 break; 3070 continue; 3071 } 3072 } 3073 } 3074 3075 /* 3076 * A cacheinfo walker that adds associativity, line-size, and size properties 3077 * to the devinfo node it is passed as an argument. 3078 */ 3079 static int 3080 add_cacheent_props(void *arg, const struct cachetab *ct) 3081 { 3082 dev_info_t *devi = arg; 3083 3084 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3085 if (ct->ct_line_size != 0) 3086 add_cache_prop(devi, ct->ct_label, line_str, 3087 ct->ct_line_size); 3088 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3089 return (0); 3090 } 3091 3092 3093 static const char fully_assoc[] = "fully-associative?"; 3094 3095 /* 3096 * AMD style cache/tlb description 3097 * 3098 * Extended functions 5 and 6 directly describe properties of 3099 * tlbs and various cache levels. 3100 */ 3101 static void 3102 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3103 { 3104 switch (assoc) { 3105 case 0: /* reserved; ignore */ 3106 break; 3107 default: 3108 add_cache_prop(devi, label, assoc_str, assoc); 3109 break; 3110 case 0xff: 3111 add_cache_prop(devi, label, fully_assoc, 1); 3112 break; 3113 } 3114 } 3115 3116 static void 3117 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3118 { 3119 if (size == 0) 3120 return; 3121 add_cache_prop(devi, label, size_str, size); 3122 add_amd_assoc(devi, label, assoc); 3123 } 3124 3125 static void 3126 add_amd_cache(dev_info_t *devi, const char *label, 3127 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3128 { 3129 if (size == 0 || line_size == 0) 3130 return; 3131 add_amd_assoc(devi, label, assoc); 3132 /* 3133 * Most AMD parts have a sectored cache. Multiple cache lines are 3134 * associated with each tag. A sector consists of all cache lines 3135 * associated with a tag. 
For example, the AMD K6-III has a sector 3136 * size of 2 cache lines per tag. 3137 */ 3138 if (lines_per_tag != 0) 3139 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3140 add_cache_prop(devi, label, line_str, line_size); 3141 add_cache_prop(devi, label, size_str, size * 1024); 3142 } 3143 3144 static void 3145 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3146 { 3147 switch (assoc) { 3148 case 0: /* off */ 3149 break; 3150 case 1: 3151 case 2: 3152 case 4: 3153 add_cache_prop(devi, label, assoc_str, assoc); 3154 break; 3155 case 6: 3156 add_cache_prop(devi, label, assoc_str, 8); 3157 break; 3158 case 8: 3159 add_cache_prop(devi, label, assoc_str, 16); 3160 break; 3161 case 0xf: 3162 add_cache_prop(devi, label, fully_assoc, 1); 3163 break; 3164 default: /* reserved; ignore */ 3165 break; 3166 } 3167 } 3168 3169 static void 3170 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3171 { 3172 if (size == 0 || assoc == 0) 3173 return; 3174 add_amd_l2_assoc(devi, label, assoc); 3175 add_cache_prop(devi, label, size_str, size); 3176 } 3177 3178 static void 3179 add_amd_l2_cache(dev_info_t *devi, const char *label, 3180 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3181 { 3182 if (size == 0 || assoc == 0 || line_size == 0) 3183 return; 3184 add_amd_l2_assoc(devi, label, assoc); 3185 if (lines_per_tag != 0) 3186 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3187 add_cache_prop(devi, label, line_str, line_size); 3188 add_cache_prop(devi, label, size_str, size * 1024); 3189 } 3190 3191 static void 3192 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3193 { 3194 struct cpuid_regs *cp; 3195 3196 if (cpi->cpi_xmaxeax < 0x80000005) 3197 return; 3198 cp = &cpi->cpi_extd[5]; 3199 3200 /* 3201 * 4M/2M L1 TLB configuration 3202 * 3203 * We report the size for 2M pages because AMD uses two 3204 * TLB entries for one 4M page. 
3205 */ 3206 add_amd_tlb(devi, "dtlb-2M", 3207 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3208 add_amd_tlb(devi, "itlb-2M", 3209 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3210 3211 /* 3212 * 4K L1 TLB configuration 3213 */ 3214 3215 switch (cpi->cpi_vendor) { 3216 uint_t nentries; 3217 case X86_VENDOR_TM: 3218 if (cpi->cpi_family >= 5) { 3219 /* 3220 * Crusoe processors have 256 TLB entries, but 3221 * cpuid data format constrains them to only 3222 * reporting 255 of them. 3223 */ 3224 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3225 nentries = 256; 3226 /* 3227 * Crusoe processors also have a unified TLB 3228 */ 3229 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3230 nentries); 3231 break; 3232 } 3233 /*FALLTHROUGH*/ 3234 default: 3235 add_amd_tlb(devi, itlb4k_str, 3236 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3237 add_amd_tlb(devi, dtlb4k_str, 3238 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3239 break; 3240 } 3241 3242 /* 3243 * data L1 cache configuration 3244 */ 3245 3246 add_amd_cache(devi, l1_dcache_str, 3247 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3248 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3249 3250 /* 3251 * code L1 cache configuration 3252 */ 3253 3254 add_amd_cache(devi, l1_icache_str, 3255 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3256 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3257 3258 if (cpi->cpi_xmaxeax < 0x80000006) 3259 return; 3260 cp = &cpi->cpi_extd[6]; 3261 3262 /* Check for a unified L2 TLB for large pages */ 3263 3264 if (BITX(cp->cp_eax, 31, 16) == 0) 3265 add_amd_l2_tlb(devi, "l2-tlb-2M", 3266 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3267 else { 3268 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3269 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3270 add_amd_l2_tlb(devi, "l2-itlb-2M", 3271 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3272 } 3273 3274 /* Check for a unified L2 TLB for 4K pages */ 3275 3276 if (BITX(cp->cp_ebx, 31, 16) 
== 0) { 3277 add_amd_l2_tlb(devi, "l2-tlb-4K", 3278 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3279 } else { 3280 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3281 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3282 add_amd_l2_tlb(devi, "l2-itlb-4K", 3283 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3284 } 3285 3286 add_amd_l2_cache(devi, l2_cache_str, 3287 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3288 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3289 } 3290 3291 /* 3292 * There are two basic ways that the x86 world describes it cache 3293 * and tlb architecture - Intel's way and AMD's way. 3294 * 3295 * Return which flavor of cache architecture we should use 3296 */ 3297 static int 3298 x86_which_cacheinfo(struct cpuid_info *cpi) 3299 { 3300 switch (cpi->cpi_vendor) { 3301 case X86_VENDOR_Intel: 3302 if (cpi->cpi_maxeax >= 2) 3303 return (X86_VENDOR_Intel); 3304 break; 3305 case X86_VENDOR_AMD: 3306 /* 3307 * The K5 model 1 was the first part from AMD that reported 3308 * cache sizes via extended cpuid functions. 3309 */ 3310 if (cpi->cpi_family > 5 || 3311 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3312 return (X86_VENDOR_AMD); 3313 break; 3314 case X86_VENDOR_TM: 3315 if (cpi->cpi_family >= 5) 3316 return (X86_VENDOR_AMD); 3317 /*FALLTHROUGH*/ 3318 default: 3319 /* 3320 * If they have extended CPU data for 0x80000005 3321 * then we assume they have AMD-format cache 3322 * information. 3323 * 3324 * If not, and the vendor happens to be Cyrix, 3325 * then try our-Cyrix specific handler. 3326 * 3327 * If we're not Cyrix, then assume we're using Intel's 3328 * table-driven format instead. 3329 */ 3330 if (cpi->cpi_xmaxeax >= 0x80000005) 3331 return (X86_VENDOR_AMD); 3332 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3333 return (X86_VENDOR_Cyrix); 3334 else if (cpi->cpi_maxeax >= 2) 3335 return (X86_VENDOR_Intel); 3336 break; 3337 } 3338 return (-1); 3339 } 3340 3341 /* 3342 * create a node for the given cpu under the prom root node. 
3343 * Also, create a cpu node in the device tree. 3344 */ 3345 static dev_info_t *cpu_nex_devi = NULL; 3346 static kmutex_t cpu_node_lock; 3347 3348 /* 3349 * Called from post_startup() and mp_startup() 3350 */ 3351 void 3352 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3353 { 3354 dev_info_t *cpu_devi; 3355 int create; 3356 3357 mutex_enter(&cpu_node_lock); 3358 3359 /* 3360 * create a nexus node for all cpus identified as 'cpu_id' under 3361 * the root node. 3362 */ 3363 if (cpu_nex_devi == NULL) { 3364 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3365 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3366 mutex_exit(&cpu_node_lock); 3367 return; 3368 } 3369 (void) ndi_devi_online(cpu_nex_devi, 0); 3370 } 3371 3372 /* 3373 * create a child node for cpu identified as 'cpu_id' 3374 */ 3375 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3376 cpu_id); 3377 if (cpu_devi == NULL) { 3378 mutex_exit(&cpu_node_lock); 3379 return; 3380 } 3381 3382 /* device_type */ 3383 3384 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3385 "device_type", "cpu"); 3386 3387 /* reg */ 3388 3389 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3390 "reg", cpu_id); 3391 3392 /* cpu-mhz, and clock-frequency */ 3393 3394 if (cpu_freq > 0) { 3395 long long mul; 3396 3397 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3398 "cpu-mhz", cpu_freq); 3399 3400 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3401 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3402 "clock-frequency", (int)mul); 3403 } 3404 3405 (void) ndi_devi_online(cpu_devi, 0); 3406 3407 if ((x86_feature & X86_CPUID) == 0) { 3408 mutex_exit(&cpu_node_lock); 3409 return; 3410 } 3411 3412 /* vendor-id */ 3413 3414 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3415 "vendor-id", cpi->cpi_vendorstr); 3416 3417 if (cpi->cpi_maxeax == 0) { 3418 mutex_exit(&cpu_node_lock); 3419 return; 3420 } 3421 3422 /* 3423 * family, model, and step 3424 */ 3425 (void) 
ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3426 "family", CPI_FAMILY(cpi)); 3427 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3428 "cpu-model", CPI_MODEL(cpi)); 3429 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3430 "stepping-id", CPI_STEP(cpi)); 3431 3432 /* type */ 3433 3434 switch (cpi->cpi_vendor) { 3435 case X86_VENDOR_Intel: 3436 create = 1; 3437 break; 3438 default: 3439 create = 0; 3440 break; 3441 } 3442 if (create) 3443 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3444 "type", CPI_TYPE(cpi)); 3445 3446 /* ext-family */ 3447 3448 switch (cpi->cpi_vendor) { 3449 case X86_VENDOR_Intel: 3450 case X86_VENDOR_AMD: 3451 create = cpi->cpi_family >= 0xf; 3452 break; 3453 default: 3454 create = 0; 3455 break; 3456 } 3457 if (create) 3458 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3459 "ext-family", CPI_FAMILY_XTD(cpi)); 3460 3461 /* ext-model */ 3462 3463 switch (cpi->cpi_vendor) { 3464 case X86_VENDOR_Intel: 3465 create = IS_EXTENDED_MODEL_INTEL(cpi); 3466 break; 3467 case X86_VENDOR_AMD: 3468 create = CPI_FAMILY(cpi) == 0xf; 3469 break; 3470 default: 3471 create = 0; 3472 break; 3473 } 3474 if (create) 3475 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3476 "ext-model", CPI_MODEL_XTD(cpi)); 3477 3478 /* generation */ 3479 3480 switch (cpi->cpi_vendor) { 3481 case X86_VENDOR_AMD: 3482 /* 3483 * AMD K5 model 1 was the first part to support this 3484 */ 3485 create = cpi->cpi_xmaxeax >= 0x80000001; 3486 break; 3487 default: 3488 create = 0; 3489 break; 3490 } 3491 if (create) 3492 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3493 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3494 3495 /* brand-id */ 3496 3497 switch (cpi->cpi_vendor) { 3498 case X86_VENDOR_Intel: 3499 /* 3500 * brand id first appeared on Pentium III Xeon model 8, 3501 * and Celeron model 8 processors and Opteron 3502 */ 3503 create = cpi->cpi_family > 6 || 3504 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3505 break; 3506 case 
X86_VENDOR_AMD: 3507 create = cpi->cpi_family >= 0xf; 3508 break; 3509 default: 3510 create = 0; 3511 break; 3512 } 3513 if (create && cpi->cpi_brandid != 0) { 3514 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3515 "brand-id", cpi->cpi_brandid); 3516 } 3517 3518 /* chunks, and apic-id */ 3519 3520 switch (cpi->cpi_vendor) { 3521 /* 3522 * first available on Pentium IV and Opteron (K8) 3523 */ 3524 case X86_VENDOR_Intel: 3525 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3526 break; 3527 case X86_VENDOR_AMD: 3528 create = cpi->cpi_family >= 0xf; 3529 break; 3530 default: 3531 create = 0; 3532 break; 3533 } 3534 if (create) { 3535 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3536 "chunks", CPI_CHUNKS(cpi)); 3537 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3538 "apic-id", cpi->cpi_apicid); 3539 if (cpi->cpi_chipid >= 0) { 3540 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3541 "chip#", cpi->cpi_chipid); 3542 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3543 "clog#", cpi->cpi_clogid); 3544 } 3545 } 3546 3547 /* cpuid-features */ 3548 3549 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3550 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3551 3552 3553 /* cpuid-features-ecx */ 3554 3555 switch (cpi->cpi_vendor) { 3556 case X86_VENDOR_Intel: 3557 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3558 break; 3559 default: 3560 create = 0; 3561 break; 3562 } 3563 if (create) 3564 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3565 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3566 3567 /* ext-cpuid-features */ 3568 3569 switch (cpi->cpi_vendor) { 3570 case X86_VENDOR_Intel: 3571 case X86_VENDOR_AMD: 3572 case X86_VENDOR_Cyrix: 3573 case X86_VENDOR_TM: 3574 case X86_VENDOR_Centaur: 3575 create = cpi->cpi_xmaxeax >= 0x80000001; 3576 break; 3577 default: 3578 create = 0; 3579 break; 3580 } 3581 if (create) { 3582 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3583 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3584 
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3585 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3586 } 3587 3588 /* 3589 * Brand String first appeared in Intel Pentium IV, AMD K5 3590 * model 1, and Cyrix GXm. On earlier models we try and 3591 * simulate something similar .. so this string should always 3592 * same -something- about the processor, however lame. 3593 */ 3594 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3595 "brand-string", cpi->cpi_brandstr); 3596 3597 /* 3598 * Finally, cache and tlb information 3599 */ 3600 switch (x86_which_cacheinfo(cpi)) { 3601 case X86_VENDOR_Intel: 3602 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3603 break; 3604 case X86_VENDOR_Cyrix: 3605 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3606 break; 3607 case X86_VENDOR_AMD: 3608 amd_cache_info(cpi, cpu_devi); 3609 break; 3610 default: 3611 break; 3612 } 3613 3614 mutex_exit(&cpu_node_lock); 3615 } 3616 3617 struct l2info { 3618 int *l2i_csz; 3619 int *l2i_lsz; 3620 int *l2i_assoc; 3621 int l2i_ret; 3622 }; 3623 3624 /* 3625 * A cacheinfo walker that fetches the size, line-size and associativity 3626 * of the L2 cache 3627 */ 3628 static int 3629 intel_l2cinfo(void *arg, const struct cachetab *ct) 3630 { 3631 struct l2info *l2i = arg; 3632 int *ip; 3633 3634 if (ct->ct_label != l2_cache_str && 3635 ct->ct_label != sl2_cache_str) 3636 return (0); /* not an L2 -- keep walking */ 3637 3638 if ((ip = l2i->l2i_csz) != NULL) 3639 *ip = ct->ct_size; 3640 if ((ip = l2i->l2i_lsz) != NULL) 3641 *ip = ct->ct_line_size; 3642 if ((ip = l2i->l2i_assoc) != NULL) 3643 *ip = ct->ct_assoc; 3644 l2i->l2i_ret = ct->ct_size; 3645 return (1); /* was an L2 -- terminate walk */ 3646 } 3647 3648 /* 3649 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3650 * 3651 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3652 * value is the associativity, the associativity for the L2 cache and 3653 * tlb is encoded in the following 
table. The 4 bit L2 value serves as 3654 * an index into the amd_afd[] array to determine the associativity. 3655 * -1 is undefined. 0 is fully associative. 3656 */ 3657 3658 static int amd_afd[] = 3659 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3660 3661 static void 3662 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3663 { 3664 struct cpuid_regs *cp; 3665 uint_t size, assoc; 3666 int i; 3667 int *ip; 3668 3669 if (cpi->cpi_xmaxeax < 0x80000006) 3670 return; 3671 cp = &cpi->cpi_extd[6]; 3672 3673 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3674 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3675 uint_t cachesz = size * 1024; 3676 assoc = amd_afd[i]; 3677 3678 ASSERT(assoc != -1); 3679 3680 if ((ip = l2i->l2i_csz) != NULL) 3681 *ip = cachesz; 3682 if ((ip = l2i->l2i_lsz) != NULL) 3683 *ip = BITX(cp->cp_ecx, 7, 0); 3684 if ((ip = l2i->l2i_assoc) != NULL) 3685 *ip = assoc; 3686 l2i->l2i_ret = cachesz; 3687 } 3688 } 3689 3690 int 3691 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3692 { 3693 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3694 struct l2info __l2info, *l2i = &__l2info; 3695 3696 l2i->l2i_csz = csz; 3697 l2i->l2i_lsz = lsz; 3698 l2i->l2i_assoc = assoc; 3699 l2i->l2i_ret = -1; 3700 3701 switch (x86_which_cacheinfo(cpi)) { 3702 case X86_VENDOR_Intel: 3703 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3704 break; 3705 case X86_VENDOR_Cyrix: 3706 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3707 break; 3708 case X86_VENDOR_AMD: 3709 amd_l2cacheinfo(cpi, l2i); 3710 break; 3711 default: 3712 break; 3713 } 3714 return (l2i->l2i_ret); 3715 } 3716 3717 #if !defined(__xpv) 3718 3719 uint32_t * 3720 cpuid_mwait_alloc(cpu_t *cpu) 3721 { 3722 uint32_t *ret; 3723 size_t mwait_size; 3724 3725 ASSERT(cpuid_checkpass(cpu, 2)); 3726 3727 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3728 if (mwait_size == 0) 3729 return (NULL); 3730 3731 /* 3732 * kmem_alloc() returns cache line size aligned data for mwait_size 3733 * 
allocations. mwait_size is currently cache line sized. Neither 3734 * of these implementation details are guarantied to be true in the 3735 * future. 3736 * 3737 * First try allocating mwait_size as kmem_alloc() currently returns 3738 * correctly aligned memory. If kmem_alloc() does not return 3739 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3740 * 3741 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3742 * decide to free this memory. 3743 */ 3744 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3745 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3746 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3747 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3748 *ret = MWAIT_RUNNING; 3749 return (ret); 3750 } else { 3751 kmem_free(ret, mwait_size); 3752 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3753 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3754 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3755 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3756 *ret = MWAIT_RUNNING; 3757 return (ret); 3758 } 3759 } 3760 3761 void 3762 cpuid_mwait_free(cpu_t *cpu) 3763 { 3764 ASSERT(cpuid_checkpass(cpu, 2)); 3765 3766 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3767 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3768 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3769 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3770 } 3771 3772 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3773 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3774 } 3775 3776 void 3777 patch_tsc_read(int flag) 3778 { 3779 size_t cnt; 3780 3781 switch (flag) { 3782 case X86_NO_TSC: 3783 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3784 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3785 break; 3786 case X86_HAVE_TSCP: 3787 cnt = &_tscp_end - &_tscp_start; 3788 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3789 break; 3790 case X86_TSC_MFENCE: 3791 cnt = &_tsc_mfence_end - 
&_tsc_mfence_start; 3792 (void) memcpy((void *)tsc_read, 3793 (void *)&_tsc_mfence_start, cnt); 3794 break; 3795 case X86_TSC_LFENCE: 3796 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 3797 (void) memcpy((void *)tsc_read, 3798 (void *)&_tsc_lfence_start, cnt); 3799 break; 3800 default: 3801 break; 3802 } 3803 } 3804 3805 #endif /* !__xpv */ 3806