1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Various routines to handle identification 30 * and classification of x86 processors. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/archsystm.h> 35 #include <sys/x86_archext.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 #include <sys/cmn_err.h> 39 #include <sys/sunddi.h> 40 #include <sys/sunndi.h> 41 #include <sys/cpuvar.h> 42 #include <sys/processor.h> 43 #include <sys/sysmacros.h> 44 #include <sys/pg.h> 45 #include <sys/fp.h> 46 #include <sys/controlregs.h> 47 #include <sys/auxv_386.h> 48 #include <sys/bitmap.h> 49 #include <sys/memnode.h> 50 51 /* 52 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 53 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 54 * them accordingly. For most modern processors, feature detection occurs here 55 * in pass 1. 56 * 57 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 58 * for the boot CPU and does the basic analysis that the early kernel needs. 59 * x86_feature is set based on the return value of cpuid_pass1() of the boot 60 * CPU. 61 * 62 * Pass 1 includes: 63 * 64 * o Determining vendor/model/family/stepping and setting x86_type and 65 * x86_vendor accordingly. 66 * o Processing the feature flags returned by the cpuid instruction while 67 * applying any workarounds or tricks for the specific processor. 68 * o Mapping the feature flags into Solaris feature bits (X86_*). 69 * o Processing extended feature flags if supported by the processor, 70 * again while applying specific processor knowledge. 71 * o Determining the CMT characteristics of the system. 72 * 73 * Pass 1 is done on non-boot CPUs during their initialization and the results 74 * are used only as a meager attempt at ensuring that all processors within the 75 * system support the same features. 76 * 77 * Pass 2 of cpuid feature analysis happens just at the beginning 78 * of startup(). It just copies in and corrects the remainder 79 * of the cpuid data we depend on: standard cpuid functions that we didn't 80 * need for pass1 feature analysis, and extended cpuid functions beyond the 81 * simple feature processing done in pass1. 82 * 83 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 84 * particular kernel memory allocation has been made available. It creates a 85 * readable brand string based on the data collected in the first two passes. 
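 *
 * As a rough sketch of the overall flow (pass 4 is described below; the
 * exact call sites live in the startup code, so treat this as illustrative
 * rather than authoritative):
 *
 *	x86_feature = cpuid_pass1(CPU);		mlsetup() (boot CPU)
 *	cpuid_pass2(CPU);			startup()
 *	cpuid_pass3(CPU);			once kmem_alloc() is available
 *	hwcap = cpuid_pass4(CPU);		bind_hwcap(), after post_startup()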
 *
 * Pass 4 of cpuid analysis is invoked after post_startup() when all
 * the support infrastructure for various hardware features has been
 * initialized. It determines which processor features will be reported
 * to userland via the aux vector.
 *
 * All passes are executed on all CPUs, but only the boot CPU determines what
 * features the kernel will use.
 *
 * Much of the worst junk in this file is for the support of processors
 * that didn't really implement the cpuid instruction properly.
 *
 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
 * the pass numbers.  Accordingly, changes to the pass code may require changes
 * to the accessor code.
 */

uint_t x86_feature = 0;
uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;

uint_t pentiumpro_bug4046376;
uint_t pentiumpro_bug4064495;

uint_t enable486;

/*
 * This set of strings is for processors rumored to support the cpuid
 * instruction, and is used by locore.s to figure out how to set x86_vendor.
 */
const char CyrixInstead[] = "CyrixInstead";

/*
 * monitor/mwait info.
 *
 * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
 */
struct mwait_info {
        size_t          mon_min;        /* min size to avoid missed wakeups */
        size_t          mon_max;        /* size to avoid false wakeups */
        size_t          size_actual;    /* size actually allocated */
        void            *buf_actual;    /* memory actually allocated */
        uint32_t        support;        /* processor support of monitor/mwait */
};

/*
 * These constants determine how many of the elements of the
 * cpuid we cache in the cpuid_info data structure; the
 * remaining elements are accessible via the cpuid instruction.
 */

#define NMAX_CPI_STD    6               /* eax = 0 .. 5 */
#define NMAX_CPI_EXTD   9               /* eax = 0x80000000 .. 0x80000008 */

struct cpuid_info {
        uint_t cpi_pass;                /* last pass completed */
        /*
         * standard function information
         */
        uint_t cpi_maxeax;              /* fn 0: %eax */
        char cpi_vendorstr[13];         /* fn 0: %ebx:%ecx:%edx */
        uint_t cpi_vendor;              /* enum of cpi_vendorstr */

        uint_t cpi_family;              /* fn 1: extended family */
        uint_t cpi_model;               /* fn 1: extended model */
        uint_t cpi_step;                /* fn 1: stepping */
        chipid_t cpi_chipid;            /* fn 1: %ebx: chip # on ht cpus */
        uint_t cpi_brandid;             /* fn 1: %ebx: brand ID */
        int cpi_clogid;                 /* fn 1: %ebx: thread # */
        uint_t cpi_ncpu_per_chip;       /* fn 1: %ebx: logical cpu count */
        uint8_t cpi_cacheinfo[16];      /* fn 2: intel-style cache desc */
        uint_t cpi_ncache;              /* fn 2: number of elements */
        uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
        id_t cpi_last_lvl_cacheid;      /* fn 4: %eax: derived cache id */
        uint_t cpi_std_4_size;          /* fn 4: number of fn 4 elements */
        struct cpuid_regs **cpi_std_4;  /* fn 4: %ecx == 0 .. fn4_size */
        struct cpuid_regs cpi_std[NMAX_CPI_STD];        /* 0 ..
5 */ 166 /* 167 * extended function information 168 */ 169 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 170 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 171 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 172 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 173 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 174 id_t cpi_coreid; /* same coreid => strands share core */ 175 int cpi_pkgcoreid; /* core number within single package */ 176 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 177 /* Intel: fn 4: %eax[31-26] */ 178 /* 179 * supported feature information 180 */ 181 uint32_t cpi_support[5]; 182 #define STD_EDX_FEATURES 0 183 #define AMD_EDX_FEATURES 1 184 #define TM_EDX_FEATURES 2 185 #define STD_ECX_FEATURES 3 186 #define AMD_ECX_FEATURES 4 187 /* 188 * Synthesized information, where known. 189 */ 190 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 191 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 192 uint32_t cpi_socket; /* Chip package/socket type */ 193 194 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 195 uint32_t cpi_apicid; 196 }; 197 198 199 static struct cpuid_info cpuid_info0; 200 201 /* 202 * These bit fields are defined by the Intel Application Note AP-485 203 * "Intel Processor Identification and the CPUID Instruction" 204 */ 205 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 206 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 207 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 208 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 209 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 210 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 211 212 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 213 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 214 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 215 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 216 217 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 218 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 219 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 220 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 221 222 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 223 #define CPI_XMAXEAX_MAX 0x80000100 224 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 225 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 226 227 /* 228 * Function 4 (Deterministic Cache Parameters) macros 229 * Defined by Intel Application Note AP-485 230 */ 231 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 232 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 233 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 234 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 235 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 236 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 237 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 238 239 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 240 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 241 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 242 243 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 244 245 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 246 247 248 /* 249 * A couple of shorthand macros to identify "later" P6-family chips 250 * like the Pentium M and Core. 
First, the "older" P6-based stuff 251 * (loosely defined as "pre-Pentium-4"): 252 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 253 */ 254 255 #define IS_LEGACY_P6(cpi) ( \ 256 cpi->cpi_family == 6 && \ 257 (cpi->cpi_model == 1 || \ 258 cpi->cpi_model == 3 || \ 259 cpi->cpi_model == 5 || \ 260 cpi->cpi_model == 6 || \ 261 cpi->cpi_model == 7 || \ 262 cpi->cpi_model == 8 || \ 263 cpi->cpi_model == 0xA || \ 264 cpi->cpi_model == 0xB) \ 265 ) 266 267 /* A "new F6" is everything with family 6 that's not the above */ 268 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 269 270 /* Extended family/model support */ 271 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 272 cpi->cpi_family >= 0xf) 273 274 /* 275 * AMD family 0xf and family 0x10 socket types. 276 * First index : 277 * 0 for family 0xf, revs B thru E 278 * 1 for family 0xf, revs F and G 279 * 2 for family 0x10, rev B 280 * Second index by (model & 0x3) 281 */ 282 static uint32_t amd_skts[3][4] = { 283 /* 284 * Family 0xf revisions B through E 285 */ 286 #define A_SKTS_0 0 287 { 288 X86_SOCKET_754, /* 0b00 */ 289 X86_SOCKET_940, /* 0b01 */ 290 X86_SOCKET_754, /* 0b10 */ 291 X86_SOCKET_939 /* 0b11 */ 292 }, 293 /* 294 * Family 0xf revisions F and G 295 */ 296 #define A_SKTS_1 1 297 { 298 X86_SOCKET_S1g1, /* 0b00 */ 299 X86_SOCKET_F1207, /* 0b01 */ 300 X86_SOCKET_UNKNOWN, /* 0b10 */ 301 X86_SOCKET_AM2 /* 0b11 */ 302 }, 303 /* 304 * Family 0x10 revisions A and B 305 * It is not clear whether, as new sockets release, that 306 * model & 0x3 will id socket for this family 307 */ 308 #define A_SKTS_2 2 309 { 310 X86_SOCKET_F1207, /* 0b00 */ 311 X86_SOCKET_F1207, /* 0b01 */ 312 X86_SOCKET_F1207, /* 0b10 */ 313 X86_SOCKET_F1207, /* 0b11 */ 314 } 315 }; 316 317 /* 318 * Table for mapping AMD Family 0xf and AMD Family 0x10 model/stepping 319 * combination to chip "revision" and socket type. 320 * 321 * The first member of this array that matches a given family, extended model 322 * plus model range, and stepping range will be considered a match. 323 */ 324 static const struct amd_rev_mapent { 325 uint_t rm_family; 326 uint_t rm_modello; 327 uint_t rm_modelhi; 328 uint_t rm_steplo; 329 uint_t rm_stephi; 330 uint32_t rm_chiprev; 331 const char *rm_chiprevstr; 332 int rm_sktidx; 333 } amd_revmap[] = { 334 /* 335 * =============== AuthenticAMD Family 0xf =============== 336 */ 337 338 /* 339 * Rev B includes model 0x4 stepping 0 and model 0x5 stepping 0 and 1. 340 */ 341 { 0xf, 0x04, 0x04, 0x0, 0x0, X86_CHIPREV_AMD_F_REV_B, "B", A_SKTS_0 }, 342 { 0xf, 0x05, 0x05, 0x0, 0x1, X86_CHIPREV_AMD_F_REV_B, "B", A_SKTS_0 }, 343 /* 344 * Rev C0 includes model 0x4 stepping 8 and model 0x5 stepping 8 345 */ 346 { 0xf, 0x04, 0x05, 0x8, 0x8, X86_CHIPREV_AMD_F_REV_C0, "C0", A_SKTS_0 }, 347 /* 348 * Rev CG is the rest of extended model 0x0 - i.e., everything 349 * but the rev B and C0 combinations covered above. 350 */ 351 { 0xf, 0x00, 0x0f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_CG, "CG", A_SKTS_0 }, 352 /* 353 * Rev D has extended model 0x1. 354 */ 355 { 0xf, 0x10, 0x1f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_D, "D", A_SKTS_0 }, 356 /* 357 * Rev E has extended model 0x2. 358 * Extended model 0x3 is unused but available to grow into. 359 */ 360 { 0xf, 0x20, 0x3f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_E, "E", A_SKTS_0 }, 361 /* 362 * Rev F has extended models 0x4 and 0x5. 363 */ 364 { 0xf, 0x40, 0x5f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_F, "F", A_SKTS_1 }, 365 /* 366 * Rev G has extended model 0x6. 
         */
        { 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", A_SKTS_1 },

        /*
         * =============== AuthenticAMD Family 0x10 ===============
         */

        /*
         * Rev A has model 0 and stepping 0/1/2 for DR-{A0,A1,A2}.
         * Give all of model 0 stepping range to rev A.
         */
        { 0x10, 0x00, 0x00, 0x0, 0x2, X86_CHIPREV_AMD_10_REV_A, "A", A_SKTS_2 },

        /*
         * Rev B has model 2 and steppings 0/1/0xa/2 for DR-{B0,B1,BA,B2}.
         * Give all of model 2 stepping range to rev B.
         */
        { 0x10, 0x02, 0x02, 0x0, 0xf, X86_CHIPREV_AMD_10_REV_B, "B", A_SKTS_2 },
};

/*
 * Info for monitor/mwait idle loop.
 *
 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
 * 2006.
 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
 * Documentation Updates" #33633, Rev 2.05, December 2006.
 */
#define MWAIT_SUPPORT           (0x00000001)    /* mwait supported */
#define MWAIT_EXTENSIONS        (0x00000002)    /* extension supported */
#define MWAIT_ECX_INT_ENABLE    (0x00000004)    /* ecx 1 extension supported */
#define MWAIT_SUPPORTED(cpi)    ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
#define MWAIT_INT_ENABLE(cpi)   ((cpi)->cpi_std[5].cp_ecx & 0x2)
#define MWAIT_EXTENSION(cpi)    ((cpi)->cpi_std[5].cp_ecx & 0x1)
#define MWAIT_SIZE_MIN(cpi)     BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
#define MWAIT_SIZE_MAX(cpi)     BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
/*
 * Number of sub-cstates for a given c-state.
 */
#define MWAIT_NUM_SUBC_STATES(cpi, c_state)                     \
        BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)

static void
synth_amd_info(struct cpuid_info *cpi)
{
        const struct amd_rev_mapent *rmp;
        uint_t family, model, step;
        int i;

        /*
         * Currently only AMD family 0xf and family 0x10 use these fields.
         */
        if (cpi->cpi_family != 0xf && cpi->cpi_family != 0x10)
                return;

        family = cpi->cpi_family;
        model = cpi->cpi_model;
        step = cpi->cpi_step;

        for (i = 0, rmp = amd_revmap; i < sizeof (amd_revmap) / sizeof (*rmp);
            i++, rmp++) {
                if (family == rmp->rm_family &&
                    model >= rmp->rm_modello && model <= rmp->rm_modelhi &&
                    step >= rmp->rm_steplo && step <= rmp->rm_stephi) {
                        cpi->cpi_chiprev = rmp->rm_chiprev;
                        cpi->cpi_chiprevstr = rmp->rm_chiprevstr;
                        cpi->cpi_socket = amd_skts[rmp->rm_sktidx][model & 0x3];
                        return;
                }
        }
}

static void
synth_info(struct cpuid_info *cpi)
{
        cpi->cpi_chiprev = X86_CHIPREV_UNKNOWN;
        cpi->cpi_chiprevstr = "Unknown";
        cpi->cpi_socket = X86_SOCKET_UNKNOWN;

        switch (cpi->cpi_vendor) {
        case X86_VENDOR_AMD:
                synth_amd_info(cpi);
                break;

        default:
                break;

        }
}

/*
 * Apply various platform-dependent restrictions where the
 * underlying platform restrictions mean the CPU can be marked
 * as less capable than its cpuid instruction would imply.
 */
#if defined(__xpv)
static void
platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
{
        switch (eax) {
        case 1:
                cp->cp_edx &=
                    ~(CPUID_INTC_EDX_PSE |
                    CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
                    CPUID_INTC_EDX_MCA |        /* XXPV true on dom0?
*/ 473 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 474 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 475 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 476 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 477 break; 478 479 case 0x80000001: 480 cp->cp_edx &= 481 ~(CPUID_AMD_EDX_PSE | 482 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 483 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 484 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 485 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 486 CPUID_AMD_EDX_TSCP); 487 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 488 break; 489 default: 490 break; 491 } 492 493 switch (vendor) { 494 case X86_VENDOR_Intel: 495 switch (eax) { 496 case 4: 497 /* 498 * Zero out the (ncores-per-chip - 1) field 499 */ 500 cp->cp_eax &= 0x03fffffff; 501 break; 502 default: 503 break; 504 } 505 break; 506 case X86_VENDOR_AMD: 507 switch (eax) { 508 case 0x80000008: 509 /* 510 * Zero out the (ncores-per-chip - 1) field 511 */ 512 cp->cp_ecx &= 0xffffff00; 513 break; 514 default: 515 break; 516 } 517 break; 518 default: 519 break; 520 } 521 } 522 #else 523 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 524 #endif 525 526 /* 527 * Some undocumented ways of patching the results of the cpuid 528 * instruction to permit running Solaris 10 on future cpus that 529 * we don't currently support. Could be set to non-zero values 530 * via settings in eeprom. 531 */ 532 533 uint32_t cpuid_feature_ecx_include; 534 uint32_t cpuid_feature_ecx_exclude; 535 uint32_t cpuid_feature_edx_include; 536 uint32_t cpuid_feature_edx_exclude; 537 538 void 539 cpuid_alloc_space(cpu_t *cpu) 540 { 541 /* 542 * By convention, cpu0 is the boot cpu, which is set up 543 * before memory allocation is available. All other cpus get 544 * their cpuid_info struct allocated here. 545 */ 546 ASSERT(cpu->cpu_id != 0); 547 cpu->cpu_m.mcpu_cpi = 548 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 549 } 550 551 void 552 cpuid_free_space(cpu_t *cpu) 553 { 554 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 555 int i; 556 557 ASSERT(cpu->cpu_id != 0); 558 559 /* 560 * Free up any function 4 related dynamic storage 561 */ 562 for (i = 1; i < cpi->cpi_std_4_size; i++) 563 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 564 if (cpi->cpi_std_4_size > 0) 565 kmem_free(cpi->cpi_std_4, 566 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 567 568 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 569 } 570 571 #if !defined(__xpv) 572 573 static void 574 check_for_hvm() 575 { 576 struct cpuid_regs cp; 577 char *xen_str; 578 uint32_t xen_signature[4]; 579 extern int xpv_is_hvm; 580 581 /* 582 * In a fully virtualized domain, Xen's pseudo-cpuid function 583 * 0x40000000 returns a string representing the Xen signature in 584 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 585 * function. 
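         *
         * For illustration (assuming the usual little-endian register
         * layout), a Xen HVM guest would return %ebx = "XenV",
         * %ecx = "MMXe" and %edx = "nVMM", which the code below
         * reassembles and compares against "XenVMMXenVMM".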
586 */ 587 cp.cp_eax = 0x40000000; 588 (void) __cpuid_insn(&cp); 589 xen_signature[0] = cp.cp_ebx; 590 xen_signature[1] = cp.cp_ecx; 591 xen_signature[2] = cp.cp_edx; 592 xen_signature[3] = 0; 593 xen_str = (char *)xen_signature; 594 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) 595 xpv_is_hvm = 1; 596 } 597 #endif /* __xpv */ 598 599 uint_t 600 cpuid_pass1(cpu_t *cpu) 601 { 602 uint32_t mask_ecx, mask_edx; 603 uint_t feature = X86_CPUID; 604 struct cpuid_info *cpi; 605 struct cpuid_regs *cp; 606 int xcpuid; 607 #if !defined(__xpv) 608 extern int idle_cpu_prefer_mwait; 609 #endif 610 611 /* 612 * Space statically allocated for cpu0, ensure pointer is set 613 */ 614 if (cpu->cpu_id == 0) 615 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 616 cpi = cpu->cpu_m.mcpu_cpi; 617 ASSERT(cpi != NULL); 618 cp = &cpi->cpi_std[0]; 619 cp->cp_eax = 0; 620 cpi->cpi_maxeax = __cpuid_insn(cp); 621 { 622 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 623 *iptr++ = cp->cp_ebx; 624 *iptr++ = cp->cp_edx; 625 *iptr++ = cp->cp_ecx; 626 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 627 } 628 629 /* 630 * Map the vendor string to a type code 631 */ 632 if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0) 633 cpi->cpi_vendor = X86_VENDOR_Intel; 634 else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0) 635 cpi->cpi_vendor = X86_VENDOR_AMD; 636 else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0) 637 cpi->cpi_vendor = X86_VENDOR_TM; 638 else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0) 639 /* 640 * CyrixInstead is a variable used by the Cyrix detection code 641 * in locore. 642 */ 643 cpi->cpi_vendor = X86_VENDOR_Cyrix; 644 else if (strcmp(cpi->cpi_vendorstr, "UMC UMC UMC ") == 0) 645 cpi->cpi_vendor = X86_VENDOR_UMC; 646 else if (strcmp(cpi->cpi_vendorstr, "NexGenDriven") == 0) 647 cpi->cpi_vendor = X86_VENDOR_NexGen; 648 else if (strcmp(cpi->cpi_vendorstr, "CentaurHauls") == 0) 649 cpi->cpi_vendor = X86_VENDOR_Centaur; 650 else if (strcmp(cpi->cpi_vendorstr, "RiseRiseRise") == 0) 651 cpi->cpi_vendor = X86_VENDOR_Rise; 652 else if (strcmp(cpi->cpi_vendorstr, "SiS SiS SiS ") == 0) 653 cpi->cpi_vendor = X86_VENDOR_SiS; 654 else if (strcmp(cpi->cpi_vendorstr, "Geode by NSC") == 0) 655 cpi->cpi_vendor = X86_VENDOR_NSC; 656 else 657 cpi->cpi_vendor = X86_VENDOR_IntelClone; 658 659 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 660 661 /* 662 * Limit the range in case of weird hardware 663 */ 664 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 665 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 666 if (cpi->cpi_maxeax < 1) 667 goto pass1_done; 668 669 cp = &cpi->cpi_std[1]; 670 cp->cp_eax = 1; 671 (void) __cpuid_insn(cp); 672 673 /* 674 * Extract identifying constants for easy access. 675 */ 676 cpi->cpi_model = CPI_MODEL(cpi); 677 cpi->cpi_family = CPI_FAMILY(cpi); 678 679 if (cpi->cpi_family == 0xf) 680 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 681 682 /* 683 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 684 * Intel, and presumably everyone else, uses model == 0xf, as 685 * one would expect (max value means possible overflow). Sigh. 
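         *
         * A worked example with illustrative values: a part reporting
         * base family 0xf and extended family 0x1 was already folded to
         * cpi_family 0x10 above; if it also reports base model 0x2 and
         * extended model 0x0, cpi_model stays 0x2.  An Intel family 6
         * part with extended model 0x1 and base model 0x7 ends up with
         * cpi_model 0x17.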
686 */ 687 688 switch (cpi->cpi_vendor) { 689 case X86_VENDOR_Intel: 690 if (IS_EXTENDED_MODEL_INTEL(cpi)) 691 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 692 break; 693 case X86_VENDOR_AMD: 694 if (CPI_FAMILY(cpi) == 0xf) 695 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 696 break; 697 default: 698 if (cpi->cpi_model == 0xf) 699 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 700 break; 701 } 702 703 cpi->cpi_step = CPI_STEP(cpi); 704 cpi->cpi_brandid = CPI_BRANDID(cpi); 705 706 /* 707 * *default* assumptions: 708 * - believe %edx feature word 709 * - ignore %ecx feature word 710 * - 32-bit virtual and physical addressing 711 */ 712 mask_edx = 0xffffffff; 713 mask_ecx = 0; 714 715 cpi->cpi_pabits = cpi->cpi_vabits = 32; 716 717 switch (cpi->cpi_vendor) { 718 case X86_VENDOR_Intel: 719 if (cpi->cpi_family == 5) 720 x86_type = X86_TYPE_P5; 721 else if (IS_LEGACY_P6(cpi)) { 722 x86_type = X86_TYPE_P6; 723 pentiumpro_bug4046376 = 1; 724 pentiumpro_bug4064495 = 1; 725 /* 726 * Clear the SEP bit when it was set erroneously 727 */ 728 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 729 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 730 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 731 x86_type = X86_TYPE_P4; 732 /* 733 * We don't currently depend on any of the %ecx 734 * features until Prescott, so we'll only check 735 * this from P4 onwards. We might want to revisit 736 * that idea later. 737 */ 738 mask_ecx = 0xffffffff; 739 } else if (cpi->cpi_family > 0xf) 740 mask_ecx = 0xffffffff; 741 /* 742 * We don't support MONITOR/MWAIT if leaf 5 is not available 743 * to obtain the monitor linesize. 744 */ 745 if (cpi->cpi_maxeax < 5) 746 mask_ecx &= ~CPUID_INTC_ECX_MON; 747 break; 748 case X86_VENDOR_IntelClone: 749 default: 750 break; 751 case X86_VENDOR_AMD: 752 #if defined(OPTERON_ERRATUM_108) 753 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 754 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 755 cpi->cpi_model = 0xc; 756 } else 757 #endif 758 if (cpi->cpi_family == 5) { 759 /* 760 * AMD K5 and K6 761 * 762 * These CPUs have an incomplete implementation 763 * of MCA/MCE which we mask away. 764 */ 765 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 766 767 /* 768 * Model 0 uses the wrong (APIC) bit 769 * to indicate PGE. Fix it here. 770 */ 771 if (cpi->cpi_model == 0) { 772 if (cp->cp_edx & 0x200) { 773 cp->cp_edx &= ~0x200; 774 cp->cp_edx |= CPUID_INTC_EDX_PGE; 775 } 776 } 777 778 /* 779 * Early models had problems w/ MMX; disable. 780 */ 781 if (cpi->cpi_model < 6) 782 mask_edx &= ~CPUID_INTC_EDX_MMX; 783 } 784 785 /* 786 * For newer families, SSE3 and CX16, at least, are valid; 787 * enable all 788 */ 789 if (cpi->cpi_family >= 0xf) 790 mask_ecx = 0xffffffff; 791 /* 792 * We don't support MONITOR/MWAIT if leaf 5 is not available 793 * to obtain the monitor linesize. 794 */ 795 if (cpi->cpi_maxeax < 5) 796 mask_ecx &= ~CPUID_INTC_ECX_MON; 797 798 #if !defined(__xpv) 799 /* 800 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 801 * processors. AMD does not intend MWAIT to be used in the cpu 802 * idle loop on current and future processors. 10h and future 803 * AMD processors use more power in MWAIT than HLT. 804 * Pre-family-10h Opterons do not have the MWAIT instruction. 
805 */ 806 idle_cpu_prefer_mwait = 0; 807 #endif 808 809 break; 810 case X86_VENDOR_TM: 811 /* 812 * workaround the NT workaround in CMS 4.1 813 */ 814 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 815 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 816 cp->cp_edx |= CPUID_INTC_EDX_CX8; 817 break; 818 case X86_VENDOR_Centaur: 819 /* 820 * workaround the NT workarounds again 821 */ 822 if (cpi->cpi_family == 6) 823 cp->cp_edx |= CPUID_INTC_EDX_CX8; 824 break; 825 case X86_VENDOR_Cyrix: 826 /* 827 * We rely heavily on the probing in locore 828 * to actually figure out what parts, if any, 829 * of the Cyrix cpuid instruction to believe. 830 */ 831 switch (x86_type) { 832 case X86_TYPE_CYRIX_486: 833 mask_edx = 0; 834 break; 835 case X86_TYPE_CYRIX_6x86: 836 mask_edx = 0; 837 break; 838 case X86_TYPE_CYRIX_6x86L: 839 mask_edx = 840 CPUID_INTC_EDX_DE | 841 CPUID_INTC_EDX_CX8; 842 break; 843 case X86_TYPE_CYRIX_6x86MX: 844 mask_edx = 845 CPUID_INTC_EDX_DE | 846 CPUID_INTC_EDX_MSR | 847 CPUID_INTC_EDX_CX8 | 848 CPUID_INTC_EDX_PGE | 849 CPUID_INTC_EDX_CMOV | 850 CPUID_INTC_EDX_MMX; 851 break; 852 case X86_TYPE_CYRIX_GXm: 853 mask_edx = 854 CPUID_INTC_EDX_MSR | 855 CPUID_INTC_EDX_CX8 | 856 CPUID_INTC_EDX_CMOV | 857 CPUID_INTC_EDX_MMX; 858 break; 859 case X86_TYPE_CYRIX_MediaGX: 860 break; 861 case X86_TYPE_CYRIX_MII: 862 case X86_TYPE_VIA_CYRIX_III: 863 mask_edx = 864 CPUID_INTC_EDX_DE | 865 CPUID_INTC_EDX_TSC | 866 CPUID_INTC_EDX_MSR | 867 CPUID_INTC_EDX_CX8 | 868 CPUID_INTC_EDX_PGE | 869 CPUID_INTC_EDX_CMOV | 870 CPUID_INTC_EDX_MMX; 871 break; 872 default: 873 break; 874 } 875 break; 876 } 877 878 #if defined(__xpv) 879 /* 880 * Do not support MONITOR/MWAIT under a hypervisor 881 */ 882 mask_ecx &= ~CPUID_INTC_ECX_MON; 883 #endif /* __xpv */ 884 885 /* 886 * Now we've figured out the masks that determine 887 * which bits we choose to believe, apply the masks 888 * to the feature words, then map the kernel's view 889 * of these feature words into its feature word. 
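         *
         * For example, if CPUID_INTC_EDX_TSC survives mask_edx below,
         * X86_TSC is set in the returned feature word; a bit masked off
         * here is treated as if the processor had never advertised it.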
890 */ 891 cp->cp_edx &= mask_edx; 892 cp->cp_ecx &= mask_ecx; 893 894 /* 895 * apply any platform restrictions (we don't call this 896 * immediately after __cpuid_insn here, because we need the 897 * workarounds applied above first) 898 */ 899 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 900 901 /* 902 * fold in overrides from the "eeprom" mechanism 903 */ 904 cp->cp_edx |= cpuid_feature_edx_include; 905 cp->cp_edx &= ~cpuid_feature_edx_exclude; 906 907 cp->cp_ecx |= cpuid_feature_ecx_include; 908 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 909 910 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 911 feature |= X86_LARGEPAGE; 912 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 913 feature |= X86_TSC; 914 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 915 feature |= X86_MSR; 916 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 917 feature |= X86_MTRR; 918 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 919 feature |= X86_PGE; 920 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 921 feature |= X86_CMOV; 922 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 923 feature |= X86_MMX; 924 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 925 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 926 feature |= X86_MCA; 927 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 928 feature |= X86_PAE; 929 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 930 feature |= X86_CX8; 931 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 932 feature |= X86_CX16; 933 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 934 feature |= X86_PAT; 935 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 936 feature |= X86_SEP; 937 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 938 /* 939 * In our implementation, fxsave/fxrstor 940 * are prerequisites before we'll even 941 * try and do SSE things. 942 */ 943 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 944 feature |= X86_SSE; 945 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 946 feature |= X86_SSE2; 947 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 948 feature |= X86_SSE3; 949 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 950 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 951 feature |= X86_SSSE3; 952 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 953 feature |= X86_SSE4_1; 954 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 955 feature |= X86_SSE4_2; 956 } 957 } 958 if (cp->cp_edx & CPUID_INTC_EDX_DE) 959 feature |= X86_DE; 960 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 961 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 962 feature |= X86_MWAIT; 963 } 964 965 if (feature & X86_PAE) 966 cpi->cpi_pabits = 36; 967 968 /* 969 * Hyperthreading configuration is slightly tricky on Intel 970 * and pure clones, and even trickier on AMD. 971 * 972 * (AMD chose to set the HTT bit on their CMP processors, 973 * even though they're not actually hyperthreaded. Thus it 974 * takes a bit more work to figure out what's really going 975 * on ... see the handling of the CMP_LGCY bit below) 976 */ 977 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 978 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 979 if (cpi->cpi_ncpu_per_chip > 1) 980 feature |= X86_HTT; 981 } else { 982 cpi->cpi_ncpu_per_chip = 1; 983 } 984 985 /* 986 * Work on the "extended" feature information, doing 987 * some basic initialization for cpuid_pass2() 988 */ 989 xcpuid = 0; 990 switch (cpi->cpi_vendor) { 991 case X86_VENDOR_Intel: 992 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 993 xcpuid++; 994 break; 995 case X86_VENDOR_AMD: 996 if (cpi->cpi_family > 5 || 997 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 998 xcpuid++; 999 break; 1000 case X86_VENDOR_Cyrix: 1001 /* 1002 * Only these Cyrix CPUs are -known- to support 1003 * extended cpuid operations. 
1004 */ 1005 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1006 x86_type == X86_TYPE_CYRIX_GXm) 1007 xcpuid++; 1008 break; 1009 case X86_VENDOR_Centaur: 1010 case X86_VENDOR_TM: 1011 default: 1012 xcpuid++; 1013 break; 1014 } 1015 1016 if (xcpuid) { 1017 cp = &cpi->cpi_extd[0]; 1018 cp->cp_eax = 0x80000000; 1019 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1020 } 1021 1022 if (cpi->cpi_xmaxeax & 0x80000000) { 1023 1024 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1025 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1026 1027 switch (cpi->cpi_vendor) { 1028 case X86_VENDOR_Intel: 1029 case X86_VENDOR_AMD: 1030 if (cpi->cpi_xmaxeax < 0x80000001) 1031 break; 1032 cp = &cpi->cpi_extd[1]; 1033 cp->cp_eax = 0x80000001; 1034 (void) __cpuid_insn(cp); 1035 1036 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1037 cpi->cpi_family == 5 && 1038 cpi->cpi_model == 6 && 1039 cpi->cpi_step == 6) { 1040 /* 1041 * K6 model 6 uses bit 10 to indicate SYSC 1042 * Later models use bit 11. Fix it here. 1043 */ 1044 if (cp->cp_edx & 0x400) { 1045 cp->cp_edx &= ~0x400; 1046 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1047 } 1048 } 1049 1050 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1051 1052 /* 1053 * Compute the additions to the kernel's feature word. 1054 */ 1055 if (cp->cp_edx & CPUID_AMD_EDX_NX) 1056 feature |= X86_NX; 1057 1058 #if defined(__amd64) 1059 /* 1 GB large page - enable only for 64 bit kernel */ 1060 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 1061 feature |= X86_1GPG; 1062 #endif 1063 1064 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1065 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1066 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 1067 feature |= X86_SSE4A; 1068 1069 /* 1070 * If both the HTT and CMP_LGCY bits are set, 1071 * then we're not actually HyperThreaded. Read 1072 * "AMD CPUID Specification" for more details. 1073 */ 1074 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1075 (feature & X86_HTT) && 1076 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1077 feature &= ~X86_HTT; 1078 feature |= X86_CMP; 1079 } 1080 #if defined(__amd64) 1081 /* 1082 * It's really tricky to support syscall/sysret in 1083 * the i386 kernel; we rely on sysenter/sysexit 1084 * instead. In the amd64 kernel, things are -way- 1085 * better. 1086 */ 1087 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 1088 feature |= X86_ASYSC; 1089 1090 /* 1091 * While we're thinking about system calls, note 1092 * that AMD processors don't support sysenter 1093 * in long mode at all, so don't try to program them. 1094 */ 1095 if (x86_vendor == X86_VENDOR_AMD) 1096 feature &= ~X86_SEP; 1097 #endif 1098 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1099 feature |= X86_TSCP; 1100 break; 1101 default: 1102 break; 1103 } 1104 1105 /* 1106 * Get CPUID data about processor cores and hyperthreads. 1107 */ 1108 switch (cpi->cpi_vendor) { 1109 case X86_VENDOR_Intel: 1110 if (cpi->cpi_maxeax >= 4) { 1111 cp = &cpi->cpi_std[4]; 1112 cp->cp_eax = 4; 1113 cp->cp_ecx = 0; 1114 (void) __cpuid_insn(cp); 1115 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1116 } 1117 /*FALLTHROUGH*/ 1118 case X86_VENDOR_AMD: 1119 if (cpi->cpi_xmaxeax < 0x80000008) 1120 break; 1121 cp = &cpi->cpi_extd[8]; 1122 cp->cp_eax = 0x80000008; 1123 (void) __cpuid_insn(cp); 1124 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1125 1126 /* 1127 * Virtual and physical address limits from 1128 * cpuid override previously guessed values. 
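                         *
                         * For example, a part returning 0x3028 in %eax
                         * here (a plausible Opteron-class value, shown
                         * only as an illustration) would yield 40
                         * physical and 48 virtual address bits.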
                         */
                        cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
                        cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
                        break;
                default:
                        break;
                }

                /*
                 * Derive the number of cores per chip
                 */
                switch (cpi->cpi_vendor) {
                case X86_VENDOR_Intel:
                        if (cpi->cpi_maxeax < 4) {
                                cpi->cpi_ncore_per_chip = 1;
                                break;
                        } else {
                                cpi->cpi_ncore_per_chip =
                                    BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
                        }
                        break;
                case X86_VENDOR_AMD:
                        if (cpi->cpi_xmaxeax < 0x80000008) {
                                cpi->cpi_ncore_per_chip = 1;
                                break;
                        } else {
                                /*
                                 * On family 0xf cpuid fn 0x80000008
                                 * ECX[7:0] "NC" is 1 less than the number
                                 * of physical cores on the chip.  In
                                 * family 0x10 this value can be affected
                                 * by "downcoring" - it reflects 1 less
                                 * than the number of cores actually
                                 * enabled on this node.
                                 */
                                cpi->cpi_ncore_per_chip =
                                    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
                        }
                        break;
                default:
                        cpi->cpi_ncore_per_chip = 1;
                        break;
                }
        } else {
                cpi->cpi_ncore_per_chip = 1;
        }

        /*
         * If more than one core, then this processor is CMP.
         */
        if (cpi->cpi_ncore_per_chip > 1)
                feature |= X86_CMP;

        /*
         * If the number of cores is the same as the number
         * of CPUs, then we cannot have HyperThreading.
         */
        if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip)
                feature &= ~X86_HTT;

        if ((feature & (X86_HTT | X86_CMP)) == 0) {
                /*
                 * Single-core single-threaded processors.
                 */
                cpi->cpi_chipid = -1;
                cpi->cpi_clogid = 0;
                cpi->cpi_coreid = cpu->cpu_id;
                cpi->cpi_pkgcoreid = 0;
        } else if (cpi->cpi_ncpu_per_chip > 1) {
                uint_t i;
                uint_t chipid_shift = 0;
                uint_t coreid_shift = 0;
                uint_t apic_id = CPI_APIC_ID(cpi);

                for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
                        chipid_shift++;
                cpi->cpi_chipid = apic_id >> chipid_shift;
                cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1);

                if (cpi->cpi_vendor == X86_VENDOR_Intel) {
                        if (feature & X86_CMP) {
                                /*
                                 * Multi-core (and possibly multi-threaded)
                                 * processors.
                                 */
                                uint_t ncpu_per_core;
                                if (cpi->cpi_ncore_per_chip == 1)
                                        ncpu_per_core = cpi->cpi_ncpu_per_chip;
                                else if (cpi->cpi_ncore_per_chip > 1)
                                        ncpu_per_core = cpi->cpi_ncpu_per_chip /
                                            cpi->cpi_ncore_per_chip;
                                /*
                                 * 8bit APIC IDs on dual core Pentiums
                                 * look like this:
                                 *
                                 * +-----------------------+------+------+
                                 * | Physical Package ID   |  MC  |  HT  |
                                 * +-----------------------+------+------+
                                 * <------- chipid -------->
                                 * <------- coreid --------------->
                                 *                          <--- clogid -->
                                 *                          <------>
                                 *                          pkgcoreid
                                 *
                                 * Where the number of bits necessary to
                                 * represent MC and HT fields together equals
                                 * the minimum number of bits necessary to
                                 * store the value of cpi->cpi_ncpu_per_chip.
                                 * Of those bits, the MC part uses the number
                                 * of bits necessary to store the value of
                                 * cpi->cpi_ncore_per_chip.
                                 */
                                for (i = 1; i < ncpu_per_core; i <<= 1)
                                        coreid_shift++;
                                cpi->cpi_coreid = apic_id >> coreid_shift;
                                cpi->cpi_pkgcoreid = cpi->cpi_clogid >>
                                    coreid_shift;
                        } else if (feature & X86_HTT) {
                                /*
                                 * Single-core multi-threaded processors.
1248 */ 1249 cpi->cpi_coreid = cpi->cpi_chipid; 1250 cpi->cpi_pkgcoreid = 0; 1251 } 1252 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1253 /* 1254 * AMD CMP chips currently have a single thread per 1255 * core, with 2 cores on family 0xf and 2, 3 or 4 1256 * cores on family 0x10. 1257 * 1258 * Since no two cpus share a core we must assign a 1259 * distinct coreid per cpu, and we do this by using 1260 * the cpu_id. This scheme does not, however, 1261 * guarantee that sibling cores of a chip will have 1262 * sequential coreids starting at a multiple of the 1263 * number of cores per chip - that is usually the 1264 * case, but if the ACPI MADT table is presented 1265 * in a different order then we need to perform a 1266 * few more gymnastics for the pkgcoreid. 1267 * 1268 * In family 0xf CMPs there are 2 cores on all nodes 1269 * present - no mixing of single and dual core parts. 1270 * 1271 * In family 0x10 CMPs cpuid fn 2 ECX[15:12] 1272 * "ApicIdCoreIdSize[3:0]" tells us how 1273 * many least-significant bits in the ApicId 1274 * are used to represent the core number 1275 * within the node. Cores are always 1276 * numbered sequentially from 0 regardless 1277 * of how many or which are disabled, and 1278 * there seems to be no way to discover the 1279 * real core id when some are disabled. 1280 */ 1281 cpi->cpi_coreid = cpu->cpu_id; 1282 1283 if (cpi->cpi_family == 0x10 && 1284 cpi->cpi_xmaxeax >= 0x80000008) { 1285 int coreidsz = 1286 BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 1287 1288 cpi->cpi_pkgcoreid = 1289 apic_id & ((1 << coreidsz) - 1); 1290 } else { 1291 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 1292 } 1293 } else { 1294 /* 1295 * All other processors are currently 1296 * assumed to have single cores. 1297 */ 1298 cpi->cpi_coreid = cpi->cpi_chipid; 1299 cpi->cpi_pkgcoreid = 0; 1300 } 1301 } 1302 1303 cpi->cpi_apicid = CPI_APIC_ID(cpi); 1304 1305 /* 1306 * Synthesize chip "revision" and socket type 1307 */ 1308 synth_info(cpi); 1309 1310 pass1_done: 1311 #if !defined(__xpv) 1312 check_for_hvm(); 1313 #endif 1314 cpi->cpi_pass = 1; 1315 return (feature); 1316 } 1317 1318 /* 1319 * Make copies of the cpuid table entries we depend on, in 1320 * part for ease of parsing now, in part so that we have only 1321 * one place to correct any of it, in part for ease of 1322 * later export to userland, and in part so we can look at 1323 * this stuff in a crash dump. 1324 */ 1325 1326 /*ARGSUSED*/ 1327 void 1328 cpuid_pass2(cpu_t *cpu) 1329 { 1330 uint_t n, nmax; 1331 int i; 1332 struct cpuid_regs *cp; 1333 uint8_t *dp; 1334 uint32_t *iptr; 1335 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1336 1337 ASSERT(cpi->cpi_pass == 1); 1338 1339 if (cpi->cpi_maxeax < 1) 1340 goto pass2_done; 1341 1342 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1343 nmax = NMAX_CPI_STD; 1344 /* 1345 * (We already handled n == 0 and n == 1 in pass 1) 1346 */ 1347 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1348 cp->cp_eax = n; 1349 1350 /* 1351 * CPUID function 4 expects %ecx to be initialized 1352 * with an index which indicates which cache to return 1353 * information about. The OS is expected to call function 4 1354 * with %ecx set to 0, 1, 2, ... until it returns with 1355 * EAX[4:0] set to 0, which indicates there are no more 1356 * caches. 1357 * 1358 * Here, populate cpi_std[4] with the information returned by 1359 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1360 * when dynamic memory allocation becomes available. 
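                 *
                 * As an illustration, %ecx == 0 might describe the L1
                 * data cache, %ecx == 1 the L1 instruction cache and
                 * %ecx == 2 the L2 cache; the exact order is processor
                 * specific, and enumeration stops at the first index
                 * whose cache type field (EAX[4:0]) reads 0.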
1361 * 1362 * Note: we need to explicitly initialize %ecx here, since 1363 * function 4 may have been previously invoked. 1364 */ 1365 if (n == 4) 1366 cp->cp_ecx = 0; 1367 1368 (void) __cpuid_insn(cp); 1369 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1370 switch (n) { 1371 case 2: 1372 /* 1373 * "the lower 8 bits of the %eax register 1374 * contain a value that identifies the number 1375 * of times the cpuid [instruction] has to be 1376 * executed to obtain a complete image of the 1377 * processor's caching systems." 1378 * 1379 * How *do* they make this stuff up? 1380 */ 1381 cpi->cpi_ncache = sizeof (*cp) * 1382 BITX(cp->cp_eax, 7, 0); 1383 if (cpi->cpi_ncache == 0) 1384 break; 1385 cpi->cpi_ncache--; /* skip count byte */ 1386 1387 /* 1388 * Well, for now, rather than attempt to implement 1389 * this slightly dubious algorithm, we just look 1390 * at the first 15 .. 1391 */ 1392 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1393 cpi->cpi_ncache = sizeof (*cp) - 1; 1394 1395 dp = cpi->cpi_cacheinfo; 1396 if (BITX(cp->cp_eax, 31, 31) == 0) { 1397 uint8_t *p = (void *)&cp->cp_eax; 1398 for (i = 1; i < 4; i++) 1399 if (p[i] != 0) 1400 *dp++ = p[i]; 1401 } 1402 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1403 uint8_t *p = (void *)&cp->cp_ebx; 1404 for (i = 0; i < 4; i++) 1405 if (p[i] != 0) 1406 *dp++ = p[i]; 1407 } 1408 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1409 uint8_t *p = (void *)&cp->cp_ecx; 1410 for (i = 0; i < 4; i++) 1411 if (p[i] != 0) 1412 *dp++ = p[i]; 1413 } 1414 if (BITX(cp->cp_edx, 31, 31) == 0) { 1415 uint8_t *p = (void *)&cp->cp_edx; 1416 for (i = 0; i < 4; i++) 1417 if (p[i] != 0) 1418 *dp++ = p[i]; 1419 } 1420 break; 1421 1422 case 3: /* Processor serial number, if PSN supported */ 1423 break; 1424 1425 case 4: /* Deterministic cache parameters */ 1426 break; 1427 1428 case 5: /* Monitor/Mwait parameters */ 1429 { 1430 size_t mwait_size; 1431 1432 /* 1433 * check cpi_mwait.support which was set in cpuid_pass1 1434 */ 1435 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1436 break; 1437 1438 /* 1439 * Protect ourself from insane mwait line size. 1440 * Workaround for incomplete hardware emulator(s). 1441 */ 1442 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1443 if (mwait_size < sizeof (uint32_t) || 1444 !ISP2(mwait_size)) { 1445 #if DEBUG 1446 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1447 "size %ld", 1448 cpu->cpu_id, (long)mwait_size); 1449 #endif 1450 break; 1451 } 1452 1453 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1454 cpi->cpi_mwait.mon_max = mwait_size; 1455 if (MWAIT_EXTENSION(cpi)) { 1456 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1457 if (MWAIT_INT_ENABLE(cpi)) 1458 cpi->cpi_mwait.support |= 1459 MWAIT_ECX_INT_ENABLE; 1460 } 1461 break; 1462 } 1463 default: 1464 break; 1465 } 1466 } 1467 1468 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) { 1469 cp->cp_eax = 0xB; 1470 cp->cp_ecx = 0; 1471 1472 (void) __cpuid_insn(cp); 1473 1474 /* 1475 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1476 * indicates that the extended topology enumeration leaf is 1477 * available. 
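                 *
                 * Each sub-leaf reports a level type in %ecx[15:8]
                 * (1 = SMT, 2 = core) and, in %eax[4:0], the number of
                 * APIC id bits to shift away to reach the next level.
                 * For instance (illustrative only), a part with two
                 * threads per core would report a shift of 1 at the
                 * SMT level.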
1478 */ 1479 if (cp->cp_ebx) { 1480 uint32_t x2apic_id; 1481 uint_t coreid_shift = 0; 1482 uint_t ncpu_per_core = 1; 1483 uint_t chipid_shift = 0; 1484 uint_t ncpu_per_chip = 1; 1485 uint_t i; 1486 uint_t level; 1487 1488 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1489 cp->cp_eax = 0xB; 1490 cp->cp_ecx = i; 1491 1492 (void) __cpuid_insn(cp); 1493 level = CPI_CPU_LEVEL_TYPE(cp); 1494 1495 if (level == 1) { 1496 x2apic_id = cp->cp_edx; 1497 coreid_shift = BITX(cp->cp_eax, 4, 0); 1498 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1499 } else if (level == 2) { 1500 x2apic_id = cp->cp_edx; 1501 chipid_shift = BITX(cp->cp_eax, 4, 0); 1502 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1503 } 1504 } 1505 1506 cpi->cpi_apicid = x2apic_id; 1507 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1508 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1509 ncpu_per_core; 1510 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1511 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1512 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1513 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1514 } 1515 } 1516 1517 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1518 goto pass2_done; 1519 1520 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1521 nmax = NMAX_CPI_EXTD; 1522 /* 1523 * Copy the extended properties, fixing them as we go. 1524 * (We already handled n == 0 and n == 1 in pass 1) 1525 */ 1526 iptr = (void *)cpi->cpi_brandstr; 1527 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1528 cp->cp_eax = 0x80000000 + n; 1529 (void) __cpuid_insn(cp); 1530 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1531 switch (n) { 1532 case 2: 1533 case 3: 1534 case 4: 1535 /* 1536 * Extract the brand string 1537 */ 1538 *iptr++ = cp->cp_eax; 1539 *iptr++ = cp->cp_ebx; 1540 *iptr++ = cp->cp_ecx; 1541 *iptr++ = cp->cp_edx; 1542 break; 1543 case 5: 1544 switch (cpi->cpi_vendor) { 1545 case X86_VENDOR_AMD: 1546 /* 1547 * The Athlon and Duron were the first 1548 * parts to report the sizes of the 1549 * TLB for large pages. Before then, 1550 * we don't trust the data. 1551 */ 1552 if (cpi->cpi_family < 6 || 1553 (cpi->cpi_family == 6 && 1554 cpi->cpi_model < 1)) 1555 cp->cp_eax = 0; 1556 break; 1557 default: 1558 break; 1559 } 1560 break; 1561 case 6: 1562 switch (cpi->cpi_vendor) { 1563 case X86_VENDOR_AMD: 1564 /* 1565 * The Athlon and Duron were the first 1566 * AMD parts with L2 TLB's. 1567 * Before then, don't trust the data. 1568 */ 1569 if (cpi->cpi_family < 6 || 1570 cpi->cpi_family == 6 && 1571 cpi->cpi_model < 1) 1572 cp->cp_eax = cp->cp_ebx = 0; 1573 /* 1574 * AMD Duron rev A0 reports L2 1575 * cache size incorrectly as 1K 1576 * when it is really 64K 1577 */ 1578 if (cpi->cpi_family == 6 && 1579 cpi->cpi_model == 3 && 1580 cpi->cpi_step == 0) { 1581 cp->cp_ecx &= 0xffff; 1582 cp->cp_ecx |= 0x400000; 1583 } 1584 break; 1585 case X86_VENDOR_Cyrix: /* VIA C3 */ 1586 /* 1587 * VIA C3 processors are a bit messed 1588 * up w.r.t. encoding cache sizes in %ecx 1589 */ 1590 if (cpi->cpi_family != 6) 1591 break; 1592 /* 1593 * model 7 and 8 were incorrectly encoded 1594 * 1595 * xxx is model 8 really broken? 
1596 */ 1597 if (cpi->cpi_model == 7 || 1598 cpi->cpi_model == 8) 1599 cp->cp_ecx = 1600 BITX(cp->cp_ecx, 31, 24) << 16 | 1601 BITX(cp->cp_ecx, 23, 16) << 12 | 1602 BITX(cp->cp_ecx, 15, 8) << 8 | 1603 BITX(cp->cp_ecx, 7, 0); 1604 /* 1605 * model 9 stepping 1 has wrong associativity 1606 */ 1607 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1608 cp->cp_ecx |= 8 << 12; 1609 break; 1610 case X86_VENDOR_Intel: 1611 /* 1612 * Extended L2 Cache features function. 1613 * First appeared on Prescott. 1614 */ 1615 default: 1616 break; 1617 } 1618 break; 1619 default: 1620 break; 1621 } 1622 } 1623 1624 pass2_done: 1625 cpi->cpi_pass = 2; 1626 } 1627 1628 static const char * 1629 intel_cpubrand(const struct cpuid_info *cpi) 1630 { 1631 int i; 1632 1633 if ((x86_feature & X86_CPUID) == 0 || 1634 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1635 return ("i486"); 1636 1637 switch (cpi->cpi_family) { 1638 case 5: 1639 return ("Intel Pentium(r)"); 1640 case 6: 1641 switch (cpi->cpi_model) { 1642 uint_t celeron, xeon; 1643 const struct cpuid_regs *cp; 1644 case 0: 1645 case 1: 1646 case 2: 1647 return ("Intel Pentium(r) Pro"); 1648 case 3: 1649 case 4: 1650 return ("Intel Pentium(r) II"); 1651 case 6: 1652 return ("Intel Celeron(r)"); 1653 case 5: 1654 case 7: 1655 celeron = xeon = 0; 1656 cp = &cpi->cpi_std[2]; /* cache info */ 1657 1658 for (i = 1; i < 4; i++) { 1659 uint_t tmp; 1660 1661 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1662 if (tmp == 0x40) 1663 celeron++; 1664 if (tmp >= 0x44 && tmp <= 0x45) 1665 xeon++; 1666 } 1667 1668 for (i = 0; i < 2; i++) { 1669 uint_t tmp; 1670 1671 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1672 if (tmp == 0x40) 1673 celeron++; 1674 else if (tmp >= 0x44 && tmp <= 0x45) 1675 xeon++; 1676 } 1677 1678 for (i = 0; i < 4; i++) { 1679 uint_t tmp; 1680 1681 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1682 if (tmp == 0x40) 1683 celeron++; 1684 else if (tmp >= 0x44 && tmp <= 0x45) 1685 xeon++; 1686 } 1687 1688 for (i = 0; i < 4; i++) { 1689 uint_t tmp; 1690 1691 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1692 if (tmp == 0x40) 1693 celeron++; 1694 else if (tmp >= 0x44 && tmp <= 0x45) 1695 xeon++; 1696 } 1697 1698 if (celeron) 1699 return ("Intel Celeron(r)"); 1700 if (xeon) 1701 return (cpi->cpi_model == 5 ? 1702 "Intel Pentium(r) II Xeon(tm)" : 1703 "Intel Pentium(r) III Xeon(tm)"); 1704 return (cpi->cpi_model == 5 ? 
1705 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1706 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1707 default: 1708 break; 1709 } 1710 default: 1711 break; 1712 } 1713 1714 /* BrandID is present if the field is nonzero */ 1715 if (cpi->cpi_brandid != 0) { 1716 static const struct { 1717 uint_t bt_bid; 1718 const char *bt_str; 1719 } brand_tbl[] = { 1720 { 0x1, "Intel(r) Celeron(r)" }, 1721 { 0x2, "Intel(r) Pentium(r) III" }, 1722 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1723 { 0x4, "Intel(r) Pentium(r) III" }, 1724 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1725 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1726 { 0x8, "Intel(r) Pentium(r) 4" }, 1727 { 0x9, "Intel(r) Pentium(r) 4" }, 1728 { 0xa, "Intel(r) Celeron(r)" }, 1729 { 0xb, "Intel(r) Xeon(tm)" }, 1730 { 0xc, "Intel(r) Xeon(tm) MP" }, 1731 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1732 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1733 { 0x11, "Mobile Genuine Intel(r)" }, 1734 { 0x12, "Intel(r) Celeron(r) M" }, 1735 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1736 { 0x14, "Intel(r) Celeron(r)" }, 1737 { 0x15, "Mobile Genuine Intel(r)" }, 1738 { 0x16, "Intel(r) Pentium(r) M" }, 1739 { 0x17, "Mobile Intel(r) Celeron(r)" } 1740 }; 1741 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1742 uint_t sgn; 1743 1744 sgn = (cpi->cpi_family << 8) | 1745 (cpi->cpi_model << 4) | cpi->cpi_step; 1746 1747 for (i = 0; i < btblmax; i++) 1748 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1749 break; 1750 if (i < btblmax) { 1751 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1752 return ("Intel(r) Celeron(r)"); 1753 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1754 return ("Intel(r) Xeon(tm) MP"); 1755 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1756 return ("Intel(r) Xeon(tm)"); 1757 return (brand_tbl[i].bt_str); 1758 } 1759 } 1760 1761 return (NULL); 1762 } 1763 1764 static const char * 1765 amd_cpubrand(const struct cpuid_info *cpi) 1766 { 1767 if ((x86_feature & X86_CPUID) == 0 || 1768 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1769 return ("i486 compatible"); 1770 1771 switch (cpi->cpi_family) { 1772 case 5: 1773 switch (cpi->cpi_model) { 1774 case 0: 1775 case 1: 1776 case 2: 1777 case 3: 1778 case 4: 1779 case 5: 1780 return ("AMD-K5(r)"); 1781 case 6: 1782 case 7: 1783 return ("AMD-K6(r)"); 1784 case 8: 1785 return ("AMD-K6(r)-2"); 1786 case 9: 1787 return ("AMD-K6(r)-III"); 1788 default: 1789 return ("AMD (family 5)"); 1790 } 1791 case 6: 1792 switch (cpi->cpi_model) { 1793 case 1: 1794 return ("AMD-K7(tm)"); 1795 case 0: 1796 case 2: 1797 case 4: 1798 return ("AMD Athlon(tm)"); 1799 case 3: 1800 case 7: 1801 return ("AMD Duron(tm)"); 1802 case 6: 1803 case 8: 1804 case 10: 1805 /* 1806 * Use the L2 cache size to distinguish 1807 */ 1808 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
1809 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1810 default: 1811 return ("AMD (family 6)"); 1812 } 1813 default: 1814 break; 1815 } 1816 1817 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1818 cpi->cpi_brandid != 0) { 1819 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1820 case 3: 1821 return ("AMD Opteron(tm) UP 1xx"); 1822 case 4: 1823 return ("AMD Opteron(tm) DP 2xx"); 1824 case 5: 1825 return ("AMD Opteron(tm) MP 8xx"); 1826 default: 1827 return ("AMD Opteron(tm)"); 1828 } 1829 } 1830 1831 return (NULL); 1832 } 1833 1834 static const char * 1835 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1836 { 1837 if ((x86_feature & X86_CPUID) == 0 || 1838 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1839 type == X86_TYPE_CYRIX_486) 1840 return ("i486 compatible"); 1841 1842 switch (type) { 1843 case X86_TYPE_CYRIX_6x86: 1844 return ("Cyrix 6x86"); 1845 case X86_TYPE_CYRIX_6x86L: 1846 return ("Cyrix 6x86L"); 1847 case X86_TYPE_CYRIX_6x86MX: 1848 return ("Cyrix 6x86MX"); 1849 case X86_TYPE_CYRIX_GXm: 1850 return ("Cyrix GXm"); 1851 case X86_TYPE_CYRIX_MediaGX: 1852 return ("Cyrix MediaGX"); 1853 case X86_TYPE_CYRIX_MII: 1854 return ("Cyrix M2"); 1855 case X86_TYPE_VIA_CYRIX_III: 1856 return ("VIA Cyrix M3"); 1857 default: 1858 /* 1859 * Have another wild guess .. 1860 */ 1861 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1862 return ("Cyrix 5x86"); 1863 else if (cpi->cpi_family == 5) { 1864 switch (cpi->cpi_model) { 1865 case 2: 1866 return ("Cyrix 6x86"); /* Cyrix M1 */ 1867 case 4: 1868 return ("Cyrix MediaGX"); 1869 default: 1870 break; 1871 } 1872 } else if (cpi->cpi_family == 6) { 1873 switch (cpi->cpi_model) { 1874 case 0: 1875 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1876 case 5: 1877 case 6: 1878 case 7: 1879 case 8: 1880 case 9: 1881 return ("VIA C3"); 1882 default: 1883 break; 1884 } 1885 } 1886 break; 1887 } 1888 return (NULL); 1889 } 1890 1891 /* 1892 * This only gets called in the case that the CPU extended 1893 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1894 * aren't available, or contain null bytes for some reason. 1895 */ 1896 static void 1897 fabricate_brandstr(struct cpuid_info *cpi) 1898 { 1899 const char *brand = NULL; 1900 1901 switch (cpi->cpi_vendor) { 1902 case X86_VENDOR_Intel: 1903 brand = intel_cpubrand(cpi); 1904 break; 1905 case X86_VENDOR_AMD: 1906 brand = amd_cpubrand(cpi); 1907 break; 1908 case X86_VENDOR_Cyrix: 1909 brand = cyrix_cpubrand(cpi, x86_type); 1910 break; 1911 case X86_VENDOR_NexGen: 1912 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1913 brand = "NexGen Nx586"; 1914 break; 1915 case X86_VENDOR_Centaur: 1916 if (cpi->cpi_family == 5) 1917 switch (cpi->cpi_model) { 1918 case 4: 1919 brand = "Centaur C6"; 1920 break; 1921 case 8: 1922 brand = "Centaur C2"; 1923 break; 1924 case 9: 1925 brand = "Centaur C3"; 1926 break; 1927 default: 1928 break; 1929 } 1930 break; 1931 case X86_VENDOR_Rise: 1932 if (cpi->cpi_family == 5 && 1933 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1934 brand = "Rise mP6"; 1935 break; 1936 case X86_VENDOR_SiS: 1937 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1938 brand = "SiS 55x"; 1939 break; 1940 case X86_VENDOR_TM: 1941 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 1942 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1943 break; 1944 case X86_VENDOR_NSC: 1945 case X86_VENDOR_UMC: 1946 default: 1947 break; 1948 } 1949 if (brand) { 1950 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1951 return; 1952 } 1953 1954 /* 1955 * If all else fails ... 
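         *
         * For instance, an unrecognized GenuineIntel family 6, model 15,
         * stepping 11 part would be labelled "GenuineIntel 6.15.11"
         * (an illustrative value, not a real product string).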
         */
        (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
            "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
            cpi->cpi_model, cpi->cpi_step);
}

/*
 * This routine is called just after kernel memory allocation
 * becomes available on cpu0, and as part of mp_startup() on
 * the other cpus.
 *
 * Fix up the brand string, and collect any information from cpuid
 * that requires dynamically allocated storage to represent.
 */
/*ARGSUSED*/
void
cpuid_pass3(cpu_t *cpu)
{
        int i, max, shft, level, size;
        struct cpuid_regs regs;
        struct cpuid_regs *cp;
        struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

        ASSERT(cpi->cpi_pass == 2);

        /*
         * Function 4: Deterministic cache parameters
         *
         * Take this opportunity to detect the number of threads
         * sharing the last level cache, and construct a corresponding
         * cache id.  The respective cpuid_info members are initialized
         * to the default case of "no last level cache sharing".
         */
        cpi->cpi_ncpu_shr_last_cache = 1;
        cpi->cpi_last_lvl_cacheid = cpu->cpu_id;

        if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {

                /*
                 * Find the # of elements (size) returned by fn 4, and along
                 * the way detect last level cache sharing details.
                 */
                bzero(&regs, sizeof (regs));
                cp = &regs;
                for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
                        cp->cp_eax = 4;
                        cp->cp_ecx = i;

                        (void) __cpuid_insn(cp);

                        if (CPI_CACHE_TYPE(cp) == 0)
                                break;
                        level = CPI_CACHE_LVL(cp);
                        if (level > max) {
                                max = level;
                                cpi->cpi_ncpu_shr_last_cache =
                                    CPI_NTHR_SHR_CACHE(cp) + 1;
                        }
                }
                cpi->cpi_std_4_size = size = i;

                /*
                 * Allocate the cpi_std_4 array.  The first element
                 * references the regs for fn 4, %ecx == 0, which
                 * cpuid_pass2() stashed in cpi->cpi_std[4].
                 */
                if (size > 0) {
                        cpi->cpi_std_4 =
                            kmem_alloc(size * sizeof (cp), KM_SLEEP);
                        cpi->cpi_std_4[0] = &cpi->cpi_std[4];

                        /*
                         * Allocate storage to hold the additional regs
                         * for function 4, %ecx == 1 .. cpi_std_4_size.
                         *
                         * The regs for fn 4, %ecx == 0 have already
                         * been allocated as indicated above.
                         */
                        for (i = 1; i < size; i++) {
                                cp = cpi->cpi_std_4[i] =
                                    kmem_zalloc(sizeof (regs), KM_SLEEP);
                                cp->cp_eax = 4;
                                cp->cp_ecx = i;

                                (void) __cpuid_insn(cp);
                        }
                }
                /*
                 * Determine the number of bits needed to represent
                 * the number of CPUs sharing the last level cache.
                 *
                 * Shift off that number of bits from the APIC id to
                 * derive the cache id.
                 */
                shft = 0;
                for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
                        shft++;
                cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
        }

        /*
         * Now fix up the brand string
         */
        if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
                fabricate_brandstr(cpi);
        } else {

                /*
                 * If we successfully extracted a brand string from the cpuid
                 * instruction, clean it up by removing leading spaces and
                 * similar junk.
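                 *
                 * For example (an illustrative string, not taken from any
                 * particular part), "  Genuine Intel(R) Xeon(TM) CPU" would
                 * come out of the loop below as "Intel(r) Xeon(tm) CPU".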
2067 */ 2068 if (cpi->cpi_brandstr[0]) { 2069 size_t maxlen = sizeof (cpi->cpi_brandstr); 2070 char *src, *dst; 2071 2072 dst = src = (char *)cpi->cpi_brandstr; 2073 src[maxlen - 1] = '\0'; 2074 /* 2075 * strip leading spaces 2076 */ 2077 while (*src == ' ') 2078 src++; 2079 /* 2080 * Remove any 'Genuine' or "Authentic" prefixes 2081 */ 2082 if (strncmp(src, "Genuine ", 8) == 0) 2083 src += 8; 2084 if (strncmp(src, "Authentic ", 10) == 0) 2085 src += 10; 2086 2087 /* 2088 * Now do an in-place copy. 2089 * Map (R) to (r) and (TM) to (tm). 2090 * The era of teletypes is long gone, and there's 2091 * -really- no need to shout. 2092 */ 2093 while (*src != '\0') { 2094 if (src[0] == '(') { 2095 if (strncmp(src + 1, "R)", 2) == 0) { 2096 (void) strncpy(dst, "(r)", 3); 2097 src += 3; 2098 dst += 3; 2099 continue; 2100 } 2101 if (strncmp(src + 1, "TM)", 3) == 0) { 2102 (void) strncpy(dst, "(tm)", 4); 2103 src += 4; 2104 dst += 4; 2105 continue; 2106 } 2107 } 2108 *dst++ = *src++; 2109 } 2110 *dst = '\0'; 2111 2112 /* 2113 * Finally, remove any trailing spaces 2114 */ 2115 while (--dst > cpi->cpi_brandstr) 2116 if (*dst == ' ') 2117 *dst = '\0'; 2118 else 2119 break; 2120 } else 2121 fabricate_brandstr(cpi); 2122 } 2123 cpi->cpi_pass = 3; 2124 } 2125 2126 /* 2127 * This routine is called out of bind_hwcap() much later in the life 2128 * of the kernel (post_startup()). The job of this routine is to resolve 2129 * the hardware feature support and kernel support for those features into 2130 * what we're actually going to tell applications via the aux vector. 2131 */ 2132 uint_t 2133 cpuid_pass4(cpu_t *cpu) 2134 { 2135 struct cpuid_info *cpi; 2136 uint_t hwcap_flags = 0; 2137 2138 if (cpu == NULL) 2139 cpu = CPU; 2140 cpi = cpu->cpu_m.mcpu_cpi; 2141 2142 ASSERT(cpi->cpi_pass == 3); 2143 2144 if (cpi->cpi_maxeax >= 1) { 2145 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2146 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2147 2148 *edx = CPI_FEATURES_EDX(cpi); 2149 *ecx = CPI_FEATURES_ECX(cpi); 2150 2151 /* 2152 * [these require explicit kernel support] 2153 */ 2154 if ((x86_feature & X86_SEP) == 0) 2155 *edx &= ~CPUID_INTC_EDX_SEP; 2156 2157 if ((x86_feature & X86_SSE) == 0) 2158 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2159 if ((x86_feature & X86_SSE2) == 0) 2160 *edx &= ~CPUID_INTC_EDX_SSE2; 2161 2162 if ((x86_feature & X86_HTT) == 0) 2163 *edx &= ~CPUID_INTC_EDX_HTT; 2164 2165 if ((x86_feature & X86_SSE3) == 0) 2166 *ecx &= ~CPUID_INTC_ECX_SSE3; 2167 2168 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2169 if ((x86_feature & X86_SSSE3) == 0) 2170 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2171 if ((x86_feature & X86_SSE4_1) == 0) 2172 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2173 if ((x86_feature & X86_SSE4_2) == 0) 2174 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2175 } 2176 2177 /* 2178 * [no explicit support required beyond x87 fp context] 2179 */ 2180 if (!fpu_exists) 2181 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2182 2183 /* 2184 * Now map the supported feature vector to things that we 2185 * think userland will care about. 
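 *
 * For example, if the SSE and SSE2 bits survive the masking above,
 * AV_386_FXSR, AV_386_SSE and AV_386_SSE2 are added to hwcap_flags,
 * which bind_hwcap() ultimately exports to applications.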
2186 */ 2187 if (*edx & CPUID_INTC_EDX_SEP) 2188 hwcap_flags |= AV_386_SEP; 2189 if (*edx & CPUID_INTC_EDX_SSE) 2190 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2191 if (*edx & CPUID_INTC_EDX_SSE2) 2192 hwcap_flags |= AV_386_SSE2; 2193 if (*ecx & CPUID_INTC_ECX_SSE3) 2194 hwcap_flags |= AV_386_SSE3; 2195 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2196 if (*ecx & CPUID_INTC_ECX_SSSE3) 2197 hwcap_flags |= AV_386_SSSE3; 2198 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2199 hwcap_flags |= AV_386_SSE4_1; 2200 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2201 hwcap_flags |= AV_386_SSE4_2; 2202 } 2203 if (*ecx & CPUID_INTC_ECX_POPCNT) 2204 hwcap_flags |= AV_386_POPCNT; 2205 if (*edx & CPUID_INTC_EDX_FPU) 2206 hwcap_flags |= AV_386_FPU; 2207 if (*edx & CPUID_INTC_EDX_MMX) 2208 hwcap_flags |= AV_386_MMX; 2209 2210 if (*edx & CPUID_INTC_EDX_TSC) 2211 hwcap_flags |= AV_386_TSC; 2212 if (*edx & CPUID_INTC_EDX_CX8) 2213 hwcap_flags |= AV_386_CX8; 2214 if (*edx & CPUID_INTC_EDX_CMOV) 2215 hwcap_flags |= AV_386_CMOV; 2216 if (*ecx & CPUID_INTC_ECX_MON) 2217 hwcap_flags |= AV_386_MON; 2218 if (*ecx & CPUID_INTC_ECX_CX16) 2219 hwcap_flags |= AV_386_CX16; 2220 } 2221 2222 if (x86_feature & X86_HTT) 2223 hwcap_flags |= AV_386_PAUSE; 2224 2225 if (cpi->cpi_xmaxeax < 0x80000001) 2226 goto pass4_done; 2227 2228 switch (cpi->cpi_vendor) { 2229 struct cpuid_regs cp; 2230 uint32_t *edx, *ecx; 2231 2232 case X86_VENDOR_Intel: 2233 /* 2234 * Seems like Intel duplicated what we necessary 2235 * here to make the initial crop of 64-bit OS's work. 2236 * Hopefully, those are the only "extended" bits 2237 * they'll add. 2238 */ 2239 /*FALLTHROUGH*/ 2240 2241 case X86_VENDOR_AMD: 2242 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2243 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2244 2245 *edx = CPI_FEATURES_XTD_EDX(cpi); 2246 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2247 2248 /* 2249 * [these features require explicit kernel support] 2250 */ 2251 switch (cpi->cpi_vendor) { 2252 case X86_VENDOR_Intel: 2253 if ((x86_feature & X86_TSCP) == 0) 2254 *edx &= ~CPUID_AMD_EDX_TSCP; 2255 break; 2256 2257 case X86_VENDOR_AMD: 2258 if ((x86_feature & X86_TSCP) == 0) 2259 *edx &= ~CPUID_AMD_EDX_TSCP; 2260 if ((x86_feature & X86_SSE4A) == 0) 2261 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2262 break; 2263 2264 default: 2265 break; 2266 } 2267 2268 /* 2269 * [no explicit support required beyond 2270 * x87 fp context and exception handlers] 2271 */ 2272 if (!fpu_exists) 2273 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2274 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2275 2276 if ((x86_feature & X86_NX) == 0) 2277 *edx &= ~CPUID_AMD_EDX_NX; 2278 #if !defined(__amd64) 2279 *edx &= ~CPUID_AMD_EDX_LM; 2280 #endif 2281 /* 2282 * Now map the supported feature vector to 2283 * things that we think userland will care about. 
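 *
 * e.g. on a 64-bit kernel, an AMD part that still has the SYSC and
 * 3DNow! bits set here picks up AV_386_AMD_SYSC and AV_386_AMD_3DNow.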
2284 */ 2285 #if defined(__amd64) 2286 if (*edx & CPUID_AMD_EDX_SYSC) 2287 hwcap_flags |= AV_386_AMD_SYSC; 2288 #endif 2289 if (*edx & CPUID_AMD_EDX_MMXamd) 2290 hwcap_flags |= AV_386_AMD_MMX; 2291 if (*edx & CPUID_AMD_EDX_3DNow) 2292 hwcap_flags |= AV_386_AMD_3DNow; 2293 if (*edx & CPUID_AMD_EDX_3DNowx) 2294 hwcap_flags |= AV_386_AMD_3DNowx; 2295 2296 switch (cpi->cpi_vendor) { 2297 case X86_VENDOR_AMD: 2298 if (*edx & CPUID_AMD_EDX_TSCP) 2299 hwcap_flags |= AV_386_TSCP; 2300 if (*ecx & CPUID_AMD_ECX_AHF64) 2301 hwcap_flags |= AV_386_AHF; 2302 if (*ecx & CPUID_AMD_ECX_SSE4A) 2303 hwcap_flags |= AV_386_AMD_SSE4A; 2304 if (*ecx & CPUID_AMD_ECX_LZCNT) 2305 hwcap_flags |= AV_386_AMD_LZCNT; 2306 break; 2307 2308 case X86_VENDOR_Intel: 2309 if (*edx & CPUID_AMD_EDX_TSCP) 2310 hwcap_flags |= AV_386_TSCP; 2311 /* 2312 * Aarrgh. 2313 * Intel uses a different bit in the same word. 2314 */ 2315 if (*ecx & CPUID_INTC_ECX_AHF64) 2316 hwcap_flags |= AV_386_AHF; 2317 break; 2318 2319 default: 2320 break; 2321 } 2322 break; 2323 2324 case X86_VENDOR_TM: 2325 cp.cp_eax = 0x80860001; 2326 (void) __cpuid_insn(&cp); 2327 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2328 break; 2329 2330 default: 2331 break; 2332 } 2333 2334 pass4_done: 2335 cpi->cpi_pass = 4; 2336 return (hwcap_flags); 2337 } 2338 2339 2340 /* 2341 * Simulate the cpuid instruction using the data we previously 2342 * captured about this CPU. We try our best to return the truth 2343 * about the hardware, independently of kernel support. 2344 */ 2345 uint32_t 2346 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2347 { 2348 struct cpuid_info *cpi; 2349 struct cpuid_regs *xcp; 2350 2351 if (cpu == NULL) 2352 cpu = CPU; 2353 cpi = cpu->cpu_m.mcpu_cpi; 2354 2355 ASSERT(cpuid_checkpass(cpu, 3)); 2356 2357 /* 2358 * CPUID data is cached in two separate places: cpi_std for standard 2359 * CPUID functions, and cpi_extd for extended CPUID functions. 2360 */ 2361 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2362 xcp = &cpi->cpi_std[cp->cp_eax]; 2363 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2364 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2365 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2366 else 2367 /* 2368 * The caller is asking for data from an input parameter which 2369 * the kernel has not cached. In this case we go fetch from 2370 * the hardware and return the data directly to the user. 2371 */ 2372 return (__cpuid_insn(cp)); 2373 2374 cp->cp_eax = xcp->cp_eax; 2375 cp->cp_ebx = xcp->cp_ebx; 2376 cp->cp_ecx = xcp->cp_ecx; 2377 cp->cp_edx = xcp->cp_edx; 2378 return (cp->cp_eax); 2379 } 2380 2381 int 2382 cpuid_checkpass(cpu_t *cpu, int pass) 2383 { 2384 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2385 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2386 } 2387 2388 int 2389 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2390 { 2391 ASSERT(cpuid_checkpass(cpu, 3)); 2392 2393 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2394 } 2395 2396 int 2397 cpuid_is_cmt(cpu_t *cpu) 2398 { 2399 if (cpu == NULL) 2400 cpu = CPU; 2401 2402 ASSERT(cpuid_checkpass(cpu, 1)); 2403 2404 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2405 } 2406 2407 /* 2408 * AMD and Intel both implement the 64-bit variant of the syscall 2409 * instruction (syscallq), so if there's -any- support for syscall, 2410 * cpuid currently says "yes, we support this". 
2411 * 2412 * However, Intel decided to -not- implement the 32-bit variant of the 2413 * syscall instruction, so we provide a predicate to allow our caller 2414 * to test that subtlety here. 2415 * 2416 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2417 * even in the case where the hardware would in fact support it. 2418 */ 2419 /*ARGSUSED*/ 2420 int 2421 cpuid_syscall32_insn(cpu_t *cpu) 2422 { 2423 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2424 2425 #if !defined(__xpv) 2426 if (cpu == NULL) 2427 cpu = CPU; 2428 2429 /*CSTYLED*/ 2430 { 2431 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2432 2433 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2434 cpi->cpi_xmaxeax >= 0x80000001 && 2435 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2436 return (1); 2437 } 2438 #endif 2439 return (0); 2440 } 2441 2442 int 2443 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2444 { 2445 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2446 2447 static const char fmt[] = 2448 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2449 static const char fmt_ht[] = 2450 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2451 2452 ASSERT(cpuid_checkpass(cpu, 1)); 2453 2454 if (cpuid_is_cmt(cpu)) 2455 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2456 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2457 cpi->cpi_family, cpi->cpi_model, 2458 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2459 return (snprintf(s, n, fmt, 2460 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2461 cpi->cpi_family, cpi->cpi_model, 2462 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2463 } 2464 2465 const char * 2466 cpuid_getvendorstr(cpu_t *cpu) 2467 { 2468 ASSERT(cpuid_checkpass(cpu, 1)); 2469 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2470 } 2471 2472 uint_t 2473 cpuid_getvendor(cpu_t *cpu) 2474 { 2475 ASSERT(cpuid_checkpass(cpu, 1)); 2476 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2477 } 2478 2479 uint_t 2480 cpuid_getfamily(cpu_t *cpu) 2481 { 2482 ASSERT(cpuid_checkpass(cpu, 1)); 2483 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2484 } 2485 2486 uint_t 2487 cpuid_getmodel(cpu_t *cpu) 2488 { 2489 ASSERT(cpuid_checkpass(cpu, 1)); 2490 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2491 } 2492 2493 uint_t 2494 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2495 { 2496 ASSERT(cpuid_checkpass(cpu, 1)); 2497 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2498 } 2499 2500 uint_t 2501 cpuid_get_ncore_per_chip(cpu_t *cpu) 2502 { 2503 ASSERT(cpuid_checkpass(cpu, 1)); 2504 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2505 } 2506 2507 uint_t 2508 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2509 { 2510 ASSERT(cpuid_checkpass(cpu, 2)); 2511 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2512 } 2513 2514 id_t 2515 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2516 { 2517 ASSERT(cpuid_checkpass(cpu, 2)); 2518 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2519 } 2520 2521 uint_t 2522 cpuid_getstep(cpu_t *cpu) 2523 { 2524 ASSERT(cpuid_checkpass(cpu, 1)); 2525 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2526 } 2527 2528 uint_t 2529 cpuid_getsig(struct cpu *cpu) 2530 { 2531 ASSERT(cpuid_checkpass(cpu, 1)); 2532 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2533 } 2534 2535 uint32_t 2536 cpuid_getchiprev(struct cpu *cpu) 2537 { 2538 ASSERT(cpuid_checkpass(cpu, 1)); 2539 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2540 } 2541 2542 const char * 2543 cpuid_getchiprevstr(struct cpu *cpu) 2544 { 2545 ASSERT(cpuid_checkpass(cpu, 1)); 2546 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2547 } 2548 2549 
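/*
 * Socket/package type of this chip, as recorded in cpi_socket by the
 * earlier cpuid passes (valid once pass 1 is complete).
 */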
uint32_t 2550 cpuid_getsockettype(struct cpu *cpu) 2551 { 2552 ASSERT(cpuid_checkpass(cpu, 1)); 2553 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2554 } 2555 2556 int 2557 cpuid_get_chipid(cpu_t *cpu) 2558 { 2559 ASSERT(cpuid_checkpass(cpu, 1)); 2560 2561 if (cpuid_is_cmt(cpu)) 2562 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2563 return (cpu->cpu_id); 2564 } 2565 2566 id_t 2567 cpuid_get_coreid(cpu_t *cpu) 2568 { 2569 ASSERT(cpuid_checkpass(cpu, 1)); 2570 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2571 } 2572 2573 int 2574 cpuid_get_pkgcoreid(cpu_t *cpu) 2575 { 2576 ASSERT(cpuid_checkpass(cpu, 1)); 2577 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2578 } 2579 2580 int 2581 cpuid_get_clogid(cpu_t *cpu) 2582 { 2583 ASSERT(cpuid_checkpass(cpu, 1)); 2584 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2585 } 2586 2587 void 2588 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2589 { 2590 struct cpuid_info *cpi; 2591 2592 if (cpu == NULL) 2593 cpu = CPU; 2594 cpi = cpu->cpu_m.mcpu_cpi; 2595 2596 ASSERT(cpuid_checkpass(cpu, 1)); 2597 2598 if (pabits) 2599 *pabits = cpi->cpi_pabits; 2600 if (vabits) 2601 *vabits = cpi->cpi_vabits; 2602 } 2603 2604 /* 2605 * Returns the number of data TLB entries for a corresponding 2606 * pagesize. If it can't be computed, or isn't known, the 2607 * routine returns zero. If you ask about an architecturally 2608 * impossible pagesize, the routine will panic (so that the 2609 * hat implementor knows that things are inconsistent.) 2610 */ 2611 uint_t 2612 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2613 { 2614 struct cpuid_info *cpi; 2615 uint_t dtlb_nent = 0; 2616 2617 if (cpu == NULL) 2618 cpu = CPU; 2619 cpi = cpu->cpu_m.mcpu_cpi; 2620 2621 ASSERT(cpuid_checkpass(cpu, 1)); 2622 2623 /* 2624 * Check the L2 TLB info 2625 */ 2626 if (cpi->cpi_xmaxeax >= 0x80000006) { 2627 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2628 2629 switch (pagesize) { 2630 2631 case 4 * 1024: 2632 /* 2633 * All zero in the top 16 bits of the register 2634 * indicates a unified TLB. Size is in low 16 bits. 2635 */ 2636 if ((cp->cp_ebx & 0xffff0000) == 0) 2637 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2638 else 2639 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2640 break; 2641 2642 case 2 * 1024 * 1024: 2643 if ((cp->cp_eax & 0xffff0000) == 0) 2644 dtlb_nent = cp->cp_eax & 0x0000ffff; 2645 else 2646 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2647 break; 2648 2649 default: 2650 panic("unknown L2 pagesize"); 2651 /*NOTREACHED*/ 2652 } 2653 } 2654 2655 if (dtlb_nent != 0) 2656 return (dtlb_nent); 2657 2658 /* 2659 * No L2 TLB support for this size, try L1. 2660 */ 2661 if (cpi->cpi_xmaxeax >= 0x80000005) { 2662 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2663 2664 switch (pagesize) { 2665 case 4 * 1024: 2666 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2667 break; 2668 case 2 * 1024 * 1024: 2669 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2670 break; 2671 default: 2672 panic("unknown L1 d-TLB pagesize"); 2673 /*NOTREACHED*/ 2674 } 2675 } 2676 2677 return (dtlb_nent); 2678 } 2679 2680 /* 2681 * Return 0 if the erratum is not present or not applicable, positive 2682 * if it is, and negative if the status of the erratum is unknown. 2683 * 2684 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2685 * Processors" #25759, Rev 3.57, August 2005 2686 */ 2687 int 2688 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2689 { 2690 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2691 uint_t eax; 2692 2693 /* 2694 * Bail out if this CPU isn't an AMD CPU, or if it's 2695 * a legacy (32-bit) AMD CPU. 
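 *
 * (The errata checked below are documented for family 0xf (Athlon 64/
 * Opteron) and newer parts, so older families are simply reported
 * as unaffected.)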
2696 */ 2697 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2698 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2699 cpi->cpi_family == 6) 2700 2701 return (0); 2702 2703 eax = cpi->cpi_std[1].cp_eax; 2704 2705 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2706 #define SH_B3(eax) (eax == 0xf51) 2707 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2708 2709 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2710 2711 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2712 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2713 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2714 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2715 2716 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2717 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2718 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2719 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2720 2721 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2722 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2723 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2724 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2725 #define BH_E4(eax) (eax == 0x20fb1) 2726 #define SH_E5(eax) (eax == 0x20f42) 2727 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2728 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2729 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2730 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2731 DH_E6(eax) || JH_E6(eax)) 2732 2733 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2734 #define DR_B0(eax) (eax == 0x100f20) 2735 #define DR_B1(eax) (eax == 0x100f21) 2736 #define DR_BA(eax) (eax == 0x100f2a) 2737 #define DR_B2(eax) (eax == 0x100f22) 2738 #define DR_B3(eax) (eax == 0x100f23) 2739 #define RB_C0(eax) (eax == 0x100f40) 2740 2741 switch (erratum) { 2742 case 1: 2743 return (cpi->cpi_family < 0x10); 2744 case 51: /* what does the asterisk mean? 
*/ 2745 return (B(eax) || SH_C0(eax) || CG(eax)); 2746 case 52: 2747 return (B(eax)); 2748 case 57: 2749 return (cpi->cpi_family <= 0x11); 2750 case 58: 2751 return (B(eax)); 2752 case 60: 2753 return (cpi->cpi_family <= 0x11); 2754 case 61: 2755 case 62: 2756 case 63: 2757 case 64: 2758 case 65: 2759 case 66: 2760 case 68: 2761 case 69: 2762 case 70: 2763 case 71: 2764 return (B(eax)); 2765 case 72: 2766 return (SH_B0(eax)); 2767 case 74: 2768 return (B(eax)); 2769 case 75: 2770 return (cpi->cpi_family < 0x10); 2771 case 76: 2772 return (B(eax)); 2773 case 77: 2774 return (cpi->cpi_family <= 0x11); 2775 case 78: 2776 return (B(eax) || SH_C0(eax)); 2777 case 79: 2778 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2779 case 80: 2780 case 81: 2781 case 82: 2782 return (B(eax)); 2783 case 83: 2784 return (B(eax) || SH_C0(eax) || CG(eax)); 2785 case 85: 2786 return (cpi->cpi_family < 0x10); 2787 case 86: 2788 return (SH_C0(eax) || CG(eax)); 2789 case 88: 2790 #if !defined(__amd64) 2791 return (0); 2792 #else 2793 return (B(eax) || SH_C0(eax)); 2794 #endif 2795 case 89: 2796 return (cpi->cpi_family < 0x10); 2797 case 90: 2798 return (B(eax) || SH_C0(eax) || CG(eax)); 2799 case 91: 2800 case 92: 2801 return (B(eax) || SH_C0(eax)); 2802 case 93: 2803 return (SH_C0(eax)); 2804 case 94: 2805 return (B(eax) || SH_C0(eax) || CG(eax)); 2806 case 95: 2807 #if !defined(__amd64) 2808 return (0); 2809 #else 2810 return (B(eax) || SH_C0(eax)); 2811 #endif 2812 case 96: 2813 return (B(eax) || SH_C0(eax) || CG(eax)); 2814 case 97: 2815 case 98: 2816 return (SH_C0(eax) || CG(eax)); 2817 case 99: 2818 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2819 case 100: 2820 return (B(eax) || SH_C0(eax)); 2821 case 101: 2822 case 103: 2823 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2824 case 104: 2825 return (SH_C0(eax) || CG(eax) || D0(eax)); 2826 case 105: 2827 case 106: 2828 case 107: 2829 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2830 case 108: 2831 return (DH_CG(eax)); 2832 case 109: 2833 return (SH_C0(eax) || CG(eax) || D0(eax)); 2834 case 110: 2835 return (D0(eax) || EX(eax)); 2836 case 111: 2837 return (CG(eax)); 2838 case 112: 2839 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2840 case 113: 2841 return (eax == 0x20fc0); 2842 case 114: 2843 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2844 case 115: 2845 return (SH_E0(eax) || JH_E1(eax)); 2846 case 116: 2847 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2848 case 117: 2849 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2850 case 118: 2851 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2852 JH_E6(eax)); 2853 case 121: 2854 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2855 case 122: 2856 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2857 case 123: 2858 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2859 case 131: 2860 return (cpi->cpi_family < 0x10); 2861 case 6336786: 2862 /* 2863 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2864 * if this is a K8 family or newer processor 2865 */ 2866 if (CPI_FAMILY(cpi) == 0xf) { 2867 struct cpuid_regs regs; 2868 regs.cp_eax = 0x80000007; 2869 (void) __cpuid_insn(®s); 2870 return (!(regs.cp_edx & 0x100)); 2871 } 2872 return (0); 2873 case 6323525: 2874 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2875 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2876 2877 case 6671130: 2878 /* 2879 * check for processors (pre-Shanghai) that do not provide 2880 * optimal management of 1gb ptes in 
its tlb. 2881 */ 2882 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 2883 2884 case 298: 2885 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 2886 DR_B2(eax) || RB_C0(eax)); 2887 2888 default: 2889 return (-1); 2890 2891 } 2892 } 2893 2894 /* 2895 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 2896 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 2897 */ 2898 int 2899 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 2900 { 2901 struct cpuid_info *cpi; 2902 uint_t osvwid; 2903 static int osvwfeature = -1; 2904 uint64_t osvwlength; 2905 2906 2907 cpi = cpu->cpu_m.mcpu_cpi; 2908 2909 /* confirm OSVW supported */ 2910 if (osvwfeature == -1) { 2911 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 2912 } else { 2913 /* assert that osvw feature setting is consistent on all cpus */ 2914 ASSERT(osvwfeature == 2915 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 2916 } 2917 if (!osvwfeature) 2918 return (-1); 2919 2920 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 2921 2922 switch (erratum) { 2923 case 298: /* osvwid is 0 */ 2924 osvwid = 0; 2925 if (osvwlength <= (uint64_t)osvwid) { 2926 /* osvwid 0 is unknown */ 2927 return (-1); 2928 } 2929 2930 /* 2931 * Check the OSVW STATUS MSR to determine the state 2932 * of the erratum where: 2933 * 0 - fixed by HW 2934 * 1 - BIOS has applied the workaround when BIOS 2935 * workaround is available. (Or for other errata, 2936 * OS workaround is required.) 2937 * For a value of 1, caller will confirm that the 2938 * erratum 298 workaround has indeed been applied by BIOS. 2939 * 2940 * A 1 may be set in cpus that have a HW fix 2941 * in a mixed cpu system. Regarding erratum 298: 2942 * In a multiprocessor platform, the workaround above 2943 * should be applied to all processors regardless of 2944 * silicon revision when an affected processor is 2945 * present. 2946 */ 2947 2948 return (rdmsr(MSR_AMD_OSVW_STATUS + 2949 (osvwid / OSVW_ID_CNT_PER_MSR)) & 2950 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 2951 2952 default: 2953 return (-1); 2954 } 2955 } 2956 2957 static const char assoc_str[] = "associativity"; 2958 static const char line_str[] = "line-size"; 2959 static const char size_str[] = "size"; 2960 2961 static void 2962 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2963 uint32_t val) 2964 { 2965 char buf[128]; 2966 2967 /* 2968 * ndi_prop_update_int() is used because it is desirable for 2969 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2970 */ 2971 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2972 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2973 } 2974 2975 /* 2976 * Intel-style cache/tlb description 2977 * 2978 * Standard cpuid level 2 gives a randomly ordered 2979 * selection of tags that index into a table that describes 2980 * cache and tlb properties. 
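 *
 * For example, descriptor 0x2c in intel_ctab[] below denotes a 32K,
 * 8-way set-associative L1 data cache with 64-byte lines; each tag
 * returned by cpuid is simply looked up in that table.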
2981 */ 2982 2983 static const char l1_icache_str[] = "l1-icache"; 2984 static const char l1_dcache_str[] = "l1-dcache"; 2985 static const char l2_cache_str[] = "l2-cache"; 2986 static const char l3_cache_str[] = "l3-cache"; 2987 static const char itlb4k_str[] = "itlb-4K"; 2988 static const char dtlb4k_str[] = "dtlb-4K"; 2989 static const char itlb2M_str[] = "itlb-2M"; 2990 static const char itlb4M_str[] = "itlb-4M"; 2991 static const char dtlb4M_str[] = "dtlb-4M"; 2992 static const char dtlb24_str[] = "dtlb0-2M-4M"; 2993 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2994 static const char itlb24_str[] = "itlb-2M-4M"; 2995 static const char dtlb44_str[] = "dtlb-4K-4M"; 2996 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2997 static const char sl2_cache_str[] = "sectored-l2-cache"; 2998 static const char itrace_str[] = "itrace-cache"; 2999 static const char sl3_cache_str[] = "sectored-l3-cache"; 3000 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 3001 3002 static const struct cachetab { 3003 uint8_t ct_code; 3004 uint8_t ct_assoc; 3005 uint16_t ct_line_size; 3006 size_t ct_size; 3007 const char *ct_label; 3008 } intel_ctab[] = { 3009 /* 3010 * maintain descending order! 3011 * 3012 * Codes ignored - Reason 3013 * ---------------------- 3014 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 3015 * f0H/f1H - Currently we do not interpret prefetch size by design 3016 */ 3017 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 3018 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 3019 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 3020 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 3021 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 3022 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 3023 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 3024 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 3025 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 3026 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 3027 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 3028 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 3029 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 3030 { 0xc0, 4, 0, 8, dtlb44_str }, 3031 { 0xba, 4, 0, 64, dtlb4k_str }, 3032 { 0xb4, 4, 0, 256, dtlb4k_str }, 3033 { 0xb3, 4, 0, 128, dtlb4k_str }, 3034 { 0xb2, 4, 0, 64, itlb4k_str }, 3035 { 0xb0, 4, 0, 128, itlb4k_str }, 3036 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 3037 { 0x86, 4, 64, 512*1024, l2_cache_str}, 3038 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 3039 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 3040 { 0x83, 8, 32, 512*1024, l2_cache_str}, 3041 { 0x82, 8, 32, 256*1024, l2_cache_str}, 3042 { 0x80, 8, 64, 512*1024, l2_cache_str}, 3043 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 3044 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 3045 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 3046 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 3047 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 3048 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 3049 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 3050 { 0x73, 8, 0, 64*1024, itrace_str}, 3051 { 0x72, 8, 0, 32*1024, itrace_str}, 3052 { 0x71, 8, 0, 16*1024, itrace_str}, 3053 { 0x70, 8, 0, 12*1024, itrace_str}, 3054 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 3055 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 3056 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 3057 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 3058 { 0x5d, 0, 0, 256, dtlb44_str}, 3059 { 0x5c, 0, 0, 128, dtlb44_str}, 3060 { 0x5b, 0, 0, 64, dtlb44_str}, 3061 { 0x5a, 4, 0, 32, dtlb24_str}, 3062 { 0x59, 0, 0, 16, dtlb4k_str}, 3063 { 0x57, 4, 0, 16, dtlb4k_str}, 3064 { 0x56, 4, 0, 16, dtlb4M_str}, 3065 { 0x55, 0, 0, 
7, itlb24_str}, 3066 { 0x52, 0, 0, 256, itlb424_str}, 3067 { 0x51, 0, 0, 128, itlb424_str}, 3068 { 0x50, 0, 0, 64, itlb424_str}, 3069 { 0x4f, 0, 0, 32, itlb4k_str}, 3070 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 3071 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3072 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3073 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3074 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3075 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3076 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 3077 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3078 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3079 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3080 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3081 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3082 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3083 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3084 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3085 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3086 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3087 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3088 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3089 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3090 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3091 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3092 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3093 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3094 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3095 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3096 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 3097 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3098 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3099 { 0x0b, 4, 0, 4, itlb4M_str}, 3100 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3101 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3102 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3103 { 0x05, 4, 0, 32, dtlb4M_str}, 3104 { 0x04, 4, 0, 8, dtlb4M_str}, 3105 { 0x03, 4, 0, 64, dtlb4k_str}, 3106 { 0x02, 4, 0, 2, itlb4M_str}, 3107 { 0x01, 4, 0, 32, itlb4k_str}, 3108 { 0 } 3109 }; 3110 3111 static const struct cachetab cyrix_ctab[] = { 3112 { 0x70, 4, 0, 32, "tlb-4K" }, 3113 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3114 { 0 } 3115 }; 3116 3117 /* 3118 * Search a cache table for a matching entry 3119 */ 3120 static const struct cachetab * 3121 find_cacheent(const struct cachetab *ct, uint_t code) 3122 { 3123 if (code != 0) { 3124 for (; ct->ct_code != 0; ct++) 3125 if (ct->ct_code <= code) 3126 break; 3127 if (ct->ct_code == code) 3128 return (ct); 3129 } 3130 return (NULL); 3131 } 3132 3133 /* 3134 * Populate cachetab entry with L2 or L3 cache-information using 3135 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3136 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3137 * information is found. 
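 *
 * The size computed below follows the usual function 4 formula:
 *
 *   size = (ways + 1) * (partitions + 1) * (line size + 1) * (sets + 1)
 *
 * where the number of sets is taken from %ecx.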
3138 */ 3139 static int 3140 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3141 { 3142 uint32_t level, i; 3143 int ret = 0; 3144 3145 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3146 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3147 3148 if (level == 2 || level == 3) { 3149 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3150 ct->ct_line_size = 3151 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3152 ct->ct_size = ct->ct_assoc * 3153 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3154 ct->ct_line_size * 3155 (cpi->cpi_std_4[i]->cp_ecx + 1); 3156 3157 if (level == 2) { 3158 ct->ct_label = l2_cache_str; 3159 } else if (level == 3) { 3160 ct->ct_label = l3_cache_str; 3161 } 3162 ret = 1; 3163 } 3164 } 3165 3166 return (ret); 3167 } 3168 3169 /* 3170 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3171 * The walk is terminated if the walker returns non-zero. 3172 */ 3173 static void 3174 intel_walk_cacheinfo(struct cpuid_info *cpi, 3175 void *arg, int (*func)(void *, const struct cachetab *)) 3176 { 3177 const struct cachetab *ct; 3178 struct cachetab des_49_ct, des_b1_ct; 3179 uint8_t *dp; 3180 int i; 3181 3182 if ((dp = cpi->cpi_cacheinfo) == NULL) 3183 return; 3184 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3185 /* 3186 * For overloaded descriptor 0x49 we use cpuid function 4 3187 * if supported by the current processor, to create 3188 * cache information. 3189 * For overloaded descriptor 0xb1 we use X86_PAE flag 3190 * to disambiguate the cache information. 3191 */ 3192 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3193 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3194 ct = &des_49_ct; 3195 } else if (*dp == 0xb1) { 3196 des_b1_ct.ct_code = 0xb1; 3197 des_b1_ct.ct_assoc = 4; 3198 des_b1_ct.ct_line_size = 0; 3199 if (x86_feature & X86_PAE) { 3200 des_b1_ct.ct_size = 8; 3201 des_b1_ct.ct_label = itlb2M_str; 3202 } else { 3203 des_b1_ct.ct_size = 4; 3204 des_b1_ct.ct_label = itlb4M_str; 3205 } 3206 ct = &des_b1_ct; 3207 } else { 3208 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3209 continue; 3210 } 3211 } 3212 3213 if (func(arg, ct) != 0) { 3214 break; 3215 } 3216 } 3217 } 3218 3219 /* 3220 * (Like the Intel one, except for Cyrix CPUs) 3221 */ 3222 static void 3223 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3224 void *arg, int (*func)(void *, const struct cachetab *)) 3225 { 3226 const struct cachetab *ct; 3227 uint8_t *dp; 3228 int i; 3229 3230 if ((dp = cpi->cpi_cacheinfo) == NULL) 3231 return; 3232 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3233 /* 3234 * Search Cyrix-specific descriptor table first .. 3235 */ 3236 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3237 if (func(arg, ct) != 0) 3238 break; 3239 continue; 3240 } 3241 /* 3242 * .. else fall back to the Intel one 3243 */ 3244 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3245 if (func(arg, ct) != 0) 3246 break; 3247 continue; 3248 } 3249 } 3250 } 3251 3252 /* 3253 * A cacheinfo walker that adds associativity, line-size, and size properties 3254 * to the devinfo node it is passed as an argument. 
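 *
 * e.g. an "l2-cache" entry describing a 512K, 8-way cache with 64-byte
 * lines yields the properties l2-cache-associativity=8,
 * l2-cache-line-size=64 and l2-cache-size=524288 on that node.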
3255 */ 3256 static int 3257 add_cacheent_props(void *arg, const struct cachetab *ct) 3258 { 3259 dev_info_t *devi = arg; 3260 3261 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3262 if (ct->ct_line_size != 0) 3263 add_cache_prop(devi, ct->ct_label, line_str, 3264 ct->ct_line_size); 3265 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3266 return (0); 3267 } 3268 3269 3270 static const char fully_assoc[] = "fully-associative?"; 3271 3272 /* 3273 * AMD style cache/tlb description 3274 * 3275 * Extended functions 5 and 6 directly describe properties of 3276 * tlbs and various cache levels. 3277 */ 3278 static void 3279 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3280 { 3281 switch (assoc) { 3282 case 0: /* reserved; ignore */ 3283 break; 3284 default: 3285 add_cache_prop(devi, label, assoc_str, assoc); 3286 break; 3287 case 0xff: 3288 add_cache_prop(devi, label, fully_assoc, 1); 3289 break; 3290 } 3291 } 3292 3293 static void 3294 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3295 { 3296 if (size == 0) 3297 return; 3298 add_cache_prop(devi, label, size_str, size); 3299 add_amd_assoc(devi, label, assoc); 3300 } 3301 3302 static void 3303 add_amd_cache(dev_info_t *devi, const char *label, 3304 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3305 { 3306 if (size == 0 || line_size == 0) 3307 return; 3308 add_amd_assoc(devi, label, assoc); 3309 /* 3310 * Most AMD parts have a sectored cache. Multiple cache lines are 3311 * associated with each tag. A sector consists of all cache lines 3312 * associated with a tag. For example, the AMD K6-III has a sector 3313 * size of 2 cache lines per tag. 3314 */ 3315 if (lines_per_tag != 0) 3316 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3317 add_cache_prop(devi, label, line_str, line_size); 3318 add_cache_prop(devi, label, size_str, size * 1024); 3319 } 3320 3321 static void 3322 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3323 { 3324 switch (assoc) { 3325 case 0: /* off */ 3326 break; 3327 case 1: 3328 case 2: 3329 case 4: 3330 add_cache_prop(devi, label, assoc_str, assoc); 3331 break; 3332 case 6: 3333 add_cache_prop(devi, label, assoc_str, 8); 3334 break; 3335 case 8: 3336 add_cache_prop(devi, label, assoc_str, 16); 3337 break; 3338 case 0xf: 3339 add_cache_prop(devi, label, fully_assoc, 1); 3340 break; 3341 default: /* reserved; ignore */ 3342 break; 3343 } 3344 } 3345 3346 static void 3347 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3348 { 3349 if (size == 0 || assoc == 0) 3350 return; 3351 add_amd_l2_assoc(devi, label, assoc); 3352 add_cache_prop(devi, label, size_str, size); 3353 } 3354 3355 static void 3356 add_amd_l2_cache(dev_info_t *devi, const char *label, 3357 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3358 { 3359 if (size == 0 || assoc == 0 || line_size == 0) 3360 return; 3361 add_amd_l2_assoc(devi, label, assoc); 3362 if (lines_per_tag != 0) 3363 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3364 add_cache_prop(devi, label, line_str, line_size); 3365 add_cache_prop(devi, label, size_str, size * 1024); 3366 } 3367 3368 static void 3369 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3370 { 3371 struct cpuid_regs *cp; 3372 3373 if (cpi->cpi_xmaxeax < 0x80000005) 3374 return; 3375 cp = &cpi->cpi_extd[5]; 3376 3377 /* 3378 * 4M/2M L1 TLB configuration 3379 * 3380 * We report the size for 2M pages because AMD uses two 3381 * TLB 
entries for one 4M page. 3382 */ 3383 add_amd_tlb(devi, "dtlb-2M", 3384 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3385 add_amd_tlb(devi, "itlb-2M", 3386 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3387 3388 /* 3389 * 4K L1 TLB configuration 3390 */ 3391 3392 switch (cpi->cpi_vendor) { 3393 uint_t nentries; 3394 case X86_VENDOR_TM: 3395 if (cpi->cpi_family >= 5) { 3396 /* 3397 * Crusoe processors have 256 TLB entries, but 3398 * cpuid data format constrains them to only 3399 * reporting 255 of them. 3400 */ 3401 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3402 nentries = 256; 3403 /* 3404 * Crusoe processors also have a unified TLB 3405 */ 3406 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3407 nentries); 3408 break; 3409 } 3410 /*FALLTHROUGH*/ 3411 default: 3412 add_amd_tlb(devi, itlb4k_str, 3413 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3414 add_amd_tlb(devi, dtlb4k_str, 3415 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3416 break; 3417 } 3418 3419 /* 3420 * data L1 cache configuration 3421 */ 3422 3423 add_amd_cache(devi, l1_dcache_str, 3424 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3425 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3426 3427 /* 3428 * code L1 cache configuration 3429 */ 3430 3431 add_amd_cache(devi, l1_icache_str, 3432 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3433 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3434 3435 if (cpi->cpi_xmaxeax < 0x80000006) 3436 return; 3437 cp = &cpi->cpi_extd[6]; 3438 3439 /* Check for a unified L2 TLB for large pages */ 3440 3441 if (BITX(cp->cp_eax, 31, 16) == 0) 3442 add_amd_l2_tlb(devi, "l2-tlb-2M", 3443 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3444 else { 3445 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3446 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3447 add_amd_l2_tlb(devi, "l2-itlb-2M", 3448 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3449 } 3450 3451 /* Check for a unified L2 TLB for 4K pages */ 3452 3453 if (BITX(cp->cp_ebx, 31, 16) == 0) { 3454 add_amd_l2_tlb(devi, "l2-tlb-4K", 3455 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3456 } else { 3457 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3458 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3459 add_amd_l2_tlb(devi, "l2-itlb-4K", 3460 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3461 } 3462 3463 add_amd_l2_cache(devi, l2_cache_str, 3464 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3465 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3466 } 3467 3468 /* 3469 * There are two basic ways that the x86 world describes it cache 3470 * and tlb architecture - Intel's way and AMD's way. 3471 * 3472 * Return which flavor of cache architecture we should use 3473 */ 3474 static int 3475 x86_which_cacheinfo(struct cpuid_info *cpi) 3476 { 3477 switch (cpi->cpi_vendor) { 3478 case X86_VENDOR_Intel: 3479 if (cpi->cpi_maxeax >= 2) 3480 return (X86_VENDOR_Intel); 3481 break; 3482 case X86_VENDOR_AMD: 3483 /* 3484 * The K5 model 1 was the first part from AMD that reported 3485 * cache sizes via extended cpuid functions. 3486 */ 3487 if (cpi->cpi_family > 5 || 3488 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3489 return (X86_VENDOR_AMD); 3490 break; 3491 case X86_VENDOR_TM: 3492 if (cpi->cpi_family >= 5) 3493 return (X86_VENDOR_AMD); 3494 /*FALLTHROUGH*/ 3495 default: 3496 /* 3497 * If they have extended CPU data for 0x80000005 3498 * then we assume they have AMD-format cache 3499 * information. 
3500 * 3501 * If not, and the vendor happens to be Cyrix, 3502 * then try our-Cyrix specific handler. 3503 * 3504 * If we're not Cyrix, then assume we're using Intel's 3505 * table-driven format instead. 3506 */ 3507 if (cpi->cpi_xmaxeax >= 0x80000005) 3508 return (X86_VENDOR_AMD); 3509 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3510 return (X86_VENDOR_Cyrix); 3511 else if (cpi->cpi_maxeax >= 2) 3512 return (X86_VENDOR_Intel); 3513 break; 3514 } 3515 return (-1); 3516 } 3517 3518 /* 3519 * create a node for the given cpu under the prom root node. 3520 * Also, create a cpu node in the device tree. 3521 */ 3522 static dev_info_t *cpu_nex_devi = NULL; 3523 static kmutex_t cpu_node_lock; 3524 3525 /* 3526 * Called from post_startup() and mp_startup() 3527 */ 3528 void 3529 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3530 { 3531 dev_info_t *cpu_devi; 3532 int create; 3533 3534 mutex_enter(&cpu_node_lock); 3535 3536 /* 3537 * create a nexus node for all cpus identified as 'cpu_id' under 3538 * the root node. 3539 */ 3540 if (cpu_nex_devi == NULL) { 3541 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3542 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3543 mutex_exit(&cpu_node_lock); 3544 return; 3545 } 3546 (void) ndi_devi_online(cpu_nex_devi, 0); 3547 } 3548 3549 /* 3550 * create a child node for cpu identified as 'cpu_id' 3551 */ 3552 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3553 cpu_id); 3554 if (cpu_devi == NULL) { 3555 mutex_exit(&cpu_node_lock); 3556 return; 3557 } 3558 3559 /* device_type */ 3560 3561 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3562 "device_type", "cpu"); 3563 3564 /* reg */ 3565 3566 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3567 "reg", cpu_id); 3568 3569 /* cpu-mhz, and clock-frequency */ 3570 3571 if (cpu_freq > 0) { 3572 long long mul; 3573 3574 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3575 "cpu-mhz", cpu_freq); 3576 3577 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3578 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3579 "clock-frequency", (int)mul); 3580 } 3581 3582 (void) ndi_devi_online(cpu_devi, 0); 3583 3584 if ((x86_feature & X86_CPUID) == 0) { 3585 mutex_exit(&cpu_node_lock); 3586 return; 3587 } 3588 3589 /* vendor-id */ 3590 3591 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3592 "vendor-id", cpi->cpi_vendorstr); 3593 3594 if (cpi->cpi_maxeax == 0) { 3595 mutex_exit(&cpu_node_lock); 3596 return; 3597 } 3598 3599 /* 3600 * family, model, and step 3601 */ 3602 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3603 "family", CPI_FAMILY(cpi)); 3604 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3605 "cpu-model", CPI_MODEL(cpi)); 3606 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3607 "stepping-id", CPI_STEP(cpi)); 3608 3609 /* type */ 3610 3611 switch (cpi->cpi_vendor) { 3612 case X86_VENDOR_Intel: 3613 create = 1; 3614 break; 3615 default: 3616 create = 0; 3617 break; 3618 } 3619 if (create) 3620 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3621 "type", CPI_TYPE(cpi)); 3622 3623 /* ext-family */ 3624 3625 switch (cpi->cpi_vendor) { 3626 case X86_VENDOR_Intel: 3627 case X86_VENDOR_AMD: 3628 create = cpi->cpi_family >= 0xf; 3629 break; 3630 default: 3631 create = 0; 3632 break; 3633 } 3634 if (create) 3635 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3636 "ext-family", CPI_FAMILY_XTD(cpi)); 3637 3638 /* ext-model */ 3639 3640 switch (cpi->cpi_vendor) { 3641 case X86_VENDOR_Intel: 3642 create = IS_EXTENDED_MODEL_INTEL(cpi); 3643 
break; 3644 case X86_VENDOR_AMD: 3645 create = CPI_FAMILY(cpi) == 0xf; 3646 break; 3647 default: 3648 create = 0; 3649 break; 3650 } 3651 if (create) 3652 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3653 "ext-model", CPI_MODEL_XTD(cpi)); 3654 3655 /* generation */ 3656 3657 switch (cpi->cpi_vendor) { 3658 case X86_VENDOR_AMD: 3659 /* 3660 * AMD K5 model 1 was the first part to support this 3661 */ 3662 create = cpi->cpi_xmaxeax >= 0x80000001; 3663 break; 3664 default: 3665 create = 0; 3666 break; 3667 } 3668 if (create) 3669 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3670 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3671 3672 /* brand-id */ 3673 3674 switch (cpi->cpi_vendor) { 3675 case X86_VENDOR_Intel: 3676 /* 3677 * brand id first appeared on Pentium III Xeon model 8, 3678 * and Celeron model 8 processors and Opteron 3679 */ 3680 create = cpi->cpi_family > 6 || 3681 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3682 break; 3683 case X86_VENDOR_AMD: 3684 create = cpi->cpi_family >= 0xf; 3685 break; 3686 default: 3687 create = 0; 3688 break; 3689 } 3690 if (create && cpi->cpi_brandid != 0) { 3691 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3692 "brand-id", cpi->cpi_brandid); 3693 } 3694 3695 /* chunks, and apic-id */ 3696 3697 switch (cpi->cpi_vendor) { 3698 /* 3699 * first available on Pentium IV and Opteron (K8) 3700 */ 3701 case X86_VENDOR_Intel: 3702 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3703 break; 3704 case X86_VENDOR_AMD: 3705 create = cpi->cpi_family >= 0xf; 3706 break; 3707 default: 3708 create = 0; 3709 break; 3710 } 3711 if (create) { 3712 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3713 "chunks", CPI_CHUNKS(cpi)); 3714 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3715 "apic-id", cpi->cpi_apicid); 3716 if (cpi->cpi_chipid >= 0) { 3717 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3718 "chip#", cpi->cpi_chipid); 3719 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3720 "clog#", cpi->cpi_clogid); 3721 } 3722 } 3723 3724 /* cpuid-features */ 3725 3726 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3727 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3728 3729 3730 /* cpuid-features-ecx */ 3731 3732 switch (cpi->cpi_vendor) { 3733 case X86_VENDOR_Intel: 3734 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3735 break; 3736 default: 3737 create = 0; 3738 break; 3739 } 3740 if (create) 3741 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3742 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3743 3744 /* ext-cpuid-features */ 3745 3746 switch (cpi->cpi_vendor) { 3747 case X86_VENDOR_Intel: 3748 case X86_VENDOR_AMD: 3749 case X86_VENDOR_Cyrix: 3750 case X86_VENDOR_TM: 3751 case X86_VENDOR_Centaur: 3752 create = cpi->cpi_xmaxeax >= 0x80000001; 3753 break; 3754 default: 3755 create = 0; 3756 break; 3757 } 3758 if (create) { 3759 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3760 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3761 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3762 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3763 } 3764 3765 /* 3766 * Brand String first appeared in Intel Pentium IV, AMD K5 3767 * model 1, and Cyrix GXm. On earlier models we try and 3768 * simulate something similar .. so this string should always 3769 * same -something- about the processor, however lame. 
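 * (The simulated fallback string is produced by fabricate_brandstr()
 * during pass 3.)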
3770 */ 3771 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3772 "brand-string", cpi->cpi_brandstr); 3773 3774 /* 3775 * Finally, cache and tlb information 3776 */ 3777 switch (x86_which_cacheinfo(cpi)) { 3778 case X86_VENDOR_Intel: 3779 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3780 break; 3781 case X86_VENDOR_Cyrix: 3782 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3783 break; 3784 case X86_VENDOR_AMD: 3785 amd_cache_info(cpi, cpu_devi); 3786 break; 3787 default: 3788 break; 3789 } 3790 3791 mutex_exit(&cpu_node_lock); 3792 } 3793 3794 struct l2info { 3795 int *l2i_csz; 3796 int *l2i_lsz; 3797 int *l2i_assoc; 3798 int l2i_ret; 3799 }; 3800 3801 /* 3802 * A cacheinfo walker that fetches the size, line-size and associativity 3803 * of the L2 cache 3804 */ 3805 static int 3806 intel_l2cinfo(void *arg, const struct cachetab *ct) 3807 { 3808 struct l2info *l2i = arg; 3809 int *ip; 3810 3811 if (ct->ct_label != l2_cache_str && 3812 ct->ct_label != sl2_cache_str) 3813 return (0); /* not an L2 -- keep walking */ 3814 3815 if ((ip = l2i->l2i_csz) != NULL) 3816 *ip = ct->ct_size; 3817 if ((ip = l2i->l2i_lsz) != NULL) 3818 *ip = ct->ct_line_size; 3819 if ((ip = l2i->l2i_assoc) != NULL) 3820 *ip = ct->ct_assoc; 3821 l2i->l2i_ret = ct->ct_size; 3822 return (1); /* was an L2 -- terminate walk */ 3823 } 3824 3825 /* 3826 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3827 * 3828 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3829 * value is the associativity, the associativity for the L2 cache and 3830 * tlb is encoded in the following table. The 4 bit L2 value serves as 3831 * an index into the amd_afd[] array to determine the associativity. 3832 * -1 is undefined. 0 is fully associative. 3833 */ 3834 3835 static int amd_afd[] = 3836 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3837 3838 static void 3839 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3840 { 3841 struct cpuid_regs *cp; 3842 uint_t size, assoc; 3843 int i; 3844 int *ip; 3845 3846 if (cpi->cpi_xmaxeax < 0x80000006) 3847 return; 3848 cp = &cpi->cpi_extd[6]; 3849 3850 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3851 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3852 uint_t cachesz = size * 1024; 3853 assoc = amd_afd[i]; 3854 3855 ASSERT(assoc != -1); 3856 3857 if ((ip = l2i->l2i_csz) != NULL) 3858 *ip = cachesz; 3859 if ((ip = l2i->l2i_lsz) != NULL) 3860 *ip = BITX(cp->cp_ecx, 7, 0); 3861 if ((ip = l2i->l2i_assoc) != NULL) 3862 *ip = assoc; 3863 l2i->l2i_ret = cachesz; 3864 } 3865 } 3866 3867 int 3868 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3869 { 3870 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3871 struct l2info __l2info, *l2i = &__l2info; 3872 3873 l2i->l2i_csz = csz; 3874 l2i->l2i_lsz = lsz; 3875 l2i->l2i_assoc = assoc; 3876 l2i->l2i_ret = -1; 3877 3878 switch (x86_which_cacheinfo(cpi)) { 3879 case X86_VENDOR_Intel: 3880 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3881 break; 3882 case X86_VENDOR_Cyrix: 3883 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3884 break; 3885 case X86_VENDOR_AMD: 3886 amd_l2cacheinfo(cpi, l2i); 3887 break; 3888 default: 3889 break; 3890 } 3891 return (l2i->l2i_ret); 3892 } 3893 3894 #if !defined(__xpv) 3895 3896 uint32_t * 3897 cpuid_mwait_alloc(cpu_t *cpu) 3898 { 3899 uint32_t *ret; 3900 size_t mwait_size; 3901 3902 ASSERT(cpuid_checkpass(cpu, 2)); 3903 3904 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3905 if (mwait_size == 0) 3906 return (NULL); 3907 3908 /* 3909 * kmem_alloc() 
returns cache line size aligned data for mwait_size 3910 * allocations. mwait_size is currently cache line sized. Neither 3911 * of these implementation details are guarantied to be true in the 3912 * future. 3913 * 3914 * First try allocating mwait_size as kmem_alloc() currently returns 3915 * correctly aligned memory. If kmem_alloc() does not return 3916 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3917 * 3918 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3919 * decide to free this memory. 3920 */ 3921 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3922 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3923 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3924 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3925 *ret = MWAIT_RUNNING; 3926 return (ret); 3927 } else { 3928 kmem_free(ret, mwait_size); 3929 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3930 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3931 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3932 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3933 *ret = MWAIT_RUNNING; 3934 return (ret); 3935 } 3936 } 3937 3938 void 3939 cpuid_mwait_free(cpu_t *cpu) 3940 { 3941 ASSERT(cpuid_checkpass(cpu, 2)); 3942 3943 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3944 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3945 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3946 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3947 } 3948 3949 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3950 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3951 } 3952 3953 void 3954 patch_tsc_read(int flag) 3955 { 3956 size_t cnt; 3957 switch (flag) { 3958 case X86_NO_TSC: 3959 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3960 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3961 break; 3962 case X86_HAVE_TSCP: 3963 cnt = &_tscp_end - &_tscp_start; 3964 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3965 break; 3966 case X86_TSC_MFENCE: 3967 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 3968 (void) memcpy((void *)tsc_read, 3969 (void *)&_tsc_mfence_start, cnt); 3970 break; 3971 case X86_TSC_LFENCE: 3972 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 3973 (void) memcpy((void *)tsc_read, 3974 (void *)&_tsc_lfence_start, cnt); 3975 break; 3976 default: 3977 break; 3978 } 3979 } 3980 3981 #endif /* !__xpv */ 3982