1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Various routines to handle identification 30 * and classification of x86 processors. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/archsystm.h> 35 #include <sys/x86_archext.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 #include <sys/cmn_err.h> 39 #include <sys/sunddi.h> 40 #include <sys/sunndi.h> 41 #include <sys/cpuvar.h> 42 #include <sys/processor.h> 43 #include <sys/sysmacros.h> 44 #include <sys/pg.h> 45 #include <sys/fp.h> 46 #include <sys/controlregs.h> 47 #include <sys/auxv_386.h> 48 #include <sys/bitmap.h> 49 #include <sys/memnode.h> 50 51 /* 52 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 53 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 54 * them accordingly. For most modern processors, feature detection occurs here 55 * in pass 1. 56 * 57 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 58 * for the boot CPU and does the basic analysis that the early kernel needs. 59 * x86_feature is set based on the return value of cpuid_pass1() of the boot 60 * CPU. 61 * 62 * Pass 1 includes: 63 * 64 * o Determining vendor/model/family/stepping and setting x86_type and 65 * x86_vendor accordingly. 66 * o Processing the feature flags returned by the cpuid instruction while 67 * applying any workarounds or tricks for the specific processor. 68 * o Mapping the feature flags into Solaris feature bits (X86_*). 69 * o Processing extended feature flags if supported by the processor, 70 * again while applying specific processor knowledge. 71 * o Determining the CMT characteristics of the system. 72 * 73 * Pass 1 is done on non-boot CPUs during their initialization and the results 74 * are used only as a meager attempt at ensuring that all processors within the 75 * system support the same features. 76 * 77 * Pass 2 of cpuid feature analysis happens just at the beginning 78 * of startup(). It just copies in and corrects the remainder 79 * of the cpuid data we depend on: standard cpuid functions that we didn't 80 * need for pass1 feature analysis, and extended cpuid functions beyond the 81 * simple feature processing done in pass1. 82 * 83 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 84 * particular kernel memory allocation has been made available. It creates a 85 * readable brand string based on the data collected in the first two passes. 86 * 87 * Pass 4 of cpuid analysis is invoked after post_startup() when all 88 * the support infrastructure for various hardware features has been 89 * initialized. It determines which processor features will be reported 90 * to userland via the aux vector. 91 * 92 * All passes are executed on all CPUs, but only the boot CPU determines what 93 * features the kernel will use. 94 * 95 * Much of the worst junk in this file is for the support of processors 96 * that didn't really implement the cpuid instruction properly. 97 * 98 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 99 * the pass numbers. Accordingly, changes to the pass code may require changes 100 * to the accessor code. 101 */ 102 103 uint_t x86_feature = 0; 104 uint_t x86_vendor = X86_VENDOR_IntelClone; 105 uint_t x86_type = X86_TYPE_OTHER; 106 107 uint_t pentiumpro_bug4046376; 108 uint_t pentiumpro_bug4064495; 109 110 uint_t enable486; 111 112 /* 113 * This set of strings are for processors rumored to support the cpuid 114 * instruction, and is used by locore.s to figure out how to set x86_vendor 115 */ 116 const char CyrixInstead[] = "CyrixInstead"; 117 118 /* 119 * monitor/mwait info. 120 * 121 * size_actual and buf_actual are the real address and size allocated to get 122 * proper mwait_buf alignement. buf_actual and size_actual should be passed 123 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 124 * processor cache-line alignment, but this is not guarantied in the furture. 125 */ 126 struct mwait_info { 127 size_t mon_min; /* min size to avoid missed wakeups */ 128 size_t mon_max; /* size to avoid false wakeups */ 129 size_t size_actual; /* size actually allocated */ 130 void *buf_actual; /* memory actually allocated */ 131 uint32_t support; /* processor support of monitor/mwait */ 132 }; 133 134 /* 135 * These constants determine how many of the elements of the 136 * cpuid we cache in the cpuid_info data structure; the 137 * remaining elements are accessible via the cpuid instruction. 138 */ 139 140 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 141 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 0x80000008 */ 142 143 struct cpuid_info { 144 uint_t cpi_pass; /* last pass completed */ 145 /* 146 * standard function information 147 */ 148 uint_t cpi_maxeax; /* fn 0: %eax */ 149 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 150 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 151 152 uint_t cpi_family; /* fn 1: extended family */ 153 uint_t cpi_model; /* fn 1: extended model */ 154 uint_t cpi_step; /* fn 1: stepping */ 155 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 156 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 157 int cpi_clogid; /* fn 1: %ebx: thread # */ 158 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 159 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 160 uint_t cpi_ncache; /* fn 2: number of elements */ 161 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 162 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 163 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 164 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 165 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 166 /* 167 * extended function information 168 */ 169 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 170 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 171 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 172 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 173 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 174 id_t cpi_coreid; /* same coreid => strands share core */ 175 int cpi_pkgcoreid; /* core number within single package */ 176 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 177 /* Intel: fn 4: %eax[31-26] */ 178 /* 179 * supported feature information 180 */ 181 uint32_t cpi_support[5]; 182 #define STD_EDX_FEATURES 0 183 #define AMD_EDX_FEATURES 1 184 #define TM_EDX_FEATURES 2 185 #define STD_ECX_FEATURES 3 186 #define AMD_ECX_FEATURES 4 187 /* 188 * Synthesized information, where known. 189 */ 190 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 191 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 192 uint32_t cpi_socket; /* Chip package/socket type */ 193 194 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 195 }; 196 197 198 static struct cpuid_info cpuid_info0; 199 200 /* 201 * These bit fields are defined by the Intel Application Note AP-485 202 * "Intel Processor Identification and the CPUID Instruction" 203 */ 204 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 205 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 206 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 207 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 208 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 209 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 210 211 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 212 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 213 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 214 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 215 216 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 217 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 218 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 219 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 220 221 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 222 #define CPI_XMAXEAX_MAX 0x80000100 223 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 224 225 /* 226 * Function 4 (Deterministic Cache Parameters) macros 227 * Defined by Intel Application Note AP-485 228 */ 229 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 230 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 231 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 232 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 233 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 234 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 235 236 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 237 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 238 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 239 240 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 241 242 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 243 244 245 /* 246 * A couple of shorthand macros to identify "later" P6-family chips 247 * like the Pentium M and Core. First, the "older" P6-based stuff 248 * (loosely defined as "pre-Pentium-4"): 249 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 250 */ 251 252 #define IS_LEGACY_P6(cpi) ( \ 253 cpi->cpi_family == 6 && \ 254 (cpi->cpi_model == 1 || \ 255 cpi->cpi_model == 3 || \ 256 cpi->cpi_model == 5 || \ 257 cpi->cpi_model == 6 || \ 258 cpi->cpi_model == 7 || \ 259 cpi->cpi_model == 8 || \ 260 cpi->cpi_model == 0xA || \ 261 cpi->cpi_model == 0xB) \ 262 ) 263 264 /* A "new F6" is everything with family 6 that's not the above */ 265 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 266 267 /* Extended family/model support */ 268 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 269 cpi->cpi_family >= 0xf) 270 271 /* 272 * AMD family 0xf and family 0x10 socket types. 273 * First index : 274 * 0 for family 0xf, revs B thru E 275 * 1 for family 0xf, revs F and G 276 * 2 for family 0x10, rev B 277 * Second index by (model & 0x3) 278 */ 279 static uint32_t amd_skts[3][4] = { 280 /* 281 * Family 0xf revisions B through E 282 */ 283 #define A_SKTS_0 0 284 { 285 X86_SOCKET_754, /* 0b00 */ 286 X86_SOCKET_940, /* 0b01 */ 287 X86_SOCKET_754, /* 0b10 */ 288 X86_SOCKET_939 /* 0b11 */ 289 }, 290 /* 291 * Family 0xf revisions F and G 292 */ 293 #define A_SKTS_1 1 294 { 295 X86_SOCKET_S1g1, /* 0b00 */ 296 X86_SOCKET_F1207, /* 0b01 */ 297 X86_SOCKET_UNKNOWN, /* 0b10 */ 298 X86_SOCKET_AM2 /* 0b11 */ 299 }, 300 /* 301 * Family 0x10 revisions A and B 302 * It is not clear whether, as new sockets release, that 303 * model & 0x3 will id socket for this family 304 */ 305 #define A_SKTS_2 2 306 { 307 X86_SOCKET_F1207, /* 0b00 */ 308 X86_SOCKET_F1207, /* 0b01 */ 309 X86_SOCKET_F1207, /* 0b10 */ 310 X86_SOCKET_F1207, /* 0b11 */ 311 } 312 }; 313 314 /* 315 * Table for mapping AMD Family 0xf and AMD Family 0x10 model/stepping 316 * combination to chip "revision" and socket type. 317 * 318 * The first member of this array that matches a given family, extended model 319 * plus model range, and stepping range will be considered a match. 320 */ 321 static const struct amd_rev_mapent { 322 uint_t rm_family; 323 uint_t rm_modello; 324 uint_t rm_modelhi; 325 uint_t rm_steplo; 326 uint_t rm_stephi; 327 uint32_t rm_chiprev; 328 const char *rm_chiprevstr; 329 int rm_sktidx; 330 } amd_revmap[] = { 331 /* 332 * =============== AuthenticAMD Family 0xf =============== 333 */ 334 335 /* 336 * Rev B includes model 0x4 stepping 0 and model 0x5 stepping 0 and 1. 337 */ 338 { 0xf, 0x04, 0x04, 0x0, 0x0, X86_CHIPREV_AMD_F_REV_B, "B", A_SKTS_0 }, 339 { 0xf, 0x05, 0x05, 0x0, 0x1, X86_CHIPREV_AMD_F_REV_B, "B", A_SKTS_0 }, 340 /* 341 * Rev C0 includes model 0x4 stepping 8 and model 0x5 stepping 8 342 */ 343 { 0xf, 0x04, 0x05, 0x8, 0x8, X86_CHIPREV_AMD_F_REV_C0, "C0", A_SKTS_0 }, 344 /* 345 * Rev CG is the rest of extended model 0x0 - i.e., everything 346 * but the rev B and C0 combinations covered above. 347 */ 348 { 0xf, 0x00, 0x0f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_CG, "CG", A_SKTS_0 }, 349 /* 350 * Rev D has extended model 0x1. 351 */ 352 { 0xf, 0x10, 0x1f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_D, "D", A_SKTS_0 }, 353 /* 354 * Rev E has extended model 0x2. 355 * Extended model 0x3 is unused but available to grow into. 356 */ 357 { 0xf, 0x20, 0x3f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_E, "E", A_SKTS_0 }, 358 /* 359 * Rev F has extended models 0x4 and 0x5. 360 */ 361 { 0xf, 0x40, 0x5f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_F, "F", A_SKTS_1 }, 362 /* 363 * Rev G has extended model 0x6. 364 */ 365 { 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", A_SKTS_1 }, 366 367 /* 368 * =============== AuthenticAMD Family 0x10 =============== 369 */ 370 371 /* 372 * Rev A has model 0 and stepping 0/1/2 for DR-{A0,A1,A2}. 373 * Give all of model 0 stepping range to rev A. 374 */ 375 { 0x10, 0x00, 0x00, 0x0, 0x2, X86_CHIPREV_AMD_10_REV_A, "A", A_SKTS_2 }, 376 377 /* 378 * Rev B has model 2 and steppings 0/1/0xa/2 for DR-{B0,B1,BA,B2}. 379 * Give all of model 2 stepping range to rev B. 380 */ 381 { 0x10, 0x02, 0x02, 0x0, 0xf, X86_CHIPREV_AMD_10_REV_B, "B", A_SKTS_2 }, 382 }; 383 384 /* 385 * Info for monitor/mwait idle loop. 386 * 387 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 388 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 389 * 2006. 390 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 391 * Documentation Updates" #33633, Rev 2.05, December 2006. 392 */ 393 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 394 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 395 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 396 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 397 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 398 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 399 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 400 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 401 /* 402 * Number of sub-cstates for a given c-state. 403 */ 404 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 405 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 406 407 static void 408 synth_amd_info(struct cpuid_info *cpi) 409 { 410 const struct amd_rev_mapent *rmp; 411 uint_t family, model, step; 412 int i; 413 414 /* 415 * Currently only AMD family 0xf and family 0x10 use these fields. 416 */ 417 if (cpi->cpi_family != 0xf && cpi->cpi_family != 0x10) 418 return; 419 420 family = cpi->cpi_family; 421 model = cpi->cpi_model; 422 step = cpi->cpi_step; 423 424 for (i = 0, rmp = amd_revmap; i < sizeof (amd_revmap) / sizeof (*rmp); 425 i++, rmp++) { 426 if (family == rmp->rm_family && 427 model >= rmp->rm_modello && model <= rmp->rm_modelhi && 428 step >= rmp->rm_steplo && step <= rmp->rm_stephi) { 429 cpi->cpi_chiprev = rmp->rm_chiprev; 430 cpi->cpi_chiprevstr = rmp->rm_chiprevstr; 431 cpi->cpi_socket = amd_skts[rmp->rm_sktidx][model & 0x3]; 432 return; 433 } 434 } 435 } 436 437 static void 438 synth_info(struct cpuid_info *cpi) 439 { 440 cpi->cpi_chiprev = X86_CHIPREV_UNKNOWN; 441 cpi->cpi_chiprevstr = "Unknown"; 442 cpi->cpi_socket = X86_SOCKET_UNKNOWN; 443 444 switch (cpi->cpi_vendor) { 445 case X86_VENDOR_AMD: 446 synth_amd_info(cpi); 447 break; 448 449 default: 450 break; 451 452 } 453 } 454 455 /* 456 * Apply up various platform-dependent restrictions where the 457 * underlying platform restrictions mean the CPU can be marked 458 * as less capable than its cpuid instruction would imply. 459 */ 460 #if defined(__xpv) 461 static void 462 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 463 { 464 switch (eax) { 465 case 1: 466 cp->cp_edx &= 467 ~(CPUID_INTC_EDX_PSE | 468 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 469 CPUID_INTC_EDX_MCA | /* XXPV true on dom0? */ 470 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 471 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 472 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 473 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 474 break; 475 476 case 0x80000001: 477 cp->cp_edx &= 478 ~(CPUID_AMD_EDX_PSE | 479 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 480 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 481 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 482 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 483 CPUID_AMD_EDX_TSCP); 484 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 485 break; 486 default: 487 break; 488 } 489 490 switch (vendor) { 491 case X86_VENDOR_Intel: 492 switch (eax) { 493 case 4: 494 /* 495 * Zero out the (ncores-per-chip - 1) field 496 */ 497 cp->cp_eax &= 0x03fffffff; 498 break; 499 default: 500 break; 501 } 502 break; 503 case X86_VENDOR_AMD: 504 switch (eax) { 505 case 0x80000008: 506 /* 507 * Zero out the (ncores-per-chip - 1) field 508 */ 509 cp->cp_ecx &= 0xffffff00; 510 break; 511 default: 512 break; 513 } 514 break; 515 default: 516 break; 517 } 518 } 519 #else 520 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 521 #endif 522 523 /* 524 * Some undocumented ways of patching the results of the cpuid 525 * instruction to permit running Solaris 10 on future cpus that 526 * we don't currently support. Could be set to non-zero values 527 * via settings in eeprom. 528 */ 529 530 uint32_t cpuid_feature_ecx_include; 531 uint32_t cpuid_feature_ecx_exclude; 532 uint32_t cpuid_feature_edx_include; 533 uint32_t cpuid_feature_edx_exclude; 534 535 void 536 cpuid_alloc_space(cpu_t *cpu) 537 { 538 /* 539 * By convention, cpu0 is the boot cpu, which is set up 540 * before memory allocation is available. All other cpus get 541 * their cpuid_info struct allocated here. 542 */ 543 ASSERT(cpu->cpu_id != 0); 544 cpu->cpu_m.mcpu_cpi = 545 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 546 } 547 548 void 549 cpuid_free_space(cpu_t *cpu) 550 { 551 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 552 int i; 553 554 ASSERT(cpu->cpu_id != 0); 555 556 /* 557 * Free up any function 4 related dynamic storage 558 */ 559 for (i = 1; i < cpi->cpi_std_4_size; i++) 560 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 561 if (cpi->cpi_std_4_size > 0) 562 kmem_free(cpi->cpi_std_4, 563 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 564 565 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 566 } 567 568 #if !defined(__xpv) 569 570 static void 571 check_for_hvm() 572 { 573 struct cpuid_regs cp; 574 char *xen_str; 575 uint32_t xen_signature[4]; 576 extern int xpv_is_hvm; 577 578 /* 579 * In a fully virtualized domain, Xen's pseudo-cpuid function 580 * 0x40000000 returns a string representing the Xen signature in 581 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 582 * function. 583 */ 584 cp.cp_eax = 0x40000000; 585 (void) __cpuid_insn(&cp); 586 xen_signature[0] = cp.cp_ebx; 587 xen_signature[1] = cp.cp_ecx; 588 xen_signature[2] = cp.cp_edx; 589 xen_signature[3] = 0; 590 xen_str = (char *)xen_signature; 591 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) 592 xpv_is_hvm = 1; 593 } 594 #endif /* __xpv */ 595 596 uint_t 597 cpuid_pass1(cpu_t *cpu) 598 { 599 uint32_t mask_ecx, mask_edx; 600 uint_t feature = X86_CPUID; 601 struct cpuid_info *cpi; 602 struct cpuid_regs *cp; 603 int xcpuid; 604 #if !defined(__xpv) 605 extern int idle_cpu_prefer_mwait; 606 #endif 607 608 /* 609 * Space statically allocated for cpu0, ensure pointer is set 610 */ 611 if (cpu->cpu_id == 0) 612 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 613 cpi = cpu->cpu_m.mcpu_cpi; 614 ASSERT(cpi != NULL); 615 cp = &cpi->cpi_std[0]; 616 cp->cp_eax = 0; 617 cpi->cpi_maxeax = __cpuid_insn(cp); 618 { 619 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 620 *iptr++ = cp->cp_ebx; 621 *iptr++ = cp->cp_edx; 622 *iptr++ = cp->cp_ecx; 623 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 624 } 625 626 /* 627 * Map the vendor string to a type code 628 */ 629 if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0) 630 cpi->cpi_vendor = X86_VENDOR_Intel; 631 else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0) 632 cpi->cpi_vendor = X86_VENDOR_AMD; 633 else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0) 634 cpi->cpi_vendor = X86_VENDOR_TM; 635 else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0) 636 /* 637 * CyrixInstead is a variable used by the Cyrix detection code 638 * in locore. 639 */ 640 cpi->cpi_vendor = X86_VENDOR_Cyrix; 641 else if (strcmp(cpi->cpi_vendorstr, "UMC UMC UMC ") == 0) 642 cpi->cpi_vendor = X86_VENDOR_UMC; 643 else if (strcmp(cpi->cpi_vendorstr, "NexGenDriven") == 0) 644 cpi->cpi_vendor = X86_VENDOR_NexGen; 645 else if (strcmp(cpi->cpi_vendorstr, "CentaurHauls") == 0) 646 cpi->cpi_vendor = X86_VENDOR_Centaur; 647 else if (strcmp(cpi->cpi_vendorstr, "RiseRiseRise") == 0) 648 cpi->cpi_vendor = X86_VENDOR_Rise; 649 else if (strcmp(cpi->cpi_vendorstr, "SiS SiS SiS ") == 0) 650 cpi->cpi_vendor = X86_VENDOR_SiS; 651 else if (strcmp(cpi->cpi_vendorstr, "Geode by NSC") == 0) 652 cpi->cpi_vendor = X86_VENDOR_NSC; 653 else 654 cpi->cpi_vendor = X86_VENDOR_IntelClone; 655 656 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 657 658 /* 659 * Limit the range in case of weird hardware 660 */ 661 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 662 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 663 if (cpi->cpi_maxeax < 1) 664 goto pass1_done; 665 666 cp = &cpi->cpi_std[1]; 667 cp->cp_eax = 1; 668 (void) __cpuid_insn(cp); 669 670 /* 671 * Extract identifying constants for easy access. 672 */ 673 cpi->cpi_model = CPI_MODEL(cpi); 674 cpi->cpi_family = CPI_FAMILY(cpi); 675 676 if (cpi->cpi_family == 0xf) 677 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 678 679 /* 680 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 681 * Intel, and presumably everyone else, uses model == 0xf, as 682 * one would expect (max value means possible overflow). Sigh. 683 */ 684 685 switch (cpi->cpi_vendor) { 686 case X86_VENDOR_Intel: 687 if (IS_EXTENDED_MODEL_INTEL(cpi)) 688 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 689 break; 690 case X86_VENDOR_AMD: 691 if (CPI_FAMILY(cpi) == 0xf) 692 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 693 break; 694 default: 695 if (cpi->cpi_model == 0xf) 696 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 697 break; 698 } 699 700 cpi->cpi_step = CPI_STEP(cpi); 701 cpi->cpi_brandid = CPI_BRANDID(cpi); 702 703 /* 704 * *default* assumptions: 705 * - believe %edx feature word 706 * - ignore %ecx feature word 707 * - 32-bit virtual and physical addressing 708 */ 709 mask_edx = 0xffffffff; 710 mask_ecx = 0; 711 712 cpi->cpi_pabits = cpi->cpi_vabits = 32; 713 714 switch (cpi->cpi_vendor) { 715 case X86_VENDOR_Intel: 716 if (cpi->cpi_family == 5) 717 x86_type = X86_TYPE_P5; 718 else if (IS_LEGACY_P6(cpi)) { 719 x86_type = X86_TYPE_P6; 720 pentiumpro_bug4046376 = 1; 721 pentiumpro_bug4064495 = 1; 722 /* 723 * Clear the SEP bit when it was set erroneously 724 */ 725 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 726 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 727 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 728 x86_type = X86_TYPE_P4; 729 /* 730 * We don't currently depend on any of the %ecx 731 * features until Prescott, so we'll only check 732 * this from P4 onwards. We might want to revisit 733 * that idea later. 734 */ 735 mask_ecx = 0xffffffff; 736 } else if (cpi->cpi_family > 0xf) 737 mask_ecx = 0xffffffff; 738 /* 739 * We don't support MONITOR/MWAIT if leaf 5 is not available 740 * to obtain the monitor linesize. 741 */ 742 if (cpi->cpi_maxeax < 5) 743 mask_ecx &= ~CPUID_INTC_ECX_MON; 744 break; 745 case X86_VENDOR_IntelClone: 746 default: 747 break; 748 case X86_VENDOR_AMD: 749 #if defined(OPTERON_ERRATUM_108) 750 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 751 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 752 cpi->cpi_model = 0xc; 753 } else 754 #endif 755 if (cpi->cpi_family == 5) { 756 /* 757 * AMD K5 and K6 758 * 759 * These CPUs have an incomplete implementation 760 * of MCA/MCE which we mask away. 761 */ 762 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 763 764 /* 765 * Model 0 uses the wrong (APIC) bit 766 * to indicate PGE. Fix it here. 767 */ 768 if (cpi->cpi_model == 0) { 769 if (cp->cp_edx & 0x200) { 770 cp->cp_edx &= ~0x200; 771 cp->cp_edx |= CPUID_INTC_EDX_PGE; 772 } 773 } 774 775 /* 776 * Early models had problems w/ MMX; disable. 777 */ 778 if (cpi->cpi_model < 6) 779 mask_edx &= ~CPUID_INTC_EDX_MMX; 780 } 781 782 /* 783 * For newer families, SSE3 and CX16, at least, are valid; 784 * enable all 785 */ 786 if (cpi->cpi_family >= 0xf) 787 mask_ecx = 0xffffffff; 788 /* 789 * We don't support MONITOR/MWAIT if leaf 5 is not available 790 * to obtain the monitor linesize. 791 */ 792 if (cpi->cpi_maxeax < 5) 793 mask_ecx &= ~CPUID_INTC_ECX_MON; 794 795 #if !defined(__xpv) 796 /* 797 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 798 * processors. AMD does not intend MWAIT to be used in the cpu 799 * idle loop on current and future processors. 10h and future 800 * AMD processors use more power in MWAIT than HLT. 801 * Pre-family-10h Opterons do not have the MWAIT instruction. 802 */ 803 idle_cpu_prefer_mwait = 0; 804 #endif 805 806 break; 807 case X86_VENDOR_TM: 808 /* 809 * workaround the NT workaround in CMS 4.1 810 */ 811 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 812 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 813 cp->cp_edx |= CPUID_INTC_EDX_CX8; 814 break; 815 case X86_VENDOR_Centaur: 816 /* 817 * workaround the NT workarounds again 818 */ 819 if (cpi->cpi_family == 6) 820 cp->cp_edx |= CPUID_INTC_EDX_CX8; 821 break; 822 case X86_VENDOR_Cyrix: 823 /* 824 * We rely heavily on the probing in locore 825 * to actually figure out what parts, if any, 826 * of the Cyrix cpuid instruction to believe. 827 */ 828 switch (x86_type) { 829 case X86_TYPE_CYRIX_486: 830 mask_edx = 0; 831 break; 832 case X86_TYPE_CYRIX_6x86: 833 mask_edx = 0; 834 break; 835 case X86_TYPE_CYRIX_6x86L: 836 mask_edx = 837 CPUID_INTC_EDX_DE | 838 CPUID_INTC_EDX_CX8; 839 break; 840 case X86_TYPE_CYRIX_6x86MX: 841 mask_edx = 842 CPUID_INTC_EDX_DE | 843 CPUID_INTC_EDX_MSR | 844 CPUID_INTC_EDX_CX8 | 845 CPUID_INTC_EDX_PGE | 846 CPUID_INTC_EDX_CMOV | 847 CPUID_INTC_EDX_MMX; 848 break; 849 case X86_TYPE_CYRIX_GXm: 850 mask_edx = 851 CPUID_INTC_EDX_MSR | 852 CPUID_INTC_EDX_CX8 | 853 CPUID_INTC_EDX_CMOV | 854 CPUID_INTC_EDX_MMX; 855 break; 856 case X86_TYPE_CYRIX_MediaGX: 857 break; 858 case X86_TYPE_CYRIX_MII: 859 case X86_TYPE_VIA_CYRIX_III: 860 mask_edx = 861 CPUID_INTC_EDX_DE | 862 CPUID_INTC_EDX_TSC | 863 CPUID_INTC_EDX_MSR | 864 CPUID_INTC_EDX_CX8 | 865 CPUID_INTC_EDX_PGE | 866 CPUID_INTC_EDX_CMOV | 867 CPUID_INTC_EDX_MMX; 868 break; 869 default: 870 break; 871 } 872 break; 873 } 874 875 #if defined(__xpv) 876 /* 877 * Do not support MONITOR/MWAIT under a hypervisor 878 */ 879 mask_ecx &= ~CPUID_INTC_ECX_MON; 880 #endif /* __xpv */ 881 882 /* 883 * Now we've figured out the masks that determine 884 * which bits we choose to believe, apply the masks 885 * to the feature words, then map the kernel's view 886 * of these feature words into its feature word. 887 */ 888 cp->cp_edx &= mask_edx; 889 cp->cp_ecx &= mask_ecx; 890 891 /* 892 * apply any platform restrictions (we don't call this 893 * immediately after __cpuid_insn here, because we need the 894 * workarounds applied above first) 895 */ 896 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 897 898 /* 899 * fold in overrides from the "eeprom" mechanism 900 */ 901 cp->cp_edx |= cpuid_feature_edx_include; 902 cp->cp_edx &= ~cpuid_feature_edx_exclude; 903 904 cp->cp_ecx |= cpuid_feature_ecx_include; 905 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 906 907 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 908 feature |= X86_LARGEPAGE; 909 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 910 feature |= X86_TSC; 911 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 912 feature |= X86_MSR; 913 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 914 feature |= X86_MTRR; 915 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 916 feature |= X86_PGE; 917 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 918 feature |= X86_CMOV; 919 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 920 feature |= X86_MMX; 921 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 922 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 923 feature |= X86_MCA; 924 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 925 feature |= X86_PAE; 926 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 927 feature |= X86_CX8; 928 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 929 feature |= X86_CX16; 930 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 931 feature |= X86_PAT; 932 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 933 feature |= X86_SEP; 934 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 935 /* 936 * In our implementation, fxsave/fxrstor 937 * are prerequisites before we'll even 938 * try and do SSE things. 939 */ 940 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 941 feature |= X86_SSE; 942 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 943 feature |= X86_SSE2; 944 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 945 feature |= X86_SSE3; 946 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 947 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 948 feature |= X86_SSSE3; 949 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 950 feature |= X86_SSE4_1; 951 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 952 feature |= X86_SSE4_2; 953 } 954 } 955 if (cp->cp_edx & CPUID_INTC_EDX_DE) 956 feature |= X86_DE; 957 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 958 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 959 feature |= X86_MWAIT; 960 } 961 962 if (feature & X86_PAE) 963 cpi->cpi_pabits = 36; 964 965 /* 966 * Hyperthreading configuration is slightly tricky on Intel 967 * and pure clones, and even trickier on AMD. 968 * 969 * (AMD chose to set the HTT bit on their CMP processors, 970 * even though they're not actually hyperthreaded. Thus it 971 * takes a bit more work to figure out what's really going 972 * on ... see the handling of the CMP_LGCY bit below) 973 */ 974 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 975 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 976 if (cpi->cpi_ncpu_per_chip > 1) 977 feature |= X86_HTT; 978 } else { 979 cpi->cpi_ncpu_per_chip = 1; 980 } 981 982 /* 983 * Work on the "extended" feature information, doing 984 * some basic initialization for cpuid_pass2() 985 */ 986 xcpuid = 0; 987 switch (cpi->cpi_vendor) { 988 case X86_VENDOR_Intel: 989 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 990 xcpuid++; 991 break; 992 case X86_VENDOR_AMD: 993 if (cpi->cpi_family > 5 || 994 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 995 xcpuid++; 996 break; 997 case X86_VENDOR_Cyrix: 998 /* 999 * Only these Cyrix CPUs are -known- to support 1000 * extended cpuid operations. 1001 */ 1002 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1003 x86_type == X86_TYPE_CYRIX_GXm) 1004 xcpuid++; 1005 break; 1006 case X86_VENDOR_Centaur: 1007 case X86_VENDOR_TM: 1008 default: 1009 xcpuid++; 1010 break; 1011 } 1012 1013 if (xcpuid) { 1014 cp = &cpi->cpi_extd[0]; 1015 cp->cp_eax = 0x80000000; 1016 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1017 } 1018 1019 if (cpi->cpi_xmaxeax & 0x80000000) { 1020 1021 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1022 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1023 1024 switch (cpi->cpi_vendor) { 1025 case X86_VENDOR_Intel: 1026 case X86_VENDOR_AMD: 1027 if (cpi->cpi_xmaxeax < 0x80000001) 1028 break; 1029 cp = &cpi->cpi_extd[1]; 1030 cp->cp_eax = 0x80000001; 1031 (void) __cpuid_insn(cp); 1032 1033 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1034 cpi->cpi_family == 5 && 1035 cpi->cpi_model == 6 && 1036 cpi->cpi_step == 6) { 1037 /* 1038 * K6 model 6 uses bit 10 to indicate SYSC 1039 * Later models use bit 11. Fix it here. 1040 */ 1041 if (cp->cp_edx & 0x400) { 1042 cp->cp_edx &= ~0x400; 1043 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1044 } 1045 } 1046 1047 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1048 1049 /* 1050 * Compute the additions to the kernel's feature word. 1051 */ 1052 if (cp->cp_edx & CPUID_AMD_EDX_NX) 1053 feature |= X86_NX; 1054 1055 #if defined(__amd64) 1056 /* 1 GB large page - enable only for 64 bit kernel */ 1057 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 1058 feature |= X86_1GPG; 1059 #endif 1060 1061 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1062 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1063 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 1064 feature |= X86_SSE4A; 1065 1066 /* 1067 * If both the HTT and CMP_LGCY bits are set, 1068 * then we're not actually HyperThreaded. Read 1069 * "AMD CPUID Specification" for more details. 1070 */ 1071 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1072 (feature & X86_HTT) && 1073 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1074 feature &= ~X86_HTT; 1075 feature |= X86_CMP; 1076 } 1077 #if defined(__amd64) 1078 /* 1079 * It's really tricky to support syscall/sysret in 1080 * the i386 kernel; we rely on sysenter/sysexit 1081 * instead. In the amd64 kernel, things are -way- 1082 * better. 1083 */ 1084 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 1085 feature |= X86_ASYSC; 1086 1087 /* 1088 * While we're thinking about system calls, note 1089 * that AMD processors don't support sysenter 1090 * in long mode at all, so don't try to program them. 1091 */ 1092 if (x86_vendor == X86_VENDOR_AMD) 1093 feature &= ~X86_SEP; 1094 #endif 1095 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 1096 feature |= X86_TSCP; 1097 break; 1098 default: 1099 break; 1100 } 1101 1102 /* 1103 * Get CPUID data about processor cores and hyperthreads. 1104 */ 1105 switch (cpi->cpi_vendor) { 1106 case X86_VENDOR_Intel: 1107 if (cpi->cpi_maxeax >= 4) { 1108 cp = &cpi->cpi_std[4]; 1109 cp->cp_eax = 4; 1110 cp->cp_ecx = 0; 1111 (void) __cpuid_insn(cp); 1112 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1113 } 1114 /*FALLTHROUGH*/ 1115 case X86_VENDOR_AMD: 1116 if (cpi->cpi_xmaxeax < 0x80000008) 1117 break; 1118 cp = &cpi->cpi_extd[8]; 1119 cp->cp_eax = 0x80000008; 1120 (void) __cpuid_insn(cp); 1121 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1122 1123 /* 1124 * Virtual and physical address limits from 1125 * cpuid override previously guessed values. 1126 */ 1127 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1128 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1129 break; 1130 default: 1131 break; 1132 } 1133 1134 /* 1135 * Derive the number of cores per chip 1136 */ 1137 switch (cpi->cpi_vendor) { 1138 case X86_VENDOR_Intel: 1139 if (cpi->cpi_maxeax < 4) { 1140 cpi->cpi_ncore_per_chip = 1; 1141 break; 1142 } else { 1143 cpi->cpi_ncore_per_chip = 1144 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1145 } 1146 break; 1147 case X86_VENDOR_AMD: 1148 if (cpi->cpi_xmaxeax < 0x80000008) { 1149 cpi->cpi_ncore_per_chip = 1; 1150 break; 1151 } else { 1152 /* 1153 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is 1154 * 1 less than the number of physical cores on 1155 * the chip. In family 0x10 this value can 1156 * be affected by "downcoring" - it reflects 1157 * 1 less than the number of cores actually 1158 * enabled on this node. 1159 */ 1160 cpi->cpi_ncore_per_chip = 1161 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1162 } 1163 break; 1164 default: 1165 cpi->cpi_ncore_per_chip = 1; 1166 break; 1167 } 1168 } else { 1169 cpi->cpi_ncore_per_chip = 1; 1170 } 1171 1172 /* 1173 * If more than one core, then this processor is CMP. 1174 */ 1175 if (cpi->cpi_ncore_per_chip > 1) 1176 feature |= X86_CMP; 1177 1178 /* 1179 * If the number of cores is the same as the number 1180 * of CPUs, then we cannot have HyperThreading. 1181 */ 1182 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1183 feature &= ~X86_HTT; 1184 1185 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1186 /* 1187 * Single-core single-threaded processors. 1188 */ 1189 cpi->cpi_chipid = -1; 1190 cpi->cpi_clogid = 0; 1191 cpi->cpi_coreid = cpu->cpu_id; 1192 cpi->cpi_pkgcoreid = 0; 1193 } else if (cpi->cpi_ncpu_per_chip > 1) { 1194 uint_t i; 1195 uint_t chipid_shift = 0; 1196 uint_t coreid_shift = 0; 1197 uint_t apic_id = CPI_APIC_ID(cpi); 1198 1199 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1200 chipid_shift++; 1201 cpi->cpi_chipid = apic_id >> chipid_shift; 1202 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1203 1204 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1205 if (feature & X86_CMP) { 1206 /* 1207 * Multi-core (and possibly multi-threaded) 1208 * processors. 1209 */ 1210 uint_t ncpu_per_core; 1211 if (cpi->cpi_ncore_per_chip == 1) 1212 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1213 else if (cpi->cpi_ncore_per_chip > 1) 1214 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1215 cpi->cpi_ncore_per_chip; 1216 /* 1217 * 8bit APIC IDs on dual core Pentiums 1218 * look like this: 1219 * 1220 * +-----------------------+------+------+ 1221 * | Physical Package ID | MC | HT | 1222 * +-----------------------+------+------+ 1223 * <------- chipid --------> 1224 * <------- coreid ---------------> 1225 * <--- clogid --> 1226 * <------> 1227 * pkgcoreid 1228 * 1229 * Where the number of bits necessary to 1230 * represent MC and HT fields together equals 1231 * to the minimum number of bits necessary to 1232 * store the value of cpi->cpi_ncpu_per_chip. 1233 * Of those bits, the MC part uses the number 1234 * of bits necessary to store the value of 1235 * cpi->cpi_ncore_per_chip. 1236 */ 1237 for (i = 1; i < ncpu_per_core; i <<= 1) 1238 coreid_shift++; 1239 cpi->cpi_coreid = apic_id >> coreid_shift; 1240 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1241 coreid_shift; 1242 } else if (feature & X86_HTT) { 1243 /* 1244 * Single-core multi-threaded processors. 1245 */ 1246 cpi->cpi_coreid = cpi->cpi_chipid; 1247 cpi->cpi_pkgcoreid = 0; 1248 } 1249 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1250 /* 1251 * AMD CMP chips currently have a single thread per 1252 * core, with 2 cores on family 0xf and 2, 3 or 4 1253 * cores on family 0x10. 1254 * 1255 * Since no two cpus share a core we must assign a 1256 * distinct coreid per cpu, and we do this by using 1257 * the cpu_id. This scheme does not, however, 1258 * guarantee that sibling cores of a chip will have 1259 * sequential coreids starting at a multiple of the 1260 * number of cores per chip - that is usually the 1261 * case, but if the ACPI MADT table is presented 1262 * in a different order then we need to perform a 1263 * few more gymnastics for the pkgcoreid. 1264 * 1265 * In family 0xf CMPs there are 2 cores on all nodes 1266 * present - no mixing of single and dual core parts. 1267 * 1268 * In family 0x10 CMPs cpuid fn 2 ECX[15:12] 1269 * "ApicIdCoreIdSize[3:0]" tells us how 1270 * many least-significant bits in the ApicId 1271 * are used to represent the core number 1272 * within the node. Cores are always 1273 * numbered sequentially from 0 regardless 1274 * of how many or which are disabled, and 1275 * there seems to be no way to discover the 1276 * real core id when some are disabled. 1277 */ 1278 cpi->cpi_coreid = cpu->cpu_id; 1279 1280 if (cpi->cpi_family == 0x10 && 1281 cpi->cpi_xmaxeax >= 0x80000008) { 1282 int coreidsz = 1283 BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 1284 1285 cpi->cpi_pkgcoreid = 1286 apic_id & ((1 << coreidsz) - 1); 1287 } else { 1288 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 1289 } 1290 } else { 1291 /* 1292 * All other processors are currently 1293 * assumed to have single cores. 1294 */ 1295 cpi->cpi_coreid = cpi->cpi_chipid; 1296 cpi->cpi_pkgcoreid = 0; 1297 } 1298 } 1299 1300 /* 1301 * Synthesize chip "revision" and socket type 1302 */ 1303 synth_info(cpi); 1304 1305 pass1_done: 1306 #if !defined(__xpv) 1307 check_for_hvm(); 1308 #endif 1309 cpi->cpi_pass = 1; 1310 return (feature); 1311 } 1312 1313 /* 1314 * Make copies of the cpuid table entries we depend on, in 1315 * part for ease of parsing now, in part so that we have only 1316 * one place to correct any of it, in part for ease of 1317 * later export to userland, and in part so we can look at 1318 * this stuff in a crash dump. 1319 */ 1320 1321 /*ARGSUSED*/ 1322 void 1323 cpuid_pass2(cpu_t *cpu) 1324 { 1325 uint_t n, nmax; 1326 int i; 1327 struct cpuid_regs *cp; 1328 uint8_t *dp; 1329 uint32_t *iptr; 1330 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1331 1332 ASSERT(cpi->cpi_pass == 1); 1333 1334 if (cpi->cpi_maxeax < 1) 1335 goto pass2_done; 1336 1337 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1338 nmax = NMAX_CPI_STD; 1339 /* 1340 * (We already handled n == 0 and n == 1 in pass 1) 1341 */ 1342 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1343 cp->cp_eax = n; 1344 1345 /* 1346 * CPUID function 4 expects %ecx to be initialized 1347 * with an index which indicates which cache to return 1348 * information about. The OS is expected to call function 4 1349 * with %ecx set to 0, 1, 2, ... until it returns with 1350 * EAX[4:0] set to 0, which indicates there are no more 1351 * caches. 1352 * 1353 * Here, populate cpi_std[4] with the information returned by 1354 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1355 * when dynamic memory allocation becomes available. 1356 * 1357 * Note: we need to explicitly initialize %ecx here, since 1358 * function 4 may have been previously invoked. 1359 */ 1360 if (n == 4) 1361 cp->cp_ecx = 0; 1362 1363 (void) __cpuid_insn(cp); 1364 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1365 switch (n) { 1366 case 2: 1367 /* 1368 * "the lower 8 bits of the %eax register 1369 * contain a value that identifies the number 1370 * of times the cpuid [instruction] has to be 1371 * executed to obtain a complete image of the 1372 * processor's caching systems." 1373 * 1374 * How *do* they make this stuff up? 1375 */ 1376 cpi->cpi_ncache = sizeof (*cp) * 1377 BITX(cp->cp_eax, 7, 0); 1378 if (cpi->cpi_ncache == 0) 1379 break; 1380 cpi->cpi_ncache--; /* skip count byte */ 1381 1382 /* 1383 * Well, for now, rather than attempt to implement 1384 * this slightly dubious algorithm, we just look 1385 * at the first 15 .. 1386 */ 1387 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1388 cpi->cpi_ncache = sizeof (*cp) - 1; 1389 1390 dp = cpi->cpi_cacheinfo; 1391 if (BITX(cp->cp_eax, 31, 31) == 0) { 1392 uint8_t *p = (void *)&cp->cp_eax; 1393 for (i = 1; i < 4; i++) 1394 if (p[i] != 0) 1395 *dp++ = p[i]; 1396 } 1397 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1398 uint8_t *p = (void *)&cp->cp_ebx; 1399 for (i = 0; i < 4; i++) 1400 if (p[i] != 0) 1401 *dp++ = p[i]; 1402 } 1403 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1404 uint8_t *p = (void *)&cp->cp_ecx; 1405 for (i = 0; i < 4; i++) 1406 if (p[i] != 0) 1407 *dp++ = p[i]; 1408 } 1409 if (BITX(cp->cp_edx, 31, 31) == 0) { 1410 uint8_t *p = (void *)&cp->cp_edx; 1411 for (i = 0; i < 4; i++) 1412 if (p[i] != 0) 1413 *dp++ = p[i]; 1414 } 1415 break; 1416 1417 case 3: /* Processor serial number, if PSN supported */ 1418 break; 1419 1420 case 4: /* Deterministic cache parameters */ 1421 break; 1422 1423 case 5: /* Monitor/Mwait parameters */ 1424 { 1425 size_t mwait_size; 1426 1427 /* 1428 * check cpi_mwait.support which was set in cpuid_pass1 1429 */ 1430 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1431 break; 1432 1433 /* 1434 * Protect ourself from insane mwait line size. 1435 * Workaround for incomplete hardware emulator(s). 1436 */ 1437 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1438 if (mwait_size < sizeof (uint32_t) || 1439 !ISP2(mwait_size)) { 1440 #if DEBUG 1441 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1442 "size %ld", 1443 cpu->cpu_id, (long)mwait_size); 1444 #endif 1445 break; 1446 } 1447 1448 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1449 cpi->cpi_mwait.mon_max = mwait_size; 1450 if (MWAIT_EXTENSION(cpi)) { 1451 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1452 if (MWAIT_INT_ENABLE(cpi)) 1453 cpi->cpi_mwait.support |= 1454 MWAIT_ECX_INT_ENABLE; 1455 } 1456 break; 1457 } 1458 default: 1459 break; 1460 } 1461 } 1462 1463 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1464 goto pass2_done; 1465 1466 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1467 nmax = NMAX_CPI_EXTD; 1468 /* 1469 * Copy the extended properties, fixing them as we go. 1470 * (We already handled n == 0 and n == 1 in pass 1) 1471 */ 1472 iptr = (void *)cpi->cpi_brandstr; 1473 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1474 cp->cp_eax = 0x80000000 + n; 1475 (void) __cpuid_insn(cp); 1476 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1477 switch (n) { 1478 case 2: 1479 case 3: 1480 case 4: 1481 /* 1482 * Extract the brand string 1483 */ 1484 *iptr++ = cp->cp_eax; 1485 *iptr++ = cp->cp_ebx; 1486 *iptr++ = cp->cp_ecx; 1487 *iptr++ = cp->cp_edx; 1488 break; 1489 case 5: 1490 switch (cpi->cpi_vendor) { 1491 case X86_VENDOR_AMD: 1492 /* 1493 * The Athlon and Duron were the first 1494 * parts to report the sizes of the 1495 * TLB for large pages. Before then, 1496 * we don't trust the data. 1497 */ 1498 if (cpi->cpi_family < 6 || 1499 (cpi->cpi_family == 6 && 1500 cpi->cpi_model < 1)) 1501 cp->cp_eax = 0; 1502 break; 1503 default: 1504 break; 1505 } 1506 break; 1507 case 6: 1508 switch (cpi->cpi_vendor) { 1509 case X86_VENDOR_AMD: 1510 /* 1511 * The Athlon and Duron were the first 1512 * AMD parts with L2 TLB's. 1513 * Before then, don't trust the data. 1514 */ 1515 if (cpi->cpi_family < 6 || 1516 cpi->cpi_family == 6 && 1517 cpi->cpi_model < 1) 1518 cp->cp_eax = cp->cp_ebx = 0; 1519 /* 1520 * AMD Duron rev A0 reports L2 1521 * cache size incorrectly as 1K 1522 * when it is really 64K 1523 */ 1524 if (cpi->cpi_family == 6 && 1525 cpi->cpi_model == 3 && 1526 cpi->cpi_step == 0) { 1527 cp->cp_ecx &= 0xffff; 1528 cp->cp_ecx |= 0x400000; 1529 } 1530 break; 1531 case X86_VENDOR_Cyrix: /* VIA C3 */ 1532 /* 1533 * VIA C3 processors are a bit messed 1534 * up w.r.t. encoding cache sizes in %ecx 1535 */ 1536 if (cpi->cpi_family != 6) 1537 break; 1538 /* 1539 * model 7 and 8 were incorrectly encoded 1540 * 1541 * xxx is model 8 really broken? 1542 */ 1543 if (cpi->cpi_model == 7 || 1544 cpi->cpi_model == 8) 1545 cp->cp_ecx = 1546 BITX(cp->cp_ecx, 31, 24) << 16 | 1547 BITX(cp->cp_ecx, 23, 16) << 12 | 1548 BITX(cp->cp_ecx, 15, 8) << 8 | 1549 BITX(cp->cp_ecx, 7, 0); 1550 /* 1551 * model 9 stepping 1 has wrong associativity 1552 */ 1553 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1554 cp->cp_ecx |= 8 << 12; 1555 break; 1556 case X86_VENDOR_Intel: 1557 /* 1558 * Extended L2 Cache features function. 1559 * First appeared on Prescott. 1560 */ 1561 default: 1562 break; 1563 } 1564 break; 1565 default: 1566 break; 1567 } 1568 } 1569 1570 pass2_done: 1571 cpi->cpi_pass = 2; 1572 } 1573 1574 static const char * 1575 intel_cpubrand(const struct cpuid_info *cpi) 1576 { 1577 int i; 1578 1579 if ((x86_feature & X86_CPUID) == 0 || 1580 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1581 return ("i486"); 1582 1583 switch (cpi->cpi_family) { 1584 case 5: 1585 return ("Intel Pentium(r)"); 1586 case 6: 1587 switch (cpi->cpi_model) { 1588 uint_t celeron, xeon; 1589 const struct cpuid_regs *cp; 1590 case 0: 1591 case 1: 1592 case 2: 1593 return ("Intel Pentium(r) Pro"); 1594 case 3: 1595 case 4: 1596 return ("Intel Pentium(r) II"); 1597 case 6: 1598 return ("Intel Celeron(r)"); 1599 case 5: 1600 case 7: 1601 celeron = xeon = 0; 1602 cp = &cpi->cpi_std[2]; /* cache info */ 1603 1604 for (i = 1; i < 4; i++) { 1605 uint_t tmp; 1606 1607 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1608 if (tmp == 0x40) 1609 celeron++; 1610 if (tmp >= 0x44 && tmp <= 0x45) 1611 xeon++; 1612 } 1613 1614 for (i = 0; i < 2; i++) { 1615 uint_t tmp; 1616 1617 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1618 if (tmp == 0x40) 1619 celeron++; 1620 else if (tmp >= 0x44 && tmp <= 0x45) 1621 xeon++; 1622 } 1623 1624 for (i = 0; i < 4; i++) { 1625 uint_t tmp; 1626 1627 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1628 if (tmp == 0x40) 1629 celeron++; 1630 else if (tmp >= 0x44 && tmp <= 0x45) 1631 xeon++; 1632 } 1633 1634 for (i = 0; i < 4; i++) { 1635 uint_t tmp; 1636 1637 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1638 if (tmp == 0x40) 1639 celeron++; 1640 else if (tmp >= 0x44 && tmp <= 0x45) 1641 xeon++; 1642 } 1643 1644 if (celeron) 1645 return ("Intel Celeron(r)"); 1646 if (xeon) 1647 return (cpi->cpi_model == 5 ? 1648 "Intel Pentium(r) II Xeon(tm)" : 1649 "Intel Pentium(r) III Xeon(tm)"); 1650 return (cpi->cpi_model == 5 ? 1651 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1652 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1653 default: 1654 break; 1655 } 1656 default: 1657 break; 1658 } 1659 1660 /* BrandID is present if the field is nonzero */ 1661 if (cpi->cpi_brandid != 0) { 1662 static const struct { 1663 uint_t bt_bid; 1664 const char *bt_str; 1665 } brand_tbl[] = { 1666 { 0x1, "Intel(r) Celeron(r)" }, 1667 { 0x2, "Intel(r) Pentium(r) III" }, 1668 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1669 { 0x4, "Intel(r) Pentium(r) III" }, 1670 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1671 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1672 { 0x8, "Intel(r) Pentium(r) 4" }, 1673 { 0x9, "Intel(r) Pentium(r) 4" }, 1674 { 0xa, "Intel(r) Celeron(r)" }, 1675 { 0xb, "Intel(r) Xeon(tm)" }, 1676 { 0xc, "Intel(r) Xeon(tm) MP" }, 1677 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1678 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1679 { 0x11, "Mobile Genuine Intel(r)" }, 1680 { 0x12, "Intel(r) Celeron(r) M" }, 1681 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1682 { 0x14, "Intel(r) Celeron(r)" }, 1683 { 0x15, "Mobile Genuine Intel(r)" }, 1684 { 0x16, "Intel(r) Pentium(r) M" }, 1685 { 0x17, "Mobile Intel(r) Celeron(r)" } 1686 }; 1687 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1688 uint_t sgn; 1689 1690 sgn = (cpi->cpi_family << 8) | 1691 (cpi->cpi_model << 4) | cpi->cpi_step; 1692 1693 for (i = 0; i < btblmax; i++) 1694 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1695 break; 1696 if (i < btblmax) { 1697 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1698 return ("Intel(r) Celeron(r)"); 1699 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1700 return ("Intel(r) Xeon(tm) MP"); 1701 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1702 return ("Intel(r) Xeon(tm)"); 1703 return (brand_tbl[i].bt_str); 1704 } 1705 } 1706 1707 return (NULL); 1708 } 1709 1710 static const char * 1711 amd_cpubrand(const struct cpuid_info *cpi) 1712 { 1713 if ((x86_feature & X86_CPUID) == 0 || 1714 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1715 return ("i486 compatible"); 1716 1717 switch (cpi->cpi_family) { 1718 case 5: 1719 switch (cpi->cpi_model) { 1720 case 0: 1721 case 1: 1722 case 2: 1723 case 3: 1724 case 4: 1725 case 5: 1726 return ("AMD-K5(r)"); 1727 case 6: 1728 case 7: 1729 return ("AMD-K6(r)"); 1730 case 8: 1731 return ("AMD-K6(r)-2"); 1732 case 9: 1733 return ("AMD-K6(r)-III"); 1734 default: 1735 return ("AMD (family 5)"); 1736 } 1737 case 6: 1738 switch (cpi->cpi_model) { 1739 case 1: 1740 return ("AMD-K7(tm)"); 1741 case 0: 1742 case 2: 1743 case 4: 1744 return ("AMD Athlon(tm)"); 1745 case 3: 1746 case 7: 1747 return ("AMD Duron(tm)"); 1748 case 6: 1749 case 8: 1750 case 10: 1751 /* 1752 * Use the L2 cache size to distinguish 1753 */ 1754 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1755 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1756 default: 1757 return ("AMD (family 6)"); 1758 } 1759 default: 1760 break; 1761 } 1762 1763 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1764 cpi->cpi_brandid != 0) { 1765 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1766 case 3: 1767 return ("AMD Opteron(tm) UP 1xx"); 1768 case 4: 1769 return ("AMD Opteron(tm) DP 2xx"); 1770 case 5: 1771 return ("AMD Opteron(tm) MP 8xx"); 1772 default: 1773 return ("AMD Opteron(tm)"); 1774 } 1775 } 1776 1777 return (NULL); 1778 } 1779 1780 static const char * 1781 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1782 { 1783 if ((x86_feature & X86_CPUID) == 0 || 1784 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1785 type == X86_TYPE_CYRIX_486) 1786 return ("i486 compatible"); 1787 1788 switch (type) { 1789 case X86_TYPE_CYRIX_6x86: 1790 return ("Cyrix 6x86"); 1791 case X86_TYPE_CYRIX_6x86L: 1792 return ("Cyrix 6x86L"); 1793 case X86_TYPE_CYRIX_6x86MX: 1794 return ("Cyrix 6x86MX"); 1795 case X86_TYPE_CYRIX_GXm: 1796 return ("Cyrix GXm"); 1797 case X86_TYPE_CYRIX_MediaGX: 1798 return ("Cyrix MediaGX"); 1799 case X86_TYPE_CYRIX_MII: 1800 return ("Cyrix M2"); 1801 case X86_TYPE_VIA_CYRIX_III: 1802 return ("VIA Cyrix M3"); 1803 default: 1804 /* 1805 * Have another wild guess .. 1806 */ 1807 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1808 return ("Cyrix 5x86"); 1809 else if (cpi->cpi_family == 5) { 1810 switch (cpi->cpi_model) { 1811 case 2: 1812 return ("Cyrix 6x86"); /* Cyrix M1 */ 1813 case 4: 1814 return ("Cyrix MediaGX"); 1815 default: 1816 break; 1817 } 1818 } else if (cpi->cpi_family == 6) { 1819 switch (cpi->cpi_model) { 1820 case 0: 1821 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1822 case 5: 1823 case 6: 1824 case 7: 1825 case 8: 1826 case 9: 1827 return ("VIA C3"); 1828 default: 1829 break; 1830 } 1831 } 1832 break; 1833 } 1834 return (NULL); 1835 } 1836 1837 /* 1838 * This only gets called in the case that the CPU extended 1839 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1840 * aren't available, or contain null bytes for some reason. 1841 */ 1842 static void 1843 fabricate_brandstr(struct cpuid_info *cpi) 1844 { 1845 const char *brand = NULL; 1846 1847 switch (cpi->cpi_vendor) { 1848 case X86_VENDOR_Intel: 1849 brand = intel_cpubrand(cpi); 1850 break; 1851 case X86_VENDOR_AMD: 1852 brand = amd_cpubrand(cpi); 1853 break; 1854 case X86_VENDOR_Cyrix: 1855 brand = cyrix_cpubrand(cpi, x86_type); 1856 break; 1857 case X86_VENDOR_NexGen: 1858 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1859 brand = "NexGen Nx586"; 1860 break; 1861 case X86_VENDOR_Centaur: 1862 if (cpi->cpi_family == 5) 1863 switch (cpi->cpi_model) { 1864 case 4: 1865 brand = "Centaur C6"; 1866 break; 1867 case 8: 1868 brand = "Centaur C2"; 1869 break; 1870 case 9: 1871 brand = "Centaur C3"; 1872 break; 1873 default: 1874 break; 1875 } 1876 break; 1877 case X86_VENDOR_Rise: 1878 if (cpi->cpi_family == 5 && 1879 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1880 brand = "Rise mP6"; 1881 break; 1882 case X86_VENDOR_SiS: 1883 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1884 brand = "SiS 55x"; 1885 break; 1886 case X86_VENDOR_TM: 1887 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 1888 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1889 break; 1890 case X86_VENDOR_NSC: 1891 case X86_VENDOR_UMC: 1892 default: 1893 break; 1894 } 1895 if (brand) { 1896 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1897 return; 1898 } 1899 1900 /* 1901 * If all else fails ... 1902 */ 1903 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1904 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1905 cpi->cpi_model, cpi->cpi_step); 1906 } 1907 1908 /* 1909 * This routine is called just after kernel memory allocation 1910 * becomes available on cpu0, and as part of mp_startup() on 1911 * the other cpus. 1912 * 1913 * Fixup the brand string, and collect any information from cpuid 1914 * that requires dynamicically allocated storage to represent. 1915 */ 1916 /*ARGSUSED*/ 1917 void 1918 cpuid_pass3(cpu_t *cpu) 1919 { 1920 int i, max, shft, level, size; 1921 struct cpuid_regs regs; 1922 struct cpuid_regs *cp; 1923 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1924 1925 ASSERT(cpi->cpi_pass == 2); 1926 1927 /* 1928 * Function 4: Deterministic cache parameters 1929 * 1930 * Take this opportunity to detect the number of threads 1931 * sharing the last level cache, and construct a corresponding 1932 * cache id. The respective cpuid_info members are initialized 1933 * to the default case of "no last level cache sharing". 1934 */ 1935 cpi->cpi_ncpu_shr_last_cache = 1; 1936 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1937 1938 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1939 1940 /* 1941 * Find the # of elements (size) returned by fn 4, and along 1942 * the way detect last level cache sharing details. 1943 */ 1944 bzero(®s, sizeof (regs)); 1945 cp = ®s; 1946 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1947 cp->cp_eax = 4; 1948 cp->cp_ecx = i; 1949 1950 (void) __cpuid_insn(cp); 1951 1952 if (CPI_CACHE_TYPE(cp) == 0) 1953 break; 1954 level = CPI_CACHE_LVL(cp); 1955 if (level > max) { 1956 max = level; 1957 cpi->cpi_ncpu_shr_last_cache = 1958 CPI_NTHR_SHR_CACHE(cp) + 1; 1959 } 1960 } 1961 cpi->cpi_std_4_size = size = i; 1962 1963 /* 1964 * Allocate the cpi_std_4 array. The first element 1965 * references the regs for fn 4, %ecx == 0, which 1966 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1967 */ 1968 if (size > 0) { 1969 cpi->cpi_std_4 = 1970 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1971 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1972 1973 /* 1974 * Allocate storage to hold the additional regs 1975 * for function 4, %ecx == 1 .. cpi_std_4_size. 1976 * 1977 * The regs for fn 4, %ecx == 0 has already 1978 * been allocated as indicated above. 1979 */ 1980 for (i = 1; i < size; i++) { 1981 cp = cpi->cpi_std_4[i] = 1982 kmem_zalloc(sizeof (regs), KM_SLEEP); 1983 cp->cp_eax = 4; 1984 cp->cp_ecx = i; 1985 1986 (void) __cpuid_insn(cp); 1987 } 1988 } 1989 /* 1990 * Determine the number of bits needed to represent 1991 * the number of CPUs sharing the last level cache. 1992 * 1993 * Shift off that number of bits from the APIC id to 1994 * derive the cache id. 1995 */ 1996 shft = 0; 1997 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1998 shft++; 1999 cpi->cpi_last_lvl_cacheid = CPI_APIC_ID(cpi) >> shft; 2000 } 2001 2002 /* 2003 * Now fixup the brand string 2004 */ 2005 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 2006 fabricate_brandstr(cpi); 2007 } else { 2008 2009 /* 2010 * If we successfully extracted a brand string from the cpuid 2011 * instruction, clean it up by removing leading spaces and 2012 * similar junk. 2013 */ 2014 if (cpi->cpi_brandstr[0]) { 2015 size_t maxlen = sizeof (cpi->cpi_brandstr); 2016 char *src, *dst; 2017 2018 dst = src = (char *)cpi->cpi_brandstr; 2019 src[maxlen - 1] = '\0'; 2020 /* 2021 * strip leading spaces 2022 */ 2023 while (*src == ' ') 2024 src++; 2025 /* 2026 * Remove any 'Genuine' or "Authentic" prefixes 2027 */ 2028 if (strncmp(src, "Genuine ", 8) == 0) 2029 src += 8; 2030 if (strncmp(src, "Authentic ", 10) == 0) 2031 src += 10; 2032 2033 /* 2034 * Now do an in-place copy. 2035 * Map (R) to (r) and (TM) to (tm). 2036 * The era of teletypes is long gone, and there's 2037 * -really- no need to shout. 2038 */ 2039 while (*src != '\0') { 2040 if (src[0] == '(') { 2041 if (strncmp(src + 1, "R)", 2) == 0) { 2042 (void) strncpy(dst, "(r)", 3); 2043 src += 3; 2044 dst += 3; 2045 continue; 2046 } 2047 if (strncmp(src + 1, "TM)", 3) == 0) { 2048 (void) strncpy(dst, "(tm)", 4); 2049 src += 4; 2050 dst += 4; 2051 continue; 2052 } 2053 } 2054 *dst++ = *src++; 2055 } 2056 *dst = '\0'; 2057 2058 /* 2059 * Finally, remove any trailing spaces 2060 */ 2061 while (--dst > cpi->cpi_brandstr) 2062 if (*dst == ' ') 2063 *dst = '\0'; 2064 else 2065 break; 2066 } else 2067 fabricate_brandstr(cpi); 2068 } 2069 cpi->cpi_pass = 3; 2070 } 2071 2072 /* 2073 * This routine is called out of bind_hwcap() much later in the life 2074 * of the kernel (post_startup()). The job of this routine is to resolve 2075 * the hardware feature support and kernel support for those features into 2076 * what we're actually going to tell applications via the aux vector. 2077 */ 2078 uint_t 2079 cpuid_pass4(cpu_t *cpu) 2080 { 2081 struct cpuid_info *cpi; 2082 uint_t hwcap_flags = 0; 2083 2084 if (cpu == NULL) 2085 cpu = CPU; 2086 cpi = cpu->cpu_m.mcpu_cpi; 2087 2088 ASSERT(cpi->cpi_pass == 3); 2089 2090 if (cpi->cpi_maxeax >= 1) { 2091 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2092 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2093 2094 *edx = CPI_FEATURES_EDX(cpi); 2095 *ecx = CPI_FEATURES_ECX(cpi); 2096 2097 /* 2098 * [these require explicit kernel support] 2099 */ 2100 if ((x86_feature & X86_SEP) == 0) 2101 *edx &= ~CPUID_INTC_EDX_SEP; 2102 2103 if ((x86_feature & X86_SSE) == 0) 2104 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2105 if ((x86_feature & X86_SSE2) == 0) 2106 *edx &= ~CPUID_INTC_EDX_SSE2; 2107 2108 if ((x86_feature & X86_HTT) == 0) 2109 *edx &= ~CPUID_INTC_EDX_HTT; 2110 2111 if ((x86_feature & X86_SSE3) == 0) 2112 *ecx &= ~CPUID_INTC_ECX_SSE3; 2113 2114 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2115 if ((x86_feature & X86_SSSE3) == 0) 2116 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2117 if ((x86_feature & X86_SSE4_1) == 0) 2118 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2119 if ((x86_feature & X86_SSE4_2) == 0) 2120 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2121 } 2122 2123 /* 2124 * [no explicit support required beyond x87 fp context] 2125 */ 2126 if (!fpu_exists) 2127 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2128 2129 /* 2130 * Now map the supported feature vector to things that we 2131 * think userland will care about. 2132 */ 2133 if (*edx & CPUID_INTC_EDX_SEP) 2134 hwcap_flags |= AV_386_SEP; 2135 if (*edx & CPUID_INTC_EDX_SSE) 2136 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2137 if (*edx & CPUID_INTC_EDX_SSE2) 2138 hwcap_flags |= AV_386_SSE2; 2139 if (*ecx & CPUID_INTC_ECX_SSE3) 2140 hwcap_flags |= AV_386_SSE3; 2141 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2142 if (*ecx & CPUID_INTC_ECX_SSSE3) 2143 hwcap_flags |= AV_386_SSSE3; 2144 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2145 hwcap_flags |= AV_386_SSE4_1; 2146 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2147 hwcap_flags |= AV_386_SSE4_2; 2148 } 2149 if (*ecx & CPUID_INTC_ECX_POPCNT) 2150 hwcap_flags |= AV_386_POPCNT; 2151 if (*edx & CPUID_INTC_EDX_FPU) 2152 hwcap_flags |= AV_386_FPU; 2153 if (*edx & CPUID_INTC_EDX_MMX) 2154 hwcap_flags |= AV_386_MMX; 2155 2156 if (*edx & CPUID_INTC_EDX_TSC) 2157 hwcap_flags |= AV_386_TSC; 2158 if (*edx & CPUID_INTC_EDX_CX8) 2159 hwcap_flags |= AV_386_CX8; 2160 if (*edx & CPUID_INTC_EDX_CMOV) 2161 hwcap_flags |= AV_386_CMOV; 2162 if (*ecx & CPUID_INTC_ECX_MON) 2163 hwcap_flags |= AV_386_MON; 2164 if (*ecx & CPUID_INTC_ECX_CX16) 2165 hwcap_flags |= AV_386_CX16; 2166 } 2167 2168 if (x86_feature & X86_HTT) 2169 hwcap_flags |= AV_386_PAUSE; 2170 2171 if (cpi->cpi_xmaxeax < 0x80000001) 2172 goto pass4_done; 2173 2174 switch (cpi->cpi_vendor) { 2175 struct cpuid_regs cp; 2176 uint32_t *edx, *ecx; 2177 2178 case X86_VENDOR_Intel: 2179 /* 2180 * Seems like Intel duplicated what we necessary 2181 * here to make the initial crop of 64-bit OS's work. 2182 * Hopefully, those are the only "extended" bits 2183 * they'll add. 2184 */ 2185 /*FALLTHROUGH*/ 2186 2187 case X86_VENDOR_AMD: 2188 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2189 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2190 2191 *edx = CPI_FEATURES_XTD_EDX(cpi); 2192 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2193 2194 /* 2195 * [these features require explicit kernel support] 2196 */ 2197 switch (cpi->cpi_vendor) { 2198 case X86_VENDOR_Intel: 2199 if ((x86_feature & X86_TSCP) == 0) 2200 *edx &= ~CPUID_AMD_EDX_TSCP; 2201 break; 2202 2203 case X86_VENDOR_AMD: 2204 if ((x86_feature & X86_TSCP) == 0) 2205 *edx &= ~CPUID_AMD_EDX_TSCP; 2206 if ((x86_feature & X86_SSE4A) == 0) 2207 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2208 break; 2209 2210 default: 2211 break; 2212 } 2213 2214 /* 2215 * [no explicit support required beyond 2216 * x87 fp context and exception handlers] 2217 */ 2218 if (!fpu_exists) 2219 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2220 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2221 2222 if ((x86_feature & X86_NX) == 0) 2223 *edx &= ~CPUID_AMD_EDX_NX; 2224 #if !defined(__amd64) 2225 *edx &= ~CPUID_AMD_EDX_LM; 2226 #endif 2227 /* 2228 * Now map the supported feature vector to 2229 * things that we think userland will care about. 2230 */ 2231 #if defined(__amd64) 2232 if (*edx & CPUID_AMD_EDX_SYSC) 2233 hwcap_flags |= AV_386_AMD_SYSC; 2234 #endif 2235 if (*edx & CPUID_AMD_EDX_MMXamd) 2236 hwcap_flags |= AV_386_AMD_MMX; 2237 if (*edx & CPUID_AMD_EDX_3DNow) 2238 hwcap_flags |= AV_386_AMD_3DNow; 2239 if (*edx & CPUID_AMD_EDX_3DNowx) 2240 hwcap_flags |= AV_386_AMD_3DNowx; 2241 2242 switch (cpi->cpi_vendor) { 2243 case X86_VENDOR_AMD: 2244 if (*edx & CPUID_AMD_EDX_TSCP) 2245 hwcap_flags |= AV_386_TSCP; 2246 if (*ecx & CPUID_AMD_ECX_AHF64) 2247 hwcap_flags |= AV_386_AHF; 2248 if (*ecx & CPUID_AMD_ECX_SSE4A) 2249 hwcap_flags |= AV_386_AMD_SSE4A; 2250 if (*ecx & CPUID_AMD_ECX_LZCNT) 2251 hwcap_flags |= AV_386_AMD_LZCNT; 2252 break; 2253 2254 case X86_VENDOR_Intel: 2255 if (*edx & CPUID_AMD_EDX_TSCP) 2256 hwcap_flags |= AV_386_TSCP; 2257 /* 2258 * Aarrgh. 2259 * Intel uses a different bit in the same word. 2260 */ 2261 if (*ecx & CPUID_INTC_ECX_AHF64) 2262 hwcap_flags |= AV_386_AHF; 2263 break; 2264 2265 default: 2266 break; 2267 } 2268 break; 2269 2270 case X86_VENDOR_TM: 2271 cp.cp_eax = 0x80860001; 2272 (void) __cpuid_insn(&cp); 2273 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2274 break; 2275 2276 default: 2277 break; 2278 } 2279 2280 pass4_done: 2281 cpi->cpi_pass = 4; 2282 return (hwcap_flags); 2283 } 2284 2285 2286 /* 2287 * Simulate the cpuid instruction using the data we previously 2288 * captured about this CPU. We try our best to return the truth 2289 * about the hardware, independently of kernel support. 2290 */ 2291 uint32_t 2292 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2293 { 2294 struct cpuid_info *cpi; 2295 struct cpuid_regs *xcp; 2296 2297 if (cpu == NULL) 2298 cpu = CPU; 2299 cpi = cpu->cpu_m.mcpu_cpi; 2300 2301 ASSERT(cpuid_checkpass(cpu, 3)); 2302 2303 /* 2304 * CPUID data is cached in two separate places: cpi_std for standard 2305 * CPUID functions, and cpi_extd for extended CPUID functions. 2306 */ 2307 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2308 xcp = &cpi->cpi_std[cp->cp_eax]; 2309 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2310 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2311 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2312 else 2313 /* 2314 * The caller is asking for data from an input parameter which 2315 * the kernel has not cached. In this case we go fetch from 2316 * the hardware and return the data directly to the user. 2317 */ 2318 return (__cpuid_insn(cp)); 2319 2320 cp->cp_eax = xcp->cp_eax; 2321 cp->cp_ebx = xcp->cp_ebx; 2322 cp->cp_ecx = xcp->cp_ecx; 2323 cp->cp_edx = xcp->cp_edx; 2324 return (cp->cp_eax); 2325 } 2326 2327 int 2328 cpuid_checkpass(cpu_t *cpu, int pass) 2329 { 2330 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2331 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2332 } 2333 2334 int 2335 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2336 { 2337 ASSERT(cpuid_checkpass(cpu, 3)); 2338 2339 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2340 } 2341 2342 int 2343 cpuid_is_cmt(cpu_t *cpu) 2344 { 2345 if (cpu == NULL) 2346 cpu = CPU; 2347 2348 ASSERT(cpuid_checkpass(cpu, 1)); 2349 2350 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2351 } 2352 2353 /* 2354 * AMD and Intel both implement the 64-bit variant of the syscall 2355 * instruction (syscallq), so if there's -any- support for syscall, 2356 * cpuid currently says "yes, we support this". 2357 * 2358 * However, Intel decided to -not- implement the 32-bit variant of the 2359 * syscall instruction, so we provide a predicate to allow our caller 2360 * to test that subtlety here. 2361 * 2362 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2363 * even in the case where the hardware would in fact support it. 2364 */ 2365 /*ARGSUSED*/ 2366 int 2367 cpuid_syscall32_insn(cpu_t *cpu) 2368 { 2369 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2370 2371 #if !defined(__xpv) 2372 if (cpu == NULL) 2373 cpu = CPU; 2374 2375 /*CSTYLED*/ 2376 { 2377 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2378 2379 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2380 cpi->cpi_xmaxeax >= 0x80000001 && 2381 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2382 return (1); 2383 } 2384 #endif 2385 return (0); 2386 } 2387 2388 int 2389 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2390 { 2391 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2392 2393 static const char fmt[] = 2394 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2395 static const char fmt_ht[] = 2396 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2397 2398 ASSERT(cpuid_checkpass(cpu, 1)); 2399 2400 if (cpuid_is_cmt(cpu)) 2401 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2402 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2403 cpi->cpi_family, cpi->cpi_model, 2404 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2405 return (snprintf(s, n, fmt, 2406 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2407 cpi->cpi_family, cpi->cpi_model, 2408 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2409 } 2410 2411 const char * 2412 cpuid_getvendorstr(cpu_t *cpu) 2413 { 2414 ASSERT(cpuid_checkpass(cpu, 1)); 2415 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2416 } 2417 2418 uint_t 2419 cpuid_getvendor(cpu_t *cpu) 2420 { 2421 ASSERT(cpuid_checkpass(cpu, 1)); 2422 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2423 } 2424 2425 uint_t 2426 cpuid_getfamily(cpu_t *cpu) 2427 { 2428 ASSERT(cpuid_checkpass(cpu, 1)); 2429 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2430 } 2431 2432 uint_t 2433 cpuid_getmodel(cpu_t *cpu) 2434 { 2435 ASSERT(cpuid_checkpass(cpu, 1)); 2436 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2437 } 2438 2439 uint_t 2440 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2441 { 2442 ASSERT(cpuid_checkpass(cpu, 1)); 2443 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2444 } 2445 2446 uint_t 2447 cpuid_get_ncore_per_chip(cpu_t *cpu) 2448 { 2449 ASSERT(cpuid_checkpass(cpu, 1)); 2450 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2451 } 2452 2453 uint_t 2454 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2455 { 2456 ASSERT(cpuid_checkpass(cpu, 2)); 2457 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2458 } 2459 2460 id_t 2461 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2462 { 2463 ASSERT(cpuid_checkpass(cpu, 2)); 2464 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2465 } 2466 2467 uint_t 2468 cpuid_getstep(cpu_t *cpu) 2469 { 2470 ASSERT(cpuid_checkpass(cpu, 1)); 2471 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2472 } 2473 2474 uint_t 2475 cpuid_getsig(struct cpu *cpu) 2476 { 2477 ASSERT(cpuid_checkpass(cpu, 1)); 2478 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2479 } 2480 2481 uint32_t 2482 cpuid_getchiprev(struct cpu *cpu) 2483 { 2484 ASSERT(cpuid_checkpass(cpu, 1)); 2485 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2486 } 2487 2488 const char * 2489 cpuid_getchiprevstr(struct cpu *cpu) 2490 { 2491 ASSERT(cpuid_checkpass(cpu, 1)); 2492 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2493 } 2494 2495 uint32_t 2496 cpuid_getsockettype(struct cpu *cpu) 2497 { 2498 ASSERT(cpuid_checkpass(cpu, 1)); 2499 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2500 } 2501 2502 int 2503 cpuid_get_chipid(cpu_t *cpu) 2504 { 2505 ASSERT(cpuid_checkpass(cpu, 1)); 2506 2507 if (cpuid_is_cmt(cpu)) 2508 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2509 return (cpu->cpu_id); 2510 } 2511 2512 id_t 2513 cpuid_get_coreid(cpu_t *cpu) 2514 { 2515 ASSERT(cpuid_checkpass(cpu, 1)); 2516 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2517 } 2518 2519 int 2520 cpuid_get_pkgcoreid(cpu_t *cpu) 2521 { 2522 ASSERT(cpuid_checkpass(cpu, 1)); 2523 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 2524 } 2525 2526 int 2527 cpuid_get_clogid(cpu_t *cpu) 2528 { 2529 ASSERT(cpuid_checkpass(cpu, 1)); 2530 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2531 } 2532 2533 void 2534 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2535 { 2536 struct cpuid_info *cpi; 2537 2538 if (cpu == NULL) 2539 cpu = CPU; 2540 cpi = cpu->cpu_m.mcpu_cpi; 2541 2542 ASSERT(cpuid_checkpass(cpu, 1)); 2543 2544 if (pabits) 2545 *pabits = cpi->cpi_pabits; 2546 if (vabits) 2547 *vabits = cpi->cpi_vabits; 2548 } 2549 2550 /* 2551 * Returns the number of data TLB entries for a corresponding 2552 * pagesize. If it can't be computed, or isn't known, the 2553 * routine returns zero. If you ask about an architecturally 2554 * impossible pagesize, the routine will panic (so that the 2555 * hat implementor knows that things are inconsistent.) 2556 */ 2557 uint_t 2558 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2559 { 2560 struct cpuid_info *cpi; 2561 uint_t dtlb_nent = 0; 2562 2563 if (cpu == NULL) 2564 cpu = CPU; 2565 cpi = cpu->cpu_m.mcpu_cpi; 2566 2567 ASSERT(cpuid_checkpass(cpu, 1)); 2568 2569 /* 2570 * Check the L2 TLB info 2571 */ 2572 if (cpi->cpi_xmaxeax >= 0x80000006) { 2573 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2574 2575 switch (pagesize) { 2576 2577 case 4 * 1024: 2578 /* 2579 * All zero in the top 16 bits of the register 2580 * indicates a unified TLB. Size is in low 16 bits. 2581 */ 2582 if ((cp->cp_ebx & 0xffff0000) == 0) 2583 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2584 else 2585 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2586 break; 2587 2588 case 2 * 1024 * 1024: 2589 if ((cp->cp_eax & 0xffff0000) == 0) 2590 dtlb_nent = cp->cp_eax & 0x0000ffff; 2591 else 2592 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2593 break; 2594 2595 default: 2596 panic("unknown L2 pagesize"); 2597 /*NOTREACHED*/ 2598 } 2599 } 2600 2601 if (dtlb_nent != 0) 2602 return (dtlb_nent); 2603 2604 /* 2605 * No L2 TLB support for this size, try L1. 2606 */ 2607 if (cpi->cpi_xmaxeax >= 0x80000005) { 2608 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2609 2610 switch (pagesize) { 2611 case 4 * 1024: 2612 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2613 break; 2614 case 2 * 1024 * 1024: 2615 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2616 break; 2617 default: 2618 panic("unknown L1 d-TLB pagesize"); 2619 /*NOTREACHED*/ 2620 } 2621 } 2622 2623 return (dtlb_nent); 2624 } 2625 2626 /* 2627 * Return 0 if the erratum is not present or not applicable, positive 2628 * if it is, and negative if the status of the erratum is unknown. 2629 * 2630 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2631 * Processors" #25759, Rev 3.57, August 2005 2632 */ 2633 int 2634 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2635 { 2636 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2637 uint_t eax; 2638 2639 /* 2640 * Bail out if this CPU isn't an AMD CPU, or if it's 2641 * a legacy (32-bit) AMD CPU. 2642 */ 2643 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2644 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2645 cpi->cpi_family == 6) 2646 2647 return (0); 2648 2649 eax = cpi->cpi_std[1].cp_eax; 2650 2651 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2652 #define SH_B3(eax) (eax == 0xf51) 2653 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2654 2655 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2656 2657 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2658 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2659 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2660 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2661 2662 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2663 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2664 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2665 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2666 2667 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2668 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2669 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2670 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2671 #define BH_E4(eax) (eax == 0x20fb1) 2672 #define SH_E5(eax) (eax == 0x20f42) 2673 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2674 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2675 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2676 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2677 DH_E6(eax) || JH_E6(eax)) 2678 2679 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 2680 #define DR_B0(eax) (eax == 0x100f20) 2681 #define DR_B1(eax) (eax == 0x100f21) 2682 #define DR_BA(eax) (eax == 0x100f2a) 2683 #define DR_B2(eax) (eax == 0x100f22) 2684 #define DR_B3(eax) (eax == 0x100f23) 2685 #define RB_C0(eax) (eax == 0x100f40) 2686 2687 switch (erratum) { 2688 case 1: 2689 return (cpi->cpi_family < 0x10); 2690 case 51: /* what does the asterisk mean? */ 2691 return (B(eax) || SH_C0(eax) || CG(eax)); 2692 case 52: 2693 return (B(eax)); 2694 case 57: 2695 return (cpi->cpi_family <= 0x11); 2696 case 58: 2697 return (B(eax)); 2698 case 60: 2699 return (cpi->cpi_family <= 0x11); 2700 case 61: 2701 case 62: 2702 case 63: 2703 case 64: 2704 case 65: 2705 case 66: 2706 case 68: 2707 case 69: 2708 case 70: 2709 case 71: 2710 return (B(eax)); 2711 case 72: 2712 return (SH_B0(eax)); 2713 case 74: 2714 return (B(eax)); 2715 case 75: 2716 return (cpi->cpi_family < 0x10); 2717 case 76: 2718 return (B(eax)); 2719 case 77: 2720 return (cpi->cpi_family <= 0x11); 2721 case 78: 2722 return (B(eax) || SH_C0(eax)); 2723 case 79: 2724 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2725 case 80: 2726 case 81: 2727 case 82: 2728 return (B(eax)); 2729 case 83: 2730 return (B(eax) || SH_C0(eax) || CG(eax)); 2731 case 85: 2732 return (cpi->cpi_family < 0x10); 2733 case 86: 2734 return (SH_C0(eax) || CG(eax)); 2735 case 88: 2736 #if !defined(__amd64) 2737 return (0); 2738 #else 2739 return (B(eax) || SH_C0(eax)); 2740 #endif 2741 case 89: 2742 return (cpi->cpi_family < 0x10); 2743 case 90: 2744 return (B(eax) || SH_C0(eax) || CG(eax)); 2745 case 91: 2746 case 92: 2747 return (B(eax) || SH_C0(eax)); 2748 case 93: 2749 return (SH_C0(eax)); 2750 case 94: 2751 return (B(eax) || SH_C0(eax) || CG(eax)); 2752 case 95: 2753 #if !defined(__amd64) 2754 return (0); 2755 #else 2756 return (B(eax) || SH_C0(eax)); 2757 #endif 2758 case 96: 2759 return (B(eax) || SH_C0(eax) || CG(eax)); 2760 case 97: 2761 case 98: 2762 return (SH_C0(eax) || CG(eax)); 2763 case 99: 2764 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2765 case 100: 2766 return (B(eax) || SH_C0(eax)); 2767 case 101: 2768 case 103: 2769 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2770 case 104: 2771 return (SH_C0(eax) || CG(eax) || D0(eax)); 2772 case 105: 2773 case 106: 2774 case 107: 2775 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2776 case 108: 2777 return (DH_CG(eax)); 2778 case 109: 2779 return (SH_C0(eax) || CG(eax) || D0(eax)); 2780 case 110: 2781 return (D0(eax) || EX(eax)); 2782 case 111: 2783 return (CG(eax)); 2784 case 112: 2785 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2786 case 113: 2787 return (eax == 0x20fc0); 2788 case 114: 2789 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2790 case 115: 2791 return (SH_E0(eax) || JH_E1(eax)); 2792 case 116: 2793 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2794 case 117: 2795 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2796 case 118: 2797 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2798 JH_E6(eax)); 2799 case 121: 2800 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2801 case 122: 2802 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 2803 case 123: 2804 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2805 case 131: 2806 return (cpi->cpi_family < 0x10); 2807 case 6336786: 2808 /* 2809 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2810 * if this is a K8 family or newer processor 2811 */ 2812 if (CPI_FAMILY(cpi) == 0xf) { 2813 struct cpuid_regs regs; 2814 regs.cp_eax = 0x80000007; 2815 (void) __cpuid_insn(®s); 2816 return (!(regs.cp_edx & 0x100)); 2817 } 2818 return (0); 2819 case 6323525: 2820 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2821 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2822 2823 case 6671130: 2824 /* 2825 * check for processors (pre-Shanghai) that do not provide 2826 * optimal management of 1gb ptes in its tlb. 2827 */ 2828 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 2829 2830 case 298: 2831 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 2832 DR_B2(eax) || RB_C0(eax)); 2833 2834 default: 2835 return (-1); 2836 2837 } 2838 } 2839 2840 /* 2841 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 2842 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 2843 */ 2844 int 2845 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 2846 { 2847 struct cpuid_info *cpi; 2848 uint_t osvwid; 2849 static int osvwfeature = -1; 2850 uint64_t osvwlength; 2851 2852 2853 cpi = cpu->cpu_m.mcpu_cpi; 2854 2855 /* confirm OSVW supported */ 2856 if (osvwfeature == -1) { 2857 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 2858 } else { 2859 /* assert that osvw feature setting is consistent on all cpus */ 2860 ASSERT(osvwfeature == 2861 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 2862 } 2863 if (!osvwfeature) 2864 return (-1); 2865 2866 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 2867 2868 switch (erratum) { 2869 case 298: /* osvwid is 0 */ 2870 osvwid = 0; 2871 if (osvwlength <= (uint64_t)osvwid) { 2872 /* osvwid 0 is unknown */ 2873 return (-1); 2874 } 2875 2876 /* 2877 * Check the OSVW STATUS MSR to determine the state 2878 * of the erratum where: 2879 * 0 - fixed by HW 2880 * 1 - BIOS has applied the workaround when BIOS 2881 * workaround is available. (Or for other errata, 2882 * OS workaround is required.) 2883 * For a value of 1, caller will confirm that the 2884 * erratum 298 workaround has indeed been applied by BIOS. 2885 * 2886 * A 1 may be set in cpus that have a HW fix 2887 * in a mixed cpu system. Regarding erratum 298: 2888 * In a multiprocessor platform, the workaround above 2889 * should be applied to all processors regardless of 2890 * silicon revision when an affected processor is 2891 * present. 2892 */ 2893 2894 return (rdmsr(MSR_AMD_OSVW_STATUS + 2895 (osvwid / OSVW_ID_CNT_PER_MSR)) & 2896 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 2897 2898 default: 2899 return (-1); 2900 } 2901 } 2902 2903 static const char assoc_str[] = "associativity"; 2904 static const char line_str[] = "line-size"; 2905 static const char size_str[] = "size"; 2906 2907 static void 2908 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2909 uint32_t val) 2910 { 2911 char buf[128]; 2912 2913 /* 2914 * ndi_prop_update_int() is used because it is desirable for 2915 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2916 */ 2917 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2918 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2919 } 2920 2921 /* 2922 * Intel-style cache/tlb description 2923 * 2924 * Standard cpuid level 2 gives a randomly ordered 2925 * selection of tags that index into a table that describes 2926 * cache and tlb properties. 2927 */ 2928 2929 static const char l1_icache_str[] = "l1-icache"; 2930 static const char l1_dcache_str[] = "l1-dcache"; 2931 static const char l2_cache_str[] = "l2-cache"; 2932 static const char l3_cache_str[] = "l3-cache"; 2933 static const char itlb4k_str[] = "itlb-4K"; 2934 static const char dtlb4k_str[] = "dtlb-4K"; 2935 static const char itlb4M_str[] = "itlb-4M"; 2936 static const char dtlb4M_str[] = "dtlb-4M"; 2937 static const char dtlb24_str[] = "dtlb0-2M-4M"; 2938 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2939 static const char itlb24_str[] = "itlb-2M-4M"; 2940 static const char dtlb44_str[] = "dtlb-4K-4M"; 2941 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2942 static const char sl2_cache_str[] = "sectored-l2-cache"; 2943 static const char itrace_str[] = "itrace-cache"; 2944 static const char sl3_cache_str[] = "sectored-l3-cache"; 2945 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 2946 2947 static const struct cachetab { 2948 uint8_t ct_code; 2949 uint8_t ct_assoc; 2950 uint16_t ct_line_size; 2951 size_t ct_size; 2952 const char *ct_label; 2953 } intel_ctab[] = { 2954 /* maintain descending order! */ 2955 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 2956 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 2957 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 2958 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 2959 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 2960 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 2961 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 2962 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 2963 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 2964 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 2965 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 2966 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 2967 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 2968 { 0xb4, 4, 0, 256, dtlb4k_str }, 2969 { 0xb3, 4, 0, 128, dtlb4k_str }, 2970 { 0xb2, 4, 0, 64, itlb4k_str }, 2971 { 0xb0, 4, 0, 128, itlb4k_str }, 2972 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2973 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2974 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2975 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2976 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2977 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2978 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2979 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2980 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2981 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2982 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2983 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 2984 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 2985 { 0x73, 8, 0, 64*1024, itrace_str}, 2986 { 0x72, 8, 0, 32*1024, itrace_str}, 2987 { 0x71, 8, 0, 16*1024, itrace_str}, 2988 { 0x70, 8, 0, 12*1024, itrace_str}, 2989 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 2990 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 2991 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 2992 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 2993 { 0x5d, 0, 0, 256, dtlb44_str}, 2994 { 0x5c, 0, 0, 128, dtlb44_str}, 2995 { 0x5b, 0, 0, 64, dtlb44_str}, 2996 { 0x5a, 4, 0, 32, dtlb24_str}, 2997 { 0x55, 0, 0, 7, itlb24_str}, 2998 { 0x52, 0, 0, 256, itlb424_str}, 2999 { 0x51, 0, 0, 128, itlb424_str}, 3000 { 0x50, 0, 0, 64, itlb424_str}, 3001 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 3002 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 3003 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 3004 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 3005 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 3006 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 3007 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 3008 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 3009 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 3010 { 0x43, 4, 32, 512*1024, l2_cache_str}, 3011 { 0x42, 4, 32, 256*1024, l2_cache_str}, 3012 { 0x41, 4, 32, 128*1024, l2_cache_str}, 3013 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 3014 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 3015 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 3016 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 3017 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 3018 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 3019 { 0x30, 8, 64, 32*1024, l1_icache_str}, 3020 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 3021 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 3022 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 3023 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 3024 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 3025 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 3026 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 3027 { 0x0b, 4, 0, 4, itlb4M_str}, 3028 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 3029 { 0x08, 4, 32, 16*1024, l1_icache_str}, 3030 { 0x06, 4, 32, 8*1024, l1_icache_str}, 3031 { 0x04, 4, 0, 8, dtlb4M_str}, 3032 { 0x03, 4, 0, 64, dtlb4k_str}, 3033 { 0x02, 4, 0, 2, itlb4M_str}, 3034 { 0x01, 4, 0, 32, itlb4k_str}, 3035 { 0 } 3036 }; 3037 3038 static const struct cachetab cyrix_ctab[] = { 3039 { 0x70, 4, 0, 32, "tlb-4K" }, 3040 { 0x80, 4, 16, 16*1024, "l1-cache" }, 3041 { 0 } 3042 }; 3043 3044 /* 3045 * Search a cache table for a matching entry 3046 */ 3047 static const struct cachetab * 3048 find_cacheent(const struct cachetab *ct, uint_t code) 3049 { 3050 if (code != 0) { 3051 for (; ct->ct_code != 0; ct++) 3052 if (ct->ct_code <= code) 3053 break; 3054 if (ct->ct_code == code) 3055 return (ct); 3056 } 3057 return (NULL); 3058 } 3059 3060 /* 3061 * Populate cachetab entry with L2 or L3 cache-information using 3062 * cpuid function 4. This function is called from intel_walk_cacheinfo() 3063 * when descriptor 0x49 is encountered. It returns 0 if no such cache 3064 * information is found. 3065 */ 3066 static int 3067 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 3068 { 3069 uint32_t level, i; 3070 int ret = 0; 3071 3072 for (i = 0; i < cpi->cpi_std_4_size; i++) { 3073 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 3074 3075 if (level == 2 || level == 3) { 3076 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 3077 ct->ct_line_size = 3078 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 3079 ct->ct_size = ct->ct_assoc * 3080 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 3081 ct->ct_line_size * 3082 (cpi->cpi_std_4[i]->cp_ecx + 1); 3083 3084 if (level == 2) { 3085 ct->ct_label = l2_cache_str; 3086 } else if (level == 3) { 3087 ct->ct_label = l3_cache_str; 3088 } 3089 ret = 1; 3090 } 3091 } 3092 3093 return (ret); 3094 } 3095 3096 /* 3097 * Walk the cacheinfo descriptor, applying 'func' to every valid element 3098 * The walk is terminated if the walker returns non-zero. 3099 */ 3100 static void 3101 intel_walk_cacheinfo(struct cpuid_info *cpi, 3102 void *arg, int (*func)(void *, const struct cachetab *)) 3103 { 3104 const struct cachetab *ct; 3105 struct cachetab des_49_ct; 3106 uint8_t *dp; 3107 int i; 3108 3109 if ((dp = cpi->cpi_cacheinfo) == NULL) 3110 return; 3111 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3112 /* 3113 * For overloaded descriptor 0x49 we use cpuid function 4 3114 * if supported by the current processor, to create 3115 * cache information. 3116 */ 3117 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 3118 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 3119 ct = &des_49_ct; 3120 } else { 3121 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 3122 continue; 3123 } 3124 } 3125 3126 if (func(arg, ct) != 0) { 3127 break; 3128 } 3129 } 3130 } 3131 3132 /* 3133 * (Like the Intel one, except for Cyrix CPUs) 3134 */ 3135 static void 3136 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 3137 void *arg, int (*func)(void *, const struct cachetab *)) 3138 { 3139 const struct cachetab *ct; 3140 uint8_t *dp; 3141 int i; 3142 3143 if ((dp = cpi->cpi_cacheinfo) == NULL) 3144 return; 3145 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 3146 /* 3147 * Search Cyrix-specific descriptor table first .. 3148 */ 3149 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 3150 if (func(arg, ct) != 0) 3151 break; 3152 continue; 3153 } 3154 /* 3155 * .. else fall back to the Intel one 3156 */ 3157 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 3158 if (func(arg, ct) != 0) 3159 break; 3160 continue; 3161 } 3162 } 3163 } 3164 3165 /* 3166 * A cacheinfo walker that adds associativity, line-size, and size properties 3167 * to the devinfo node it is passed as an argument. 3168 */ 3169 static int 3170 add_cacheent_props(void *arg, const struct cachetab *ct) 3171 { 3172 dev_info_t *devi = arg; 3173 3174 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3175 if (ct->ct_line_size != 0) 3176 add_cache_prop(devi, ct->ct_label, line_str, 3177 ct->ct_line_size); 3178 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3179 return (0); 3180 } 3181 3182 3183 static const char fully_assoc[] = "fully-associative?"; 3184 3185 /* 3186 * AMD style cache/tlb description 3187 * 3188 * Extended functions 5 and 6 directly describe properties of 3189 * tlbs and various cache levels. 3190 */ 3191 static void 3192 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3193 { 3194 switch (assoc) { 3195 case 0: /* reserved; ignore */ 3196 break; 3197 default: 3198 add_cache_prop(devi, label, assoc_str, assoc); 3199 break; 3200 case 0xff: 3201 add_cache_prop(devi, label, fully_assoc, 1); 3202 break; 3203 } 3204 } 3205 3206 static void 3207 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3208 { 3209 if (size == 0) 3210 return; 3211 add_cache_prop(devi, label, size_str, size); 3212 add_amd_assoc(devi, label, assoc); 3213 } 3214 3215 static void 3216 add_amd_cache(dev_info_t *devi, const char *label, 3217 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3218 { 3219 if (size == 0 || line_size == 0) 3220 return; 3221 add_amd_assoc(devi, label, assoc); 3222 /* 3223 * Most AMD parts have a sectored cache. Multiple cache lines are 3224 * associated with each tag. A sector consists of all cache lines 3225 * associated with a tag. For example, the AMD K6-III has a sector 3226 * size of 2 cache lines per tag. 3227 */ 3228 if (lines_per_tag != 0) 3229 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3230 add_cache_prop(devi, label, line_str, line_size); 3231 add_cache_prop(devi, label, size_str, size * 1024); 3232 } 3233 3234 static void 3235 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3236 { 3237 switch (assoc) { 3238 case 0: /* off */ 3239 break; 3240 case 1: 3241 case 2: 3242 case 4: 3243 add_cache_prop(devi, label, assoc_str, assoc); 3244 break; 3245 case 6: 3246 add_cache_prop(devi, label, assoc_str, 8); 3247 break; 3248 case 8: 3249 add_cache_prop(devi, label, assoc_str, 16); 3250 break; 3251 case 0xf: 3252 add_cache_prop(devi, label, fully_assoc, 1); 3253 break; 3254 default: /* reserved; ignore */ 3255 break; 3256 } 3257 } 3258 3259 static void 3260 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3261 { 3262 if (size == 0 || assoc == 0) 3263 return; 3264 add_amd_l2_assoc(devi, label, assoc); 3265 add_cache_prop(devi, label, size_str, size); 3266 } 3267 3268 static void 3269 add_amd_l2_cache(dev_info_t *devi, const char *label, 3270 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3271 { 3272 if (size == 0 || assoc == 0 || line_size == 0) 3273 return; 3274 add_amd_l2_assoc(devi, label, assoc); 3275 if (lines_per_tag != 0) 3276 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3277 add_cache_prop(devi, label, line_str, line_size); 3278 add_cache_prop(devi, label, size_str, size * 1024); 3279 } 3280 3281 static void 3282 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3283 { 3284 struct cpuid_regs *cp; 3285 3286 if (cpi->cpi_xmaxeax < 0x80000005) 3287 return; 3288 cp = &cpi->cpi_extd[5]; 3289 3290 /* 3291 * 4M/2M L1 TLB configuration 3292 * 3293 * We report the size for 2M pages because AMD uses two 3294 * TLB entries for one 4M page. 3295 */ 3296 add_amd_tlb(devi, "dtlb-2M", 3297 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3298 add_amd_tlb(devi, "itlb-2M", 3299 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3300 3301 /* 3302 * 4K L1 TLB configuration 3303 */ 3304 3305 switch (cpi->cpi_vendor) { 3306 uint_t nentries; 3307 case X86_VENDOR_TM: 3308 if (cpi->cpi_family >= 5) { 3309 /* 3310 * Crusoe processors have 256 TLB entries, but 3311 * cpuid data format constrains them to only 3312 * reporting 255 of them. 3313 */ 3314 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3315 nentries = 256; 3316 /* 3317 * Crusoe processors also have a unified TLB 3318 */ 3319 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3320 nentries); 3321 break; 3322 } 3323 /*FALLTHROUGH*/ 3324 default: 3325 add_amd_tlb(devi, itlb4k_str, 3326 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3327 add_amd_tlb(devi, dtlb4k_str, 3328 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3329 break; 3330 } 3331 3332 /* 3333 * data L1 cache configuration 3334 */ 3335 3336 add_amd_cache(devi, l1_dcache_str, 3337 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3338 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3339 3340 /* 3341 * code L1 cache configuration 3342 */ 3343 3344 add_amd_cache(devi, l1_icache_str, 3345 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3346 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3347 3348 if (cpi->cpi_xmaxeax < 0x80000006) 3349 return; 3350 cp = &cpi->cpi_extd[6]; 3351 3352 /* Check for a unified L2 TLB for large pages */ 3353 3354 if (BITX(cp->cp_eax, 31, 16) == 0) 3355 add_amd_l2_tlb(devi, "l2-tlb-2M", 3356 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3357 else { 3358 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3359 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3360 add_amd_l2_tlb(devi, "l2-itlb-2M", 3361 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3362 } 3363 3364 /* Check for a unified L2 TLB for 4K pages */ 3365 3366 if (BITX(cp->cp_ebx, 31, 16) == 0) { 3367 add_amd_l2_tlb(devi, "l2-tlb-4K", 3368 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3369 } else { 3370 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3371 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3372 add_amd_l2_tlb(devi, "l2-itlb-4K", 3373 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3374 } 3375 3376 add_amd_l2_cache(devi, l2_cache_str, 3377 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3378 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3379 } 3380 3381 /* 3382 * There are two basic ways that the x86 world describes it cache 3383 * and tlb architecture - Intel's way and AMD's way. 3384 * 3385 * Return which flavor of cache architecture we should use 3386 */ 3387 static int 3388 x86_which_cacheinfo(struct cpuid_info *cpi) 3389 { 3390 switch (cpi->cpi_vendor) { 3391 case X86_VENDOR_Intel: 3392 if (cpi->cpi_maxeax >= 2) 3393 return (X86_VENDOR_Intel); 3394 break; 3395 case X86_VENDOR_AMD: 3396 /* 3397 * The K5 model 1 was the first part from AMD that reported 3398 * cache sizes via extended cpuid functions. 3399 */ 3400 if (cpi->cpi_family > 5 || 3401 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3402 return (X86_VENDOR_AMD); 3403 break; 3404 case X86_VENDOR_TM: 3405 if (cpi->cpi_family >= 5) 3406 return (X86_VENDOR_AMD); 3407 /*FALLTHROUGH*/ 3408 default: 3409 /* 3410 * If they have extended CPU data for 0x80000005 3411 * then we assume they have AMD-format cache 3412 * information. 3413 * 3414 * If not, and the vendor happens to be Cyrix, 3415 * then try our-Cyrix specific handler. 3416 * 3417 * If we're not Cyrix, then assume we're using Intel's 3418 * table-driven format instead. 3419 */ 3420 if (cpi->cpi_xmaxeax >= 0x80000005) 3421 return (X86_VENDOR_AMD); 3422 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3423 return (X86_VENDOR_Cyrix); 3424 else if (cpi->cpi_maxeax >= 2) 3425 return (X86_VENDOR_Intel); 3426 break; 3427 } 3428 return (-1); 3429 } 3430 3431 /* 3432 * create a node for the given cpu under the prom root node. 3433 * Also, create a cpu node in the device tree. 3434 */ 3435 static dev_info_t *cpu_nex_devi = NULL; 3436 static kmutex_t cpu_node_lock; 3437 3438 /* 3439 * Called from post_startup() and mp_startup() 3440 */ 3441 void 3442 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3443 { 3444 dev_info_t *cpu_devi; 3445 int create; 3446 3447 mutex_enter(&cpu_node_lock); 3448 3449 /* 3450 * create a nexus node for all cpus identified as 'cpu_id' under 3451 * the root node. 3452 */ 3453 if (cpu_nex_devi == NULL) { 3454 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3455 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3456 mutex_exit(&cpu_node_lock); 3457 return; 3458 } 3459 (void) ndi_devi_online(cpu_nex_devi, 0); 3460 } 3461 3462 /* 3463 * create a child node for cpu identified as 'cpu_id' 3464 */ 3465 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3466 cpu_id); 3467 if (cpu_devi == NULL) { 3468 mutex_exit(&cpu_node_lock); 3469 return; 3470 } 3471 3472 /* device_type */ 3473 3474 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3475 "device_type", "cpu"); 3476 3477 /* reg */ 3478 3479 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3480 "reg", cpu_id); 3481 3482 /* cpu-mhz, and clock-frequency */ 3483 3484 if (cpu_freq > 0) { 3485 long long mul; 3486 3487 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3488 "cpu-mhz", cpu_freq); 3489 3490 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3491 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3492 "clock-frequency", (int)mul); 3493 } 3494 3495 (void) ndi_devi_online(cpu_devi, 0); 3496 3497 if ((x86_feature & X86_CPUID) == 0) { 3498 mutex_exit(&cpu_node_lock); 3499 return; 3500 } 3501 3502 /* vendor-id */ 3503 3504 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3505 "vendor-id", cpi->cpi_vendorstr); 3506 3507 if (cpi->cpi_maxeax == 0) { 3508 mutex_exit(&cpu_node_lock); 3509 return; 3510 } 3511 3512 /* 3513 * family, model, and step 3514 */ 3515 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3516 "family", CPI_FAMILY(cpi)); 3517 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3518 "cpu-model", CPI_MODEL(cpi)); 3519 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3520 "stepping-id", CPI_STEP(cpi)); 3521 3522 /* type */ 3523 3524 switch (cpi->cpi_vendor) { 3525 case X86_VENDOR_Intel: 3526 create = 1; 3527 break; 3528 default: 3529 create = 0; 3530 break; 3531 } 3532 if (create) 3533 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3534 "type", CPI_TYPE(cpi)); 3535 3536 /* ext-family */ 3537 3538 switch (cpi->cpi_vendor) { 3539 case X86_VENDOR_Intel: 3540 case X86_VENDOR_AMD: 3541 create = cpi->cpi_family >= 0xf; 3542 break; 3543 default: 3544 create = 0; 3545 break; 3546 } 3547 if (create) 3548 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3549 "ext-family", CPI_FAMILY_XTD(cpi)); 3550 3551 /* ext-model */ 3552 3553 switch (cpi->cpi_vendor) { 3554 case X86_VENDOR_Intel: 3555 create = IS_EXTENDED_MODEL_INTEL(cpi); 3556 break; 3557 case X86_VENDOR_AMD: 3558 create = CPI_FAMILY(cpi) == 0xf; 3559 break; 3560 default: 3561 create = 0; 3562 break; 3563 } 3564 if (create) 3565 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3566 "ext-model", CPI_MODEL_XTD(cpi)); 3567 3568 /* generation */ 3569 3570 switch (cpi->cpi_vendor) { 3571 case X86_VENDOR_AMD: 3572 /* 3573 * AMD K5 model 1 was the first part to support this 3574 */ 3575 create = cpi->cpi_xmaxeax >= 0x80000001; 3576 break; 3577 default: 3578 create = 0; 3579 break; 3580 } 3581 if (create) 3582 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3583 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3584 3585 /* brand-id */ 3586 3587 switch (cpi->cpi_vendor) { 3588 case X86_VENDOR_Intel: 3589 /* 3590 * brand id first appeared on Pentium III Xeon model 8, 3591 * and Celeron model 8 processors and Opteron 3592 */ 3593 create = cpi->cpi_family > 6 || 3594 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3595 break; 3596 case X86_VENDOR_AMD: 3597 create = cpi->cpi_family >= 0xf; 3598 break; 3599 default: 3600 create = 0; 3601 break; 3602 } 3603 if (create && cpi->cpi_brandid != 0) { 3604 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3605 "brand-id", cpi->cpi_brandid); 3606 } 3607 3608 /* chunks, and apic-id */ 3609 3610 switch (cpi->cpi_vendor) { 3611 /* 3612 * first available on Pentium IV and Opteron (K8) 3613 */ 3614 case X86_VENDOR_Intel: 3615 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3616 break; 3617 case X86_VENDOR_AMD: 3618 create = cpi->cpi_family >= 0xf; 3619 break; 3620 default: 3621 create = 0; 3622 break; 3623 } 3624 if (create) { 3625 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3626 "chunks", CPI_CHUNKS(cpi)); 3627 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3628 "apic-id", CPI_APIC_ID(cpi)); 3629 if (cpi->cpi_chipid >= 0) { 3630 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3631 "chip#", cpi->cpi_chipid); 3632 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3633 "clog#", cpi->cpi_clogid); 3634 } 3635 } 3636 3637 /* cpuid-features */ 3638 3639 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3640 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3641 3642 3643 /* cpuid-features-ecx */ 3644 3645 switch (cpi->cpi_vendor) { 3646 case X86_VENDOR_Intel: 3647 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3648 break; 3649 default: 3650 create = 0; 3651 break; 3652 } 3653 if (create) 3654 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3655 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3656 3657 /* ext-cpuid-features */ 3658 3659 switch (cpi->cpi_vendor) { 3660 case X86_VENDOR_Intel: 3661 case X86_VENDOR_AMD: 3662 case X86_VENDOR_Cyrix: 3663 case X86_VENDOR_TM: 3664 case X86_VENDOR_Centaur: 3665 create = cpi->cpi_xmaxeax >= 0x80000001; 3666 break; 3667 default: 3668 create = 0; 3669 break; 3670 } 3671 if (create) { 3672 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3673 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3674 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3675 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3676 } 3677 3678 /* 3679 * Brand String first appeared in Intel Pentium IV, AMD K5 3680 * model 1, and Cyrix GXm. On earlier models we try and 3681 * simulate something similar .. so this string should always 3682 * same -something- about the processor, however lame. 3683 */ 3684 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3685 "brand-string", cpi->cpi_brandstr); 3686 3687 /* 3688 * Finally, cache and tlb information 3689 */ 3690 switch (x86_which_cacheinfo(cpi)) { 3691 case X86_VENDOR_Intel: 3692 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3693 break; 3694 case X86_VENDOR_Cyrix: 3695 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3696 break; 3697 case X86_VENDOR_AMD: 3698 amd_cache_info(cpi, cpu_devi); 3699 break; 3700 default: 3701 break; 3702 } 3703 3704 mutex_exit(&cpu_node_lock); 3705 } 3706 3707 struct l2info { 3708 int *l2i_csz; 3709 int *l2i_lsz; 3710 int *l2i_assoc; 3711 int l2i_ret; 3712 }; 3713 3714 /* 3715 * A cacheinfo walker that fetches the size, line-size and associativity 3716 * of the L2 cache 3717 */ 3718 static int 3719 intel_l2cinfo(void *arg, const struct cachetab *ct) 3720 { 3721 struct l2info *l2i = arg; 3722 int *ip; 3723 3724 if (ct->ct_label != l2_cache_str && 3725 ct->ct_label != sl2_cache_str) 3726 return (0); /* not an L2 -- keep walking */ 3727 3728 if ((ip = l2i->l2i_csz) != NULL) 3729 *ip = ct->ct_size; 3730 if ((ip = l2i->l2i_lsz) != NULL) 3731 *ip = ct->ct_line_size; 3732 if ((ip = l2i->l2i_assoc) != NULL) 3733 *ip = ct->ct_assoc; 3734 l2i->l2i_ret = ct->ct_size; 3735 return (1); /* was an L2 -- terminate walk */ 3736 } 3737 3738 /* 3739 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3740 * 3741 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3742 * value is the associativity, the associativity for the L2 cache and 3743 * tlb is encoded in the following table. The 4 bit L2 value serves as 3744 * an index into the amd_afd[] array to determine the associativity. 3745 * -1 is undefined. 0 is fully associative. 3746 */ 3747 3748 static int amd_afd[] = 3749 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3750 3751 static void 3752 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3753 { 3754 struct cpuid_regs *cp; 3755 uint_t size, assoc; 3756 int i; 3757 int *ip; 3758 3759 if (cpi->cpi_xmaxeax < 0x80000006) 3760 return; 3761 cp = &cpi->cpi_extd[6]; 3762 3763 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3764 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3765 uint_t cachesz = size * 1024; 3766 assoc = amd_afd[i]; 3767 3768 ASSERT(assoc != -1); 3769 3770 if ((ip = l2i->l2i_csz) != NULL) 3771 *ip = cachesz; 3772 if ((ip = l2i->l2i_lsz) != NULL) 3773 *ip = BITX(cp->cp_ecx, 7, 0); 3774 if ((ip = l2i->l2i_assoc) != NULL) 3775 *ip = assoc; 3776 l2i->l2i_ret = cachesz; 3777 } 3778 } 3779 3780 int 3781 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3782 { 3783 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3784 struct l2info __l2info, *l2i = &__l2info; 3785 3786 l2i->l2i_csz = csz; 3787 l2i->l2i_lsz = lsz; 3788 l2i->l2i_assoc = assoc; 3789 l2i->l2i_ret = -1; 3790 3791 switch (x86_which_cacheinfo(cpi)) { 3792 case X86_VENDOR_Intel: 3793 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3794 break; 3795 case X86_VENDOR_Cyrix: 3796 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3797 break; 3798 case X86_VENDOR_AMD: 3799 amd_l2cacheinfo(cpi, l2i); 3800 break; 3801 default: 3802 break; 3803 } 3804 return (l2i->l2i_ret); 3805 } 3806 3807 #if !defined(__xpv) 3808 3809 uint32_t * 3810 cpuid_mwait_alloc(cpu_t *cpu) 3811 { 3812 uint32_t *ret; 3813 size_t mwait_size; 3814 3815 ASSERT(cpuid_checkpass(cpu, 2)); 3816 3817 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3818 if (mwait_size == 0) 3819 return (NULL); 3820 3821 /* 3822 * kmem_alloc() returns cache line size aligned data for mwait_size 3823 * allocations. mwait_size is currently cache line sized. Neither 3824 * of these implementation details are guarantied to be true in the 3825 * future. 3826 * 3827 * First try allocating mwait_size as kmem_alloc() currently returns 3828 * correctly aligned memory. If kmem_alloc() does not return 3829 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3830 * 3831 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3832 * decide to free this memory. 3833 */ 3834 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3835 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3836 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3837 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3838 *ret = MWAIT_RUNNING; 3839 return (ret); 3840 } else { 3841 kmem_free(ret, mwait_size); 3842 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3843 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3844 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3845 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3846 *ret = MWAIT_RUNNING; 3847 return (ret); 3848 } 3849 } 3850 3851 void 3852 cpuid_mwait_free(cpu_t *cpu) 3853 { 3854 ASSERT(cpuid_checkpass(cpu, 2)); 3855 3856 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3857 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3858 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3859 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3860 } 3861 3862 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3863 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3864 } 3865 3866 void 3867 patch_tsc_read(int flag) 3868 { 3869 size_t cnt; 3870 switch (flag) { 3871 case X86_NO_TSC: 3872 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3873 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3874 break; 3875 case X86_HAVE_TSCP: 3876 cnt = &_tscp_end - &_tscp_start; 3877 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3878 break; 3879 case X86_TSC_MFENCE: 3880 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 3881 (void) memcpy((void *)tsc_read, 3882 (void *)&_tsc_mfence_start, cnt); 3883 break; 3884 case X86_TSC_LFENCE: 3885 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 3886 (void) memcpy((void *)tsc_read, 3887 (void *)&_tsc_lfence_start, cnt); 3888 break; 3889 default: 3890 break; 3891 } 3892 } 3893 3894 #endif /* !__xpv */ 3895