1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Various routines to handle identification 30 * and classification of x86 processors. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/archsystm.h> 35 #include <sys/x86_archext.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 #include <sys/cmn_err.h> 39 #include <sys/sunddi.h> 40 #include <sys/sunndi.h> 41 #include <sys/cpuvar.h> 42 #include <sys/processor.h> 43 #include <sys/sysmacros.h> 44 #include <sys/pg.h> 45 #include <sys/fp.h> 46 #include <sys/controlregs.h> 47 #include <sys/auxv_386.h> 48 #include <sys/bitmap.h> 49 #include <sys/memnode.h> 50 51 /* 52 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 53 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 54 * them accordingly. For most modern processors, feature detection occurs here 55 * in pass 1. 
56 * 57 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 58 * for the boot CPU and does the basic analysis that the early kernel needs. 59 * x86_feature is set based on the return value of cpuid_pass1() of the boot 60 * CPU. 61 * 62 * Pass 1 includes: 63 * 64 * o Determining vendor/model/family/stepping and setting x86_type and 65 * x86_vendor accordingly. 66 * o Processing the feature flags returned by the cpuid instruction while 67 * applying any workarounds or tricks for the specific processor. 68 * o Mapping the feature flags into Solaris feature bits (X86_*). 69 * o Processing extended feature flags if supported by the processor, 70 * again while applying specific processor knowledge. 71 * o Determining the CMT characteristics of the system. 72 * 73 * Pass 1 is done on non-boot CPUs during their initialization and the results 74 * are used only as a meager attempt at ensuring that all processors within the 75 * system support the same features. 76 * 77 * Pass 2 of cpuid feature analysis happens just at the beginning 78 * of startup(). It just copies in and corrects the remainder 79 * of the cpuid data we depend on: standard cpuid functions that we didn't 80 * need for pass1 feature analysis, and extended cpuid functions beyond the 81 * simple feature processing done in pass1. 82 * 83 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 84 * particular kernel memory allocation has been made available. It creates a 85 * readable brand string based on the data collected in the first two passes. 86 * 87 * Pass 4 of cpuid analysis is invoked after post_startup() when all 88 * the support infrastructure for various hardware features has been 89 * initialized. It determines which processor features will be reported 90 * to userland via the aux vector. 91 * 92 * All passes are executed on all CPUs, but only the boot CPU determines what 93 * features the kernel will use. 
94 * 95 * Much of the worst junk in this file is for the support of processors 96 * that didn't really implement the cpuid instruction properly. 97 * 98 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 99 * the pass numbers. Accordingly, changes to the pass code may require changes 100 * to the accessor code. 101 */ 102 103 uint_t x86_feature = 0; 104 uint_t x86_vendor = X86_VENDOR_IntelClone; 105 uint_t x86_type = X86_TYPE_OTHER; 106 107 uint_t pentiumpro_bug4046376; 108 uint_t pentiumpro_bug4064495; 109 110 uint_t enable486; 111 112 /* 113 * This set of strings are for processors rumored to support the cpuid 114 * instruction, and is used by locore.s to figure out how to set x86_vendor 115 */ 116 const char CyrixInstead[] = "CyrixInstead"; 117 118 /* 119 * monitor/mwait info. 120 * 121 * size_actual and buf_actual are the real address and size allocated to get 122 * proper mwait_buf alignement. buf_actual and size_actual should be passed 123 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 124 * processor cache-line alignment, but this is not guarantied in the furture. 125 */ 126 struct mwait_info { 127 size_t mon_min; /* min size to avoid missed wakeups */ 128 size_t mon_max; /* size to avoid false wakeups */ 129 size_t size_actual; /* size actually allocated */ 130 void *buf_actual; /* memory actually allocated */ 131 uint32_t support; /* processor support of monitor/mwait */ 132 }; 133 134 /* 135 * These constants determine how many of the elements of the 136 * cpuid we cache in the cpuid_info data structure; the 137 * remaining elements are accessible via the cpuid instruction. 138 */ 139 140 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 141 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 
0x80000008 */ 142 143 struct cpuid_info { 144 uint_t cpi_pass; /* last pass completed */ 145 /* 146 * standard function information 147 */ 148 uint_t cpi_maxeax; /* fn 0: %eax */ 149 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 150 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 151 152 uint_t cpi_family; /* fn 1: extended family */ 153 uint_t cpi_model; /* fn 1: extended model */ 154 uint_t cpi_step; /* fn 1: stepping */ 155 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 156 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 157 int cpi_clogid; /* fn 1: %ebx: thread # */ 158 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 159 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 160 uint_t cpi_ncache; /* fn 2: number of elements */ 161 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 162 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 163 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 164 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 165 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 166 /* 167 * extended function information 168 */ 169 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 170 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 171 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 172 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 173 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 174 id_t cpi_coreid; 175 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 176 /* Intel: fn 4: %eax[31-26] */ 177 /* 178 * supported feature information 179 */ 180 uint32_t cpi_support[5]; 181 #define STD_EDX_FEATURES 0 182 #define AMD_EDX_FEATURES 1 183 #define TM_EDX_FEATURES 2 184 #define STD_ECX_FEATURES 3 185 #define AMD_ECX_FEATURES 4 186 /* 187 * Synthesized information, where known. 
188 */ 189 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 190 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 191 uint32_t cpi_socket; /* Chip package/socket type */ 192 193 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 194 }; 195 196 197 static struct cpuid_info cpuid_info0; 198 199 /* 200 * These bit fields are defined by the Intel Application Note AP-485 201 * "Intel Processor Identification and the CPUID Instruction" 202 */ 203 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 204 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 205 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 206 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 207 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 208 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 209 210 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 211 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 212 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 213 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 214 215 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 216 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 217 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 218 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 219 220 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 221 #define CPI_XMAXEAX_MAX 0x80000100 222 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 223 224 /* 225 * Function 4 (Deterministic Cache Parameters) macros 226 * Defined by Intel Application Note AP-485 227 */ 228 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 229 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 230 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 231 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 232 #define CPI_CACHE_LVL(regs) 
BITX((regs)->cp_eax, 7, 5) 233 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 234 235 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 236 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 237 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 238 239 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 240 241 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 242 243 244 /* 245 * A couple of shorthand macros to identify "later" P6-family chips 246 * like the Pentium M and Core. First, the "older" P6-based stuff 247 * (loosely defined as "pre-Pentium-4"): 248 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 249 */ 250 251 #define IS_LEGACY_P6(cpi) ( \ 252 cpi->cpi_family == 6 && \ 253 (cpi->cpi_model == 1 || \ 254 cpi->cpi_model == 3 || \ 255 cpi->cpi_model == 5 || \ 256 cpi->cpi_model == 6 || \ 257 cpi->cpi_model == 7 || \ 258 cpi->cpi_model == 8 || \ 259 cpi->cpi_model == 0xA || \ 260 cpi->cpi_model == 0xB) \ 261 ) 262 263 /* A "new F6" is everything with family 6 that's not the above */ 264 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 265 266 /* Extended family/model support */ 267 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 268 cpi->cpi_family >= 0xf) 269 270 /* 271 * AMD family 0xf and family 0x10 socket types. 
272 * First index : 273 * 0 for family 0xf, revs B thru E 274 * 1 for family 0xf, revs F and G 275 * 2 for family 0x10, rev B 276 * Second index by (model & 0x3) 277 */ 278 static uint32_t amd_skts[3][4] = { 279 { 280 X86_SOCKET_754, /* 0b00 */ 281 X86_SOCKET_940, /* 0b01 */ 282 X86_SOCKET_754, /* 0b10 */ 283 X86_SOCKET_939 /* 0b11 */ 284 }, 285 { 286 X86_SOCKET_S1g1, /* 0b00 */ 287 X86_SOCKET_F1207, /* 0b01 */ 288 X86_SOCKET_UNKNOWN, /* 0b10 */ 289 X86_SOCKET_AM2 /* 0b11 */ 290 }, 291 { 292 X86_SOCKET_F1207, /* 0b00 */ 293 X86_SOCKET_F1207, /* 0b01 */ 294 X86_SOCKET_F1207, /* 0b10 */ 295 X86_SOCKET_F1207 /* 0b11 */ 296 } 297 }; 298 299 /* 300 * Table for mapping AMD Family 0xf and AMD Family 0x10 model/stepping 301 * combination to chip "revision" and socket type. 302 * 303 * The first member of this array that matches a given family, extended model 304 * plus model range, and stepping range will be considered a match. 305 */ 306 static const struct amd_rev_mapent { 307 uint_t rm_family; 308 uint_t rm_modello; 309 uint_t rm_modelhi; 310 uint_t rm_steplo; 311 uint_t rm_stephi; 312 uint32_t rm_chiprev; 313 const char *rm_chiprevstr; 314 int rm_sktidx; 315 } amd_revmap[] = { 316 /* 317 * Rev B includes model 0x4 stepping 0 and model 0x5 stepping 0 and 1. 318 */ 319 { 0xf, 0x04, 0x04, 0x0, 0x0, X86_CHIPREV_AMD_F_REV_B, "B", 0 }, 320 { 0xf, 0x05, 0x05, 0x0, 0x1, X86_CHIPREV_AMD_F_REV_B, "B", 0 }, 321 /* 322 * Rev C0 includes model 0x4 stepping 8 and model 0x5 stepping 8 323 */ 324 { 0xf, 0x04, 0x05, 0x8, 0x8, X86_CHIPREV_AMD_F_REV_C0, "C0", 0 }, 325 /* 326 * Rev CG is the rest of extended model 0x0 - i.e., everything 327 * but the rev B and C0 combinations covered above. 328 */ 329 { 0xf, 0x00, 0x0f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_CG, "CG", 0 }, 330 /* 331 * Rev D has extended model 0x1. 332 */ 333 { 0xf, 0x10, 0x1f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_D, "D", 0 }, 334 /* 335 * Rev E has extended model 0x2. 336 * Extended model 0x3 is unused but available to grow into. 
337 */ 338 { 0xf, 0x20, 0x3f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_E, "E", 0 }, 339 /* 340 * Rev F has extended models 0x4 and 0x5. 341 */ 342 { 0xf, 0x40, 0x5f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_F, "F", 1 }, 343 /* 344 * Rev G has extended model 0x6. 345 */ 346 { 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", 1 }, 347 /* 348 * Family 0x10 Rev B has model 0x2. 349 */ 350 { 0x10, 0x02, 0x02, 0x0, 0xa, X86_CHIPREV_AMD_10_REV_B, "B", 2 } 351 }; 352 353 /* 354 * Info for monitor/mwait idle loop. 355 * 356 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 357 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 358 * 2006. 359 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 360 * Documentation Updates" #33633, Rev 2.05, December 2006. 361 */ 362 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 363 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 364 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 365 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 366 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 367 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 368 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 369 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 370 /* 371 * Number of sub-cstates for a given c-state. 372 */ 373 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 374 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 375 376 static void intel_cpuid_4_cache_info(void *, struct cpuid_info *); 377 378 static void 379 synth_amd_info(struct cpuid_info *cpi) 380 { 381 const struct amd_rev_mapent *rmp; 382 uint_t family, model, step; 383 int i; 384 385 /* 386 * Currently only AMD family 0xf and family 0x10 use these fields. 
387 */ 388 if (cpi->cpi_family != 0xf && cpi->cpi_family != 0x10) 389 return; 390 391 family = cpi->cpi_family; 392 model = cpi->cpi_model; 393 step = cpi->cpi_step; 394 395 for (i = 0, rmp = amd_revmap; i < sizeof (amd_revmap) / sizeof (*rmp); 396 i++, rmp++) { 397 if (family == rmp->rm_family && 398 model >= rmp->rm_modello && model <= rmp->rm_modelhi && 399 step >= rmp->rm_steplo && step <= rmp->rm_stephi) { 400 cpi->cpi_chiprev = rmp->rm_chiprev; 401 cpi->cpi_chiprevstr = rmp->rm_chiprevstr; 402 cpi->cpi_socket = amd_skts[rmp->rm_sktidx][model & 0x3]; 403 return; 404 } 405 } 406 } 407 408 static void 409 synth_info(struct cpuid_info *cpi) 410 { 411 cpi->cpi_chiprev = X86_CHIPREV_UNKNOWN; 412 cpi->cpi_chiprevstr = "Unknown"; 413 cpi->cpi_socket = X86_SOCKET_UNKNOWN; 414 415 switch (cpi->cpi_vendor) { 416 case X86_VENDOR_AMD: 417 synth_amd_info(cpi); 418 break; 419 420 default: 421 break; 422 423 } 424 } 425 426 /* 427 * Apply up various platform-dependent restrictions where the 428 * underlying platform restrictions mean the CPU can be marked 429 * as less capable than its cpuid instruction would imply. 430 */ 431 #if defined(__xpv) 432 static void 433 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 434 { 435 switch (eax) { 436 case 1: 437 cp->cp_edx &= 438 ~(CPUID_INTC_EDX_PSE | 439 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 440 CPUID_INTC_EDX_MCA | /* XXPV true on dom0? 
*/ 441 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 442 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 443 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 444 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 445 break; 446 447 case 0x80000001: 448 cp->cp_edx &= 449 ~(CPUID_AMD_EDX_PSE | 450 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 451 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 452 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 453 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 454 CPUID_AMD_EDX_TSCP); 455 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 456 break; 457 default: 458 break; 459 } 460 461 switch (vendor) { 462 case X86_VENDOR_Intel: 463 switch (eax) { 464 case 4: 465 /* 466 * Zero out the (ncores-per-chip - 1) field 467 */ 468 cp->cp_eax &= 0x03fffffff; 469 break; 470 default: 471 break; 472 } 473 break; 474 case X86_VENDOR_AMD: 475 switch (eax) { 476 case 0x80000008: 477 /* 478 * Zero out the (ncores-per-chip - 1) field 479 */ 480 cp->cp_ecx &= 0xffffff00; 481 break; 482 default: 483 break; 484 } 485 break; 486 default: 487 break; 488 } 489 } 490 #else 491 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 492 #endif 493 494 /* 495 * Some undocumented ways of patching the results of the cpuid 496 * instruction to permit running Solaris 10 on future cpus that 497 * we don't currently support. Could be set to non-zero values 498 * via settings in eeprom. 499 */ 500 501 uint32_t cpuid_feature_ecx_include; 502 uint32_t cpuid_feature_ecx_exclude; 503 uint32_t cpuid_feature_edx_include; 504 uint32_t cpuid_feature_edx_exclude; 505 506 void 507 cpuid_alloc_space(cpu_t *cpu) 508 { 509 /* 510 * By convention, cpu0 is the boot cpu, which is set up 511 * before memory allocation is available. All other cpus get 512 * their cpuid_info struct allocated here. 
513 */ 514 ASSERT(cpu->cpu_id != 0); 515 cpu->cpu_m.mcpu_cpi = 516 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 517 } 518 519 void 520 cpuid_free_space(cpu_t *cpu) 521 { 522 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 523 int i; 524 525 ASSERT(cpu->cpu_id != 0); 526 527 /* 528 * Free up any function 4 related dynamic storage 529 */ 530 for (i = 1; i < cpi->cpi_std_4_size; i++) 531 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 532 if (cpi->cpi_std_4_size > 0) 533 kmem_free(cpi->cpi_std_4, 534 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 535 536 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 537 } 538 539 uint_t 540 cpuid_pass1(cpu_t *cpu) 541 { 542 uint32_t mask_ecx, mask_edx; 543 uint_t feature = X86_CPUID; 544 struct cpuid_info *cpi; 545 struct cpuid_regs *cp; 546 int xcpuid; 547 #if !defined(__xpv) 548 extern int idle_cpu_prefer_mwait; 549 #endif 550 551 /* 552 * Space statically allocated for cpu0, ensure pointer is set 553 */ 554 if (cpu->cpu_id == 0) 555 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 556 cpi = cpu->cpu_m.mcpu_cpi; 557 ASSERT(cpi != NULL); 558 cp = &cpi->cpi_std[0]; 559 cp->cp_eax = 0; 560 cpi->cpi_maxeax = __cpuid_insn(cp); 561 { 562 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 563 *iptr++ = cp->cp_ebx; 564 *iptr++ = cp->cp_edx; 565 *iptr++ = cp->cp_ecx; 566 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 567 } 568 569 /* 570 * Map the vendor string to a type code 571 */ 572 if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0) 573 cpi->cpi_vendor = X86_VENDOR_Intel; 574 else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0) 575 cpi->cpi_vendor = X86_VENDOR_AMD; 576 else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0) 577 cpi->cpi_vendor = X86_VENDOR_TM; 578 else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0) 579 /* 580 * CyrixInstead is a variable used by the Cyrix detection code 581 * in locore. 
/*
 * Pass 1 of cpuid feature analysis for one CPU.
 *
 * Executes cpuid functions 0 and 1 and, where the processor supports them,
 * 0x80000000, 0x80000001, 4 (Intel only) and 0x80000008, caching the raw
 * registers in the CPU's cpuid_info.  Vendor-specific workarounds, the
 * platform restrictions and the cpuid_feature_*_{include,exclude} overrides
 * are applied to the cached function 1 feature words before they are
 * translated into X86_* feature bits.
 *
 * Returns the X86_* feature bits derived for this CPU.
 *
 * Side effects: sets x86_vendor, may set x86_type and the pentiumpro_bug*
 * flags (Intel), clears idle_cpu_prefer_mwait on AMD (non-xpv builds), and
 * leaves cpi_pass set to 1.
 */
uint_t
cpuid_pass1(cpu_t *cpu)
{
	uint32_t mask_ecx, mask_edx;
	uint_t feature = X86_CPUID;
	struct cpuid_info *cpi;
	struct cpuid_regs *cp;
	int xcpuid;
#if !defined(__xpv)
	extern int idle_cpu_prefer_mwait;
#endif

	/*
	 * Space statically allocated for cpu0, ensure pointer is set
	 */
	if (cpu->cpu_id == 0)
		cpu->cpu_m.mcpu_cpi = &cpuid_info0;
	cpi = cpu->cpu_m.mcpu_cpi;
	ASSERT(cpi != NULL);
	cp = &cpi->cpi_std[0];
	cp->cp_eax = 0;
	cpi->cpi_maxeax = __cpuid_insn(cp);
	{
		/*
		 * Assemble the 12-character vendor string; note the
		 * register order is %ebx, %edx, %ecx.
		 */
		uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
		*iptr++ = cp->cp_ebx;
		*iptr++ = cp->cp_edx;
		*iptr++ = cp->cp_ecx;
		*(char *)&cpi->cpi_vendorstr[12] = '\0';
	}

	/*
	 * Map the vendor string to a type code
	 */
	if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0)
		cpi->cpi_vendor = X86_VENDOR_Intel;
	else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0)
		cpi->cpi_vendor = X86_VENDOR_AMD;
	else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0)
		cpi->cpi_vendor = X86_VENDOR_TM;
	else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0)
		/*
		 * CyrixInstead is a variable used by the Cyrix detection code
		 * in locore.
		 */
		cpi->cpi_vendor = X86_VENDOR_Cyrix;
	else if (strcmp(cpi->cpi_vendorstr, "UMC UMC UMC ") == 0)
		cpi->cpi_vendor = X86_VENDOR_UMC;
	else if (strcmp(cpi->cpi_vendorstr, "NexGenDriven") == 0)
		cpi->cpi_vendor = X86_VENDOR_NexGen;
	else if (strcmp(cpi->cpi_vendorstr, "CentaurHauls") == 0)
		cpi->cpi_vendor = X86_VENDOR_Centaur;
	else if (strcmp(cpi->cpi_vendorstr, "RiseRiseRise") == 0)
		cpi->cpi_vendor = X86_VENDOR_Rise;
	else if (strcmp(cpi->cpi_vendorstr, "SiS SiS SiS ") == 0)
		cpi->cpi_vendor = X86_VENDOR_SiS;
	else if (strcmp(cpi->cpi_vendorstr, "Geode by NSC") == 0)
		cpi->cpi_vendor = X86_VENDOR_NSC;
	else
		cpi->cpi_vendor = X86_VENDOR_IntelClone;

	x86_vendor = cpi->cpi_vendor; /* for compatibility */

	/*
	 * Limit the range in case of weird hardware
	 *
	 * If function 1 is unavailable, everything below - including
	 * synth_info() - is skipped and only X86_CPUID is returned.
	 */
	if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
		cpi->cpi_maxeax = CPI_MAXEAX_MAX;
	if (cpi->cpi_maxeax < 1)
		goto pass1_done;

	cp = &cpi->cpi_std[1];
	cp->cp_eax = 1;
	(void) __cpuid_insn(cp);

	/*
	 * Extract identifying constants for easy access.
	 */
	cpi->cpi_model = CPI_MODEL(cpi);
	cpi->cpi_family = CPI_FAMILY(cpi);

	if (cpi->cpi_family == 0xf)
		cpi->cpi_family += CPI_FAMILY_XTD(cpi);

	/*
	 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
	 * Intel, and presumably everyone else, uses model == 0xf, as
	 * one would expect (max value means possible overflow).  Sigh.
	 */

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (IS_EXTENDED_MODEL_INTEL(cpi))
			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
		break;
	case X86_VENDOR_AMD:
		/* tests the raw base family, not the adjusted cpi_family */
		if (CPI_FAMILY(cpi) == 0xf)
			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
		break;
	default:
		if (cpi->cpi_model == 0xf)
			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
		break;
	}

	cpi->cpi_step = CPI_STEP(cpi);
	cpi->cpi_brandid = CPI_BRANDID(cpi);

	/*
	 * *default* assumptions:
	 * - believe %edx feature word
	 * - ignore %ecx feature word
	 * - 32-bit virtual and physical addressing
	 */
	mask_edx = 0xffffffff;
	mask_ecx = 0;

	cpi->cpi_pabits = cpi->cpi_vabits = 32;

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (cpi->cpi_family == 5)
			x86_type = X86_TYPE_P5;
		else if (IS_LEGACY_P6(cpi)) {
			x86_type = X86_TYPE_P6;
			pentiumpro_bug4046376 = 1;
			pentiumpro_bug4064495 = 1;
			/*
			 * Clear the SEP bit when it was set erroneously
			 */
			if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
				cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
		} else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
			x86_type = X86_TYPE_P4;
			/*
			 * We don't currently depend on any of the %ecx
			 * features until Prescott, so we'll only check
			 * this from P4 onwards.  We might want to revisit
			 * that idea later.
			 */
			mask_ecx = 0xffffffff;
		} else if (cpi->cpi_family > 0xf)
			mask_ecx = 0xffffffff;
		/*
		 * We don't support MONITOR/MWAIT if leaf 5 is not available
		 * to obtain the monitor linesize.
		 */
		if (cpi->cpi_maxeax < 5)
			mask_ecx &= ~CPUID_INTC_ECX_MON;
		break;
	case X86_VENDOR_IntelClone:
	default:
		break;
	case X86_VENDOR_AMD:
#if defined(OPTERON_ERRATUM_108)
		if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
			cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
			cpi->cpi_model = 0xc;
		} else
#endif
		if (cpi->cpi_family == 5) {
			/*
			 * AMD K5 and K6
			 *
			 * These CPUs have an incomplete implementation
			 * of MCA/MCE which we mask away.
			 */
			mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);

			/*
			 * Model 0 uses the wrong (APIC) bit
			 * to indicate PGE.  Fix it here.
			 */
			if (cpi->cpi_model == 0) {
				if (cp->cp_edx & 0x200) {
					cp->cp_edx &= ~0x200;
					cp->cp_edx |= CPUID_INTC_EDX_PGE;
				}
			}

			/*
			 * Early models had problems w/ MMX; disable.
			 */
			if (cpi->cpi_model < 6)
				mask_edx &= ~CPUID_INTC_EDX_MMX;
		}

		/*
		 * For newer families, SSE3 and CX16, at least, are valid;
		 * enable all
		 */
		if (cpi->cpi_family >= 0xf)
			mask_ecx = 0xffffffff;
		/*
		 * We don't support MONITOR/MWAIT if leaf 5 is not available
		 * to obtain the monitor linesize.
		 */
		if (cpi->cpi_maxeax < 5)
			mask_ecx &= ~CPUID_INTC_ECX_MON;

#if !defined(__xpv)
		/*
		 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
		 * processors.  AMD does not intend MWAIT to be used in the cpu
		 * idle loop on current and future processors.  10h and future
		 * AMD processors use more power in MWAIT than HLT.
		 * Pre-family-10h Opterons do not have the MWAIT instruction.
		 */
		idle_cpu_prefer_mwait = 0;
#endif

		break;
	case X86_VENDOR_TM:
		/*
		 * workaround the NT workaround in CMS 4.1
		 */
		if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
		    (cpi->cpi_step == 2 || cpi->cpi_step == 3))
			cp->cp_edx |= CPUID_INTC_EDX_CX8;
		break;
	case X86_VENDOR_Centaur:
		/*
		 * workaround the NT workarounds again
		 */
		if (cpi->cpi_family == 6)
			cp->cp_edx |= CPUID_INTC_EDX_CX8;
		break;
	case X86_VENDOR_Cyrix:
		/*
		 * We rely heavily on the probing in locore
		 * to actually figure out what parts, if any,
		 * of the Cyrix cpuid instruction to believe.
		 */
		switch (x86_type) {
		case X86_TYPE_CYRIX_486:
			mask_edx = 0;
			break;
		case X86_TYPE_CYRIX_6x86:
			mask_edx = 0;
			break;
		case X86_TYPE_CYRIX_6x86L:
			mask_edx =
			    CPUID_INTC_EDX_DE |
			    CPUID_INTC_EDX_CX8;
			break;
		case X86_TYPE_CYRIX_6x86MX:
			mask_edx =
			    CPUID_INTC_EDX_DE |
			    CPUID_INTC_EDX_MSR |
			    CPUID_INTC_EDX_CX8 |
			    CPUID_INTC_EDX_PGE |
			    CPUID_INTC_EDX_CMOV |
			    CPUID_INTC_EDX_MMX;
			break;
		case X86_TYPE_CYRIX_GXm:
			mask_edx =
			    CPUID_INTC_EDX_MSR |
			    CPUID_INTC_EDX_CX8 |
			    CPUID_INTC_EDX_CMOV |
			    CPUID_INTC_EDX_MMX;
			break;
		case X86_TYPE_CYRIX_MediaGX:
			break;
		case X86_TYPE_CYRIX_MII:
		case X86_TYPE_VIA_CYRIX_III:
			mask_edx =
			    CPUID_INTC_EDX_DE |
			    CPUID_INTC_EDX_TSC |
			    CPUID_INTC_EDX_MSR |
			    CPUID_INTC_EDX_CX8 |
			    CPUID_INTC_EDX_PGE |
			    CPUID_INTC_EDX_CMOV |
			    CPUID_INTC_EDX_MMX;
			break;
		default:
			break;
		}
		break;
	}

#if defined(__xpv)
	/*
	 * Do not support MONITOR/MWAIT under a hypervisor
	 */
	mask_ecx &= ~CPUID_INTC_ECX_MON;
#endif	/* __xpv */

	/*
	 * Now we've figured out the masks that determine
	 * which bits we choose to believe, apply the masks
	 * to the feature words, then map the kernel's view
	 * of these feature words into its feature word.
	 */
	cp->cp_edx &= mask_edx;
	cp->cp_ecx &= mask_ecx;

	/*
	 * apply any platform restrictions (we don't call this
	 * immediately after __cpuid_insn here, because we need the
	 * workarounds applied above first)
	 */
	platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);

	/*
	 * fold in overrides from the "eeprom" mechanism
	 *
	 * The exclude masks are applied after the include masks, so a bit
	 * present in both ends up cleared.
	 */
	cp->cp_edx |= cpuid_feature_edx_include;
	cp->cp_edx &= ~cpuid_feature_edx_exclude;

	cp->cp_ecx |= cpuid_feature_ecx_include;
	cp->cp_ecx &= ~cpuid_feature_ecx_exclude;

	if (cp->cp_edx & CPUID_INTC_EDX_PSE)
		feature |= X86_LARGEPAGE;
	if (cp->cp_edx & CPUID_INTC_EDX_TSC)
		feature |= X86_TSC;
	if (cp->cp_edx & CPUID_INTC_EDX_MSR)
		feature |= X86_MSR;
	if (cp->cp_edx & CPUID_INTC_EDX_MTRR)
		feature |= X86_MTRR;
	if (cp->cp_edx & CPUID_INTC_EDX_PGE)
		feature |= X86_PGE;
	if (cp->cp_edx & CPUID_INTC_EDX_CMOV)
		feature |= X86_CMOV;
	if (cp->cp_edx & CPUID_INTC_EDX_MMX)
		feature |= X86_MMX;
	/* X86_MCA requires both the MCE and the MCA bit */
	if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
	    (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0)
		feature |= X86_MCA;
	if (cp->cp_edx & CPUID_INTC_EDX_PAE)
		feature |= X86_PAE;
	if (cp->cp_edx & CPUID_INTC_EDX_CX8)
		feature |= X86_CX8;
	if (cp->cp_ecx & CPUID_INTC_ECX_CX16)
		feature |= X86_CX16;
	if (cp->cp_edx & CPUID_INTC_EDX_PAT)
		feature |= X86_PAT;
	if (cp->cp_edx & CPUID_INTC_EDX_SEP)
		feature |= X86_SEP;
	if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
		/*
		 * In our implementation, fxsave/fxrstor
		 * are prerequisites before we'll even
		 * try and do SSE things.
		 */
		if (cp->cp_edx & CPUID_INTC_EDX_SSE)
			feature |= X86_SSE;
		if (cp->cp_edx & CPUID_INTC_EDX_SSE2)
			feature |= X86_SSE2;
		if (cp->cp_ecx & CPUID_INTC_ECX_SSE3)
			feature |= X86_SSE3;
	}
	if (cp->cp_edx & CPUID_INTC_EDX_DE)
		feature |= X86_DE;
	if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
		cpi->cpi_mwait.support |= MWAIT_SUPPORT;
		feature |= X86_MWAIT;
	}

	if (feature & X86_PAE)
		cpi->cpi_pabits = 36;

	/*
	 * Hyperthreading configuration is slightly tricky on Intel
	 * and pure clones, and even trickier on AMD.
	 *
	 * (AMD chose to set the HTT bit on their CMP processors,
	 * even though they're not actually hyperthreaded.  Thus it
	 * takes a bit more work to figure out what's really going
	 * on ... see the handling of the CMP_LGCY bit below)
	 */
	if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
		cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
		if (cpi->cpi_ncpu_per_chip > 1)
			feature |= X86_HTT;
	} else {
		cpi->cpi_ncpu_per_chip = 1;
	}

	/*
	 * Work on the "extended" feature information, doing
	 * some basic initialization for cpuid_pass2()
	 */
	xcpuid = 0;
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
			xcpuid++;
		break;
	case X86_VENDOR_AMD:
		if (cpi->cpi_family > 5 ||
		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
			xcpuid++;
		break;
	case X86_VENDOR_Cyrix:
		/*
		 * Only these Cyrix CPUs are -known- to support
		 * extended cpuid operations.
		 */
		if (x86_type == X86_TYPE_VIA_CYRIX_III ||
		    x86_type == X86_TYPE_CYRIX_GXm)
			xcpuid++;
		break;
	case X86_VENDOR_Centaur:
	case X86_VENDOR_TM:
	default:
		xcpuid++;
		break;
	}

	if (xcpuid) {
		cp = &cpi->cpi_extd[0];
		cp->cp_eax = 0x80000000;
		cpi->cpi_xmaxeax = __cpuid_insn(cp);
	}

	/*
	 * When the query above was skipped, cpi_xmaxeax is not written by
	 * this function; the test below then relies on it still being zero
	 * (the structure is either static or came from kmem_zalloc()).
	 *
	 * NOTE(review): the cores-per-chip derivation sits inside this
	 * block, so cpi_ncore_per_chip is not assigned here when extended
	 * cpuid is absent - confirm that is intended.
	 */
	if (cpi->cpi_xmaxeax & 0x80000000) {

		if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
			cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;

		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
		case X86_VENDOR_AMD:
			if (cpi->cpi_xmaxeax < 0x80000001)
				break;
			cp = &cpi->cpi_extd[1];
			cp->cp_eax = 0x80000001;
			(void) __cpuid_insn(cp);

			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
			    cpi->cpi_family == 5 &&
			    cpi->cpi_model == 6 &&
			    cpi->cpi_step == 6) {
				/*
				 * K6 model 6 uses bit 10 to indicate SYSC
				 * Later models use bit 11. Fix it here.
				 */
				if (cp->cp_edx & 0x400) {
					cp->cp_edx &= ~0x400;
					cp->cp_edx |= CPUID_AMD_EDX_SYSC;
				}
			}

			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);

			/*
			 * Compute the additions to the kernel's feature word.
			 */
			if (cp->cp_edx & CPUID_AMD_EDX_NX)
				feature |= X86_NX;

			if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
			    (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
			    (cp->cp_ecx & CPUID_AMD_ECX_SSE4A))
				feature |= X86_SSE4A;

			/*
			 * If both the HTT and CMP_LGCY bits are set,
			 * then we're not actually HyperThreaded.  Read
			 * "AMD CPUID Specification" for more details.
			 */
			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
			    (feature & X86_HTT) &&
			    (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
				feature &= ~X86_HTT;
				feature |= X86_CMP;
			}
#if defined(__amd64)
			/*
			 * It's really tricky to support syscall/sysret in
			 * the i386 kernel; we rely on sysenter/sysexit
			 * instead.  In the amd64 kernel, things are -way-
			 * better.
			 */
			if (cp->cp_edx & CPUID_AMD_EDX_SYSC)
				feature |= X86_ASYSC;

			/*
			 * While we're thinking about system calls, note
			 * that AMD processors don't support sysenter
			 * in long mode at all, so don't try to program them.
			 */
			if (x86_vendor == X86_VENDOR_AMD)
				feature &= ~X86_SEP;
#endif
			if (cp->cp_edx & CPUID_AMD_EDX_TSCP)
				feature |= X86_TSCP;
			break;
		default:
			break;
		}

		/*
		 * Get CPUID data about processor cores and hyperthreads.
		 */
		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
			if (cpi->cpi_maxeax >= 4) {
				cp = &cpi->cpi_std[4];
				cp->cp_eax = 4;
				cp->cp_ecx = 0;
				(void) __cpuid_insn(cp);
				platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
			}
			/*FALLTHROUGH*/
		case X86_VENDOR_AMD:
			if (cpi->cpi_xmaxeax < 0x80000008)
				break;
			cp = &cpi->cpi_extd[8];
			cp->cp_eax = 0x80000008;
			(void) __cpuid_insn(cp);
			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);

			/*
			 * Virtual and physical address limits from
			 * cpuid override previously guessed values.
			 */
			cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
			cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
			break;
		default:
			break;
		}

		/*
		 * Derive the number of cores per chip
		 *
		 * Both vendors report (cores - 1), hence the "+ 1".
		 */
		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
			if (cpi->cpi_maxeax < 4) {
				cpi->cpi_ncore_per_chip = 1;
				break;
			} else {
				cpi->cpi_ncore_per_chip =
				    BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
			}
			break;
		case X86_VENDOR_AMD:
			if (cpi->cpi_xmaxeax < 0x80000008) {
				cpi->cpi_ncore_per_chip = 1;
				break;
			} else {
				cpi->cpi_ncore_per_chip =
				    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
			}
			break;
		default:
			cpi->cpi_ncore_per_chip = 1;
			break;
		}
	}

	/*
	 * If more than one core, then this processor is CMP.
	 */
	if (cpi->cpi_ncore_per_chip > 1)
		feature |= X86_CMP;

	/*
	 * If the number of cores is the same as the number
	 * of CPUs, then we cannot have HyperThreading.
	 */
	if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip)
		feature &= ~X86_HTT;

	if ((feature & (X86_HTT | X86_CMP)) == 0) {
		/*
		 * Single-core single-threaded processors.
		 */
		cpi->cpi_chipid = -1;
		cpi->cpi_clogid = 0;
		cpi->cpi_coreid = cpu->cpu_id;
	} else if (cpi->cpi_ncpu_per_chip > 1) {
		uint_t i;
		uint_t chipid_shift = 0;
		uint_t coreid_shift = 0;
		uint_t apic_id = CPI_APIC_ID(cpi);

		/*
		 * chipid_shift becomes the number of low APIC ID bits needed
		 * to number cpi_ncpu_per_chip logical CPUs (the count is
		 * rounded up to a power of two).
		 */
		for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
			chipid_shift++;
		cpi->cpi_chipid = apic_id >> chipid_shift;
		cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1);

		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
			if (feature & X86_CMP) {
				/*
				 * Multi-core (and possibly multi-threaded)
				 * processors.
				 *
				 * NOTE(review): ncpu_per_core stays unset if
				 * cpi_ncore_per_chip is 0.  On this Intel
				 * path X86_CMP is only set when
				 * cpi_ncore_per_chip > 1, so that case looks
				 * unreachable - confirm.
				 */
				uint_t ncpu_per_core;
				if (cpi->cpi_ncore_per_chip == 1)
					ncpu_per_core = cpi->cpi_ncpu_per_chip;
				else if (cpi->cpi_ncore_per_chip > 1)
					ncpu_per_core = cpi->cpi_ncpu_per_chip /
					    cpi->cpi_ncore_per_chip;
				/*
				 * 8bit APIC IDs on dual core Pentiums
				 * look like this:
				 *
				 * +-----------------------+------+------+
				 * | Physical Package ID   |  MC  |  HT  |
				 * +-----------------------+------+------+
				 * <------- chipid -------->
				 * <------- coreid --------------->
				 *			   <--- clogid -->
				 *
				 * Where the number of bits necessary to
				 * represent MC and HT fields together equals
				 * to the minimum number of bits necessary to
				 * store the value of cpi->cpi_ncpu_per_chip.
				 * Of those bits, the MC part uses the number
				 * of bits necessary to store the value of
				 * cpi->cpi_ncore_per_chip.
				 */
				for (i = 1; i < ncpu_per_core; i <<= 1)
					coreid_shift++;
				cpi->cpi_coreid = apic_id >> coreid_shift;
			} else if (feature & X86_HTT) {
				/*
				 * Single-core multi-threaded processors.
				 */
				cpi->cpi_coreid = cpi->cpi_chipid;
			}
		} else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
			/*
			 * AMD currently only has dual-core processors with
			 * single-threaded cores.  If they ever release
			 * multi-threaded processors, then this code
			 * will have to be updated.
			 */
			cpi->cpi_coreid = cpu->cpu_id;
		} else {
			/*
			 * All other processors are currently
			 * assumed to have single cores.
			 */
			cpi->cpi_coreid = cpi->cpi_chipid;
		}
	}

	/*
	 * Synthesize chip "revision" and socket type
	 */
	synth_info(cpi);

pass1_done:
	cpi->cpi_pass = 1;
	return (feature);
}
until it returns with 1226 * EAX[4:0] set to 0, which indicates there are no more 1227 * caches. 1228 * 1229 * Here, populate cpi_std[4] with the information returned by 1230 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1231 * when dynamic memory allocation becomes available. 1232 * 1233 * Note: we need to explicitly initialize %ecx here, since 1234 * function 4 may have been previously invoked. 1235 */ 1236 if (n == 4) 1237 cp->cp_ecx = 0; 1238 1239 (void) __cpuid_insn(cp); 1240 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1241 switch (n) { 1242 case 2: 1243 /* 1244 * "the lower 8 bits of the %eax register 1245 * contain a value that identifies the number 1246 * of times the cpuid [instruction] has to be 1247 * executed to obtain a complete image of the 1248 * processor's caching systems." 1249 * 1250 * How *do* they make this stuff up? 1251 */ 1252 cpi->cpi_ncache = sizeof (*cp) * 1253 BITX(cp->cp_eax, 7, 0); 1254 if (cpi->cpi_ncache == 0) 1255 break; 1256 cpi->cpi_ncache--; /* skip count byte */ 1257 1258 /* 1259 * Well, for now, rather than attempt to implement 1260 * this slightly dubious algorithm, we just look 1261 * at the first 15 .. 
1262 */ 1263 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1264 cpi->cpi_ncache = sizeof (*cp) - 1; 1265 1266 dp = cpi->cpi_cacheinfo; 1267 if (BITX(cp->cp_eax, 31, 31) == 0) { 1268 uint8_t *p = (void *)&cp->cp_eax; 1269 for (i = 1; i < 3; i++) 1270 if (p[i] != 0) 1271 *dp++ = p[i]; 1272 } 1273 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1274 uint8_t *p = (void *)&cp->cp_ebx; 1275 for (i = 0; i < 4; i++) 1276 if (p[i] != 0) 1277 *dp++ = p[i]; 1278 } 1279 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1280 uint8_t *p = (void *)&cp->cp_ecx; 1281 for (i = 0; i < 4; i++) 1282 if (p[i] != 0) 1283 *dp++ = p[i]; 1284 } 1285 if (BITX(cp->cp_edx, 31, 31) == 0) { 1286 uint8_t *p = (void *)&cp->cp_edx; 1287 for (i = 0; i < 4; i++) 1288 if (p[i] != 0) 1289 *dp++ = p[i]; 1290 } 1291 break; 1292 1293 case 3: /* Processor serial number, if PSN supported */ 1294 break; 1295 1296 case 4: /* Deterministic cache parameters */ 1297 break; 1298 1299 case 5: /* Monitor/Mwait parameters */ 1300 { 1301 size_t mwait_size; 1302 1303 /* 1304 * check cpi_mwait.support which was set in cpuid_pass1 1305 */ 1306 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1307 break; 1308 1309 /* 1310 * Protect ourself from insane mwait line size. 1311 * Workaround for incomplete hardware emulator(s). 
1312 */ 1313 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1314 if (mwait_size < sizeof (uint32_t) || 1315 !ISP2(mwait_size)) { 1316 #if DEBUG 1317 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1318 "size %ld", 1319 cpu->cpu_id, (long)mwait_size); 1320 #endif 1321 break; 1322 } 1323 1324 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1325 cpi->cpi_mwait.mon_max = mwait_size; 1326 if (MWAIT_EXTENSION(cpi)) { 1327 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1328 if (MWAIT_INT_ENABLE(cpi)) 1329 cpi->cpi_mwait.support |= 1330 MWAIT_ECX_INT_ENABLE; 1331 } 1332 break; 1333 } 1334 default: 1335 break; 1336 } 1337 } 1338 1339 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1340 goto pass2_done; 1341 1342 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1343 nmax = NMAX_CPI_EXTD; 1344 /* 1345 * Copy the extended properties, fixing them as we go. 1346 * (We already handled n == 0 and n == 1 in pass 1) 1347 */ 1348 iptr = (void *)cpi->cpi_brandstr; 1349 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1350 cp->cp_eax = 0x80000000 + n; 1351 (void) __cpuid_insn(cp); 1352 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1353 switch (n) { 1354 case 2: 1355 case 3: 1356 case 4: 1357 /* 1358 * Extract the brand string 1359 */ 1360 *iptr++ = cp->cp_eax; 1361 *iptr++ = cp->cp_ebx; 1362 *iptr++ = cp->cp_ecx; 1363 *iptr++ = cp->cp_edx; 1364 break; 1365 case 5: 1366 switch (cpi->cpi_vendor) { 1367 case X86_VENDOR_AMD: 1368 /* 1369 * The Athlon and Duron were the first 1370 * parts to report the sizes of the 1371 * TLB for large pages. Before then, 1372 * we don't trust the data. 1373 */ 1374 if (cpi->cpi_family < 6 || 1375 (cpi->cpi_family == 6 && 1376 cpi->cpi_model < 1)) 1377 cp->cp_eax = 0; 1378 break; 1379 default: 1380 break; 1381 } 1382 break; 1383 case 6: 1384 switch (cpi->cpi_vendor) { 1385 case X86_VENDOR_AMD: 1386 /* 1387 * The Athlon and Duron were the first 1388 * AMD parts with L2 TLB's. 1389 * Before then, don't trust the data. 
1390 */ 1391 if (cpi->cpi_family < 6 || 1392 cpi->cpi_family == 6 && 1393 cpi->cpi_model < 1) 1394 cp->cp_eax = cp->cp_ebx = 0; 1395 /* 1396 * AMD Duron rev A0 reports L2 1397 * cache size incorrectly as 1K 1398 * when it is really 64K 1399 */ 1400 if (cpi->cpi_family == 6 && 1401 cpi->cpi_model == 3 && 1402 cpi->cpi_step == 0) { 1403 cp->cp_ecx &= 0xffff; 1404 cp->cp_ecx |= 0x400000; 1405 } 1406 break; 1407 case X86_VENDOR_Cyrix: /* VIA C3 */ 1408 /* 1409 * VIA C3 processors are a bit messed 1410 * up w.r.t. encoding cache sizes in %ecx 1411 */ 1412 if (cpi->cpi_family != 6) 1413 break; 1414 /* 1415 * model 7 and 8 were incorrectly encoded 1416 * 1417 * xxx is model 8 really broken? 1418 */ 1419 if (cpi->cpi_model == 7 || 1420 cpi->cpi_model == 8) 1421 cp->cp_ecx = 1422 BITX(cp->cp_ecx, 31, 24) << 16 | 1423 BITX(cp->cp_ecx, 23, 16) << 12 | 1424 BITX(cp->cp_ecx, 15, 8) << 8 | 1425 BITX(cp->cp_ecx, 7, 0); 1426 /* 1427 * model 9 stepping 1 has wrong associativity 1428 */ 1429 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1430 cp->cp_ecx |= 8 << 12; 1431 break; 1432 case X86_VENDOR_Intel: 1433 /* 1434 * Extended L2 Cache features function. 1435 * First appeared on Prescott. 
1436 */ 1437 default: 1438 break; 1439 } 1440 break; 1441 default: 1442 break; 1443 } 1444 } 1445 1446 pass2_done: 1447 cpi->cpi_pass = 2; 1448 } 1449 1450 static const char * 1451 intel_cpubrand(const struct cpuid_info *cpi) 1452 { 1453 int i; 1454 1455 if ((x86_feature & X86_CPUID) == 0 || 1456 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1457 return ("i486"); 1458 1459 switch (cpi->cpi_family) { 1460 case 5: 1461 return ("Intel Pentium(r)"); 1462 case 6: 1463 switch (cpi->cpi_model) { 1464 uint_t celeron, xeon; 1465 const struct cpuid_regs *cp; 1466 case 0: 1467 case 1: 1468 case 2: 1469 return ("Intel Pentium(r) Pro"); 1470 case 3: 1471 case 4: 1472 return ("Intel Pentium(r) II"); 1473 case 6: 1474 return ("Intel Celeron(r)"); 1475 case 5: 1476 case 7: 1477 celeron = xeon = 0; 1478 cp = &cpi->cpi_std[2]; /* cache info */ 1479 1480 for (i = 1; i < 3; i++) { 1481 uint_t tmp; 1482 1483 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1484 if (tmp == 0x40) 1485 celeron++; 1486 if (tmp >= 0x44 && tmp <= 0x45) 1487 xeon++; 1488 } 1489 1490 for (i = 0; i < 2; i++) { 1491 uint_t tmp; 1492 1493 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1494 if (tmp == 0x40) 1495 celeron++; 1496 else if (tmp >= 0x44 && tmp <= 0x45) 1497 xeon++; 1498 } 1499 1500 for (i = 0; i < 4; i++) { 1501 uint_t tmp; 1502 1503 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1504 if (tmp == 0x40) 1505 celeron++; 1506 else if (tmp >= 0x44 && tmp <= 0x45) 1507 xeon++; 1508 } 1509 1510 for (i = 0; i < 4; i++) { 1511 uint_t tmp; 1512 1513 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1514 if (tmp == 0x40) 1515 celeron++; 1516 else if (tmp >= 0x44 && tmp <= 0x45) 1517 xeon++; 1518 } 1519 1520 if (celeron) 1521 return ("Intel Celeron(r)"); 1522 if (xeon) 1523 return (cpi->cpi_model == 5 ? 1524 "Intel Pentium(r) II Xeon(tm)" : 1525 "Intel Pentium(r) III Xeon(tm)"); 1526 return (cpi->cpi_model == 5 ? 
1527 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1528 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1529 default: 1530 break; 1531 } 1532 default: 1533 break; 1534 } 1535 1536 /* BrandID is present if the field is nonzero */ 1537 if (cpi->cpi_brandid != 0) { 1538 static const struct { 1539 uint_t bt_bid; 1540 const char *bt_str; 1541 } brand_tbl[] = { 1542 { 0x1, "Intel(r) Celeron(r)" }, 1543 { 0x2, "Intel(r) Pentium(r) III" }, 1544 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1545 { 0x4, "Intel(r) Pentium(r) III" }, 1546 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1547 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1548 { 0x8, "Intel(r) Pentium(r) 4" }, 1549 { 0x9, "Intel(r) Pentium(r) 4" }, 1550 { 0xa, "Intel(r) Celeron(r)" }, 1551 { 0xb, "Intel(r) Xeon(tm)" }, 1552 { 0xc, "Intel(r) Xeon(tm) MP" }, 1553 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1554 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1555 { 0x11, "Mobile Genuine Intel(r)" }, 1556 { 0x12, "Intel(r) Celeron(r) M" }, 1557 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1558 { 0x14, "Intel(r) Celeron(r)" }, 1559 { 0x15, "Mobile Genuine Intel(r)" }, 1560 { 0x16, "Intel(r) Pentium(r) M" }, 1561 { 0x17, "Mobile Intel(r) Celeron(r)" } 1562 }; 1563 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1564 uint_t sgn; 1565 1566 sgn = (cpi->cpi_family << 8) | 1567 (cpi->cpi_model << 4) | cpi->cpi_step; 1568 1569 for (i = 0; i < btblmax; i++) 1570 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1571 break; 1572 if (i < btblmax) { 1573 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1574 return ("Intel(r) Celeron(r)"); 1575 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1576 return ("Intel(r) Xeon(tm) MP"); 1577 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1578 return ("Intel(r) Xeon(tm)"); 1579 return (brand_tbl[i].bt_str); 1580 } 1581 } 1582 1583 return (NULL); 1584 } 1585 1586 static const char * 1587 amd_cpubrand(const struct cpuid_info *cpi) 1588 { 1589 if ((x86_feature & X86_CPUID) == 0 || 1590 cpi->cpi_maxeax < 1 || 
cpi->cpi_family < 5) 1591 return ("i486 compatible"); 1592 1593 switch (cpi->cpi_family) { 1594 case 5: 1595 switch (cpi->cpi_model) { 1596 case 0: 1597 case 1: 1598 case 2: 1599 case 3: 1600 case 4: 1601 case 5: 1602 return ("AMD-K5(r)"); 1603 case 6: 1604 case 7: 1605 return ("AMD-K6(r)"); 1606 case 8: 1607 return ("AMD-K6(r)-2"); 1608 case 9: 1609 return ("AMD-K6(r)-III"); 1610 default: 1611 return ("AMD (family 5)"); 1612 } 1613 case 6: 1614 switch (cpi->cpi_model) { 1615 case 1: 1616 return ("AMD-K7(tm)"); 1617 case 0: 1618 case 2: 1619 case 4: 1620 return ("AMD Athlon(tm)"); 1621 case 3: 1622 case 7: 1623 return ("AMD Duron(tm)"); 1624 case 6: 1625 case 8: 1626 case 10: 1627 /* 1628 * Use the L2 cache size to distinguish 1629 */ 1630 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1631 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1632 default: 1633 return ("AMD (family 6)"); 1634 } 1635 default: 1636 break; 1637 } 1638 1639 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1640 cpi->cpi_brandid != 0) { 1641 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1642 case 3: 1643 return ("AMD Opteron(tm) UP 1xx"); 1644 case 4: 1645 return ("AMD Opteron(tm) DP 2xx"); 1646 case 5: 1647 return ("AMD Opteron(tm) MP 8xx"); 1648 default: 1649 return ("AMD Opteron(tm)"); 1650 } 1651 } 1652 1653 return (NULL); 1654 } 1655 1656 static const char * 1657 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1658 { 1659 if ((x86_feature & X86_CPUID) == 0 || 1660 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1661 type == X86_TYPE_CYRIX_486) 1662 return ("i486 compatible"); 1663 1664 switch (type) { 1665 case X86_TYPE_CYRIX_6x86: 1666 return ("Cyrix 6x86"); 1667 case X86_TYPE_CYRIX_6x86L: 1668 return ("Cyrix 6x86L"); 1669 case X86_TYPE_CYRIX_6x86MX: 1670 return ("Cyrix 6x86MX"); 1671 case X86_TYPE_CYRIX_GXm: 1672 return ("Cyrix GXm"); 1673 case X86_TYPE_CYRIX_MediaGX: 1674 return ("Cyrix MediaGX"); 1675 case X86_TYPE_CYRIX_MII: 1676 return ("Cyrix M2"); 1677 case X86_TYPE_VIA_CYRIX_III: 1678 
return ("VIA Cyrix M3"); 1679 default: 1680 /* 1681 * Have another wild guess .. 1682 */ 1683 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1684 return ("Cyrix 5x86"); 1685 else if (cpi->cpi_family == 5) { 1686 switch (cpi->cpi_model) { 1687 case 2: 1688 return ("Cyrix 6x86"); /* Cyrix M1 */ 1689 case 4: 1690 return ("Cyrix MediaGX"); 1691 default: 1692 break; 1693 } 1694 } else if (cpi->cpi_family == 6) { 1695 switch (cpi->cpi_model) { 1696 case 0: 1697 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1698 case 5: 1699 case 6: 1700 case 7: 1701 case 8: 1702 case 9: 1703 return ("VIA C3"); 1704 default: 1705 break; 1706 } 1707 } 1708 break; 1709 } 1710 return (NULL); 1711 } 1712 1713 /* 1714 * This only gets called in the case that the CPU extended 1715 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1716 * aren't available, or contain null bytes for some reason. 1717 */ 1718 static void 1719 fabricate_brandstr(struct cpuid_info *cpi) 1720 { 1721 const char *brand = NULL; 1722 1723 switch (cpi->cpi_vendor) { 1724 case X86_VENDOR_Intel: 1725 brand = intel_cpubrand(cpi); 1726 break; 1727 case X86_VENDOR_AMD: 1728 brand = amd_cpubrand(cpi); 1729 break; 1730 case X86_VENDOR_Cyrix: 1731 brand = cyrix_cpubrand(cpi, x86_type); 1732 break; 1733 case X86_VENDOR_NexGen: 1734 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1735 brand = "NexGen Nx586"; 1736 break; 1737 case X86_VENDOR_Centaur: 1738 if (cpi->cpi_family == 5) 1739 switch (cpi->cpi_model) { 1740 case 4: 1741 brand = "Centaur C6"; 1742 break; 1743 case 8: 1744 brand = "Centaur C2"; 1745 break; 1746 case 9: 1747 brand = "Centaur C3"; 1748 break; 1749 default: 1750 break; 1751 } 1752 break; 1753 case X86_VENDOR_Rise: 1754 if (cpi->cpi_family == 5 && 1755 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1756 brand = "Rise mP6"; 1757 break; 1758 case X86_VENDOR_SiS: 1759 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1760 brand = "SiS 55x"; 1761 break; 1762 case X86_VENDOR_TM: 1763 if (cpi->cpi_family == 5 && 
cpi->cpi_model == 4) 1764 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1765 break; 1766 case X86_VENDOR_NSC: 1767 case X86_VENDOR_UMC: 1768 default: 1769 break; 1770 } 1771 if (brand) { 1772 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1773 return; 1774 } 1775 1776 /* 1777 * If all else fails ... 1778 */ 1779 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1780 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1781 cpi->cpi_model, cpi->cpi_step); 1782 } 1783 1784 /* 1785 * This routine is called just after kernel memory allocation 1786 * becomes available on cpu0, and as part of mp_startup() on 1787 * the other cpus. 1788 * 1789 * Fixup the brand string, and collect any information from cpuid 1790 * that requires dynamicically allocated storage to represent. 1791 */ 1792 /*ARGSUSED*/ 1793 void 1794 cpuid_pass3(cpu_t *cpu) 1795 { 1796 int i, max, shft, level, size; 1797 struct cpuid_regs regs; 1798 struct cpuid_regs *cp; 1799 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1800 1801 ASSERT(cpi->cpi_pass == 2); 1802 1803 /* 1804 * Function 4: Deterministic cache parameters 1805 * 1806 * Take this opportunity to detect the number of threads 1807 * sharing the last level cache, and construct a corresponding 1808 * cache id. The respective cpuid_info members are initialized 1809 * to the default case of "no last level cache sharing". 1810 */ 1811 cpi->cpi_ncpu_shr_last_cache = 1; 1812 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1813 1814 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1815 1816 /* 1817 * Find the # of elements (size) returned by fn 4, and along 1818 * the way detect last level cache sharing details. 
1819 */ 1820 bzero(®s, sizeof (regs)); 1821 cp = ®s; 1822 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1823 cp->cp_eax = 4; 1824 cp->cp_ecx = i; 1825 1826 (void) __cpuid_insn(cp); 1827 1828 if (CPI_CACHE_TYPE(cp) == 0) 1829 break; 1830 level = CPI_CACHE_LVL(cp); 1831 if (level > max) { 1832 max = level; 1833 cpi->cpi_ncpu_shr_last_cache = 1834 CPI_NTHR_SHR_CACHE(cp) + 1; 1835 } 1836 } 1837 cpi->cpi_std_4_size = size = i; 1838 1839 /* 1840 * Allocate the cpi_std_4 array. The first element 1841 * references the regs for fn 4, %ecx == 0, which 1842 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1843 */ 1844 if (size > 0) { 1845 cpi->cpi_std_4 = 1846 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1847 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1848 1849 /* 1850 * Allocate storage to hold the additional regs 1851 * for function 4, %ecx == 1 .. cpi_std_4_size. 1852 * 1853 * The regs for fn 4, %ecx == 0 has already 1854 * been allocated as indicated above. 1855 */ 1856 for (i = 1; i < size; i++) { 1857 cp = cpi->cpi_std_4[i] = 1858 kmem_zalloc(sizeof (regs), KM_SLEEP); 1859 cp->cp_eax = 4; 1860 cp->cp_ecx = i; 1861 1862 (void) __cpuid_insn(cp); 1863 } 1864 } 1865 /* 1866 * Determine the number of bits needed to represent 1867 * the number of CPUs sharing the last level cache. 1868 * 1869 * Shift off that number of bits from the APIC id to 1870 * derive the cache id. 1871 */ 1872 shft = 0; 1873 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1874 shft++; 1875 cpi->cpi_last_lvl_cacheid = CPI_APIC_ID(cpi) >> shft; 1876 } 1877 1878 /* 1879 * Now fixup the brand string 1880 */ 1881 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1882 fabricate_brandstr(cpi); 1883 } else { 1884 1885 /* 1886 * If we successfully extracted a brand string from the cpuid 1887 * instruction, clean it up by removing leading spaces and 1888 * similar junk. 
1889 */ 1890 if (cpi->cpi_brandstr[0]) { 1891 size_t maxlen = sizeof (cpi->cpi_brandstr); 1892 char *src, *dst; 1893 1894 dst = src = (char *)cpi->cpi_brandstr; 1895 src[maxlen - 1] = '\0'; 1896 /* 1897 * strip leading spaces 1898 */ 1899 while (*src == ' ') 1900 src++; 1901 /* 1902 * Remove any 'Genuine' or "Authentic" prefixes 1903 */ 1904 if (strncmp(src, "Genuine ", 8) == 0) 1905 src += 8; 1906 if (strncmp(src, "Authentic ", 10) == 0) 1907 src += 10; 1908 1909 /* 1910 * Now do an in-place copy. 1911 * Map (R) to (r) and (TM) to (tm). 1912 * The era of teletypes is long gone, and there's 1913 * -really- no need to shout. 1914 */ 1915 while (*src != '\0') { 1916 if (src[0] == '(') { 1917 if (strncmp(src + 1, "R)", 2) == 0) { 1918 (void) strncpy(dst, "(r)", 3); 1919 src += 3; 1920 dst += 3; 1921 continue; 1922 } 1923 if (strncmp(src + 1, "TM)", 3) == 0) { 1924 (void) strncpy(dst, "(tm)", 4); 1925 src += 4; 1926 dst += 4; 1927 continue; 1928 } 1929 } 1930 *dst++ = *src++; 1931 } 1932 *dst = '\0'; 1933 1934 /* 1935 * Finally, remove any trailing spaces 1936 */ 1937 while (--dst > cpi->cpi_brandstr) 1938 if (*dst == ' ') 1939 *dst = '\0'; 1940 else 1941 break; 1942 } else 1943 fabricate_brandstr(cpi); 1944 } 1945 cpi->cpi_pass = 3; 1946 } 1947 1948 /* 1949 * This routine is called out of bind_hwcap() much later in the life 1950 * of the kernel (post_startup()). The job of this routine is to resolve 1951 * the hardware feature support and kernel support for those features into 1952 * what we're actually going to tell applications via the aux vector. 
1953 */ 1954 uint_t 1955 cpuid_pass4(cpu_t *cpu) 1956 { 1957 struct cpuid_info *cpi; 1958 uint_t hwcap_flags = 0; 1959 1960 if (cpu == NULL) 1961 cpu = CPU; 1962 cpi = cpu->cpu_m.mcpu_cpi; 1963 1964 ASSERT(cpi->cpi_pass == 3); 1965 1966 if (cpi->cpi_maxeax >= 1) { 1967 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 1968 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 1969 1970 *edx = CPI_FEATURES_EDX(cpi); 1971 *ecx = CPI_FEATURES_ECX(cpi); 1972 1973 /* 1974 * [these require explicit kernel support] 1975 */ 1976 if ((x86_feature & X86_SEP) == 0) 1977 *edx &= ~CPUID_INTC_EDX_SEP; 1978 1979 if ((x86_feature & X86_SSE) == 0) 1980 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 1981 if ((x86_feature & X86_SSE2) == 0) 1982 *edx &= ~CPUID_INTC_EDX_SSE2; 1983 1984 if ((x86_feature & X86_HTT) == 0) 1985 *edx &= ~CPUID_INTC_EDX_HTT; 1986 1987 if ((x86_feature & X86_SSE3) == 0) 1988 *ecx &= ~CPUID_INTC_ECX_SSE3; 1989 1990 /* 1991 * [no explicit support required beyond x87 fp context] 1992 */ 1993 if (!fpu_exists) 1994 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 1995 1996 /* 1997 * Now map the supported feature vector to things that we 1998 * think userland will care about. 
1999 */ 2000 if (*edx & CPUID_INTC_EDX_SEP) 2001 hwcap_flags |= AV_386_SEP; 2002 if (*edx & CPUID_INTC_EDX_SSE) 2003 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2004 if (*edx & CPUID_INTC_EDX_SSE2) 2005 hwcap_flags |= AV_386_SSE2; 2006 if (*ecx & CPUID_INTC_ECX_SSE3) 2007 hwcap_flags |= AV_386_SSE3; 2008 if (*ecx & CPUID_INTC_ECX_POPCNT) 2009 hwcap_flags |= AV_386_POPCNT; 2010 if (*edx & CPUID_INTC_EDX_FPU) 2011 hwcap_flags |= AV_386_FPU; 2012 if (*edx & CPUID_INTC_EDX_MMX) 2013 hwcap_flags |= AV_386_MMX; 2014 2015 if (*edx & CPUID_INTC_EDX_TSC) 2016 hwcap_flags |= AV_386_TSC; 2017 if (*edx & CPUID_INTC_EDX_CX8) 2018 hwcap_flags |= AV_386_CX8; 2019 if (*edx & CPUID_INTC_EDX_CMOV) 2020 hwcap_flags |= AV_386_CMOV; 2021 if (*ecx & CPUID_INTC_ECX_MON) 2022 hwcap_flags |= AV_386_MON; 2023 if (*ecx & CPUID_INTC_ECX_CX16) 2024 hwcap_flags |= AV_386_CX16; 2025 } 2026 2027 if (x86_feature & X86_HTT) 2028 hwcap_flags |= AV_386_PAUSE; 2029 2030 if (cpi->cpi_xmaxeax < 0x80000001) 2031 goto pass4_done; 2032 2033 switch (cpi->cpi_vendor) { 2034 struct cpuid_regs cp; 2035 uint32_t *edx, *ecx; 2036 2037 case X86_VENDOR_Intel: 2038 /* 2039 * Seems like Intel duplicated what we necessary 2040 * here to make the initial crop of 64-bit OS's work. 2041 * Hopefully, those are the only "extended" bits 2042 * they'll add. 
2043 */ 2044 /*FALLTHROUGH*/ 2045 2046 case X86_VENDOR_AMD: 2047 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2048 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2049 2050 *edx = CPI_FEATURES_XTD_EDX(cpi); 2051 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2052 2053 /* 2054 * [these features require explicit kernel support] 2055 */ 2056 switch (cpi->cpi_vendor) { 2057 case X86_VENDOR_Intel: 2058 break; 2059 2060 case X86_VENDOR_AMD: 2061 if ((x86_feature & X86_TSCP) == 0) 2062 *edx &= ~CPUID_AMD_EDX_TSCP; 2063 if ((x86_feature & X86_SSE4A) == 0) 2064 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2065 break; 2066 2067 default: 2068 break; 2069 } 2070 2071 /* 2072 * [no explicit support required beyond 2073 * x87 fp context and exception handlers] 2074 */ 2075 if (!fpu_exists) 2076 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2077 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2078 2079 if ((x86_feature & X86_NX) == 0) 2080 *edx &= ~CPUID_AMD_EDX_NX; 2081 #if !defined(__amd64) 2082 *edx &= ~CPUID_AMD_EDX_LM; 2083 #endif 2084 /* 2085 * Now map the supported feature vector to 2086 * things that we think userland will care about. 2087 */ 2088 #if defined(__amd64) 2089 if (*edx & CPUID_AMD_EDX_SYSC) 2090 hwcap_flags |= AV_386_AMD_SYSC; 2091 #endif 2092 if (*edx & CPUID_AMD_EDX_MMXamd) 2093 hwcap_flags |= AV_386_AMD_MMX; 2094 if (*edx & CPUID_AMD_EDX_3DNow) 2095 hwcap_flags |= AV_386_AMD_3DNow; 2096 if (*edx & CPUID_AMD_EDX_3DNowx) 2097 hwcap_flags |= AV_386_AMD_3DNowx; 2098 2099 switch (cpi->cpi_vendor) { 2100 case X86_VENDOR_AMD: 2101 if (*edx & CPUID_AMD_EDX_TSCP) 2102 hwcap_flags |= AV_386_TSCP; 2103 if (*ecx & CPUID_AMD_ECX_AHF64) 2104 hwcap_flags |= AV_386_AHF; 2105 if (*ecx & CPUID_AMD_ECX_SSE4A) 2106 hwcap_flags |= AV_386_AMD_SSE4A; 2107 if (*ecx & CPUID_AMD_ECX_LZCNT) 2108 hwcap_flags |= AV_386_AMD_LZCNT; 2109 break; 2110 2111 case X86_VENDOR_Intel: 2112 /* 2113 * Aarrgh. 2114 * Intel uses a different bit in the same word. 
2115 */ 2116 if (*ecx & CPUID_INTC_ECX_AHF64) 2117 hwcap_flags |= AV_386_AHF; 2118 break; 2119 2120 default: 2121 break; 2122 } 2123 break; 2124 2125 case X86_VENDOR_TM: 2126 cp.cp_eax = 0x80860001; 2127 (void) __cpuid_insn(&cp); 2128 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2129 break; 2130 2131 default: 2132 break; 2133 } 2134 2135 pass4_done: 2136 cpi->cpi_pass = 4; 2137 return (hwcap_flags); 2138 } 2139 2140 2141 /* 2142 * Simulate the cpuid instruction using the data we previously 2143 * captured about this CPU. We try our best to return the truth 2144 * about the hardware, independently of kernel support. 2145 */ 2146 uint32_t 2147 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2148 { 2149 struct cpuid_info *cpi; 2150 struct cpuid_regs *xcp; 2151 2152 if (cpu == NULL) 2153 cpu = CPU; 2154 cpi = cpu->cpu_m.mcpu_cpi; 2155 2156 ASSERT(cpuid_checkpass(cpu, 3)); 2157 2158 /* 2159 * CPUID data is cached in two separate places: cpi_std for standard 2160 * CPUID functions, and cpi_extd for extended CPUID functions. 2161 */ 2162 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2163 xcp = &cpi->cpi_std[cp->cp_eax]; 2164 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2165 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2166 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2167 else 2168 /* 2169 * The caller is asking for data from an input parameter which 2170 * the kernel has not cached. In this case we go fetch from 2171 * the hardware and return the data directly to the user. 
2172 */ 2173 return (__cpuid_insn(cp)); 2174 2175 cp->cp_eax = xcp->cp_eax; 2176 cp->cp_ebx = xcp->cp_ebx; 2177 cp->cp_ecx = xcp->cp_ecx; 2178 cp->cp_edx = xcp->cp_edx; 2179 return (cp->cp_eax); 2180 } 2181 2182 int 2183 cpuid_checkpass(cpu_t *cpu, int pass) 2184 { 2185 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2186 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2187 } 2188 2189 int 2190 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2191 { 2192 ASSERT(cpuid_checkpass(cpu, 3)); 2193 2194 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2195 } 2196 2197 int 2198 cpuid_is_cmt(cpu_t *cpu) 2199 { 2200 if (cpu == NULL) 2201 cpu = CPU; 2202 2203 ASSERT(cpuid_checkpass(cpu, 1)); 2204 2205 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2206 } 2207 2208 /* 2209 * AMD and Intel both implement the 64-bit variant of the syscall 2210 * instruction (syscallq), so if there's -any- support for syscall, 2211 * cpuid currently says "yes, we support this". 2212 * 2213 * However, Intel decided to -not- implement the 32-bit variant of the 2214 * syscall instruction, so we provide a predicate to allow our caller 2215 * to test that subtlety here. 2216 * 2217 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2218 * even in the case where the hardware would in fact support it. 2219 */ 2220 /*ARGSUSED*/ 2221 int 2222 cpuid_syscall32_insn(cpu_t *cpu) 2223 { 2224 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 2225 2226 #if !defined(__xpv) 2227 if (cpu == NULL) 2228 cpu = CPU; 2229 2230 /*CSTYLED*/ 2231 { 2232 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2233 2234 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2235 cpi->cpi_xmaxeax >= 0x80000001 && 2236 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2237 return (1); 2238 } 2239 #endif 2240 return (0); 2241 } 2242 2243 int 2244 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2245 { 2246 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2247 2248 static const char fmt[] = 2249 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2250 static const char fmt_ht[] = 2251 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2252 2253 ASSERT(cpuid_checkpass(cpu, 1)); 2254 2255 if (cpuid_is_cmt(cpu)) 2256 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2257 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2258 cpi->cpi_family, cpi->cpi_model, 2259 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2260 return (snprintf(s, n, fmt, 2261 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2262 cpi->cpi_family, cpi->cpi_model, 2263 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2264 } 2265 2266 const char * 2267 cpuid_getvendorstr(cpu_t *cpu) 2268 { 2269 ASSERT(cpuid_checkpass(cpu, 1)); 2270 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2271 } 2272 2273 uint_t 2274 cpuid_getvendor(cpu_t *cpu) 2275 { 2276 ASSERT(cpuid_checkpass(cpu, 1)); 2277 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2278 } 2279 2280 uint_t 2281 cpuid_getfamily(cpu_t *cpu) 2282 { 2283 ASSERT(cpuid_checkpass(cpu, 1)); 2284 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2285 } 2286 2287 uint_t 2288 cpuid_getmodel(cpu_t *cpu) 2289 { 2290 ASSERT(cpuid_checkpass(cpu, 1)); 2291 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2292 } 2293 2294 uint_t 2295 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2296 { 2297 ASSERT(cpuid_checkpass(cpu, 1)); 2298 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2299 } 2300 2301 uint_t 2302 cpuid_get_ncore_per_chip(cpu_t *cpu) 2303 { 2304 
ASSERT(cpuid_checkpass(cpu, 1)); 2305 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2306 } 2307 2308 uint_t 2309 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2310 { 2311 ASSERT(cpuid_checkpass(cpu, 2)); 2312 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2313 } 2314 2315 id_t 2316 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2317 { 2318 ASSERT(cpuid_checkpass(cpu, 2)); 2319 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2320 } 2321 2322 uint_t 2323 cpuid_getstep(cpu_t *cpu) 2324 { 2325 ASSERT(cpuid_checkpass(cpu, 1)); 2326 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2327 } 2328 2329 uint_t 2330 cpuid_getsig(struct cpu *cpu) 2331 { 2332 ASSERT(cpuid_checkpass(cpu, 1)); 2333 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2334 } 2335 2336 uint32_t 2337 cpuid_getchiprev(struct cpu *cpu) 2338 { 2339 ASSERT(cpuid_checkpass(cpu, 1)); 2340 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2341 } 2342 2343 const char * 2344 cpuid_getchiprevstr(struct cpu *cpu) 2345 { 2346 ASSERT(cpuid_checkpass(cpu, 1)); 2347 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2348 } 2349 2350 uint32_t 2351 cpuid_getsockettype(struct cpu *cpu) 2352 { 2353 ASSERT(cpuid_checkpass(cpu, 1)); 2354 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2355 } 2356 2357 int 2358 cpuid_get_chipid(cpu_t *cpu) 2359 { 2360 ASSERT(cpuid_checkpass(cpu, 1)); 2361 2362 if (cpuid_is_cmt(cpu)) 2363 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2364 return (cpu->cpu_id); 2365 } 2366 2367 id_t 2368 cpuid_get_coreid(cpu_t *cpu) 2369 { 2370 ASSERT(cpuid_checkpass(cpu, 1)); 2371 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2372 } 2373 2374 int 2375 cpuid_get_clogid(cpu_t *cpu) 2376 { 2377 ASSERT(cpuid_checkpass(cpu, 1)); 2378 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2379 } 2380 2381 void 2382 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2383 { 2384 struct cpuid_info *cpi; 2385 2386 if (cpu == NULL) 2387 cpu = CPU; 2388 cpi = cpu->cpu_m.mcpu_cpi; 2389 2390 ASSERT(cpuid_checkpass(cpu, 1)); 2391 2392 if (pabits) 2393 
*pabits = cpi->cpi_pabits; 2394 if (vabits) 2395 *vabits = cpi->cpi_vabits; 2396 } 2397 2398 /* 2399 * Returns the number of data TLB entries for a corresponding 2400 * pagesize. If it can't be computed, or isn't known, the 2401 * routine returns zero. If you ask about an architecturally 2402 * impossible pagesize, the routine will panic (so that the 2403 * hat implementor knows that things are inconsistent.) 2404 */ 2405 uint_t 2406 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2407 { 2408 struct cpuid_info *cpi; 2409 uint_t dtlb_nent = 0; 2410 2411 if (cpu == NULL) 2412 cpu = CPU; 2413 cpi = cpu->cpu_m.mcpu_cpi; 2414 2415 ASSERT(cpuid_checkpass(cpu, 1)); 2416 2417 /* 2418 * Check the L2 TLB info 2419 */ 2420 if (cpi->cpi_xmaxeax >= 0x80000006) { 2421 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2422 2423 switch (pagesize) { 2424 2425 case 4 * 1024: 2426 /* 2427 * All zero in the top 16 bits of the register 2428 * indicates a unified TLB. Size is in low 16 bits. 2429 */ 2430 if ((cp->cp_ebx & 0xffff0000) == 0) 2431 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2432 else 2433 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2434 break; 2435 2436 case 2 * 1024 * 1024: 2437 if ((cp->cp_eax & 0xffff0000) == 0) 2438 dtlb_nent = cp->cp_eax & 0x0000ffff; 2439 else 2440 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2441 break; 2442 2443 default: 2444 panic("unknown L2 pagesize"); 2445 /*NOTREACHED*/ 2446 } 2447 } 2448 2449 if (dtlb_nent != 0) 2450 return (dtlb_nent); 2451 2452 /* 2453 * No L2 TLB support for this size, try L1. 
2454 */ 2455 if (cpi->cpi_xmaxeax >= 0x80000005) { 2456 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2457 2458 switch (pagesize) { 2459 case 4 * 1024: 2460 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2461 break; 2462 case 2 * 1024 * 1024: 2463 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2464 break; 2465 default: 2466 panic("unknown L1 d-TLB pagesize"); 2467 /*NOTREACHED*/ 2468 } 2469 } 2470 2471 return (dtlb_nent); 2472 } 2473 2474 /* 2475 * Return 0 if the erratum is not present or not applicable, positive 2476 * if it is, and negative if the status of the erratum is unknown. 2477 * 2478 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2479 * Processors" #25759, Rev 3.57, August 2005 2480 */ 2481 int 2482 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2483 { 2484 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2485 uint_t eax; 2486 2487 /* 2488 * Bail out if this CPU isn't an AMD CPU, or if it's 2489 * a legacy (32-bit) AMD CPU. 2490 */ 2491 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2492 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2493 cpi->cpi_family == 6) 2494 2495 return (0); 2496 2497 eax = cpi->cpi_std[1].cp_eax; 2498 2499 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2500 #define SH_B3(eax) (eax == 0xf51) 2501 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2502 2503 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2504 2505 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2506 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2507 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2508 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2509 2510 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2511 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2512 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2513 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2514 2515 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2516 #define JH_E1(eax) (eax == 0x20f10) /* 
JH8_E0 had 0x20f30 */
#define	DH_E3(eax)	(eax == 0x20fc0 || eax == 0x20ff0)
#define	SH_E4(eax)	(eax == 0x20f51 || eax == 0x20f71)
#define	BH_E4(eax)	(eax == 0x20fb1)
#define	SH_E5(eax)	(eax == 0x20f42)
#define	DH_E6(eax)	(eax == 0x20ff2 || eax == 0x20fc2)
#define	JH_E6(eax)	(eax == 0x20f12 || eax == 0x20f32)
#define	EX(eax)		(SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
			    SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
			    DH_E6(eax) || JH_E6(eax))

	/*
	 * One case per erratum number.  Each expression is non-zero when
	 * the signature (or, for some errata, just the family) of this
	 * cpu matches the set the erratum is listed against.
	 */
	switch (erratum) {
	case 1:
		return (cpi->cpi_family < 0x10);
	case 51:	/* what does the asterisk mean? */
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 52:
		return (B(eax));
	case 57:
		return (cpi->cpi_family <= 0x10);
	case 58:
		return (B(eax));
	case 60:
		return (cpi->cpi_family <= 0x10);
	case 61:
	case 62:
	case 63:
	case 64:
	case 65:
	case 66:
	case 68:
	case 69:
	case 70:
	case 71:
		return (B(eax));
	case 72:
		return (SH_B0(eax));
	case 74:
		return (B(eax));
	case 75:
		return (cpi->cpi_family < 0x10);
	case 76:
		return (B(eax));
	case 77:
		return (cpi->cpi_family <= 0x10);
	case 78:
		return (B(eax) || SH_C0(eax));
	case 79:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
	case 80:
	case 81:
	case 82:
		return (B(eax));
	case 83:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 85:
		return (cpi->cpi_family < 0x10);
	case 86:
		return (SH_C0(eax) || CG(eax));
	case 88:
#if !defined(__amd64)
		return (0);
#else
		return (B(eax) || SH_C0(eax));
#endif
	case 89:
		return (cpi->cpi_family < 0x10);
	case 90:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 91:
	case 92:
		return (B(eax) || SH_C0(eax));
	case 93:
		return (SH_C0(eax));
	case 94:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 95:
#if !defined(__amd64)
		return (0);
#else
		return (B(eax) || SH_C0(eax));
#endif
	case 96:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 97:
	case 98:
		return (SH_C0(eax) || CG(eax));
	case 99:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 100:
		return (B(eax) || SH_C0(eax));
	case 101:
	case 103:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 104:
		return (SH_C0(eax) || CG(eax) || D0(eax));
	case 105:
	case 106:
	case 107:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 108:
		return (DH_CG(eax));
	case 109:
		return (SH_C0(eax) || CG(eax) || D0(eax));
	case 110:
		return (D0(eax) || EX(eax));
	case 111:
		return (CG(eax));
	case 112:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
	case 113:
		return (eax == 0x20fc0);
	case 114:
		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
	case 115:
		return (SH_E0(eax) || JH_E1(eax));
	case 116:
		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
	case 117:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 118:
		return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
		    JH_E6(eax));
	case 121:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
	case 122:
		return (cpi->cpi_family < 0x10);
	case 123:
		return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
	case 131:
		return (cpi->cpi_family < 0x10);
	case 6336786:
		/*
		 * Test for AdvPowerMgmtInfo.TscPStateInvariant
		 * if this is a K8 family or newer processor
		 */
		if (CPI_FAMILY(cpi) == 0xf) {
			struct cpuid_regs regs;

			/* query extended function 0x80000007 directly */
			regs.cp_eax = 0x80000007;
			(void) __cpuid_insn(&regs);
			/* affected when %edx bit 8 is clear */
			return (!(regs.cp_edx & 0x100));
		}
		return (0);
	case 6323525:
		/*
		 * Repack the signature as (bits 27:20 + bits 11:8) in the
		 * upper byte and (bits 19:16 : bits 7:4) in the lower byte,
		 * then compare that value against a fixed threshold.
		 */
		return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
		    (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) <
0xf40); 2662 2663 default: 2664 return (-1); 2665 } 2666 } 2667 2668 static const char assoc_str[] = "associativity"; 2669 static const char line_str[] = "line-size"; 2670 static const char size_str[] = "size"; 2671 2672 static void 2673 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2674 uint32_t val) 2675 { 2676 char buf[128]; 2677 2678 /* 2679 * ndi_prop_update_int() is used because it is desirable for 2680 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2681 */ 2682 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2683 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2684 } 2685 2686 /* 2687 * Intel-style cache/tlb description 2688 * 2689 * Standard cpuid level 2 gives a randomly ordered 2690 * selection of tags that index into a table that describes 2691 * cache and tlb properties. 2692 */ 2693 2694 static const char l1_icache_str[] = "l1-icache"; 2695 static const char l1_dcache_str[] = "l1-dcache"; 2696 static const char l2_cache_str[] = "l2-cache"; 2697 static const char l3_cache_str[] = "l3-cache"; 2698 static const char itlb4k_str[] = "itlb-4K"; 2699 static const char dtlb4k_str[] = "dtlb-4K"; 2700 static const char itlb4M_str[] = "itlb-4M"; 2701 static const char dtlb4M_str[] = "dtlb-4M"; 2702 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2703 static const char dtlb44_str[] = "dtlb-4K-4M"; 2704 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2705 static const char sl2_cache_str[] = "sectored-l2-cache"; 2706 static const char itrace_str[] = "itrace-cache"; 2707 static const char sl3_cache_str[] = "sectored-l3-cache"; 2708 2709 static const struct cachetab { 2710 uint8_t ct_code; 2711 uint8_t ct_assoc; 2712 uint16_t ct_line_size; 2713 size_t ct_size; 2714 const char *ct_label; 2715 } intel_ctab[] = { 2716 /* maintain descending order! 
*/ 2717 { 0xb4, 4, 0, 256, dtlb4k_str }, 2718 { 0xb3, 4, 0, 128, dtlb4k_str }, 2719 { 0xb0, 4, 0, 128, itlb4k_str }, 2720 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2721 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2722 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2723 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2724 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2725 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2726 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2727 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2728 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2729 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2730 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2731 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 2732 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 2733 { 0x73, 8, 0, 64*1024, itrace_str}, 2734 { 0x72, 8, 0, 32*1024, itrace_str}, 2735 { 0x71, 8, 0, 16*1024, itrace_str}, 2736 { 0x70, 8, 0, 12*1024, itrace_str}, 2737 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 2738 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 2739 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 2740 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 2741 { 0x5d, 0, 0, 256, dtlb44_str}, 2742 { 0x5c, 0, 0, 128, dtlb44_str}, 2743 { 0x5b, 0, 0, 64, dtlb44_str}, 2744 { 0x52, 0, 0, 256, itlb424_str}, 2745 { 0x51, 0, 0, 128, itlb424_str}, 2746 { 0x50, 0, 0, 64, itlb424_str}, 2747 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 2748 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 2749 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 2750 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 2751 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 2752 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 2753 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 2754 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 2755 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 2756 { 0x43, 4, 32, 512*1024, l2_cache_str}, 2757 { 0x42, 4, 32, 256*1024, l2_cache_str}, 2758 { 0x41, 4, 32, 128*1024, l2_cache_str}, 2759 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 2760 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 2761 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 
2762 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 2763 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 2764 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 2765 { 0x30, 8, 64, 32*1024, l1_icache_str}, 2766 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 2767 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 2768 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 2769 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 2770 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 2771 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 2772 { 0x0b, 4, 0, 4, itlb4M_str}, 2773 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 2774 { 0x08, 4, 32, 16*1024, l1_icache_str}, 2775 { 0x06, 4, 32, 8*1024, l1_icache_str}, 2776 { 0x04, 4, 0, 8, dtlb4M_str}, 2777 { 0x03, 4, 0, 64, dtlb4k_str}, 2778 { 0x02, 4, 0, 2, itlb4M_str}, 2779 { 0x01, 4, 0, 32, itlb4k_str}, 2780 { 0 } 2781 }; 2782 2783 static const struct cachetab cyrix_ctab[] = { 2784 { 0x70, 4, 0, 32, "tlb-4K" }, 2785 { 0x80, 4, 16, 16*1024, "l1-cache" }, 2786 { 0 } 2787 }; 2788 2789 /* 2790 * Search a cache table for a matching entry 2791 */ 2792 static const struct cachetab * 2793 find_cacheent(const struct cachetab *ct, uint_t code) 2794 { 2795 if (code != 0) { 2796 for (; ct->ct_code != 0; ct++) 2797 if (ct->ct_code <= code) 2798 break; 2799 if (ct->ct_code == code) 2800 return (ct); 2801 } 2802 return (NULL); 2803 } 2804 2805 /* 2806 * Walk the cacheinfo descriptor, applying 'func' to every valid element 2807 * The walk is terminated if the walker returns non-zero. 2808 */ 2809 static void 2810 intel_walk_cacheinfo(struct cpuid_info *cpi, 2811 void *arg, int (*func)(void *, const struct cachetab *)) 2812 { 2813 const struct cachetab *ct; 2814 uint8_t *dp; 2815 int i; 2816 2817 if ((dp = cpi->cpi_cacheinfo) == NULL) 2818 return; 2819 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2820 /* 2821 * For overloaded descriptor 0x49 we use cpuid function 4 2822 * if supported by the current processor, to update 2823 * cache information. 
2824 */ 2825 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4) { 2826 intel_cpuid_4_cache_info(arg, cpi); 2827 continue; 2828 } 2829 2830 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 2831 if (func(arg, ct) != 0) 2832 break; 2833 } 2834 } 2835 } 2836 2837 /* 2838 * (Like the Intel one, except for Cyrix CPUs) 2839 */ 2840 static void 2841 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 2842 void *arg, int (*func)(void *, const struct cachetab *)) 2843 { 2844 const struct cachetab *ct; 2845 uint8_t *dp; 2846 int i; 2847 2848 if ((dp = cpi->cpi_cacheinfo) == NULL) 2849 return; 2850 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2851 /* 2852 * Search Cyrix-specific descriptor table first .. 2853 */ 2854 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 2855 if (func(arg, ct) != 0) 2856 break; 2857 continue; 2858 } 2859 /* 2860 * .. else fall back to the Intel one 2861 */ 2862 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 2863 if (func(arg, ct) != 0) 2864 break; 2865 continue; 2866 } 2867 } 2868 } 2869 2870 /* 2871 * A cacheinfo walker that adds associativity, line-size, and size properties 2872 * to the devinfo node it is passed as an argument. 2873 */ 2874 static int 2875 add_cacheent_props(void *arg, const struct cachetab *ct) 2876 { 2877 dev_info_t *devi = arg; 2878 2879 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 2880 if (ct->ct_line_size != 0) 2881 add_cache_prop(devi, ct->ct_label, line_str, 2882 ct->ct_line_size); 2883 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 2884 return (0); 2885 } 2886 2887 /* 2888 * Add L2 or L3 cache-information using cpuid function 4. This 2889 * function is called from intel_walk_cacheinfo() when descriptor 2890 * 0x49 is encountered. 
2891 */ 2892 static void 2893 intel_cpuid_4_cache_info(void *arg, struct cpuid_info *cpi) 2894 { 2895 uint32_t level, i; 2896 2897 struct cachetab ct; 2898 2899 for (i = 0; i < cpi->cpi_std_4_size; i++) { 2900 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 2901 2902 if (level == 2 || level == 3) { 2903 ct.ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 2904 ct.ct_line_size = 2905 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 2906 ct.ct_size = ct.ct_assoc * 2907 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 2908 ct.ct_line_size * 2909 (cpi->cpi_std_4[i]->cp_ecx + 1); 2910 2911 if (level == 2) { 2912 ct.ct_label = l2_cache_str; 2913 } else if (level == 3) { 2914 ct.ct_label = l3_cache_str; 2915 } 2916 2917 (void) add_cacheent_props(arg, 2918 (const struct cachetab *) (&ct)); 2919 } 2920 } 2921 } 2922 2923 static const char fully_assoc[] = "fully-associative?"; 2924 2925 /* 2926 * AMD style cache/tlb description 2927 * 2928 * Extended functions 5 and 6 directly describe properties of 2929 * tlbs and various cache levels. 2930 */ 2931 static void 2932 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 2933 { 2934 switch (assoc) { 2935 case 0: /* reserved; ignore */ 2936 break; 2937 default: 2938 add_cache_prop(devi, label, assoc_str, assoc); 2939 break; 2940 case 0xff: 2941 add_cache_prop(devi, label, fully_assoc, 1); 2942 break; 2943 } 2944 } 2945 2946 static void 2947 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 2948 { 2949 if (size == 0) 2950 return; 2951 add_cache_prop(devi, label, size_str, size); 2952 add_amd_assoc(devi, label, assoc); 2953 } 2954 2955 static void 2956 add_amd_cache(dev_info_t *devi, const char *label, 2957 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 2958 { 2959 if (size == 0 || line_size == 0) 2960 return; 2961 add_amd_assoc(devi, label, assoc); 2962 /* 2963 * Most AMD parts have a sectored cache. Multiple cache lines are 2964 * associated with each tag. 
A sector consists of all cache lines 2965 * associated with a tag. For example, the AMD K6-III has a sector 2966 * size of 2 cache lines per tag. 2967 */ 2968 if (lines_per_tag != 0) 2969 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 2970 add_cache_prop(devi, label, line_str, line_size); 2971 add_cache_prop(devi, label, size_str, size * 1024); 2972 } 2973 2974 static void 2975 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 2976 { 2977 switch (assoc) { 2978 case 0: /* off */ 2979 break; 2980 case 1: 2981 case 2: 2982 case 4: 2983 add_cache_prop(devi, label, assoc_str, assoc); 2984 break; 2985 case 6: 2986 add_cache_prop(devi, label, assoc_str, 8); 2987 break; 2988 case 8: 2989 add_cache_prop(devi, label, assoc_str, 16); 2990 break; 2991 case 0xf: 2992 add_cache_prop(devi, label, fully_assoc, 1); 2993 break; 2994 default: /* reserved; ignore */ 2995 break; 2996 } 2997 } 2998 2999 static void 3000 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3001 { 3002 if (size == 0 || assoc == 0) 3003 return; 3004 add_amd_l2_assoc(devi, label, assoc); 3005 add_cache_prop(devi, label, size_str, size); 3006 } 3007 3008 static void 3009 add_amd_l2_cache(dev_info_t *devi, const char *label, 3010 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3011 { 3012 if (size == 0 || assoc == 0 || line_size == 0) 3013 return; 3014 add_amd_l2_assoc(devi, label, assoc); 3015 if (lines_per_tag != 0) 3016 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3017 add_cache_prop(devi, label, line_str, line_size); 3018 add_cache_prop(devi, label, size_str, size * 1024); 3019 } 3020 3021 static void 3022 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3023 { 3024 struct cpuid_regs *cp; 3025 3026 if (cpi->cpi_xmaxeax < 0x80000005) 3027 return; 3028 cp = &cpi->cpi_extd[5]; 3029 3030 /* 3031 * 4M/2M L1 TLB configuration 3032 * 3033 * We report the size for 2M pages because AMD uses two 3034 * TLB entries 
for one 4M page. 3035 */ 3036 add_amd_tlb(devi, "dtlb-2M", 3037 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3038 add_amd_tlb(devi, "itlb-2M", 3039 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3040 3041 /* 3042 * 4K L1 TLB configuration 3043 */ 3044 3045 switch (cpi->cpi_vendor) { 3046 uint_t nentries; 3047 case X86_VENDOR_TM: 3048 if (cpi->cpi_family >= 5) { 3049 /* 3050 * Crusoe processors have 256 TLB entries, but 3051 * cpuid data format constrains them to only 3052 * reporting 255 of them. 3053 */ 3054 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3055 nentries = 256; 3056 /* 3057 * Crusoe processors also have a unified TLB 3058 */ 3059 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3060 nentries); 3061 break; 3062 } 3063 /*FALLTHROUGH*/ 3064 default: 3065 add_amd_tlb(devi, itlb4k_str, 3066 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3067 add_amd_tlb(devi, dtlb4k_str, 3068 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3069 break; 3070 } 3071 3072 /* 3073 * data L1 cache configuration 3074 */ 3075 3076 add_amd_cache(devi, l1_dcache_str, 3077 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3078 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3079 3080 /* 3081 * code L1 cache configuration 3082 */ 3083 3084 add_amd_cache(devi, l1_icache_str, 3085 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3086 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3087 3088 if (cpi->cpi_xmaxeax < 0x80000006) 3089 return; 3090 cp = &cpi->cpi_extd[6]; 3091 3092 /* Check for a unified L2 TLB for large pages */ 3093 3094 if (BITX(cp->cp_eax, 31, 16) == 0) 3095 add_amd_l2_tlb(devi, "l2-tlb-2M", 3096 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3097 else { 3098 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3099 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3100 add_amd_l2_tlb(devi, "l2-itlb-2M", 3101 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3102 } 3103 3104 /* Check for a unified L2 TLB for 4K pages */ 3105 3106 if 
(BITX(cp->cp_ebx, 31, 16) == 0) { 3107 add_amd_l2_tlb(devi, "l2-tlb-4K", 3108 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3109 } else { 3110 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3111 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3112 add_amd_l2_tlb(devi, "l2-itlb-4K", 3113 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3114 } 3115 3116 add_amd_l2_cache(devi, l2_cache_str, 3117 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3118 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3119 } 3120 3121 /* 3122 * There are two basic ways that the x86 world describes it cache 3123 * and tlb architecture - Intel's way and AMD's way. 3124 * 3125 * Return which flavor of cache architecture we should use 3126 */ 3127 static int 3128 x86_which_cacheinfo(struct cpuid_info *cpi) 3129 { 3130 switch (cpi->cpi_vendor) { 3131 case X86_VENDOR_Intel: 3132 if (cpi->cpi_maxeax >= 2) 3133 return (X86_VENDOR_Intel); 3134 break; 3135 case X86_VENDOR_AMD: 3136 /* 3137 * The K5 model 1 was the first part from AMD that reported 3138 * cache sizes via extended cpuid functions. 3139 */ 3140 if (cpi->cpi_family > 5 || 3141 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3142 return (X86_VENDOR_AMD); 3143 break; 3144 case X86_VENDOR_TM: 3145 if (cpi->cpi_family >= 5) 3146 return (X86_VENDOR_AMD); 3147 /*FALLTHROUGH*/ 3148 default: 3149 /* 3150 * If they have extended CPU data for 0x80000005 3151 * then we assume they have AMD-format cache 3152 * information. 3153 * 3154 * If not, and the vendor happens to be Cyrix, 3155 * then try our-Cyrix specific handler. 3156 * 3157 * If we're not Cyrix, then assume we're using Intel's 3158 * table-driven format instead. 
3159 */ 3160 if (cpi->cpi_xmaxeax >= 0x80000005) 3161 return (X86_VENDOR_AMD); 3162 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3163 return (X86_VENDOR_Cyrix); 3164 else if (cpi->cpi_maxeax >= 2) 3165 return (X86_VENDOR_Intel); 3166 break; 3167 } 3168 return (-1); 3169 } 3170 3171 /* 3172 * create a node for the given cpu under the prom root node. 3173 * Also, create a cpu node in the device tree. 3174 */ 3175 static dev_info_t *cpu_nex_devi = NULL; 3176 static kmutex_t cpu_node_lock; 3177 3178 /* 3179 * Called from post_startup() and mp_startup() 3180 */ 3181 void 3182 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3183 { 3184 dev_info_t *cpu_devi; 3185 int create; 3186 3187 mutex_enter(&cpu_node_lock); 3188 3189 /* 3190 * create a nexus node for all cpus identified as 'cpu_id' under 3191 * the root node. 3192 */ 3193 if (cpu_nex_devi == NULL) { 3194 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3195 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3196 mutex_exit(&cpu_node_lock); 3197 return; 3198 } 3199 (void) ndi_devi_online(cpu_nex_devi, 0); 3200 } 3201 3202 /* 3203 * create a child node for cpu identified as 'cpu_id' 3204 */ 3205 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3206 cpu_id); 3207 if (cpu_devi == NULL) { 3208 mutex_exit(&cpu_node_lock); 3209 return; 3210 } 3211 3212 /* device_type */ 3213 3214 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3215 "device_type", "cpu"); 3216 3217 /* reg */ 3218 3219 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3220 "reg", cpu_id); 3221 3222 /* cpu-mhz, and clock-frequency */ 3223 3224 if (cpu_freq > 0) { 3225 long long mul; 3226 3227 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3228 "cpu-mhz", cpu_freq); 3229 3230 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3231 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3232 "clock-frequency", (int)mul); 3233 } 3234 3235 (void) ndi_devi_online(cpu_devi, 0); 3236 3237 if ((x86_feature & X86_CPUID) == 
0) { 3238 mutex_exit(&cpu_node_lock); 3239 return; 3240 } 3241 3242 /* vendor-id */ 3243 3244 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3245 "vendor-id", cpi->cpi_vendorstr); 3246 3247 if (cpi->cpi_maxeax == 0) { 3248 mutex_exit(&cpu_node_lock); 3249 return; 3250 } 3251 3252 /* 3253 * family, model, and step 3254 */ 3255 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3256 "family", CPI_FAMILY(cpi)); 3257 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3258 "cpu-model", CPI_MODEL(cpi)); 3259 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3260 "stepping-id", CPI_STEP(cpi)); 3261 3262 /* type */ 3263 3264 switch (cpi->cpi_vendor) { 3265 case X86_VENDOR_Intel: 3266 create = 1; 3267 break; 3268 default: 3269 create = 0; 3270 break; 3271 } 3272 if (create) 3273 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3274 "type", CPI_TYPE(cpi)); 3275 3276 /* ext-family */ 3277 3278 switch (cpi->cpi_vendor) { 3279 case X86_VENDOR_Intel: 3280 case X86_VENDOR_AMD: 3281 create = cpi->cpi_family >= 0xf; 3282 break; 3283 default: 3284 create = 0; 3285 break; 3286 } 3287 if (create) 3288 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3289 "ext-family", CPI_FAMILY_XTD(cpi)); 3290 3291 /* ext-model */ 3292 3293 switch (cpi->cpi_vendor) { 3294 case X86_VENDOR_Intel: 3295 create = CPI_MODEL(cpi) == 0xf; 3296 break; 3297 case X86_VENDOR_AMD: 3298 create = CPI_FAMILY(cpi) == 0xf; 3299 break; 3300 default: 3301 create = 0; 3302 break; 3303 } 3304 if (create) 3305 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3306 "ext-model", CPI_MODEL_XTD(cpi)); 3307 3308 /* generation */ 3309 3310 switch (cpi->cpi_vendor) { 3311 case X86_VENDOR_AMD: 3312 /* 3313 * AMD K5 model 1 was the first part to support this 3314 */ 3315 create = cpi->cpi_xmaxeax >= 0x80000001; 3316 break; 3317 default: 3318 create = 0; 3319 break; 3320 } 3321 if (create) 3322 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3323 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 
3324 3325 /* brand-id */ 3326 3327 switch (cpi->cpi_vendor) { 3328 case X86_VENDOR_Intel: 3329 /* 3330 * brand id first appeared on Pentium III Xeon model 8, 3331 * and Celeron model 8 processors and Opteron 3332 */ 3333 create = cpi->cpi_family > 6 || 3334 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3335 break; 3336 case X86_VENDOR_AMD: 3337 create = cpi->cpi_family >= 0xf; 3338 break; 3339 default: 3340 create = 0; 3341 break; 3342 } 3343 if (create && cpi->cpi_brandid != 0) { 3344 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3345 "brand-id", cpi->cpi_brandid); 3346 } 3347 3348 /* chunks, and apic-id */ 3349 3350 switch (cpi->cpi_vendor) { 3351 /* 3352 * first available on Pentium IV and Opteron (K8) 3353 */ 3354 case X86_VENDOR_Intel: 3355 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3356 break; 3357 case X86_VENDOR_AMD: 3358 create = cpi->cpi_family >= 0xf; 3359 break; 3360 default: 3361 create = 0; 3362 break; 3363 } 3364 if (create) { 3365 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3366 "chunks", CPI_CHUNKS(cpi)); 3367 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3368 "apic-id", CPI_APIC_ID(cpi)); 3369 if (cpi->cpi_chipid >= 0) { 3370 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3371 "chip#", cpi->cpi_chipid); 3372 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3373 "clog#", cpi->cpi_clogid); 3374 } 3375 } 3376 3377 /* cpuid-features */ 3378 3379 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3380 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3381 3382 3383 /* cpuid-features-ecx */ 3384 3385 switch (cpi->cpi_vendor) { 3386 case X86_VENDOR_Intel: 3387 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3388 break; 3389 default: 3390 create = 0; 3391 break; 3392 } 3393 if (create) 3394 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3395 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3396 3397 /* ext-cpuid-features */ 3398 3399 switch (cpi->cpi_vendor) { 3400 case X86_VENDOR_Intel: 3401 case X86_VENDOR_AMD: 
3402 case X86_VENDOR_Cyrix: 3403 case X86_VENDOR_TM: 3404 case X86_VENDOR_Centaur: 3405 create = cpi->cpi_xmaxeax >= 0x80000001; 3406 break; 3407 default: 3408 create = 0; 3409 break; 3410 } 3411 if (create) { 3412 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3413 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3414 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3415 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3416 } 3417 3418 /* 3419 * Brand String first appeared in Intel Pentium IV, AMD K5 3420 * model 1, and Cyrix GXm. On earlier models we try and 3421 * simulate something similar .. so this string should always 3422 * same -something- about the processor, however lame. 3423 */ 3424 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3425 "brand-string", cpi->cpi_brandstr); 3426 3427 /* 3428 * Finally, cache and tlb information 3429 */ 3430 switch (x86_which_cacheinfo(cpi)) { 3431 case X86_VENDOR_Intel: 3432 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3433 break; 3434 case X86_VENDOR_Cyrix: 3435 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3436 break; 3437 case X86_VENDOR_AMD: 3438 amd_cache_info(cpi, cpu_devi); 3439 break; 3440 default: 3441 break; 3442 } 3443 3444 mutex_exit(&cpu_node_lock); 3445 } 3446 3447 struct l2info { 3448 int *l2i_csz; 3449 int *l2i_lsz; 3450 int *l2i_assoc; 3451 int l2i_ret; 3452 }; 3453 3454 /* 3455 * A cacheinfo walker that fetches the size, line-size and associativity 3456 * of the L2 cache 3457 */ 3458 static int 3459 intel_l2cinfo(void *arg, const struct cachetab *ct) 3460 { 3461 struct l2info *l2i = arg; 3462 int *ip; 3463 3464 if (ct->ct_label != l2_cache_str && 3465 ct->ct_label != sl2_cache_str) 3466 return (0); /* not an L2 -- keep walking */ 3467 3468 if ((ip = l2i->l2i_csz) != NULL) 3469 *ip = ct->ct_size; 3470 if ((ip = l2i->l2i_lsz) != NULL) 3471 *ip = ct->ct_line_size; 3472 if ((ip = l2i->l2i_assoc) != NULL) 3473 *ip = ct->ct_assoc; 3474 l2i->l2i_ret = ct->ct_size; 
3475 return (1); /* was an L2 -- terminate walk */ 3476 } 3477 3478 /* 3479 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3480 * 3481 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3482 * value is the associativity, the associativity for the L2 cache and 3483 * tlb is encoded in the following table. The 4 bit L2 value serves as 3484 * an index into the amd_afd[] array to determine the associativity. 3485 * -1 is undefined. 0 is fully associative. 3486 */ 3487 3488 static int amd_afd[] = 3489 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3490 3491 static void 3492 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3493 { 3494 struct cpuid_regs *cp; 3495 uint_t size, assoc; 3496 int i; 3497 int *ip; 3498 3499 if (cpi->cpi_xmaxeax < 0x80000006) 3500 return; 3501 cp = &cpi->cpi_extd[6]; 3502 3503 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3504 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3505 uint_t cachesz = size * 1024; 3506 assoc = amd_afd[i]; 3507 3508 ASSERT(assoc != -1); 3509 3510 if ((ip = l2i->l2i_csz) != NULL) 3511 *ip = cachesz; 3512 if ((ip = l2i->l2i_lsz) != NULL) 3513 *ip = BITX(cp->cp_ecx, 7, 0); 3514 if ((ip = l2i->l2i_assoc) != NULL) 3515 *ip = assoc; 3516 l2i->l2i_ret = cachesz; 3517 } 3518 } 3519 3520 int 3521 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3522 { 3523 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3524 struct l2info __l2info, *l2i = &__l2info; 3525 3526 l2i->l2i_csz = csz; 3527 l2i->l2i_lsz = lsz; 3528 l2i->l2i_assoc = assoc; 3529 l2i->l2i_ret = -1; 3530 3531 switch (x86_which_cacheinfo(cpi)) { 3532 case X86_VENDOR_Intel: 3533 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3534 break; 3535 case X86_VENDOR_Cyrix: 3536 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3537 break; 3538 case X86_VENDOR_AMD: 3539 amd_l2cacheinfo(cpi, l2i); 3540 break; 3541 default: 3542 break; 3543 } 3544 return (l2i->l2i_ret); 3545 } 3546 3547 #if !defined(__xpv) 3548 3549 uint32_t * 3550 
cpuid_mwait_alloc(cpu_t *cpu) 3551 { 3552 uint32_t *ret; 3553 size_t mwait_size; 3554 3555 ASSERT(cpuid_checkpass(cpu, 2)); 3556 3557 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3558 if (mwait_size == 0) 3559 return (NULL); 3560 3561 /* 3562 * kmem_alloc() returns cache line size aligned data for mwait_size 3563 * allocations. mwait_size is currently cache line sized. Neither 3564 * of these implementation details are guarantied to be true in the 3565 * future. 3566 * 3567 * First try allocating mwait_size as kmem_alloc() currently returns 3568 * correctly aligned memory. If kmem_alloc() does not return 3569 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3570 * 3571 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3572 * decide to free this memory. 3573 */ 3574 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3575 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3576 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3577 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3578 *ret = MWAIT_RUNNING; 3579 return (ret); 3580 } else { 3581 kmem_free(ret, mwait_size); 3582 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3583 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3584 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3585 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3586 *ret = MWAIT_RUNNING; 3587 return (ret); 3588 } 3589 } 3590 3591 void 3592 cpuid_mwait_free(cpu_t *cpu) 3593 { 3594 ASSERT(cpuid_checkpass(cpu, 2)); 3595 3596 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3597 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3598 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3599 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3600 } 3601 3602 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3603 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3604 } 3605 3606 #endif /* !__xpv */ 3607