/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Various routines to handle identification
 * and classification of x86 processors.
 */

#include <sys/types.h>
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/cpuvar.h>
#include <sys/processor.h>
#include <sys/pg.h>
#include <sys/fp.h>
#include <sys/controlregs.h>
#include <sys/auxv_386.h>
#include <sys/bitmap.h>
#include <sys/memnode.h>

/*
 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
 * them accordingly. For most modern processors, feature detection occurs here
 * in pass 1.
 *
 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
 * for the boot CPU and does the basic analysis that the early kernel needs.
 * x86_feature is set based on the return value of cpuid_pass1() of the boot
 * CPU.
 *
 * Pass 1 includes:
 *
 *	o Determining vendor/model/family/stepping and setting x86_type and
 *	  x86_vendor accordingly.
 *	o Processing the feature flags returned by the cpuid instruction while
 *	  applying any workarounds or tricks for the specific processor.
 *	o Mapping the feature flags into Solaris feature bits (X86_*).
 *	o Processing extended feature flags if supported by the processor,
 *	  again while applying specific processor knowledge.
 *	o Determining the CMT characteristics of the system.
 *
 * Pass 1 is done on non-boot CPUs during their initialization and the results
 * are used only as a meager attempt at ensuring that all processors within the
 * system support the same features.
 *
 * Pass 2 of cpuid feature analysis happens just at the beginning
 * of startup().  It just copies in and corrects the remainder
 * of the cpuid data we depend on: standard cpuid functions that we didn't
 * need for pass1 feature analysis, and extended cpuid functions beyond the
 * simple feature processing done in pass1.
 *
 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
 * particular kernel memory allocation has been made available. It creates a
 * readable brand string based on the data collected in the first two passes.
 *
 * Pass 4 of cpuid analysis is invoked after post_startup() when all
 * the support infrastructure for various hardware features has been
 * initialized. It determines which processor features will be reported
 * to userland via the aux vector.
 *
 * All passes are executed on all CPUs, but only the boot CPU determines what
 * features the kernel will use.
 *
 * Much of the worst junk in this file is for the support of processors
 * that didn't really implement the cpuid instruction properly.
 *
 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
 * the pass numbers.  Accordingly, changes to the pass code may require changes
 * to the accessor code.
 */

uint_t x86_feature = 0;
uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;

uint_t pentiumpro_bug4046376;
uint_t pentiumpro_bug4064495;

uint_t enable486;

/*
 * This set of strings is for processors rumored to support the cpuid
 * instruction, and is used by locore.s to figure out how to set x86_vendor.
 */
const char CyrixInstead[] = "CyrixInstead";

/*
 * monitor/mwait info.
 */
struct mwait_info {
	size_t		mon_min;	/* min size to avoid missed wakeups */
	size_t		mon_max;	/* size to avoid false wakeups */
	uint32_t	support;	/* processor support of monitor/mwait */
};

/*
 * These constants determine how many of the elements of the
 * cpuid we cache in the cpuid_info data structure; the
 * remaining elements are accessible via the cpuid instruction.
 */

#define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
#define	NMAX_CPI_EXTD	9		/* eax = 0x80000000 .. 0x80000008 */

struct cpuid_info {
	uint_t cpi_pass;		/* last pass completed */
	/*
	 * standard function information
	 */
	uint_t cpi_maxeax;		/* fn 0: %eax */
	char cpi_vendorstr[13];		/* fn 0: %ebx:%ecx:%edx */
	uint_t cpi_vendor;		/* enum of cpi_vendorstr */

	uint_t cpi_family;		/* fn 1: extended family */
	uint_t cpi_model;		/* fn 1: extended model */
	uint_t cpi_step;		/* fn 1: stepping */
	chipid_t cpi_chipid;		/* fn 1: %ebx: chip # on ht cpus */
	uint_t cpi_brandid;		/* fn 1: %ebx: brand ID */
	int cpi_clogid;			/* fn 1: %ebx: thread # */
	uint_t cpi_ncpu_per_chip;	/* fn 1: %ebx: logical cpu count */
	uint8_t cpi_cacheinfo[16];	/* fn 2: intel-style cache desc */
	uint_t cpi_ncache;		/* fn 2: number of elements */
	uint_t cpi_ncpu_shr_last_cache;	/* fn 4: %eax: ncpus sharing cache */
	id_t cpi_last_lvl_cacheid;	/* fn 4: %eax: derived cache id */
	uint_t cpi_std_4_size;		/* fn 4: number of fn 4 elements */
	struct cpuid_regs **cpi_std_4;	/* fn 4: %ecx == 0 .. fn4_size */
	struct cpuid_regs cpi_std[NMAX_CPI_STD];	/* 0 .. 5 */
	/*
	 * extended function information
	 */
	uint_t cpi_xmaxeax;		/* fn 0x80000000: %eax */
	char cpi_brandstr[49];		/* fn 0x8000000[234] */
	uint8_t cpi_pabits;		/* fn 0x80000006: %eax */
	uint8_t cpi_vabits;		/* fn 0x80000006: %eax */
	struct cpuid_regs cpi_extd[NMAX_CPI_EXTD];	/* 0x8000000[0-8] */
	id_t cpi_coreid;
	uint_t cpi_ncore_per_chip;	/* AMD: fn 0x80000008: %ecx[7-0] */
					/* Intel: fn 4: %eax[31-26] */
	/*
	 * supported feature information
	 */
	uint32_t cpi_support[5];
#define	STD_EDX_FEATURES	0
#define	AMD_EDX_FEATURES	1
#define	TM_EDX_FEATURES		2
#define	STD_ECX_FEATURES	3
#define	AMD_ECX_FEATURES	4
	/*
	 * Synthesized information, where known.
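	 * These fields are filled in by synth_info() below; at present only
	 * AMD family 0xf parts get values other than the "unknown" defaults.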
	 */
	uint32_t cpi_chiprev;		/* See X86_CHIPREV_* in x86_archext.h */
	const char *cpi_chiprevstr;	/* May be NULL if chiprev unknown */
	uint32_t cpi_socket;		/* Chip package/socket type */

	struct mwait_info cpi_mwait;	/* fn 5: monitor/mwait info */
};


static struct cpuid_info cpuid_info0;

/*
 * These bit fields are defined by the Intel Application Note AP-485
 * "Intel Processor Identification and the CPUID Instruction"
 */
#define	CPI_FAMILY_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
#define	CPI_MODEL_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
#define	CPI_TYPE(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
#define	CPI_FAMILY(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
#define	CPI_STEP(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
#define	CPI_MODEL(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 7, 4)

#define	CPI_FEATURES_EDX(cpi)		((cpi)->cpi_std[1].cp_edx)
#define	CPI_FEATURES_ECX(cpi)		((cpi)->cpi_std[1].cp_ecx)
#define	CPI_FEATURES_XTD_EDX(cpi)	((cpi)->cpi_extd[1].cp_edx)
#define	CPI_FEATURES_XTD_ECX(cpi)	((cpi)->cpi_extd[1].cp_ecx)

#define	CPI_BRANDID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
#define	CPI_CHUNKS(cpi)		BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
#define	CPI_CPU_COUNT(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
#define	CPI_APIC_ID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)

#define	CPI_MAXEAX_MAX		0x100		/* sanity control */
#define	CPI_XMAXEAX_MAX		0x80000100
#define	CPI_FN4_ECX_MAX		0x20		/* sanity: max fn 4 levels */

/*
 * Function 4 (Deterministic Cache Parameters) macros
 * Defined by Intel Application Note AP-485
 */
#define	CPI_NUM_CORES(regs)		BITX((regs)->cp_eax, 31, 26)
#define	CPI_NTHR_SHR_CACHE(regs)	BITX((regs)->cp_eax, 25, 14)
#define	CPI_FULL_ASSOC_CACHE(regs)	BITX((regs)->cp_eax, 9, 9)
#define	CPI_SELF_INIT_CACHE(regs)	BITX((regs)->cp_eax, 8, 8)
#define	CPI_CACHE_LVL(regs)		BITX((regs)->cp_eax, 7, 5)
#define	CPI_CACHE_TYPE(regs)		BITX((regs)->cp_eax, 4, 0)

#define	CPI_CACHE_WAYS(regs)		BITX((regs)->cp_ebx, 31, 22)
#define	CPI_CACHE_PARTS(regs)		BITX((regs)->cp_ebx, 21, 12)
#define	CPI_CACHE_COH_LN_SZ(regs)	BITX((regs)->cp_ebx, 11, 0)

#define	CPI_CACHE_SETS(regs)		BITX((regs)->cp_ecx, 31, 0)

#define	CPI_PREFCH_STRIDE(regs)		BITX((regs)->cp_edx, 9, 0)


/*
 * A couple of shorthand macros to identify "later" P6-family chips
 * like the Pentium M and Core.  First, the "older" P6-based stuff
 * (loosely defined as "pre-Pentium-4"):
 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
 */

#define	IS_LEGACY_P6(cpi) (			\
	cpi->cpi_family == 6 &&			\
		(cpi->cpi_model == 1 ||		\
		cpi->cpi_model == 3 ||		\
		cpi->cpi_model == 5 ||		\
		cpi->cpi_model == 6 ||		\
		cpi->cpi_model == 7 ||		\
		cpi->cpi_model == 8 ||		\
		cpi->cpi_model == 0xA ||	\
		cpi->cpi_model == 0xB)		\
)

/* A "new F6" is everything with family 6 that's not the above */
#define	IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))

/* Extended family/model support */
#define	IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
	cpi->cpi_family >= 0xf)

/*
 * AMD family 0xf socket types.
 * First index is 0 for revs B thru E, 1 for F and G.
 * Second index by (model & 0x3)
 */
static uint32_t amd_skts[2][4] = {
	{
		X86_SOCKET_754,		/* 0b00 */
		X86_SOCKET_940,		/* 0b01 */
		X86_SOCKET_754,		/* 0b10 */
		X86_SOCKET_939		/* 0b11 */
	},
	{
		X86_SOCKET_S1g1,	/* 0b00 */
		X86_SOCKET_F1207,	/* 0b01 */
		X86_SOCKET_UNKNOWN,	/* 0b10 */
		X86_SOCKET_AM2		/* 0b11 */
	}
};
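
/*
 * Illustrative example (a typical socket AM2 rev F part): family 0xf,
 * model 0x4b, stepping 2 matches the Rev F entry in amd_revmap below,
 * so rm_sktidx is 1 and synth_amd_info() reports the socket as
 * amd_skts[1][0x4b & 0x3], i.e. X86_SOCKET_AM2.
 */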

/*
 * Table for mapping AMD Family 0xf model/stepping combination to
 * chip "revision" and socket type.  Only rm_family 0xf is used at the
 * moment, but AMD family 0x10 will extend the existing revision names
 * and so will likely also use this table.
 *
 * The first member of this array that matches a given family, extended model
 * plus model range, and stepping range will be considered a match.
 */
static const struct amd_rev_mapent {
	uint_t rm_family;
	uint_t rm_modello;
	uint_t rm_modelhi;
	uint_t rm_steplo;
	uint_t rm_stephi;
	uint32_t rm_chiprev;
	const char *rm_chiprevstr;
	int rm_sktidx;
} amd_revmap[] = {
	/*
	 * Rev B includes model 0x4 stepping 0 and model 0x5 stepping 0 and 1.
	 */
	{ 0xf, 0x04, 0x04, 0x0, 0x0, X86_CHIPREV_AMD_F_REV_B, "B", 0 },
	{ 0xf, 0x05, 0x05, 0x0, 0x1, X86_CHIPREV_AMD_F_REV_B, "B", 0 },
	/*
	 * Rev C0 includes model 0x4 stepping 8 and model 0x5 stepping 8
	 */
	{ 0xf, 0x04, 0x05, 0x8, 0x8, X86_CHIPREV_AMD_F_REV_C0, "C0", 0 },
	/*
	 * Rev CG is the rest of extended model 0x0 - i.e., everything
	 * but the rev B and C0 combinations covered above.
	 */
	{ 0xf, 0x00, 0x0f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_CG, "CG", 0 },
	/*
	 * Rev D has extended model 0x1.
	 */
	{ 0xf, 0x10, 0x1f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_D, "D", 0 },
	/*
	 * Rev E has extended model 0x2.
	 * Extended model 0x3 is unused but available to grow into.
	 */
	{ 0xf, 0x20, 0x3f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_E, "E", 0 },
	/*
	 * Rev F has extended models 0x4 and 0x5.
	 */
	{ 0xf, 0x40, 0x5f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_F, "F", 1 },
	/*
	 * Rev G has extended model 0x6.
	 */
	{ 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", 1 },
};

/*
 * Info for monitor/mwait idle loop.
 *
 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
 * 2006.
 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
 * Documentation Updates" #33633, Rev 2.05, December 2006.
 */
#define	MWAIT_SUPPORT		(0x00000001)	/* mwait supported */
#define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
#define	MWAIT_ECX_INT_ENABLE	(0x00000004)	/* ecx 1 extension supported */
#define	MWAIT_SUPPORTED(cpi)	((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
#define	MWAIT_INT_ENABLE(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x2)
#define	MWAIT_EXTENSION(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x1)
#define	MWAIT_SIZE_MIN(cpi)	BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
#define	MWAIT_SIZE_MAX(cpi)	BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
/*
 * Number of sub-cstates for a given c-state.
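 * The c_state argument is a bit offset into leaf 5 %edx: each 4-bit field
 * holds the sub-cstate count for one C-state, so for example
 * MWAIT_NUM_SUBC_STATES(cpi, 4) extracts bits 7:4 (the C1 count in the
 * leaf-5 layout described in the Intel manual cited above).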
 */
#define	MWAIT_NUM_SUBC_STATES(cpi, c_state)	\
	BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)

static void intel_cpuid_4_cache_info(void *, struct cpuid_info *);

static void
synth_amd_info(struct cpuid_info *cpi)
{
	const struct amd_rev_mapent *rmp;
	uint_t family, model, step;
	int i;

	/*
	 * Currently only AMD family 0xf uses these fields.
	 */
	if (cpi->cpi_family != 0xf)
		return;

	family = cpi->cpi_family;
	model = cpi->cpi_model;
	step = cpi->cpi_step;

	for (i = 0, rmp = amd_revmap; i < sizeof (amd_revmap) / sizeof (*rmp);
	    i++, rmp++) {
		if (family == rmp->rm_family &&
		    model >= rmp->rm_modello && model <= rmp->rm_modelhi &&
		    step >= rmp->rm_steplo && step <= rmp->rm_stephi) {
			cpi->cpi_chiprev = rmp->rm_chiprev;
			cpi->cpi_chiprevstr = rmp->rm_chiprevstr;
			cpi->cpi_socket = amd_skts[rmp->rm_sktidx][model & 0x3];
			return;
		}
	}
}

static void
synth_info(struct cpuid_info *cpi)
{
	cpi->cpi_chiprev = X86_CHIPREV_UNKNOWN;
	cpi->cpi_chiprevstr = "Unknown";
	cpi->cpi_socket = X86_SOCKET_UNKNOWN;

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_AMD:
		synth_amd_info(cpi);
		break;

	default:
		break;

	}
}

/*
 * Apply various platform-dependent restrictions where the underlying
 * platform's limitations mean the CPU can be marked as less capable
 * than its cpuid instruction would imply.
 */

#define	platform_cpuid_mangle(vendor, eax, cp)	/* nothing */

/*
 * Some undocumented ways of patching the results of the cpuid
 * instruction to permit running Solaris 10 on future cpus that
 * we don't currently support.  Could be set to non-zero values
 * via settings in eeprom.
 */

uint32_t cpuid_feature_ecx_include;
uint32_t cpuid_feature_ecx_exclude;
uint32_t cpuid_feature_edx_include;
uint32_t cpuid_feature_edx_exclude;

void
cpuid_alloc_space(cpu_t *cpu)
{
	/*
	 * By convention, cpu0 is the boot cpu, which is set up
	 * before memory allocation is available.  All other cpus get
	 * their cpuid_info struct allocated here.
434 */ 435 ASSERT(cpu->cpu_id != 0); 436 cpu->cpu_m.mcpu_cpi = 437 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 438 } 439 440 void 441 cpuid_free_space(cpu_t *cpu) 442 { 443 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 444 int i; 445 446 ASSERT(cpu->cpu_id != 0); 447 448 /* 449 * Free up any function 4 related dynamic storage 450 */ 451 for (i = 1; i < cpi->cpi_std_4_size; i++) 452 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 453 if (cpi->cpi_std_4_size > 0) 454 kmem_free(cpi->cpi_std_4, 455 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 456 457 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 458 } 459 460 uint_t 461 cpuid_pass1(cpu_t *cpu) 462 { 463 uint32_t mask_ecx, mask_edx; 464 uint_t feature = X86_CPUID; 465 struct cpuid_info *cpi; 466 struct cpuid_regs *cp; 467 int xcpuid; 468 469 470 /* 471 * Space statically allocated for cpu0, ensure pointer is set 472 */ 473 if (cpu->cpu_id == 0) 474 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 475 cpi = cpu->cpu_m.mcpu_cpi; 476 ASSERT(cpi != NULL); 477 cp = &cpi->cpi_std[0]; 478 cp->cp_eax = 0; 479 cpi->cpi_maxeax = __cpuid_insn(cp); 480 { 481 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 482 *iptr++ = cp->cp_ebx; 483 *iptr++ = cp->cp_edx; 484 *iptr++ = cp->cp_ecx; 485 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 486 } 487 488 /* 489 * Map the vendor string to a type code 490 */ 491 if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0) 492 cpi->cpi_vendor = X86_VENDOR_Intel; 493 else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0) 494 cpi->cpi_vendor = X86_VENDOR_AMD; 495 else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0) 496 cpi->cpi_vendor = X86_VENDOR_TM; 497 else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0) 498 /* 499 * CyrixInstead is a variable used by the Cyrix detection code 500 * in locore. 501 */ 502 cpi->cpi_vendor = X86_VENDOR_Cyrix; 503 else if (strcmp(cpi->cpi_vendorstr, "UMC UMC UMC ") == 0) 504 cpi->cpi_vendor = X86_VENDOR_UMC; 505 else if (strcmp(cpi->cpi_vendorstr, "NexGenDriven") == 0) 506 cpi->cpi_vendor = X86_VENDOR_NexGen; 507 else if (strcmp(cpi->cpi_vendorstr, "CentaurHauls") == 0) 508 cpi->cpi_vendor = X86_VENDOR_Centaur; 509 else if (strcmp(cpi->cpi_vendorstr, "RiseRiseRise") == 0) 510 cpi->cpi_vendor = X86_VENDOR_Rise; 511 else if (strcmp(cpi->cpi_vendorstr, "SiS SiS SiS ") == 0) 512 cpi->cpi_vendor = X86_VENDOR_SiS; 513 else if (strcmp(cpi->cpi_vendorstr, "Geode by NSC") == 0) 514 cpi->cpi_vendor = X86_VENDOR_NSC; 515 else 516 cpi->cpi_vendor = X86_VENDOR_IntelClone; 517 518 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 519 520 /* 521 * Limit the range in case of weird hardware 522 */ 523 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 524 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 525 if (cpi->cpi_maxeax < 1) 526 goto pass1_done; 527 528 cp = &cpi->cpi_std[1]; 529 cp->cp_eax = 1; 530 (void) __cpuid_insn(cp); 531 532 /* 533 * Extract identifying constants for easy access. 534 */ 535 cpi->cpi_model = CPI_MODEL(cpi); 536 cpi->cpi_family = CPI_FAMILY(cpi); 537 538 if (cpi->cpi_family == 0xf) 539 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 540 541 /* 542 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 543 * Intel, and presumably everyone else, uses model == 0xf, as 544 * one would expect (max value means possible overflow). Sigh. 
545 */ 546 547 switch (cpi->cpi_vendor) { 548 case X86_VENDOR_Intel: 549 if (IS_EXTENDED_MODEL_INTEL(cpi)) 550 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 551 break; 552 case X86_VENDOR_AMD: 553 if (CPI_FAMILY(cpi) == 0xf) 554 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 555 break; 556 default: 557 if (cpi->cpi_model == 0xf) 558 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 559 break; 560 } 561 562 cpi->cpi_step = CPI_STEP(cpi); 563 cpi->cpi_brandid = CPI_BRANDID(cpi); 564 565 /* 566 * *default* assumptions: 567 * - believe %edx feature word 568 * - ignore %ecx feature word 569 * - 32-bit virtual and physical addressing 570 */ 571 mask_edx = 0xffffffff; 572 mask_ecx = 0; 573 574 cpi->cpi_pabits = cpi->cpi_vabits = 32; 575 576 switch (cpi->cpi_vendor) { 577 case X86_VENDOR_Intel: 578 if (cpi->cpi_family == 5) 579 x86_type = X86_TYPE_P5; 580 else if (IS_LEGACY_P6(cpi)) { 581 x86_type = X86_TYPE_P6; 582 pentiumpro_bug4046376 = 1; 583 pentiumpro_bug4064495 = 1; 584 /* 585 * Clear the SEP bit when it was set erroneously 586 */ 587 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 588 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 589 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 590 x86_type = X86_TYPE_P4; 591 /* 592 * We don't currently depend on any of the %ecx 593 * features until Prescott, so we'll only check 594 * this from P4 onwards. We might want to revisit 595 * that idea later. 596 */ 597 mask_ecx = 0xffffffff; 598 } else if (cpi->cpi_family > 0xf) 599 mask_ecx = 0xffffffff; 600 /* 601 * We don't support MONITOR/MWAIT if leaf 5 is not available 602 * to obtain the monitor linesize. 603 */ 604 if (cpi->cpi_maxeax < 5) 605 mask_ecx &= ~CPUID_INTC_ECX_MON; 606 break; 607 case X86_VENDOR_IntelClone: 608 default: 609 break; 610 case X86_VENDOR_AMD: 611 #if defined(OPTERON_ERRATUM_108) 612 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 613 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 614 cpi->cpi_model = 0xc; 615 } else 616 #endif 617 if (cpi->cpi_family == 5) { 618 /* 619 * AMD K5 and K6 620 * 621 * These CPUs have an incomplete implementation 622 * of MCA/MCE which we mask away. 623 */ 624 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 625 626 /* 627 * Model 0 uses the wrong (APIC) bit 628 * to indicate PGE. Fix it here. 629 */ 630 if (cpi->cpi_model == 0) { 631 if (cp->cp_edx & 0x200) { 632 cp->cp_edx &= ~0x200; 633 cp->cp_edx |= CPUID_INTC_EDX_PGE; 634 } 635 } 636 637 /* 638 * Early models had problems w/ MMX; disable. 639 */ 640 if (cpi->cpi_model < 6) 641 mask_edx &= ~CPUID_INTC_EDX_MMX; 642 } 643 644 /* 645 * For newer families, SSE3 and CX16, at least, are valid; 646 * enable all 647 */ 648 if (cpi->cpi_family >= 0xf) 649 mask_ecx = 0xffffffff; 650 /* 651 * We don't support MONITOR/MWAIT if leaf 5 is not available 652 * to obtain the monitor linesize. 653 */ 654 if (cpi->cpi_maxeax < 5) 655 mask_ecx &= ~CPUID_INTC_ECX_MON; 656 break; 657 case X86_VENDOR_TM: 658 /* 659 * workaround the NT workaround in CMS 4.1 660 */ 661 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 662 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 663 cp->cp_edx |= CPUID_INTC_EDX_CX8; 664 break; 665 case X86_VENDOR_Centaur: 666 /* 667 * workaround the NT workarounds again 668 */ 669 if (cpi->cpi_family == 6) 670 cp->cp_edx |= CPUID_INTC_EDX_CX8; 671 break; 672 case X86_VENDOR_Cyrix: 673 /* 674 * We rely heavily on the probing in locore 675 * to actually figure out what parts, if any, 676 * of the Cyrix cpuid instruction to believe. 
677 */ 678 switch (x86_type) { 679 case X86_TYPE_CYRIX_486: 680 mask_edx = 0; 681 break; 682 case X86_TYPE_CYRIX_6x86: 683 mask_edx = 0; 684 break; 685 case X86_TYPE_CYRIX_6x86L: 686 mask_edx = 687 CPUID_INTC_EDX_DE | 688 CPUID_INTC_EDX_CX8; 689 break; 690 case X86_TYPE_CYRIX_6x86MX: 691 mask_edx = 692 CPUID_INTC_EDX_DE | 693 CPUID_INTC_EDX_MSR | 694 CPUID_INTC_EDX_CX8 | 695 CPUID_INTC_EDX_PGE | 696 CPUID_INTC_EDX_CMOV | 697 CPUID_INTC_EDX_MMX; 698 break; 699 case X86_TYPE_CYRIX_GXm: 700 mask_edx = 701 CPUID_INTC_EDX_MSR | 702 CPUID_INTC_EDX_CX8 | 703 CPUID_INTC_EDX_CMOV | 704 CPUID_INTC_EDX_MMX; 705 break; 706 case X86_TYPE_CYRIX_MediaGX: 707 break; 708 case X86_TYPE_CYRIX_MII: 709 case X86_TYPE_VIA_CYRIX_III: 710 mask_edx = 711 CPUID_INTC_EDX_DE | 712 CPUID_INTC_EDX_TSC | 713 CPUID_INTC_EDX_MSR | 714 CPUID_INTC_EDX_CX8 | 715 CPUID_INTC_EDX_PGE | 716 CPUID_INTC_EDX_CMOV | 717 CPUID_INTC_EDX_MMX; 718 break; 719 default: 720 break; 721 } 722 break; 723 } 724 725 /* 726 * Now we've figured out the masks that determine 727 * which bits we choose to believe, apply the masks 728 * to the feature words, then map the kernel's view 729 * of these feature words into its feature word. 730 */ 731 cp->cp_edx &= mask_edx; 732 cp->cp_ecx &= mask_ecx; 733 734 /* 735 * apply any platform restrictions (we don't call this 736 * immediately after __cpuid_insn here, because we need the 737 * workarounds applied above first) 738 */ 739 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 740 741 /* 742 * fold in overrides from the "eeprom" mechanism 743 */ 744 cp->cp_edx |= cpuid_feature_edx_include; 745 cp->cp_edx &= ~cpuid_feature_edx_exclude; 746 747 cp->cp_ecx |= cpuid_feature_ecx_include; 748 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 749 750 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 751 feature |= X86_LARGEPAGE; 752 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 753 feature |= X86_TSC; 754 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 755 feature |= X86_MSR; 756 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 757 feature |= X86_MTRR; 758 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 759 feature |= X86_PGE; 760 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 761 feature |= X86_CMOV; 762 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 763 feature |= X86_MMX; 764 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 765 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 766 feature |= X86_MCA; 767 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 768 feature |= X86_PAE; 769 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 770 feature |= X86_CX8; 771 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 772 feature |= X86_CX16; 773 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 774 feature |= X86_PAT; 775 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 776 feature |= X86_SEP; 777 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 778 /* 779 * In our implementation, fxsave/fxrstor 780 * are prerequisites before we'll even 781 * try and do SSE things. 782 */ 783 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 784 feature |= X86_SSE; 785 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 786 feature |= X86_SSE2; 787 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 788 feature |= X86_SSE3; 789 } 790 if (cp->cp_edx & CPUID_INTC_EDX_DE) 791 feature |= X86_DE; 792 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 793 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 794 feature |= X86_MWAIT; 795 } 796 797 if (feature & X86_PAE) 798 cpi->cpi_pabits = 36; 799 800 /* 801 * Hyperthreading configuration is slightly tricky on Intel 802 * and pure clones, and even trickier on AMD. 803 * 804 * (AMD chose to set the HTT bit on their CMP processors, 805 * even though they're not actually hyperthreaded. 
Thus it 806 * takes a bit more work to figure out what's really going 807 * on ... see the handling of the CMP_LGCY bit below) 808 */ 809 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 810 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 811 if (cpi->cpi_ncpu_per_chip > 1) 812 feature |= X86_HTT; 813 } else { 814 cpi->cpi_ncpu_per_chip = 1; 815 } 816 817 /* 818 * Work on the "extended" feature information, doing 819 * some basic initialization for cpuid_pass2() 820 */ 821 xcpuid = 0; 822 switch (cpi->cpi_vendor) { 823 case X86_VENDOR_Intel: 824 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 825 xcpuid++; 826 break; 827 case X86_VENDOR_AMD: 828 if (cpi->cpi_family > 5 || 829 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 830 xcpuid++; 831 break; 832 case X86_VENDOR_Cyrix: 833 /* 834 * Only these Cyrix CPUs are -known- to support 835 * extended cpuid operations. 836 */ 837 if (x86_type == X86_TYPE_VIA_CYRIX_III || 838 x86_type == X86_TYPE_CYRIX_GXm) 839 xcpuid++; 840 break; 841 case X86_VENDOR_Centaur: 842 case X86_VENDOR_TM: 843 default: 844 xcpuid++; 845 break; 846 } 847 848 if (xcpuid) { 849 cp = &cpi->cpi_extd[0]; 850 cp->cp_eax = 0x80000000; 851 cpi->cpi_xmaxeax = __cpuid_insn(cp); 852 } 853 854 if (cpi->cpi_xmaxeax & 0x80000000) { 855 856 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 857 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 858 859 switch (cpi->cpi_vendor) { 860 case X86_VENDOR_Intel: 861 case X86_VENDOR_AMD: 862 if (cpi->cpi_xmaxeax < 0x80000001) 863 break; 864 cp = &cpi->cpi_extd[1]; 865 cp->cp_eax = 0x80000001; 866 (void) __cpuid_insn(cp); 867 868 if (cpi->cpi_vendor == X86_VENDOR_AMD && 869 cpi->cpi_family == 5 && 870 cpi->cpi_model == 6 && 871 cpi->cpi_step == 6) { 872 /* 873 * K6 model 6 uses bit 10 to indicate SYSC 874 * Later models use bit 11. Fix it here. 875 */ 876 if (cp->cp_edx & 0x400) { 877 cp->cp_edx &= ~0x400; 878 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 879 } 880 } 881 882 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 883 884 /* 885 * Compute the additions to the kernel's feature word. 886 */ 887 if (cp->cp_edx & CPUID_AMD_EDX_NX) 888 feature |= X86_NX; 889 890 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 891 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 892 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 893 feature |= X86_SSE4A; 894 895 /* 896 * If both the HTT and CMP_LGCY bits are set, 897 * then we're not actually HyperThreaded. Read 898 * "AMD CPUID Specification" for more details. 899 */ 900 if (cpi->cpi_vendor == X86_VENDOR_AMD && 901 (feature & X86_HTT) && 902 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 903 feature &= ~X86_HTT; 904 feature |= X86_CMP; 905 } 906 #if defined(__amd64) 907 /* 908 * It's really tricky to support syscall/sysret in 909 * the i386 kernel; we rely on sysenter/sysexit 910 * instead. In the amd64 kernel, things are -way- 911 * better. 912 */ 913 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 914 feature |= X86_ASYSC; 915 916 /* 917 * While we're thinking about system calls, note 918 * that AMD processors don't support sysenter 919 * in long mode at all, so don't try to program them. 920 */ 921 if (x86_vendor == X86_VENDOR_AMD) 922 feature &= ~X86_SEP; 923 #endif 924 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 925 feature |= X86_TSCP; 926 break; 927 default: 928 break; 929 } 930 931 /* 932 * Get CPUID data about processor cores and hyperthreads. 
933 */ 934 switch (cpi->cpi_vendor) { 935 case X86_VENDOR_Intel: 936 if (cpi->cpi_maxeax >= 4) { 937 cp = &cpi->cpi_std[4]; 938 cp->cp_eax = 4; 939 cp->cp_ecx = 0; 940 (void) __cpuid_insn(cp); 941 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 942 } 943 /*FALLTHROUGH*/ 944 case X86_VENDOR_AMD: 945 if (cpi->cpi_xmaxeax < 0x80000008) 946 break; 947 cp = &cpi->cpi_extd[8]; 948 cp->cp_eax = 0x80000008; 949 (void) __cpuid_insn(cp); 950 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 951 952 /* 953 * Virtual and physical address limits from 954 * cpuid override previously guessed values. 955 */ 956 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 957 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 958 break; 959 default: 960 break; 961 } 962 963 /* 964 * Derive the number of cores per chip 965 */ 966 switch (cpi->cpi_vendor) { 967 case X86_VENDOR_Intel: 968 if (cpi->cpi_maxeax < 4) { 969 cpi->cpi_ncore_per_chip = 1; 970 break; 971 } else { 972 cpi->cpi_ncore_per_chip = 973 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 974 } 975 break; 976 case X86_VENDOR_AMD: 977 if (cpi->cpi_xmaxeax < 0x80000008) { 978 cpi->cpi_ncore_per_chip = 1; 979 break; 980 } else { 981 cpi->cpi_ncore_per_chip = 982 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 983 } 984 break; 985 default: 986 cpi->cpi_ncore_per_chip = 1; 987 break; 988 } 989 } 990 991 /* 992 * If more than one core, then this processor is CMP. 993 */ 994 if (cpi->cpi_ncore_per_chip > 1) 995 feature |= X86_CMP; 996 997 /* 998 * If the number of cores is the same as the number 999 * of CPUs, then we cannot have HyperThreading. 1000 */ 1001 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1002 feature &= ~X86_HTT; 1003 1004 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1005 /* 1006 * Single-core single-threaded processors. 1007 */ 1008 cpi->cpi_chipid = -1; 1009 cpi->cpi_clogid = 0; 1010 cpi->cpi_coreid = cpu->cpu_id; 1011 } else if (cpi->cpi_ncpu_per_chip > 1) { 1012 uint_t i; 1013 uint_t chipid_shift = 0; 1014 uint_t coreid_shift = 0; 1015 uint_t apic_id = CPI_APIC_ID(cpi); 1016 1017 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1018 chipid_shift++; 1019 cpi->cpi_chipid = apic_id >> chipid_shift; 1020 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1021 1022 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1023 if (feature & X86_CMP) { 1024 /* 1025 * Multi-core (and possibly multi-threaded) 1026 * processors. 1027 */ 1028 uint_t ncpu_per_core; 1029 if (cpi->cpi_ncore_per_chip == 1) 1030 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1031 else if (cpi->cpi_ncore_per_chip > 1) 1032 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1033 cpi->cpi_ncore_per_chip; 1034 /* 1035 * 8bit APIC IDs on dual core Pentiums 1036 * look like this: 1037 * 1038 * +-----------------------+------+------+ 1039 * | Physical Package ID | MC | HT | 1040 * +-----------------------+------+------+ 1041 * <------- chipid --------> 1042 * <------- coreid ---------------> 1043 * <--- clogid --> 1044 * 1045 * Where the number of bits necessary to 1046 * represent MC and HT fields together equals 1047 * to the minimum number of bits necessary to 1048 * store the value of cpi->cpi_ncpu_per_chip. 1049 * Of those bits, the MC part uses the number 1050 * of bits necessary to store the value of 1051 * cpi->cpi_ncore_per_chip. 1052 */ 1053 for (i = 1; i < ncpu_per_core; i <<= 1) 1054 coreid_shift++; 1055 cpi->cpi_coreid = apic_id >> coreid_shift; 1056 } else if (feature & X86_HTT) { 1057 /* 1058 * Single-core multi-threaded processors. 
1059 */ 1060 cpi->cpi_coreid = cpi->cpi_chipid; 1061 } 1062 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1063 /* 1064 * AMD currently only has dual-core processors with 1065 * single-threaded cores. If they ever release 1066 * multi-threaded processors, then this code 1067 * will have to be updated. 1068 */ 1069 cpi->cpi_coreid = cpu->cpu_id; 1070 } else { 1071 /* 1072 * All other processors are currently 1073 * assumed to have single cores. 1074 */ 1075 cpi->cpi_coreid = cpi->cpi_chipid; 1076 } 1077 } 1078 1079 /* 1080 * Synthesize chip "revision" and socket type 1081 */ 1082 synth_info(cpi); 1083 1084 pass1_done: 1085 cpi->cpi_pass = 1; 1086 return (feature); 1087 } 1088 1089 /* 1090 * Make copies of the cpuid table entries we depend on, in 1091 * part for ease of parsing now, in part so that we have only 1092 * one place to correct any of it, in part for ease of 1093 * later export to userland, and in part so we can look at 1094 * this stuff in a crash dump. 1095 */ 1096 1097 /*ARGSUSED*/ 1098 void 1099 cpuid_pass2(cpu_t *cpu) 1100 { 1101 uint_t n, nmax; 1102 int i; 1103 struct cpuid_regs *cp; 1104 uint8_t *dp; 1105 uint32_t *iptr; 1106 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1107 1108 ASSERT(cpi->cpi_pass == 1); 1109 1110 if (cpi->cpi_maxeax < 1) 1111 goto pass2_done; 1112 1113 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1114 nmax = NMAX_CPI_STD; 1115 /* 1116 * (We already handled n == 0 and n == 1 in pass 1) 1117 */ 1118 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1119 cp->cp_eax = n; 1120 1121 /* 1122 * CPUID function 4 expects %ecx to be initialized 1123 * with an index which indicates which cache to return 1124 * information about. The OS is expected to call function 4 1125 * with %ecx set to 0, 1, 2, ... until it returns with 1126 * EAX[4:0] set to 0, which indicates there are no more 1127 * caches. 1128 * 1129 * Here, populate cpi_std[4] with the information returned by 1130 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1131 * when dynamic memory allocation becomes available. 1132 * 1133 * Note: we need to explicitly initialize %ecx here, since 1134 * function 4 may have been previously invoked. 1135 */ 1136 if (n == 4) 1137 cp->cp_ecx = 0; 1138 1139 (void) __cpuid_insn(cp); 1140 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1141 switch (n) { 1142 case 2: 1143 /* 1144 * "the lower 8 bits of the %eax register 1145 * contain a value that identifies the number 1146 * of times the cpuid [instruction] has to be 1147 * executed to obtain a complete image of the 1148 * processor's caching systems." 1149 * 1150 * How *do* they make this stuff up? 1151 */ 1152 cpi->cpi_ncache = sizeof (*cp) * 1153 BITX(cp->cp_eax, 7, 0); 1154 if (cpi->cpi_ncache == 0) 1155 break; 1156 cpi->cpi_ncache--; /* skip count byte */ 1157 1158 /* 1159 * Well, for now, rather than attempt to implement 1160 * this slightly dubious algorithm, we just look 1161 * at the first 15 .. 
1162 */ 1163 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1164 cpi->cpi_ncache = sizeof (*cp) - 1; 1165 1166 dp = cpi->cpi_cacheinfo; 1167 if (BITX(cp->cp_eax, 31, 31) == 0) { 1168 uint8_t *p = (void *)&cp->cp_eax; 1169 for (i = 1; i < 3; i++) 1170 if (p[i] != 0) 1171 *dp++ = p[i]; 1172 } 1173 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1174 uint8_t *p = (void *)&cp->cp_ebx; 1175 for (i = 0; i < 4; i++) 1176 if (p[i] != 0) 1177 *dp++ = p[i]; 1178 } 1179 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1180 uint8_t *p = (void *)&cp->cp_ecx; 1181 for (i = 0; i < 4; i++) 1182 if (p[i] != 0) 1183 *dp++ = p[i]; 1184 } 1185 if (BITX(cp->cp_edx, 31, 31) == 0) { 1186 uint8_t *p = (void *)&cp->cp_edx; 1187 for (i = 0; i < 4; i++) 1188 if (p[i] != 0) 1189 *dp++ = p[i]; 1190 } 1191 break; 1192 1193 case 3: /* Processor serial number, if PSN supported */ 1194 break; 1195 1196 case 4: /* Deterministic cache parameters */ 1197 break; 1198 1199 case 5: /* Monitor/Mwait parameters */ 1200 1201 /* 1202 * check cpi_mwait.support which was set in cpuid_pass1 1203 */ 1204 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1205 break; 1206 1207 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1208 cpi->cpi_mwait.mon_max = (size_t)MWAIT_SIZE_MAX(cpi); 1209 if (MWAIT_EXTENSION(cpi)) { 1210 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1211 if (MWAIT_INT_ENABLE(cpi)) 1212 cpi->cpi_mwait.support |= 1213 MWAIT_ECX_INT_ENABLE; 1214 } 1215 break; 1216 default: 1217 break; 1218 } 1219 } 1220 1221 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1222 goto pass2_done; 1223 1224 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1225 nmax = NMAX_CPI_EXTD; 1226 /* 1227 * Copy the extended properties, fixing them as we go. 1228 * (We already handled n == 0 and n == 1 in pass 1) 1229 */ 1230 iptr = (void *)cpi->cpi_brandstr; 1231 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1232 cp->cp_eax = 0x80000000 + n; 1233 (void) __cpuid_insn(cp); 1234 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1235 switch (n) { 1236 case 2: 1237 case 3: 1238 case 4: 1239 /* 1240 * Extract the brand string 1241 */ 1242 *iptr++ = cp->cp_eax; 1243 *iptr++ = cp->cp_ebx; 1244 *iptr++ = cp->cp_ecx; 1245 *iptr++ = cp->cp_edx; 1246 break; 1247 case 5: 1248 switch (cpi->cpi_vendor) { 1249 case X86_VENDOR_AMD: 1250 /* 1251 * The Athlon and Duron were the first 1252 * parts to report the sizes of the 1253 * TLB for large pages. Before then, 1254 * we don't trust the data. 1255 */ 1256 if (cpi->cpi_family < 6 || 1257 (cpi->cpi_family == 6 && 1258 cpi->cpi_model < 1)) 1259 cp->cp_eax = 0; 1260 break; 1261 default: 1262 break; 1263 } 1264 break; 1265 case 6: 1266 switch (cpi->cpi_vendor) { 1267 case X86_VENDOR_AMD: 1268 /* 1269 * The Athlon and Duron were the first 1270 * AMD parts with L2 TLB's. 1271 * Before then, don't trust the data. 1272 */ 1273 if (cpi->cpi_family < 6 || 1274 cpi->cpi_family == 6 && 1275 cpi->cpi_model < 1) 1276 cp->cp_eax = cp->cp_ebx = 0; 1277 /* 1278 * AMD Duron rev A0 reports L2 1279 * cache size incorrectly as 1K 1280 * when it is really 64K 1281 */ 1282 if (cpi->cpi_family == 6 && 1283 cpi->cpi_model == 3 && 1284 cpi->cpi_step == 0) { 1285 cp->cp_ecx &= 0xffff; 1286 cp->cp_ecx |= 0x400000; 1287 } 1288 break; 1289 case X86_VENDOR_Cyrix: /* VIA C3 */ 1290 /* 1291 * VIA C3 processors are a bit messed 1292 * up w.r.t. encoding cache sizes in %ecx 1293 */ 1294 if (cpi->cpi_family != 6) 1295 break; 1296 /* 1297 * model 7 and 8 were incorrectly encoded 1298 * 1299 * xxx is model 8 really broken? 
1300 */ 1301 if (cpi->cpi_model == 7 || 1302 cpi->cpi_model == 8) 1303 cp->cp_ecx = 1304 BITX(cp->cp_ecx, 31, 24) << 16 | 1305 BITX(cp->cp_ecx, 23, 16) << 12 | 1306 BITX(cp->cp_ecx, 15, 8) << 8 | 1307 BITX(cp->cp_ecx, 7, 0); 1308 /* 1309 * model 9 stepping 1 has wrong associativity 1310 */ 1311 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1312 cp->cp_ecx |= 8 << 12; 1313 break; 1314 case X86_VENDOR_Intel: 1315 /* 1316 * Extended L2 Cache features function. 1317 * First appeared on Prescott. 1318 */ 1319 default: 1320 break; 1321 } 1322 break; 1323 default: 1324 break; 1325 } 1326 } 1327 1328 pass2_done: 1329 cpi->cpi_pass = 2; 1330 } 1331 1332 static const char * 1333 intel_cpubrand(const struct cpuid_info *cpi) 1334 { 1335 int i; 1336 1337 if ((x86_feature & X86_CPUID) == 0 || 1338 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1339 return ("i486"); 1340 1341 switch (cpi->cpi_family) { 1342 case 5: 1343 return ("Intel Pentium(r)"); 1344 case 6: 1345 switch (cpi->cpi_model) { 1346 uint_t celeron, xeon; 1347 const struct cpuid_regs *cp; 1348 case 0: 1349 case 1: 1350 case 2: 1351 return ("Intel Pentium(r) Pro"); 1352 case 3: 1353 case 4: 1354 return ("Intel Pentium(r) II"); 1355 case 6: 1356 return ("Intel Celeron(r)"); 1357 case 5: 1358 case 7: 1359 celeron = xeon = 0; 1360 cp = &cpi->cpi_std[2]; /* cache info */ 1361 1362 for (i = 1; i < 3; i++) { 1363 uint_t tmp; 1364 1365 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1366 if (tmp == 0x40) 1367 celeron++; 1368 if (tmp >= 0x44 && tmp <= 0x45) 1369 xeon++; 1370 } 1371 1372 for (i = 0; i < 2; i++) { 1373 uint_t tmp; 1374 1375 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1376 if (tmp == 0x40) 1377 celeron++; 1378 else if (tmp >= 0x44 && tmp <= 0x45) 1379 xeon++; 1380 } 1381 1382 for (i = 0; i < 4; i++) { 1383 uint_t tmp; 1384 1385 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1386 if (tmp == 0x40) 1387 celeron++; 1388 else if (tmp >= 0x44 && tmp <= 0x45) 1389 xeon++; 1390 } 1391 1392 for (i = 0; i < 4; i++) { 1393 uint_t tmp; 1394 1395 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1396 if (tmp == 0x40) 1397 celeron++; 1398 else if (tmp >= 0x44 && tmp <= 0x45) 1399 xeon++; 1400 } 1401 1402 if (celeron) 1403 return ("Intel Celeron(r)"); 1404 if (xeon) 1405 return (cpi->cpi_model == 5 ? 1406 "Intel Pentium(r) II Xeon(tm)" : 1407 "Intel Pentium(r) III Xeon(tm)"); 1408 return (cpi->cpi_model == 5 ? 
1409 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1410 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1411 default: 1412 break; 1413 } 1414 default: 1415 break; 1416 } 1417 1418 /* BrandID is present if the field is nonzero */ 1419 if (cpi->cpi_brandid != 0) { 1420 static const struct { 1421 uint_t bt_bid; 1422 const char *bt_str; 1423 } brand_tbl[] = { 1424 { 0x1, "Intel(r) Celeron(r)" }, 1425 { 0x2, "Intel(r) Pentium(r) III" }, 1426 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1427 { 0x4, "Intel(r) Pentium(r) III" }, 1428 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1429 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1430 { 0x8, "Intel(r) Pentium(r) 4" }, 1431 { 0x9, "Intel(r) Pentium(r) 4" }, 1432 { 0xa, "Intel(r) Celeron(r)" }, 1433 { 0xb, "Intel(r) Xeon(tm)" }, 1434 { 0xc, "Intel(r) Xeon(tm) MP" }, 1435 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1436 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1437 { 0x11, "Mobile Genuine Intel(r)" }, 1438 { 0x12, "Intel(r) Celeron(r) M" }, 1439 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1440 { 0x14, "Intel(r) Celeron(r)" }, 1441 { 0x15, "Mobile Genuine Intel(r)" }, 1442 { 0x16, "Intel(r) Pentium(r) M" }, 1443 { 0x17, "Mobile Intel(r) Celeron(r)" } 1444 }; 1445 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1446 uint_t sgn; 1447 1448 sgn = (cpi->cpi_family << 8) | 1449 (cpi->cpi_model << 4) | cpi->cpi_step; 1450 1451 for (i = 0; i < btblmax; i++) 1452 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1453 break; 1454 if (i < btblmax) { 1455 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1456 return ("Intel(r) Celeron(r)"); 1457 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1458 return ("Intel(r) Xeon(tm) MP"); 1459 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1460 return ("Intel(r) Xeon(tm)"); 1461 return (brand_tbl[i].bt_str); 1462 } 1463 } 1464 1465 return (NULL); 1466 } 1467 1468 static const char * 1469 amd_cpubrand(const struct cpuid_info *cpi) 1470 { 1471 if ((x86_feature & X86_CPUID) == 0 || 1472 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1473 return ("i486 compatible"); 1474 1475 switch (cpi->cpi_family) { 1476 case 5: 1477 switch (cpi->cpi_model) { 1478 case 0: 1479 case 1: 1480 case 2: 1481 case 3: 1482 case 4: 1483 case 5: 1484 return ("AMD-K5(r)"); 1485 case 6: 1486 case 7: 1487 return ("AMD-K6(r)"); 1488 case 8: 1489 return ("AMD-K6(r)-2"); 1490 case 9: 1491 return ("AMD-K6(r)-III"); 1492 default: 1493 return ("AMD (family 5)"); 1494 } 1495 case 6: 1496 switch (cpi->cpi_model) { 1497 case 1: 1498 return ("AMD-K7(tm)"); 1499 case 0: 1500 case 2: 1501 case 4: 1502 return ("AMD Athlon(tm)"); 1503 case 3: 1504 case 7: 1505 return ("AMD Duron(tm)"); 1506 case 6: 1507 case 8: 1508 case 10: 1509 /* 1510 * Use the L2 cache size to distinguish 1511 */ 1512 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
1513 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1514 default: 1515 return ("AMD (family 6)"); 1516 } 1517 default: 1518 break; 1519 } 1520 1521 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1522 cpi->cpi_brandid != 0) { 1523 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1524 case 3: 1525 return ("AMD Opteron(tm) UP 1xx"); 1526 case 4: 1527 return ("AMD Opteron(tm) DP 2xx"); 1528 case 5: 1529 return ("AMD Opteron(tm) MP 8xx"); 1530 default: 1531 return ("AMD Opteron(tm)"); 1532 } 1533 } 1534 1535 return (NULL); 1536 } 1537 1538 static const char * 1539 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1540 { 1541 if ((x86_feature & X86_CPUID) == 0 || 1542 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1543 type == X86_TYPE_CYRIX_486) 1544 return ("i486 compatible"); 1545 1546 switch (type) { 1547 case X86_TYPE_CYRIX_6x86: 1548 return ("Cyrix 6x86"); 1549 case X86_TYPE_CYRIX_6x86L: 1550 return ("Cyrix 6x86L"); 1551 case X86_TYPE_CYRIX_6x86MX: 1552 return ("Cyrix 6x86MX"); 1553 case X86_TYPE_CYRIX_GXm: 1554 return ("Cyrix GXm"); 1555 case X86_TYPE_CYRIX_MediaGX: 1556 return ("Cyrix MediaGX"); 1557 case X86_TYPE_CYRIX_MII: 1558 return ("Cyrix M2"); 1559 case X86_TYPE_VIA_CYRIX_III: 1560 return ("VIA Cyrix M3"); 1561 default: 1562 /* 1563 * Have another wild guess .. 1564 */ 1565 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1566 return ("Cyrix 5x86"); 1567 else if (cpi->cpi_family == 5) { 1568 switch (cpi->cpi_model) { 1569 case 2: 1570 return ("Cyrix 6x86"); /* Cyrix M1 */ 1571 case 4: 1572 return ("Cyrix MediaGX"); 1573 default: 1574 break; 1575 } 1576 } else if (cpi->cpi_family == 6) { 1577 switch (cpi->cpi_model) { 1578 case 0: 1579 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1580 case 5: 1581 case 6: 1582 case 7: 1583 case 8: 1584 case 9: 1585 return ("VIA C3"); 1586 default: 1587 break; 1588 } 1589 } 1590 break; 1591 } 1592 return (NULL); 1593 } 1594 1595 /* 1596 * This only gets called in the case that the CPU extended 1597 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1598 * aren't available, or contain null bytes for some reason. 1599 */ 1600 static void 1601 fabricate_brandstr(struct cpuid_info *cpi) 1602 { 1603 const char *brand = NULL; 1604 1605 switch (cpi->cpi_vendor) { 1606 case X86_VENDOR_Intel: 1607 brand = intel_cpubrand(cpi); 1608 break; 1609 case X86_VENDOR_AMD: 1610 brand = amd_cpubrand(cpi); 1611 break; 1612 case X86_VENDOR_Cyrix: 1613 brand = cyrix_cpubrand(cpi, x86_type); 1614 break; 1615 case X86_VENDOR_NexGen: 1616 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1617 brand = "NexGen Nx586"; 1618 break; 1619 case X86_VENDOR_Centaur: 1620 if (cpi->cpi_family == 5) 1621 switch (cpi->cpi_model) { 1622 case 4: 1623 brand = "Centaur C6"; 1624 break; 1625 case 8: 1626 brand = "Centaur C2"; 1627 break; 1628 case 9: 1629 brand = "Centaur C3"; 1630 break; 1631 default: 1632 break; 1633 } 1634 break; 1635 case X86_VENDOR_Rise: 1636 if (cpi->cpi_family == 5 && 1637 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1638 brand = "Rise mP6"; 1639 break; 1640 case X86_VENDOR_SiS: 1641 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1642 brand = "SiS 55x"; 1643 break; 1644 case X86_VENDOR_TM: 1645 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 1646 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1647 break; 1648 case X86_VENDOR_NSC: 1649 case X86_VENDOR_UMC: 1650 default: 1651 break; 1652 } 1653 if (brand) { 1654 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1655 return; 1656 } 1657 1658 /* 1659 * If all else fails ... 
1660 */ 1661 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1662 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1663 cpi->cpi_model, cpi->cpi_step); 1664 } 1665 1666 /* 1667 * This routine is called just after kernel memory allocation 1668 * becomes available on cpu0, and as part of mp_startup() on 1669 * the other cpus. 1670 * 1671 * Fixup the brand string, and collect any information from cpuid 1672 * that requires dynamicically allocated storage to represent. 1673 */ 1674 /*ARGSUSED*/ 1675 void 1676 cpuid_pass3(cpu_t *cpu) 1677 { 1678 int i, max, shft, level, size; 1679 struct cpuid_regs regs; 1680 struct cpuid_regs *cp; 1681 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1682 1683 ASSERT(cpi->cpi_pass == 2); 1684 1685 /* 1686 * Function 4: Deterministic cache parameters 1687 * 1688 * Take this opportunity to detect the number of threads 1689 * sharing the last level cache, and construct a corresponding 1690 * cache id. The respective cpuid_info members are initialized 1691 * to the default case of "no last level cache sharing". 1692 */ 1693 cpi->cpi_ncpu_shr_last_cache = 1; 1694 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1695 1696 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1697 1698 /* 1699 * Find the # of elements (size) returned by fn 4, and along 1700 * the way detect last level cache sharing details. 1701 */ 1702 bzero(®s, sizeof (regs)); 1703 cp = ®s; 1704 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1705 cp->cp_eax = 4; 1706 cp->cp_ecx = i; 1707 1708 (void) __cpuid_insn(cp); 1709 1710 if (CPI_CACHE_TYPE(cp) == 0) 1711 break; 1712 level = CPI_CACHE_LVL(cp); 1713 if (level > max) { 1714 max = level; 1715 cpi->cpi_ncpu_shr_last_cache = 1716 CPI_NTHR_SHR_CACHE(cp) + 1; 1717 } 1718 } 1719 cpi->cpi_std_4_size = size = i; 1720 1721 /* 1722 * Allocate the cpi_std_4 array. The first element 1723 * references the regs for fn 4, %ecx == 0, which 1724 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1725 */ 1726 if (size > 0) { 1727 cpi->cpi_std_4 = 1728 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1729 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1730 1731 /* 1732 * Allocate storage to hold the additional regs 1733 * for function 4, %ecx == 1 .. cpi_std_4_size. 1734 * 1735 * The regs for fn 4, %ecx == 0 has already 1736 * been allocated as indicated above. 1737 */ 1738 for (i = 1; i < size; i++) { 1739 cp = cpi->cpi_std_4[i] = 1740 kmem_zalloc(sizeof (regs), KM_SLEEP); 1741 cp->cp_eax = 4; 1742 cp->cp_ecx = i; 1743 1744 (void) __cpuid_insn(cp); 1745 } 1746 } 1747 /* 1748 * Determine the number of bits needed to represent 1749 * the number of CPUs sharing the last level cache. 1750 * 1751 * Shift off that number of bits from the APIC id to 1752 * derive the cache id. 1753 */ 1754 shft = 0; 1755 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1756 shft++; 1757 cpi->cpi_last_lvl_cacheid = CPI_APIC_ID(cpi) >> shft; 1758 } 1759 1760 /* 1761 * Now fixup the brand string 1762 */ 1763 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1764 fabricate_brandstr(cpi); 1765 } else { 1766 1767 /* 1768 * If we successfully extracted a brand string from the cpuid 1769 * instruction, clean it up by removing leading spaces and 1770 * similar junk. 
1771 */ 1772 if (cpi->cpi_brandstr[0]) { 1773 size_t maxlen = sizeof (cpi->cpi_brandstr); 1774 char *src, *dst; 1775 1776 dst = src = (char *)cpi->cpi_brandstr; 1777 src[maxlen - 1] = '\0'; 1778 /* 1779 * strip leading spaces 1780 */ 1781 while (*src == ' ') 1782 src++; 1783 /* 1784 * Remove any 'Genuine' or "Authentic" prefixes 1785 */ 1786 if (strncmp(src, "Genuine ", 8) == 0) 1787 src += 8; 1788 if (strncmp(src, "Authentic ", 10) == 0) 1789 src += 10; 1790 1791 /* 1792 * Now do an in-place copy. 1793 * Map (R) to (r) and (TM) to (tm). 1794 * The era of teletypes is long gone, and there's 1795 * -really- no need to shout. 1796 */ 1797 while (*src != '\0') { 1798 if (src[0] == '(') { 1799 if (strncmp(src + 1, "R)", 2) == 0) { 1800 (void) strncpy(dst, "(r)", 3); 1801 src += 3; 1802 dst += 3; 1803 continue; 1804 } 1805 if (strncmp(src + 1, "TM)", 3) == 0) { 1806 (void) strncpy(dst, "(tm)", 4); 1807 src += 4; 1808 dst += 4; 1809 continue; 1810 } 1811 } 1812 *dst++ = *src++; 1813 } 1814 *dst = '\0'; 1815 1816 /* 1817 * Finally, remove any trailing spaces 1818 */ 1819 while (--dst > cpi->cpi_brandstr) 1820 if (*dst == ' ') 1821 *dst = '\0'; 1822 else 1823 break; 1824 } else 1825 fabricate_brandstr(cpi); 1826 } 1827 cpi->cpi_pass = 3; 1828 } 1829 1830 /* 1831 * This routine is called out of bind_hwcap() much later in the life 1832 * of the kernel (post_startup()). The job of this routine is to resolve 1833 * the hardware feature support and kernel support for those features into 1834 * what we're actually going to tell applications via the aux vector. 1835 */ 1836 uint_t 1837 cpuid_pass4(cpu_t *cpu) 1838 { 1839 struct cpuid_info *cpi; 1840 uint_t hwcap_flags = 0; 1841 1842 if (cpu == NULL) 1843 cpu = CPU; 1844 cpi = cpu->cpu_m.mcpu_cpi; 1845 1846 ASSERT(cpi->cpi_pass == 3); 1847 1848 if (cpi->cpi_maxeax >= 1) { 1849 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 1850 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 1851 1852 *edx = CPI_FEATURES_EDX(cpi); 1853 *ecx = CPI_FEATURES_ECX(cpi); 1854 1855 /* 1856 * [these require explicit kernel support] 1857 */ 1858 if ((x86_feature & X86_SEP) == 0) 1859 *edx &= ~CPUID_INTC_EDX_SEP; 1860 1861 if ((x86_feature & X86_SSE) == 0) 1862 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 1863 if ((x86_feature & X86_SSE2) == 0) 1864 *edx &= ~CPUID_INTC_EDX_SSE2; 1865 1866 if ((x86_feature & X86_HTT) == 0) 1867 *edx &= ~CPUID_INTC_EDX_HTT; 1868 1869 if ((x86_feature & X86_SSE3) == 0) 1870 *ecx &= ~CPUID_INTC_ECX_SSE3; 1871 1872 /* 1873 * [no explicit support required beyond x87 fp context] 1874 */ 1875 if (!fpu_exists) 1876 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 1877 1878 /* 1879 * Now map the supported feature vector to things that we 1880 * think userland will care about. 
1881 */ 1882 if (*edx & CPUID_INTC_EDX_SEP) 1883 hwcap_flags |= AV_386_SEP; 1884 if (*edx & CPUID_INTC_EDX_SSE) 1885 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 1886 if (*edx & CPUID_INTC_EDX_SSE2) 1887 hwcap_flags |= AV_386_SSE2; 1888 if (*ecx & CPUID_INTC_ECX_SSE3) 1889 hwcap_flags |= AV_386_SSE3; 1890 if (*ecx & CPUID_INTC_ECX_POPCNT) 1891 hwcap_flags |= AV_386_POPCNT; 1892 if (*edx & CPUID_INTC_EDX_FPU) 1893 hwcap_flags |= AV_386_FPU; 1894 if (*edx & CPUID_INTC_EDX_MMX) 1895 hwcap_flags |= AV_386_MMX; 1896 1897 if (*edx & CPUID_INTC_EDX_TSC) 1898 hwcap_flags |= AV_386_TSC; 1899 if (*edx & CPUID_INTC_EDX_CX8) 1900 hwcap_flags |= AV_386_CX8; 1901 if (*edx & CPUID_INTC_EDX_CMOV) 1902 hwcap_flags |= AV_386_CMOV; 1903 if (*ecx & CPUID_INTC_ECX_MON) 1904 hwcap_flags |= AV_386_MON; 1905 if (*ecx & CPUID_INTC_ECX_CX16) 1906 hwcap_flags |= AV_386_CX16; 1907 } 1908 1909 if (x86_feature & X86_HTT) 1910 hwcap_flags |= AV_386_PAUSE; 1911 1912 if (cpi->cpi_xmaxeax < 0x80000001) 1913 goto pass4_done; 1914 1915 switch (cpi->cpi_vendor) { 1916 struct cpuid_regs cp; 1917 uint32_t *edx, *ecx; 1918 1919 case X86_VENDOR_Intel: 1920 /* 1921 * Seems like Intel duplicated what we necessary 1922 * here to make the initial crop of 64-bit OS's work. 1923 * Hopefully, those are the only "extended" bits 1924 * they'll add. 1925 */ 1926 /*FALLTHROUGH*/ 1927 1928 case X86_VENDOR_AMD: 1929 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 1930 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 1931 1932 *edx = CPI_FEATURES_XTD_EDX(cpi); 1933 *ecx = CPI_FEATURES_XTD_ECX(cpi); 1934 1935 /* 1936 * [these features require explicit kernel support] 1937 */ 1938 switch (cpi->cpi_vendor) { 1939 case X86_VENDOR_Intel: 1940 break; 1941 1942 case X86_VENDOR_AMD: 1943 if ((x86_feature & X86_TSCP) == 0) 1944 *edx &= ~CPUID_AMD_EDX_TSCP; 1945 if ((x86_feature & X86_SSE4A) == 0) 1946 *ecx &= ~CPUID_AMD_ECX_SSE4A; 1947 break; 1948 1949 default: 1950 break; 1951 } 1952 1953 /* 1954 * [no explicit support required beyond 1955 * x87 fp context and exception handlers] 1956 */ 1957 if (!fpu_exists) 1958 *edx &= ~(CPUID_AMD_EDX_MMXamd | 1959 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 1960 1961 if ((x86_feature & X86_NX) == 0) 1962 *edx &= ~CPUID_AMD_EDX_NX; 1963 #if !defined(__amd64) 1964 *edx &= ~CPUID_AMD_EDX_LM; 1965 #endif 1966 /* 1967 * Now map the supported feature vector to 1968 * things that we think userland will care about. 1969 */ 1970 #if defined(__amd64) 1971 if (*edx & CPUID_AMD_EDX_SYSC) 1972 hwcap_flags |= AV_386_AMD_SYSC; 1973 #endif 1974 if (*edx & CPUID_AMD_EDX_MMXamd) 1975 hwcap_flags |= AV_386_AMD_MMX; 1976 if (*edx & CPUID_AMD_EDX_3DNow) 1977 hwcap_flags |= AV_386_AMD_3DNow; 1978 if (*edx & CPUID_AMD_EDX_3DNowx) 1979 hwcap_flags |= AV_386_AMD_3DNowx; 1980 1981 switch (cpi->cpi_vendor) { 1982 case X86_VENDOR_AMD: 1983 if (*edx & CPUID_AMD_EDX_TSCP) 1984 hwcap_flags |= AV_386_TSCP; 1985 if (*ecx & CPUID_AMD_ECX_AHF64) 1986 hwcap_flags |= AV_386_AHF; 1987 if (*ecx & CPUID_AMD_ECX_SSE4A) 1988 hwcap_flags |= AV_386_AMD_SSE4A; 1989 if (*ecx & CPUID_AMD_ECX_LZCNT) 1990 hwcap_flags |= AV_386_AMD_LZCNT; 1991 break; 1992 1993 case X86_VENDOR_Intel: 1994 /* 1995 * Aarrgh. 1996 * Intel uses a different bit in the same word. 
1997 */ 1998 if (*ecx & CPUID_INTC_ECX_AHF64) 1999 hwcap_flags |= AV_386_AHF; 2000 break; 2001 2002 default: 2003 break; 2004 } 2005 break; 2006 2007 case X86_VENDOR_TM: 2008 cp.cp_eax = 0x80860001; 2009 (void) __cpuid_insn(&cp); 2010 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2011 break; 2012 2013 default: 2014 break; 2015 } 2016 2017 pass4_done: 2018 cpi->cpi_pass = 4; 2019 return (hwcap_flags); 2020 } 2021 2022 2023 /* 2024 * Simulate the cpuid instruction using the data we previously 2025 * captured about this CPU. We try our best to return the truth 2026 * about the hardware, independently of kernel support. 2027 */ 2028 uint32_t 2029 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2030 { 2031 struct cpuid_info *cpi; 2032 struct cpuid_regs *xcp; 2033 2034 if (cpu == NULL) 2035 cpu = CPU; 2036 cpi = cpu->cpu_m.mcpu_cpi; 2037 2038 ASSERT(cpuid_checkpass(cpu, 3)); 2039 2040 /* 2041 * CPUID data is cached in two separate places: cpi_std for standard 2042 * CPUID functions, and cpi_extd for extended CPUID functions. 2043 */ 2044 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2045 xcp = &cpi->cpi_std[cp->cp_eax]; 2046 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2047 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2048 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2049 else 2050 /* 2051 * The caller is asking for data from an input parameter which 2052 * the kernel has not cached. In this case we go fetch from 2053 * the hardware and return the data directly to the user. 2054 */ 2055 return (__cpuid_insn(cp)); 2056 2057 cp->cp_eax = xcp->cp_eax; 2058 cp->cp_ebx = xcp->cp_ebx; 2059 cp->cp_ecx = xcp->cp_ecx; 2060 cp->cp_edx = xcp->cp_edx; 2061 return (cp->cp_eax); 2062 } 2063 2064 int 2065 cpuid_checkpass(cpu_t *cpu, int pass) 2066 { 2067 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2068 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2069 } 2070 2071 int 2072 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2073 { 2074 ASSERT(cpuid_checkpass(cpu, 3)); 2075 2076 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2077 } 2078 2079 int 2080 cpuid_is_cmt(cpu_t *cpu) 2081 { 2082 if (cpu == NULL) 2083 cpu = CPU; 2084 2085 ASSERT(cpuid_checkpass(cpu, 1)); 2086 2087 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2088 } 2089 2090 /* 2091 * AMD and Intel both implement the 64-bit variant of the syscall 2092 * instruction (syscallq), so if there's -any- support for syscall, 2093 * cpuid currently says "yes, we support this". 2094 * 2095 * However, Intel decided to -not- implement the 32-bit variant of the 2096 * syscall instruction, so we provide a predicate to allow our caller 2097 * to test that subtlety here. 2098 */ 2099 /*ARGSUSED*/ 2100 int 2101 cpuid_syscall32_insn(cpu_t *cpu) 2102 { 2103 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 2104 2105 if (cpu == NULL) 2106 cpu = CPU; 2107 2108 /*CSTYLED*/ 2109 { 2110 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2111 2112 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2113 cpi->cpi_xmaxeax >= 0x80000001 && 2114 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2115 return (1); 2116 } 2117 return (0); 2118 } 2119 2120 int 2121 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2122 { 2123 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2124 2125 static const char fmt[] = 2126 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2127 static const char fmt_ht[] = 2128 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2129 2130 ASSERT(cpuid_checkpass(cpu, 1)); 2131 2132 if (cpuid_is_cmt(cpu)) 2133 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2134 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2135 cpi->cpi_family, cpi->cpi_model, 2136 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2137 return (snprintf(s, n, fmt, 2138 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2139 cpi->cpi_family, cpi->cpi_model, 2140 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2141 } 2142 2143 const char * 2144 cpuid_getvendorstr(cpu_t *cpu) 2145 { 2146 ASSERT(cpuid_checkpass(cpu, 1)); 2147 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2148 } 2149 2150 uint_t 2151 cpuid_getvendor(cpu_t *cpu) 2152 { 2153 ASSERT(cpuid_checkpass(cpu, 1)); 2154 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2155 } 2156 2157 uint_t 2158 cpuid_getfamily(cpu_t *cpu) 2159 { 2160 ASSERT(cpuid_checkpass(cpu, 1)); 2161 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2162 } 2163 2164 uint_t 2165 cpuid_getmodel(cpu_t *cpu) 2166 { 2167 ASSERT(cpuid_checkpass(cpu, 1)); 2168 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2169 } 2170 2171 uint_t 2172 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2173 { 2174 ASSERT(cpuid_checkpass(cpu, 1)); 2175 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2176 } 2177 2178 uint_t 2179 cpuid_get_ncore_per_chip(cpu_t *cpu) 2180 { 2181 ASSERT(cpuid_checkpass(cpu, 1)); 2182 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2183 } 2184 2185 uint_t 2186 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2187 { 2188 ASSERT(cpuid_checkpass(cpu, 2)); 2189 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2190 } 2191 2192 id_t 2193 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2194 { 2195 ASSERT(cpuid_checkpass(cpu, 2)); 2196 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2197 } 2198 2199 uint_t 2200 cpuid_getstep(cpu_t *cpu) 2201 { 2202 ASSERT(cpuid_checkpass(cpu, 1)); 2203 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2204 } 2205 2206 uint_t 2207 cpuid_getsig(struct cpu *cpu) 2208 { 2209 ASSERT(cpuid_checkpass(cpu, 1)); 2210 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2211 } 2212 2213 uint32_t 2214 cpuid_getchiprev(struct cpu *cpu) 2215 { 2216 ASSERT(cpuid_checkpass(cpu, 1)); 2217 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2218 } 2219 2220 const char * 2221 cpuid_getchiprevstr(struct cpu *cpu) 2222 { 2223 ASSERT(cpuid_checkpass(cpu, 1)); 2224 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2225 } 2226 2227 uint32_t 2228 cpuid_getsockettype(struct cpu *cpu) 2229 { 2230 ASSERT(cpuid_checkpass(cpu, 1)); 2231 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2232 } 2233 2234 int 2235 cpuid_get_chipid(cpu_t *cpu) 2236 { 2237 ASSERT(cpuid_checkpass(cpu, 1)); 2238 2239 if (cpuid_is_cmt(cpu)) 2240 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2241 return (cpu->cpu_id); 2242 } 2243 2244 id_t 2245 cpuid_get_coreid(cpu_t *cpu) 2246 { 2247 ASSERT(cpuid_checkpass(cpu, 1)); 2248 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2249 } 2250 2251 
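/*
 * Illustrative sketch only (not an additional kernel interface): once a
 * cpu has completed pass 1, callers typically combine the accessors in
 * this file, e.g.
 *
 *	if (cpuid_getvendor(CPU) == X86_VENDOR_AMD &&
 *	    cpuid_getfamily(CPU) == 0xf &&
 *	    cpuid_opteron_erratum(CPU, 95) > 0)
 *		... apply the workaround for erratum #95 ...
 *
 * Each accessor ASSERTs the pass it depends upon, so none of them may be
 * called before the corresponding cpuid_pass*() has run on that cpu.
 */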
int 2252 cpuid_get_clogid(cpu_t *cpu) 2253 { 2254 ASSERT(cpuid_checkpass(cpu, 1)); 2255 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2256 } 2257 2258 void 2259 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2260 { 2261 struct cpuid_info *cpi; 2262 2263 if (cpu == NULL) 2264 cpu = CPU; 2265 cpi = cpu->cpu_m.mcpu_cpi; 2266 2267 ASSERT(cpuid_checkpass(cpu, 1)); 2268 2269 if (pabits) 2270 *pabits = cpi->cpi_pabits; 2271 if (vabits) 2272 *vabits = cpi->cpi_vabits; 2273 } 2274 2275 /* 2276 * Returns the number of data TLB entries for a corresponding 2277 * pagesize. If it can't be computed, or isn't known, the 2278 * routine returns zero. If you ask about an architecturally 2279 * impossible pagesize, the routine will panic (so that the 2280 * hat implementor knows that things are inconsistent.) 2281 */ 2282 uint_t 2283 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2284 { 2285 struct cpuid_info *cpi; 2286 uint_t dtlb_nent = 0; 2287 2288 if (cpu == NULL) 2289 cpu = CPU; 2290 cpi = cpu->cpu_m.mcpu_cpi; 2291 2292 ASSERT(cpuid_checkpass(cpu, 1)); 2293 2294 /* 2295 * Check the L2 TLB info 2296 */ 2297 if (cpi->cpi_xmaxeax >= 0x80000006) { 2298 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2299 2300 switch (pagesize) { 2301 2302 case 4 * 1024: 2303 /* 2304 * All zero in the top 16 bits of the register 2305 * indicates a unified TLB. Size is in low 16 bits. 2306 */ 2307 if ((cp->cp_ebx & 0xffff0000) == 0) 2308 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2309 else 2310 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2311 break; 2312 2313 case 2 * 1024 * 1024: 2314 if ((cp->cp_eax & 0xffff0000) == 0) 2315 dtlb_nent = cp->cp_eax & 0x0000ffff; 2316 else 2317 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2318 break; 2319 2320 default: 2321 panic("unknown L2 pagesize"); 2322 /*NOTREACHED*/ 2323 } 2324 } 2325 2326 if (dtlb_nent != 0) 2327 return (dtlb_nent); 2328 2329 /* 2330 * No L2 TLB support for this size, try L1. 2331 */ 2332 if (cpi->cpi_xmaxeax >= 0x80000005) { 2333 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2334 2335 switch (pagesize) { 2336 case 4 * 1024: 2337 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2338 break; 2339 case 2 * 1024 * 1024: 2340 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2341 break; 2342 default: 2343 panic("unknown L1 d-TLB pagesize"); 2344 /*NOTREACHED*/ 2345 } 2346 } 2347 2348 return (dtlb_nent); 2349 } 2350 2351 /* 2352 * Return 0 if the erratum is not present or not applicable, positive 2353 * if it is, and negative if the status of the erratum is unknown. 2354 * 2355 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2356 * Processors" #25759, Rev 3.57, August 2005 2357 */ 2358 int 2359 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2360 { 2361 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2362 uint_t eax; 2363 2364 /* 2365 * Bail out if this CPU isn't an AMD CPU, or if it's 2366 * a legacy (32-bit) AMD CPU. 
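	 * (Families 4, 5 and 6 are the older 32-bit K5/K6/Athlon lines;
	 * the errata handled below are documented against the AMD64
	 * parts, so we simply report "not present" for everything else.)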
2367 */ 2368 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2369 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2370 cpi->cpi_family == 6) 2371 2372 return (0); 2373 2374 eax = cpi->cpi_std[1].cp_eax; 2375 2376 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2377 #define SH_B3(eax) (eax == 0xf51) 2378 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2379 2380 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2381 2382 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2383 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2384 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2385 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2386 2387 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2388 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2389 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2390 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2391 2392 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2393 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2394 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2395 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2396 #define BH_E4(eax) (eax == 0x20fb1) 2397 #define SH_E5(eax) (eax == 0x20f42) 2398 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2399 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2400 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2401 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2402 DH_E6(eax) || JH_E6(eax)) 2403 2404 switch (erratum) { 2405 case 1: 2406 return (cpi->cpi_family < 0x10); 2407 case 51: /* what does the asterisk mean? */ 2408 return (B(eax) || SH_C0(eax) || CG(eax)); 2409 case 52: 2410 return (B(eax)); 2411 case 57: 2412 return (cpi->cpi_family <= 0x10); 2413 case 58: 2414 return (B(eax)); 2415 case 60: 2416 return (cpi->cpi_family <= 0x10); 2417 case 61: 2418 case 62: 2419 case 63: 2420 case 64: 2421 case 65: 2422 case 66: 2423 case 68: 2424 case 69: 2425 case 70: 2426 case 71: 2427 return (B(eax)); 2428 case 72: 2429 return (SH_B0(eax)); 2430 case 74: 2431 return (B(eax)); 2432 case 75: 2433 return (cpi->cpi_family < 0x10); 2434 case 76: 2435 return (B(eax)); 2436 case 77: 2437 return (cpi->cpi_family <= 0x10); 2438 case 78: 2439 return (B(eax) || SH_C0(eax)); 2440 case 79: 2441 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2442 case 80: 2443 case 81: 2444 case 82: 2445 return (B(eax)); 2446 case 83: 2447 return (B(eax) || SH_C0(eax) || CG(eax)); 2448 case 85: 2449 return (cpi->cpi_family < 0x10); 2450 case 86: 2451 return (SH_C0(eax) || CG(eax)); 2452 case 88: 2453 #if !defined(__amd64) 2454 return (0); 2455 #else 2456 return (B(eax) || SH_C0(eax)); 2457 #endif 2458 case 89: 2459 return (cpi->cpi_family < 0x10); 2460 case 90: 2461 return (B(eax) || SH_C0(eax) || CG(eax)); 2462 case 91: 2463 case 92: 2464 return (B(eax) || SH_C0(eax)); 2465 case 93: 2466 return (SH_C0(eax)); 2467 case 94: 2468 return (B(eax) || SH_C0(eax) || CG(eax)); 2469 case 95: 2470 #if !defined(__amd64) 2471 return (0); 2472 #else 2473 return (B(eax) || SH_C0(eax)); 2474 #endif 2475 case 96: 2476 return (B(eax) || SH_C0(eax) || CG(eax)); 2477 case 97: 2478 case 98: 2479 return (SH_C0(eax) || CG(eax)); 2480 case 99: 2481 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2482 case 100: 2483 return (B(eax) || SH_C0(eax)); 2484 case 101: 2485 case 103: 2486 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2487 case 104: 2488 return (SH_C0(eax) || CG(eax) || D0(eax)); 2489 
	case 105:
2490 	case 106:
2491 	case 107:
2492 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
2493 	case 108:
2494 		return (DH_CG(eax));
2495 	case 109:
2496 		return (SH_C0(eax) || CG(eax) || D0(eax));
2497 	case 110:
2498 		return (D0(eax) || EX(eax));
2499 	case 111:
2500 		return (CG(eax));
2501 	case 112:
2502 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
2503 	case 113:
2504 		return (eax == 0x20fc0);
2505 	case 114:
2506 		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
2507 	case 115:
2508 		return (SH_E0(eax) || JH_E1(eax));
2509 	case 116:
2510 		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
2511 	case 117:
2512 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
2513 	case 118:
2514 		return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
2515 		    JH_E6(eax));
2516 	case 121:
2517 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
2518 	case 122:
2519 		return (cpi->cpi_family < 0x10);
2520 	case 123:
2521 		return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
2522 	case 131:
2523 		return (cpi->cpi_family < 0x10);
2524 	case 6336786:
2525 		/*
2526 		 * Test for AdvPowerMgmtInfo.TscPStateInvariant
2527 		 * if this is a K8 family or newer processor
2528 		 */
2529 		if (CPI_FAMILY(cpi) == 0xf) {
2530 			struct cpuid_regs regs;
2531 			regs.cp_eax = 0x80000007;
2532 			(void) __cpuid_insn(&regs);
2533 			return (!(regs.cp_edx & 0x100));
2534 		}
2535 		return (0);
2536 	case 6323525:
2537 		return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
2538 		    (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
2539 
2540 	default:
2541 		return (-1);
2542 	}
2543 }
2544 
2545 static const char assoc_str[] = "associativity";
2546 static const char line_str[] = "line-size";
2547 static const char size_str[] = "size";
2548 
2549 static void
2550 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
2551     uint32_t val)
2552 {
2553 	char buf[128];
2554 
2555 	/*
2556 	 * ndi_prop_update_int() is used because it is desirable for
2557 	 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
2558 	 */
2559 	if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
2560 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
2561 }
2562 
2563 /*
2564  * Intel-style cache/tlb description
2565  *
2566  * Standard cpuid level 2 gives a randomly ordered
2567  * selection of tags that index into a table that describes
2568  * cache and tlb properties.
2569  */
2570 
2571 static const char l1_icache_str[] = "l1-icache";
2572 static const char l1_dcache_str[] = "l1-dcache";
2573 static const char l2_cache_str[] = "l2-cache";
2574 static const char l3_cache_str[] = "l3-cache";
2575 static const char itlb4k_str[] = "itlb-4K";
2576 static const char dtlb4k_str[] = "dtlb-4K";
2577 static const char itlb4M_str[] = "itlb-4M";
2578 static const char dtlb4M_str[] = "dtlb-4M";
2579 static const char itlb424_str[] = "itlb-4K-2M-4M";
2580 static const char dtlb44_str[] = "dtlb-4K-4M";
2581 static const char sl1_dcache_str[] = "sectored-l1-dcache";
2582 static const char sl2_cache_str[] = "sectored-l2-cache";
2583 static const char itrace_str[] = "itrace-cache";
2584 static const char sl3_cache_str[] = "sectored-l3-cache";
2585 
2586 static const struct cachetab {
2587 	uint8_t		ct_code;
2588 	uint8_t		ct_assoc;
2589 	uint16_t	ct_line_size;
2590 	size_t		ct_size;
2591 	const char	*ct_label;
2592 } intel_ctab[] = {
2593 	/* maintain descending order!
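	 * find_cacheent() walks this table until it reaches an entry whose
	 * code is <= the descriptor being looked up and then checks for an
	 * exact match, so entries listed out of order would never be found.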
*/ 2594 { 0xb4, 4, 0, 256, dtlb4k_str }, 2595 { 0xb3, 4, 0, 128, dtlb4k_str }, 2596 { 0xb0, 4, 0, 128, itlb4k_str }, 2597 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2598 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2599 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2600 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2601 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2602 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2603 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2604 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2605 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2606 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2607 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2608 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 2609 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 2610 { 0x73, 8, 0, 64*1024, itrace_str}, 2611 { 0x72, 8, 0, 32*1024, itrace_str}, 2612 { 0x71, 8, 0, 16*1024, itrace_str}, 2613 { 0x70, 8, 0, 12*1024, itrace_str}, 2614 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 2615 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 2616 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 2617 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 2618 { 0x5d, 0, 0, 256, dtlb44_str}, 2619 { 0x5c, 0, 0, 128, dtlb44_str}, 2620 { 0x5b, 0, 0, 64, dtlb44_str}, 2621 { 0x52, 0, 0, 256, itlb424_str}, 2622 { 0x51, 0, 0, 128, itlb424_str}, 2623 { 0x50, 0, 0, 64, itlb424_str}, 2624 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 2625 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 2626 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 2627 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 2628 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 2629 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 2630 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 2631 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 2632 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 2633 { 0x43, 4, 32, 512*1024, l2_cache_str}, 2634 { 0x42, 4, 32, 256*1024, l2_cache_str}, 2635 { 0x41, 4, 32, 128*1024, l2_cache_str}, 2636 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 2637 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 2638 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 2639 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 2640 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 2641 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 2642 { 0x30, 8, 64, 32*1024, l1_icache_str}, 2643 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 2644 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 2645 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 2646 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 2647 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 2648 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 2649 { 0x0b, 4, 0, 4, itlb4M_str}, 2650 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 2651 { 0x08, 4, 32, 16*1024, l1_icache_str}, 2652 { 0x06, 4, 32, 8*1024, l1_icache_str}, 2653 { 0x04, 4, 0, 8, dtlb4M_str}, 2654 { 0x03, 4, 0, 64, dtlb4k_str}, 2655 { 0x02, 4, 0, 2, itlb4M_str}, 2656 { 0x01, 4, 0, 32, itlb4k_str}, 2657 { 0 } 2658 }; 2659 2660 static const struct cachetab cyrix_ctab[] = { 2661 { 0x70, 4, 0, 32, "tlb-4K" }, 2662 { 0x80, 4, 16, 16*1024, "l1-cache" }, 2663 { 0 } 2664 }; 2665 2666 /* 2667 * Search a cache table for a matching entry 2668 */ 2669 static const struct cachetab * 2670 find_cacheent(const struct cachetab *ct, uint_t code) 2671 { 2672 if (code != 0) { 2673 for (; ct->ct_code != 0; ct++) 2674 if (ct->ct_code <= code) 2675 break; 2676 if (ct->ct_code == code) 2677 return (ct); 2678 } 2679 return (NULL); 2680 } 2681 2682 /* 2683 * Walk the cacheinfo descriptor, applying 'func' to every valid element 2684 * The walk is terminated if the walker returns non-zero. 
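 * (add_cacheent_props() below always returns zero so every descriptor is
 * visited, while intel_l2cinfo() returns non-zero as soon as it has seen
 * an L2 entry and so stops the walk early.)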
2685 */ 2686 static void 2687 intel_walk_cacheinfo(struct cpuid_info *cpi, 2688 void *arg, int (*func)(void *, const struct cachetab *)) 2689 { 2690 const struct cachetab *ct; 2691 uint8_t *dp; 2692 int i; 2693 2694 if ((dp = cpi->cpi_cacheinfo) == NULL) 2695 return; 2696 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2697 /* 2698 * For overloaded descriptor 0x49 we use cpuid function 4 2699 * if supported by the current processor, to update 2700 * cache information. 2701 */ 2702 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4) { 2703 intel_cpuid_4_cache_info(arg, cpi); 2704 continue; 2705 } 2706 2707 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 2708 if (func(arg, ct) != 0) 2709 break; 2710 } 2711 } 2712 } 2713 2714 /* 2715 * (Like the Intel one, except for Cyrix CPUs) 2716 */ 2717 static void 2718 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 2719 void *arg, int (*func)(void *, const struct cachetab *)) 2720 { 2721 const struct cachetab *ct; 2722 uint8_t *dp; 2723 int i; 2724 2725 if ((dp = cpi->cpi_cacheinfo) == NULL) 2726 return; 2727 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2728 /* 2729 * Search Cyrix-specific descriptor table first .. 2730 */ 2731 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 2732 if (func(arg, ct) != 0) 2733 break; 2734 continue; 2735 } 2736 /* 2737 * .. else fall back to the Intel one 2738 */ 2739 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 2740 if (func(arg, ct) != 0) 2741 break; 2742 continue; 2743 } 2744 } 2745 } 2746 2747 /* 2748 * A cacheinfo walker that adds associativity, line-size, and size properties 2749 * to the devinfo node it is passed as an argument. 2750 */ 2751 static int 2752 add_cacheent_props(void *arg, const struct cachetab *ct) 2753 { 2754 dev_info_t *devi = arg; 2755 2756 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 2757 if (ct->ct_line_size != 0) 2758 add_cache_prop(devi, ct->ct_label, line_str, 2759 ct->ct_line_size); 2760 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 2761 return (0); 2762 } 2763 2764 /* 2765 * Add L2 or L3 cache-information using cpuid function 4. This 2766 * function is called from intel_walk_cacheinfo() when descriptor 2767 * 0x49 is encountered. 2768 */ 2769 static void 2770 intel_cpuid_4_cache_info(void *arg, struct cpuid_info *cpi) 2771 { 2772 uint32_t level, i; 2773 2774 struct cachetab ct; 2775 2776 for (i = 0; i < cpi->cpi_std_4_size; i++) { 2777 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 2778 2779 if (level == 2 || level == 3) { 2780 ct.ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 2781 ct.ct_line_size = 2782 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 2783 ct.ct_size = ct.ct_assoc * 2784 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 2785 ct.ct_line_size * 2786 (cpi->cpi_std_4[i]->cp_ecx + 1); 2787 2788 if (level == 2) { 2789 ct.ct_label = l2_cache_str; 2790 } else if (level == 3) { 2791 ct.ct_label = l3_cache_str; 2792 } 2793 2794 (void) add_cacheent_props(arg, 2795 (const struct cachetab *) (&ct)); 2796 } 2797 } 2798 } 2799 2800 static const char fully_assoc[] = "fully-associative?"; 2801 2802 /* 2803 * AMD style cache/tlb description 2804 * 2805 * Extended functions 5 and 6 directly describe properties of 2806 * tlbs and various cache levels. 
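 *
 * For example, with a hypothetical fn 0x80000005 %ecx value of 0x40020140,
 * the L1 data cache decoding in amd_cache_info() below works out to
 * BITX(ecx, 31, 24) = 0x40 (64K), BITX(ecx, 23, 16) = 2 (2-way),
 * BITX(ecx, 15, 8) = 1 line per tag and BITX(ecx, 7, 0) = 0x40 (64 byte
 * lines).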
2807 */ 2808 static void 2809 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 2810 { 2811 switch (assoc) { 2812 case 0: /* reserved; ignore */ 2813 break; 2814 default: 2815 add_cache_prop(devi, label, assoc_str, assoc); 2816 break; 2817 case 0xff: 2818 add_cache_prop(devi, label, fully_assoc, 1); 2819 break; 2820 } 2821 } 2822 2823 static void 2824 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 2825 { 2826 if (size == 0) 2827 return; 2828 add_cache_prop(devi, label, size_str, size); 2829 add_amd_assoc(devi, label, assoc); 2830 } 2831 2832 static void 2833 add_amd_cache(dev_info_t *devi, const char *label, 2834 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 2835 { 2836 if (size == 0 || line_size == 0) 2837 return; 2838 add_amd_assoc(devi, label, assoc); 2839 /* 2840 * Most AMD parts have a sectored cache. Multiple cache lines are 2841 * associated with each tag. A sector consists of all cache lines 2842 * associated with a tag. For example, the AMD K6-III has a sector 2843 * size of 2 cache lines per tag. 2844 */ 2845 if (lines_per_tag != 0) 2846 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 2847 add_cache_prop(devi, label, line_str, line_size); 2848 add_cache_prop(devi, label, size_str, size * 1024); 2849 } 2850 2851 static void 2852 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 2853 { 2854 switch (assoc) { 2855 case 0: /* off */ 2856 break; 2857 case 1: 2858 case 2: 2859 case 4: 2860 add_cache_prop(devi, label, assoc_str, assoc); 2861 break; 2862 case 6: 2863 add_cache_prop(devi, label, assoc_str, 8); 2864 break; 2865 case 8: 2866 add_cache_prop(devi, label, assoc_str, 16); 2867 break; 2868 case 0xf: 2869 add_cache_prop(devi, label, fully_assoc, 1); 2870 break; 2871 default: /* reserved; ignore */ 2872 break; 2873 } 2874 } 2875 2876 static void 2877 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 2878 { 2879 if (size == 0 || assoc == 0) 2880 return; 2881 add_amd_l2_assoc(devi, label, assoc); 2882 add_cache_prop(devi, label, size_str, size); 2883 } 2884 2885 static void 2886 add_amd_l2_cache(dev_info_t *devi, const char *label, 2887 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 2888 { 2889 if (size == 0 || assoc == 0 || line_size == 0) 2890 return; 2891 add_amd_l2_assoc(devi, label, assoc); 2892 if (lines_per_tag != 0) 2893 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 2894 add_cache_prop(devi, label, line_str, line_size); 2895 add_cache_prop(devi, label, size_str, size * 1024); 2896 } 2897 2898 static void 2899 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 2900 { 2901 struct cpuid_regs *cp; 2902 2903 if (cpi->cpi_xmaxeax < 0x80000005) 2904 return; 2905 cp = &cpi->cpi_extd[5]; 2906 2907 /* 2908 * 4M/2M L1 TLB configuration 2909 * 2910 * We report the size for 2M pages because AMD uses two 2911 * TLB entries for one 4M page. 2912 */ 2913 add_amd_tlb(devi, "dtlb-2M", 2914 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 2915 add_amd_tlb(devi, "itlb-2M", 2916 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 2917 2918 /* 2919 * 4K L1 TLB configuration 2920 */ 2921 2922 switch (cpi->cpi_vendor) { 2923 uint_t nentries; 2924 case X86_VENDOR_TM: 2925 if (cpi->cpi_family >= 5) { 2926 /* 2927 * Crusoe processors have 256 TLB entries, but 2928 * cpuid data format constrains them to only 2929 * reporting 255 of them. 
2930 			 */
2931 			if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
2932 				nentries = 256;
2933 			/*
2934 			 * Crusoe processors also have a unified TLB
2935 			 */
2936 			add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
2937 			    nentries);
2938 			break;
2939 		}
2940 		/*FALLTHROUGH*/
2941 	default:
2942 		add_amd_tlb(devi, itlb4k_str,
2943 		    BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
2944 		add_amd_tlb(devi, dtlb4k_str,
2945 		    BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
2946 		break;
2947 	}
2948 
2949 	/*
2950 	 * data L1 cache configuration
2951 	 */
2952 
2953 	add_amd_cache(devi, l1_dcache_str,
2954 	    BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
2955 	    BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
2956 
2957 	/*
2958 	 * code L1 cache configuration
2959 	 */
2960 
2961 	add_amd_cache(devi, l1_icache_str,
2962 	    BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
2963 	    BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
2964 
2965 	if (cpi->cpi_xmaxeax < 0x80000006)
2966 		return;
2967 	cp = &cpi->cpi_extd[6];
2968 
2969 	/* Check for a unified L2 TLB for large pages */
2970 
2971 	if (BITX(cp->cp_eax, 31, 16) == 0)
2972 		add_amd_l2_tlb(devi, "l2-tlb-2M",
2973 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
2974 	else {
2975 		add_amd_l2_tlb(devi, "l2-dtlb-2M",
2976 		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
2977 		add_amd_l2_tlb(devi, "l2-itlb-2M",
2978 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
2979 	}
2980 
2981 	/* Check for a unified L2 TLB for 4K pages */
2982 
2983 	if (BITX(cp->cp_ebx, 31, 16) == 0) {
2984 		add_amd_l2_tlb(devi, "l2-tlb-4K",
2985 		    BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
2986 	} else {
2987 		add_amd_l2_tlb(devi, "l2-dtlb-4K",
2988 		    BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
2989 		add_amd_l2_tlb(devi, "l2-itlb-4K",
2990 		    BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
2991 	}
2992 
2993 	add_amd_l2_cache(devi, l2_cache_str,
2994 	    BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
2995 	    BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
2996 }
2997 
2998 /*
2999  * There are two basic ways that the x86 world describes its cache
3000  * and tlb architecture - Intel's way and AMD's way.
3001  *
3002  * Return which flavor of cache architecture we should use
3003  */
3004 static int
3005 x86_which_cacheinfo(struct cpuid_info *cpi)
3006 {
3007 	switch (cpi->cpi_vendor) {
3008 	case X86_VENDOR_Intel:
3009 		if (cpi->cpi_maxeax >= 2)
3010 			return (X86_VENDOR_Intel);
3011 		break;
3012 	case X86_VENDOR_AMD:
3013 		/*
3014 		 * The K5 model 1 was the first part from AMD that reported
3015 		 * cache sizes via extended cpuid functions.
3016 		 */
3017 		if (cpi->cpi_family > 5 ||
3018 		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3019 			return (X86_VENDOR_AMD);
3020 		break;
3021 	case X86_VENDOR_TM:
3022 		if (cpi->cpi_family >= 5)
3023 			return (X86_VENDOR_AMD);
3024 		/*FALLTHROUGH*/
3025 	default:
3026 		/*
3027 		 * If they have extended CPU data for 0x80000005
3028 		 * then we assume they have AMD-format cache
3029 		 * information.
3030 		 *
3031 		 * If not, and the vendor happens to be Cyrix,
3032 		 * then try our Cyrix-specific handler.
3033 		 *
3034 		 * If we're not Cyrix, then assume we're using Intel's
3035 		 * table-driven format instead.
3036 		 */
3037 		if (cpi->cpi_xmaxeax >= 0x80000005)
3038 			return (X86_VENDOR_AMD);
3039 		else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
3040 			return (X86_VENDOR_Cyrix);
3041 		else if (cpi->cpi_maxeax >= 2)
3042 			return (X86_VENDOR_Intel);
3043 		break;
3044 	}
3045 	return (-1);
3046 }
3047 
3048 /*
3049  * create a node for the given cpu under the prom root node.
3050  * Also, create a cpu node in the device tree.
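 * Each cpu ends up as a child of the "cpus" nexus allocated below, with
 * device_type "cpu", a "reg" property equal to its cpu id, and the
 * assorted cpuid-derived properties added by add_cpunode2devtree().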
3051 */ 3052 static dev_info_t *cpu_nex_devi = NULL; 3053 static kmutex_t cpu_node_lock; 3054 3055 /* 3056 * Called from post_startup() and mp_startup() 3057 */ 3058 void 3059 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3060 { 3061 dev_info_t *cpu_devi; 3062 int create; 3063 3064 mutex_enter(&cpu_node_lock); 3065 3066 /* 3067 * create a nexus node for all cpus identified as 'cpu_id' under 3068 * the root node. 3069 */ 3070 if (cpu_nex_devi == NULL) { 3071 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3072 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3073 mutex_exit(&cpu_node_lock); 3074 return; 3075 } 3076 (void) ndi_devi_online(cpu_nex_devi, 0); 3077 } 3078 3079 /* 3080 * create a child node for cpu identified as 'cpu_id' 3081 */ 3082 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3083 cpu_id); 3084 if (cpu_devi == NULL) { 3085 mutex_exit(&cpu_node_lock); 3086 return; 3087 } 3088 3089 /* device_type */ 3090 3091 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3092 "device_type", "cpu"); 3093 3094 /* reg */ 3095 3096 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3097 "reg", cpu_id); 3098 3099 /* cpu-mhz, and clock-frequency */ 3100 3101 if (cpu_freq > 0) { 3102 long long mul; 3103 3104 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3105 "cpu-mhz", cpu_freq); 3106 3107 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3108 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3109 "clock-frequency", (int)mul); 3110 } 3111 3112 (void) ndi_devi_online(cpu_devi, 0); 3113 3114 if ((x86_feature & X86_CPUID) == 0) { 3115 mutex_exit(&cpu_node_lock); 3116 return; 3117 } 3118 3119 /* vendor-id */ 3120 3121 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3122 "vendor-id", cpi->cpi_vendorstr); 3123 3124 if (cpi->cpi_maxeax == 0) { 3125 mutex_exit(&cpu_node_lock); 3126 return; 3127 } 3128 3129 /* 3130 * family, model, and step 3131 */ 3132 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3133 "family", CPI_FAMILY(cpi)); 3134 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3135 "cpu-model", CPI_MODEL(cpi)); 3136 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3137 "stepping-id", CPI_STEP(cpi)); 3138 3139 /* type */ 3140 3141 switch (cpi->cpi_vendor) { 3142 case X86_VENDOR_Intel: 3143 create = 1; 3144 break; 3145 default: 3146 create = 0; 3147 break; 3148 } 3149 if (create) 3150 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3151 "type", CPI_TYPE(cpi)); 3152 3153 /* ext-family */ 3154 3155 switch (cpi->cpi_vendor) { 3156 case X86_VENDOR_Intel: 3157 case X86_VENDOR_AMD: 3158 create = cpi->cpi_family >= 0xf; 3159 break; 3160 default: 3161 create = 0; 3162 break; 3163 } 3164 if (create) 3165 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3166 "ext-family", CPI_FAMILY_XTD(cpi)); 3167 3168 /* ext-model */ 3169 3170 switch (cpi->cpi_vendor) { 3171 case X86_VENDOR_Intel: 3172 create = CPI_MODEL(cpi) == 0xf; 3173 break; 3174 case X86_VENDOR_AMD: 3175 create = CPI_FAMILY(cpi) == 0xf; 3176 break; 3177 default: 3178 create = 0; 3179 break; 3180 } 3181 if (create) 3182 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3183 "ext-model", CPI_MODEL_XTD(cpi)); 3184 3185 /* generation */ 3186 3187 switch (cpi->cpi_vendor) { 3188 case X86_VENDOR_AMD: 3189 /* 3190 * AMD K5 model 1 was the first part to support this 3191 */ 3192 create = cpi->cpi_xmaxeax >= 0x80000001; 3193 break; 3194 default: 3195 create = 0; 3196 break; 3197 } 3198 if (create) 3199 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3200 "generation", 
	    BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
3201 
3202 	/* brand-id */
3203 
3204 	switch (cpi->cpi_vendor) {
3205 	case X86_VENDOR_Intel:
3206 		/*
3207 		 * brand id first appeared on Pentium III Xeon model 8,
3208 		 * and Celeron model 8 processors and Opteron
3209 		 */
3210 		create = cpi->cpi_family > 6 ||
3211 		    (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
3212 		break;
3213 	case X86_VENDOR_AMD:
3214 		create = cpi->cpi_family >= 0xf;
3215 		break;
3216 	default:
3217 		create = 0;
3218 		break;
3219 	}
3220 	if (create && cpi->cpi_brandid != 0) {
3221 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3222 		    "brand-id", cpi->cpi_brandid);
3223 	}
3224 
3225 	/* chunks, and apic-id */
3226 
3227 	switch (cpi->cpi_vendor) {
3228 		/*
3229 		 * first available on Pentium IV and Opteron (K8)
3230 		 */
3231 	case X86_VENDOR_Intel:
3232 		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
3233 		break;
3234 	case X86_VENDOR_AMD:
3235 		create = cpi->cpi_family >= 0xf;
3236 		break;
3237 	default:
3238 		create = 0;
3239 		break;
3240 	}
3241 	if (create) {
3242 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3243 		    "chunks", CPI_CHUNKS(cpi));
3244 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3245 		    "apic-id", CPI_APIC_ID(cpi));
3246 		if (cpi->cpi_chipid >= 0) {
3247 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3248 			    "chip#", cpi->cpi_chipid);
3249 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3250 			    "clog#", cpi->cpi_clogid);
3251 		}
3252 	}
3253 
3254 	/* cpuid-features */
3255 
3256 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3257 	    "cpuid-features", CPI_FEATURES_EDX(cpi));
3258 
3259 
3260 	/* cpuid-features-ecx */
3261 
3262 	switch (cpi->cpi_vendor) {
3263 	case X86_VENDOR_Intel:
3264 		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
3265 		break;
3266 	default:
3267 		create = 0;
3268 		break;
3269 	}
3270 	if (create)
3271 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3272 		    "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
3273 
3274 	/* ext-cpuid-features */
3275 
3276 	switch (cpi->cpi_vendor) {
3277 	case X86_VENDOR_Intel:
3278 	case X86_VENDOR_AMD:
3279 	case X86_VENDOR_Cyrix:
3280 	case X86_VENDOR_TM:
3281 	case X86_VENDOR_Centaur:
3282 		create = cpi->cpi_xmaxeax >= 0x80000001;
3283 		break;
3284 	default:
3285 		create = 0;
3286 		break;
3287 	}
3288 	if (create) {
3289 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3290 		    "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
3291 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3292 		    "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
3293 	}
3294 
3295 	/*
3296 	 * Brand String first appeared in Intel Pentium IV, AMD K5
3297 	 * model 1, and Cyrix GXm.  On earlier models we try and
3298 	 * simulate something similar .. so this string should always
3299 	 * say -something- about the processor, however lame.
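	 * (For example, a modern part might publish something like
	 * "Intel(r) Xeon(tm) CPU 3.06GHz" here after the pass 3 cleanup,
	 * while a pre-brand-string part gets whatever fabricate_brandstr()
	 * was able to put together.)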
3300 */ 3301 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3302 "brand-string", cpi->cpi_brandstr); 3303 3304 /* 3305 * Finally, cache and tlb information 3306 */ 3307 switch (x86_which_cacheinfo(cpi)) { 3308 case X86_VENDOR_Intel: 3309 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3310 break; 3311 case X86_VENDOR_Cyrix: 3312 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3313 break; 3314 case X86_VENDOR_AMD: 3315 amd_cache_info(cpi, cpu_devi); 3316 break; 3317 default: 3318 break; 3319 } 3320 3321 mutex_exit(&cpu_node_lock); 3322 } 3323 3324 struct l2info { 3325 int *l2i_csz; 3326 int *l2i_lsz; 3327 int *l2i_assoc; 3328 int l2i_ret; 3329 }; 3330 3331 /* 3332 * A cacheinfo walker that fetches the size, line-size and associativity 3333 * of the L2 cache 3334 */ 3335 static int 3336 intel_l2cinfo(void *arg, const struct cachetab *ct) 3337 { 3338 struct l2info *l2i = arg; 3339 int *ip; 3340 3341 if (ct->ct_label != l2_cache_str && 3342 ct->ct_label != sl2_cache_str) 3343 return (0); /* not an L2 -- keep walking */ 3344 3345 if ((ip = l2i->l2i_csz) != NULL) 3346 *ip = ct->ct_size; 3347 if ((ip = l2i->l2i_lsz) != NULL) 3348 *ip = ct->ct_line_size; 3349 if ((ip = l2i->l2i_assoc) != NULL) 3350 *ip = ct->ct_assoc; 3351 l2i->l2i_ret = ct->ct_size; 3352 return (1); /* was an L2 -- terminate walk */ 3353 } 3354 3355 static void 3356 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3357 { 3358 struct cpuid_regs *cp; 3359 uint_t size, assoc; 3360 int *ip; 3361 3362 if (cpi->cpi_xmaxeax < 0x80000006) 3363 return; 3364 cp = &cpi->cpi_extd[6]; 3365 3366 if ((assoc = BITX(cp->cp_ecx, 15, 12)) != 0 && 3367 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3368 uint_t cachesz = size * 1024; 3369 3370 3371 if ((ip = l2i->l2i_csz) != NULL) 3372 *ip = cachesz; 3373 if ((ip = l2i->l2i_lsz) != NULL) 3374 *ip = BITX(cp->cp_ecx, 7, 0); 3375 if ((ip = l2i->l2i_assoc) != NULL) 3376 *ip = assoc; 3377 l2i->l2i_ret = cachesz; 3378 } 3379 } 3380 3381 int 3382 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3383 { 3384 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3385 struct l2info __l2info, *l2i = &__l2info; 3386 3387 l2i->l2i_csz = csz; 3388 l2i->l2i_lsz = lsz; 3389 l2i->l2i_assoc = assoc; 3390 l2i->l2i_ret = -1; 3391 3392 switch (x86_which_cacheinfo(cpi)) { 3393 case X86_VENDOR_Intel: 3394 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3395 break; 3396 case X86_VENDOR_Cyrix: 3397 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3398 break; 3399 case X86_VENDOR_AMD: 3400 amd_l2cacheinfo(cpi, l2i); 3401 break; 3402 default: 3403 break; 3404 } 3405 return (l2i->l2i_ret); 3406 } 3407 3408 size_t 3409 cpuid_get_mwait_size(cpu_t *cpu) 3410 { 3411 ASSERT(cpuid_checkpass(cpu, 2)); 3412 return (cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max); 3413 } 3414