1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Various routines to handle identification 30 * and classification of x86 processors. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/archsystm.h> 35 #include <sys/x86_archext.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 #include <sys/cmn_err.h> 39 #include <sys/sunddi.h> 40 #include <sys/sunndi.h> 41 #include <sys/cpuvar.h> 42 #include <sys/processor.h> 43 #include <sys/pg.h> 44 #include <sys/fp.h> 45 #include <sys/controlregs.h> 46 #include <sys/auxv_386.h> 47 #include <sys/bitmap.h> 48 #include <sys/memnode.h> 49 50 /* 51 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 52 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 53 * them accordingly. For most modern processors, feature detection occurs here 54 * in pass 1. 55 * 56 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 57 * for the boot CPU and does the basic analysis that the early kernel needs. 58 * x86_feature is set based on the return value of cpuid_pass1() of the boot 59 * CPU. 60 * 61 * Pass 1 includes: 62 * 63 * o Determining vendor/model/family/stepping and setting x86_type and 64 * x86_vendor accordingly. 65 * o Processing the feature flags returned by the cpuid instruction while 66 * applying any workarounds or tricks for the specific processor. 67 * o Mapping the feature flags into Solaris feature bits (X86_*). 68 * o Processing extended feature flags if supported by the processor, 69 * again while applying specific processor knowledge. 70 * o Determining the CMT characteristics of the system. 71 * 72 * Pass 1 is done on non-boot CPUs during their initialization and the results 73 * are used only as a meager attempt at ensuring that all processors within the 74 * system support the same features. 75 * 76 * Pass 2 of cpuid feature analysis happens just at the beginning 77 * of startup(). It just copies in and corrects the remainder 78 * of the cpuid data we depend on: standard cpuid functions that we didn't 79 * need for pass1 feature analysis, and extended cpuid functions beyond the 80 * simple feature processing done in pass1. 81 * 82 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 83 * particular kernel memory allocation has been made available. It creates a 84 * readable brand string based on the data collected in the first two passes. 85 * 86 * Pass 4 of cpuid analysis is invoked after post_startup() when all 87 * the support infrastructure for various hardware features has been 88 * initialized. It determines which processor features will be reported 89 * to userland via the aux vector. 90 * 91 * All passes are executed on all CPUs, but only the boot CPU determines what 92 * features the kernel will use. 93 * 94 * Much of the worst junk in this file is for the support of processors 95 * that didn't really implement the cpuid instruction properly. 96 * 97 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 98 * the pass numbers. Accordingly, changes to the pass code may require changes 99 * to the accessor code. 100 */ 101 102 uint_t x86_feature = 0; 103 uint_t x86_vendor = X86_VENDOR_IntelClone; 104 uint_t x86_type = X86_TYPE_OTHER; 105 106 uint_t pentiumpro_bug4046376; 107 uint_t pentiumpro_bug4064495; 108 109 uint_t enable486; 110 111 /* 112 * This set of strings are for processors rumored to support the cpuid 113 * instruction, and is used by locore.s to figure out how to set x86_vendor 114 */ 115 const char CyrixInstead[] = "CyrixInstead"; 116 117 /* 118 * monitor/mwait info. 119 */ 120 struct mwait_info { 121 size_t mon_min; /* min size to avoid missed wakeups */ 122 size_t mon_max; /* size to avoid false wakeups */ 123 uint32_t support; /* processor support of monitor/mwait */ 124 }; 125 126 /* 127 * These constants determine how many of the elements of the 128 * cpuid we cache in the cpuid_info data structure; the 129 * remaining elements are accessible via the cpuid instruction. 130 */ 131 132 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 133 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 0x80000008 */ 134 135 struct cpuid_info { 136 uint_t cpi_pass; /* last pass completed */ 137 /* 138 * standard function information 139 */ 140 uint_t cpi_maxeax; /* fn 0: %eax */ 141 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 142 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 143 144 uint_t cpi_family; /* fn 1: extended family */ 145 uint_t cpi_model; /* fn 1: extended model */ 146 uint_t cpi_step; /* fn 1: stepping */ 147 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 148 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 149 int cpi_clogid; /* fn 1: %ebx: thread # */ 150 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 151 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 152 uint_t cpi_ncache; /* fn 2: number of elements */ 153 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 154 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 155 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 156 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 157 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 158 /* 159 * extended function information 160 */ 161 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 162 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 163 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 164 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 165 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 166 id_t cpi_coreid; 167 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 168 /* Intel: fn 4: %eax[31-26] */ 169 /* 170 * supported feature information 171 */ 172 uint32_t cpi_support[5]; 173 #define STD_EDX_FEATURES 0 174 #define AMD_EDX_FEATURES 1 175 #define TM_EDX_FEATURES 2 176 #define STD_ECX_FEATURES 3 177 #define AMD_ECX_FEATURES 4 178 /* 179 * Synthesized information, where known. 180 */ 181 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 182 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 183 uint32_t cpi_socket; /* Chip package/socket type */ 184 185 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 186 }; 187 188 189 static struct cpuid_info cpuid_info0; 190 191 /* 192 * These bit fields are defined by the Intel Application Note AP-485 193 * "Intel Processor Identification and the CPUID Instruction" 194 */ 195 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 196 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 197 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 198 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 199 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 200 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 201 202 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 203 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 204 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 205 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 206 207 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 208 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 209 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 210 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 211 212 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 213 #define CPI_XMAXEAX_MAX 0x80000100 214 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 215 216 /* 217 * Function 4 (Deterministic Cache Parameters) macros 218 * Defined by Intel Application Note AP-485 219 */ 220 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 221 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 222 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 223 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 224 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 225 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 226 227 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 228 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 229 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 230 231 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 232 233 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 234 235 236 /* 237 * A couple of shorthand macros to identify "later" P6-family chips 238 * like the Pentium M and Core. First, the "older" P6-based stuff 239 * (loosely defined as "pre-Pentium-4"): 240 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 241 */ 242 243 #define IS_LEGACY_P6(cpi) ( \ 244 cpi->cpi_family == 6 && \ 245 (cpi->cpi_model == 1 || \ 246 cpi->cpi_model == 3 || \ 247 cpi->cpi_model == 5 || \ 248 cpi->cpi_model == 6 || \ 249 cpi->cpi_model == 7 || \ 250 cpi->cpi_model == 8 || \ 251 cpi->cpi_model == 0xA || \ 252 cpi->cpi_model == 0xB) \ 253 ) 254 255 /* A "new F6" is everything with family 6 that's not the above */ 256 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 257 258 /* Extended family/model support */ 259 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 260 cpi->cpi_family >= 0xf) 261 262 /* 263 * AMD family 0xf socket types. 264 * First index is 0 for revs B thru E, 1 for F and G. 265 * Second index by (model & 0x3) 266 */ 267 static uint32_t amd_skts[2][4] = { 268 { 269 X86_SOCKET_754, /* 0b00 */ 270 X86_SOCKET_940, /* 0b01 */ 271 X86_SOCKET_754, /* 0b10 */ 272 X86_SOCKET_939 /* 0b11 */ 273 }, 274 { 275 X86_SOCKET_S1g1, /* 0b00 */ 276 X86_SOCKET_F1207, /* 0b01 */ 277 X86_SOCKET_UNKNOWN, /* 0b10 */ 278 X86_SOCKET_AM2 /* 0b11 */ 279 } 280 }; 281 282 /* 283 * Table for mapping AMD Family 0xf model/stepping combination to 284 * chip "revision" and socket type. Only rm_family 0xf is used at the 285 * moment, but AMD family 0x10 will extend the exsiting revision names 286 * so will likely also use this table. 287 * 288 * The first member of this array that matches a given family, extended model 289 * plus model range, and stepping range will be considered a match. 290 */ 291 static const struct amd_rev_mapent { 292 uint_t rm_family; 293 uint_t rm_modello; 294 uint_t rm_modelhi; 295 uint_t rm_steplo; 296 uint_t rm_stephi; 297 uint32_t rm_chiprev; 298 const char *rm_chiprevstr; 299 int rm_sktidx; 300 } amd_revmap[] = { 301 /* 302 * Rev B includes model 0x4 stepping 0 and model 0x5 stepping 0 and 1. 303 */ 304 { 0xf, 0x04, 0x04, 0x0, 0x0, X86_CHIPREV_AMD_F_REV_B, "B", 0 }, 305 { 0xf, 0x05, 0x05, 0x0, 0x1, X86_CHIPREV_AMD_F_REV_B, "B", 0 }, 306 /* 307 * Rev C0 includes model 0x4 stepping 8 and model 0x5 stepping 8 308 */ 309 { 0xf, 0x04, 0x05, 0x8, 0x8, X86_CHIPREV_AMD_F_REV_C0, "C0", 0 }, 310 /* 311 * Rev CG is the rest of extended model 0x0 - i.e., everything 312 * but the rev B and C0 combinations covered above. 313 */ 314 { 0xf, 0x00, 0x0f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_CG, "CG", 0 }, 315 /* 316 * Rev D has extended model 0x1. 317 */ 318 { 0xf, 0x10, 0x1f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_D, "D", 0 }, 319 /* 320 * Rev E has extended model 0x2. 321 * Extended model 0x3 is unused but available to grow into. 322 */ 323 { 0xf, 0x20, 0x3f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_E, "E", 0 }, 324 /* 325 * Rev F has extended models 0x4 and 0x5. 326 */ 327 { 0xf, 0x40, 0x5f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_F, "F", 1 }, 328 /* 329 * Rev G has extended model 0x6. 330 */ 331 { 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", 1 }, 332 }; 333 334 /* 335 * Info for monitor/mwait idle loop. 336 * 337 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 338 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 339 * 2006. 340 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 341 * Documentation Updates" #33633, Rev 2.05, December 2006. 342 */ 343 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 344 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 345 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 346 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 347 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 348 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 349 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 350 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 351 /* 352 * Number of sub-cstates for a given c-state. 353 */ 354 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 355 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 356 357 static void intel_cpuid_4_cache_info(void *, struct cpuid_info *); 358 359 static void 360 synth_amd_info(struct cpuid_info *cpi) 361 { 362 const struct amd_rev_mapent *rmp; 363 uint_t family, model, step; 364 int i; 365 366 /* 367 * Currently only AMD family 0xf uses these fields. 368 */ 369 if (cpi->cpi_family != 0xf) 370 return; 371 372 family = cpi->cpi_family; 373 model = cpi->cpi_model; 374 step = cpi->cpi_step; 375 376 for (i = 0, rmp = amd_revmap; i < sizeof (amd_revmap) / sizeof (*rmp); 377 i++, rmp++) { 378 if (family == rmp->rm_family && 379 model >= rmp->rm_modello && model <= rmp->rm_modelhi && 380 step >= rmp->rm_steplo && step <= rmp->rm_stephi) { 381 cpi->cpi_chiprev = rmp->rm_chiprev; 382 cpi->cpi_chiprevstr = rmp->rm_chiprevstr; 383 cpi->cpi_socket = amd_skts[rmp->rm_sktidx][model & 0x3]; 384 return; 385 } 386 } 387 } 388 389 static void 390 synth_info(struct cpuid_info *cpi) 391 { 392 cpi->cpi_chiprev = X86_CHIPREV_UNKNOWN; 393 cpi->cpi_chiprevstr = "Unknown"; 394 cpi->cpi_socket = X86_SOCKET_UNKNOWN; 395 396 switch (cpi->cpi_vendor) { 397 case X86_VENDOR_AMD: 398 synth_amd_info(cpi); 399 break; 400 401 default: 402 break; 403 404 } 405 } 406 407 /* 408 * Apply up various platform-dependent restrictions where the 409 * underlying platform restrictions mean the CPU can be marked 410 * as less capable than its cpuid instruction would imply. 411 */ 412 413 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 414 415 /* 416 * Some undocumented ways of patching the results of the cpuid 417 * instruction to permit running Solaris 10 on future cpus that 418 * we don't currently support. Could be set to non-zero values 419 * via settings in eeprom. 420 */ 421 422 uint32_t cpuid_feature_ecx_include; 423 uint32_t cpuid_feature_ecx_exclude; 424 uint32_t cpuid_feature_edx_include; 425 uint32_t cpuid_feature_edx_exclude; 426 427 void 428 cpuid_alloc_space(cpu_t *cpu) 429 { 430 /* 431 * By convention, cpu0 is the boot cpu, which is set up 432 * before memory allocation is available. All other cpus get 433 * their cpuid_info struct allocated here. 434 */ 435 ASSERT(cpu->cpu_id != 0); 436 cpu->cpu_m.mcpu_cpi = 437 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 438 } 439 440 void 441 cpuid_free_space(cpu_t *cpu) 442 { 443 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 444 int i; 445 446 ASSERT(cpu->cpu_id != 0); 447 448 /* 449 * Free up any function 4 related dynamic storage 450 */ 451 for (i = 1; i < cpi->cpi_std_4_size; i++) 452 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 453 if (cpi->cpi_std_4_size > 0) 454 kmem_free(cpi->cpi_std_4, 455 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 456 457 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 458 } 459 460 uint_t 461 cpuid_pass1(cpu_t *cpu) 462 { 463 uint32_t mask_ecx, mask_edx; 464 uint_t feature = X86_CPUID; 465 struct cpuid_info *cpi; 466 struct cpuid_regs *cp; 467 int xcpuid; 468 469 470 /* 471 * Space statically allocated for cpu0, ensure pointer is set 472 */ 473 if (cpu->cpu_id == 0) 474 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 475 cpi = cpu->cpu_m.mcpu_cpi; 476 ASSERT(cpi != NULL); 477 cp = &cpi->cpi_std[0]; 478 cp->cp_eax = 0; 479 cpi->cpi_maxeax = __cpuid_insn(cp); 480 { 481 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 482 *iptr++ = cp->cp_ebx; 483 *iptr++ = cp->cp_edx; 484 *iptr++ = cp->cp_ecx; 485 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 486 } 487 488 /* 489 * Map the vendor string to a type code 490 */ 491 if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0) 492 cpi->cpi_vendor = X86_VENDOR_Intel; 493 else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0) 494 cpi->cpi_vendor = X86_VENDOR_AMD; 495 else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0) 496 cpi->cpi_vendor = X86_VENDOR_TM; 497 else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0) 498 /* 499 * CyrixInstead is a variable used by the Cyrix detection code 500 * in locore. 501 */ 502 cpi->cpi_vendor = X86_VENDOR_Cyrix; 503 else if (strcmp(cpi->cpi_vendorstr, "UMC UMC UMC ") == 0) 504 cpi->cpi_vendor = X86_VENDOR_UMC; 505 else if (strcmp(cpi->cpi_vendorstr, "NexGenDriven") == 0) 506 cpi->cpi_vendor = X86_VENDOR_NexGen; 507 else if (strcmp(cpi->cpi_vendorstr, "CentaurHauls") == 0) 508 cpi->cpi_vendor = X86_VENDOR_Centaur; 509 else if (strcmp(cpi->cpi_vendorstr, "RiseRiseRise") == 0) 510 cpi->cpi_vendor = X86_VENDOR_Rise; 511 else if (strcmp(cpi->cpi_vendorstr, "SiS SiS SiS ") == 0) 512 cpi->cpi_vendor = X86_VENDOR_SiS; 513 else if (strcmp(cpi->cpi_vendorstr, "Geode by NSC") == 0) 514 cpi->cpi_vendor = X86_VENDOR_NSC; 515 else 516 cpi->cpi_vendor = X86_VENDOR_IntelClone; 517 518 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 519 520 /* 521 * Limit the range in case of weird hardware 522 */ 523 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 524 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 525 if (cpi->cpi_maxeax < 1) 526 goto pass1_done; 527 528 cp = &cpi->cpi_std[1]; 529 cp->cp_eax = 1; 530 (void) __cpuid_insn(cp); 531 532 /* 533 * Extract identifying constants for easy access. 534 */ 535 cpi->cpi_model = CPI_MODEL(cpi); 536 cpi->cpi_family = CPI_FAMILY(cpi); 537 538 if (cpi->cpi_family == 0xf) 539 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 540 541 /* 542 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 543 * Intel, and presumably everyone else, uses model == 0xf, as 544 * one would expect (max value means possible overflow). Sigh. 545 */ 546 547 switch (cpi->cpi_vendor) { 548 case X86_VENDOR_Intel: 549 if (IS_EXTENDED_MODEL_INTEL(cpi)) 550 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 551 case X86_VENDOR_AMD: 552 if (CPI_FAMILY(cpi) == 0xf) 553 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 554 break; 555 default: 556 if (cpi->cpi_model == 0xf) 557 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 558 break; 559 } 560 561 cpi->cpi_step = CPI_STEP(cpi); 562 cpi->cpi_brandid = CPI_BRANDID(cpi); 563 564 /* 565 * *default* assumptions: 566 * - believe %edx feature word 567 * - ignore %ecx feature word 568 * - 32-bit virtual and physical addressing 569 */ 570 mask_edx = 0xffffffff; 571 mask_ecx = 0; 572 573 cpi->cpi_pabits = cpi->cpi_vabits = 32; 574 575 switch (cpi->cpi_vendor) { 576 case X86_VENDOR_Intel: 577 if (cpi->cpi_family == 5) 578 x86_type = X86_TYPE_P5; 579 else if (IS_LEGACY_P6(cpi)) { 580 x86_type = X86_TYPE_P6; 581 pentiumpro_bug4046376 = 1; 582 pentiumpro_bug4064495 = 1; 583 /* 584 * Clear the SEP bit when it was set erroneously 585 */ 586 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 587 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 588 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 589 x86_type = X86_TYPE_P4; 590 /* 591 * We don't currently depend on any of the %ecx 592 * features until Prescott, so we'll only check 593 * this from P4 onwards. We might want to revisit 594 * that idea later. 595 */ 596 mask_ecx = 0xffffffff; 597 } else if (cpi->cpi_family > 0xf) 598 mask_ecx = 0xffffffff; 599 /* 600 * We don't support MONITOR/MWAIT if leaf 5 is not available 601 * to obtain the monitor linesize. 602 */ 603 if (cpi->cpi_maxeax < 5) 604 mask_ecx &= ~CPUID_INTC_ECX_MON; 605 break; 606 case X86_VENDOR_IntelClone: 607 default: 608 break; 609 case X86_VENDOR_AMD: 610 #if defined(OPTERON_ERRATUM_108) 611 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 612 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 613 cpi->cpi_model = 0xc; 614 } else 615 #endif 616 if (cpi->cpi_family == 5) { 617 /* 618 * AMD K5 and K6 619 * 620 * These CPUs have an incomplete implementation 621 * of MCA/MCE which we mask away. 622 */ 623 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 624 625 /* 626 * Model 0 uses the wrong (APIC) bit 627 * to indicate PGE. Fix it here. 628 */ 629 if (cpi->cpi_model == 0) { 630 if (cp->cp_edx & 0x200) { 631 cp->cp_edx &= ~0x200; 632 cp->cp_edx |= CPUID_INTC_EDX_PGE; 633 } 634 } 635 636 /* 637 * Early models had problems w/ MMX; disable. 638 */ 639 if (cpi->cpi_model < 6) 640 mask_edx &= ~CPUID_INTC_EDX_MMX; 641 } 642 643 /* 644 * For newer families, SSE3 and CX16, at least, are valid; 645 * enable all 646 */ 647 if (cpi->cpi_family >= 0xf) 648 mask_ecx = 0xffffffff; 649 /* 650 * We don't support MONITOR/MWAIT if leaf 5 is not available 651 * to obtain the monitor linesize. 652 */ 653 if (cpi->cpi_maxeax < 5) 654 mask_ecx &= ~CPUID_INTC_ECX_MON; 655 break; 656 case X86_VENDOR_TM: 657 /* 658 * workaround the NT workaround in CMS 4.1 659 */ 660 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 661 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 662 cp->cp_edx |= CPUID_INTC_EDX_CX8; 663 break; 664 case X86_VENDOR_Centaur: 665 /* 666 * workaround the NT workarounds again 667 */ 668 if (cpi->cpi_family == 6) 669 cp->cp_edx |= CPUID_INTC_EDX_CX8; 670 break; 671 case X86_VENDOR_Cyrix: 672 /* 673 * We rely heavily on the probing in locore 674 * to actually figure out what parts, if any, 675 * of the Cyrix cpuid instruction to believe. 676 */ 677 switch (x86_type) { 678 case X86_TYPE_CYRIX_486: 679 mask_edx = 0; 680 break; 681 case X86_TYPE_CYRIX_6x86: 682 mask_edx = 0; 683 break; 684 case X86_TYPE_CYRIX_6x86L: 685 mask_edx = 686 CPUID_INTC_EDX_DE | 687 CPUID_INTC_EDX_CX8; 688 break; 689 case X86_TYPE_CYRIX_6x86MX: 690 mask_edx = 691 CPUID_INTC_EDX_DE | 692 CPUID_INTC_EDX_MSR | 693 CPUID_INTC_EDX_CX8 | 694 CPUID_INTC_EDX_PGE | 695 CPUID_INTC_EDX_CMOV | 696 CPUID_INTC_EDX_MMX; 697 break; 698 case X86_TYPE_CYRIX_GXm: 699 mask_edx = 700 CPUID_INTC_EDX_MSR | 701 CPUID_INTC_EDX_CX8 | 702 CPUID_INTC_EDX_CMOV | 703 CPUID_INTC_EDX_MMX; 704 break; 705 case X86_TYPE_CYRIX_MediaGX: 706 break; 707 case X86_TYPE_CYRIX_MII: 708 case X86_TYPE_VIA_CYRIX_III: 709 mask_edx = 710 CPUID_INTC_EDX_DE | 711 CPUID_INTC_EDX_TSC | 712 CPUID_INTC_EDX_MSR | 713 CPUID_INTC_EDX_CX8 | 714 CPUID_INTC_EDX_PGE | 715 CPUID_INTC_EDX_CMOV | 716 CPUID_INTC_EDX_MMX; 717 break; 718 default: 719 break; 720 } 721 break; 722 } 723 724 /* 725 * Now we've figured out the masks that determine 726 * which bits we choose to believe, apply the masks 727 * to the feature words, then map the kernel's view 728 * of these feature words into its feature word. 729 */ 730 cp->cp_edx &= mask_edx; 731 cp->cp_ecx &= mask_ecx; 732 733 /* 734 * apply any platform restrictions (we don't call this 735 * immediately after __cpuid_insn here, because we need the 736 * workarounds applied above first) 737 */ 738 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 739 740 /* 741 * fold in overrides from the "eeprom" mechanism 742 */ 743 cp->cp_edx |= cpuid_feature_edx_include; 744 cp->cp_edx &= ~cpuid_feature_edx_exclude; 745 746 cp->cp_ecx |= cpuid_feature_ecx_include; 747 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 748 749 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 750 feature |= X86_LARGEPAGE; 751 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 752 feature |= X86_TSC; 753 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 754 feature |= X86_MSR; 755 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 756 feature |= X86_MTRR; 757 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 758 feature |= X86_PGE; 759 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 760 feature |= X86_CMOV; 761 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 762 feature |= X86_MMX; 763 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 764 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 765 feature |= X86_MCA; 766 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 767 feature |= X86_PAE; 768 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 769 feature |= X86_CX8; 770 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 771 feature |= X86_CX16; 772 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 773 feature |= X86_PAT; 774 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 775 feature |= X86_SEP; 776 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 777 /* 778 * In our implementation, fxsave/fxrstor 779 * are prerequisites before we'll even 780 * try and do SSE things. 781 */ 782 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 783 feature |= X86_SSE; 784 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 785 feature |= X86_SSE2; 786 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 787 feature |= X86_SSE3; 788 } 789 if (cp->cp_edx & CPUID_INTC_EDX_DE) 790 feature |= X86_DE; 791 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 792 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 793 feature |= X86_MWAIT; 794 } 795 796 if (feature & X86_PAE) 797 cpi->cpi_pabits = 36; 798 799 /* 800 * Hyperthreading configuration is slightly tricky on Intel 801 * and pure clones, and even trickier on AMD. 802 * 803 * (AMD chose to set the HTT bit on their CMP processors, 804 * even though they're not actually hyperthreaded. Thus it 805 * takes a bit more work to figure out what's really going 806 * on ... see the handling of the CMP_LGCY bit below) 807 */ 808 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 809 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 810 if (cpi->cpi_ncpu_per_chip > 1) 811 feature |= X86_HTT; 812 } else { 813 cpi->cpi_ncpu_per_chip = 1; 814 } 815 816 /* 817 * Work on the "extended" feature information, doing 818 * some basic initialization for cpuid_pass2() 819 */ 820 xcpuid = 0; 821 switch (cpi->cpi_vendor) { 822 case X86_VENDOR_Intel: 823 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 824 xcpuid++; 825 break; 826 case X86_VENDOR_AMD: 827 if (cpi->cpi_family > 5 || 828 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 829 xcpuid++; 830 break; 831 case X86_VENDOR_Cyrix: 832 /* 833 * Only these Cyrix CPUs are -known- to support 834 * extended cpuid operations. 835 */ 836 if (x86_type == X86_TYPE_VIA_CYRIX_III || 837 x86_type == X86_TYPE_CYRIX_GXm) 838 xcpuid++; 839 break; 840 case X86_VENDOR_Centaur: 841 case X86_VENDOR_TM: 842 default: 843 xcpuid++; 844 break; 845 } 846 847 if (xcpuid) { 848 cp = &cpi->cpi_extd[0]; 849 cp->cp_eax = 0x80000000; 850 cpi->cpi_xmaxeax = __cpuid_insn(cp); 851 } 852 853 if (cpi->cpi_xmaxeax & 0x80000000) { 854 855 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 856 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 857 858 switch (cpi->cpi_vendor) { 859 case X86_VENDOR_Intel: 860 case X86_VENDOR_AMD: 861 if (cpi->cpi_xmaxeax < 0x80000001) 862 break; 863 cp = &cpi->cpi_extd[1]; 864 cp->cp_eax = 0x80000001; 865 (void) __cpuid_insn(cp); 866 867 if (cpi->cpi_vendor == X86_VENDOR_AMD && 868 cpi->cpi_family == 5 && 869 cpi->cpi_model == 6 && 870 cpi->cpi_step == 6) { 871 /* 872 * K6 model 6 uses bit 10 to indicate SYSC 873 * Later models use bit 11. Fix it here. 874 */ 875 if (cp->cp_edx & 0x400) { 876 cp->cp_edx &= ~0x400; 877 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 878 } 879 } 880 881 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 882 883 /* 884 * Compute the additions to the kernel's feature word. 885 */ 886 if (cp->cp_edx & CPUID_AMD_EDX_NX) 887 feature |= X86_NX; 888 889 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 890 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 891 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 892 feature |= X86_SSE4A; 893 894 /* 895 * If both the HTT and CMP_LGCY bits are set, 896 * then we're not actually HyperThreaded. Read 897 * "AMD CPUID Specification" for more details. 898 */ 899 if (cpi->cpi_vendor == X86_VENDOR_AMD && 900 (feature & X86_HTT) && 901 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 902 feature &= ~X86_HTT; 903 feature |= X86_CMP; 904 } 905 #if defined(__amd64) 906 /* 907 * It's really tricky to support syscall/sysret in 908 * the i386 kernel; we rely on sysenter/sysexit 909 * instead. In the amd64 kernel, things are -way- 910 * better. 911 */ 912 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 913 feature |= X86_ASYSC; 914 915 /* 916 * While we're thinking about system calls, note 917 * that AMD processors don't support sysenter 918 * in long mode at all, so don't try to program them. 919 */ 920 if (x86_vendor == X86_VENDOR_AMD) 921 feature &= ~X86_SEP; 922 #endif 923 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 924 feature |= X86_TSCP; 925 break; 926 default: 927 break; 928 } 929 930 /* 931 * Get CPUID data about processor cores and hyperthreads. 932 */ 933 switch (cpi->cpi_vendor) { 934 case X86_VENDOR_Intel: 935 if (cpi->cpi_maxeax >= 4) { 936 cp = &cpi->cpi_std[4]; 937 cp->cp_eax = 4; 938 cp->cp_ecx = 0; 939 (void) __cpuid_insn(cp); 940 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 941 } 942 /*FALLTHROUGH*/ 943 case X86_VENDOR_AMD: 944 if (cpi->cpi_xmaxeax < 0x80000008) 945 break; 946 cp = &cpi->cpi_extd[8]; 947 cp->cp_eax = 0x80000008; 948 (void) __cpuid_insn(cp); 949 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 950 951 /* 952 * Virtual and physical address limits from 953 * cpuid override previously guessed values. 954 */ 955 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 956 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 957 break; 958 default: 959 break; 960 } 961 962 /* 963 * Derive the number of cores per chip 964 */ 965 switch (cpi->cpi_vendor) { 966 case X86_VENDOR_Intel: 967 if (cpi->cpi_maxeax < 4) { 968 cpi->cpi_ncore_per_chip = 1; 969 break; 970 } else { 971 cpi->cpi_ncore_per_chip = 972 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 973 } 974 break; 975 case X86_VENDOR_AMD: 976 if (cpi->cpi_xmaxeax < 0x80000008) { 977 cpi->cpi_ncore_per_chip = 1; 978 break; 979 } else { 980 cpi->cpi_ncore_per_chip = 981 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 982 } 983 break; 984 default: 985 cpi->cpi_ncore_per_chip = 1; 986 break; 987 } 988 } 989 990 /* 991 * If more than one core, then this processor is CMP. 992 */ 993 if (cpi->cpi_ncore_per_chip > 1) 994 feature |= X86_CMP; 995 996 /* 997 * If the number of cores is the same as the number 998 * of CPUs, then we cannot have HyperThreading. 999 */ 1000 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1001 feature &= ~X86_HTT; 1002 1003 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1004 /* 1005 * Single-core single-threaded processors. 1006 */ 1007 cpi->cpi_chipid = -1; 1008 cpi->cpi_clogid = 0; 1009 cpi->cpi_coreid = cpu->cpu_id; 1010 } else if (cpi->cpi_ncpu_per_chip > 1) { 1011 uint_t i; 1012 uint_t chipid_shift = 0; 1013 uint_t coreid_shift = 0; 1014 uint_t apic_id = CPI_APIC_ID(cpi); 1015 1016 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1017 chipid_shift++; 1018 cpi->cpi_chipid = apic_id >> chipid_shift; 1019 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1020 1021 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1022 if (feature & X86_CMP) { 1023 /* 1024 * Multi-core (and possibly multi-threaded) 1025 * processors. 1026 */ 1027 uint_t ncpu_per_core; 1028 if (cpi->cpi_ncore_per_chip == 1) 1029 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1030 else if (cpi->cpi_ncore_per_chip > 1) 1031 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1032 cpi->cpi_ncore_per_chip; 1033 /* 1034 * 8bit APIC IDs on dual core Pentiums 1035 * look like this: 1036 * 1037 * +-----------------------+------+------+ 1038 * | Physical Package ID | MC | HT | 1039 * +-----------------------+------+------+ 1040 * <------- chipid --------> 1041 * <------- coreid ---------------> 1042 * <--- clogid --> 1043 * 1044 * Where the number of bits necessary to 1045 * represent MC and HT fields together equals 1046 * to the minimum number of bits necessary to 1047 * store the value of cpi->cpi_ncpu_per_chip. 1048 * Of those bits, the MC part uses the number 1049 * of bits necessary to store the value of 1050 * cpi->cpi_ncore_per_chip. 1051 */ 1052 for (i = 1; i < ncpu_per_core; i <<= 1) 1053 coreid_shift++; 1054 cpi->cpi_coreid = apic_id >> coreid_shift; 1055 } else if (feature & X86_HTT) { 1056 /* 1057 * Single-core multi-threaded processors. 1058 */ 1059 cpi->cpi_coreid = cpi->cpi_chipid; 1060 } 1061 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1062 /* 1063 * AMD currently only has dual-core processors with 1064 * single-threaded cores. If they ever release 1065 * multi-threaded processors, then this code 1066 * will have to be updated. 1067 */ 1068 cpi->cpi_coreid = cpu->cpu_id; 1069 } else { 1070 /* 1071 * All other processors are currently 1072 * assumed to have single cores. 1073 */ 1074 cpi->cpi_coreid = cpi->cpi_chipid; 1075 } 1076 } 1077 1078 /* 1079 * Synthesize chip "revision" and socket type 1080 */ 1081 synth_info(cpi); 1082 1083 pass1_done: 1084 cpi->cpi_pass = 1; 1085 return (feature); 1086 } 1087 1088 /* 1089 * Make copies of the cpuid table entries we depend on, in 1090 * part for ease of parsing now, in part so that we have only 1091 * one place to correct any of it, in part for ease of 1092 * later export to userland, and in part so we can look at 1093 * this stuff in a crash dump. 1094 */ 1095 1096 /*ARGSUSED*/ 1097 void 1098 cpuid_pass2(cpu_t *cpu) 1099 { 1100 uint_t n, nmax; 1101 int i; 1102 struct cpuid_regs *cp; 1103 uint8_t *dp; 1104 uint32_t *iptr; 1105 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1106 1107 ASSERT(cpi->cpi_pass == 1); 1108 1109 if (cpi->cpi_maxeax < 1) 1110 goto pass2_done; 1111 1112 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1113 nmax = NMAX_CPI_STD; 1114 /* 1115 * (We already handled n == 0 and n == 1 in pass 1) 1116 */ 1117 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1118 cp->cp_eax = n; 1119 1120 /* 1121 * CPUID function 4 expects %ecx to be initialized 1122 * with an index which indicates which cache to return 1123 * information about. The OS is expected to call function 4 1124 * with %ecx set to 0, 1, 2, ... until it returns with 1125 * EAX[4:0] set to 0, which indicates there are no more 1126 * caches. 1127 * 1128 * Here, populate cpi_std[4] with the information returned by 1129 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1130 * when dynamic memory allocation becomes available. 1131 * 1132 * Note: we need to explicitly initialize %ecx here, since 1133 * function 4 may have been previously invoked. 1134 */ 1135 if (n == 4) 1136 cp->cp_ecx = 0; 1137 1138 (void) __cpuid_insn(cp); 1139 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1140 switch (n) { 1141 case 2: 1142 /* 1143 * "the lower 8 bits of the %eax register 1144 * contain a value that identifies the number 1145 * of times the cpuid [instruction] has to be 1146 * executed to obtain a complete image of the 1147 * processor's caching systems." 1148 * 1149 * How *do* they make this stuff up? 1150 */ 1151 cpi->cpi_ncache = sizeof (*cp) * 1152 BITX(cp->cp_eax, 7, 0); 1153 if (cpi->cpi_ncache == 0) 1154 break; 1155 cpi->cpi_ncache--; /* skip count byte */ 1156 1157 /* 1158 * Well, for now, rather than attempt to implement 1159 * this slightly dubious algorithm, we just look 1160 * at the first 15 .. 1161 */ 1162 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1163 cpi->cpi_ncache = sizeof (*cp) - 1; 1164 1165 dp = cpi->cpi_cacheinfo; 1166 if (BITX(cp->cp_eax, 31, 31) == 0) { 1167 uint8_t *p = (void *)&cp->cp_eax; 1168 for (i = 1; i < 3; i++) 1169 if (p[i] != 0) 1170 *dp++ = p[i]; 1171 } 1172 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1173 uint8_t *p = (void *)&cp->cp_ebx; 1174 for (i = 0; i < 4; i++) 1175 if (p[i] != 0) 1176 *dp++ = p[i]; 1177 } 1178 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1179 uint8_t *p = (void *)&cp->cp_ecx; 1180 for (i = 0; i < 4; i++) 1181 if (p[i] != 0) 1182 *dp++ = p[i]; 1183 } 1184 if (BITX(cp->cp_edx, 31, 31) == 0) { 1185 uint8_t *p = (void *)&cp->cp_edx; 1186 for (i = 0; i < 4; i++) 1187 if (p[i] != 0) 1188 *dp++ = p[i]; 1189 } 1190 break; 1191 1192 case 3: /* Processor serial number, if PSN supported */ 1193 break; 1194 1195 case 4: /* Deterministic cache parameters */ 1196 break; 1197 1198 case 5: /* Monitor/Mwait parameters */ 1199 1200 /* 1201 * check cpi_mwait.support which was set in cpuid_pass1 1202 */ 1203 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1204 break; 1205 1206 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1207 cpi->cpi_mwait.mon_max = (size_t)MWAIT_SIZE_MAX(cpi); 1208 if (MWAIT_EXTENSION(cpi)) { 1209 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1210 if (MWAIT_INT_ENABLE(cpi)) 1211 cpi->cpi_mwait.support |= 1212 MWAIT_ECX_INT_ENABLE; 1213 } 1214 break; 1215 default: 1216 break; 1217 } 1218 } 1219 1220 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1221 goto pass2_done; 1222 1223 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1224 nmax = NMAX_CPI_EXTD; 1225 /* 1226 * Copy the extended properties, fixing them as we go. 1227 * (We already handled n == 0 and n == 1 in pass 1) 1228 */ 1229 iptr = (void *)cpi->cpi_brandstr; 1230 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1231 cp->cp_eax = 0x80000000 + n; 1232 (void) __cpuid_insn(cp); 1233 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1234 switch (n) { 1235 case 2: 1236 case 3: 1237 case 4: 1238 /* 1239 * Extract the brand string 1240 */ 1241 *iptr++ = cp->cp_eax; 1242 *iptr++ = cp->cp_ebx; 1243 *iptr++ = cp->cp_ecx; 1244 *iptr++ = cp->cp_edx; 1245 break; 1246 case 5: 1247 switch (cpi->cpi_vendor) { 1248 case X86_VENDOR_AMD: 1249 /* 1250 * The Athlon and Duron were the first 1251 * parts to report the sizes of the 1252 * TLB for large pages. Before then, 1253 * we don't trust the data. 1254 */ 1255 if (cpi->cpi_family < 6 || 1256 (cpi->cpi_family == 6 && 1257 cpi->cpi_model < 1)) 1258 cp->cp_eax = 0; 1259 break; 1260 default: 1261 break; 1262 } 1263 break; 1264 case 6: 1265 switch (cpi->cpi_vendor) { 1266 case X86_VENDOR_AMD: 1267 /* 1268 * The Athlon and Duron were the first 1269 * AMD parts with L2 TLB's. 1270 * Before then, don't trust the data. 1271 */ 1272 if (cpi->cpi_family < 6 || 1273 cpi->cpi_family == 6 && 1274 cpi->cpi_model < 1) 1275 cp->cp_eax = cp->cp_ebx = 0; 1276 /* 1277 * AMD Duron rev A0 reports L2 1278 * cache size incorrectly as 1K 1279 * when it is really 64K 1280 */ 1281 if (cpi->cpi_family == 6 && 1282 cpi->cpi_model == 3 && 1283 cpi->cpi_step == 0) { 1284 cp->cp_ecx &= 0xffff; 1285 cp->cp_ecx |= 0x400000; 1286 } 1287 break; 1288 case X86_VENDOR_Cyrix: /* VIA C3 */ 1289 /* 1290 * VIA C3 processors are a bit messed 1291 * up w.r.t. encoding cache sizes in %ecx 1292 */ 1293 if (cpi->cpi_family != 6) 1294 break; 1295 /* 1296 * model 7 and 8 were incorrectly encoded 1297 * 1298 * xxx is model 8 really broken? 1299 */ 1300 if (cpi->cpi_model == 7 || 1301 cpi->cpi_model == 8) 1302 cp->cp_ecx = 1303 BITX(cp->cp_ecx, 31, 24) << 16 | 1304 BITX(cp->cp_ecx, 23, 16) << 12 | 1305 BITX(cp->cp_ecx, 15, 8) << 8 | 1306 BITX(cp->cp_ecx, 7, 0); 1307 /* 1308 * model 9 stepping 1 has wrong associativity 1309 */ 1310 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1311 cp->cp_ecx |= 8 << 12; 1312 break; 1313 case X86_VENDOR_Intel: 1314 /* 1315 * Extended L2 Cache features function. 1316 * First appeared on Prescott. 1317 */ 1318 default: 1319 break; 1320 } 1321 break; 1322 default: 1323 break; 1324 } 1325 } 1326 1327 pass2_done: 1328 cpi->cpi_pass = 2; 1329 } 1330 1331 static const char * 1332 intel_cpubrand(const struct cpuid_info *cpi) 1333 { 1334 int i; 1335 1336 if ((x86_feature & X86_CPUID) == 0 || 1337 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1338 return ("i486"); 1339 1340 switch (cpi->cpi_family) { 1341 case 5: 1342 return ("Intel Pentium(r)"); 1343 case 6: 1344 switch (cpi->cpi_model) { 1345 uint_t celeron, xeon; 1346 const struct cpuid_regs *cp; 1347 case 0: 1348 case 1: 1349 case 2: 1350 return ("Intel Pentium(r) Pro"); 1351 case 3: 1352 case 4: 1353 return ("Intel Pentium(r) II"); 1354 case 6: 1355 return ("Intel Celeron(r)"); 1356 case 5: 1357 case 7: 1358 celeron = xeon = 0; 1359 cp = &cpi->cpi_std[2]; /* cache info */ 1360 1361 for (i = 1; i < 3; i++) { 1362 uint_t tmp; 1363 1364 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1365 if (tmp == 0x40) 1366 celeron++; 1367 if (tmp >= 0x44 && tmp <= 0x45) 1368 xeon++; 1369 } 1370 1371 for (i = 0; i < 2; i++) { 1372 uint_t tmp; 1373 1374 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1375 if (tmp == 0x40) 1376 celeron++; 1377 else if (tmp >= 0x44 && tmp <= 0x45) 1378 xeon++; 1379 } 1380 1381 for (i = 0; i < 4; i++) { 1382 uint_t tmp; 1383 1384 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1385 if (tmp == 0x40) 1386 celeron++; 1387 else if (tmp >= 0x44 && tmp <= 0x45) 1388 xeon++; 1389 } 1390 1391 for (i = 0; i < 4; i++) { 1392 uint_t tmp; 1393 1394 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1395 if (tmp == 0x40) 1396 celeron++; 1397 else if (tmp >= 0x44 && tmp <= 0x45) 1398 xeon++; 1399 } 1400 1401 if (celeron) 1402 return ("Intel Celeron(r)"); 1403 if (xeon) 1404 return (cpi->cpi_model == 5 ? 1405 "Intel Pentium(r) II Xeon(tm)" : 1406 "Intel Pentium(r) III Xeon(tm)"); 1407 return (cpi->cpi_model == 5 ? 1408 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1409 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1410 default: 1411 break; 1412 } 1413 default: 1414 break; 1415 } 1416 1417 /* BrandID is present if the field is nonzero */ 1418 if (cpi->cpi_brandid != 0) { 1419 static const struct { 1420 uint_t bt_bid; 1421 const char *bt_str; 1422 } brand_tbl[] = { 1423 { 0x1, "Intel(r) Celeron(r)" }, 1424 { 0x2, "Intel(r) Pentium(r) III" }, 1425 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1426 { 0x4, "Intel(r) Pentium(r) III" }, 1427 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1428 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1429 { 0x8, "Intel(r) Pentium(r) 4" }, 1430 { 0x9, "Intel(r) Pentium(r) 4" }, 1431 { 0xa, "Intel(r) Celeron(r)" }, 1432 { 0xb, "Intel(r) Xeon(tm)" }, 1433 { 0xc, "Intel(r) Xeon(tm) MP" }, 1434 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1435 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1436 { 0x11, "Mobile Genuine Intel(r)" }, 1437 { 0x12, "Intel(r) Celeron(r) M" }, 1438 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1439 { 0x14, "Intel(r) Celeron(r)" }, 1440 { 0x15, "Mobile Genuine Intel(r)" }, 1441 { 0x16, "Intel(r) Pentium(r) M" }, 1442 { 0x17, "Mobile Intel(r) Celeron(r)" } 1443 }; 1444 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1445 uint_t sgn; 1446 1447 sgn = (cpi->cpi_family << 8) | 1448 (cpi->cpi_model << 4) | cpi->cpi_step; 1449 1450 for (i = 0; i < btblmax; i++) 1451 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1452 break; 1453 if (i < btblmax) { 1454 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1455 return ("Intel(r) Celeron(r)"); 1456 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1457 return ("Intel(r) Xeon(tm) MP"); 1458 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1459 return ("Intel(r) Xeon(tm)"); 1460 return (brand_tbl[i].bt_str); 1461 } 1462 } 1463 1464 return (NULL); 1465 } 1466 1467 static const char * 1468 amd_cpubrand(const struct cpuid_info *cpi) 1469 { 1470 if ((x86_feature & X86_CPUID) == 0 || 1471 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1472 return ("i486 compatible"); 1473 1474 switch (cpi->cpi_family) { 1475 case 5: 1476 switch (cpi->cpi_model) { 1477 case 0: 1478 case 1: 1479 case 2: 1480 case 3: 1481 case 4: 1482 case 5: 1483 return ("AMD-K5(r)"); 1484 case 6: 1485 case 7: 1486 return ("AMD-K6(r)"); 1487 case 8: 1488 return ("AMD-K6(r)-2"); 1489 case 9: 1490 return ("AMD-K6(r)-III"); 1491 default: 1492 return ("AMD (family 5)"); 1493 } 1494 case 6: 1495 switch (cpi->cpi_model) { 1496 case 1: 1497 return ("AMD-K7(tm)"); 1498 case 0: 1499 case 2: 1500 case 4: 1501 return ("AMD Athlon(tm)"); 1502 case 3: 1503 case 7: 1504 return ("AMD Duron(tm)"); 1505 case 6: 1506 case 8: 1507 case 10: 1508 /* 1509 * Use the L2 cache size to distinguish 1510 */ 1511 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1512 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1513 default: 1514 return ("AMD (family 6)"); 1515 } 1516 default: 1517 break; 1518 } 1519 1520 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1521 cpi->cpi_brandid != 0) { 1522 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1523 case 3: 1524 return ("AMD Opteron(tm) UP 1xx"); 1525 case 4: 1526 return ("AMD Opteron(tm) DP 2xx"); 1527 case 5: 1528 return ("AMD Opteron(tm) MP 8xx"); 1529 default: 1530 return ("AMD Opteron(tm)"); 1531 } 1532 } 1533 1534 return (NULL); 1535 } 1536 1537 static const char * 1538 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1539 { 1540 if ((x86_feature & X86_CPUID) == 0 || 1541 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1542 type == X86_TYPE_CYRIX_486) 1543 return ("i486 compatible"); 1544 1545 switch (type) { 1546 case X86_TYPE_CYRIX_6x86: 1547 return ("Cyrix 6x86"); 1548 case X86_TYPE_CYRIX_6x86L: 1549 return ("Cyrix 6x86L"); 1550 case X86_TYPE_CYRIX_6x86MX: 1551 return ("Cyrix 6x86MX"); 1552 case X86_TYPE_CYRIX_GXm: 1553 return ("Cyrix GXm"); 1554 case X86_TYPE_CYRIX_MediaGX: 1555 return ("Cyrix MediaGX"); 1556 case X86_TYPE_CYRIX_MII: 1557 return ("Cyrix M2"); 1558 case X86_TYPE_VIA_CYRIX_III: 1559 return ("VIA Cyrix M3"); 1560 default: 1561 /* 1562 * Have another wild guess .. 1563 */ 1564 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1565 return ("Cyrix 5x86"); 1566 else if (cpi->cpi_family == 5) { 1567 switch (cpi->cpi_model) { 1568 case 2: 1569 return ("Cyrix 6x86"); /* Cyrix M1 */ 1570 case 4: 1571 return ("Cyrix MediaGX"); 1572 default: 1573 break; 1574 } 1575 } else if (cpi->cpi_family == 6) { 1576 switch (cpi->cpi_model) { 1577 case 0: 1578 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1579 case 5: 1580 case 6: 1581 case 7: 1582 case 8: 1583 case 9: 1584 return ("VIA C3"); 1585 default: 1586 break; 1587 } 1588 } 1589 break; 1590 } 1591 return (NULL); 1592 } 1593 1594 /* 1595 * This only gets called in the case that the CPU extended 1596 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1597 * aren't available, or contain null bytes for some reason. 1598 */ 1599 static void 1600 fabricate_brandstr(struct cpuid_info *cpi) 1601 { 1602 const char *brand = NULL; 1603 1604 switch (cpi->cpi_vendor) { 1605 case X86_VENDOR_Intel: 1606 brand = intel_cpubrand(cpi); 1607 break; 1608 case X86_VENDOR_AMD: 1609 brand = amd_cpubrand(cpi); 1610 break; 1611 case X86_VENDOR_Cyrix: 1612 brand = cyrix_cpubrand(cpi, x86_type); 1613 break; 1614 case X86_VENDOR_NexGen: 1615 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1616 brand = "NexGen Nx586"; 1617 break; 1618 case X86_VENDOR_Centaur: 1619 if (cpi->cpi_family == 5) 1620 switch (cpi->cpi_model) { 1621 case 4: 1622 brand = "Centaur C6"; 1623 break; 1624 case 8: 1625 brand = "Centaur C2"; 1626 break; 1627 case 9: 1628 brand = "Centaur C3"; 1629 break; 1630 default: 1631 break; 1632 } 1633 break; 1634 case X86_VENDOR_Rise: 1635 if (cpi->cpi_family == 5 && 1636 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1637 brand = "Rise mP6"; 1638 break; 1639 case X86_VENDOR_SiS: 1640 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1641 brand = "SiS 55x"; 1642 break; 1643 case X86_VENDOR_TM: 1644 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 1645 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1646 break; 1647 case X86_VENDOR_NSC: 1648 case X86_VENDOR_UMC: 1649 default: 1650 break; 1651 } 1652 if (brand) { 1653 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1654 return; 1655 } 1656 1657 /* 1658 * If all else fails ... 1659 */ 1660 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1661 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1662 cpi->cpi_model, cpi->cpi_step); 1663 } 1664 1665 /* 1666 * This routine is called just after kernel memory allocation 1667 * becomes available on cpu0, and as part of mp_startup() on 1668 * the other cpus. 1669 * 1670 * Fixup the brand string, and collect any information from cpuid 1671 * that requires dynamicically allocated storage to represent. 1672 */ 1673 /*ARGSUSED*/ 1674 void 1675 cpuid_pass3(cpu_t *cpu) 1676 { 1677 int i, max, shft, level, size; 1678 struct cpuid_regs regs; 1679 struct cpuid_regs *cp; 1680 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1681 1682 ASSERT(cpi->cpi_pass == 2); 1683 1684 /* 1685 * Function 4: Deterministic cache parameters 1686 * 1687 * Take this opportunity to detect the number of threads 1688 * sharing the last level cache, and construct a corresponding 1689 * cache id. The respective cpuid_info members are initialized 1690 * to the default case of "no last level cache sharing". 1691 */ 1692 cpi->cpi_ncpu_shr_last_cache = 1; 1693 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1694 1695 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1696 1697 /* 1698 * Find the # of elements (size) returned by fn 4, and along 1699 * the way detect last level cache sharing details. 1700 */ 1701 bzero(®s, sizeof (regs)); 1702 cp = ®s; 1703 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1704 cp->cp_eax = 4; 1705 cp->cp_ecx = i; 1706 1707 (void) __cpuid_insn(cp); 1708 1709 if (CPI_CACHE_TYPE(cp) == 0) 1710 break; 1711 level = CPI_CACHE_LVL(cp); 1712 if (level > max) { 1713 max = level; 1714 cpi->cpi_ncpu_shr_last_cache = 1715 CPI_NTHR_SHR_CACHE(cp) + 1; 1716 } 1717 } 1718 cpi->cpi_std_4_size = size = i; 1719 1720 /* 1721 * Allocate the cpi_std_4 array. The first element 1722 * references the regs for fn 4, %ecx == 0, which 1723 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1724 */ 1725 if (size > 0) { 1726 cpi->cpi_std_4 = 1727 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1728 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1729 1730 /* 1731 * Allocate storage to hold the additional regs 1732 * for function 4, %ecx == 1 .. cpi_std_4_size. 1733 * 1734 * The regs for fn 4, %ecx == 0 has already 1735 * been allocated as indicated above. 1736 */ 1737 for (i = 1; i < size; i++) { 1738 cp = cpi->cpi_std_4[i] = 1739 kmem_zalloc(sizeof (regs), KM_SLEEP); 1740 cp->cp_eax = 4; 1741 cp->cp_ecx = i; 1742 1743 (void) __cpuid_insn(cp); 1744 } 1745 } 1746 /* 1747 * Determine the number of bits needed to represent 1748 * the number of CPUs sharing the last level cache. 1749 * 1750 * Shift off that number of bits from the APIC id to 1751 * derive the cache id. 1752 */ 1753 shft = 0; 1754 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1755 shft++; 1756 cpi->cpi_last_lvl_cacheid = CPI_APIC_ID(cpi) >> shft; 1757 } 1758 1759 /* 1760 * Now fixup the brand string 1761 */ 1762 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1763 fabricate_brandstr(cpi); 1764 } else { 1765 1766 /* 1767 * If we successfully extracted a brand string from the cpuid 1768 * instruction, clean it up by removing leading spaces and 1769 * similar junk. 1770 */ 1771 if (cpi->cpi_brandstr[0]) { 1772 size_t maxlen = sizeof (cpi->cpi_brandstr); 1773 char *src, *dst; 1774 1775 dst = src = (char *)cpi->cpi_brandstr; 1776 src[maxlen - 1] = '\0'; 1777 /* 1778 * strip leading spaces 1779 */ 1780 while (*src == ' ') 1781 src++; 1782 /* 1783 * Remove any 'Genuine' or "Authentic" prefixes 1784 */ 1785 if (strncmp(src, "Genuine ", 8) == 0) 1786 src += 8; 1787 if (strncmp(src, "Authentic ", 10) == 0) 1788 src += 10; 1789 1790 /* 1791 * Now do an in-place copy. 1792 * Map (R) to (r) and (TM) to (tm). 1793 * The era of teletypes is long gone, and there's 1794 * -really- no need to shout. 1795 */ 1796 while (*src != '\0') { 1797 if (src[0] == '(') { 1798 if (strncmp(src + 1, "R)", 2) == 0) { 1799 (void) strncpy(dst, "(r)", 3); 1800 src += 3; 1801 dst += 3; 1802 continue; 1803 } 1804 if (strncmp(src + 1, "TM)", 3) == 0) { 1805 (void) strncpy(dst, "(tm)", 4); 1806 src += 4; 1807 dst += 4; 1808 continue; 1809 } 1810 } 1811 *dst++ = *src++; 1812 } 1813 *dst = '\0'; 1814 1815 /* 1816 * Finally, remove any trailing spaces 1817 */ 1818 while (--dst > cpi->cpi_brandstr) 1819 if (*dst == ' ') 1820 *dst = '\0'; 1821 else 1822 break; 1823 } else 1824 fabricate_brandstr(cpi); 1825 } 1826 cpi->cpi_pass = 3; 1827 } 1828 1829 /* 1830 * This routine is called out of bind_hwcap() much later in the life 1831 * of the kernel (post_startup()). The job of this routine is to resolve 1832 * the hardware feature support and kernel support for those features into 1833 * what we're actually going to tell applications via the aux vector. 1834 */ 1835 uint_t 1836 cpuid_pass4(cpu_t *cpu) 1837 { 1838 struct cpuid_info *cpi; 1839 uint_t hwcap_flags = 0; 1840 1841 if (cpu == NULL) 1842 cpu = CPU; 1843 cpi = cpu->cpu_m.mcpu_cpi; 1844 1845 ASSERT(cpi->cpi_pass == 3); 1846 1847 if (cpi->cpi_maxeax >= 1) { 1848 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 1849 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 1850 1851 *edx = CPI_FEATURES_EDX(cpi); 1852 *ecx = CPI_FEATURES_ECX(cpi); 1853 1854 /* 1855 * [these require explicit kernel support] 1856 */ 1857 if ((x86_feature & X86_SEP) == 0) 1858 *edx &= ~CPUID_INTC_EDX_SEP; 1859 1860 if ((x86_feature & X86_SSE) == 0) 1861 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 1862 if ((x86_feature & X86_SSE2) == 0) 1863 *edx &= ~CPUID_INTC_EDX_SSE2; 1864 1865 if ((x86_feature & X86_HTT) == 0) 1866 *edx &= ~CPUID_INTC_EDX_HTT; 1867 1868 if ((x86_feature & X86_SSE3) == 0) 1869 *ecx &= ~CPUID_INTC_ECX_SSE3; 1870 1871 /* 1872 * [no explicit support required beyond x87 fp context] 1873 */ 1874 if (!fpu_exists) 1875 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 1876 1877 /* 1878 * Now map the supported feature vector to things that we 1879 * think userland will care about. 1880 */ 1881 if (*edx & CPUID_INTC_EDX_SEP) 1882 hwcap_flags |= AV_386_SEP; 1883 if (*edx & CPUID_INTC_EDX_SSE) 1884 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 1885 if (*edx & CPUID_INTC_EDX_SSE2) 1886 hwcap_flags |= AV_386_SSE2; 1887 if (*ecx & CPUID_INTC_ECX_SSE3) 1888 hwcap_flags |= AV_386_SSE3; 1889 if (*ecx & CPUID_INTC_ECX_POPCNT) 1890 hwcap_flags |= AV_386_POPCNT; 1891 if (*edx & CPUID_INTC_EDX_FPU) 1892 hwcap_flags |= AV_386_FPU; 1893 if (*edx & CPUID_INTC_EDX_MMX) 1894 hwcap_flags |= AV_386_MMX; 1895 1896 if (*edx & CPUID_INTC_EDX_TSC) 1897 hwcap_flags |= AV_386_TSC; 1898 if (*edx & CPUID_INTC_EDX_CX8) 1899 hwcap_flags |= AV_386_CX8; 1900 if (*edx & CPUID_INTC_EDX_CMOV) 1901 hwcap_flags |= AV_386_CMOV; 1902 if (*ecx & CPUID_INTC_ECX_MON) 1903 hwcap_flags |= AV_386_MON; 1904 if (*ecx & CPUID_INTC_ECX_CX16) 1905 hwcap_flags |= AV_386_CX16; 1906 } 1907 1908 if (x86_feature & X86_HTT) 1909 hwcap_flags |= AV_386_PAUSE; 1910 1911 if (cpi->cpi_xmaxeax < 0x80000001) 1912 goto pass4_done; 1913 1914 switch (cpi->cpi_vendor) { 1915 struct cpuid_regs cp; 1916 uint32_t *edx, *ecx; 1917 1918 case X86_VENDOR_Intel: 1919 /* 1920 * Seems like Intel duplicated what we necessary 1921 * here to make the initial crop of 64-bit OS's work. 1922 * Hopefully, those are the only "extended" bits 1923 * they'll add. 1924 */ 1925 /*FALLTHROUGH*/ 1926 1927 case X86_VENDOR_AMD: 1928 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 1929 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 1930 1931 *edx = CPI_FEATURES_XTD_EDX(cpi); 1932 *ecx = CPI_FEATURES_XTD_ECX(cpi); 1933 1934 /* 1935 * [these features require explicit kernel support] 1936 */ 1937 switch (cpi->cpi_vendor) { 1938 case X86_VENDOR_Intel: 1939 break; 1940 1941 case X86_VENDOR_AMD: 1942 if ((x86_feature & X86_TSCP) == 0) 1943 *edx &= ~CPUID_AMD_EDX_TSCP; 1944 if ((x86_feature & X86_SSE4A) == 0) 1945 *ecx &= ~CPUID_AMD_ECX_SSE4A; 1946 break; 1947 1948 default: 1949 break; 1950 } 1951 1952 /* 1953 * [no explicit support required beyond 1954 * x87 fp context and exception handlers] 1955 */ 1956 if (!fpu_exists) 1957 *edx &= ~(CPUID_AMD_EDX_MMXamd | 1958 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 1959 1960 if ((x86_feature & X86_NX) == 0) 1961 *edx &= ~CPUID_AMD_EDX_NX; 1962 #if !defined(__amd64) 1963 *edx &= ~CPUID_AMD_EDX_LM; 1964 #endif 1965 /* 1966 * Now map the supported feature vector to 1967 * things that we think userland will care about. 1968 */ 1969 #if defined(__amd64) 1970 if (*edx & CPUID_AMD_EDX_SYSC) 1971 hwcap_flags |= AV_386_AMD_SYSC; 1972 #endif 1973 if (*edx & CPUID_AMD_EDX_MMXamd) 1974 hwcap_flags |= AV_386_AMD_MMX; 1975 if (*edx & CPUID_AMD_EDX_3DNow) 1976 hwcap_flags |= AV_386_AMD_3DNow; 1977 if (*edx & CPUID_AMD_EDX_3DNowx) 1978 hwcap_flags |= AV_386_AMD_3DNowx; 1979 1980 switch (cpi->cpi_vendor) { 1981 case X86_VENDOR_AMD: 1982 if (*edx & CPUID_AMD_EDX_TSCP) 1983 hwcap_flags |= AV_386_TSCP; 1984 if (*ecx & CPUID_AMD_ECX_AHF64) 1985 hwcap_flags |= AV_386_AHF; 1986 if (*ecx & CPUID_AMD_ECX_SSE4A) 1987 hwcap_flags |= AV_386_AMD_SSE4A; 1988 if (*ecx & CPUID_AMD_ECX_LZCNT) 1989 hwcap_flags |= AV_386_AMD_LZCNT; 1990 break; 1991 1992 case X86_VENDOR_Intel: 1993 /* 1994 * Aarrgh. 1995 * Intel uses a different bit in the same word. 1996 */ 1997 if (*ecx & CPUID_INTC_ECX_AHF64) 1998 hwcap_flags |= AV_386_AHF; 1999 break; 2000 2001 default: 2002 break; 2003 } 2004 break; 2005 2006 case X86_VENDOR_TM: 2007 cp.cp_eax = 0x80860001; 2008 (void) __cpuid_insn(&cp); 2009 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2010 break; 2011 2012 default: 2013 break; 2014 } 2015 2016 pass4_done: 2017 cpi->cpi_pass = 4; 2018 return (hwcap_flags); 2019 } 2020 2021 2022 /* 2023 * Simulate the cpuid instruction using the data we previously 2024 * captured about this CPU. We try our best to return the truth 2025 * about the hardware, independently of kernel support. 2026 */ 2027 uint32_t 2028 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2029 { 2030 struct cpuid_info *cpi; 2031 struct cpuid_regs *xcp; 2032 2033 if (cpu == NULL) 2034 cpu = CPU; 2035 cpi = cpu->cpu_m.mcpu_cpi; 2036 2037 ASSERT(cpuid_checkpass(cpu, 3)); 2038 2039 /* 2040 * CPUID data is cached in two separate places: cpi_std for standard 2041 * CPUID functions, and cpi_extd for extended CPUID functions. 2042 */ 2043 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2044 xcp = &cpi->cpi_std[cp->cp_eax]; 2045 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2046 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2047 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2048 else 2049 /* 2050 * The caller is asking for data from an input parameter which 2051 * the kernel has not cached. In this case we go fetch from 2052 * the hardware and return the data directly to the user. 2053 */ 2054 return (__cpuid_insn(cp)); 2055 2056 cp->cp_eax = xcp->cp_eax; 2057 cp->cp_ebx = xcp->cp_ebx; 2058 cp->cp_ecx = xcp->cp_ecx; 2059 cp->cp_edx = xcp->cp_edx; 2060 return (cp->cp_eax); 2061 } 2062 2063 int 2064 cpuid_checkpass(cpu_t *cpu, int pass) 2065 { 2066 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2067 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2068 } 2069 2070 int 2071 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2072 { 2073 ASSERT(cpuid_checkpass(cpu, 3)); 2074 2075 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2076 } 2077 2078 int 2079 cpuid_is_cmt(cpu_t *cpu) 2080 { 2081 if (cpu == NULL) 2082 cpu = CPU; 2083 2084 ASSERT(cpuid_checkpass(cpu, 1)); 2085 2086 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2087 } 2088 2089 /* 2090 * AMD and Intel both implement the 64-bit variant of the syscall 2091 * instruction (syscallq), so if there's -any- support for syscall, 2092 * cpuid currently says "yes, we support this". 2093 * 2094 * However, Intel decided to -not- implement the 32-bit variant of the 2095 * syscall instruction, so we provide a predicate to allow our caller 2096 * to test that subtlety here. 2097 */ 2098 /*ARGSUSED*/ 2099 int 2100 cpuid_syscall32_insn(cpu_t *cpu) 2101 { 2102 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2103 2104 if (cpu == NULL) 2105 cpu = CPU; 2106 2107 /*CSTYLED*/ 2108 { 2109 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2110 2111 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2112 cpi->cpi_xmaxeax >= 0x80000001 && 2113 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2114 return (1); 2115 } 2116 return (0); 2117 } 2118 2119 int 2120 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2121 { 2122 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2123 2124 static const char fmt[] = 2125 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2126 static const char fmt_ht[] = 2127 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2128 2129 ASSERT(cpuid_checkpass(cpu, 1)); 2130 2131 if (cpuid_is_cmt(cpu)) 2132 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2133 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2134 cpi->cpi_family, cpi->cpi_model, 2135 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2136 return (snprintf(s, n, fmt, 2137 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2138 cpi->cpi_family, cpi->cpi_model, 2139 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2140 } 2141 2142 const char * 2143 cpuid_getvendorstr(cpu_t *cpu) 2144 { 2145 ASSERT(cpuid_checkpass(cpu, 1)); 2146 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2147 } 2148 2149 uint_t 2150 cpuid_getvendor(cpu_t *cpu) 2151 { 2152 ASSERT(cpuid_checkpass(cpu, 1)); 2153 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2154 } 2155 2156 uint_t 2157 cpuid_getfamily(cpu_t *cpu) 2158 { 2159 ASSERT(cpuid_checkpass(cpu, 1)); 2160 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2161 } 2162 2163 uint_t 2164 cpuid_getmodel(cpu_t *cpu) 2165 { 2166 ASSERT(cpuid_checkpass(cpu, 1)); 2167 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2168 } 2169 2170 uint_t 2171 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2172 { 2173 ASSERT(cpuid_checkpass(cpu, 1)); 2174 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2175 } 2176 2177 uint_t 2178 cpuid_get_ncore_per_chip(cpu_t *cpu) 2179 { 2180 ASSERT(cpuid_checkpass(cpu, 1)); 2181 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2182 } 2183 2184 uint_t 2185 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2186 { 2187 ASSERT(cpuid_checkpass(cpu, 2)); 2188 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2189 } 2190 2191 id_t 2192 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2193 { 2194 ASSERT(cpuid_checkpass(cpu, 2)); 2195 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2196 } 2197 2198 uint_t 2199 cpuid_getstep(cpu_t *cpu) 2200 { 2201 ASSERT(cpuid_checkpass(cpu, 1)); 2202 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2203 } 2204 2205 uint_t 2206 cpuid_getsig(struct cpu *cpu) 2207 { 2208 ASSERT(cpuid_checkpass(cpu, 1)); 2209 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2210 } 2211 2212 uint32_t 2213 cpuid_getchiprev(struct cpu *cpu) 2214 { 2215 ASSERT(cpuid_checkpass(cpu, 1)); 2216 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2217 } 2218 2219 const char * 2220 cpuid_getchiprevstr(struct cpu *cpu) 2221 { 2222 ASSERT(cpuid_checkpass(cpu, 1)); 2223 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2224 } 2225 2226 uint32_t 2227 cpuid_getsockettype(struct cpu *cpu) 2228 { 2229 ASSERT(cpuid_checkpass(cpu, 1)); 2230 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2231 } 2232 2233 int 2234 cpuid_get_chipid(cpu_t *cpu) 2235 { 2236 ASSERT(cpuid_checkpass(cpu, 1)); 2237 2238 if (cpuid_is_cmt(cpu)) 2239 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2240 return (cpu->cpu_id); 2241 } 2242 2243 id_t 2244 cpuid_get_coreid(cpu_t *cpu) 2245 { 2246 ASSERT(cpuid_checkpass(cpu, 1)); 2247 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2248 } 2249 2250 int 2251 cpuid_get_clogid(cpu_t *cpu) 2252 { 2253 ASSERT(cpuid_checkpass(cpu, 1)); 2254 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2255 } 2256 2257 void 2258 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2259 { 2260 struct cpuid_info *cpi; 2261 2262 if (cpu == NULL) 2263 cpu = CPU; 2264 cpi = cpu->cpu_m.mcpu_cpi; 2265 2266 ASSERT(cpuid_checkpass(cpu, 1)); 2267 2268 if (pabits) 2269 *pabits = cpi->cpi_pabits; 2270 if (vabits) 2271 *vabits = cpi->cpi_vabits; 2272 } 2273 2274 /* 2275 * Returns the number of data TLB entries for a corresponding 2276 * pagesize. If it can't be computed, or isn't known, the 2277 * routine returns zero. If you ask about an architecturally 2278 * impossible pagesize, the routine will panic (so that the 2279 * hat implementor knows that things are inconsistent.) 2280 */ 2281 uint_t 2282 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2283 { 2284 struct cpuid_info *cpi; 2285 uint_t dtlb_nent = 0; 2286 2287 if (cpu == NULL) 2288 cpu = CPU; 2289 cpi = cpu->cpu_m.mcpu_cpi; 2290 2291 ASSERT(cpuid_checkpass(cpu, 1)); 2292 2293 /* 2294 * Check the L2 TLB info 2295 */ 2296 if (cpi->cpi_xmaxeax >= 0x80000006) { 2297 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2298 2299 switch (pagesize) { 2300 2301 case 4 * 1024: 2302 /* 2303 * All zero in the top 16 bits of the register 2304 * indicates a unified TLB. Size is in low 16 bits. 2305 */ 2306 if ((cp->cp_ebx & 0xffff0000) == 0) 2307 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2308 else 2309 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2310 break; 2311 2312 case 2 * 1024 * 1024: 2313 if ((cp->cp_eax & 0xffff0000) == 0) 2314 dtlb_nent = cp->cp_eax & 0x0000ffff; 2315 else 2316 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2317 break; 2318 2319 default: 2320 panic("unknown L2 pagesize"); 2321 /*NOTREACHED*/ 2322 } 2323 } 2324 2325 if (dtlb_nent != 0) 2326 return (dtlb_nent); 2327 2328 /* 2329 * No L2 TLB support for this size, try L1. 2330 */ 2331 if (cpi->cpi_xmaxeax >= 0x80000005) { 2332 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2333 2334 switch (pagesize) { 2335 case 4 * 1024: 2336 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2337 break; 2338 case 2 * 1024 * 1024: 2339 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2340 break; 2341 default: 2342 panic("unknown L1 d-TLB pagesize"); 2343 /*NOTREACHED*/ 2344 } 2345 } 2346 2347 return (dtlb_nent); 2348 } 2349 2350 /* 2351 * Return 0 if the erratum is not present or not applicable, positive 2352 * if it is, and negative if the status of the erratum is unknown. 2353 * 2354 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2355 * Processors" #25759, Rev 3.57, August 2005 2356 */ 2357 int 2358 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2359 { 2360 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2361 uint_t eax; 2362 2363 /* 2364 * Bail out if this CPU isn't an AMD CPU, or if it's 2365 * a legacy (32-bit) AMD CPU. 2366 */ 2367 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2368 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2369 cpi->cpi_family == 6) 2370 2371 return (0); 2372 2373 eax = cpi->cpi_std[1].cp_eax; 2374 2375 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2376 #define SH_B3(eax) (eax == 0xf51) 2377 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2378 2379 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2380 2381 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2382 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2383 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2384 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2385 2386 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2387 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2388 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2389 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2390 2391 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2392 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2393 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2394 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2395 #define BH_E4(eax) (eax == 0x20fb1) 2396 #define SH_E5(eax) (eax == 0x20f42) 2397 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2398 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2399 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2400 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2401 DH_E6(eax) || JH_E6(eax)) 2402 2403 switch (erratum) { 2404 case 1: 2405 return (cpi->cpi_family < 0x10); 2406 case 51: /* what does the asterisk mean? */ 2407 return (B(eax) || SH_C0(eax) || CG(eax)); 2408 case 52: 2409 return (B(eax)); 2410 case 57: 2411 return (cpi->cpi_family <= 0x10); 2412 case 58: 2413 return (B(eax)); 2414 case 60: 2415 return (cpi->cpi_family <= 0x10); 2416 case 61: 2417 case 62: 2418 case 63: 2419 case 64: 2420 case 65: 2421 case 66: 2422 case 68: 2423 case 69: 2424 case 70: 2425 case 71: 2426 return (B(eax)); 2427 case 72: 2428 return (SH_B0(eax)); 2429 case 74: 2430 return (B(eax)); 2431 case 75: 2432 return (cpi->cpi_family < 0x10); 2433 case 76: 2434 return (B(eax)); 2435 case 77: 2436 return (cpi->cpi_family <= 0x10); 2437 case 78: 2438 return (B(eax) || SH_C0(eax)); 2439 case 79: 2440 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2441 case 80: 2442 case 81: 2443 case 82: 2444 return (B(eax)); 2445 case 83: 2446 return (B(eax) || SH_C0(eax) || CG(eax)); 2447 case 85: 2448 return (cpi->cpi_family < 0x10); 2449 case 86: 2450 return (SH_C0(eax) || CG(eax)); 2451 case 88: 2452 #if !defined(__amd64) 2453 return (0); 2454 #else 2455 return (B(eax) || SH_C0(eax)); 2456 #endif 2457 case 89: 2458 return (cpi->cpi_family < 0x10); 2459 case 90: 2460 return (B(eax) || SH_C0(eax) || CG(eax)); 2461 case 91: 2462 case 92: 2463 return (B(eax) || SH_C0(eax)); 2464 case 93: 2465 return (SH_C0(eax)); 2466 case 94: 2467 return (B(eax) || SH_C0(eax) || CG(eax)); 2468 case 95: 2469 #if !defined(__amd64) 2470 return (0); 2471 #else 2472 return (B(eax) || SH_C0(eax)); 2473 #endif 2474 case 96: 2475 return (B(eax) || SH_C0(eax) || CG(eax)); 2476 case 97: 2477 case 98: 2478 return (SH_C0(eax) || CG(eax)); 2479 case 99: 2480 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2481 case 100: 2482 return (B(eax) || SH_C0(eax)); 2483 case 101: 2484 case 103: 2485 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2486 case 104: 2487 return (SH_C0(eax) || CG(eax) || D0(eax)); 2488 case 105: 2489 case 106: 2490 case 107: 2491 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2492 case 108: 2493 return (DH_CG(eax)); 2494 case 109: 2495 return (SH_C0(eax) || CG(eax) || D0(eax)); 2496 case 110: 2497 return (D0(eax) || EX(eax)); 2498 case 111: 2499 return (CG(eax)); 2500 case 112: 2501 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2502 case 113: 2503 return (eax == 0x20fc0); 2504 case 114: 2505 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2506 case 115: 2507 return (SH_E0(eax) || JH_E1(eax)); 2508 case 116: 2509 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2510 case 117: 2511 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2512 case 118: 2513 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2514 JH_E6(eax)); 2515 case 121: 2516 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2517 case 122: 2518 return (cpi->cpi_family < 0x10); 2519 case 123: 2520 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2521 case 131: 2522 return (cpi->cpi_family < 0x10); 2523 case 6336786: 2524 /* 2525 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2526 * if this is a K8 family or newer processor 2527 */ 2528 if (CPI_FAMILY(cpi) == 0xf) { 2529 struct cpuid_regs regs; 2530 regs.cp_eax = 0x80000007; 2531 (void) __cpuid_insn(®s); 2532 return (!(regs.cp_edx & 0x100)); 2533 } 2534 return (0); 2535 case 6323525: 2536 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2537 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2538 2539 default: 2540 return (-1); 2541 } 2542 } 2543 2544 static const char assoc_str[] = "associativity"; 2545 static const char line_str[] = "line-size"; 2546 static const char size_str[] = "size"; 2547 2548 static void 2549 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2550 uint32_t val) 2551 { 2552 char buf[128]; 2553 2554 /* 2555 * ndi_prop_update_int() is used because it is desirable for 2556 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2557 */ 2558 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2559 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2560 } 2561 2562 /* 2563 * Intel-style cache/tlb description 2564 * 2565 * Standard cpuid level 2 gives a randomly ordered 2566 * selection of tags that index into a table that describes 2567 * cache and tlb properties. 2568 */ 2569 2570 static const char l1_icache_str[] = "l1-icache"; 2571 static const char l1_dcache_str[] = "l1-dcache"; 2572 static const char l2_cache_str[] = "l2-cache"; 2573 static const char l3_cache_str[] = "l3-cache"; 2574 static const char itlb4k_str[] = "itlb-4K"; 2575 static const char dtlb4k_str[] = "dtlb-4K"; 2576 static const char itlb4M_str[] = "itlb-4M"; 2577 static const char dtlb4M_str[] = "dtlb-4M"; 2578 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2579 static const char dtlb44_str[] = "dtlb-4K-4M"; 2580 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2581 static const char sl2_cache_str[] = "sectored-l2-cache"; 2582 static const char itrace_str[] = "itrace-cache"; 2583 static const char sl3_cache_str[] = "sectored-l3-cache"; 2584 2585 static const struct cachetab { 2586 uint8_t ct_code; 2587 uint8_t ct_assoc; 2588 uint16_t ct_line_size; 2589 size_t ct_size; 2590 const char *ct_label; 2591 } intel_ctab[] = { 2592 /* maintain descending order! */ 2593 { 0xb4, 4, 0, 256, dtlb4k_str }, 2594 { 0xb3, 4, 0, 128, dtlb4k_str }, 2595 { 0xb0, 4, 0, 128, itlb4k_str }, 2596 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2597 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2598 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2599 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2600 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2601 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2602 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2603 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2604 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2605 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2606 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2607 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 2608 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 2609 { 0x73, 8, 0, 64*1024, itrace_str}, 2610 { 0x72, 8, 0, 32*1024, itrace_str}, 2611 { 0x71, 8, 0, 16*1024, itrace_str}, 2612 { 0x70, 8, 0, 12*1024, itrace_str}, 2613 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 2614 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 2615 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 2616 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 2617 { 0x5d, 0, 0, 256, dtlb44_str}, 2618 { 0x5c, 0, 0, 128, dtlb44_str}, 2619 { 0x5b, 0, 0, 64, dtlb44_str}, 2620 { 0x52, 0, 0, 256, itlb424_str}, 2621 { 0x51, 0, 0, 128, itlb424_str}, 2622 { 0x50, 0, 0, 64, itlb424_str}, 2623 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 2624 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 2625 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 2626 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 2627 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 2628 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 2629 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 2630 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 2631 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 2632 { 0x43, 4, 32, 512*1024, l2_cache_str}, 2633 { 0x42, 4, 32, 256*1024, l2_cache_str}, 2634 { 0x41, 4, 32, 128*1024, l2_cache_str}, 2635 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 2636 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 2637 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 2638 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 2639 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 2640 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 2641 { 0x30, 8, 64, 32*1024, l1_icache_str}, 2642 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 2643 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 2644 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 2645 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 2646 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 2647 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 2648 { 0x0b, 4, 0, 4, itlb4M_str}, 2649 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 2650 { 0x08, 4, 32, 16*1024, l1_icache_str}, 2651 { 0x06, 4, 32, 8*1024, l1_icache_str}, 2652 { 0x04, 4, 0, 8, dtlb4M_str}, 2653 { 0x03, 4, 0, 64, dtlb4k_str}, 2654 { 0x02, 4, 0, 2, itlb4M_str}, 2655 { 0x01, 4, 0, 32, itlb4k_str}, 2656 { 0 } 2657 }; 2658 2659 static const struct cachetab cyrix_ctab[] = { 2660 { 0x70, 4, 0, 32, "tlb-4K" }, 2661 { 0x80, 4, 16, 16*1024, "l1-cache" }, 2662 { 0 } 2663 }; 2664 2665 /* 2666 * Search a cache table for a matching entry 2667 */ 2668 static const struct cachetab * 2669 find_cacheent(const struct cachetab *ct, uint_t code) 2670 { 2671 if (code != 0) { 2672 for (; ct->ct_code != 0; ct++) 2673 if (ct->ct_code <= code) 2674 break; 2675 if (ct->ct_code == code) 2676 return (ct); 2677 } 2678 return (NULL); 2679 } 2680 2681 /* 2682 * Walk the cacheinfo descriptor, applying 'func' to every valid element 2683 * The walk is terminated if the walker returns non-zero. 2684 */ 2685 static void 2686 intel_walk_cacheinfo(struct cpuid_info *cpi, 2687 void *arg, int (*func)(void *, const struct cachetab *)) 2688 { 2689 const struct cachetab *ct; 2690 uint8_t *dp; 2691 int i; 2692 2693 if ((dp = cpi->cpi_cacheinfo) == NULL) 2694 return; 2695 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2696 /* 2697 * For overloaded descriptor 0x49 we use cpuid function 4 2698 * if supported by the current processor, to update 2699 * cache information. 2700 */ 2701 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4) { 2702 intel_cpuid_4_cache_info(arg, cpi); 2703 continue; 2704 } 2705 2706 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 2707 if (func(arg, ct) != 0) 2708 break; 2709 } 2710 } 2711 } 2712 2713 /* 2714 * (Like the Intel one, except for Cyrix CPUs) 2715 */ 2716 static void 2717 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 2718 void *arg, int (*func)(void *, const struct cachetab *)) 2719 { 2720 const struct cachetab *ct; 2721 uint8_t *dp; 2722 int i; 2723 2724 if ((dp = cpi->cpi_cacheinfo) == NULL) 2725 return; 2726 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2727 /* 2728 * Search Cyrix-specific descriptor table first .. 2729 */ 2730 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 2731 if (func(arg, ct) != 0) 2732 break; 2733 continue; 2734 } 2735 /* 2736 * .. else fall back to the Intel one 2737 */ 2738 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 2739 if (func(arg, ct) != 0) 2740 break; 2741 continue; 2742 } 2743 } 2744 } 2745 2746 /* 2747 * A cacheinfo walker that adds associativity, line-size, and size properties 2748 * to the devinfo node it is passed as an argument. 2749 */ 2750 static int 2751 add_cacheent_props(void *arg, const struct cachetab *ct) 2752 { 2753 dev_info_t *devi = arg; 2754 2755 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 2756 if (ct->ct_line_size != 0) 2757 add_cache_prop(devi, ct->ct_label, line_str, 2758 ct->ct_line_size); 2759 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 2760 return (0); 2761 } 2762 2763 /* 2764 * Add L2 or L3 cache-information using cpuid function 4. This 2765 * function is called from intel_walk_cacheinfo() when descriptor 2766 * 0x49 is encountered. 2767 */ 2768 static void 2769 intel_cpuid_4_cache_info(void *arg, struct cpuid_info *cpi) 2770 { 2771 uint32_t level, i; 2772 2773 struct cachetab ct; 2774 2775 for (i = 0; i < cpi->cpi_std_4_size; i++) { 2776 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 2777 2778 if (level == 2 || level == 3) { 2779 ct.ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 2780 ct.ct_line_size = 2781 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 2782 ct.ct_size = ct.ct_assoc * 2783 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 2784 ct.ct_line_size * 2785 (cpi->cpi_std_4[i]->cp_ecx + 1); 2786 2787 if (level == 2) { 2788 ct.ct_label = l2_cache_str; 2789 } else if (level == 3) { 2790 ct.ct_label = l3_cache_str; 2791 } 2792 2793 (void) add_cacheent_props(arg, 2794 (const struct cachetab *) (&ct)); 2795 } 2796 } 2797 } 2798 2799 static const char fully_assoc[] = "fully-associative?"; 2800 2801 /* 2802 * AMD style cache/tlb description 2803 * 2804 * Extended functions 5 and 6 directly describe properties of 2805 * tlbs and various cache levels. 2806 */ 2807 static void 2808 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 2809 { 2810 switch (assoc) { 2811 case 0: /* reserved; ignore */ 2812 break; 2813 default: 2814 add_cache_prop(devi, label, assoc_str, assoc); 2815 break; 2816 case 0xff: 2817 add_cache_prop(devi, label, fully_assoc, 1); 2818 break; 2819 } 2820 } 2821 2822 static void 2823 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 2824 { 2825 if (size == 0) 2826 return; 2827 add_cache_prop(devi, label, size_str, size); 2828 add_amd_assoc(devi, label, assoc); 2829 } 2830 2831 static void 2832 add_amd_cache(dev_info_t *devi, const char *label, 2833 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 2834 { 2835 if (size == 0 || line_size == 0) 2836 return; 2837 add_amd_assoc(devi, label, assoc); 2838 /* 2839 * Most AMD parts have a sectored cache. Multiple cache lines are 2840 * associated with each tag. A sector consists of all cache lines 2841 * associated with a tag. For example, the AMD K6-III has a sector 2842 * size of 2 cache lines per tag. 2843 */ 2844 if (lines_per_tag != 0) 2845 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 2846 add_cache_prop(devi, label, line_str, line_size); 2847 add_cache_prop(devi, label, size_str, size * 1024); 2848 } 2849 2850 static void 2851 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 2852 { 2853 switch (assoc) { 2854 case 0: /* off */ 2855 break; 2856 case 1: 2857 case 2: 2858 case 4: 2859 add_cache_prop(devi, label, assoc_str, assoc); 2860 break; 2861 case 6: 2862 add_cache_prop(devi, label, assoc_str, 8); 2863 break; 2864 case 8: 2865 add_cache_prop(devi, label, assoc_str, 16); 2866 break; 2867 case 0xf: 2868 add_cache_prop(devi, label, fully_assoc, 1); 2869 break; 2870 default: /* reserved; ignore */ 2871 break; 2872 } 2873 } 2874 2875 static void 2876 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 2877 { 2878 if (size == 0 || assoc == 0) 2879 return; 2880 add_amd_l2_assoc(devi, label, assoc); 2881 add_cache_prop(devi, label, size_str, size); 2882 } 2883 2884 static void 2885 add_amd_l2_cache(dev_info_t *devi, const char *label, 2886 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 2887 { 2888 if (size == 0 || assoc == 0 || line_size == 0) 2889 return; 2890 add_amd_l2_assoc(devi, label, assoc); 2891 if (lines_per_tag != 0) 2892 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 2893 add_cache_prop(devi, label, line_str, line_size); 2894 add_cache_prop(devi, label, size_str, size * 1024); 2895 } 2896 2897 static void 2898 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 2899 { 2900 struct cpuid_regs *cp; 2901 2902 if (cpi->cpi_xmaxeax < 0x80000005) 2903 return; 2904 cp = &cpi->cpi_extd[5]; 2905 2906 /* 2907 * 4M/2M L1 TLB configuration 2908 * 2909 * We report the size for 2M pages because AMD uses two 2910 * TLB entries for one 4M page. 2911 */ 2912 add_amd_tlb(devi, "dtlb-2M", 2913 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 2914 add_amd_tlb(devi, "itlb-2M", 2915 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 2916 2917 /* 2918 * 4K L1 TLB configuration 2919 */ 2920 2921 switch (cpi->cpi_vendor) { 2922 uint_t nentries; 2923 case X86_VENDOR_TM: 2924 if (cpi->cpi_family >= 5) { 2925 /* 2926 * Crusoe processors have 256 TLB entries, but 2927 * cpuid data format constrains them to only 2928 * reporting 255 of them. 2929 */ 2930 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 2931 nentries = 256; 2932 /* 2933 * Crusoe processors also have a unified TLB 2934 */ 2935 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 2936 nentries); 2937 break; 2938 } 2939 /*FALLTHROUGH*/ 2940 default: 2941 add_amd_tlb(devi, itlb4k_str, 2942 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 2943 add_amd_tlb(devi, dtlb4k_str, 2944 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 2945 break; 2946 } 2947 2948 /* 2949 * data L1 cache configuration 2950 */ 2951 2952 add_amd_cache(devi, l1_dcache_str, 2953 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 2954 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 2955 2956 /* 2957 * code L1 cache configuration 2958 */ 2959 2960 add_amd_cache(devi, l1_icache_str, 2961 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 2962 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 2963 2964 if (cpi->cpi_xmaxeax < 0x80000006) 2965 return; 2966 cp = &cpi->cpi_extd[6]; 2967 2968 /* Check for a unified L2 TLB for large pages */ 2969 2970 if (BITX(cp->cp_eax, 31, 16) == 0) 2971 add_amd_l2_tlb(devi, "l2-tlb-2M", 2972 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 2973 else { 2974 add_amd_l2_tlb(devi, "l2-dtlb-2M", 2975 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 2976 add_amd_l2_tlb(devi, "l2-itlb-2M", 2977 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 2978 } 2979 2980 /* Check for a unified L2 TLB for 4K pages */ 2981 2982 if (BITX(cp->cp_ebx, 31, 16) == 0) { 2983 add_amd_l2_tlb(devi, "l2-tlb-4K", 2984 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 2985 } else { 2986 add_amd_l2_tlb(devi, "l2-dtlb-4K", 2987 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 2988 add_amd_l2_tlb(devi, "l2-itlb-4K", 2989 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 2990 } 2991 2992 add_amd_l2_cache(devi, l2_cache_str, 2993 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 2994 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 2995 } 2996 2997 /* 2998 * There are two basic ways that the x86 world describes it cache 2999 * and tlb architecture - Intel's way and AMD's way. 3000 * 3001 * Return which flavor of cache architecture we should use 3002 */ 3003 static int 3004 x86_which_cacheinfo(struct cpuid_info *cpi) 3005 { 3006 switch (cpi->cpi_vendor) { 3007 case X86_VENDOR_Intel: 3008 if (cpi->cpi_maxeax >= 2) 3009 return (X86_VENDOR_Intel); 3010 break; 3011 case X86_VENDOR_AMD: 3012 /* 3013 * The K5 model 1 was the first part from AMD that reported 3014 * cache sizes via extended cpuid functions. 3015 */ 3016 if (cpi->cpi_family > 5 || 3017 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3018 return (X86_VENDOR_AMD); 3019 break; 3020 case X86_VENDOR_TM: 3021 if (cpi->cpi_family >= 5) 3022 return (X86_VENDOR_AMD); 3023 /*FALLTHROUGH*/ 3024 default: 3025 /* 3026 * If they have extended CPU data for 0x80000005 3027 * then we assume they have AMD-format cache 3028 * information. 3029 * 3030 * If not, and the vendor happens to be Cyrix, 3031 * then try our-Cyrix specific handler. 3032 * 3033 * If we're not Cyrix, then assume we're using Intel's 3034 * table-driven format instead. 3035 */ 3036 if (cpi->cpi_xmaxeax >= 0x80000005) 3037 return (X86_VENDOR_AMD); 3038 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3039 return (X86_VENDOR_Cyrix); 3040 else if (cpi->cpi_maxeax >= 2) 3041 return (X86_VENDOR_Intel); 3042 break; 3043 } 3044 return (-1); 3045 } 3046 3047 /* 3048 * create a node for the given cpu under the prom root node. 3049 * Also, create a cpu node in the device tree. 3050 */ 3051 static dev_info_t *cpu_nex_devi = NULL; 3052 static kmutex_t cpu_node_lock; 3053 3054 /* 3055 * Called from post_startup() and mp_startup() 3056 */ 3057 void 3058 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3059 { 3060 dev_info_t *cpu_devi; 3061 int create; 3062 3063 mutex_enter(&cpu_node_lock); 3064 3065 /* 3066 * create a nexus node for all cpus identified as 'cpu_id' under 3067 * the root node. 3068 */ 3069 if (cpu_nex_devi == NULL) { 3070 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3071 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3072 mutex_exit(&cpu_node_lock); 3073 return; 3074 } 3075 (void) ndi_devi_online(cpu_nex_devi, 0); 3076 } 3077 3078 /* 3079 * create a child node for cpu identified as 'cpu_id' 3080 */ 3081 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3082 cpu_id); 3083 if (cpu_devi == NULL) { 3084 mutex_exit(&cpu_node_lock); 3085 return; 3086 } 3087 3088 /* device_type */ 3089 3090 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3091 "device_type", "cpu"); 3092 3093 /* reg */ 3094 3095 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3096 "reg", cpu_id); 3097 3098 /* cpu-mhz, and clock-frequency */ 3099 3100 if (cpu_freq > 0) { 3101 long long mul; 3102 3103 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3104 "cpu-mhz", cpu_freq); 3105 3106 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3107 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3108 "clock-frequency", (int)mul); 3109 } 3110 3111 (void) ndi_devi_online(cpu_devi, 0); 3112 3113 if ((x86_feature & X86_CPUID) == 0) { 3114 mutex_exit(&cpu_node_lock); 3115 return; 3116 } 3117 3118 /* vendor-id */ 3119 3120 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3121 "vendor-id", cpi->cpi_vendorstr); 3122 3123 if (cpi->cpi_maxeax == 0) { 3124 mutex_exit(&cpu_node_lock); 3125 return; 3126 } 3127 3128 /* 3129 * family, model, and step 3130 */ 3131 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3132 "family", CPI_FAMILY(cpi)); 3133 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3134 "cpu-model", CPI_MODEL(cpi)); 3135 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3136 "stepping-id", CPI_STEP(cpi)); 3137 3138 /* type */ 3139 3140 switch (cpi->cpi_vendor) { 3141 case X86_VENDOR_Intel: 3142 create = 1; 3143 break; 3144 default: 3145 create = 0; 3146 break; 3147 } 3148 if (create) 3149 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3150 "type", CPI_TYPE(cpi)); 3151 3152 /* ext-family */ 3153 3154 switch (cpi->cpi_vendor) { 3155 case X86_VENDOR_Intel: 3156 case X86_VENDOR_AMD: 3157 create = cpi->cpi_family >= 0xf; 3158 break; 3159 default: 3160 create = 0; 3161 break; 3162 } 3163 if (create) 3164 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3165 "ext-family", CPI_FAMILY_XTD(cpi)); 3166 3167 /* ext-model */ 3168 3169 switch (cpi->cpi_vendor) { 3170 case X86_VENDOR_Intel: 3171 create = CPI_MODEL(cpi) == 0xf; 3172 break; 3173 case X86_VENDOR_AMD: 3174 create = CPI_FAMILY(cpi) == 0xf; 3175 break; 3176 default: 3177 create = 0; 3178 break; 3179 } 3180 if (create) 3181 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3182 "ext-model", CPI_MODEL_XTD(cpi)); 3183 3184 /* generation */ 3185 3186 switch (cpi->cpi_vendor) { 3187 case X86_VENDOR_AMD: 3188 /* 3189 * AMD K5 model 1 was the first part to support this 3190 */ 3191 create = cpi->cpi_xmaxeax >= 0x80000001; 3192 break; 3193 default: 3194 create = 0; 3195 break; 3196 } 3197 if (create) 3198 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3199 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3200 3201 /* brand-id */ 3202 3203 switch (cpi->cpi_vendor) { 3204 case X86_VENDOR_Intel: 3205 /* 3206 * brand id first appeared on Pentium III Xeon model 8, 3207 * and Celeron model 8 processors and Opteron 3208 */ 3209 create = cpi->cpi_family > 6 || 3210 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3211 break; 3212 case X86_VENDOR_AMD: 3213 create = cpi->cpi_family >= 0xf; 3214 break; 3215 default: 3216 create = 0; 3217 break; 3218 } 3219 if (create && cpi->cpi_brandid != 0) { 3220 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3221 "brand-id", cpi->cpi_brandid); 3222 } 3223 3224 /* chunks, and apic-id */ 3225 3226 switch (cpi->cpi_vendor) { 3227 /* 3228 * first available on Pentium IV and Opteron (K8) 3229 */ 3230 case X86_VENDOR_Intel: 3231 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3232 break; 3233 case X86_VENDOR_AMD: 3234 create = cpi->cpi_family >= 0xf; 3235 break; 3236 default: 3237 create = 0; 3238 break; 3239 } 3240 if (create) { 3241 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3242 "chunks", CPI_CHUNKS(cpi)); 3243 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3244 "apic-id", CPI_APIC_ID(cpi)); 3245 if (cpi->cpi_chipid >= 0) { 3246 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3247 "chip#", cpi->cpi_chipid); 3248 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3249 "clog#", cpi->cpi_clogid); 3250 } 3251 } 3252 3253 /* cpuid-features */ 3254 3255 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3256 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3257 3258 3259 /* cpuid-features-ecx */ 3260 3261 switch (cpi->cpi_vendor) { 3262 case X86_VENDOR_Intel: 3263 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3264 break; 3265 default: 3266 create = 0; 3267 break; 3268 } 3269 if (create) 3270 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3271 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3272 3273 /* ext-cpuid-features */ 3274 3275 switch (cpi->cpi_vendor) { 3276 case X86_VENDOR_Intel: 3277 case X86_VENDOR_AMD: 3278 case X86_VENDOR_Cyrix: 3279 case X86_VENDOR_TM: 3280 case X86_VENDOR_Centaur: 3281 create = cpi->cpi_xmaxeax >= 0x80000001; 3282 break; 3283 default: 3284 create = 0; 3285 break; 3286 } 3287 if (create) { 3288 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3289 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3290 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3291 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3292 } 3293 3294 /* 3295 * Brand String first appeared in Intel Pentium IV, AMD K5 3296 * model 1, and Cyrix GXm. On earlier models we try and 3297 * simulate something similar .. so this string should always 3298 * same -something- about the processor, however lame. 3299 */ 3300 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3301 "brand-string", cpi->cpi_brandstr); 3302 3303 /* 3304 * Finally, cache and tlb information 3305 */ 3306 switch (x86_which_cacheinfo(cpi)) { 3307 case X86_VENDOR_Intel: 3308 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3309 break; 3310 case X86_VENDOR_Cyrix: 3311 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3312 break; 3313 case X86_VENDOR_AMD: 3314 amd_cache_info(cpi, cpu_devi); 3315 break; 3316 default: 3317 break; 3318 } 3319 3320 mutex_exit(&cpu_node_lock); 3321 } 3322 3323 struct l2info { 3324 int *l2i_csz; 3325 int *l2i_lsz; 3326 int *l2i_assoc; 3327 int l2i_ret; 3328 }; 3329 3330 /* 3331 * A cacheinfo walker that fetches the size, line-size and associativity 3332 * of the L2 cache 3333 */ 3334 static int 3335 intel_l2cinfo(void *arg, const struct cachetab *ct) 3336 { 3337 struct l2info *l2i = arg; 3338 int *ip; 3339 3340 if (ct->ct_label != l2_cache_str && 3341 ct->ct_label != sl2_cache_str) 3342 return (0); /* not an L2 -- keep walking */ 3343 3344 if ((ip = l2i->l2i_csz) != NULL) 3345 *ip = ct->ct_size; 3346 if ((ip = l2i->l2i_lsz) != NULL) 3347 *ip = ct->ct_line_size; 3348 if ((ip = l2i->l2i_assoc) != NULL) 3349 *ip = ct->ct_assoc; 3350 l2i->l2i_ret = ct->ct_size; 3351 return (1); /* was an L2 -- terminate walk */ 3352 } 3353 3354 static void 3355 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3356 { 3357 struct cpuid_regs *cp; 3358 uint_t size, assoc; 3359 int *ip; 3360 3361 if (cpi->cpi_xmaxeax < 0x80000006) 3362 return; 3363 cp = &cpi->cpi_extd[6]; 3364 3365 if ((assoc = BITX(cp->cp_ecx, 15, 12)) != 0 && 3366 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3367 uint_t cachesz = size * 1024; 3368 3369 3370 if ((ip = l2i->l2i_csz) != NULL) 3371 *ip = cachesz; 3372 if ((ip = l2i->l2i_lsz) != NULL) 3373 *ip = BITX(cp->cp_ecx, 7, 0); 3374 if ((ip = l2i->l2i_assoc) != NULL) 3375 *ip = assoc; 3376 l2i->l2i_ret = cachesz; 3377 } 3378 } 3379 3380 int 3381 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3382 { 3383 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3384 struct l2info __l2info, *l2i = &__l2info; 3385 3386 l2i->l2i_csz = csz; 3387 l2i->l2i_lsz = lsz; 3388 l2i->l2i_assoc = assoc; 3389 l2i->l2i_ret = -1; 3390 3391 switch (x86_which_cacheinfo(cpi)) { 3392 case X86_VENDOR_Intel: 3393 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3394 break; 3395 case X86_VENDOR_Cyrix: 3396 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3397 break; 3398 case X86_VENDOR_AMD: 3399 amd_l2cacheinfo(cpi, l2i); 3400 break; 3401 default: 3402 break; 3403 } 3404 return (l2i->l2i_ret); 3405 } 3406 3407 size_t 3408 cpuid_get_mwait_size(cpu_t *cpu) 3409 { 3410 ASSERT(cpuid_checkpass(cpu, 2)); 3411 return (cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max); 3412 } 3413