1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Various routines to handle identification 30 * and classification of x86 processors. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/archsystm.h> 35 #include <sys/x86_archext.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 #include <sys/cmn_err.h> 39 #include <sys/sunddi.h> 40 #include <sys/sunndi.h> 41 #include <sys/cpuvar.h> 42 #include <sys/processor.h> 43 #include <sys/sysmacros.h> 44 #include <sys/pg.h> 45 #include <sys/fp.h> 46 #include <sys/controlregs.h> 47 #include <sys/auxv_386.h> 48 #include <sys/bitmap.h> 49 #include <sys/memnode.h> 50 51 /* 52 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 53 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 54 * them accordingly. For most modern processors, feature detection occurs here 55 * in pass 1. 
56 * 57 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 58 * for the boot CPU and does the basic analysis that the early kernel needs. 59 * x86_feature is set based on the return value of cpuid_pass1() of the boot 60 * CPU. 61 * 62 * Pass 1 includes: 63 * 64 * o Determining vendor/model/family/stepping and setting x86_type and 65 * x86_vendor accordingly. 66 * o Processing the feature flags returned by the cpuid instruction while 67 * applying any workarounds or tricks for the specific processor. 68 * o Mapping the feature flags into Solaris feature bits (X86_*). 69 * o Processing extended feature flags if supported by the processor, 70 * again while applying specific processor knowledge. 71 * o Determining the CMT characteristics of the system. 72 * 73 * Pass 1 is done on non-boot CPUs during their initialization and the results 74 * are used only as a meager attempt at ensuring that all processors within the 75 * system support the same features. 76 * 77 * Pass 2 of cpuid feature analysis happens just at the beginning 78 * of startup(). It just copies in and corrects the remainder 79 * of the cpuid data we depend on: standard cpuid functions that we didn't 80 * need for pass1 feature analysis, and extended cpuid functions beyond the 81 * simple feature processing done in pass1. 82 * 83 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 84 * particular kernel memory allocation has been made available. It creates a 85 * readable brand string based on the data collected in the first two passes. 86 * 87 * Pass 4 of cpuid analysis is invoked after post_startup() when all 88 * the support infrastructure for various hardware features has been 89 * initialized. It determines which processor features will be reported 90 * to userland via the aux vector. 91 * 92 * All passes are executed on all CPUs, but only the boot CPU determines what 93 * features the kernel will use. 
94 * 95 * Much of the worst junk in this file is for the support of processors 96 * that didn't really implement the cpuid instruction properly. 97 * 98 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 99 * the pass numbers. Accordingly, changes to the pass code may require changes 100 * to the accessor code. 101 */ 102 /* Global summary of the processor. cpuid_pass1() stores the vendor code in x86_vendor and, for Intel parts, the processor class in x86_type; x86_feature is set from the return value of cpuid_pass1() on the boot CPU. */ 103 uint_t x86_feature = 0; 104 uint_t x86_vendor = X86_VENDOR_IntelClone; 105 uint_t x86_type = X86_TYPE_OTHER; 106 /* Both flags are set to 1 by cpuid_pass1() when a legacy P6 Intel processor is seen. */ 107 uint_t pentiumpro_bug4046376; 108 uint_t pentiumpro_bug4064495; 109 110 uint_t enable486; 111 112 /* 113 * This string is for processors rumored to support the cpuid 114 * instruction, and is used by locore.s to figure out how to set x86_vendor. 115 * cpuid_pass1() also compares the vendor string against it. */ 116 const char CyrixInstead[] = "CyrixInstead"; 117 118 /* 119 * monitor/mwait info. 120 * 121 * size_actual and buf_actual are the real address and size allocated to get 122 * proper mwait_buf alignment. buf_actual and size_actual should be passed 123 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 124 * processor cache-line alignment, but this is not guaranteed in the future. 
125 */ 126 struct mwait_info { 127 size_t mon_min; /* min size to avoid missed wakeups */ 128 size_t mon_max; /* size to avoid false wakeups */ 129 size_t size_actual; /* size actually allocated */ 130 void *buf_actual; /* memory actually allocated */ 131 uint32_t support; /* MWAIT_* flags: processor support of monitor/mwait */ 132 }; 133 134 /* 135 * These constants determine how many of the elements of the 136 * cpuid we cache in the cpuid_info data structure; the 137 * remaining elements are accessible via the cpuid instruction. 138 */ 139 140 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 141 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 
0x80000008 */ 142 /* Per-cpu cache of raw cpuid register values plus the fields derived from them; filled in pass by pass, starting with cpuid_pass1(). */ 143 struct cpuid_info { 144 uint_t cpi_pass; /* last pass completed */ 145 /* 146 * standard function information 147 */ 148 uint_t cpi_maxeax; /* fn 0: %eax */ 149 char cpi_vendorstr[13]; /* fn 0: %ebx:%edx:%ecx, NUL-terminated */ 150 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 151 152 uint_t cpi_family; /* fn 1: extended family */ 153 uint_t cpi_model; /* fn 1: extended model */ 154 uint_t cpi_step; /* fn 1: stepping */ 155 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 156 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 157 int cpi_clogid; /* fn 1: %ebx: thread # */ 158 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 159 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 160 uint_t cpi_ncache; /* fn 2: number of elements */ 161 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 162 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 163 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 164 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 165 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 166 /* 167 * extended function information 168 */ 169 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 170 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 171 uint8_t cpi_pabits; /* fn 0x80000008: %eax[7-0]; 32, or 36 with PAE, when not reported */ 172 uint8_t cpi_vabits; /* fn 0x80000008: %eax[15-8]; 32 when not reported */ 173 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 174 id_t cpi_coreid; /* core id synthesized by cpuid_pass1() */ 175 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 176 /* Intel: fn 4: %eax[31-26] */ 177 /* 178 * supported feature information 179 */ 180 uint32_t cpi_support[5]; 181 #define STD_EDX_FEATURES 0 182 #define AMD_EDX_FEATURES 1 183 #define TM_EDX_FEATURES 2 184 #define STD_ECX_FEATURES 3 185 #define AMD_ECX_FEATURES 4 186 /* 187 * Synthesized information, where known. 
188 */ 189 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 190 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 191 uint32_t cpi_socket; /* Chip package/socket type */ 192 193 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 194 }; 195 196 /* Statically allocated cpuid_info for cpu 0; cpuid_pass1() installs it because the boot cpu is set up before memory allocation is available. */ 197 static struct cpuid_info cpuid_info0; 198 199 /* 200 * These bit fields are defined by the Intel Application Note AP-485 201 * "Intel Processor Identification and the CPUID Instruction" 202 */ 203 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 204 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 205 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 206 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 207 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 208 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 209 210 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 211 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 212 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 213 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 214 /* fn 1 %ebx is carved into four adjacent, non-overlapping byte fields: brand id [7-0], CLFLUSH chunk count [15-8], logical cpu count [23-16] and initial APIC id [31-24]. CPI_CHUNKS used to start at bit 7 and so picked up the top bit of the brand id. */ 215 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 216 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 8) 217 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 218 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 219 220 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 221 #define CPI_XMAXEAX_MAX 0x80000100 222 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 223 224 /* 225 * Function 4 (Deterministic Cache Parameters) macros 226 * Defined by Intel Application Note AP-485 227 */ 228 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 229 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 230 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 231 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 232 #define CPI_CACHE_LVL(regs) 
BITX((regs)->cp_eax, 7, 5) 233 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 234 235 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 236 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 237 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 238 239 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 240 241 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 242 243 244 /* 245 * A couple of shorthand macros to identify "later" P6-family chips 246 * like the Pentium M and Core. First, the "older" P6-based stuff 247 * (loosely defined as "pre-Pentium-4"): 248 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 249 * 249 * NOTE: these macros use their argument unparenthesized, so pass a plain pointer variable. 249 */ 250 251 #define IS_LEGACY_P6(cpi) ( \ 252 cpi->cpi_family == 6 && \ 253 (cpi->cpi_model == 1 || \ 254 cpi->cpi_model == 3 || \ 255 cpi->cpi_model == 5 || \ 256 cpi->cpi_model == 6 || \ 257 cpi->cpi_model == 7 || \ 258 cpi->cpi_model == 8 || \ 259 cpi->cpi_model == 0xA || \ 260 cpi->cpi_model == 0xB) \ 261 ) 262 263 /* A "new F6" is everything with family 6 that's not the above */ 264 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 265 266 /* Extended family/model support */ 267 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 268 cpi->cpi_family >= 0xf) 269 270 /* 271 * AMD family 0xf socket types. 272 * First index is 0 for revs B thru E, 1 for F and G (the rm_sktidx value 272 * of the matching amd_revmap entry). 273 * Second index is (model & 0x3). 274 */ 275 static uint32_t amd_skts[2][4] = { 276 { 277 X86_SOCKET_754, /* 0b00 */ 278 X86_SOCKET_940, /* 0b01 */ 279 X86_SOCKET_754, /* 0b10 */ 280 X86_SOCKET_939 /* 0b11 */ 281 }, 282 { 283 X86_SOCKET_S1g1, /* 0b00 */ 284 X86_SOCKET_F1207, /* 0b01 */ 285 X86_SOCKET_UNKNOWN, /* 0b10 */ 286 X86_SOCKET_AM2 /* 0b11 */ 287 } 288 }; 289 290 /* 291 * Table for mapping AMD Family 0xf model/stepping combination to 292 * chip "revision" and socket type. 
Only rm_family 0xf is used at the 293 * moment, but AMD family 0x10 will extend the existing revision names 294 * so will likely also use this table. 295 * 296 * The first member of this array that matches a given family, extended model 297 * plus model range, and stepping range will be considered a match, so more 297 * specific entries must precede the catch-all ranges that overlap them. 298 */ 299 static const struct amd_rev_mapent { 300 uint_t rm_family; /* cpi_family to match */ 301 uint_t rm_modello; /* lowest cpi_model matched (inclusive) */ 302 uint_t rm_modelhi; /* highest cpi_model matched (inclusive) */ 303 uint_t rm_steplo; /* lowest cpi_step matched (inclusive) */ 304 uint_t rm_stephi; /* highest cpi_step matched (inclusive) */ 305 uint32_t rm_chiprev; /* value stored in cpi_chiprev */ 306 const char *rm_chiprevstr; /* value stored in cpi_chiprevstr */ 307 int rm_sktidx; /* first index into amd_skts */ 308 } amd_revmap[] = { 309 /* 310 * Rev B includes model 0x4 stepping 0 and model 0x5 stepping 0 and 1. 311 */ 312 { 0xf, 0x04, 0x04, 0x0, 0x0, X86_CHIPREV_AMD_F_REV_B, "B", 0 }, 313 { 0xf, 0x05, 0x05, 0x0, 0x1, X86_CHIPREV_AMD_F_REV_B, "B", 0 }, 314 /* 315 * Rev C0 includes model 0x4 stepping 8 and model 0x5 stepping 8 316 */ 317 { 0xf, 0x04, 0x05, 0x8, 0x8, X86_CHIPREV_AMD_F_REV_C0, "C0", 0 }, 318 /* 319 * Rev CG is the rest of extended model 0x0 - i.e., everything 320 * but the rev B and C0 combinations covered above. 321 */ 322 { 0xf, 0x00, 0x0f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_CG, "CG", 0 }, 323 /* 324 * Rev D has extended model 0x1. 325 */ 326 { 0xf, 0x10, 0x1f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_D, "D", 0 }, 327 /* 328 * Rev E has extended model 0x2. 329 * Extended model 0x3 is unused but available to grow into. 330 */ 331 { 0xf, 0x20, 0x3f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_E, "E", 0 }, 332 /* 333 * Rev F has extended models 0x4 and 0x5. 334 */ 335 { 0xf, 0x40, 0x5f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_F, "F", 1 }, 336 /* 337 * Rev G has extended model 0x6. 338 */ 339 { 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", 1 }, 340 }; 341 342 /* 343 * Info for monitor/mwait idle loop. 344 * 345 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 346 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 347 * 2006. 
348 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 349 * Documentation Updates" #33633, Rev 2.05, December 2006. 350 */ 351 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 352 #define MWAIT_EXTENSIONS (0x00000002) /* extensions supported */ 353 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 354 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 355 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 356 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 357 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 358 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 359 /* 360 * Number of sub-cstates for a given c-state. 360 * c_state is used as the low bit position of a 4-bit field in fn 5 %edx; 360 * it is not parenthesized in the expansion, so pass a simple expression. 361 */ 362 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 363 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 364 365 static void intel_cpuid_4_cache_info(void *, struct cpuid_info *); 366 /* Look up the family/model/stepping of this cpu in amd_revmap and, on the first matching entry, record the chip revision, its name and the socket type in *cpi. The fields are left untouched when the family is not 0xf or no entry matches. */ 367 static void 368 synth_amd_info(struct cpuid_info *cpi) 369 { 370 const struct amd_rev_mapent *rmp; 371 uint_t family, model, step; 372 int i; 373 374 /* 375 * Currently only AMD family 0xf uses these fields. 
376 */ 377 if (cpi->cpi_family != 0xf) 378 return; 379 380 family = cpi->cpi_family; 381 model = cpi->cpi_model; 382 step = cpi->cpi_step; 383 /* The first matching amd_revmap entry wins; the low two model bits then select the socket within the row chosen by rm_sktidx. */ 384 for (i = 0, rmp = amd_revmap; i < sizeof (amd_revmap) / sizeof (*rmp); 385 i++, rmp++) { 386 if (family == rmp->rm_family && 387 model >= rmp->rm_modello && model <= rmp->rm_modelhi && 388 step >= rmp->rm_steplo && step <= rmp->rm_stephi) { 389 cpi->cpi_chiprev = rmp->rm_chiprev; 390 cpi->cpi_chiprevstr = rmp->rm_chiprevstr; 391 cpi->cpi_socket = amd_skts[rmp->rm_sktidx][model & 0x3]; 392 return; 393 } 394 } 395 } 396 /* Reset the synthesized chip revision and socket fields of *cpi to their unknown defaults, then let the vendor-specific routine (currently AMD only) overwrite them where it recognizes the part. */ 397 static void 398 synth_info(struct cpuid_info *cpi) 399 { 400 cpi->cpi_chiprev = X86_CHIPREV_UNKNOWN; 401 cpi->cpi_chiprevstr = "Unknown"; 402 cpi->cpi_socket = X86_SOCKET_UNKNOWN; 403 404 switch (cpi->cpi_vendor) { 405 case X86_VENDOR_AMD: 406 synth_amd_info(cpi); 407 break; 408 409 default: 410 break; 411 412 } 413 } 414 415 /* 416 * Apply various platform-dependent restrictions where the 417 * underlying platform restrictions mean the CPU can be marked 418 * as less capable than its cpuid instruction would imply. 418 * On this platform the hook expands to nothing. 419 */ 420 421 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 422 423 /* 424 * Some undocumented ways of patching the results of the cpuid 425 * instruction to permit running Solaris 10 on future cpus that 426 * we don't currently support. Could be set to non-zero values 427 * via settings in eeprom. 427 * cpuid_pass1() ORs the include masks into, and then clears the exclude 427 * masks from, the fn 1 %ecx and %edx feature words. 428 */ 429 430 uint32_t cpuid_feature_ecx_include; 431 uint32_t cpuid_feature_ecx_exclude; 432 uint32_t cpuid_feature_edx_include; 433 uint32_t cpuid_feature_edx_exclude; 434 /* Allocate a zeroed cpuid_info for a non-boot cpu and attach it to cpu->cpu_m.mcpu_cpi. */ 435 void 436 cpuid_alloc_space(cpu_t *cpu) 437 { 438 /* 439 * By convention, cpu0 is the boot cpu, which is set up 440 * before memory allocation is available. All other cpus get 441 * their cpuid_info struct allocated here. 
442 */ 443 ASSERT(cpu->cpu_id != 0); 444 cpu->cpu_m.mcpu_cpi = 445 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 446 } 447 /* Release the cpuid_info of a non-boot cpu, together with the function 4 pointer array and the per-entry register blocks it refers to. Must not be called for cpu 0, whose cpuid_info is static. */ 448 void 449 cpuid_free_space(cpu_t *cpu) 450 { 451 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 452 int i; 453 454 ASSERT(cpu->cpu_id != 0); 455 456 /* 457 * Free up any function 4 related dynamic storage 457 * 457 * NOTE(review): element 0 is never freed individually -- presumably 457 * cpi_std_4[0] is not a separate allocation; confirm against the code 457 * that populates cpi_std_4. 458 */ 459 for (i = 1; i < cpi->cpi_std_4_size; i++) 460 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 461 if (cpi->cpi_std_4_size > 0) 462 kmem_free(cpi->cpi_std_4, 463 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 464 465 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 466 } 467 /* Pass 1 of cpuid analysis for the given cpu: executes the cpuid instruction, records vendor, family/model/stepping, feature words and chip/core topology in the cpuid_info of the cpu, marks pass 1 complete, and returns the X86_* feature word derived for it. cpu 0 uses the statically allocated cpuid_info0; every other cpu must already have a cpuid_info attached. */ 468 uint_t 469 cpuid_pass1(cpu_t *cpu) 470 { 471 uint32_t mask_ecx, mask_edx; 472 uint_t feature = X86_CPUID; 473 struct cpuid_info *cpi; 474 struct cpuid_regs *cp; 475 int xcpuid; 476 extern int idle_cpu_prefer_mwait; 477 478 479 /* 480 * Space statically allocated for cpu0, ensure pointer is set 481 */ 482 if (cpu->cpu_id == 0) 483 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 484 cpi = cpu->cpu_m.mcpu_cpi; 485 ASSERT(cpi != NULL); 486 cp = &cpi->cpi_std[0]; 487 cp->cp_eax = 0; 488 cpi->cpi_maxeax = __cpuid_insn(cp); /* Assemble the 12-byte vendor string from %ebx, %edx, %ecx (in that order) and NUL-terminate it. */ 489 { 490 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 491 *iptr++ = cp->cp_ebx; 492 *iptr++ = cp->cp_edx; 493 *iptr++ = cp->cp_ecx; 494 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 495 } 496 497 /* 498 * Map the vendor string to a type code 499 */ 500 if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0) 501 cpi->cpi_vendor = X86_VENDOR_Intel; 502 else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0) 503 cpi->cpi_vendor = X86_VENDOR_AMD; 504 else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0) 505 cpi->cpi_vendor = X86_VENDOR_TM; 506 else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0) 507 /* 508 * CyrixInstead is a variable used by the Cyrix detection code 509 * in locore. 
510 */ 511 cpi->cpi_vendor = X86_VENDOR_Cyrix; 512 else if (strcmp(cpi->cpi_vendorstr, "UMC UMC UMC ") == 0) 513 cpi->cpi_vendor = X86_VENDOR_UMC; 514 else if (strcmp(cpi->cpi_vendorstr, "NexGenDriven") == 0) 515 cpi->cpi_vendor = X86_VENDOR_NexGen; 516 else if (strcmp(cpi->cpi_vendorstr, "CentaurHauls") == 0) 517 cpi->cpi_vendor = X86_VENDOR_Centaur; 518 else if (strcmp(cpi->cpi_vendorstr, "RiseRiseRise") == 0) 519 cpi->cpi_vendor = X86_VENDOR_Rise; 520 else if (strcmp(cpi->cpi_vendorstr, "SiS SiS SiS ") == 0) 521 cpi->cpi_vendor = X86_VENDOR_SiS; 522 else if (strcmp(cpi->cpi_vendorstr, "Geode by NSC") == 0) 523 cpi->cpi_vendor = X86_VENDOR_NSC; 524 else 525 cpi->cpi_vendor = X86_VENDOR_IntelClone; 526 527 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 528 529 /* 530 * Limit the range in case of weird hardware 531 */ 532 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 533 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 534 if (cpi->cpi_maxeax < 1) 535 goto pass1_done; 536 537 cp = &cpi->cpi_std[1]; 538 cp->cp_eax = 1; 539 (void) __cpuid_insn(cp); 540 541 /* 542 * Extract identifying constants for easy access. 543 */ 544 cpi->cpi_model = CPI_MODEL(cpi); 545 cpi->cpi_family = CPI_FAMILY(cpi); 546 547 if (cpi->cpi_family == 0xf) 548 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 549 550 /* 551 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 552 * Intel, and presumably everyone else, uses model == 0xf, as 553 * one would expect (max value means possible overflow). Sigh. 
554 */ 555 556 switch (cpi->cpi_vendor) { 557 case X86_VENDOR_Intel: 558 if (IS_EXTENDED_MODEL_INTEL(cpi)) 559 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 560 break; 561 case X86_VENDOR_AMD: 562 if (CPI_FAMILY(cpi) == 0xf) 563 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 564 break; 565 default: 566 if (cpi->cpi_model == 0xf) 567 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 568 break; 569 } 570 571 cpi->cpi_step = CPI_STEP(cpi); 572 cpi->cpi_brandid = CPI_BRANDID(cpi); 573 574 /* 575 * *default* assumptions: 576 * - believe %edx feature word 577 * - ignore %ecx feature word 578 * - 32-bit virtual and physical addressing 579 */ 580 mask_edx = 0xffffffff; 581 mask_ecx = 0; 582 583 cpi->cpi_pabits = cpi->cpi_vabits = 32; 584 585 switch (cpi->cpi_vendor) { 586 case X86_VENDOR_Intel: 587 if (cpi->cpi_family == 5) 588 x86_type = X86_TYPE_P5; 589 else if (IS_LEGACY_P6(cpi)) { 590 x86_type = X86_TYPE_P6; 591 pentiumpro_bug4046376 = 1; 592 pentiumpro_bug4064495 = 1; 593 /* 594 * Clear the SEP bit when it was set erroneously 595 */ 596 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 597 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 598 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 599 x86_type = X86_TYPE_P4; 600 /* 601 * We don't currently depend on any of the %ecx 602 * features until Prescott, so we'll only check 603 * this from P4 onwards. We might want to revisit 604 * that idea later. 605 */ 606 mask_ecx = 0xffffffff; 607 } else if (cpi->cpi_family > 0xf) 608 mask_ecx = 0xffffffff; 609 /* 610 * We don't support MONITOR/MWAIT if leaf 5 is not available 611 * to obtain the monitor linesize. 
612 */ 613 if (cpi->cpi_maxeax < 5) 614 mask_ecx &= ~CPUID_INTC_ECX_MON; 615 break; 616 case X86_VENDOR_IntelClone: 617 default: 618 break; 619 case X86_VENDOR_AMD: 620 #if defined(OPTERON_ERRATUM_108) 621 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 622 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 623 cpi->cpi_model = 0xc; 624 } else 625 #endif 626 if (cpi->cpi_family == 5) { 627 /* 628 * AMD K5 and K6 629 * 630 * These CPUs have an incomplete implementation 631 * of MCA/MCE which we mask away. 632 */ 633 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 634 635 /* 636 * Model 0 uses the wrong (APIC) bit 637 * to indicate PGE. Fix it here. 638 */ 639 if (cpi->cpi_model == 0) { 640 if (cp->cp_edx & 0x200) { 641 cp->cp_edx &= ~0x200; 642 cp->cp_edx |= CPUID_INTC_EDX_PGE; 643 } 644 } 645 646 /* 647 * Early models had problems w/ MMX; disable. 648 */ 649 if (cpi->cpi_model < 6) 650 mask_edx &= ~CPUID_INTC_EDX_MMX; 651 } 652 653 /* 654 * For newer families, SSE3 and CX16, at least, are valid; 655 * enable all 656 */ 657 if (cpi->cpi_family >= 0xf) 658 mask_ecx = 0xffffffff; 659 /* 660 * We don't support MONITOR/MWAIT if leaf 5 is not available 661 * to obtain the monitor linesize. 662 */ 663 if (cpi->cpi_maxeax < 5) 664 mask_ecx &= ~CPUID_INTC_ECX_MON; 665 666 /* 667 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 668 * processors. AMD does not intend MWAIT to be used in the cpu 669 * idle loop on current and future processors. 10h and future 670 * AMD processors use more power in MWAIT than HLT. 671 * Pre-family-10h Opterons do not have the MWAIT instruction. 
672 */ 673 idle_cpu_prefer_mwait = 0; 674 675 break; 676 case X86_VENDOR_TM: 677 /* 678 * workaround the NT workaround in CMS 4.1 679 */ 680 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 681 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 682 cp->cp_edx |= CPUID_INTC_EDX_CX8; 683 break; 684 case X86_VENDOR_Centaur: 685 /* 686 * workaround the NT workarounds again 687 */ 688 if (cpi->cpi_family == 6) 689 cp->cp_edx |= CPUID_INTC_EDX_CX8; 690 break; 691 case X86_VENDOR_Cyrix: 692 /* 693 * We rely heavily on the probing in locore 694 * to actually figure out what parts, if any, 695 * of the Cyrix cpuid instruction to believe. 696 */ 697 switch (x86_type) { 698 case X86_TYPE_CYRIX_486: 699 mask_edx = 0; 700 break; 701 case X86_TYPE_CYRIX_6x86: 702 mask_edx = 0; 703 break; 704 case X86_TYPE_CYRIX_6x86L: 705 mask_edx = 706 CPUID_INTC_EDX_DE | 707 CPUID_INTC_EDX_CX8; 708 break; 709 case X86_TYPE_CYRIX_6x86MX: 710 mask_edx = 711 CPUID_INTC_EDX_DE | 712 CPUID_INTC_EDX_MSR | 713 CPUID_INTC_EDX_CX8 | 714 CPUID_INTC_EDX_PGE | 715 CPUID_INTC_EDX_CMOV | 716 CPUID_INTC_EDX_MMX; 717 break; 718 case X86_TYPE_CYRIX_GXm: 719 mask_edx = 720 CPUID_INTC_EDX_MSR | 721 CPUID_INTC_EDX_CX8 | 722 CPUID_INTC_EDX_CMOV | 723 CPUID_INTC_EDX_MMX; 724 break; 725 case X86_TYPE_CYRIX_MediaGX: 726 break; 727 case X86_TYPE_CYRIX_MII: 728 case X86_TYPE_VIA_CYRIX_III: 729 mask_edx = 730 CPUID_INTC_EDX_DE | 731 CPUID_INTC_EDX_TSC | 732 CPUID_INTC_EDX_MSR | 733 CPUID_INTC_EDX_CX8 | 734 CPUID_INTC_EDX_PGE | 735 CPUID_INTC_EDX_CMOV | 736 CPUID_INTC_EDX_MMX; 737 break; 738 default: 739 break; 740 } 741 break; 742 } 743 744 /* 745 * Now we've figured out the masks that determine 746 * which bits we choose to believe, apply the masks 747 * to the feature words, then map the kernel's view 748 * of these feature words into its feature word. 
749 */ 750 cp->cp_edx &= mask_edx; 751 cp->cp_ecx &= mask_ecx; 752 753 /* 754 * apply any platform restrictions (we don't call this 755 * immediately after __cpuid_insn here, because we need the 756 * workarounds applied above first) 757 */ 758 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 759 760 /* 761 * fold in overrides from the "eeprom" mechanism 762 */ 763 cp->cp_edx |= cpuid_feature_edx_include; 764 cp->cp_edx &= ~cpuid_feature_edx_exclude; 765 766 cp->cp_ecx |= cpuid_feature_ecx_include; 767 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 768 769 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 770 feature |= X86_LARGEPAGE; 771 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 772 feature |= X86_TSC; 773 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 774 feature |= X86_MSR; 775 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 776 feature |= X86_MTRR; 777 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 778 feature |= X86_PGE; 779 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 780 feature |= X86_CMOV; 781 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 782 feature |= X86_MMX; 783 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 784 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 785 feature |= X86_MCA; 786 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 787 feature |= X86_PAE; 788 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 789 feature |= X86_CX8; 790 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 791 feature |= X86_CX16; 792 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 793 feature |= X86_PAT; 794 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 795 feature |= X86_SEP; 796 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 797 /* 798 * In our implementation, fxsave/fxrstor 799 * are prerequisites before we'll even 800 * try and do SSE things. 
801 */ 802 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 803 feature |= X86_SSE; 804 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 805 feature |= X86_SSE2; 806 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 807 feature |= X86_SSE3; 808 } 809 if (cp->cp_edx & CPUID_INTC_EDX_DE) 810 feature |= X86_DE; 811 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 812 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 813 feature |= X86_MWAIT; 814 } 815 816 if (feature & X86_PAE) 817 cpi->cpi_pabits = 36; 818 819 /* 820 * Hyperthreading configuration is slightly tricky on Intel 821 * and pure clones, and even trickier on AMD. 822 * 823 * (AMD chose to set the HTT bit on their CMP processors, 824 * even though they're not actually hyperthreaded. Thus it 825 * takes a bit more work to figure out what's really going 826 * on ... see the handling of the CMP_LGCY bit below) 827 */ 828 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 829 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 830 if (cpi->cpi_ncpu_per_chip > 1) 831 feature |= X86_HTT; 832 } else { 833 cpi->cpi_ncpu_per_chip = 1; 834 } 835 836 /* 837 * Work on the "extended" feature information, doing 838 * some basic initialization for cpuid_pass2() 839 */ 840 xcpuid = 0; 841 switch (cpi->cpi_vendor) { 842 case X86_VENDOR_Intel: 843 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 844 xcpuid++; 845 break; 846 case X86_VENDOR_AMD: 847 if (cpi->cpi_family > 5 || 848 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 849 xcpuid++; 850 break; 851 case X86_VENDOR_Cyrix: 852 /* 853 * Only these Cyrix CPUs are -known- to support 854 * extended cpuid operations. 
855 */ 856 if (x86_type == X86_TYPE_VIA_CYRIX_III || 857 x86_type == X86_TYPE_CYRIX_GXm) 858 xcpuid++; 859 break; 860 case X86_VENDOR_Centaur: 861 case X86_VENDOR_TM: 862 default: 863 xcpuid++; 864 break; 865 } 866 867 if (xcpuid) { 868 cp = &cpi->cpi_extd[0]; 869 cp->cp_eax = 0x80000000; 870 cpi->cpi_xmaxeax = __cpuid_insn(cp); 871 } 872 873 if (cpi->cpi_xmaxeax & 0x80000000) { 874 875 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 876 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 877 878 switch (cpi->cpi_vendor) { 879 case X86_VENDOR_Intel: 880 case X86_VENDOR_AMD: 881 if (cpi->cpi_xmaxeax < 0x80000001) 882 break; 883 cp = &cpi->cpi_extd[1]; 884 cp->cp_eax = 0x80000001; 885 (void) __cpuid_insn(cp); 886 887 if (cpi->cpi_vendor == X86_VENDOR_AMD && 888 cpi->cpi_family == 5 && 889 cpi->cpi_model == 6 && 890 cpi->cpi_step == 6) { 891 /* 892 * K6 model 6 uses bit 10 to indicate SYSC 893 * Later models use bit 11. Fix it here. 894 */ 895 if (cp->cp_edx & 0x400) { 896 cp->cp_edx &= ~0x400; 897 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 898 } 899 } 900 901 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 902 903 /* 904 * Compute the additions to the kernel's feature word. 905 */ 906 if (cp->cp_edx & CPUID_AMD_EDX_NX) 907 feature |= X86_NX; 908 909 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 910 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 911 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 912 feature |= X86_SSE4A; 913 914 /* 915 * If both the HTT and CMP_LGCY bits are set, 916 * then we're not actually HyperThreaded. Read 917 * "AMD CPUID Specification" for more details. 918 */ 919 if (cpi->cpi_vendor == X86_VENDOR_AMD && 920 (feature & X86_HTT) && 921 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 922 feature &= ~X86_HTT; 923 feature |= X86_CMP; 924 } 925 #if defined(__amd64) 926 /* 927 * It's really tricky to support syscall/sysret in 928 * the i386 kernel; we rely on sysenter/sysexit 929 * instead. In the amd64 kernel, things are -way- 930 * better. 
931 */ 932 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 933 feature |= X86_ASYSC; 934 935 /* 936 * While we're thinking about system calls, note 937 * that AMD processors don't support sysenter 938 * in long mode at all, so don't try to program them. 939 */ 940 if (x86_vendor == X86_VENDOR_AMD) 941 feature &= ~X86_SEP; 942 #endif 943 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) 944 feature |= X86_TSCP; 945 break; 946 default: 947 break; 948 } 949 950 /* 951 * Get CPUID data about processor cores and hyperthreads. 952 */ 953 switch (cpi->cpi_vendor) { 954 case X86_VENDOR_Intel: 955 if (cpi->cpi_maxeax >= 4) { 956 cp = &cpi->cpi_std[4]; 957 cp->cp_eax = 4; 958 cp->cp_ecx = 0; 959 (void) __cpuid_insn(cp); 960 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 961 } 962 /*FALLTHROUGH*/ 963 case X86_VENDOR_AMD: 964 if (cpi->cpi_xmaxeax < 0x80000008) 965 break; 966 cp = &cpi->cpi_extd[8]; 967 cp->cp_eax = 0x80000008; 968 (void) __cpuid_insn(cp); 969 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 970 971 /* 972 * Virtual and physical address limits from 973 * cpuid override previously guessed values. 974 */ 975 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 976 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 977 break; 978 default: 979 break; 980 } 981 982 /* 983 * Derive the number of cores per chip 984 */ 985 switch (cpi->cpi_vendor) { 986 case X86_VENDOR_Intel: 987 if (cpi->cpi_maxeax < 4) { 988 cpi->cpi_ncore_per_chip = 1; 989 break; 990 } else { 991 cpi->cpi_ncore_per_chip = 992 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 993 } 994 break; 995 case X86_VENDOR_AMD: 996 if (cpi->cpi_xmaxeax < 0x80000008) { 997 cpi->cpi_ncore_per_chip = 1; 998 break; 999 } else { 1000 cpi->cpi_ncore_per_chip = 1001 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1002 } 1003 break; 1004 default: 1005 cpi->cpi_ncore_per_chip = 1; 1006 break; 1007 } 1008 } 1009 1010 /* 1011 * If more than one core, then this processor is CMP. 
1012 */ 1013 if (cpi->cpi_ncore_per_chip > 1) 1014 feature |= X86_CMP; 1015 1016 /* 1017 * If the number of cores is the same as the number 1018 * of CPUs, then we cannot have HyperThreading. 1019 */ 1020 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1021 feature &= ~X86_HTT; 1022 1023 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1024 /* 1025 * Single-core single-threaded processors. 1026 */ 1027 cpi->cpi_chipid = -1; 1028 cpi->cpi_clogid = 0; 1029 cpi->cpi_coreid = cpu->cpu_id; 1030 } else if (cpi->cpi_ncpu_per_chip > 1) { 1031 uint_t i; 1032 uint_t chipid_shift = 0; 1033 uint_t coreid_shift = 0; 1034 uint_t apic_id = CPI_APIC_ID(cpi); 1035 1036 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1037 chipid_shift++; 1038 cpi->cpi_chipid = apic_id >> chipid_shift; 1039 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1040 1041 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1042 if (feature & X86_CMP) { 1043 /* 1044 * Multi-core (and possibly multi-threaded) 1045 * processors. 1046 */ 1047 uint_t ncpu_per_core; 1048 if (cpi->cpi_ncore_per_chip == 1) 1049 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1050 else if (cpi->cpi_ncore_per_chip > 1) 1051 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1052 cpi->cpi_ncore_per_chip; 1053 /* 1054 * 8bit APIC IDs on dual core Pentiums 1055 * look like this: 1056 * 1057 * +-----------------------+------+------+ 1058 * | Physical Package ID | MC | HT | 1059 * +-----------------------+------+------+ 1060 * <------- chipid --------> 1061 * <------- coreid ---------------> 1062 * <--- clogid --> 1063 * 1064 * Where the number of bits necessary to 1065 * represent MC and HT fields together equals 1066 * to the minimum number of bits necessary to 1067 * store the value of cpi->cpi_ncpu_per_chip. 1068 * Of those bits, the MC part uses the number 1069 * of bits necessary to store the value of 1070 * cpi->cpi_ncore_per_chip. 
1071 */ 1072 for (i = 1; i < ncpu_per_core; i <<= 1) 1073 coreid_shift++; 1074 cpi->cpi_coreid = apic_id >> coreid_shift; 1075 } else if (feature & X86_HTT) { 1076 /* 1077 * Single-core multi-threaded processors. 1078 */ 1079 cpi->cpi_coreid = cpi->cpi_chipid; 1080 } 1081 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1082 /* 1083 * AMD currently only has dual-core processors with 1084 * single-threaded cores. If they ever release 1085 * multi-threaded processors, then this code 1086 * will have to be updated. 1087 */ 1088 cpi->cpi_coreid = cpu->cpu_id; 1089 } else { 1090 /* 1091 * All other processors are currently 1092 * assumed to have single cores. 1093 */ 1094 cpi->cpi_coreid = cpi->cpi_chipid; 1095 } 1096 } 1097 1098 /* 1099 * Synthesize chip "revision" and socket type 1100 */ 1101 synth_info(cpi); 1102 1103 pass1_done: 1104 cpi->cpi_pass = 1; 1105 return (feature); 1106 } 1107 1108 /* 1109 * Make copies of the cpuid table entries we depend on, in 1110 * part for ease of parsing now, in part so that we have only 1111 * one place to correct any of it, in part for ease of 1112 * later export to userland, and in part so we can look at 1113 * this stuff in a crash dump. 1114 */ 1115 1116 /*ARGSUSED*/ 1117 void 1118 cpuid_pass2(cpu_t *cpu) 1119 { 1120 uint_t n, nmax; 1121 int i; 1122 struct cpuid_regs *cp; 1123 uint8_t *dp; 1124 uint32_t *iptr; 1125 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1126 1127 ASSERT(cpi->cpi_pass == 1); 1128 1129 if (cpi->cpi_maxeax < 1) 1130 goto pass2_done; 1131 1132 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1133 nmax = NMAX_CPI_STD; 1134 /* 1135 * (We already handled n == 0 and n == 1 in pass 1) 1136 */ 1137 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1138 cp->cp_eax = n; 1139 1140 /* 1141 * CPUID function 4 expects %ecx to be initialized 1142 * with an index which indicates which cache to return 1143 * information about. The OS is expected to call function 4 1144 * with %ecx set to 0, 1, 2, ... 
until it returns with 1145 * EAX[4:0] set to 0, which indicates there are no more 1146 * caches. 1147 * 1148 * Here, populate cpi_std[4] with the information returned by 1149 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1150 * when dynamic memory allocation becomes available. 1151 * 1152 * Note: we need to explicitly initialize %ecx here, since 1153 * function 4 may have been previously invoked. 1154 */ 1155 if (n == 4) 1156 cp->cp_ecx = 0; 1157 1158 (void) __cpuid_insn(cp); 1159 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1160 switch (n) { 1161 case 2: 1162 /* 1163 * "the lower 8 bits of the %eax register 1164 * contain a value that identifies the number 1165 * of times the cpuid [instruction] has to be 1166 * executed to obtain a complete image of the 1167 * processor's caching systems." 1168 * 1169 * How *do* they make this stuff up? 1170 */ 1171 cpi->cpi_ncache = sizeof (*cp) * 1172 BITX(cp->cp_eax, 7, 0); 1173 if (cpi->cpi_ncache == 0) 1174 break; 1175 cpi->cpi_ncache--; /* skip count byte */ 1176 1177 /* 1178 * Well, for now, rather than attempt to implement 1179 * this slightly dubious algorithm, we just look 1180 * at the first 15 .. 
1181 */ 1182 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1183 cpi->cpi_ncache = sizeof (*cp) - 1; 1184 1185 dp = cpi->cpi_cacheinfo; 1186 if (BITX(cp->cp_eax, 31, 31) == 0) { 1187 uint8_t *p = (void *)&cp->cp_eax; 1188 for (i = 1; i < 3; i++) 1189 if (p[i] != 0) 1190 *dp++ = p[i]; 1191 } 1192 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1193 uint8_t *p = (void *)&cp->cp_ebx; 1194 for (i = 0; i < 4; i++) 1195 if (p[i] != 0) 1196 *dp++ = p[i]; 1197 } 1198 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1199 uint8_t *p = (void *)&cp->cp_ecx; 1200 for (i = 0; i < 4; i++) 1201 if (p[i] != 0) 1202 *dp++ = p[i]; 1203 } 1204 if (BITX(cp->cp_edx, 31, 31) == 0) { 1205 uint8_t *p = (void *)&cp->cp_edx; 1206 for (i = 0; i < 4; i++) 1207 if (p[i] != 0) 1208 *dp++ = p[i]; 1209 } 1210 break; 1211 1212 case 3: /* Processor serial number, if PSN supported */ 1213 break; 1214 1215 case 4: /* Deterministic cache parameters */ 1216 break; 1217 1218 case 5: /* Monitor/Mwait parameters */ 1219 { 1220 size_t mwait_size; 1221 1222 /* 1223 * check cpi_mwait.support which was set in cpuid_pass1 1224 */ 1225 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1226 break; 1227 1228 /* 1229 * Protect ourself from insane mwait line size. 1230 * Workaround for incomplete hardware emulator(s). 
1231 */ 1232 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1233 if (mwait_size < sizeof (uint32_t) || 1234 !ISP2(mwait_size)) { 1235 #if DEBUG 1236 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1237 "size %ld", 1238 cpu->cpu_id, (long)mwait_size); 1239 #endif 1240 break; 1241 } 1242 1243 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1244 cpi->cpi_mwait.mon_max = mwait_size; 1245 if (MWAIT_EXTENSION(cpi)) { 1246 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1247 if (MWAIT_INT_ENABLE(cpi)) 1248 cpi->cpi_mwait.support |= 1249 MWAIT_ECX_INT_ENABLE; 1250 } 1251 break; 1252 } 1253 default: 1254 break; 1255 } 1256 } 1257 1258 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1259 goto pass2_done; 1260 1261 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1262 nmax = NMAX_CPI_EXTD; 1263 /* 1264 * Copy the extended properties, fixing them as we go. 1265 * (We already handled n == 0 and n == 1 in pass 1) 1266 */ 1267 iptr = (void *)cpi->cpi_brandstr; 1268 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1269 cp->cp_eax = 0x80000000 + n; 1270 (void) __cpuid_insn(cp); 1271 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1272 switch (n) { 1273 case 2: 1274 case 3: 1275 case 4: 1276 /* 1277 * Extract the brand string 1278 */ 1279 *iptr++ = cp->cp_eax; 1280 *iptr++ = cp->cp_ebx; 1281 *iptr++ = cp->cp_ecx; 1282 *iptr++ = cp->cp_edx; 1283 break; 1284 case 5: 1285 switch (cpi->cpi_vendor) { 1286 case X86_VENDOR_AMD: 1287 /* 1288 * The Athlon and Duron were the first 1289 * parts to report the sizes of the 1290 * TLB for large pages. Before then, 1291 * we don't trust the data. 1292 */ 1293 if (cpi->cpi_family < 6 || 1294 (cpi->cpi_family == 6 && 1295 cpi->cpi_model < 1)) 1296 cp->cp_eax = 0; 1297 break; 1298 default: 1299 break; 1300 } 1301 break; 1302 case 6: 1303 switch (cpi->cpi_vendor) { 1304 case X86_VENDOR_AMD: 1305 /* 1306 * The Athlon and Duron were the first 1307 * AMD parts with L2 TLB's. 1308 * Before then, don't trust the data. 
1309 */ 1310 if (cpi->cpi_family < 6 || 1311 cpi->cpi_family == 6 && 1312 cpi->cpi_model < 1) 1313 cp->cp_eax = cp->cp_ebx = 0; 1314 /* 1315 * AMD Duron rev A0 reports L2 1316 * cache size incorrectly as 1K 1317 * when it is really 64K 1318 */ 1319 if (cpi->cpi_family == 6 && 1320 cpi->cpi_model == 3 && 1321 cpi->cpi_step == 0) { 1322 cp->cp_ecx &= 0xffff; 1323 cp->cp_ecx |= 0x400000; 1324 } 1325 break; 1326 case X86_VENDOR_Cyrix: /* VIA C3 */ 1327 /* 1328 * VIA C3 processors are a bit messed 1329 * up w.r.t. encoding cache sizes in %ecx 1330 */ 1331 if (cpi->cpi_family != 6) 1332 break; 1333 /* 1334 * model 7 and 8 were incorrectly encoded 1335 * 1336 * xxx is model 8 really broken? 1337 */ 1338 if (cpi->cpi_model == 7 || 1339 cpi->cpi_model == 8) 1340 cp->cp_ecx = 1341 BITX(cp->cp_ecx, 31, 24) << 16 | 1342 BITX(cp->cp_ecx, 23, 16) << 12 | 1343 BITX(cp->cp_ecx, 15, 8) << 8 | 1344 BITX(cp->cp_ecx, 7, 0); 1345 /* 1346 * model 9 stepping 1 has wrong associativity 1347 */ 1348 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1349 cp->cp_ecx |= 8 << 12; 1350 break; 1351 case X86_VENDOR_Intel: 1352 /* 1353 * Extended L2 Cache features function. 1354 * First appeared on Prescott. 
1355 */ 1356 default: 1357 break; 1358 } 1359 break; 1360 default: 1361 break; 1362 } 1363 } 1364 1365 pass2_done: 1366 cpi->cpi_pass = 2; 1367 } 1368 1369 static const char * 1370 intel_cpubrand(const struct cpuid_info *cpi) 1371 { 1372 int i; 1373 1374 if ((x86_feature & X86_CPUID) == 0 || 1375 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1376 return ("i486"); 1377 1378 switch (cpi->cpi_family) { 1379 case 5: 1380 return ("Intel Pentium(r)"); 1381 case 6: 1382 switch (cpi->cpi_model) { 1383 uint_t celeron, xeon; 1384 const struct cpuid_regs *cp; 1385 case 0: 1386 case 1: 1387 case 2: 1388 return ("Intel Pentium(r) Pro"); 1389 case 3: 1390 case 4: 1391 return ("Intel Pentium(r) II"); 1392 case 6: 1393 return ("Intel Celeron(r)"); 1394 case 5: 1395 case 7: 1396 celeron = xeon = 0; 1397 cp = &cpi->cpi_std[2]; /* cache info */ 1398 1399 for (i = 1; i < 3; i++) { 1400 uint_t tmp; 1401 1402 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1403 if (tmp == 0x40) 1404 celeron++; 1405 if (tmp >= 0x44 && tmp <= 0x45) 1406 xeon++; 1407 } 1408 1409 for (i = 0; i < 2; i++) { 1410 uint_t tmp; 1411 1412 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1413 if (tmp == 0x40) 1414 celeron++; 1415 else if (tmp >= 0x44 && tmp <= 0x45) 1416 xeon++; 1417 } 1418 1419 for (i = 0; i < 4; i++) { 1420 uint_t tmp; 1421 1422 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1423 if (tmp == 0x40) 1424 celeron++; 1425 else if (tmp >= 0x44 && tmp <= 0x45) 1426 xeon++; 1427 } 1428 1429 for (i = 0; i < 4; i++) { 1430 uint_t tmp; 1431 1432 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1433 if (tmp == 0x40) 1434 celeron++; 1435 else if (tmp >= 0x44 && tmp <= 0x45) 1436 xeon++; 1437 } 1438 1439 if (celeron) 1440 return ("Intel Celeron(r)"); 1441 if (xeon) 1442 return (cpi->cpi_model == 5 ? 1443 "Intel Pentium(r) II Xeon(tm)" : 1444 "Intel Pentium(r) III Xeon(tm)"); 1445 return (cpi->cpi_model == 5 ? 
1446 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1447 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1448 default: 1449 break; 1450 } 1451 default: 1452 break; 1453 } 1454 1455 /* BrandID is present if the field is nonzero */ 1456 if (cpi->cpi_brandid != 0) { 1457 static const struct { 1458 uint_t bt_bid; 1459 const char *bt_str; 1460 } brand_tbl[] = { 1461 { 0x1, "Intel(r) Celeron(r)" }, 1462 { 0x2, "Intel(r) Pentium(r) III" }, 1463 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1464 { 0x4, "Intel(r) Pentium(r) III" }, 1465 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1466 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1467 { 0x8, "Intel(r) Pentium(r) 4" }, 1468 { 0x9, "Intel(r) Pentium(r) 4" }, 1469 { 0xa, "Intel(r) Celeron(r)" }, 1470 { 0xb, "Intel(r) Xeon(tm)" }, 1471 { 0xc, "Intel(r) Xeon(tm) MP" }, 1472 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1473 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1474 { 0x11, "Mobile Genuine Intel(r)" }, 1475 { 0x12, "Intel(r) Celeron(r) M" }, 1476 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1477 { 0x14, "Intel(r) Celeron(r)" }, 1478 { 0x15, "Mobile Genuine Intel(r)" }, 1479 { 0x16, "Intel(r) Pentium(r) M" }, 1480 { 0x17, "Mobile Intel(r) Celeron(r)" } 1481 }; 1482 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1483 uint_t sgn; 1484 1485 sgn = (cpi->cpi_family << 8) | 1486 (cpi->cpi_model << 4) | cpi->cpi_step; 1487 1488 for (i = 0; i < btblmax; i++) 1489 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1490 break; 1491 if (i < btblmax) { 1492 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1493 return ("Intel(r) Celeron(r)"); 1494 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1495 return ("Intel(r) Xeon(tm) MP"); 1496 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1497 return ("Intel(r) Xeon(tm)"); 1498 return (brand_tbl[i].bt_str); 1499 } 1500 } 1501 1502 return (NULL); 1503 } 1504 1505 static const char * 1506 amd_cpubrand(const struct cpuid_info *cpi) 1507 { 1508 if ((x86_feature & X86_CPUID) == 0 || 1509 cpi->cpi_maxeax < 1 || 
cpi->cpi_family < 5) 1510 return ("i486 compatible"); 1511 1512 switch (cpi->cpi_family) { 1513 case 5: 1514 switch (cpi->cpi_model) { 1515 case 0: 1516 case 1: 1517 case 2: 1518 case 3: 1519 case 4: 1520 case 5: 1521 return ("AMD-K5(r)"); 1522 case 6: 1523 case 7: 1524 return ("AMD-K6(r)"); 1525 case 8: 1526 return ("AMD-K6(r)-2"); 1527 case 9: 1528 return ("AMD-K6(r)-III"); 1529 default: 1530 return ("AMD (family 5)"); 1531 } 1532 case 6: 1533 switch (cpi->cpi_model) { 1534 case 1: 1535 return ("AMD-K7(tm)"); 1536 case 0: 1537 case 2: 1538 case 4: 1539 return ("AMD Athlon(tm)"); 1540 case 3: 1541 case 7: 1542 return ("AMD Duron(tm)"); 1543 case 6: 1544 case 8: 1545 case 10: 1546 /* 1547 * Use the L2 cache size to distinguish 1548 */ 1549 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1550 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1551 default: 1552 return ("AMD (family 6)"); 1553 } 1554 default: 1555 break; 1556 } 1557 1558 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1559 cpi->cpi_brandid != 0) { 1560 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1561 case 3: 1562 return ("AMD Opteron(tm) UP 1xx"); 1563 case 4: 1564 return ("AMD Opteron(tm) DP 2xx"); 1565 case 5: 1566 return ("AMD Opteron(tm) MP 8xx"); 1567 default: 1568 return ("AMD Opteron(tm)"); 1569 } 1570 } 1571 1572 return (NULL); 1573 } 1574 1575 static const char * 1576 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1577 { 1578 if ((x86_feature & X86_CPUID) == 0 || 1579 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1580 type == X86_TYPE_CYRIX_486) 1581 return ("i486 compatible"); 1582 1583 switch (type) { 1584 case X86_TYPE_CYRIX_6x86: 1585 return ("Cyrix 6x86"); 1586 case X86_TYPE_CYRIX_6x86L: 1587 return ("Cyrix 6x86L"); 1588 case X86_TYPE_CYRIX_6x86MX: 1589 return ("Cyrix 6x86MX"); 1590 case X86_TYPE_CYRIX_GXm: 1591 return ("Cyrix GXm"); 1592 case X86_TYPE_CYRIX_MediaGX: 1593 return ("Cyrix MediaGX"); 1594 case X86_TYPE_CYRIX_MII: 1595 return ("Cyrix M2"); 1596 case X86_TYPE_VIA_CYRIX_III: 1597 
return ("VIA Cyrix M3"); 1598 default: 1599 /* 1600 * Have another wild guess .. 1601 */ 1602 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1603 return ("Cyrix 5x86"); 1604 else if (cpi->cpi_family == 5) { 1605 switch (cpi->cpi_model) { 1606 case 2: 1607 return ("Cyrix 6x86"); /* Cyrix M1 */ 1608 case 4: 1609 return ("Cyrix MediaGX"); 1610 default: 1611 break; 1612 } 1613 } else if (cpi->cpi_family == 6) { 1614 switch (cpi->cpi_model) { 1615 case 0: 1616 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1617 case 5: 1618 case 6: 1619 case 7: 1620 case 8: 1621 case 9: 1622 return ("VIA C3"); 1623 default: 1624 break; 1625 } 1626 } 1627 break; 1628 } 1629 return (NULL); 1630 } 1631 1632 /* 1633 * This only gets called in the case that the CPU extended 1634 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1635 * aren't available, or contain null bytes for some reason. 1636 */ 1637 static void 1638 fabricate_brandstr(struct cpuid_info *cpi) 1639 { 1640 const char *brand = NULL; 1641 1642 switch (cpi->cpi_vendor) { 1643 case X86_VENDOR_Intel: 1644 brand = intel_cpubrand(cpi); 1645 break; 1646 case X86_VENDOR_AMD: 1647 brand = amd_cpubrand(cpi); 1648 break; 1649 case X86_VENDOR_Cyrix: 1650 brand = cyrix_cpubrand(cpi, x86_type); 1651 break; 1652 case X86_VENDOR_NexGen: 1653 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1654 brand = "NexGen Nx586"; 1655 break; 1656 case X86_VENDOR_Centaur: 1657 if (cpi->cpi_family == 5) 1658 switch (cpi->cpi_model) { 1659 case 4: 1660 brand = "Centaur C6"; 1661 break; 1662 case 8: 1663 brand = "Centaur C2"; 1664 break; 1665 case 9: 1666 brand = "Centaur C3"; 1667 break; 1668 default: 1669 break; 1670 } 1671 break; 1672 case X86_VENDOR_Rise: 1673 if (cpi->cpi_family == 5 && 1674 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1675 brand = "Rise mP6"; 1676 break; 1677 case X86_VENDOR_SiS: 1678 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1679 brand = "SiS 55x"; 1680 break; 1681 case X86_VENDOR_TM: 1682 if (cpi->cpi_family == 5 && 
cpi->cpi_model == 4) 1683 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1684 break; 1685 case X86_VENDOR_NSC: 1686 case X86_VENDOR_UMC: 1687 default: 1688 break; 1689 } 1690 if (brand) { 1691 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1692 return; 1693 } 1694 1695 /* 1696 * If all else fails ... 1697 */ 1698 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1699 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1700 cpi->cpi_model, cpi->cpi_step); 1701 } 1702 1703 /* 1704 * This routine is called just after kernel memory allocation 1705 * becomes available on cpu0, and as part of mp_startup() on 1706 * the other cpus. 1707 * 1708 * Fixup the brand string, and collect any information from cpuid 1709 * that requires dynamicically allocated storage to represent. 1710 */ 1711 /*ARGSUSED*/ 1712 void 1713 cpuid_pass3(cpu_t *cpu) 1714 { 1715 int i, max, shft, level, size; 1716 struct cpuid_regs regs; 1717 struct cpuid_regs *cp; 1718 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1719 1720 ASSERT(cpi->cpi_pass == 2); 1721 1722 /* 1723 * Function 4: Deterministic cache parameters 1724 * 1725 * Take this opportunity to detect the number of threads 1726 * sharing the last level cache, and construct a corresponding 1727 * cache id. The respective cpuid_info members are initialized 1728 * to the default case of "no last level cache sharing". 1729 */ 1730 cpi->cpi_ncpu_shr_last_cache = 1; 1731 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1732 1733 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1734 1735 /* 1736 * Find the # of elements (size) returned by fn 4, and along 1737 * the way detect last level cache sharing details. 
1738 */ 1739 bzero(®s, sizeof (regs)); 1740 cp = ®s; 1741 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1742 cp->cp_eax = 4; 1743 cp->cp_ecx = i; 1744 1745 (void) __cpuid_insn(cp); 1746 1747 if (CPI_CACHE_TYPE(cp) == 0) 1748 break; 1749 level = CPI_CACHE_LVL(cp); 1750 if (level > max) { 1751 max = level; 1752 cpi->cpi_ncpu_shr_last_cache = 1753 CPI_NTHR_SHR_CACHE(cp) + 1; 1754 } 1755 } 1756 cpi->cpi_std_4_size = size = i; 1757 1758 /* 1759 * Allocate the cpi_std_4 array. The first element 1760 * references the regs for fn 4, %ecx == 0, which 1761 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1762 */ 1763 if (size > 0) { 1764 cpi->cpi_std_4 = 1765 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1766 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1767 1768 /* 1769 * Allocate storage to hold the additional regs 1770 * for function 4, %ecx == 1 .. cpi_std_4_size. 1771 * 1772 * The regs for fn 4, %ecx == 0 has already 1773 * been allocated as indicated above. 1774 */ 1775 for (i = 1; i < size; i++) { 1776 cp = cpi->cpi_std_4[i] = 1777 kmem_zalloc(sizeof (regs), KM_SLEEP); 1778 cp->cp_eax = 4; 1779 cp->cp_ecx = i; 1780 1781 (void) __cpuid_insn(cp); 1782 } 1783 } 1784 /* 1785 * Determine the number of bits needed to represent 1786 * the number of CPUs sharing the last level cache. 1787 * 1788 * Shift off that number of bits from the APIC id to 1789 * derive the cache id. 1790 */ 1791 shft = 0; 1792 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1793 shft++; 1794 cpi->cpi_last_lvl_cacheid = CPI_APIC_ID(cpi) >> shft; 1795 } 1796 1797 /* 1798 * Now fixup the brand string 1799 */ 1800 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1801 fabricate_brandstr(cpi); 1802 } else { 1803 1804 /* 1805 * If we successfully extracted a brand string from the cpuid 1806 * instruction, clean it up by removing leading spaces and 1807 * similar junk. 
1808 */ 1809 if (cpi->cpi_brandstr[0]) { 1810 size_t maxlen = sizeof (cpi->cpi_brandstr); 1811 char *src, *dst; 1812 1813 dst = src = (char *)cpi->cpi_brandstr; 1814 src[maxlen - 1] = '\0'; 1815 /* 1816 * strip leading spaces 1817 */ 1818 while (*src == ' ') 1819 src++; 1820 /* 1821 * Remove any 'Genuine' or "Authentic" prefixes 1822 */ 1823 if (strncmp(src, "Genuine ", 8) == 0) 1824 src += 8; 1825 if (strncmp(src, "Authentic ", 10) == 0) 1826 src += 10; 1827 1828 /* 1829 * Now do an in-place copy. 1830 * Map (R) to (r) and (TM) to (tm). 1831 * The era of teletypes is long gone, and there's 1832 * -really- no need to shout. 1833 */ 1834 while (*src != '\0') { 1835 if (src[0] == '(') { 1836 if (strncmp(src + 1, "R)", 2) == 0) { 1837 (void) strncpy(dst, "(r)", 3); 1838 src += 3; 1839 dst += 3; 1840 continue; 1841 } 1842 if (strncmp(src + 1, "TM)", 3) == 0) { 1843 (void) strncpy(dst, "(tm)", 4); 1844 src += 4; 1845 dst += 4; 1846 continue; 1847 } 1848 } 1849 *dst++ = *src++; 1850 } 1851 *dst = '\0'; 1852 1853 /* 1854 * Finally, remove any trailing spaces 1855 */ 1856 while (--dst > cpi->cpi_brandstr) 1857 if (*dst == ' ') 1858 *dst = '\0'; 1859 else 1860 break; 1861 } else 1862 fabricate_brandstr(cpi); 1863 } 1864 cpi->cpi_pass = 3; 1865 } 1866 1867 /* 1868 * This routine is called out of bind_hwcap() much later in the life 1869 * of the kernel (post_startup()). The job of this routine is to resolve 1870 * the hardware feature support and kernel support for those features into 1871 * what we're actually going to tell applications via the aux vector. 
1872 */ 1873 uint_t 1874 cpuid_pass4(cpu_t *cpu) 1875 { 1876 struct cpuid_info *cpi; 1877 uint_t hwcap_flags = 0; 1878 1879 if (cpu == NULL) 1880 cpu = CPU; 1881 cpi = cpu->cpu_m.mcpu_cpi; 1882 1883 ASSERT(cpi->cpi_pass == 3); 1884 1885 if (cpi->cpi_maxeax >= 1) { 1886 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 1887 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 1888 1889 *edx = CPI_FEATURES_EDX(cpi); 1890 *ecx = CPI_FEATURES_ECX(cpi); 1891 1892 /* 1893 * [these require explicit kernel support] 1894 */ 1895 if ((x86_feature & X86_SEP) == 0) 1896 *edx &= ~CPUID_INTC_EDX_SEP; 1897 1898 if ((x86_feature & X86_SSE) == 0) 1899 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 1900 if ((x86_feature & X86_SSE2) == 0) 1901 *edx &= ~CPUID_INTC_EDX_SSE2; 1902 1903 if ((x86_feature & X86_HTT) == 0) 1904 *edx &= ~CPUID_INTC_EDX_HTT; 1905 1906 if ((x86_feature & X86_SSE3) == 0) 1907 *ecx &= ~CPUID_INTC_ECX_SSE3; 1908 1909 /* 1910 * [no explicit support required beyond x87 fp context] 1911 */ 1912 if (!fpu_exists) 1913 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 1914 1915 /* 1916 * Now map the supported feature vector to things that we 1917 * think userland will care about. 
1918 */ 1919 if (*edx & CPUID_INTC_EDX_SEP) 1920 hwcap_flags |= AV_386_SEP; 1921 if (*edx & CPUID_INTC_EDX_SSE) 1922 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 1923 if (*edx & CPUID_INTC_EDX_SSE2) 1924 hwcap_flags |= AV_386_SSE2; 1925 if (*ecx & CPUID_INTC_ECX_SSE3) 1926 hwcap_flags |= AV_386_SSE3; 1927 if (*ecx & CPUID_INTC_ECX_POPCNT) 1928 hwcap_flags |= AV_386_POPCNT; 1929 if (*edx & CPUID_INTC_EDX_FPU) 1930 hwcap_flags |= AV_386_FPU; 1931 if (*edx & CPUID_INTC_EDX_MMX) 1932 hwcap_flags |= AV_386_MMX; 1933 1934 if (*edx & CPUID_INTC_EDX_TSC) 1935 hwcap_flags |= AV_386_TSC; 1936 if (*edx & CPUID_INTC_EDX_CX8) 1937 hwcap_flags |= AV_386_CX8; 1938 if (*edx & CPUID_INTC_EDX_CMOV) 1939 hwcap_flags |= AV_386_CMOV; 1940 if (*ecx & CPUID_INTC_ECX_MON) 1941 hwcap_flags |= AV_386_MON; 1942 if (*ecx & CPUID_INTC_ECX_CX16) 1943 hwcap_flags |= AV_386_CX16; 1944 } 1945 1946 if (x86_feature & X86_HTT) 1947 hwcap_flags |= AV_386_PAUSE; 1948 1949 if (cpi->cpi_xmaxeax < 0x80000001) 1950 goto pass4_done; 1951 1952 switch (cpi->cpi_vendor) { 1953 struct cpuid_regs cp; 1954 uint32_t *edx, *ecx; 1955 1956 case X86_VENDOR_Intel: 1957 /* 1958 * Seems like Intel duplicated what we necessary 1959 * here to make the initial crop of 64-bit OS's work. 1960 * Hopefully, those are the only "extended" bits 1961 * they'll add. 
1962 */ 1963 /*FALLTHROUGH*/ 1964 1965 case X86_VENDOR_AMD: 1966 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 1967 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 1968 1969 *edx = CPI_FEATURES_XTD_EDX(cpi); 1970 *ecx = CPI_FEATURES_XTD_ECX(cpi); 1971 1972 /* 1973 * [these features require explicit kernel support] 1974 */ 1975 switch (cpi->cpi_vendor) { 1976 case X86_VENDOR_Intel: 1977 break; 1978 1979 case X86_VENDOR_AMD: 1980 if ((x86_feature & X86_TSCP) == 0) 1981 *edx &= ~CPUID_AMD_EDX_TSCP; 1982 if ((x86_feature & X86_SSE4A) == 0) 1983 *ecx &= ~CPUID_AMD_ECX_SSE4A; 1984 break; 1985 1986 default: 1987 break; 1988 } 1989 1990 /* 1991 * [no explicit support required beyond 1992 * x87 fp context and exception handlers] 1993 */ 1994 if (!fpu_exists) 1995 *edx &= ~(CPUID_AMD_EDX_MMXamd | 1996 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 1997 1998 if ((x86_feature & X86_NX) == 0) 1999 *edx &= ~CPUID_AMD_EDX_NX; 2000 #if !defined(__amd64) 2001 *edx &= ~CPUID_AMD_EDX_LM; 2002 #endif 2003 /* 2004 * Now map the supported feature vector to 2005 * things that we think userland will care about. 2006 */ 2007 #if defined(__amd64) 2008 if (*edx & CPUID_AMD_EDX_SYSC) 2009 hwcap_flags |= AV_386_AMD_SYSC; 2010 #endif 2011 if (*edx & CPUID_AMD_EDX_MMXamd) 2012 hwcap_flags |= AV_386_AMD_MMX; 2013 if (*edx & CPUID_AMD_EDX_3DNow) 2014 hwcap_flags |= AV_386_AMD_3DNow; 2015 if (*edx & CPUID_AMD_EDX_3DNowx) 2016 hwcap_flags |= AV_386_AMD_3DNowx; 2017 2018 switch (cpi->cpi_vendor) { 2019 case X86_VENDOR_AMD: 2020 if (*edx & CPUID_AMD_EDX_TSCP) 2021 hwcap_flags |= AV_386_TSCP; 2022 if (*ecx & CPUID_AMD_ECX_AHF64) 2023 hwcap_flags |= AV_386_AHF; 2024 if (*ecx & CPUID_AMD_ECX_SSE4A) 2025 hwcap_flags |= AV_386_AMD_SSE4A; 2026 if (*ecx & CPUID_AMD_ECX_LZCNT) 2027 hwcap_flags |= AV_386_AMD_LZCNT; 2028 break; 2029 2030 case X86_VENDOR_Intel: 2031 /* 2032 * Aarrgh. 2033 * Intel uses a different bit in the same word. 
2034 */ 2035 if (*ecx & CPUID_INTC_ECX_AHF64) 2036 hwcap_flags |= AV_386_AHF; 2037 break; 2038 2039 default: 2040 break; 2041 } 2042 break; 2043 2044 case X86_VENDOR_TM: 2045 cp.cp_eax = 0x80860001; 2046 (void) __cpuid_insn(&cp); 2047 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2048 break; 2049 2050 default: 2051 break; 2052 } 2053 2054 pass4_done: 2055 cpi->cpi_pass = 4; 2056 return (hwcap_flags); 2057 } 2058 2059 2060 /* 2061 * Simulate the cpuid instruction using the data we previously 2062 * captured about this CPU. We try our best to return the truth 2063 * about the hardware, independently of kernel support. 2064 */ 2065 uint32_t 2066 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2067 { 2068 struct cpuid_info *cpi; 2069 struct cpuid_regs *xcp; 2070 2071 if (cpu == NULL) 2072 cpu = CPU; 2073 cpi = cpu->cpu_m.mcpu_cpi; 2074 2075 ASSERT(cpuid_checkpass(cpu, 3)); 2076 2077 /* 2078 * CPUID data is cached in two separate places: cpi_std for standard 2079 * CPUID functions, and cpi_extd for extended CPUID functions. 2080 */ 2081 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2082 xcp = &cpi->cpi_std[cp->cp_eax]; 2083 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2084 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2085 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2086 else 2087 /* 2088 * The caller is asking for data from an input parameter which 2089 * the kernel has not cached. In this case we go fetch from 2090 * the hardware and return the data directly to the user. 
2091 */ 2092 return (__cpuid_insn(cp)); 2093 2094 cp->cp_eax = xcp->cp_eax; 2095 cp->cp_ebx = xcp->cp_ebx; 2096 cp->cp_ecx = xcp->cp_ecx; 2097 cp->cp_edx = xcp->cp_edx; 2098 return (cp->cp_eax); 2099 } 2100 2101 int 2102 cpuid_checkpass(cpu_t *cpu, int pass) 2103 { 2104 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2105 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2106 } 2107 2108 int 2109 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2110 { 2111 ASSERT(cpuid_checkpass(cpu, 3)); 2112 2113 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2114 } 2115 2116 int 2117 cpuid_is_cmt(cpu_t *cpu) 2118 { 2119 if (cpu == NULL) 2120 cpu = CPU; 2121 2122 ASSERT(cpuid_checkpass(cpu, 1)); 2123 2124 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2125 } 2126 2127 /* 2128 * AMD and Intel both implement the 64-bit variant of the syscall 2129 * instruction (syscallq), so if there's -any- support for syscall, 2130 * cpuid currently says "yes, we support this". 2131 * 2132 * However, Intel decided to -not- implement the 32-bit variant of the 2133 * syscall instruction, so we provide a predicate to allow our caller 2134 * to test that subtlety here. 2135 */ 2136 /*ARGSUSED*/ 2137 int 2138 cpuid_syscall32_insn(cpu_t *cpu) 2139 { 2140 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 2141 2142 if (cpu == NULL) 2143 cpu = CPU; 2144 2145 /*CSTYLED*/ 2146 { 2147 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2148 2149 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2150 cpi->cpi_xmaxeax >= 0x80000001 && 2151 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2152 return (1); 2153 } 2154 return (0); 2155 } 2156 2157 int 2158 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2159 { 2160 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2161 2162 static const char fmt[] = 2163 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2164 static const char fmt_ht[] = 2165 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2166 2167 ASSERT(cpuid_checkpass(cpu, 1)); 2168 2169 if (cpuid_is_cmt(cpu)) 2170 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2171 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2172 cpi->cpi_family, cpi->cpi_model, 2173 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2174 return (snprintf(s, n, fmt, 2175 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2176 cpi->cpi_family, cpi->cpi_model, 2177 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2178 } 2179 2180 const char * 2181 cpuid_getvendorstr(cpu_t *cpu) 2182 { 2183 ASSERT(cpuid_checkpass(cpu, 1)); 2184 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2185 } 2186 2187 uint_t 2188 cpuid_getvendor(cpu_t *cpu) 2189 { 2190 ASSERT(cpuid_checkpass(cpu, 1)); 2191 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2192 } 2193 2194 uint_t 2195 cpuid_getfamily(cpu_t *cpu) 2196 { 2197 ASSERT(cpuid_checkpass(cpu, 1)); 2198 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2199 } 2200 2201 uint_t 2202 cpuid_getmodel(cpu_t *cpu) 2203 { 2204 ASSERT(cpuid_checkpass(cpu, 1)); 2205 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2206 } 2207 2208 uint_t 2209 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2210 { 2211 ASSERT(cpuid_checkpass(cpu, 1)); 2212 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2213 } 2214 2215 uint_t 2216 cpuid_get_ncore_per_chip(cpu_t *cpu) 2217 { 2218 ASSERT(cpuid_checkpass(cpu, 1)); 2219 
return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2220 } 2221 2222 uint_t 2223 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2224 { 2225 ASSERT(cpuid_checkpass(cpu, 2)); 2226 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2227 } 2228 2229 id_t 2230 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2231 { 2232 ASSERT(cpuid_checkpass(cpu, 2)); 2233 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2234 } 2235 2236 uint_t 2237 cpuid_getstep(cpu_t *cpu) 2238 { 2239 ASSERT(cpuid_checkpass(cpu, 1)); 2240 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2241 } 2242 2243 uint_t 2244 cpuid_getsig(struct cpu *cpu) 2245 { 2246 ASSERT(cpuid_checkpass(cpu, 1)); 2247 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2248 } 2249 2250 uint32_t 2251 cpuid_getchiprev(struct cpu *cpu) 2252 { 2253 ASSERT(cpuid_checkpass(cpu, 1)); 2254 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2255 } 2256 2257 const char * 2258 cpuid_getchiprevstr(struct cpu *cpu) 2259 { 2260 ASSERT(cpuid_checkpass(cpu, 1)); 2261 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2262 } 2263 2264 uint32_t 2265 cpuid_getsockettype(struct cpu *cpu) 2266 { 2267 ASSERT(cpuid_checkpass(cpu, 1)); 2268 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2269 } 2270 2271 int 2272 cpuid_get_chipid(cpu_t *cpu) 2273 { 2274 ASSERT(cpuid_checkpass(cpu, 1)); 2275 2276 if (cpuid_is_cmt(cpu)) 2277 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2278 return (cpu->cpu_id); 2279 } 2280 2281 id_t 2282 cpuid_get_coreid(cpu_t *cpu) 2283 { 2284 ASSERT(cpuid_checkpass(cpu, 1)); 2285 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2286 } 2287 2288 int 2289 cpuid_get_clogid(cpu_t *cpu) 2290 { 2291 ASSERT(cpuid_checkpass(cpu, 1)); 2292 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2293 } 2294 2295 void 2296 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2297 { 2298 struct cpuid_info *cpi; 2299 2300 if (cpu == NULL) 2301 cpu = CPU; 2302 cpi = cpu->cpu_m.mcpu_cpi; 2303 2304 ASSERT(cpuid_checkpass(cpu, 1)); 2305 2306 if (pabits) 2307 *pabits = cpi->cpi_pabits; 2308 if 
(vabits) 2309 *vabits = cpi->cpi_vabits; 2310 } 2311 2312 /* 2313 * Returns the number of data TLB entries for a corresponding 2314 * pagesize. If it can't be computed, or isn't known, the 2315 * routine returns zero. If you ask about an architecturally 2316 * impossible pagesize, the routine will panic (so that the 2317 * hat implementor knows that things are inconsistent.) 2318 */ 2319 uint_t 2320 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2321 { 2322 struct cpuid_info *cpi; 2323 uint_t dtlb_nent = 0; 2324 2325 if (cpu == NULL) 2326 cpu = CPU; 2327 cpi = cpu->cpu_m.mcpu_cpi; 2328 2329 ASSERT(cpuid_checkpass(cpu, 1)); 2330 2331 /* 2332 * Check the L2 TLB info 2333 */ 2334 if (cpi->cpi_xmaxeax >= 0x80000006) { 2335 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2336 2337 switch (pagesize) { 2338 2339 case 4 * 1024: 2340 /* 2341 * All zero in the top 16 bits of the register 2342 * indicates a unified TLB. Size is in low 16 bits. 2343 */ 2344 if ((cp->cp_ebx & 0xffff0000) == 0) 2345 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2346 else 2347 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2348 break; 2349 2350 case 2 * 1024 * 1024: 2351 if ((cp->cp_eax & 0xffff0000) == 0) 2352 dtlb_nent = cp->cp_eax & 0x0000ffff; 2353 else 2354 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2355 break; 2356 2357 default: 2358 panic("unknown L2 pagesize"); 2359 /*NOTREACHED*/ 2360 } 2361 } 2362 2363 if (dtlb_nent != 0) 2364 return (dtlb_nent); 2365 2366 /* 2367 * No L2 TLB support for this size, try L1. 
2368 */ 2369 if (cpi->cpi_xmaxeax >= 0x80000005) { 2370 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2371 2372 switch (pagesize) { 2373 case 4 * 1024: 2374 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2375 break; 2376 case 2 * 1024 * 1024: 2377 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2378 break; 2379 default: 2380 panic("unknown L1 d-TLB pagesize"); 2381 /*NOTREACHED*/ 2382 } 2383 } 2384 2385 return (dtlb_nent); 2386 } 2387 2388 /* 2389 * Return 0 if the erratum is not present or not applicable, positive 2390 * if it is, and negative if the status of the erratum is unknown. 2391 * 2392 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2393 * Processors" #25759, Rev 3.57, August 2005 2394 */ 2395 int 2396 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2397 { 2398 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2399 uint_t eax; 2400 2401 /* 2402 * Bail out if this CPU isn't an AMD CPU, or if it's 2403 * a legacy (32-bit) AMD CPU. 2404 */ 2405 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2406 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2407 cpi->cpi_family == 6) 2408 2409 return (0); 2410 2411 eax = cpi->cpi_std[1].cp_eax; 2412 2413 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2414 #define SH_B3(eax) (eax == 0xf51) 2415 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2416 2417 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2418 2419 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2420 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2421 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2422 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2423 2424 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2425 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2426 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2427 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2428 2429 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2430 #define JH_E1(eax) (eax == 0x20f10) /* 
JH8_E0 had 0x20f30 */ 2431 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2432 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2433 #define BH_E4(eax) (eax == 0x20fb1) 2434 #define SH_E5(eax) (eax == 0x20f42) 2435 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2436 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2437 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2438 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2439 DH_E6(eax) || JH_E6(eax)) 2440 2441 switch (erratum) { 2442 case 1: 2443 return (cpi->cpi_family < 0x10); 2444 case 51: /* what does the asterisk mean? */ 2445 return (B(eax) || SH_C0(eax) || CG(eax)); 2446 case 52: 2447 return (B(eax)); 2448 case 57: 2449 return (cpi->cpi_family <= 0x10); 2450 case 58: 2451 return (B(eax)); 2452 case 60: 2453 return (cpi->cpi_family <= 0x10); 2454 case 61: 2455 case 62: 2456 case 63: 2457 case 64: 2458 case 65: 2459 case 66: 2460 case 68: 2461 case 69: 2462 case 70: 2463 case 71: 2464 return (B(eax)); 2465 case 72: 2466 return (SH_B0(eax)); 2467 case 74: 2468 return (B(eax)); 2469 case 75: 2470 return (cpi->cpi_family < 0x10); 2471 case 76: 2472 return (B(eax)); 2473 case 77: 2474 return (cpi->cpi_family <= 0x10); 2475 case 78: 2476 return (B(eax) || SH_C0(eax)); 2477 case 79: 2478 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2479 case 80: 2480 case 81: 2481 case 82: 2482 return (B(eax)); 2483 case 83: 2484 return (B(eax) || SH_C0(eax) || CG(eax)); 2485 case 85: 2486 return (cpi->cpi_family < 0x10); 2487 case 86: 2488 return (SH_C0(eax) || CG(eax)); 2489 case 88: 2490 #if !defined(__amd64) 2491 return (0); 2492 #else 2493 return (B(eax) || SH_C0(eax)); 2494 #endif 2495 case 89: 2496 return (cpi->cpi_family < 0x10); 2497 case 90: 2498 return (B(eax) || SH_C0(eax) || CG(eax)); 2499 case 91: 2500 case 92: 2501 return (B(eax) || SH_C0(eax)); 2502 case 93: 2503 return (SH_C0(eax)); 2504 case 94: 2505 return (B(eax) || SH_C0(eax) || CG(eax)); 2506 case 95: 2507 #if 
!defined(__amd64) 2508 return (0); 2509 #else 2510 return (B(eax) || SH_C0(eax)); 2511 #endif 2512 case 96: 2513 return (B(eax) || SH_C0(eax) || CG(eax)); 2514 case 97: 2515 case 98: 2516 return (SH_C0(eax) || CG(eax)); 2517 case 99: 2518 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2519 case 100: 2520 return (B(eax) || SH_C0(eax)); 2521 case 101: 2522 case 103: 2523 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2524 case 104: 2525 return (SH_C0(eax) || CG(eax) || D0(eax)); 2526 case 105: 2527 case 106: 2528 case 107: 2529 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2530 case 108: 2531 return (DH_CG(eax)); 2532 case 109: 2533 return (SH_C0(eax) || CG(eax) || D0(eax)); 2534 case 110: 2535 return (D0(eax) || EX(eax)); 2536 case 111: 2537 return (CG(eax)); 2538 case 112: 2539 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2540 case 113: 2541 return (eax == 0x20fc0); 2542 case 114: 2543 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2544 case 115: 2545 return (SH_E0(eax) || JH_E1(eax)); 2546 case 116: 2547 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2548 case 117: 2549 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2550 case 118: 2551 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2552 JH_E6(eax)); 2553 case 121: 2554 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2555 case 122: 2556 return (cpi->cpi_family < 0x10); 2557 case 123: 2558 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2559 case 131: 2560 return (cpi->cpi_family < 0x10); 2561 case 6336786: 2562 /* 2563 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2564 * if this is a K8 family or newer processor 2565 */ 2566 if (CPI_FAMILY(cpi) == 0xf) { 2567 struct cpuid_regs regs; 2568 regs.cp_eax = 0x80000007; 2569 (void) __cpuid_insn(®s); 2570 return (!(regs.cp_edx & 0x100)); 2571 } 2572 return (0); 2573 case 6323525: 2574 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2575 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 
0xf40); 2576 2577 default: 2578 return (-1); 2579 } 2580 } 2581 2582 static const char assoc_str[] = "associativity"; 2583 static const char line_str[] = "line-size"; 2584 static const char size_str[] = "size"; 2585 2586 static void 2587 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2588 uint32_t val) 2589 { 2590 char buf[128]; 2591 2592 /* 2593 * ndi_prop_update_int() is used because it is desirable for 2594 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2595 */ 2596 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2597 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2598 } 2599 2600 /* 2601 * Intel-style cache/tlb description 2602 * 2603 * Standard cpuid level 2 gives a randomly ordered 2604 * selection of tags that index into a table that describes 2605 * cache and tlb properties. 2606 */ 2607 2608 static const char l1_icache_str[] = "l1-icache"; 2609 static const char l1_dcache_str[] = "l1-dcache"; 2610 static const char l2_cache_str[] = "l2-cache"; 2611 static const char l3_cache_str[] = "l3-cache"; 2612 static const char itlb4k_str[] = "itlb-4K"; 2613 static const char dtlb4k_str[] = "dtlb-4K"; 2614 static const char itlb4M_str[] = "itlb-4M"; 2615 static const char dtlb4M_str[] = "dtlb-4M"; 2616 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2617 static const char dtlb44_str[] = "dtlb-4K-4M"; 2618 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2619 static const char sl2_cache_str[] = "sectored-l2-cache"; 2620 static const char itrace_str[] = "itrace-cache"; 2621 static const char sl3_cache_str[] = "sectored-l3-cache"; 2622 2623 static const struct cachetab { 2624 uint8_t ct_code; 2625 uint8_t ct_assoc; 2626 uint16_t ct_line_size; 2627 size_t ct_size; 2628 const char *ct_label; 2629 } intel_ctab[] = { 2630 /* maintain descending order! 
*/ 2631 { 0xb4, 4, 0, 256, dtlb4k_str }, 2632 { 0xb3, 4, 0, 128, dtlb4k_str }, 2633 { 0xb0, 4, 0, 128, itlb4k_str }, 2634 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2635 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2636 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2637 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2638 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2639 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2640 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2641 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2642 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2643 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2644 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2645 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 2646 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 2647 { 0x73, 8, 0, 64*1024, itrace_str}, 2648 { 0x72, 8, 0, 32*1024, itrace_str}, 2649 { 0x71, 8, 0, 16*1024, itrace_str}, 2650 { 0x70, 8, 0, 12*1024, itrace_str}, 2651 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 2652 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 2653 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 2654 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 2655 { 0x5d, 0, 0, 256, dtlb44_str}, 2656 { 0x5c, 0, 0, 128, dtlb44_str}, 2657 { 0x5b, 0, 0, 64, dtlb44_str}, 2658 { 0x52, 0, 0, 256, itlb424_str}, 2659 { 0x51, 0, 0, 128, itlb424_str}, 2660 { 0x50, 0, 0, 64, itlb424_str}, 2661 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 2662 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 2663 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 2664 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 2665 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 2666 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 2667 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 2668 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 2669 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 2670 { 0x43, 4, 32, 512*1024, l2_cache_str}, 2671 { 0x42, 4, 32, 256*1024, l2_cache_str}, 2672 { 0x41, 4, 32, 128*1024, l2_cache_str}, 2673 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 2674 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 2675 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 
2676 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 2677 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 2678 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 2679 { 0x30, 8, 64, 32*1024, l1_icache_str}, 2680 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 2681 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 2682 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 2683 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 2684 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 2685 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 2686 { 0x0b, 4, 0, 4, itlb4M_str}, 2687 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 2688 { 0x08, 4, 32, 16*1024, l1_icache_str}, 2689 { 0x06, 4, 32, 8*1024, l1_icache_str}, 2690 { 0x04, 4, 0, 8, dtlb4M_str}, 2691 { 0x03, 4, 0, 64, dtlb4k_str}, 2692 { 0x02, 4, 0, 2, itlb4M_str}, 2693 { 0x01, 4, 0, 32, itlb4k_str}, 2694 { 0 } 2695 }; 2696 2697 static const struct cachetab cyrix_ctab[] = { 2698 { 0x70, 4, 0, 32, "tlb-4K" }, 2699 { 0x80, 4, 16, 16*1024, "l1-cache" }, 2700 { 0 } 2701 }; 2702 2703 /* 2704 * Search a cache table for a matching entry 2705 */ 2706 static const struct cachetab * 2707 find_cacheent(const struct cachetab *ct, uint_t code) 2708 { 2709 if (code != 0) { 2710 for (; ct->ct_code != 0; ct++) 2711 if (ct->ct_code <= code) 2712 break; 2713 if (ct->ct_code == code) 2714 return (ct); 2715 } 2716 return (NULL); 2717 } 2718 2719 /* 2720 * Walk the cacheinfo descriptor, applying 'func' to every valid element 2721 * The walk is terminated if the walker returns non-zero. 2722 */ 2723 static void 2724 intel_walk_cacheinfo(struct cpuid_info *cpi, 2725 void *arg, int (*func)(void *, const struct cachetab *)) 2726 { 2727 const struct cachetab *ct; 2728 uint8_t *dp; 2729 int i; 2730 2731 if ((dp = cpi->cpi_cacheinfo) == NULL) 2732 return; 2733 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2734 /* 2735 * For overloaded descriptor 0x49 we use cpuid function 4 2736 * if supported by the current processor, to update 2737 * cache information. 
2738 */ 2739 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4) { 2740 intel_cpuid_4_cache_info(arg, cpi); 2741 continue; 2742 } 2743 2744 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 2745 if (func(arg, ct) != 0) 2746 break; 2747 } 2748 } 2749 } 2750 2751 /* 2752 * (Like the Intel one, except for Cyrix CPUs) 2753 */ 2754 static void 2755 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 2756 void *arg, int (*func)(void *, const struct cachetab *)) 2757 { 2758 const struct cachetab *ct; 2759 uint8_t *dp; 2760 int i; 2761 2762 if ((dp = cpi->cpi_cacheinfo) == NULL) 2763 return; 2764 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2765 /* 2766 * Search Cyrix-specific descriptor table first .. 2767 */ 2768 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 2769 if (func(arg, ct) != 0) 2770 break; 2771 continue; 2772 } 2773 /* 2774 * .. else fall back to the Intel one 2775 */ 2776 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 2777 if (func(arg, ct) != 0) 2778 break; 2779 continue; 2780 } 2781 } 2782 } 2783 2784 /* 2785 * A cacheinfo walker that adds associativity, line-size, and size properties 2786 * to the devinfo node it is passed as an argument. 2787 */ 2788 static int 2789 add_cacheent_props(void *arg, const struct cachetab *ct) 2790 { 2791 dev_info_t *devi = arg; 2792 2793 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 2794 if (ct->ct_line_size != 0) 2795 add_cache_prop(devi, ct->ct_label, line_str, 2796 ct->ct_line_size); 2797 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 2798 return (0); 2799 } 2800 2801 /* 2802 * Add L2 or L3 cache-information using cpuid function 4. This 2803 * function is called from intel_walk_cacheinfo() when descriptor 2804 * 0x49 is encountered. 
2805 */ 2806 static void 2807 intel_cpuid_4_cache_info(void *arg, struct cpuid_info *cpi) 2808 { 2809 uint32_t level, i; 2810 2811 struct cachetab ct; 2812 2813 for (i = 0; i < cpi->cpi_std_4_size; i++) { 2814 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 2815 2816 if (level == 2 || level == 3) { 2817 ct.ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 2818 ct.ct_line_size = 2819 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 2820 ct.ct_size = ct.ct_assoc * 2821 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 2822 ct.ct_line_size * 2823 (cpi->cpi_std_4[i]->cp_ecx + 1); 2824 2825 if (level == 2) { 2826 ct.ct_label = l2_cache_str; 2827 } else if (level == 3) { 2828 ct.ct_label = l3_cache_str; 2829 } 2830 2831 (void) add_cacheent_props(arg, 2832 (const struct cachetab *) (&ct)); 2833 } 2834 } 2835 } 2836 2837 static const char fully_assoc[] = "fully-associative?"; 2838 2839 /* 2840 * AMD style cache/tlb description 2841 * 2842 * Extended functions 5 and 6 directly describe properties of 2843 * tlbs and various cache levels. 2844 */ 2845 static void 2846 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 2847 { 2848 switch (assoc) { 2849 case 0: /* reserved; ignore */ 2850 break; 2851 default: 2852 add_cache_prop(devi, label, assoc_str, assoc); 2853 break; 2854 case 0xff: 2855 add_cache_prop(devi, label, fully_assoc, 1); 2856 break; 2857 } 2858 } 2859 2860 static void 2861 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 2862 { 2863 if (size == 0) 2864 return; 2865 add_cache_prop(devi, label, size_str, size); 2866 add_amd_assoc(devi, label, assoc); 2867 } 2868 2869 static void 2870 add_amd_cache(dev_info_t *devi, const char *label, 2871 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 2872 { 2873 if (size == 0 || line_size == 0) 2874 return; 2875 add_amd_assoc(devi, label, assoc); 2876 /* 2877 * Most AMD parts have a sectored cache. Multiple cache lines are 2878 * associated with each tag. 
A sector consists of all cache lines 2879 * associated with a tag. For example, the AMD K6-III has a sector 2880 * size of 2 cache lines per tag. 2881 */ 2882 if (lines_per_tag != 0) 2883 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 2884 add_cache_prop(devi, label, line_str, line_size); 2885 add_cache_prop(devi, label, size_str, size * 1024); 2886 } 2887 2888 static void 2889 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 2890 { 2891 switch (assoc) { 2892 case 0: /* off */ 2893 break; 2894 case 1: 2895 case 2: 2896 case 4: 2897 add_cache_prop(devi, label, assoc_str, assoc); 2898 break; 2899 case 6: 2900 add_cache_prop(devi, label, assoc_str, 8); 2901 break; 2902 case 8: 2903 add_cache_prop(devi, label, assoc_str, 16); 2904 break; 2905 case 0xf: 2906 add_cache_prop(devi, label, fully_assoc, 1); 2907 break; 2908 default: /* reserved; ignore */ 2909 break; 2910 } 2911 } 2912 2913 static void 2914 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 2915 { 2916 if (size == 0 || assoc == 0) 2917 return; 2918 add_amd_l2_assoc(devi, label, assoc); 2919 add_cache_prop(devi, label, size_str, size); 2920 } 2921 2922 static void 2923 add_amd_l2_cache(dev_info_t *devi, const char *label, 2924 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 2925 { 2926 if (size == 0 || assoc == 0 || line_size == 0) 2927 return; 2928 add_amd_l2_assoc(devi, label, assoc); 2929 if (lines_per_tag != 0) 2930 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 2931 add_cache_prop(devi, label, line_str, line_size); 2932 add_cache_prop(devi, label, size_str, size * 1024); 2933 } 2934 2935 static void 2936 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 2937 { 2938 struct cpuid_regs *cp; 2939 2940 if (cpi->cpi_xmaxeax < 0x80000005) 2941 return; 2942 cp = &cpi->cpi_extd[5]; 2943 2944 /* 2945 * 4M/2M L1 TLB configuration 2946 * 2947 * We report the size for 2M pages because AMD uses two 2948 * TLB entries 
for one 4M page. 2949 */ 2950 add_amd_tlb(devi, "dtlb-2M", 2951 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 2952 add_amd_tlb(devi, "itlb-2M", 2953 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 2954 2955 /* 2956 * 4K L1 TLB configuration 2957 */ 2958 2959 switch (cpi->cpi_vendor) { 2960 uint_t nentries; 2961 case X86_VENDOR_TM: 2962 if (cpi->cpi_family >= 5) { 2963 /* 2964 * Crusoe processors have 256 TLB entries, but 2965 * cpuid data format constrains them to only 2966 * reporting 255 of them. 2967 */ 2968 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 2969 nentries = 256; 2970 /* 2971 * Crusoe processors also have a unified TLB 2972 */ 2973 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 2974 nentries); 2975 break; 2976 } 2977 /*FALLTHROUGH*/ 2978 default: 2979 add_amd_tlb(devi, itlb4k_str, 2980 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 2981 add_amd_tlb(devi, dtlb4k_str, 2982 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 2983 break; 2984 } 2985 2986 /* 2987 * data L1 cache configuration 2988 */ 2989 2990 add_amd_cache(devi, l1_dcache_str, 2991 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 2992 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 2993 2994 /* 2995 * code L1 cache configuration 2996 */ 2997 2998 add_amd_cache(devi, l1_icache_str, 2999 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3000 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3001 3002 if (cpi->cpi_xmaxeax < 0x80000006) 3003 return; 3004 cp = &cpi->cpi_extd[6]; 3005 3006 /* Check for a unified L2 TLB for large pages */ 3007 3008 if (BITX(cp->cp_eax, 31, 16) == 0) 3009 add_amd_l2_tlb(devi, "l2-tlb-2M", 3010 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3011 else { 3012 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3013 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3014 add_amd_l2_tlb(devi, "l2-itlb-2M", 3015 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3016 } 3017 3018 /* Check for a unified L2 TLB for 4K pages */ 3019 3020 if 
(BITX(cp->cp_ebx, 31, 16) == 0) { 3021 add_amd_l2_tlb(devi, "l2-tlb-4K", 3022 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3023 } else { 3024 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3025 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3026 add_amd_l2_tlb(devi, "l2-itlb-4K", 3027 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3028 } 3029 3030 add_amd_l2_cache(devi, l2_cache_str, 3031 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3032 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3033 } 3034 3035 /* 3036 * There are two basic ways that the x86 world describes it cache 3037 * and tlb architecture - Intel's way and AMD's way. 3038 * 3039 * Return which flavor of cache architecture we should use 3040 */ 3041 static int 3042 x86_which_cacheinfo(struct cpuid_info *cpi) 3043 { 3044 switch (cpi->cpi_vendor) { 3045 case X86_VENDOR_Intel: 3046 if (cpi->cpi_maxeax >= 2) 3047 return (X86_VENDOR_Intel); 3048 break; 3049 case X86_VENDOR_AMD: 3050 /* 3051 * The K5 model 1 was the first part from AMD that reported 3052 * cache sizes via extended cpuid functions. 3053 */ 3054 if (cpi->cpi_family > 5 || 3055 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3056 return (X86_VENDOR_AMD); 3057 break; 3058 case X86_VENDOR_TM: 3059 if (cpi->cpi_family >= 5) 3060 return (X86_VENDOR_AMD); 3061 /*FALLTHROUGH*/ 3062 default: 3063 /* 3064 * If they have extended CPU data for 0x80000005 3065 * then we assume they have AMD-format cache 3066 * information. 3067 * 3068 * If not, and the vendor happens to be Cyrix, 3069 * then try our-Cyrix specific handler. 3070 * 3071 * If we're not Cyrix, then assume we're using Intel's 3072 * table-driven format instead. 
3073 */ 3074 if (cpi->cpi_xmaxeax >= 0x80000005) 3075 return (X86_VENDOR_AMD); 3076 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3077 return (X86_VENDOR_Cyrix); 3078 else if (cpi->cpi_maxeax >= 2) 3079 return (X86_VENDOR_Intel); 3080 break; 3081 } 3082 return (-1); 3083 } 3084 3085 /* 3086 * create a node for the given cpu under the prom root node. 3087 * Also, create a cpu node in the device tree. 3088 */ 3089 static dev_info_t *cpu_nex_devi = NULL; 3090 static kmutex_t cpu_node_lock; 3091 3092 /* 3093 * Called from post_startup() and mp_startup() 3094 */ 3095 void 3096 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3097 { 3098 dev_info_t *cpu_devi; 3099 int create; 3100 3101 mutex_enter(&cpu_node_lock); 3102 3103 /* 3104 * create a nexus node for all cpus identified as 'cpu_id' under 3105 * the root node. 3106 */ 3107 if (cpu_nex_devi == NULL) { 3108 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3109 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3110 mutex_exit(&cpu_node_lock); 3111 return; 3112 } 3113 (void) ndi_devi_online(cpu_nex_devi, 0); 3114 } 3115 3116 /* 3117 * create a child node for cpu identified as 'cpu_id' 3118 */ 3119 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3120 cpu_id); 3121 if (cpu_devi == NULL) { 3122 mutex_exit(&cpu_node_lock); 3123 return; 3124 } 3125 3126 /* device_type */ 3127 3128 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3129 "device_type", "cpu"); 3130 3131 /* reg */ 3132 3133 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3134 "reg", cpu_id); 3135 3136 /* cpu-mhz, and clock-frequency */ 3137 3138 if (cpu_freq > 0) { 3139 long long mul; 3140 3141 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3142 "cpu-mhz", cpu_freq); 3143 3144 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3145 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3146 "clock-frequency", (int)mul); 3147 } 3148 3149 (void) ndi_devi_online(cpu_devi, 0); 3150 3151 if ((x86_feature & X86_CPUID) == 
0) { 3152 mutex_exit(&cpu_node_lock); 3153 return; 3154 } 3155 3156 /* vendor-id */ 3157 3158 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3159 "vendor-id", cpi->cpi_vendorstr); 3160 3161 if (cpi->cpi_maxeax == 0) { 3162 mutex_exit(&cpu_node_lock); 3163 return; 3164 } 3165 3166 /* 3167 * family, model, and step 3168 */ 3169 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3170 "family", CPI_FAMILY(cpi)); 3171 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3172 "cpu-model", CPI_MODEL(cpi)); 3173 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3174 "stepping-id", CPI_STEP(cpi)); 3175 3176 /* type */ 3177 3178 switch (cpi->cpi_vendor) { 3179 case X86_VENDOR_Intel: 3180 create = 1; 3181 break; 3182 default: 3183 create = 0; 3184 break; 3185 } 3186 if (create) 3187 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3188 "type", CPI_TYPE(cpi)); 3189 3190 /* ext-family */ 3191 3192 switch (cpi->cpi_vendor) { 3193 case X86_VENDOR_Intel: 3194 case X86_VENDOR_AMD: 3195 create = cpi->cpi_family >= 0xf; 3196 break; 3197 default: 3198 create = 0; 3199 break; 3200 } 3201 if (create) 3202 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3203 "ext-family", CPI_FAMILY_XTD(cpi)); 3204 3205 /* ext-model */ 3206 3207 switch (cpi->cpi_vendor) { 3208 case X86_VENDOR_Intel: 3209 create = CPI_MODEL(cpi) == 0xf; 3210 break; 3211 case X86_VENDOR_AMD: 3212 create = CPI_FAMILY(cpi) == 0xf; 3213 break; 3214 default: 3215 create = 0; 3216 break; 3217 } 3218 if (create) 3219 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3220 "ext-model", CPI_MODEL_XTD(cpi)); 3221 3222 /* generation */ 3223 3224 switch (cpi->cpi_vendor) { 3225 case X86_VENDOR_AMD: 3226 /* 3227 * AMD K5 model 1 was the first part to support this 3228 */ 3229 create = cpi->cpi_xmaxeax >= 0x80000001; 3230 break; 3231 default: 3232 create = 0; 3233 break; 3234 } 3235 if (create) 3236 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3237 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 
3238 3239 /* brand-id */ 3240 3241 switch (cpi->cpi_vendor) { 3242 case X86_VENDOR_Intel: 3243 /* 3244 * brand id first appeared on Pentium III Xeon model 8, 3245 * and Celeron model 8 processors and Opteron 3246 */ 3247 create = cpi->cpi_family > 6 || 3248 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3249 break; 3250 case X86_VENDOR_AMD: 3251 create = cpi->cpi_family >= 0xf; 3252 break; 3253 default: 3254 create = 0; 3255 break; 3256 } 3257 if (create && cpi->cpi_brandid != 0) { 3258 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3259 "brand-id", cpi->cpi_brandid); 3260 } 3261 3262 /* chunks, and apic-id */ 3263 3264 switch (cpi->cpi_vendor) { 3265 /* 3266 * first available on Pentium IV and Opteron (K8) 3267 */ 3268 case X86_VENDOR_Intel: 3269 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3270 break; 3271 case X86_VENDOR_AMD: 3272 create = cpi->cpi_family >= 0xf; 3273 break; 3274 default: 3275 create = 0; 3276 break; 3277 } 3278 if (create) { 3279 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3280 "chunks", CPI_CHUNKS(cpi)); 3281 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3282 "apic-id", CPI_APIC_ID(cpi)); 3283 if (cpi->cpi_chipid >= 0) { 3284 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3285 "chip#", cpi->cpi_chipid); 3286 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3287 "clog#", cpi->cpi_clogid); 3288 } 3289 } 3290 3291 /* cpuid-features */ 3292 3293 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3294 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3295 3296 3297 /* cpuid-features-ecx */ 3298 3299 switch (cpi->cpi_vendor) { 3300 case X86_VENDOR_Intel: 3301 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3302 break; 3303 default: 3304 create = 0; 3305 break; 3306 } 3307 if (create) 3308 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3309 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3310 3311 /* ext-cpuid-features */ 3312 3313 switch (cpi->cpi_vendor) { 3314 case X86_VENDOR_Intel: 3315 case X86_VENDOR_AMD: 
3316 case X86_VENDOR_Cyrix: 3317 case X86_VENDOR_TM: 3318 case X86_VENDOR_Centaur: 3319 create = cpi->cpi_xmaxeax >= 0x80000001; 3320 break; 3321 default: 3322 create = 0; 3323 break; 3324 } 3325 if (create) { 3326 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3327 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3328 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3329 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3330 } 3331 3332 /* 3333 * Brand String first appeared in Intel Pentium IV, AMD K5 3334 * model 1, and Cyrix GXm. On earlier models we try and 3335 * simulate something similar .. so this string should always 3336 * same -something- about the processor, however lame. 3337 */ 3338 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3339 "brand-string", cpi->cpi_brandstr); 3340 3341 /* 3342 * Finally, cache and tlb information 3343 */ 3344 switch (x86_which_cacheinfo(cpi)) { 3345 case X86_VENDOR_Intel: 3346 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3347 break; 3348 case X86_VENDOR_Cyrix: 3349 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3350 break; 3351 case X86_VENDOR_AMD: 3352 amd_cache_info(cpi, cpu_devi); 3353 break; 3354 default: 3355 break; 3356 } 3357 3358 mutex_exit(&cpu_node_lock); 3359 } 3360 3361 struct l2info { 3362 int *l2i_csz; 3363 int *l2i_lsz; 3364 int *l2i_assoc; 3365 int l2i_ret; 3366 }; 3367 3368 /* 3369 * A cacheinfo walker that fetches the size, line-size and associativity 3370 * of the L2 cache 3371 */ 3372 static int 3373 intel_l2cinfo(void *arg, const struct cachetab *ct) 3374 { 3375 struct l2info *l2i = arg; 3376 int *ip; 3377 3378 if (ct->ct_label != l2_cache_str && 3379 ct->ct_label != sl2_cache_str) 3380 return (0); /* not an L2 -- keep walking */ 3381 3382 if ((ip = l2i->l2i_csz) != NULL) 3383 *ip = ct->ct_size; 3384 if ((ip = l2i->l2i_lsz) != NULL) 3385 *ip = ct->ct_line_size; 3386 if ((ip = l2i->l2i_assoc) != NULL) 3387 *ip = ct->ct_assoc; 3388 l2i->l2i_ret = ct->ct_size; 
3389 return (1); /* was an L2 -- terminate walk */ 3390 } 3391 3392 static void 3393 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3394 { 3395 struct cpuid_regs *cp; 3396 uint_t size, assoc; 3397 int *ip; 3398 3399 if (cpi->cpi_xmaxeax < 0x80000006) 3400 return; 3401 cp = &cpi->cpi_extd[6]; 3402 3403 if ((assoc = BITX(cp->cp_ecx, 15, 12)) != 0 && 3404 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3405 uint_t cachesz = size * 1024; 3406 3407 3408 if ((ip = l2i->l2i_csz) != NULL) 3409 *ip = cachesz; 3410 if ((ip = l2i->l2i_lsz) != NULL) 3411 *ip = BITX(cp->cp_ecx, 7, 0); 3412 if ((ip = l2i->l2i_assoc) != NULL) 3413 *ip = assoc; 3414 l2i->l2i_ret = cachesz; 3415 } 3416 } 3417 3418 int 3419 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3420 { 3421 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3422 struct l2info __l2info, *l2i = &__l2info; 3423 3424 l2i->l2i_csz = csz; 3425 l2i->l2i_lsz = lsz; 3426 l2i->l2i_assoc = assoc; 3427 l2i->l2i_ret = -1; 3428 3429 switch (x86_which_cacheinfo(cpi)) { 3430 case X86_VENDOR_Intel: 3431 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3432 break; 3433 case X86_VENDOR_Cyrix: 3434 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3435 break; 3436 case X86_VENDOR_AMD: 3437 amd_l2cacheinfo(cpi, l2i); 3438 break; 3439 default: 3440 break; 3441 } 3442 return (l2i->l2i_ret); 3443 } 3444 3445 uint32_t * 3446 cpuid_mwait_alloc(cpu_t *cpu) 3447 { 3448 uint32_t *ret; 3449 size_t mwait_size; 3450 3451 ASSERT(cpuid_checkpass(cpu, 2)); 3452 3453 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3454 if (mwait_size == 0) 3455 return (NULL); 3456 3457 /* 3458 * kmem_alloc() returns cache line size aligned data for mwait_size 3459 * allocations. mwait_size is currently cache line sized. Neither 3460 * of these implementation details are guarantied to be true in the 3461 * future. 3462 * 3463 * First try allocating mwait_size as kmem_alloc() currently returns 3464 * correctly aligned memory. 
If kmem_alloc() does not return 3465 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3466 * 3467 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3468 * decide to free this memory. 3469 */ 3470 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3471 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3472 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3473 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3474 *ret = MWAIT_RUNNING; 3475 return (ret); 3476 } else { 3477 kmem_free(ret, mwait_size); 3478 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3479 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3480 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3481 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3482 *ret = MWAIT_RUNNING; 3483 return (ret); 3484 } 3485 } 3486 3487 void 3488 cpuid_mwait_free(cpu_t *cpu) 3489 { 3490 ASSERT(cpuid_checkpass(cpu, 2)); 3491 3492 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3493 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3494 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3495 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3496 } 3497 3498 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3499 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3500 } 3501