1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Various routines to handle identification 30 * and classification of x86 processors. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/archsystm.h> 35 #include <sys/x86_archext.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 #include <sys/cmn_err.h> 39 #include <sys/sunddi.h> 40 #include <sys/sunndi.h> 41 #include <sys/cpuvar.h> 42 #include <sys/processor.h> 43 #include <sys/sysmacros.h> 44 #include <sys/pg.h> 45 #include <sys/fp.h> 46 #include <sys/controlregs.h> 47 #include <sys/auxv_386.h> 48 #include <sys/bitmap.h> 49 #include <sys/memnode.h> 50 51 /* 52 * Pass 0 of cpuid feature analysis happens in locore. It contains special code 53 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with 54 * them accordingly. For most modern processors, feature detection occurs here 55 * in pass 1. 56 * 57 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup() 58 * for the boot CPU and does the basic analysis that the early kernel needs. 59 * x86_feature is set based on the return value of cpuid_pass1() of the boot 60 * CPU. 61 * 62 * Pass 1 includes: 63 * 64 * o Determining vendor/model/family/stepping and setting x86_type and 65 * x86_vendor accordingly. 66 * o Processing the feature flags returned by the cpuid instruction while 67 * applying any workarounds or tricks for the specific processor. 68 * o Mapping the feature flags into Solaris feature bits (X86_*). 69 * o Processing extended feature flags if supported by the processor, 70 * again while applying specific processor knowledge. 71 * o Determining the CMT characteristics of the system. 72 * 73 * Pass 1 is done on non-boot CPUs during their initialization and the results 74 * are used only as a meager attempt at ensuring that all processors within the 75 * system support the same features. 76 * 77 * Pass 2 of cpuid feature analysis happens just at the beginning 78 * of startup(). It just copies in and corrects the remainder 79 * of the cpuid data we depend on: standard cpuid functions that we didn't 80 * need for pass1 feature analysis, and extended cpuid functions beyond the 81 * simple feature processing done in pass1. 82 * 83 * Pass 3 of cpuid analysis is invoked after basic kernel services; in 84 * particular kernel memory allocation has been made available. It creates a 85 * readable brand string based on the data collected in the first two passes. 86 * 87 * Pass 4 of cpuid analysis is invoked after post_startup() when all 88 * the support infrastructure for various hardware features has been 89 * initialized. It determines which processor features will be reported 90 * to userland via the aux vector. 91 * 92 * All passes are executed on all CPUs, but only the boot CPU determines what 93 * features the kernel will use. 94 * 95 * Much of the worst junk in this file is for the support of processors 96 * that didn't really implement the cpuid instruction properly. 97 * 98 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon, 99 * the pass numbers. Accordingly, changes to the pass code may require changes 100 * to the accessor code. 101 */ 102 103 uint_t x86_feature = 0; 104 uint_t x86_vendor = X86_VENDOR_IntelClone; 105 uint_t x86_type = X86_TYPE_OTHER; 106 107 uint_t pentiumpro_bug4046376; 108 uint_t pentiumpro_bug4064495; 109 110 uint_t enable486; 111 112 /* 113 * This set of strings are for processors rumored to support the cpuid 114 * instruction, and is used by locore.s to figure out how to set x86_vendor 115 */ 116 const char CyrixInstead[] = "CyrixInstead"; 117 118 /* 119 * monitor/mwait info. 120 * 121 * size_actual and buf_actual are the real address and size allocated to get 122 * proper mwait_buf alignement. buf_actual and size_actual should be passed 123 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 124 * processor cache-line alignment, but this is not guarantied in the furture. 125 */ 126 struct mwait_info { 127 size_t mon_min; /* min size to avoid missed wakeups */ 128 size_t mon_max; /* size to avoid false wakeups */ 129 size_t size_actual; /* size actually allocated */ 130 void *buf_actual; /* memory actually allocated */ 131 uint32_t support; /* processor support of monitor/mwait */ 132 }; 133 134 /* 135 * These constants determine how many of the elements of the 136 * cpuid we cache in the cpuid_info data structure; the 137 * remaining elements are accessible via the cpuid instruction. 138 */ 139 140 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */ 141 #define NMAX_CPI_EXTD 9 /* eax = 0x80000000 .. 0x80000008 */ 142 143 struct cpuid_info { 144 uint_t cpi_pass; /* last pass completed */ 145 /* 146 * standard function information 147 */ 148 uint_t cpi_maxeax; /* fn 0: %eax */ 149 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 150 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 151 152 uint_t cpi_family; /* fn 1: extended family */ 153 uint_t cpi_model; /* fn 1: extended model */ 154 uint_t cpi_step; /* fn 1: stepping */ 155 chipid_t cpi_chipid; /* fn 1: %ebx: chip # on ht cpus */ 156 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 157 int cpi_clogid; /* fn 1: %ebx: thread # */ 158 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 159 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 160 uint_t cpi_ncache; /* fn 2: number of elements */ 161 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 162 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 163 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */ 164 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */ 165 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */ 166 /* 167 * extended function information 168 */ 169 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 170 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 171 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 172 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 173 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */ 174 id_t cpi_coreid; 175 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 176 /* Intel: fn 4: %eax[31-26] */ 177 /* 178 * supported feature information 179 */ 180 uint32_t cpi_support[5]; 181 #define STD_EDX_FEATURES 0 182 #define AMD_EDX_FEATURES 1 183 #define TM_EDX_FEATURES 2 184 #define STD_ECX_FEATURES 3 185 #define AMD_ECX_FEATURES 4 186 /* 187 * Synthesized information, where known. 188 */ 189 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 190 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 191 uint32_t cpi_socket; /* Chip package/socket type */ 192 193 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 194 }; 195 196 197 static struct cpuid_info cpuid_info0; 198 199 /* 200 * These bit fields are defined by the Intel Application Note AP-485 201 * "Intel Processor Identification and the CPUID Instruction" 202 */ 203 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 204 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 205 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 206 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 207 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 208 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 209 210 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 211 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 212 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 213 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 214 215 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 216 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 217 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 218 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 219 220 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 221 #define CPI_XMAXEAX_MAX 0x80000100 222 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 223 224 /* 225 * Function 4 (Deterministic Cache Parameters) macros 226 * Defined by Intel Application Note AP-485 227 */ 228 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 229 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 230 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 231 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 232 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 233 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 234 235 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 236 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 237 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 238 239 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 240 241 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0) 242 243 244 /* 245 * A couple of shorthand macros to identify "later" P6-family chips 246 * like the Pentium M and Core. First, the "older" P6-based stuff 247 * (loosely defined as "pre-Pentium-4"): 248 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 249 */ 250 251 #define IS_LEGACY_P6(cpi) ( \ 252 cpi->cpi_family == 6 && \ 253 (cpi->cpi_model == 1 || \ 254 cpi->cpi_model == 3 || \ 255 cpi->cpi_model == 5 || \ 256 cpi->cpi_model == 6 || \ 257 cpi->cpi_model == 7 || \ 258 cpi->cpi_model == 8 || \ 259 cpi->cpi_model == 0xA || \ 260 cpi->cpi_model == 0xB) \ 261 ) 262 263 /* A "new F6" is everything with family 6 that's not the above */ 264 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 265 266 /* Extended family/model support */ 267 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 268 cpi->cpi_family >= 0xf) 269 270 /* 271 * AMD family 0xf and family 0x10 socket types. 272 * First index : 273 * 0 for family 0xf, revs B thru E 274 * 1 for family 0xf, revs F and G 275 * 2 for family 0x10, rev B 276 * Second index by (model & 0x3) 277 */ 278 static uint32_t amd_skts[3][4] = { 279 /* 280 * Family 0xf revisions B through E 281 */ 282 #define A_SKTS_0 0 283 { 284 X86_SOCKET_754, /* 0b00 */ 285 X86_SOCKET_940, /* 0b01 */ 286 X86_SOCKET_754, /* 0b10 */ 287 X86_SOCKET_939 /* 0b11 */ 288 }, 289 /* 290 * Family 0xf revisions F and G 291 */ 292 #define A_SKTS_1 1 293 { 294 X86_SOCKET_S1g1, /* 0b00 */ 295 X86_SOCKET_F1207, /* 0b01 */ 296 X86_SOCKET_UNKNOWN, /* 0b10 */ 297 X86_SOCKET_AM2 /* 0b11 */ 298 }, 299 /* 300 * Family 0x10 revisions A and B 301 * It is not clear whether, as new sockets release, that 302 * model & 0x3 will id socket for this family 303 */ 304 #define A_SKTS_2 2 305 { 306 X86_SOCKET_F1207, /* 0b00 */ 307 X86_SOCKET_F1207, /* 0b01 */ 308 X86_SOCKET_F1207, /* 0b10 */ 309 X86_SOCKET_F1207, /* 0b11 */ 310 } 311 }; 312 313 /* 314 * Table for mapping AMD Family 0xf and AMD Family 0x10 model/stepping 315 * combination to chip "revision" and socket type. 316 * 317 * The first member of this array that matches a given family, extended model 318 * plus model range, and stepping range will be considered a match. 319 */ 320 static const struct amd_rev_mapent { 321 uint_t rm_family; 322 uint_t rm_modello; 323 uint_t rm_modelhi; 324 uint_t rm_steplo; 325 uint_t rm_stephi; 326 uint32_t rm_chiprev; 327 const char *rm_chiprevstr; 328 int rm_sktidx; 329 } amd_revmap[] = { 330 /* 331 * =============== AuthenticAMD Family 0xf =============== 332 */ 333 334 /* 335 * Rev B includes model 0x4 stepping 0 and model 0x5 stepping 0 and 1. 336 */ 337 { 0xf, 0x04, 0x04, 0x0, 0x0, X86_CHIPREV_AMD_F_REV_B, "B", A_SKTS_0 }, 338 { 0xf, 0x05, 0x05, 0x0, 0x1, X86_CHIPREV_AMD_F_REV_B, "B", A_SKTS_0 }, 339 /* 340 * Rev C0 includes model 0x4 stepping 8 and model 0x5 stepping 8 341 */ 342 { 0xf, 0x04, 0x05, 0x8, 0x8, X86_CHIPREV_AMD_F_REV_C0, "C0", A_SKTS_0 }, 343 /* 344 * Rev CG is the rest of extended model 0x0 - i.e., everything 345 * but the rev B and C0 combinations covered above. 346 */ 347 { 0xf, 0x00, 0x0f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_CG, "CG", A_SKTS_0 }, 348 /* 349 * Rev D has extended model 0x1. 350 */ 351 { 0xf, 0x10, 0x1f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_D, "D", A_SKTS_0 }, 352 /* 353 * Rev E has extended model 0x2. 354 * Extended model 0x3 is unused but available to grow into. 355 */ 356 { 0xf, 0x20, 0x3f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_E, "E", A_SKTS_0 }, 357 /* 358 * Rev F has extended models 0x4 and 0x5. 359 */ 360 { 0xf, 0x40, 0x5f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_F, "F", A_SKTS_1 }, 361 /* 362 * Rev G has extended model 0x6. 363 */ 364 { 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", A_SKTS_1 }, 365 366 /* 367 * =============== AuthenticAMD Family 0x10 =============== 368 */ 369 370 /* 371 * Rev A has model 0 and stepping 0/1/2 for DR-{A0,A1,A2}. 372 * Give all of model 0 stepping range to rev A. 373 */ 374 { 0x10, 0x00, 0x00, 0x0, 0x2, X86_CHIPREV_AMD_10_REV_A, "A", A_SKTS_2 }, 375 376 /* 377 * Rev B has model 2 and steppings 0/1/0xa/2 for DR-{B0,B1,BA,B2}. 378 * Give all of model 2 stepping range to rev B. 379 */ 380 { 0x10, 0x02, 0x02, 0x0, 0xf, X86_CHIPREV_AMD_10_REV_B, "B", A_SKTS_2 }, 381 }; 382 383 /* 384 * Info for monitor/mwait idle loop. 385 * 386 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 387 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 388 * 2006. 389 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 390 * Documentation Updates" #33633, Rev 2.05, December 2006. 391 */ 392 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 393 #define MWAIT_EXTENSIONS (0x00000002) /* extenstion supported */ 394 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 395 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 396 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 397 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 398 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 399 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 400 /* 401 * Number of sub-cstates for a given c-state. 402 */ 403 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 404 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 405 406 static void 407 synth_amd_info(struct cpuid_info *cpi) 408 { 409 const struct amd_rev_mapent *rmp; 410 uint_t family, model, step; 411 int i; 412 413 /* 414 * Currently only AMD family 0xf and family 0x10 use these fields. 415 */ 416 if (cpi->cpi_family != 0xf && cpi->cpi_family != 0x10) 417 return; 418 419 family = cpi->cpi_family; 420 model = cpi->cpi_model; 421 step = cpi->cpi_step; 422 423 for (i = 0, rmp = amd_revmap; i < sizeof (amd_revmap) / sizeof (*rmp); 424 i++, rmp++) { 425 if (family == rmp->rm_family && 426 model >= rmp->rm_modello && model <= rmp->rm_modelhi && 427 step >= rmp->rm_steplo && step <= rmp->rm_stephi) { 428 cpi->cpi_chiprev = rmp->rm_chiprev; 429 cpi->cpi_chiprevstr = rmp->rm_chiprevstr; 430 cpi->cpi_socket = amd_skts[rmp->rm_sktidx][model & 0x3]; 431 return; 432 } 433 } 434 } 435 436 static void 437 synth_info(struct cpuid_info *cpi) 438 { 439 cpi->cpi_chiprev = X86_CHIPREV_UNKNOWN; 440 cpi->cpi_chiprevstr = "Unknown"; 441 cpi->cpi_socket = X86_SOCKET_UNKNOWN; 442 443 switch (cpi->cpi_vendor) { 444 case X86_VENDOR_AMD: 445 synth_amd_info(cpi); 446 break; 447 448 default: 449 break; 450 451 } 452 } 453 454 /* 455 * Apply up various platform-dependent restrictions where the 456 * underlying platform restrictions mean the CPU can be marked 457 * as less capable than its cpuid instruction would imply. 458 */ 459 #if defined(__xpv) 460 static void 461 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 462 { 463 switch (eax) { 464 case 1: 465 cp->cp_edx &= 466 ~(CPUID_INTC_EDX_PSE | 467 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 468 CPUID_INTC_EDX_MCA | /* XXPV true on dom0? */ 469 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 470 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 471 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 472 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 473 break; 474 475 case 0x80000001: 476 cp->cp_edx &= 477 ~(CPUID_AMD_EDX_PSE | 478 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 479 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 480 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 481 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 482 CPUID_AMD_EDX_TSCP); 483 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 484 break; 485 default: 486 break; 487 } 488 489 switch (vendor) { 490 case X86_VENDOR_Intel: 491 switch (eax) { 492 case 4: 493 /* 494 * Zero out the (ncores-per-chip - 1) field 495 */ 496 cp->cp_eax &= 0x03fffffff; 497 break; 498 default: 499 break; 500 } 501 break; 502 case X86_VENDOR_AMD: 503 switch (eax) { 504 case 0x80000008: 505 /* 506 * Zero out the (ncores-per-chip - 1) field 507 */ 508 cp->cp_ecx &= 0xffffff00; 509 break; 510 default: 511 break; 512 } 513 break; 514 default: 515 break; 516 } 517 } 518 #else 519 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 520 #endif 521 522 /* 523 * Some undocumented ways of patching the results of the cpuid 524 * instruction to permit running Solaris 10 on future cpus that 525 * we don't currently support. Could be set to non-zero values 526 * via settings in eeprom. 527 */ 528 529 uint32_t cpuid_feature_ecx_include; 530 uint32_t cpuid_feature_ecx_exclude; 531 uint32_t cpuid_feature_edx_include; 532 uint32_t cpuid_feature_edx_exclude; 533 534 void 535 cpuid_alloc_space(cpu_t *cpu) 536 { 537 /* 538 * By convention, cpu0 is the boot cpu, which is set up 539 * before memory allocation is available. All other cpus get 540 * their cpuid_info struct allocated here. 541 */ 542 ASSERT(cpu->cpu_id != 0); 543 cpu->cpu_m.mcpu_cpi = 544 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 545 } 546 547 void 548 cpuid_free_space(cpu_t *cpu) 549 { 550 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 551 int i; 552 553 ASSERT(cpu->cpu_id != 0); 554 555 /* 556 * Free up any function 4 related dynamic storage 557 */ 558 for (i = 1; i < cpi->cpi_std_4_size; i++) 559 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs)); 560 if (cpi->cpi_std_4_size > 0) 561 kmem_free(cpi->cpi_std_4, 562 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); 563 564 kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); 565 } 566 567 #if !defined(__xpv) 568 569 static void 570 check_for_hvm() 571 { 572 struct cpuid_regs cp; 573 char *xen_str; 574 uint32_t xen_signature[4]; 575 extern int xpv_is_hvm; 576 577 /* 578 * In a fully virtualized domain, Xen's pseudo-cpuid function 579 * 0x40000000 returns a string representing the Xen signature in 580 * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid 581 * function. 582 */ 583 cp.cp_eax = 0x40000000; 584 (void) __cpuid_insn(&cp); 585 xen_signature[0] = cp.cp_ebx; 586 xen_signature[1] = cp.cp_ecx; 587 xen_signature[2] = cp.cp_edx; 588 xen_signature[3] = 0; 589 xen_str = (char *)xen_signature; 590 if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002) 591 xpv_is_hvm = 1; 592 } 593 #endif /* __xpv */ 594 595 uint_t 596 cpuid_pass1(cpu_t *cpu) 597 { 598 uint32_t mask_ecx, mask_edx; 599 uint_t feature = X86_CPUID; 600 struct cpuid_info *cpi; 601 struct cpuid_regs *cp; 602 int xcpuid; 603 #if !defined(__xpv) 604 extern int idle_cpu_prefer_mwait; 605 #endif 606 607 /* 608 * Space statically allocated for cpu0, ensure pointer is set 609 */ 610 if (cpu->cpu_id == 0) 611 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 612 cpi = cpu->cpu_m.mcpu_cpi; 613 ASSERT(cpi != NULL); 614 cp = &cpi->cpi_std[0]; 615 cp->cp_eax = 0; 616 cpi->cpi_maxeax = __cpuid_insn(cp); 617 { 618 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 619 *iptr++ = cp->cp_ebx; 620 *iptr++ = cp->cp_edx; 621 *iptr++ = cp->cp_ecx; 622 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 623 } 624 625 /* 626 * Map the vendor string to a type code 627 */ 628 if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0) 629 cpi->cpi_vendor = X86_VENDOR_Intel; 630 else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0) 631 cpi->cpi_vendor = X86_VENDOR_AMD; 632 else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0) 633 cpi->cpi_vendor = X86_VENDOR_TM; 634 else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0) 635 /* 636 * CyrixInstead is a variable used by the Cyrix detection code 637 * in locore. 638 */ 639 cpi->cpi_vendor = X86_VENDOR_Cyrix; 640 else if (strcmp(cpi->cpi_vendorstr, "UMC UMC UMC ") == 0) 641 cpi->cpi_vendor = X86_VENDOR_UMC; 642 else if (strcmp(cpi->cpi_vendorstr, "NexGenDriven") == 0) 643 cpi->cpi_vendor = X86_VENDOR_NexGen; 644 else if (strcmp(cpi->cpi_vendorstr, "CentaurHauls") == 0) 645 cpi->cpi_vendor = X86_VENDOR_Centaur; 646 else if (strcmp(cpi->cpi_vendorstr, "RiseRiseRise") == 0) 647 cpi->cpi_vendor = X86_VENDOR_Rise; 648 else if (strcmp(cpi->cpi_vendorstr, "SiS SiS SiS ") == 0) 649 cpi->cpi_vendor = X86_VENDOR_SiS; 650 else if (strcmp(cpi->cpi_vendorstr, "Geode by NSC") == 0) 651 cpi->cpi_vendor = X86_VENDOR_NSC; 652 else 653 cpi->cpi_vendor = X86_VENDOR_IntelClone; 654 655 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 656 657 /* 658 * Limit the range in case of weird hardware 659 */ 660 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 661 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 662 if (cpi->cpi_maxeax < 1) 663 goto pass1_done; 664 665 cp = &cpi->cpi_std[1]; 666 cp->cp_eax = 1; 667 (void) __cpuid_insn(cp); 668 669 /* 670 * Extract identifying constants for easy access. 671 */ 672 cpi->cpi_model = CPI_MODEL(cpi); 673 cpi->cpi_family = CPI_FAMILY(cpi); 674 675 if (cpi->cpi_family == 0xf) 676 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 677 678 /* 679 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 680 * Intel, and presumably everyone else, uses model == 0xf, as 681 * one would expect (max value means possible overflow). Sigh. 682 */ 683 684 switch (cpi->cpi_vendor) { 685 case X86_VENDOR_Intel: 686 if (IS_EXTENDED_MODEL_INTEL(cpi)) 687 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 688 break; 689 case X86_VENDOR_AMD: 690 if (CPI_FAMILY(cpi) == 0xf) 691 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 692 break; 693 default: 694 if (cpi->cpi_model == 0xf) 695 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 696 break; 697 } 698 699 cpi->cpi_step = CPI_STEP(cpi); 700 cpi->cpi_brandid = CPI_BRANDID(cpi); 701 702 /* 703 * *default* assumptions: 704 * - believe %edx feature word 705 * - ignore %ecx feature word 706 * - 32-bit virtual and physical addressing 707 */ 708 mask_edx = 0xffffffff; 709 mask_ecx = 0; 710 711 cpi->cpi_pabits = cpi->cpi_vabits = 32; 712 713 switch (cpi->cpi_vendor) { 714 case X86_VENDOR_Intel: 715 if (cpi->cpi_family == 5) 716 x86_type = X86_TYPE_P5; 717 else if (IS_LEGACY_P6(cpi)) { 718 x86_type = X86_TYPE_P6; 719 pentiumpro_bug4046376 = 1; 720 pentiumpro_bug4064495 = 1; 721 /* 722 * Clear the SEP bit when it was set erroneously 723 */ 724 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 725 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 726 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 727 x86_type = X86_TYPE_P4; 728 /* 729 * We don't currently depend on any of the %ecx 730 * features until Prescott, so we'll only check 731 * this from P4 onwards. We might want to revisit 732 * that idea later. 733 */ 734 mask_ecx = 0xffffffff; 735 } else if (cpi->cpi_family > 0xf) 736 mask_ecx = 0xffffffff; 737 /* 738 * We don't support MONITOR/MWAIT if leaf 5 is not available 739 * to obtain the monitor linesize. 740 */ 741 if (cpi->cpi_maxeax < 5) 742 mask_ecx &= ~CPUID_INTC_ECX_MON; 743 break; 744 case X86_VENDOR_IntelClone: 745 default: 746 break; 747 case X86_VENDOR_AMD: 748 #if defined(OPTERON_ERRATUM_108) 749 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 750 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 751 cpi->cpi_model = 0xc; 752 } else 753 #endif 754 if (cpi->cpi_family == 5) { 755 /* 756 * AMD K5 and K6 757 * 758 * These CPUs have an incomplete implementation 759 * of MCA/MCE which we mask away. 760 */ 761 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 762 763 /* 764 * Model 0 uses the wrong (APIC) bit 765 * to indicate PGE. Fix it here. 766 */ 767 if (cpi->cpi_model == 0) { 768 if (cp->cp_edx & 0x200) { 769 cp->cp_edx &= ~0x200; 770 cp->cp_edx |= CPUID_INTC_EDX_PGE; 771 } 772 } 773 774 /* 775 * Early models had problems w/ MMX; disable. 776 */ 777 if (cpi->cpi_model < 6) 778 mask_edx &= ~CPUID_INTC_EDX_MMX; 779 } 780 781 /* 782 * For newer families, SSE3 and CX16, at least, are valid; 783 * enable all 784 */ 785 if (cpi->cpi_family >= 0xf) 786 mask_ecx = 0xffffffff; 787 /* 788 * We don't support MONITOR/MWAIT if leaf 5 is not available 789 * to obtain the monitor linesize. 790 */ 791 if (cpi->cpi_maxeax < 5) 792 mask_ecx &= ~CPUID_INTC_ECX_MON; 793 794 #if !defined(__xpv) 795 /* 796 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD 797 * processors. AMD does not intend MWAIT to be used in the cpu 798 * idle loop on current and future processors. 10h and future 799 * AMD processors use more power in MWAIT than HLT. 800 * Pre-family-10h Opterons do not have the MWAIT instruction. 801 */ 802 idle_cpu_prefer_mwait = 0; 803 #endif 804 805 break; 806 case X86_VENDOR_TM: 807 /* 808 * workaround the NT workaround in CMS 4.1 809 */ 810 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 811 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 812 cp->cp_edx |= CPUID_INTC_EDX_CX8; 813 break; 814 case X86_VENDOR_Centaur: 815 /* 816 * workaround the NT workarounds again 817 */ 818 if (cpi->cpi_family == 6) 819 cp->cp_edx |= CPUID_INTC_EDX_CX8; 820 break; 821 case X86_VENDOR_Cyrix: 822 /* 823 * We rely heavily on the probing in locore 824 * to actually figure out what parts, if any, 825 * of the Cyrix cpuid instruction to believe. 826 */ 827 switch (x86_type) { 828 case X86_TYPE_CYRIX_486: 829 mask_edx = 0; 830 break; 831 case X86_TYPE_CYRIX_6x86: 832 mask_edx = 0; 833 break; 834 case X86_TYPE_CYRIX_6x86L: 835 mask_edx = 836 CPUID_INTC_EDX_DE | 837 CPUID_INTC_EDX_CX8; 838 break; 839 case X86_TYPE_CYRIX_6x86MX: 840 mask_edx = 841 CPUID_INTC_EDX_DE | 842 CPUID_INTC_EDX_MSR | 843 CPUID_INTC_EDX_CX8 | 844 CPUID_INTC_EDX_PGE | 845 CPUID_INTC_EDX_CMOV | 846 CPUID_INTC_EDX_MMX; 847 break; 848 case X86_TYPE_CYRIX_GXm: 849 mask_edx = 850 CPUID_INTC_EDX_MSR | 851 CPUID_INTC_EDX_CX8 | 852 CPUID_INTC_EDX_CMOV | 853 CPUID_INTC_EDX_MMX; 854 break; 855 case X86_TYPE_CYRIX_MediaGX: 856 break; 857 case X86_TYPE_CYRIX_MII: 858 case X86_TYPE_VIA_CYRIX_III: 859 mask_edx = 860 CPUID_INTC_EDX_DE | 861 CPUID_INTC_EDX_TSC | 862 CPUID_INTC_EDX_MSR | 863 CPUID_INTC_EDX_CX8 | 864 CPUID_INTC_EDX_PGE | 865 CPUID_INTC_EDX_CMOV | 866 CPUID_INTC_EDX_MMX; 867 break; 868 default: 869 break; 870 } 871 break; 872 } 873 874 #if defined(__xpv) 875 /* 876 * Do not support MONITOR/MWAIT under a hypervisor 877 */ 878 mask_ecx &= ~CPUID_INTC_ECX_MON; 879 #endif /* __xpv */ 880 881 /* 882 * Now we've figured out the masks that determine 883 * which bits we choose to believe, apply the masks 884 * to the feature words, then map the kernel's view 885 * of these feature words into its feature word. 886 */ 887 cp->cp_edx &= mask_edx; 888 cp->cp_ecx &= mask_ecx; 889 890 /* 891 * apply any platform restrictions (we don't call this 892 * immediately after __cpuid_insn here, because we need the 893 * workarounds applied above first) 894 */ 895 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 896 897 /* 898 * fold in overrides from the "eeprom" mechanism 899 */ 900 cp->cp_edx |= cpuid_feature_edx_include; 901 cp->cp_edx &= ~cpuid_feature_edx_exclude; 902 903 cp->cp_ecx |= cpuid_feature_ecx_include; 904 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 905 906 if (cp->cp_edx & CPUID_INTC_EDX_PSE) 907 feature |= X86_LARGEPAGE; 908 if (cp->cp_edx & CPUID_INTC_EDX_TSC) 909 feature |= X86_TSC; 910 if (cp->cp_edx & CPUID_INTC_EDX_MSR) 911 feature |= X86_MSR; 912 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) 913 feature |= X86_MTRR; 914 if (cp->cp_edx & CPUID_INTC_EDX_PGE) 915 feature |= X86_PGE; 916 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) 917 feature |= X86_CMOV; 918 if (cp->cp_edx & CPUID_INTC_EDX_MMX) 919 feature |= X86_MMX; 920 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 921 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) 922 feature |= X86_MCA; 923 if (cp->cp_edx & CPUID_INTC_EDX_PAE) 924 feature |= X86_PAE; 925 if (cp->cp_edx & CPUID_INTC_EDX_CX8) 926 feature |= X86_CX8; 927 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) 928 feature |= X86_CX16; 929 if (cp->cp_edx & CPUID_INTC_EDX_PAT) 930 feature |= X86_PAT; 931 if (cp->cp_edx & CPUID_INTC_EDX_SEP) 932 feature |= X86_SEP; 933 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 934 /* 935 * In our implementation, fxsave/fxrstor 936 * are prerequisites before we'll even 937 * try and do SSE things. 938 */ 939 if (cp->cp_edx & CPUID_INTC_EDX_SSE) 940 feature |= X86_SSE; 941 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) 942 feature |= X86_SSE2; 943 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) 944 feature |= X86_SSE3; 945 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 946 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) 947 feature |= X86_SSSE3; 948 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) 949 feature |= X86_SSE4_1; 950 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) 951 feature |= X86_SSE4_2; 952 } 953 } 954 if (cp->cp_edx & CPUID_INTC_EDX_DE) 955 feature |= X86_DE; 956 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 957 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 958 feature |= X86_MWAIT; 959 } 960 961 if (feature & X86_PAE) 962 cpi->cpi_pabits = 36; 963 964 /* 965 * Hyperthreading configuration is slightly tricky on Intel 966 * and pure clones, and even trickier on AMD. 967 * 968 * (AMD chose to set the HTT bit on their CMP processors, 969 * even though they're not actually hyperthreaded. Thus it 970 * takes a bit more work to figure out what's really going 971 * on ... see the handling of the CMP_LGCY bit below) 972 */ 973 if (cp->cp_edx & CPUID_INTC_EDX_HTT) { 974 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 975 if (cpi->cpi_ncpu_per_chip > 1) 976 feature |= X86_HTT; 977 } else { 978 cpi->cpi_ncpu_per_chip = 1; 979 } 980 981 /* 982 * Work on the "extended" feature information, doing 983 * some basic initialization for cpuid_pass2() 984 */ 985 xcpuid = 0; 986 switch (cpi->cpi_vendor) { 987 case X86_VENDOR_Intel: 988 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf) 989 xcpuid++; 990 break; 991 case X86_VENDOR_AMD: 992 if (cpi->cpi_family > 5 || 993 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 994 xcpuid++; 995 break; 996 case X86_VENDOR_Cyrix: 997 /* 998 * Only these Cyrix CPUs are -known- to support 999 * extended cpuid operations. 1000 */ 1001 if (x86_type == X86_TYPE_VIA_CYRIX_III || 1002 x86_type == X86_TYPE_CYRIX_GXm) 1003 xcpuid++; 1004 break; 1005 case X86_VENDOR_Centaur: 1006 case X86_VENDOR_TM: 1007 default: 1008 xcpuid++; 1009 break; 1010 } 1011 1012 if (xcpuid) { 1013 cp = &cpi->cpi_extd[0]; 1014 cp->cp_eax = 0x80000000; 1015 cpi->cpi_xmaxeax = __cpuid_insn(cp); 1016 } 1017 1018 if (cpi->cpi_xmaxeax & 0x80000000) { 1019 1020 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 1021 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 1022 1023 switch (cpi->cpi_vendor) { 1024 case X86_VENDOR_Intel: 1025 case X86_VENDOR_AMD: 1026 if (cpi->cpi_xmaxeax < 0x80000001) 1027 break; 1028 cp = &cpi->cpi_extd[1]; 1029 cp->cp_eax = 0x80000001; 1030 (void) __cpuid_insn(cp); 1031 1032 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1033 cpi->cpi_family == 5 && 1034 cpi->cpi_model == 6 && 1035 cpi->cpi_step == 6) { 1036 /* 1037 * K6 model 6 uses bit 10 to indicate SYSC 1038 * Later models use bit 11. Fix it here. 1039 */ 1040 if (cp->cp_edx & 0x400) { 1041 cp->cp_edx &= ~0x400; 1042 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 1043 } 1044 } 1045 1046 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 1047 1048 /* 1049 * Compute the additions to the kernel's feature word. 1050 */ 1051 if (cp->cp_edx & CPUID_AMD_EDX_NX) 1052 feature |= X86_NX; 1053 1054 #if defined(__amd64) 1055 /* 1 GB large page - enable only for 64 bit kernel */ 1056 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) 1057 feature |= X86_1GPG; 1058 #endif 1059 1060 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 1061 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 1062 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) 1063 feature |= X86_SSE4A; 1064 1065 /* 1066 * If both the HTT and CMP_LGCY bits are set, 1067 * then we're not actually HyperThreaded. Read 1068 * "AMD CPUID Specification" for more details. 1069 */ 1070 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1071 (feature & X86_HTT) && 1072 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) { 1073 feature &= ~X86_HTT; 1074 feature |= X86_CMP; 1075 } 1076 #if defined(__amd64) 1077 /* 1078 * It's really tricky to support syscall/sysret in 1079 * the i386 kernel; we rely on sysenter/sysexit 1080 * instead. In the amd64 kernel, things are -way- 1081 * better. 1082 */ 1083 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) 1084 feature |= X86_ASYSC; 1085 1086 /* 1087 * While we're thinking about system calls, note 1088 * that AMD processors don't support sysenter 1089 * in long mode at all, so don't try to program them. 1090 */ 1091 if (x86_vendor == X86_VENDOR_AMD) 1092 feature &= ~X86_SEP; 1093 #endif 1094 if (x86_vendor == X86_VENDOR_AMD && 1095 cp->cp_edx & CPUID_AMD_EDX_TSCP) 1096 feature |= X86_TSCP; 1097 break; 1098 default: 1099 break; 1100 } 1101 1102 /* 1103 * Get CPUID data about processor cores and hyperthreads. 1104 */ 1105 switch (cpi->cpi_vendor) { 1106 case X86_VENDOR_Intel: 1107 if (cpi->cpi_maxeax >= 4) { 1108 cp = &cpi->cpi_std[4]; 1109 cp->cp_eax = 4; 1110 cp->cp_ecx = 0; 1111 (void) __cpuid_insn(cp); 1112 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 1113 } 1114 /*FALLTHROUGH*/ 1115 case X86_VENDOR_AMD: 1116 if (cpi->cpi_xmaxeax < 0x80000008) 1117 break; 1118 cp = &cpi->cpi_extd[8]; 1119 cp->cp_eax = 0x80000008; 1120 (void) __cpuid_insn(cp); 1121 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp); 1122 1123 /* 1124 * Virtual and physical address limits from 1125 * cpuid override previously guessed values. 1126 */ 1127 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 1128 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 1129 break; 1130 default: 1131 break; 1132 } 1133 1134 /* 1135 * Derive the number of cores per chip 1136 */ 1137 switch (cpi->cpi_vendor) { 1138 case X86_VENDOR_Intel: 1139 if (cpi->cpi_maxeax < 4) { 1140 cpi->cpi_ncore_per_chip = 1; 1141 break; 1142 } else { 1143 cpi->cpi_ncore_per_chip = 1144 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1; 1145 } 1146 break; 1147 case X86_VENDOR_AMD: 1148 if (cpi->cpi_xmaxeax < 0x80000008) { 1149 cpi->cpi_ncore_per_chip = 1; 1150 break; 1151 } else { 1152 cpi->cpi_ncore_per_chip = 1153 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1; 1154 } 1155 break; 1156 default: 1157 cpi->cpi_ncore_per_chip = 1; 1158 break; 1159 } 1160 } else { 1161 cpi->cpi_ncore_per_chip = 1; 1162 } 1163 1164 /* 1165 * If more than one core, then this processor is CMP. 1166 */ 1167 if (cpi->cpi_ncore_per_chip > 1) 1168 feature |= X86_CMP; 1169 1170 /* 1171 * If the number of cores is the same as the number 1172 * of CPUs, then we cannot have HyperThreading. 1173 */ 1174 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) 1175 feature &= ~X86_HTT; 1176 1177 if ((feature & (X86_HTT | X86_CMP)) == 0) { 1178 /* 1179 * Single-core single-threaded processors. 1180 */ 1181 cpi->cpi_chipid = -1; 1182 cpi->cpi_clogid = 0; 1183 cpi->cpi_coreid = cpu->cpu_id; 1184 } else if (cpi->cpi_ncpu_per_chip > 1) { 1185 uint_t i; 1186 uint_t chipid_shift = 0; 1187 uint_t coreid_shift = 0; 1188 uint_t apic_id = CPI_APIC_ID(cpi); 1189 1190 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1191 chipid_shift++; 1192 cpi->cpi_chipid = apic_id >> chipid_shift; 1193 cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1); 1194 1195 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 1196 if (feature & X86_CMP) { 1197 /* 1198 * Multi-core (and possibly multi-threaded) 1199 * processors. 1200 */ 1201 uint_t ncpu_per_core; 1202 if (cpi->cpi_ncore_per_chip == 1) 1203 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1204 else if (cpi->cpi_ncore_per_chip > 1) 1205 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1206 cpi->cpi_ncore_per_chip; 1207 /* 1208 * 8bit APIC IDs on dual core Pentiums 1209 * look like this: 1210 * 1211 * +-----------------------+------+------+ 1212 * | Physical Package ID | MC | HT | 1213 * +-----------------------+------+------+ 1214 * <------- chipid --------> 1215 * <------- coreid ---------------> 1216 * <--- clogid --> 1217 * 1218 * Where the number of bits necessary to 1219 * represent MC and HT fields together equals 1220 * to the minimum number of bits necessary to 1221 * store the value of cpi->cpi_ncpu_per_chip. 1222 * Of those bits, the MC part uses the number 1223 * of bits necessary to store the value of 1224 * cpi->cpi_ncore_per_chip. 1225 */ 1226 for (i = 1; i < ncpu_per_core; i <<= 1) 1227 coreid_shift++; 1228 cpi->cpi_coreid = apic_id >> coreid_shift; 1229 } else if (feature & X86_HTT) { 1230 /* 1231 * Single-core multi-threaded processors. 1232 */ 1233 cpi->cpi_coreid = cpi->cpi_chipid; 1234 } 1235 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 1236 /* 1237 * AMD currently only has dual-core processors with 1238 * single-threaded cores. If they ever release 1239 * multi-threaded processors, then this code 1240 * will have to be updated. 1241 */ 1242 cpi->cpi_coreid = cpu->cpu_id; 1243 } else { 1244 /* 1245 * All other processors are currently 1246 * assumed to have single cores. 1247 */ 1248 cpi->cpi_coreid = cpi->cpi_chipid; 1249 } 1250 } 1251 1252 /* 1253 * Synthesize chip "revision" and socket type 1254 */ 1255 synth_info(cpi); 1256 1257 pass1_done: 1258 #if !defined(__xpv) 1259 check_for_hvm(); 1260 #endif 1261 cpi->cpi_pass = 1; 1262 return (feature); 1263 } 1264 1265 /* 1266 * Make copies of the cpuid table entries we depend on, in 1267 * part for ease of parsing now, in part so that we have only 1268 * one place to correct any of it, in part for ease of 1269 * later export to userland, and in part so we can look at 1270 * this stuff in a crash dump. 1271 */ 1272 1273 /*ARGSUSED*/ 1274 void 1275 cpuid_pass2(cpu_t *cpu) 1276 { 1277 uint_t n, nmax; 1278 int i; 1279 struct cpuid_regs *cp; 1280 uint8_t *dp; 1281 uint32_t *iptr; 1282 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1283 1284 ASSERT(cpi->cpi_pass == 1); 1285 1286 if (cpi->cpi_maxeax < 1) 1287 goto pass2_done; 1288 1289 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 1290 nmax = NMAX_CPI_STD; 1291 /* 1292 * (We already handled n == 0 and n == 1 in pass 1) 1293 */ 1294 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 1295 cp->cp_eax = n; 1296 1297 /* 1298 * CPUID function 4 expects %ecx to be initialized 1299 * with an index which indicates which cache to return 1300 * information about. The OS is expected to call function 4 1301 * with %ecx set to 0, 1, 2, ... until it returns with 1302 * EAX[4:0] set to 0, which indicates there are no more 1303 * caches. 1304 * 1305 * Here, populate cpi_std[4] with the information returned by 1306 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 1307 * when dynamic memory allocation becomes available. 1308 * 1309 * Note: we need to explicitly initialize %ecx here, since 1310 * function 4 may have been previously invoked. 1311 */ 1312 if (n == 4) 1313 cp->cp_ecx = 0; 1314 1315 (void) __cpuid_insn(cp); 1316 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 1317 switch (n) { 1318 case 2: 1319 /* 1320 * "the lower 8 bits of the %eax register 1321 * contain a value that identifies the number 1322 * of times the cpuid [instruction] has to be 1323 * executed to obtain a complete image of the 1324 * processor's caching systems." 1325 * 1326 * How *do* they make this stuff up? 1327 */ 1328 cpi->cpi_ncache = sizeof (*cp) * 1329 BITX(cp->cp_eax, 7, 0); 1330 if (cpi->cpi_ncache == 0) 1331 break; 1332 cpi->cpi_ncache--; /* skip count byte */ 1333 1334 /* 1335 * Well, for now, rather than attempt to implement 1336 * this slightly dubious algorithm, we just look 1337 * at the first 15 .. 1338 */ 1339 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 1340 cpi->cpi_ncache = sizeof (*cp) - 1; 1341 1342 dp = cpi->cpi_cacheinfo; 1343 if (BITX(cp->cp_eax, 31, 31) == 0) { 1344 uint8_t *p = (void *)&cp->cp_eax; 1345 for (i = 1; i < 3; i++) 1346 if (p[i] != 0) 1347 *dp++ = p[i]; 1348 } 1349 if (BITX(cp->cp_ebx, 31, 31) == 0) { 1350 uint8_t *p = (void *)&cp->cp_ebx; 1351 for (i = 0; i < 4; i++) 1352 if (p[i] != 0) 1353 *dp++ = p[i]; 1354 } 1355 if (BITX(cp->cp_ecx, 31, 31) == 0) { 1356 uint8_t *p = (void *)&cp->cp_ecx; 1357 for (i = 0; i < 4; i++) 1358 if (p[i] != 0) 1359 *dp++ = p[i]; 1360 } 1361 if (BITX(cp->cp_edx, 31, 31) == 0) { 1362 uint8_t *p = (void *)&cp->cp_edx; 1363 for (i = 0; i < 4; i++) 1364 if (p[i] != 0) 1365 *dp++ = p[i]; 1366 } 1367 break; 1368 1369 case 3: /* Processor serial number, if PSN supported */ 1370 break; 1371 1372 case 4: /* Deterministic cache parameters */ 1373 break; 1374 1375 case 5: /* Monitor/Mwait parameters */ 1376 { 1377 size_t mwait_size; 1378 1379 /* 1380 * check cpi_mwait.support which was set in cpuid_pass1 1381 */ 1382 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 1383 break; 1384 1385 /* 1386 * Protect ourself from insane mwait line size. 1387 * Workaround for incomplete hardware emulator(s). 1388 */ 1389 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 1390 if (mwait_size < sizeof (uint32_t) || 1391 !ISP2(mwait_size)) { 1392 #if DEBUG 1393 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 1394 "size %ld", 1395 cpu->cpu_id, (long)mwait_size); 1396 #endif 1397 break; 1398 } 1399 1400 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 1401 cpi->cpi_mwait.mon_max = mwait_size; 1402 if (MWAIT_EXTENSION(cpi)) { 1403 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 1404 if (MWAIT_INT_ENABLE(cpi)) 1405 cpi->cpi_mwait.support |= 1406 MWAIT_ECX_INT_ENABLE; 1407 } 1408 break; 1409 } 1410 default: 1411 break; 1412 } 1413 } 1414 1415 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) 1416 goto pass2_done; 1417 1418 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD) 1419 nmax = NMAX_CPI_EXTD; 1420 /* 1421 * Copy the extended properties, fixing them as we go. 1422 * (We already handled n == 0 and n == 1 in pass 1) 1423 */ 1424 iptr = (void *)cpi->cpi_brandstr; 1425 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 1426 cp->cp_eax = 0x80000000 + n; 1427 (void) __cpuid_insn(cp); 1428 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp); 1429 switch (n) { 1430 case 2: 1431 case 3: 1432 case 4: 1433 /* 1434 * Extract the brand string 1435 */ 1436 *iptr++ = cp->cp_eax; 1437 *iptr++ = cp->cp_ebx; 1438 *iptr++ = cp->cp_ecx; 1439 *iptr++ = cp->cp_edx; 1440 break; 1441 case 5: 1442 switch (cpi->cpi_vendor) { 1443 case X86_VENDOR_AMD: 1444 /* 1445 * The Athlon and Duron were the first 1446 * parts to report the sizes of the 1447 * TLB for large pages. Before then, 1448 * we don't trust the data. 1449 */ 1450 if (cpi->cpi_family < 6 || 1451 (cpi->cpi_family == 6 && 1452 cpi->cpi_model < 1)) 1453 cp->cp_eax = 0; 1454 break; 1455 default: 1456 break; 1457 } 1458 break; 1459 case 6: 1460 switch (cpi->cpi_vendor) { 1461 case X86_VENDOR_AMD: 1462 /* 1463 * The Athlon and Duron were the first 1464 * AMD parts with L2 TLB's. 1465 * Before then, don't trust the data. 1466 */ 1467 if (cpi->cpi_family < 6 || 1468 cpi->cpi_family == 6 && 1469 cpi->cpi_model < 1) 1470 cp->cp_eax = cp->cp_ebx = 0; 1471 /* 1472 * AMD Duron rev A0 reports L2 1473 * cache size incorrectly as 1K 1474 * when it is really 64K 1475 */ 1476 if (cpi->cpi_family == 6 && 1477 cpi->cpi_model == 3 && 1478 cpi->cpi_step == 0) { 1479 cp->cp_ecx &= 0xffff; 1480 cp->cp_ecx |= 0x400000; 1481 } 1482 break; 1483 case X86_VENDOR_Cyrix: /* VIA C3 */ 1484 /* 1485 * VIA C3 processors are a bit messed 1486 * up w.r.t. encoding cache sizes in %ecx 1487 */ 1488 if (cpi->cpi_family != 6) 1489 break; 1490 /* 1491 * model 7 and 8 were incorrectly encoded 1492 * 1493 * xxx is model 8 really broken? 1494 */ 1495 if (cpi->cpi_model == 7 || 1496 cpi->cpi_model == 8) 1497 cp->cp_ecx = 1498 BITX(cp->cp_ecx, 31, 24) << 16 | 1499 BITX(cp->cp_ecx, 23, 16) << 12 | 1500 BITX(cp->cp_ecx, 15, 8) << 8 | 1501 BITX(cp->cp_ecx, 7, 0); 1502 /* 1503 * model 9 stepping 1 has wrong associativity 1504 */ 1505 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 1506 cp->cp_ecx |= 8 << 12; 1507 break; 1508 case X86_VENDOR_Intel: 1509 /* 1510 * Extended L2 Cache features function. 1511 * First appeared on Prescott. 1512 */ 1513 default: 1514 break; 1515 } 1516 break; 1517 default: 1518 break; 1519 } 1520 } 1521 1522 pass2_done: 1523 cpi->cpi_pass = 2; 1524 } 1525 1526 static const char * 1527 intel_cpubrand(const struct cpuid_info *cpi) 1528 { 1529 int i; 1530 1531 if ((x86_feature & X86_CPUID) == 0 || 1532 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1533 return ("i486"); 1534 1535 switch (cpi->cpi_family) { 1536 case 5: 1537 return ("Intel Pentium(r)"); 1538 case 6: 1539 switch (cpi->cpi_model) { 1540 uint_t celeron, xeon; 1541 const struct cpuid_regs *cp; 1542 case 0: 1543 case 1: 1544 case 2: 1545 return ("Intel Pentium(r) Pro"); 1546 case 3: 1547 case 4: 1548 return ("Intel Pentium(r) II"); 1549 case 6: 1550 return ("Intel Celeron(r)"); 1551 case 5: 1552 case 7: 1553 celeron = xeon = 0; 1554 cp = &cpi->cpi_std[2]; /* cache info */ 1555 1556 for (i = 1; i < 3; i++) { 1557 uint_t tmp; 1558 1559 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 1560 if (tmp == 0x40) 1561 celeron++; 1562 if (tmp >= 0x44 && tmp <= 0x45) 1563 xeon++; 1564 } 1565 1566 for (i = 0; i < 2; i++) { 1567 uint_t tmp; 1568 1569 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 1570 if (tmp == 0x40) 1571 celeron++; 1572 else if (tmp >= 0x44 && tmp <= 0x45) 1573 xeon++; 1574 } 1575 1576 for (i = 0; i < 4; i++) { 1577 uint_t tmp; 1578 1579 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 1580 if (tmp == 0x40) 1581 celeron++; 1582 else if (tmp >= 0x44 && tmp <= 0x45) 1583 xeon++; 1584 } 1585 1586 for (i = 0; i < 4; i++) { 1587 uint_t tmp; 1588 1589 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 1590 if (tmp == 0x40) 1591 celeron++; 1592 else if (tmp >= 0x44 && tmp <= 0x45) 1593 xeon++; 1594 } 1595 1596 if (celeron) 1597 return ("Intel Celeron(r)"); 1598 if (xeon) 1599 return (cpi->cpi_model == 5 ? 1600 "Intel Pentium(r) II Xeon(tm)" : 1601 "Intel Pentium(r) III Xeon(tm)"); 1602 return (cpi->cpi_model == 5 ? 1603 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 1604 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 1605 default: 1606 break; 1607 } 1608 default: 1609 break; 1610 } 1611 1612 /* BrandID is present if the field is nonzero */ 1613 if (cpi->cpi_brandid != 0) { 1614 static const struct { 1615 uint_t bt_bid; 1616 const char *bt_str; 1617 } brand_tbl[] = { 1618 { 0x1, "Intel(r) Celeron(r)" }, 1619 { 0x2, "Intel(r) Pentium(r) III" }, 1620 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 1621 { 0x4, "Intel(r) Pentium(r) III" }, 1622 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 1623 { 0x7, "Mobile Intel(r) Celeron(r)" }, 1624 { 0x8, "Intel(r) Pentium(r) 4" }, 1625 { 0x9, "Intel(r) Pentium(r) 4" }, 1626 { 0xa, "Intel(r) Celeron(r)" }, 1627 { 0xb, "Intel(r) Xeon(tm)" }, 1628 { 0xc, "Intel(r) Xeon(tm) MP" }, 1629 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 1630 { 0xf, "Mobile Intel(r) Celeron(r)" }, 1631 { 0x11, "Mobile Genuine Intel(r)" }, 1632 { 0x12, "Intel(r) Celeron(r) M" }, 1633 { 0x13, "Mobile Intel(r) Celeron(r)" }, 1634 { 0x14, "Intel(r) Celeron(r)" }, 1635 { 0x15, "Mobile Genuine Intel(r)" }, 1636 { 0x16, "Intel(r) Pentium(r) M" }, 1637 { 0x17, "Mobile Intel(r) Celeron(r)" } 1638 }; 1639 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 1640 uint_t sgn; 1641 1642 sgn = (cpi->cpi_family << 8) | 1643 (cpi->cpi_model << 4) | cpi->cpi_step; 1644 1645 for (i = 0; i < btblmax; i++) 1646 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 1647 break; 1648 if (i < btblmax) { 1649 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 1650 return ("Intel(r) Celeron(r)"); 1651 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 1652 return ("Intel(r) Xeon(tm) MP"); 1653 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 1654 return ("Intel(r) Xeon(tm)"); 1655 return (brand_tbl[i].bt_str); 1656 } 1657 } 1658 1659 return (NULL); 1660 } 1661 1662 static const char * 1663 amd_cpubrand(const struct cpuid_info *cpi) 1664 { 1665 if ((x86_feature & X86_CPUID) == 0 || 1666 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 1667 return ("i486 compatible"); 1668 1669 switch (cpi->cpi_family) { 1670 case 5: 1671 switch (cpi->cpi_model) { 1672 case 0: 1673 case 1: 1674 case 2: 1675 case 3: 1676 case 4: 1677 case 5: 1678 return ("AMD-K5(r)"); 1679 case 6: 1680 case 7: 1681 return ("AMD-K6(r)"); 1682 case 8: 1683 return ("AMD-K6(r)-2"); 1684 case 9: 1685 return ("AMD-K6(r)-III"); 1686 default: 1687 return ("AMD (family 5)"); 1688 } 1689 case 6: 1690 switch (cpi->cpi_model) { 1691 case 1: 1692 return ("AMD-K7(tm)"); 1693 case 0: 1694 case 2: 1695 case 4: 1696 return ("AMD Athlon(tm)"); 1697 case 3: 1698 case 7: 1699 return ("AMD Duron(tm)"); 1700 case 6: 1701 case 8: 1702 case 10: 1703 /* 1704 * Use the L2 cache size to distinguish 1705 */ 1706 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 1707 "AMD Athlon(tm)" : "AMD Duron(tm)"); 1708 default: 1709 return ("AMD (family 6)"); 1710 } 1711 default: 1712 break; 1713 } 1714 1715 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 1716 cpi->cpi_brandid != 0) { 1717 switch (BITX(cpi->cpi_brandid, 7, 5)) { 1718 case 3: 1719 return ("AMD Opteron(tm) UP 1xx"); 1720 case 4: 1721 return ("AMD Opteron(tm) DP 2xx"); 1722 case 5: 1723 return ("AMD Opteron(tm) MP 8xx"); 1724 default: 1725 return ("AMD Opteron(tm)"); 1726 } 1727 } 1728 1729 return (NULL); 1730 } 1731 1732 static const char * 1733 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 1734 { 1735 if ((x86_feature & X86_CPUID) == 0 || 1736 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 1737 type == X86_TYPE_CYRIX_486) 1738 return ("i486 compatible"); 1739 1740 switch (type) { 1741 case X86_TYPE_CYRIX_6x86: 1742 return ("Cyrix 6x86"); 1743 case X86_TYPE_CYRIX_6x86L: 1744 return ("Cyrix 6x86L"); 1745 case X86_TYPE_CYRIX_6x86MX: 1746 return ("Cyrix 6x86MX"); 1747 case X86_TYPE_CYRIX_GXm: 1748 return ("Cyrix GXm"); 1749 case X86_TYPE_CYRIX_MediaGX: 1750 return ("Cyrix MediaGX"); 1751 case X86_TYPE_CYRIX_MII: 1752 return ("Cyrix M2"); 1753 case X86_TYPE_VIA_CYRIX_III: 1754 return ("VIA Cyrix M3"); 1755 default: 1756 /* 1757 * Have another wild guess .. 1758 */ 1759 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 1760 return ("Cyrix 5x86"); 1761 else if (cpi->cpi_family == 5) { 1762 switch (cpi->cpi_model) { 1763 case 2: 1764 return ("Cyrix 6x86"); /* Cyrix M1 */ 1765 case 4: 1766 return ("Cyrix MediaGX"); 1767 default: 1768 break; 1769 } 1770 } else if (cpi->cpi_family == 6) { 1771 switch (cpi->cpi_model) { 1772 case 0: 1773 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 1774 case 5: 1775 case 6: 1776 case 7: 1777 case 8: 1778 case 9: 1779 return ("VIA C3"); 1780 default: 1781 break; 1782 } 1783 } 1784 break; 1785 } 1786 return (NULL); 1787 } 1788 1789 /* 1790 * This only gets called in the case that the CPU extended 1791 * feature brand string (0x80000002, 0x80000003, 0x80000004) 1792 * aren't available, or contain null bytes for some reason. 1793 */ 1794 static void 1795 fabricate_brandstr(struct cpuid_info *cpi) 1796 { 1797 const char *brand = NULL; 1798 1799 switch (cpi->cpi_vendor) { 1800 case X86_VENDOR_Intel: 1801 brand = intel_cpubrand(cpi); 1802 break; 1803 case X86_VENDOR_AMD: 1804 brand = amd_cpubrand(cpi); 1805 break; 1806 case X86_VENDOR_Cyrix: 1807 brand = cyrix_cpubrand(cpi, x86_type); 1808 break; 1809 case X86_VENDOR_NexGen: 1810 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1811 brand = "NexGen Nx586"; 1812 break; 1813 case X86_VENDOR_Centaur: 1814 if (cpi->cpi_family == 5) 1815 switch (cpi->cpi_model) { 1816 case 4: 1817 brand = "Centaur C6"; 1818 break; 1819 case 8: 1820 brand = "Centaur C2"; 1821 break; 1822 case 9: 1823 brand = "Centaur C3"; 1824 break; 1825 default: 1826 break; 1827 } 1828 break; 1829 case X86_VENDOR_Rise: 1830 if (cpi->cpi_family == 5 && 1831 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 1832 brand = "Rise mP6"; 1833 break; 1834 case X86_VENDOR_SiS: 1835 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 1836 brand = "SiS 55x"; 1837 break; 1838 case X86_VENDOR_TM: 1839 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 1840 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 1841 break; 1842 case X86_VENDOR_NSC: 1843 case X86_VENDOR_UMC: 1844 default: 1845 break; 1846 } 1847 if (brand) { 1848 (void) strcpy((char *)cpi->cpi_brandstr, brand); 1849 return; 1850 } 1851 1852 /* 1853 * If all else fails ... 1854 */ 1855 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 1856 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 1857 cpi->cpi_model, cpi->cpi_step); 1858 } 1859 1860 /* 1861 * This routine is called just after kernel memory allocation 1862 * becomes available on cpu0, and as part of mp_startup() on 1863 * the other cpus. 1864 * 1865 * Fixup the brand string, and collect any information from cpuid 1866 * that requires dynamicically allocated storage to represent. 1867 */ 1868 /*ARGSUSED*/ 1869 void 1870 cpuid_pass3(cpu_t *cpu) 1871 { 1872 int i, max, shft, level, size; 1873 struct cpuid_regs regs; 1874 struct cpuid_regs *cp; 1875 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1876 1877 ASSERT(cpi->cpi_pass == 2); 1878 1879 /* 1880 * Function 4: Deterministic cache parameters 1881 * 1882 * Take this opportunity to detect the number of threads 1883 * sharing the last level cache, and construct a corresponding 1884 * cache id. The respective cpuid_info members are initialized 1885 * to the default case of "no last level cache sharing". 1886 */ 1887 cpi->cpi_ncpu_shr_last_cache = 1; 1888 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 1889 1890 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) { 1891 1892 /* 1893 * Find the # of elements (size) returned by fn 4, and along 1894 * the way detect last level cache sharing details. 1895 */ 1896 bzero(®s, sizeof (regs)); 1897 cp = ®s; 1898 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 1899 cp->cp_eax = 4; 1900 cp->cp_ecx = i; 1901 1902 (void) __cpuid_insn(cp); 1903 1904 if (CPI_CACHE_TYPE(cp) == 0) 1905 break; 1906 level = CPI_CACHE_LVL(cp); 1907 if (level > max) { 1908 max = level; 1909 cpi->cpi_ncpu_shr_last_cache = 1910 CPI_NTHR_SHR_CACHE(cp) + 1; 1911 } 1912 } 1913 cpi->cpi_std_4_size = size = i; 1914 1915 /* 1916 * Allocate the cpi_std_4 array. The first element 1917 * references the regs for fn 4, %ecx == 0, which 1918 * cpuid_pass2() stashed in cpi->cpi_std[4]. 1919 */ 1920 if (size > 0) { 1921 cpi->cpi_std_4 = 1922 kmem_alloc(size * sizeof (cp), KM_SLEEP); 1923 cpi->cpi_std_4[0] = &cpi->cpi_std[4]; 1924 1925 /* 1926 * Allocate storage to hold the additional regs 1927 * for function 4, %ecx == 1 .. cpi_std_4_size. 1928 * 1929 * The regs for fn 4, %ecx == 0 has already 1930 * been allocated as indicated above. 1931 */ 1932 for (i = 1; i < size; i++) { 1933 cp = cpi->cpi_std_4[i] = 1934 kmem_zalloc(sizeof (regs), KM_SLEEP); 1935 cp->cp_eax = 4; 1936 cp->cp_ecx = i; 1937 1938 (void) __cpuid_insn(cp); 1939 } 1940 } 1941 /* 1942 * Determine the number of bits needed to represent 1943 * the number of CPUs sharing the last level cache. 1944 * 1945 * Shift off that number of bits from the APIC id to 1946 * derive the cache id. 1947 */ 1948 shft = 0; 1949 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 1950 shft++; 1951 cpi->cpi_last_lvl_cacheid = CPI_APIC_ID(cpi) >> shft; 1952 } 1953 1954 /* 1955 * Now fixup the brand string 1956 */ 1957 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) { 1958 fabricate_brandstr(cpi); 1959 } else { 1960 1961 /* 1962 * If we successfully extracted a brand string from the cpuid 1963 * instruction, clean it up by removing leading spaces and 1964 * similar junk. 1965 */ 1966 if (cpi->cpi_brandstr[0]) { 1967 size_t maxlen = sizeof (cpi->cpi_brandstr); 1968 char *src, *dst; 1969 1970 dst = src = (char *)cpi->cpi_brandstr; 1971 src[maxlen - 1] = '\0'; 1972 /* 1973 * strip leading spaces 1974 */ 1975 while (*src == ' ') 1976 src++; 1977 /* 1978 * Remove any 'Genuine' or "Authentic" prefixes 1979 */ 1980 if (strncmp(src, "Genuine ", 8) == 0) 1981 src += 8; 1982 if (strncmp(src, "Authentic ", 10) == 0) 1983 src += 10; 1984 1985 /* 1986 * Now do an in-place copy. 1987 * Map (R) to (r) and (TM) to (tm). 1988 * The era of teletypes is long gone, and there's 1989 * -really- no need to shout. 1990 */ 1991 while (*src != '\0') { 1992 if (src[0] == '(') { 1993 if (strncmp(src + 1, "R)", 2) == 0) { 1994 (void) strncpy(dst, "(r)", 3); 1995 src += 3; 1996 dst += 3; 1997 continue; 1998 } 1999 if (strncmp(src + 1, "TM)", 3) == 0) { 2000 (void) strncpy(dst, "(tm)", 4); 2001 src += 4; 2002 dst += 4; 2003 continue; 2004 } 2005 } 2006 *dst++ = *src++; 2007 } 2008 *dst = '\0'; 2009 2010 /* 2011 * Finally, remove any trailing spaces 2012 */ 2013 while (--dst > cpi->cpi_brandstr) 2014 if (*dst == ' ') 2015 *dst = '\0'; 2016 else 2017 break; 2018 } else 2019 fabricate_brandstr(cpi); 2020 } 2021 cpi->cpi_pass = 3; 2022 } 2023 2024 /* 2025 * This routine is called out of bind_hwcap() much later in the life 2026 * of the kernel (post_startup()). The job of this routine is to resolve 2027 * the hardware feature support and kernel support for those features into 2028 * what we're actually going to tell applications via the aux vector. 2029 */ 2030 uint_t 2031 cpuid_pass4(cpu_t *cpu) 2032 { 2033 struct cpuid_info *cpi; 2034 uint_t hwcap_flags = 0; 2035 2036 if (cpu == NULL) 2037 cpu = CPU; 2038 cpi = cpu->cpu_m.mcpu_cpi; 2039 2040 ASSERT(cpi->cpi_pass == 3); 2041 2042 if (cpi->cpi_maxeax >= 1) { 2043 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 2044 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 2045 2046 *edx = CPI_FEATURES_EDX(cpi); 2047 *ecx = CPI_FEATURES_ECX(cpi); 2048 2049 /* 2050 * [these require explicit kernel support] 2051 */ 2052 if ((x86_feature & X86_SEP) == 0) 2053 *edx &= ~CPUID_INTC_EDX_SEP; 2054 2055 if ((x86_feature & X86_SSE) == 0) 2056 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 2057 if ((x86_feature & X86_SSE2) == 0) 2058 *edx &= ~CPUID_INTC_EDX_SSE2; 2059 2060 if ((x86_feature & X86_HTT) == 0) 2061 *edx &= ~CPUID_INTC_EDX_HTT; 2062 2063 if ((x86_feature & X86_SSE3) == 0) 2064 *ecx &= ~CPUID_INTC_ECX_SSE3; 2065 2066 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2067 if ((x86_feature & X86_SSSE3) == 0) 2068 *ecx &= ~CPUID_INTC_ECX_SSSE3; 2069 if ((x86_feature & X86_SSE4_1) == 0) 2070 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 2071 if ((x86_feature & X86_SSE4_2) == 0) 2072 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 2073 } 2074 2075 /* 2076 * [no explicit support required beyond x87 fp context] 2077 */ 2078 if (!fpu_exists) 2079 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 2080 2081 /* 2082 * Now map the supported feature vector to things that we 2083 * think userland will care about. 2084 */ 2085 if (*edx & CPUID_INTC_EDX_SEP) 2086 hwcap_flags |= AV_386_SEP; 2087 if (*edx & CPUID_INTC_EDX_SSE) 2088 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 2089 if (*edx & CPUID_INTC_EDX_SSE2) 2090 hwcap_flags |= AV_386_SSE2; 2091 if (*ecx & CPUID_INTC_ECX_SSE3) 2092 hwcap_flags |= AV_386_SSE3; 2093 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2094 if (*ecx & CPUID_INTC_ECX_SSSE3) 2095 hwcap_flags |= AV_386_SSSE3; 2096 if (*ecx & CPUID_INTC_ECX_SSE4_1) 2097 hwcap_flags |= AV_386_SSE4_1; 2098 if (*ecx & CPUID_INTC_ECX_SSE4_2) 2099 hwcap_flags |= AV_386_SSE4_2; 2100 } 2101 if (*ecx & CPUID_INTC_ECX_POPCNT) 2102 hwcap_flags |= AV_386_POPCNT; 2103 if (*edx & CPUID_INTC_EDX_FPU) 2104 hwcap_flags |= AV_386_FPU; 2105 if (*edx & CPUID_INTC_EDX_MMX) 2106 hwcap_flags |= AV_386_MMX; 2107 2108 if (*edx & CPUID_INTC_EDX_TSC) 2109 hwcap_flags |= AV_386_TSC; 2110 if (*edx & CPUID_INTC_EDX_CX8) 2111 hwcap_flags |= AV_386_CX8; 2112 if (*edx & CPUID_INTC_EDX_CMOV) 2113 hwcap_flags |= AV_386_CMOV; 2114 if (*ecx & CPUID_INTC_ECX_MON) 2115 hwcap_flags |= AV_386_MON; 2116 if (*ecx & CPUID_INTC_ECX_CX16) 2117 hwcap_flags |= AV_386_CX16; 2118 } 2119 2120 if (x86_feature & X86_HTT) 2121 hwcap_flags |= AV_386_PAUSE; 2122 2123 if (cpi->cpi_xmaxeax < 0x80000001) 2124 goto pass4_done; 2125 2126 switch (cpi->cpi_vendor) { 2127 struct cpuid_regs cp; 2128 uint32_t *edx, *ecx; 2129 2130 case X86_VENDOR_Intel: 2131 /* 2132 * Seems like Intel duplicated what we necessary 2133 * here to make the initial crop of 64-bit OS's work. 2134 * Hopefully, those are the only "extended" bits 2135 * they'll add. 2136 */ 2137 /*FALLTHROUGH*/ 2138 2139 case X86_VENDOR_AMD: 2140 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 2141 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 2142 2143 *edx = CPI_FEATURES_XTD_EDX(cpi); 2144 *ecx = CPI_FEATURES_XTD_ECX(cpi); 2145 2146 /* 2147 * [these features require explicit kernel support] 2148 */ 2149 switch (cpi->cpi_vendor) { 2150 case X86_VENDOR_Intel: 2151 break; 2152 2153 case X86_VENDOR_AMD: 2154 if ((x86_feature & X86_TSCP) == 0) 2155 *edx &= ~CPUID_AMD_EDX_TSCP; 2156 if ((x86_feature & X86_SSE4A) == 0) 2157 *ecx &= ~CPUID_AMD_ECX_SSE4A; 2158 break; 2159 2160 default: 2161 break; 2162 } 2163 2164 /* 2165 * [no explicit support required beyond 2166 * x87 fp context and exception handlers] 2167 */ 2168 if (!fpu_exists) 2169 *edx &= ~(CPUID_AMD_EDX_MMXamd | 2170 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 2171 2172 if ((x86_feature & X86_NX) == 0) 2173 *edx &= ~CPUID_AMD_EDX_NX; 2174 #if !defined(__amd64) 2175 *edx &= ~CPUID_AMD_EDX_LM; 2176 #endif 2177 /* 2178 * Now map the supported feature vector to 2179 * things that we think userland will care about. 2180 */ 2181 #if defined(__amd64) 2182 if (*edx & CPUID_AMD_EDX_SYSC) 2183 hwcap_flags |= AV_386_AMD_SYSC; 2184 #endif 2185 if (*edx & CPUID_AMD_EDX_MMXamd) 2186 hwcap_flags |= AV_386_AMD_MMX; 2187 if (*edx & CPUID_AMD_EDX_3DNow) 2188 hwcap_flags |= AV_386_AMD_3DNow; 2189 if (*edx & CPUID_AMD_EDX_3DNowx) 2190 hwcap_flags |= AV_386_AMD_3DNowx; 2191 2192 switch (cpi->cpi_vendor) { 2193 case X86_VENDOR_AMD: 2194 if (*edx & CPUID_AMD_EDX_TSCP) 2195 hwcap_flags |= AV_386_TSCP; 2196 if (*ecx & CPUID_AMD_ECX_AHF64) 2197 hwcap_flags |= AV_386_AHF; 2198 if (*ecx & CPUID_AMD_ECX_SSE4A) 2199 hwcap_flags |= AV_386_AMD_SSE4A; 2200 if (*ecx & CPUID_AMD_ECX_LZCNT) 2201 hwcap_flags |= AV_386_AMD_LZCNT; 2202 break; 2203 2204 case X86_VENDOR_Intel: 2205 /* 2206 * Aarrgh. 2207 * Intel uses a different bit in the same word. 2208 */ 2209 if (*ecx & CPUID_INTC_ECX_AHF64) 2210 hwcap_flags |= AV_386_AHF; 2211 break; 2212 2213 default: 2214 break; 2215 } 2216 break; 2217 2218 case X86_VENDOR_TM: 2219 cp.cp_eax = 0x80860001; 2220 (void) __cpuid_insn(&cp); 2221 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 2222 break; 2223 2224 default: 2225 break; 2226 } 2227 2228 pass4_done: 2229 cpi->cpi_pass = 4; 2230 return (hwcap_flags); 2231 } 2232 2233 2234 /* 2235 * Simulate the cpuid instruction using the data we previously 2236 * captured about this CPU. We try our best to return the truth 2237 * about the hardware, independently of kernel support. 2238 */ 2239 uint32_t 2240 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 2241 { 2242 struct cpuid_info *cpi; 2243 struct cpuid_regs *xcp; 2244 2245 if (cpu == NULL) 2246 cpu = CPU; 2247 cpi = cpu->cpu_m.mcpu_cpi; 2248 2249 ASSERT(cpuid_checkpass(cpu, 3)); 2250 2251 /* 2252 * CPUID data is cached in two separate places: cpi_std for standard 2253 * CPUID functions, and cpi_extd for extended CPUID functions. 2254 */ 2255 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) 2256 xcp = &cpi->cpi_std[cp->cp_eax]; 2257 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax && 2258 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD) 2259 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000]; 2260 else 2261 /* 2262 * The caller is asking for data from an input parameter which 2263 * the kernel has not cached. In this case we go fetch from 2264 * the hardware and return the data directly to the user. 2265 */ 2266 return (__cpuid_insn(cp)); 2267 2268 cp->cp_eax = xcp->cp_eax; 2269 cp->cp_ebx = xcp->cp_ebx; 2270 cp->cp_ecx = xcp->cp_ecx; 2271 cp->cp_edx = xcp->cp_edx; 2272 return (cp->cp_eax); 2273 } 2274 2275 int 2276 cpuid_checkpass(cpu_t *cpu, int pass) 2277 { 2278 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 2279 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 2280 } 2281 2282 int 2283 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 2284 { 2285 ASSERT(cpuid_checkpass(cpu, 3)); 2286 2287 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 2288 } 2289 2290 int 2291 cpuid_is_cmt(cpu_t *cpu) 2292 { 2293 if (cpu == NULL) 2294 cpu = CPU; 2295 2296 ASSERT(cpuid_checkpass(cpu, 1)); 2297 2298 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 2299 } 2300 2301 /* 2302 * AMD and Intel both implement the 64-bit variant of the syscall 2303 * instruction (syscallq), so if there's -any- support for syscall, 2304 * cpuid currently says "yes, we support this". 2305 * 2306 * However, Intel decided to -not- implement the 32-bit variant of the 2307 * syscall instruction, so we provide a predicate to allow our caller 2308 * to test that subtlety here. 2309 * 2310 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 2311 * even in the case where the hardware would in fact support it. 2312 */ 2313 /*ARGSUSED*/ 2314 int 2315 cpuid_syscall32_insn(cpu_t *cpu) 2316 { 2317 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1)); 2318 2319 #if !defined(__xpv) 2320 if (cpu == NULL) 2321 cpu = CPU; 2322 2323 /*CSTYLED*/ 2324 { 2325 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2326 2327 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2328 cpi->cpi_xmaxeax >= 0x80000001 && 2329 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 2330 return (1); 2331 } 2332 #endif 2333 return (0); 2334 } 2335 2336 int 2337 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 2338 { 2339 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2340 2341 static const char fmt[] = 2342 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 2343 static const char fmt_ht[] = 2344 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 2345 2346 ASSERT(cpuid_checkpass(cpu, 1)); 2347 2348 if (cpuid_is_cmt(cpu)) 2349 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 2350 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2351 cpi->cpi_family, cpi->cpi_model, 2352 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2353 return (snprintf(s, n, fmt, 2354 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 2355 cpi->cpi_family, cpi->cpi_model, 2356 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 2357 } 2358 2359 const char * 2360 cpuid_getvendorstr(cpu_t *cpu) 2361 { 2362 ASSERT(cpuid_checkpass(cpu, 1)); 2363 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 2364 } 2365 2366 uint_t 2367 cpuid_getvendor(cpu_t *cpu) 2368 { 2369 ASSERT(cpuid_checkpass(cpu, 1)); 2370 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 2371 } 2372 2373 uint_t 2374 cpuid_getfamily(cpu_t *cpu) 2375 { 2376 ASSERT(cpuid_checkpass(cpu, 1)); 2377 return (cpu->cpu_m.mcpu_cpi->cpi_family); 2378 } 2379 2380 uint_t 2381 cpuid_getmodel(cpu_t *cpu) 2382 { 2383 ASSERT(cpuid_checkpass(cpu, 1)); 2384 return (cpu->cpu_m.mcpu_cpi->cpi_model); 2385 } 2386 2387 uint_t 2388 cpuid_get_ncpu_per_chip(cpu_t *cpu) 2389 { 2390 ASSERT(cpuid_checkpass(cpu, 1)); 2391 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 2392 } 2393 2394 uint_t 2395 cpuid_get_ncore_per_chip(cpu_t *cpu) 2396 { 2397 ASSERT(cpuid_checkpass(cpu, 1)); 2398 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 2399 } 2400 2401 uint_t 2402 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 2403 { 2404 ASSERT(cpuid_checkpass(cpu, 2)); 2405 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 2406 } 2407 2408 id_t 2409 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 2410 { 2411 ASSERT(cpuid_checkpass(cpu, 2)); 2412 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 2413 } 2414 2415 uint_t 2416 cpuid_getstep(cpu_t *cpu) 2417 { 2418 ASSERT(cpuid_checkpass(cpu, 1)); 2419 return (cpu->cpu_m.mcpu_cpi->cpi_step); 2420 } 2421 2422 uint_t 2423 cpuid_getsig(struct cpu *cpu) 2424 { 2425 ASSERT(cpuid_checkpass(cpu, 1)); 2426 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 2427 } 2428 2429 uint32_t 2430 cpuid_getchiprev(struct cpu *cpu) 2431 { 2432 ASSERT(cpuid_checkpass(cpu, 1)); 2433 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 2434 } 2435 2436 const char * 2437 cpuid_getchiprevstr(struct cpu *cpu) 2438 { 2439 ASSERT(cpuid_checkpass(cpu, 1)); 2440 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 2441 } 2442 2443 uint32_t 2444 cpuid_getsockettype(struct cpu *cpu) 2445 { 2446 ASSERT(cpuid_checkpass(cpu, 1)); 2447 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 2448 } 2449 2450 int 2451 cpuid_get_chipid(cpu_t *cpu) 2452 { 2453 ASSERT(cpuid_checkpass(cpu, 1)); 2454 2455 if (cpuid_is_cmt(cpu)) 2456 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 2457 return (cpu->cpu_id); 2458 } 2459 2460 id_t 2461 cpuid_get_coreid(cpu_t *cpu) 2462 { 2463 ASSERT(cpuid_checkpass(cpu, 1)); 2464 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 2465 } 2466 2467 int 2468 cpuid_get_clogid(cpu_t *cpu) 2469 { 2470 ASSERT(cpuid_checkpass(cpu, 1)); 2471 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 2472 } 2473 2474 void 2475 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 2476 { 2477 struct cpuid_info *cpi; 2478 2479 if (cpu == NULL) 2480 cpu = CPU; 2481 cpi = cpu->cpu_m.mcpu_cpi; 2482 2483 ASSERT(cpuid_checkpass(cpu, 1)); 2484 2485 if (pabits) 2486 *pabits = cpi->cpi_pabits; 2487 if (vabits) 2488 *vabits = cpi->cpi_vabits; 2489 } 2490 2491 /* 2492 * Returns the number of data TLB entries for a corresponding 2493 * pagesize. If it can't be computed, or isn't known, the 2494 * routine returns zero. If you ask about an architecturally 2495 * impossible pagesize, the routine will panic (so that the 2496 * hat implementor knows that things are inconsistent.) 2497 */ 2498 uint_t 2499 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 2500 { 2501 struct cpuid_info *cpi; 2502 uint_t dtlb_nent = 0; 2503 2504 if (cpu == NULL) 2505 cpu = CPU; 2506 cpi = cpu->cpu_m.mcpu_cpi; 2507 2508 ASSERT(cpuid_checkpass(cpu, 1)); 2509 2510 /* 2511 * Check the L2 TLB info 2512 */ 2513 if (cpi->cpi_xmaxeax >= 0x80000006) { 2514 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 2515 2516 switch (pagesize) { 2517 2518 case 4 * 1024: 2519 /* 2520 * All zero in the top 16 bits of the register 2521 * indicates a unified TLB. Size is in low 16 bits. 2522 */ 2523 if ((cp->cp_ebx & 0xffff0000) == 0) 2524 dtlb_nent = cp->cp_ebx & 0x0000ffff; 2525 else 2526 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 2527 break; 2528 2529 case 2 * 1024 * 1024: 2530 if ((cp->cp_eax & 0xffff0000) == 0) 2531 dtlb_nent = cp->cp_eax & 0x0000ffff; 2532 else 2533 dtlb_nent = BITX(cp->cp_eax, 27, 16); 2534 break; 2535 2536 default: 2537 panic("unknown L2 pagesize"); 2538 /*NOTREACHED*/ 2539 } 2540 } 2541 2542 if (dtlb_nent != 0) 2543 return (dtlb_nent); 2544 2545 /* 2546 * No L2 TLB support for this size, try L1. 2547 */ 2548 if (cpi->cpi_xmaxeax >= 0x80000005) { 2549 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 2550 2551 switch (pagesize) { 2552 case 4 * 1024: 2553 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 2554 break; 2555 case 2 * 1024 * 1024: 2556 dtlb_nent = BITX(cp->cp_eax, 23, 16); 2557 break; 2558 default: 2559 panic("unknown L1 d-TLB pagesize"); 2560 /*NOTREACHED*/ 2561 } 2562 } 2563 2564 return (dtlb_nent); 2565 } 2566 2567 /* 2568 * Return 0 if the erratum is not present or not applicable, positive 2569 * if it is, and negative if the status of the erratum is unknown. 2570 * 2571 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 2572 * Processors" #25759, Rev 3.57, August 2005 2573 */ 2574 int 2575 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 2576 { 2577 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2578 uint_t eax; 2579 2580 /* 2581 * Bail out if this CPU isn't an AMD CPU, or if it's 2582 * a legacy (32-bit) AMD CPU. 2583 */ 2584 if (cpi->cpi_vendor != X86_VENDOR_AMD || 2585 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 2586 cpi->cpi_family == 6) 2587 2588 return (0); 2589 2590 eax = cpi->cpi_std[1].cp_eax; 2591 2592 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 2593 #define SH_B3(eax) (eax == 0xf51) 2594 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 2595 2596 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 2597 2598 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 2599 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 2600 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 2601 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 2602 2603 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 2604 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 2605 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 2606 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 2607 2608 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 2609 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 2610 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 2611 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 2612 #define BH_E4(eax) (eax == 0x20fb1) 2613 #define SH_E5(eax) (eax == 0x20f42) 2614 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 2615 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 2616 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 2617 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 2618 DH_E6(eax) || JH_E6(eax)) 2619 2620 switch (erratum) { 2621 case 1: 2622 return (cpi->cpi_family < 0x10); 2623 case 51: /* what does the asterisk mean? */ 2624 return (B(eax) || SH_C0(eax) || CG(eax)); 2625 case 52: 2626 return (B(eax)); 2627 case 57: 2628 return (cpi->cpi_family <= 0x10); 2629 case 58: 2630 return (B(eax)); 2631 case 60: 2632 return (cpi->cpi_family <= 0x10); 2633 case 61: 2634 case 62: 2635 case 63: 2636 case 64: 2637 case 65: 2638 case 66: 2639 case 68: 2640 case 69: 2641 case 70: 2642 case 71: 2643 return (B(eax)); 2644 case 72: 2645 return (SH_B0(eax)); 2646 case 74: 2647 return (B(eax)); 2648 case 75: 2649 return (cpi->cpi_family < 0x10); 2650 case 76: 2651 return (B(eax)); 2652 case 77: 2653 return (cpi->cpi_family <= 0x10); 2654 case 78: 2655 return (B(eax) || SH_C0(eax)); 2656 case 79: 2657 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2658 case 80: 2659 case 81: 2660 case 82: 2661 return (B(eax)); 2662 case 83: 2663 return (B(eax) || SH_C0(eax) || CG(eax)); 2664 case 85: 2665 return (cpi->cpi_family < 0x10); 2666 case 86: 2667 return (SH_C0(eax) || CG(eax)); 2668 case 88: 2669 #if !defined(__amd64) 2670 return (0); 2671 #else 2672 return (B(eax) || SH_C0(eax)); 2673 #endif 2674 case 89: 2675 return (cpi->cpi_family < 0x10); 2676 case 90: 2677 return (B(eax) || SH_C0(eax) || CG(eax)); 2678 case 91: 2679 case 92: 2680 return (B(eax) || SH_C0(eax)); 2681 case 93: 2682 return (SH_C0(eax)); 2683 case 94: 2684 return (B(eax) || SH_C0(eax) || CG(eax)); 2685 case 95: 2686 #if !defined(__amd64) 2687 return (0); 2688 #else 2689 return (B(eax) || SH_C0(eax)); 2690 #endif 2691 case 96: 2692 return (B(eax) || SH_C0(eax) || CG(eax)); 2693 case 97: 2694 case 98: 2695 return (SH_C0(eax) || CG(eax)); 2696 case 99: 2697 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2698 case 100: 2699 return (B(eax) || SH_C0(eax)); 2700 case 101: 2701 case 103: 2702 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2703 case 104: 2704 return (SH_C0(eax) || CG(eax) || D0(eax)); 2705 case 105: 2706 case 106: 2707 case 107: 2708 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2709 case 108: 2710 return (DH_CG(eax)); 2711 case 109: 2712 return (SH_C0(eax) || CG(eax) || D0(eax)); 2713 case 110: 2714 return (D0(eax) || EX(eax)); 2715 case 111: 2716 return (CG(eax)); 2717 case 112: 2718 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2719 case 113: 2720 return (eax == 0x20fc0); 2721 case 114: 2722 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2723 case 115: 2724 return (SH_E0(eax) || JH_E1(eax)); 2725 case 116: 2726 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 2727 case 117: 2728 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 2729 case 118: 2730 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 2731 JH_E6(eax)); 2732 case 121: 2733 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 2734 case 122: 2735 return (cpi->cpi_family < 0x10); 2736 case 123: 2737 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 2738 case 131: 2739 return (cpi->cpi_family < 0x10); 2740 case 6336786: 2741 /* 2742 * Test for AdvPowerMgmtInfo.TscPStateInvariant 2743 * if this is a K8 family or newer processor 2744 */ 2745 if (CPI_FAMILY(cpi) == 0xf) { 2746 struct cpuid_regs regs; 2747 regs.cp_eax = 0x80000007; 2748 (void) __cpuid_insn(®s); 2749 return (!(regs.cp_edx & 0x100)); 2750 } 2751 return (0); 2752 case 6323525: 2753 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 2754 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 2755 2756 default: 2757 return (-1); 2758 } 2759 } 2760 2761 static const char assoc_str[] = "associativity"; 2762 static const char line_str[] = "line-size"; 2763 static const char size_str[] = "size"; 2764 2765 static void 2766 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 2767 uint32_t val) 2768 { 2769 char buf[128]; 2770 2771 /* 2772 * ndi_prop_update_int() is used because it is desirable for 2773 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set. 2774 */ 2775 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 2776 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 2777 } 2778 2779 /* 2780 * Intel-style cache/tlb description 2781 * 2782 * Standard cpuid level 2 gives a randomly ordered 2783 * selection of tags that index into a table that describes 2784 * cache and tlb properties. 2785 */ 2786 2787 static const char l1_icache_str[] = "l1-icache"; 2788 static const char l1_dcache_str[] = "l1-dcache"; 2789 static const char l2_cache_str[] = "l2-cache"; 2790 static const char l3_cache_str[] = "l3-cache"; 2791 static const char itlb4k_str[] = "itlb-4K"; 2792 static const char dtlb4k_str[] = "dtlb-4K"; 2793 static const char itlb4M_str[] = "itlb-4M"; 2794 static const char dtlb4M_str[] = "dtlb-4M"; 2795 static const char itlb424_str[] = "itlb-4K-2M-4M"; 2796 static const char dtlb44_str[] = "dtlb-4K-4M"; 2797 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 2798 static const char sl2_cache_str[] = "sectored-l2-cache"; 2799 static const char itrace_str[] = "itrace-cache"; 2800 static const char sl3_cache_str[] = "sectored-l3-cache"; 2801 2802 static const struct cachetab { 2803 uint8_t ct_code; 2804 uint8_t ct_assoc; 2805 uint16_t ct_line_size; 2806 size_t ct_size; 2807 const char *ct_label; 2808 } intel_ctab[] = { 2809 /* maintain descending order! */ 2810 { 0xb4, 4, 0, 256, dtlb4k_str }, 2811 { 0xb3, 4, 0, 128, dtlb4k_str }, 2812 { 0xb0, 4, 0, 128, itlb4k_str }, 2813 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 2814 { 0x86, 4, 64, 512*1024, l2_cache_str}, 2815 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 2816 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 2817 { 0x83, 8, 32, 512*1024, l2_cache_str}, 2818 { 0x82, 8, 32, 256*1024, l2_cache_str}, 2819 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 2820 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 2821 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 2822 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 2823 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 2824 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 2825 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 2826 { 0x73, 8, 0, 64*1024, itrace_str}, 2827 { 0x72, 8, 0, 32*1024, itrace_str}, 2828 { 0x71, 8, 0, 16*1024, itrace_str}, 2829 { 0x70, 8, 0, 12*1024, itrace_str}, 2830 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 2831 { 0x67, 4, 64, 16*1024, sl1_dcache_str}, 2832 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 2833 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 2834 { 0x5d, 0, 0, 256, dtlb44_str}, 2835 { 0x5c, 0, 0, 128, dtlb44_str}, 2836 { 0x5b, 0, 0, 64, dtlb44_str}, 2837 { 0x52, 0, 0, 256, itlb424_str}, 2838 { 0x51, 0, 0, 128, itlb424_str}, 2839 { 0x50, 0, 0, 64, itlb424_str}, 2840 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 2841 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 2842 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 2843 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 2844 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 2845 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 2846 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 2847 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 2848 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 2849 { 0x43, 4, 32, 512*1024, l2_cache_str}, 2850 { 0x42, 4, 32, 256*1024, l2_cache_str}, 2851 { 0x41, 4, 32, 128*1024, l2_cache_str}, 2852 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 2853 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 2854 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 2855 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 2856 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 2857 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 2858 { 0x30, 8, 64, 32*1024, l1_icache_str}, 2859 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 2860 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 2861 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 2862 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 2863 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 2864 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 2865 { 0x0b, 4, 0, 4, itlb4M_str}, 2866 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 2867 { 0x08, 4, 32, 16*1024, l1_icache_str}, 2868 { 0x06, 4, 32, 8*1024, l1_icache_str}, 2869 { 0x04, 4, 0, 8, dtlb4M_str}, 2870 { 0x03, 4, 0, 64, dtlb4k_str}, 2871 { 0x02, 4, 0, 2, itlb4M_str}, 2872 { 0x01, 4, 0, 32, itlb4k_str}, 2873 { 0 } 2874 }; 2875 2876 static const struct cachetab cyrix_ctab[] = { 2877 { 0x70, 4, 0, 32, "tlb-4K" }, 2878 { 0x80, 4, 16, 16*1024, "l1-cache" }, 2879 { 0 } 2880 }; 2881 2882 /* 2883 * Search a cache table for a matching entry 2884 */ 2885 static const struct cachetab * 2886 find_cacheent(const struct cachetab *ct, uint_t code) 2887 { 2888 if (code != 0) { 2889 for (; ct->ct_code != 0; ct++) 2890 if (ct->ct_code <= code) 2891 break; 2892 if (ct->ct_code == code) 2893 return (ct); 2894 } 2895 return (NULL); 2896 } 2897 2898 /* 2899 * Populate cachetab entry with L2 or L3 cache-information using 2900 * cpuid function 4. This function is called from intel_walk_cacheinfo() 2901 * when descriptor 0x49 is encountered. It returns 0 if no such cache 2902 * information is found. 2903 */ 2904 static int 2905 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 2906 { 2907 uint32_t level, i; 2908 int ret = 0; 2909 2910 for (i = 0; i < cpi->cpi_std_4_size; i++) { 2911 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]); 2912 2913 if (level == 2 || level == 3) { 2914 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1; 2915 ct->ct_line_size = 2916 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1; 2917 ct->ct_size = ct->ct_assoc * 2918 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) * 2919 ct->ct_line_size * 2920 (cpi->cpi_std_4[i]->cp_ecx + 1); 2921 2922 if (level == 2) { 2923 ct->ct_label = l2_cache_str; 2924 } else if (level == 3) { 2925 ct->ct_label = l3_cache_str; 2926 } 2927 ret = 1; 2928 } 2929 } 2930 2931 return (ret); 2932 } 2933 2934 /* 2935 * Walk the cacheinfo descriptor, applying 'func' to every valid element 2936 * The walk is terminated if the walker returns non-zero. 2937 */ 2938 static void 2939 intel_walk_cacheinfo(struct cpuid_info *cpi, 2940 void *arg, int (*func)(void *, const struct cachetab *)) 2941 { 2942 const struct cachetab *ct; 2943 struct cachetab des_49_ct; 2944 uint8_t *dp; 2945 int i; 2946 2947 if ((dp = cpi->cpi_cacheinfo) == NULL) 2948 return; 2949 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2950 /* 2951 * For overloaded descriptor 0x49 we use cpuid function 4 2952 * if supported by the current processor, to create 2953 * cache information. 2954 */ 2955 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 2956 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 2957 ct = &des_49_ct; 2958 } else { 2959 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 2960 continue; 2961 } 2962 } 2963 2964 if (func(arg, ct) != 0) { 2965 break; 2966 } 2967 } 2968 } 2969 2970 /* 2971 * (Like the Intel one, except for Cyrix CPUs) 2972 */ 2973 static void 2974 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 2975 void *arg, int (*func)(void *, const struct cachetab *)) 2976 { 2977 const struct cachetab *ct; 2978 uint8_t *dp; 2979 int i; 2980 2981 if ((dp = cpi->cpi_cacheinfo) == NULL) 2982 return; 2983 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 2984 /* 2985 * Search Cyrix-specific descriptor table first .. 2986 */ 2987 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 2988 if (func(arg, ct) != 0) 2989 break; 2990 continue; 2991 } 2992 /* 2993 * .. else fall back to the Intel one 2994 */ 2995 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 2996 if (func(arg, ct) != 0) 2997 break; 2998 continue; 2999 } 3000 } 3001 } 3002 3003 /* 3004 * A cacheinfo walker that adds associativity, line-size, and size properties 3005 * to the devinfo node it is passed as an argument. 3006 */ 3007 static int 3008 add_cacheent_props(void *arg, const struct cachetab *ct) 3009 { 3010 dev_info_t *devi = arg; 3011 3012 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 3013 if (ct->ct_line_size != 0) 3014 add_cache_prop(devi, ct->ct_label, line_str, 3015 ct->ct_line_size); 3016 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 3017 return (0); 3018 } 3019 3020 3021 static const char fully_assoc[] = "fully-associative?"; 3022 3023 /* 3024 * AMD style cache/tlb description 3025 * 3026 * Extended functions 5 and 6 directly describe properties of 3027 * tlbs and various cache levels. 3028 */ 3029 static void 3030 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3031 { 3032 switch (assoc) { 3033 case 0: /* reserved; ignore */ 3034 break; 3035 default: 3036 add_cache_prop(devi, label, assoc_str, assoc); 3037 break; 3038 case 0xff: 3039 add_cache_prop(devi, label, fully_assoc, 1); 3040 break; 3041 } 3042 } 3043 3044 static void 3045 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3046 { 3047 if (size == 0) 3048 return; 3049 add_cache_prop(devi, label, size_str, size); 3050 add_amd_assoc(devi, label, assoc); 3051 } 3052 3053 static void 3054 add_amd_cache(dev_info_t *devi, const char *label, 3055 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3056 { 3057 if (size == 0 || line_size == 0) 3058 return; 3059 add_amd_assoc(devi, label, assoc); 3060 /* 3061 * Most AMD parts have a sectored cache. Multiple cache lines are 3062 * associated with each tag. A sector consists of all cache lines 3063 * associated with a tag. For example, the AMD K6-III has a sector 3064 * size of 2 cache lines per tag. 3065 */ 3066 if (lines_per_tag != 0) 3067 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3068 add_cache_prop(devi, label, line_str, line_size); 3069 add_cache_prop(devi, label, size_str, size * 1024); 3070 } 3071 3072 static void 3073 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 3074 { 3075 switch (assoc) { 3076 case 0: /* off */ 3077 break; 3078 case 1: 3079 case 2: 3080 case 4: 3081 add_cache_prop(devi, label, assoc_str, assoc); 3082 break; 3083 case 6: 3084 add_cache_prop(devi, label, assoc_str, 8); 3085 break; 3086 case 8: 3087 add_cache_prop(devi, label, assoc_str, 16); 3088 break; 3089 case 0xf: 3090 add_cache_prop(devi, label, fully_assoc, 1); 3091 break; 3092 default: /* reserved; ignore */ 3093 break; 3094 } 3095 } 3096 3097 static void 3098 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 3099 { 3100 if (size == 0 || assoc == 0) 3101 return; 3102 add_amd_l2_assoc(devi, label, assoc); 3103 add_cache_prop(devi, label, size_str, size); 3104 } 3105 3106 static void 3107 add_amd_l2_cache(dev_info_t *devi, const char *label, 3108 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 3109 { 3110 if (size == 0 || assoc == 0 || line_size == 0) 3111 return; 3112 add_amd_l2_assoc(devi, label, assoc); 3113 if (lines_per_tag != 0) 3114 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 3115 add_cache_prop(devi, label, line_str, line_size); 3116 add_cache_prop(devi, label, size_str, size * 1024); 3117 } 3118 3119 static void 3120 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 3121 { 3122 struct cpuid_regs *cp; 3123 3124 if (cpi->cpi_xmaxeax < 0x80000005) 3125 return; 3126 cp = &cpi->cpi_extd[5]; 3127 3128 /* 3129 * 4M/2M L1 TLB configuration 3130 * 3131 * We report the size for 2M pages because AMD uses two 3132 * TLB entries for one 4M page. 3133 */ 3134 add_amd_tlb(devi, "dtlb-2M", 3135 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 3136 add_amd_tlb(devi, "itlb-2M", 3137 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 3138 3139 /* 3140 * 4K L1 TLB configuration 3141 */ 3142 3143 switch (cpi->cpi_vendor) { 3144 uint_t nentries; 3145 case X86_VENDOR_TM: 3146 if (cpi->cpi_family >= 5) { 3147 /* 3148 * Crusoe processors have 256 TLB entries, but 3149 * cpuid data format constrains them to only 3150 * reporting 255 of them. 3151 */ 3152 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 3153 nentries = 256; 3154 /* 3155 * Crusoe processors also have a unified TLB 3156 */ 3157 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 3158 nentries); 3159 break; 3160 } 3161 /*FALLTHROUGH*/ 3162 default: 3163 add_amd_tlb(devi, itlb4k_str, 3164 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 3165 add_amd_tlb(devi, dtlb4k_str, 3166 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 3167 break; 3168 } 3169 3170 /* 3171 * data L1 cache configuration 3172 */ 3173 3174 add_amd_cache(devi, l1_dcache_str, 3175 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 3176 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 3177 3178 /* 3179 * code L1 cache configuration 3180 */ 3181 3182 add_amd_cache(devi, l1_icache_str, 3183 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 3184 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 3185 3186 if (cpi->cpi_xmaxeax < 0x80000006) 3187 return; 3188 cp = &cpi->cpi_extd[6]; 3189 3190 /* Check for a unified L2 TLB for large pages */ 3191 3192 if (BITX(cp->cp_eax, 31, 16) == 0) 3193 add_amd_l2_tlb(devi, "l2-tlb-2M", 3194 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3195 else { 3196 add_amd_l2_tlb(devi, "l2-dtlb-2M", 3197 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3198 add_amd_l2_tlb(devi, "l2-itlb-2M", 3199 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3200 } 3201 3202 /* Check for a unified L2 TLB for 4K pages */ 3203 3204 if (BITX(cp->cp_ebx, 31, 16) == 0) { 3205 add_amd_l2_tlb(devi, "l2-tlb-4K", 3206 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3207 } else { 3208 add_amd_l2_tlb(devi, "l2-dtlb-4K", 3209 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 3210 add_amd_l2_tlb(devi, "l2-itlb-4K", 3211 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 3212 } 3213 3214 add_amd_l2_cache(devi, l2_cache_str, 3215 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 3216 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 3217 } 3218 3219 /* 3220 * There are two basic ways that the x86 world describes it cache 3221 * and tlb architecture - Intel's way and AMD's way. 3222 * 3223 * Return which flavor of cache architecture we should use 3224 */ 3225 static int 3226 x86_which_cacheinfo(struct cpuid_info *cpi) 3227 { 3228 switch (cpi->cpi_vendor) { 3229 case X86_VENDOR_Intel: 3230 if (cpi->cpi_maxeax >= 2) 3231 return (X86_VENDOR_Intel); 3232 break; 3233 case X86_VENDOR_AMD: 3234 /* 3235 * The K5 model 1 was the first part from AMD that reported 3236 * cache sizes via extended cpuid functions. 3237 */ 3238 if (cpi->cpi_family > 5 || 3239 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3240 return (X86_VENDOR_AMD); 3241 break; 3242 case X86_VENDOR_TM: 3243 if (cpi->cpi_family >= 5) 3244 return (X86_VENDOR_AMD); 3245 /*FALLTHROUGH*/ 3246 default: 3247 /* 3248 * If they have extended CPU data for 0x80000005 3249 * then we assume they have AMD-format cache 3250 * information. 3251 * 3252 * If not, and the vendor happens to be Cyrix, 3253 * then try our-Cyrix specific handler. 3254 * 3255 * If we're not Cyrix, then assume we're using Intel's 3256 * table-driven format instead. 3257 */ 3258 if (cpi->cpi_xmaxeax >= 0x80000005) 3259 return (X86_VENDOR_AMD); 3260 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 3261 return (X86_VENDOR_Cyrix); 3262 else if (cpi->cpi_maxeax >= 2) 3263 return (X86_VENDOR_Intel); 3264 break; 3265 } 3266 return (-1); 3267 } 3268 3269 /* 3270 * create a node for the given cpu under the prom root node. 3271 * Also, create a cpu node in the device tree. 3272 */ 3273 static dev_info_t *cpu_nex_devi = NULL; 3274 static kmutex_t cpu_node_lock; 3275 3276 /* 3277 * Called from post_startup() and mp_startup() 3278 */ 3279 void 3280 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi) 3281 { 3282 dev_info_t *cpu_devi; 3283 int create; 3284 3285 mutex_enter(&cpu_node_lock); 3286 3287 /* 3288 * create a nexus node for all cpus identified as 'cpu_id' under 3289 * the root node. 3290 */ 3291 if (cpu_nex_devi == NULL) { 3292 if (ndi_devi_alloc(ddi_root_node(), "cpus", 3293 (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) { 3294 mutex_exit(&cpu_node_lock); 3295 return; 3296 } 3297 (void) ndi_devi_online(cpu_nex_devi, 0); 3298 } 3299 3300 /* 3301 * create a child node for cpu identified as 'cpu_id' 3302 */ 3303 cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID, 3304 cpu_id); 3305 if (cpu_devi == NULL) { 3306 mutex_exit(&cpu_node_lock); 3307 return; 3308 } 3309 3310 /* device_type */ 3311 3312 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3313 "device_type", "cpu"); 3314 3315 /* reg */ 3316 3317 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3318 "reg", cpu_id); 3319 3320 /* cpu-mhz, and clock-frequency */ 3321 3322 if (cpu_freq > 0) { 3323 long long mul; 3324 3325 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3326 "cpu-mhz", cpu_freq); 3327 3328 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 3329 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3330 "clock-frequency", (int)mul); 3331 } 3332 3333 (void) ndi_devi_online(cpu_devi, 0); 3334 3335 if ((x86_feature & X86_CPUID) == 0) { 3336 mutex_exit(&cpu_node_lock); 3337 return; 3338 } 3339 3340 /* vendor-id */ 3341 3342 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3343 "vendor-id", cpi->cpi_vendorstr); 3344 3345 if (cpi->cpi_maxeax == 0) { 3346 mutex_exit(&cpu_node_lock); 3347 return; 3348 } 3349 3350 /* 3351 * family, model, and step 3352 */ 3353 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3354 "family", CPI_FAMILY(cpi)); 3355 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3356 "cpu-model", CPI_MODEL(cpi)); 3357 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3358 "stepping-id", CPI_STEP(cpi)); 3359 3360 /* type */ 3361 3362 switch (cpi->cpi_vendor) { 3363 case X86_VENDOR_Intel: 3364 create = 1; 3365 break; 3366 default: 3367 create = 0; 3368 break; 3369 } 3370 if (create) 3371 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3372 "type", CPI_TYPE(cpi)); 3373 3374 /* ext-family */ 3375 3376 switch (cpi->cpi_vendor) { 3377 case X86_VENDOR_Intel: 3378 case X86_VENDOR_AMD: 3379 create = cpi->cpi_family >= 0xf; 3380 break; 3381 default: 3382 create = 0; 3383 break; 3384 } 3385 if (create) 3386 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3387 "ext-family", CPI_FAMILY_XTD(cpi)); 3388 3389 /* ext-model */ 3390 3391 switch (cpi->cpi_vendor) { 3392 case X86_VENDOR_Intel: 3393 create = CPI_MODEL(cpi) == 0xf; 3394 break; 3395 case X86_VENDOR_AMD: 3396 create = CPI_FAMILY(cpi) == 0xf; 3397 break; 3398 default: 3399 create = 0; 3400 break; 3401 } 3402 if (create) 3403 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3404 "ext-model", CPI_MODEL_XTD(cpi)); 3405 3406 /* generation */ 3407 3408 switch (cpi->cpi_vendor) { 3409 case X86_VENDOR_AMD: 3410 /* 3411 * AMD K5 model 1 was the first part to support this 3412 */ 3413 create = cpi->cpi_xmaxeax >= 0x80000001; 3414 break; 3415 default: 3416 create = 0; 3417 break; 3418 } 3419 if (create) 3420 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3421 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 3422 3423 /* brand-id */ 3424 3425 switch (cpi->cpi_vendor) { 3426 case X86_VENDOR_Intel: 3427 /* 3428 * brand id first appeared on Pentium III Xeon model 8, 3429 * and Celeron model 8 processors and Opteron 3430 */ 3431 create = cpi->cpi_family > 6 || 3432 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 3433 break; 3434 case X86_VENDOR_AMD: 3435 create = cpi->cpi_family >= 0xf; 3436 break; 3437 default: 3438 create = 0; 3439 break; 3440 } 3441 if (create && cpi->cpi_brandid != 0) { 3442 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3443 "brand-id", cpi->cpi_brandid); 3444 } 3445 3446 /* chunks, and apic-id */ 3447 3448 switch (cpi->cpi_vendor) { 3449 /* 3450 * first available on Pentium IV and Opteron (K8) 3451 */ 3452 case X86_VENDOR_Intel: 3453 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3454 break; 3455 case X86_VENDOR_AMD: 3456 create = cpi->cpi_family >= 0xf; 3457 break; 3458 default: 3459 create = 0; 3460 break; 3461 } 3462 if (create) { 3463 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3464 "chunks", CPI_CHUNKS(cpi)); 3465 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3466 "apic-id", CPI_APIC_ID(cpi)); 3467 if (cpi->cpi_chipid >= 0) { 3468 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3469 "chip#", cpi->cpi_chipid); 3470 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3471 "clog#", cpi->cpi_clogid); 3472 } 3473 } 3474 3475 /* cpuid-features */ 3476 3477 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3478 "cpuid-features", CPI_FEATURES_EDX(cpi)); 3479 3480 3481 /* cpuid-features-ecx */ 3482 3483 switch (cpi->cpi_vendor) { 3484 case X86_VENDOR_Intel: 3485 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 3486 break; 3487 default: 3488 create = 0; 3489 break; 3490 } 3491 if (create) 3492 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3493 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 3494 3495 /* ext-cpuid-features */ 3496 3497 switch (cpi->cpi_vendor) { 3498 case X86_VENDOR_Intel: 3499 case X86_VENDOR_AMD: 3500 case X86_VENDOR_Cyrix: 3501 case X86_VENDOR_TM: 3502 case X86_VENDOR_Centaur: 3503 create = cpi->cpi_xmaxeax >= 0x80000001; 3504 break; 3505 default: 3506 create = 0; 3507 break; 3508 } 3509 if (create) { 3510 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3511 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 3512 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 3513 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 3514 } 3515 3516 /* 3517 * Brand String first appeared in Intel Pentium IV, AMD K5 3518 * model 1, and Cyrix GXm. On earlier models we try and 3519 * simulate something similar .. so this string should always 3520 * same -something- about the processor, however lame. 3521 */ 3522 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 3523 "brand-string", cpi->cpi_brandstr); 3524 3525 /* 3526 * Finally, cache and tlb information 3527 */ 3528 switch (x86_which_cacheinfo(cpi)) { 3529 case X86_VENDOR_Intel: 3530 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3531 break; 3532 case X86_VENDOR_Cyrix: 3533 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 3534 break; 3535 case X86_VENDOR_AMD: 3536 amd_cache_info(cpi, cpu_devi); 3537 break; 3538 default: 3539 break; 3540 } 3541 3542 mutex_exit(&cpu_node_lock); 3543 } 3544 3545 struct l2info { 3546 int *l2i_csz; 3547 int *l2i_lsz; 3548 int *l2i_assoc; 3549 int l2i_ret; 3550 }; 3551 3552 /* 3553 * A cacheinfo walker that fetches the size, line-size and associativity 3554 * of the L2 cache 3555 */ 3556 static int 3557 intel_l2cinfo(void *arg, const struct cachetab *ct) 3558 { 3559 struct l2info *l2i = arg; 3560 int *ip; 3561 3562 if (ct->ct_label != l2_cache_str && 3563 ct->ct_label != sl2_cache_str) 3564 return (0); /* not an L2 -- keep walking */ 3565 3566 if ((ip = l2i->l2i_csz) != NULL) 3567 *ip = ct->ct_size; 3568 if ((ip = l2i->l2i_lsz) != NULL) 3569 *ip = ct->ct_line_size; 3570 if ((ip = l2i->l2i_assoc) != NULL) 3571 *ip = ct->ct_assoc; 3572 l2i->l2i_ret = ct->ct_size; 3573 return (1); /* was an L2 -- terminate walk */ 3574 } 3575 3576 /* 3577 * AMD L2/L3 Cache and TLB Associativity Field Definition: 3578 * 3579 * Unlike the associativity for the L1 cache and tlb where the 8 bit 3580 * value is the associativity, the associativity for the L2 cache and 3581 * tlb is encoded in the following table. The 4 bit L2 value serves as 3582 * an index into the amd_afd[] array to determine the associativity. 3583 * -1 is undefined. 0 is fully associative. 3584 */ 3585 3586 static int amd_afd[] = 3587 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 3588 3589 static void 3590 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 3591 { 3592 struct cpuid_regs *cp; 3593 uint_t size, assoc; 3594 int i; 3595 int *ip; 3596 3597 if (cpi->cpi_xmaxeax < 0x80000006) 3598 return; 3599 cp = &cpi->cpi_extd[6]; 3600 3601 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 3602 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 3603 uint_t cachesz = size * 1024; 3604 assoc = amd_afd[i]; 3605 3606 ASSERT(assoc != -1); 3607 3608 if ((ip = l2i->l2i_csz) != NULL) 3609 *ip = cachesz; 3610 if ((ip = l2i->l2i_lsz) != NULL) 3611 *ip = BITX(cp->cp_ecx, 7, 0); 3612 if ((ip = l2i->l2i_assoc) != NULL) 3613 *ip = assoc; 3614 l2i->l2i_ret = cachesz; 3615 } 3616 } 3617 3618 int 3619 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 3620 { 3621 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3622 struct l2info __l2info, *l2i = &__l2info; 3623 3624 l2i->l2i_csz = csz; 3625 l2i->l2i_lsz = lsz; 3626 l2i->l2i_assoc = assoc; 3627 l2i->l2i_ret = -1; 3628 3629 switch (x86_which_cacheinfo(cpi)) { 3630 case X86_VENDOR_Intel: 3631 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3632 break; 3633 case X86_VENDOR_Cyrix: 3634 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 3635 break; 3636 case X86_VENDOR_AMD: 3637 amd_l2cacheinfo(cpi, l2i); 3638 break; 3639 default: 3640 break; 3641 } 3642 return (l2i->l2i_ret); 3643 } 3644 3645 #if !defined(__xpv) 3646 3647 uint32_t * 3648 cpuid_mwait_alloc(cpu_t *cpu) 3649 { 3650 uint32_t *ret; 3651 size_t mwait_size; 3652 3653 ASSERT(cpuid_checkpass(cpu, 2)); 3654 3655 mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 3656 if (mwait_size == 0) 3657 return (NULL); 3658 3659 /* 3660 * kmem_alloc() returns cache line size aligned data for mwait_size 3661 * allocations. mwait_size is currently cache line sized. Neither 3662 * of these implementation details are guarantied to be true in the 3663 * future. 3664 * 3665 * First try allocating mwait_size as kmem_alloc() currently returns 3666 * correctly aligned memory. If kmem_alloc() does not return 3667 * mwait_size aligned memory, then use mwait_size ROUNDUP. 3668 * 3669 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 3670 * decide to free this memory. 3671 */ 3672 ret = kmem_zalloc(mwait_size, KM_SLEEP); 3673 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 3674 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3675 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 3676 *ret = MWAIT_RUNNING; 3677 return (ret); 3678 } else { 3679 kmem_free(ret, mwait_size); 3680 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 3681 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 3682 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 3683 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 3684 *ret = MWAIT_RUNNING; 3685 return (ret); 3686 } 3687 } 3688 3689 void 3690 cpuid_mwait_free(cpu_t *cpu) 3691 { 3692 ASSERT(cpuid_checkpass(cpu, 2)); 3693 3694 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 3695 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 3696 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 3697 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 3698 } 3699 3700 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 3701 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 3702 } 3703 3704 void 3705 patch_tsc_read(int flag) 3706 { 3707 size_t cnt; 3708 switch (flag) { 3709 case X86_NO_TSC: 3710 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 3711 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 3712 break; 3713 case X86_HAVE_TSCP: 3714 cnt = &_tscp_end - &_tscp_start; 3715 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 3716 break; 3717 case X86_TSC_MFENCE: 3718 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 3719 (void) memcpy((void *)tsc_read, 3720 (void *)&_tsc_mfence_start, cnt); 3721 break; 3722 default: 3723 break; 3724 } 3725 } 3726 3727 #endif /* !__xpv */ 3728