xref: /titanic_52/usr/src/uts/i86pc/os/cpuid.c (revision 23f76dc290ca84b3df56bf58be0a4b8e3a7e38ab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /*
25  * Copyright (c) 2010, Intel Corporation.
26  * All rights reserved.
27  */
28 /*
29  * Portions Copyright 2009 Advanced Micro Devices, Inc.
30  */
31 /*
32  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
33  */
34 /*
35  * Various routines to handle identification
36  * and classification of x86 processors.
37  */
38 
39 #include <sys/types.h>
40 #include <sys/archsystm.h>
41 #include <sys/x86_archext.h>
42 #include <sys/kmem.h>
43 #include <sys/systm.h>
44 #include <sys/cmn_err.h>
45 #include <sys/sunddi.h>
46 #include <sys/sunndi.h>
47 #include <sys/cpuvar.h>
48 #include <sys/processor.h>
49 #include <sys/sysmacros.h>
50 #include <sys/pg.h>
51 #include <sys/fp.h>
52 #include <sys/controlregs.h>
53 #include <sys/bitmap.h>
54 #include <sys/auxv_386.h>
55 #include <sys/memnode.h>
56 #include <sys/pci_cfgspace.h>
57 
58 #ifdef __xpv
59 #include <sys/hypervisor.h>
60 #else
61 #include <sys/ontrap.h>
62 #endif
63 
64 /*
65  * Pass 0 of cpuid feature analysis happens in locore. It contains special code
66  * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
67  * them accordingly. For most modern processors, feature detection occurs here
68  * in pass 1.
69  *
70  * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
71  * for the boot CPU and does the basic analysis that the early kernel needs.
72  * x86_featureset is set based on the return value of cpuid_pass1() of the boot
73  * CPU.
74  *
75  * Pass 1 includes:
76  *
77  *	o Determining vendor/model/family/stepping and setting x86_type and
78  *	  x86_vendor accordingly.
79  *	o Processing the feature flags returned by the cpuid instruction while
80  *	  applying any workarounds or tricks for the specific processor.
81  *	o Mapping the feature flags into Solaris feature bits (X86_*).
82  *	o Processing extended feature flags if supported by the processor,
83  *	  again while applying specific processor knowledge.
84  *	o Determining the CMT characteristics of the system.
85  *
86  * Pass 1 is done on non-boot CPUs during their initialization and the results
87  * are used only as a meager attempt at ensuring that all processors within the
88  * system support the same features.
89  *
90  * Pass 2 of cpuid feature analysis happens just at the beginning
91  * of startup().  It just copies in and corrects the remainder
92  * of the cpuid data we depend on: standard cpuid functions that we didn't
93  * need for pass1 feature analysis, and extended cpuid functions beyond the
94  * simple feature processing done in pass1.
95  *
96  * Pass 3 of cpuid analysis is invoked after basic kernel services; in
97  * particular kernel memory allocation has been made available. It creates a
98  * readable brand string based on the data collected in the first two passes.
99  *
100  * Pass 4 of cpuid analysis is invoked after post_startup() when all
101  * the support infrastructure for various hardware features has been
102  * initialized. It determines which processor features will be reported
103  * to userland via the aux vector.
104  *
105  * All passes are executed on all CPUs, but only the boot CPU determines what
106  * features the kernel will use.
107  *
108  * Much of the worst junk in this file is for the support of processors
109  * that didn't really implement the cpuid instruction properly.
110  *
111  * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
112  * the pass numbers.  Accordingly, changes to the pass code may require changes
113  * to the accessor code.
114  */
115 
/*
 * Global processor identification state.  x86_vendor and x86_type start
 * with conservative defaults and may be updated by cpuid_pass1().
 */
uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;
/* NOTE(review): presumably the CLFLUSH line size; 0 until set -- confirm */
uint_t x86_clflush_size = 0;

/* Set to 1 by cpuid_pass1() when a legacy Intel P6 processor is found. */
uint_t pentiumpro_bug4046376;
uint_t pentiumpro_bug4064495;

/* Bitmap of processor features, sized to hold NUM_X86_FEATURES bits. */
uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
124 
/*
 * Printable names of the processor features.  Entry i is the name printed
 * by print_x86_featureset() when feature bit i is set, so the order of this
 * table must match the numbering of the feature bits.
 */
static char *x86_feature_names[NUM_X86_FEATURES] = {
	"lgpg",
	"tsc",
	"msr",
	"mtrr",
	"pge",
	"de",
	"cmov",
	"mmx",
	"mca",
	"pae",
	"cv8",
	"pat",
	"sep",
	"sse",
	"sse2",
	"htt",
	"asysc",
	"nx",
	"sse3",
	"cx16",
	"cmp",
	"tscp",
	"mwait",
	"sse4a",
	"cpuid",
	"ssse3",
	"sse4_1",
	"sse4_2",
	"1gpg",
	"clfsh",
	"64",
	"aes",
	"pclmulqdq",
	"xsave",
	"avx",
	"vmx",
	"svm"
};
164 
165 boolean_t
166 is_x86_feature(void *featureset, uint_t feature)
167 {
168 	ASSERT(feature < NUM_X86_FEATURES);
169 	return (BT_TEST((ulong_t *)featureset, feature));
170 }
171 
172 void
173 add_x86_feature(void *featureset, uint_t feature)
174 {
175 	ASSERT(feature < NUM_X86_FEATURES);
176 	BT_SET((ulong_t *)featureset, feature);
177 }
178 
179 void
180 remove_x86_feature(void *featureset, uint_t feature)
181 {
182 	ASSERT(feature < NUM_X86_FEATURES);
183 	BT_CLEAR((ulong_t *)featureset, feature);
184 }
185 
186 boolean_t
187 compare_x86_featureset(void *setA, void *setB)
188 {
189 	/*
190 	 * We assume that the unused bits of the bitmap are always zero.
191 	 */
192 	if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
193 		return (B_TRUE);
194 	} else {
195 		return (B_FALSE);
196 	}
197 }
198 
199 void
200 print_x86_featureset(void *featureset)
201 {
202 	uint_t i;
203 
204 	for (i = 0; i < NUM_X86_FEATURES; i++) {
205 		if (is_x86_feature(featureset, i)) {
206 			cmn_err(CE_CONT, "?x86_feature: %s\n",
207 			    x86_feature_names[i]);
208 		}
209 	}
210 }
211 
/* NOTE(review): not referenced in the visible part of this file. */
uint_t enable486;

/*
 * xsave state.  xsave_bv_all starts out as legacy FP + SSE and is
 * overwritten by setup_xfem() with the mask it programs.
 */
static size_t xsave_state_size = 0;
uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
boolean_t xsave_force_disable = B_FALSE;

/*
 * This is set to platform type Solaris is running on.
 * -1 means "not determined yet"; get_hwenv() then runs
 * determine_platform() to fill it in.
 */
static int platform_type = -1;

#if !defined(__xpv)
/*
 * Variable to patch if hypervisor platform detection needs to be
 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
 */
int enable_platform_detection = 1;
#endif
230 
/*
 * monitor/mwait info.
 *
 * size_actual and buf_actual are the real size and address allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
 */
struct mwait_info {
	size_t		mon_min;	/* min size to avoid missed wakeups */
	size_t		mon_max;	/* size to avoid false wakeups */
	size_t		size_actual;	/* size actually allocated */
	void		*buf_actual;	/* memory actually allocated */
	uint32_t	support;	/* processor support of monitor/mwait */
};
246 
/*
 * xsave/xrestor info.
 *
 * This structure contains HW feature bits and size of the xsave save area.
 * Note: the kernel will use the maximum size required for all hardware
 * features. It is not optimized for potential memory savings if features at
 * the end of the save area are not enabled.
 */
struct xsave_info {
	uint32_t	xsav_hw_features_low;   /* Supported HW features */
	uint32_t	xsav_hw_features_high;  /* Supported HW features */
	size_t		xsav_max_size;  /* max size save area for HW features */
	size_t		ymm_size;	/* AVX: size of ymm save area */
	size_t		ymm_offset;	/* AVX: offset for ymm save area */
};
262 
263 
/*
 * These constants determine how many of the elements of the
 * cpuid we cache in the cpuid_info data structure; the
 * remaining elements are accessible via the cpuid instruction.
 *
 * NMAX_CPI_STD sizes cpuid_info.cpi_std[] and NMAX_CPI_EXTD sizes
 * cpuid_info.cpi_extd[].
 */

#define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
#define	NMAX_CPI_EXTD	0x1c		/* eax = 0x80000000 .. 0x8000001b */
272 
273 /*
274  * Some terminology needs to be explained:
275  *  - Socket: Something that can be plugged into a motherboard.
276  *  - Package: Same as socket
277  *  - Chip: Same as socket. Note that AMD's documentation uses term "chip"
278  *    differently: there, chip is the same as processor node (below)
279  *  - Processor node: Some AMD processors have more than one
280  *    "subprocessor" embedded in a package. These subprocessors (nodes)
281  *    are fully-functional processors themselves with cores, caches,
282  *    memory controllers, PCI configuration spaces. They are connected
283  *    inside the package with Hypertransport links. On single-node
284  *    processors, processor node is equivalent to chip/socket/package.
285  */
286 
/*
 * Per-CPU cache of cpuid results plus the identifiers derived from them.
 * The boot CPU uses the static cpuid_info0 below; every other CPU gets its
 * own copy from cpuid_alloc_space().
 */
struct cpuid_info {
	uint_t cpi_pass;		/* last pass completed */
	/*
	 * standard function information
	 */
	uint_t cpi_maxeax;		/* fn 0: %eax */
	char cpi_vendorstr[13];		/* fn 0: %ebx:%edx:%ecx, NUL-terminated */
	uint_t cpi_vendor;		/* enum of cpi_vendorstr */

	uint_t cpi_family;		/* fn 1: extended family */
	uint_t cpi_model;		/* fn 1: extended model */
	uint_t cpi_step;		/* fn 1: stepping */
	chipid_t cpi_chipid;		/* fn 1: %ebx:  Intel: chip # */
					/*		AMD: package/socket # */
	uint_t cpi_brandid;		/* fn 1: %ebx: brand ID */
	int cpi_clogid;			/* fn 1: %ebx: thread # */
	uint_t cpi_ncpu_per_chip;	/* fn 1: %ebx: logical cpu count */
	uint8_t cpi_cacheinfo[16];	/* fn 2: intel-style cache desc */
	uint_t cpi_ncache;		/* fn 2: number of elements */
	uint_t cpi_ncpu_shr_last_cache;	/* fn 4: %eax: ncpus sharing cache */
	id_t cpi_last_lvl_cacheid;	/* fn 4: %eax: derived cache id */
	uint_t cpi_std_4_size;		/* fn 4: number of fn 4 elements */
	struct cpuid_regs **cpi_std_4;	/* fn 4: %ecx == 0 .. fn4_size */
	struct cpuid_regs cpi_std[NMAX_CPI_STD];	/* 0 .. 5 */
	/*
	 * extended function information
	 */
	uint_t cpi_xmaxeax;		/* fn 0x80000000: %eax */
	char cpi_brandstr[49];		/* fn 0x8000000[234] */
	uint8_t cpi_pabits;		/* fn 0x80000008: %eax[7:0] */
	uint8_t	cpi_vabits;		/* fn 0x80000008: %eax[15:8] */
	struct	cpuid_regs cpi_extd[NMAX_CPI_EXTD];	/* 0x800000XX */

	id_t cpi_coreid;		/* same coreid => strands share core */
	int cpi_pkgcoreid;		/* core number within single package */
	uint_t cpi_ncore_per_chip;	/* AMD: fn 0x80000008: %ecx[7-0] */
					/* Intel: fn 4: %eax[31-26] */
	/*
	 * supported feature information
	 */
	uint32_t cpi_support[5];
	/* indices into cpi_support[] */
#define	STD_EDX_FEATURES	0
#define	AMD_EDX_FEATURES	1
#define	TM_EDX_FEATURES		2
#define	STD_ECX_FEATURES	3
#define	AMD_ECX_FEATURES	4
	/*
	 * Synthesized information, where known.
	 */
	uint32_t cpi_chiprev;		/* See X86_CHIPREV_* in x86_archext.h */
	const char *cpi_chiprevstr;	/* May be NULL if chiprev unknown */
	uint32_t cpi_socket;		/* Chip package/socket type */

	struct mwait_info cpi_mwait;	/* fn 5: monitor/mwait info */
	uint32_t cpi_apicid;
	uint_t cpi_procnodeid;		/* AMD: nodeID on HT, Intel: chipid */
	uint_t cpi_procnodes_per_pkg;	/* AMD: # of nodes in the package */
					/* Intel: 1 */

	struct xsave_info cpi_xsave;	/* fn D: xsave/xrestor info */
};


/*
 * Storage for the boot CPU (cpu_id 0), which is set up before memory
 * allocation is available; cpuid_pass1() points mcpu_cpi at it.
 */
static struct cpuid_info cpuid_info0;
351 
/*
 * These bit fields are defined by the Intel Application Note AP-485
 * "Intel Processor Identification and the CPUID Instruction"
 *
 * Each macro takes a pointer to a struct cpuid_info and extracts a field
 * from the cached registers of standard function 1 (cpi_std[1]) or of
 * extended function 0x80000001 (cpi_extd[1]).
 */
#define	CPI_FAMILY_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
#define	CPI_MODEL_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
#define	CPI_TYPE(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
#define	CPI_FAMILY(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
#define	CPI_STEP(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
#define	CPI_MODEL(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 7, 4)

#define	CPI_FEATURES_EDX(cpi)		((cpi)->cpi_std[1].cp_edx)
#define	CPI_FEATURES_ECX(cpi)		((cpi)->cpi_std[1].cp_ecx)
#define	CPI_FEATURES_XTD_EDX(cpi)	((cpi)->cpi_extd[1].cp_edx)
#define	CPI_FEATURES_XTD_ECX(cpi)	((cpi)->cpi_extd[1].cp_ecx)

/*
 * fn 1 %ebx is four independent byte-wide fields.  The CLFLUSH chunk count
 * is bits 15:8; bit 7 belongs to the brand ID and must not be included.
 */
#define	CPI_BRANDID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
#define	CPI_CHUNKS(cpi)		BITX((cpi)->cpi_std[1].cp_ebx, 15, 8)
#define	CPI_CPU_COUNT(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
#define	CPI_APIC_ID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)

#define	CPI_MAXEAX_MAX		0x100		/* sanity control */
#define	CPI_XMAXEAX_MAX		0x80000100
#define	CPI_FN4_ECX_MAX		0x20		/* sanity: max fn 4 levels */
#define	CPI_FNB_ECX_MAX		0x20		/* sanity: max fn B levels */
377 
/*
 * Function 4 (Deterministic Cache Parameters) macros
 * Defined by Intel Application Note AP-485
 *
 * Each macro takes a pointer to the struct cpuid_regs holding the result of
 * one cpuid query and extracts a single bit field from it.
 *
 * NOTE(review): CPI_CPU_LEVEL_TYPE reads %ecx[15:8], which overlaps the
 * full-width %ecx used by CPI_CACHE_SETS; it is presumably meant for a
 * different leaf (see CPI_FNB_ECX_MAX) -- confirm against its callers.
 */
#define	CPI_NUM_CORES(regs)		BITX((regs)->cp_eax, 31, 26)
#define	CPI_NTHR_SHR_CACHE(regs)	BITX((regs)->cp_eax, 25, 14)
#define	CPI_FULL_ASSOC_CACHE(regs)	BITX((regs)->cp_eax, 9, 9)
#define	CPI_SELF_INIT_CACHE(regs)	BITX((regs)->cp_eax, 8, 8)
#define	CPI_CACHE_LVL(regs)		BITX((regs)->cp_eax, 7, 5)
#define	CPI_CACHE_TYPE(regs)		BITX((regs)->cp_eax, 4, 0)
#define	CPI_CPU_LEVEL_TYPE(regs)	BITX((regs)->cp_ecx, 15, 8)

#define	CPI_CACHE_WAYS(regs)		BITX((regs)->cp_ebx, 31, 22)
#define	CPI_CACHE_PARTS(regs)		BITX((regs)->cp_ebx, 21, 12)
#define	CPI_CACHE_COH_LN_SZ(regs)	BITX((regs)->cp_ebx, 11, 0)

#define	CPI_CACHE_SETS(regs)		BITX((regs)->cp_ecx, 31, 0)

#define	CPI_PREFCH_STRIDE(regs)		BITX((regs)->cp_edx, 9, 0)
397 
398 
/*
 * A couple of shorthand macros to identify "later" P6-family chips
 * like the Pentium M and Core.  First, the "older" P6-based stuff
 * (loosely defined as "pre-Pentium-4"):
 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
 *
 * All of these take a pointer to a struct cpuid_info whose cpi_family and
 * cpi_model have already been filled in.  The argument is parenthesized so
 * that any pointer expression (e.g. &info) can be passed safely.
 */

#define	IS_LEGACY_P6(cpi) (			\
	(cpi)->cpi_family == 6 &&		\
		((cpi)->cpi_model == 1 ||	\
		(cpi)->cpi_model == 3 ||	\
		(cpi)->cpi_model == 5 ||	\
		(cpi)->cpi_model == 6 ||	\
		(cpi)->cpi_model == 7 ||	\
		(cpi)->cpi_model == 8 ||	\
		(cpi)->cpi_model == 0xA ||	\
		(cpi)->cpi_model == 0xB)	\
)

/* A "new F6" is everything with family 6 that's not the above */
#define	IS_NEW_F6(cpi) (((cpi)->cpi_family == 6) && !IS_LEGACY_P6(cpi))

/* Extended family/model support */
#define	IS_EXTENDED_MODEL_INTEL(cpi) ((cpi)->cpi_family == 0x6 || \
	(cpi)->cpi_family >= 0xf)
424 
/*
 * Info for monitor/mwait idle loop.
 *
 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
 * 2006.
 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
 * Documentation Updates" #33633, Rev 2.05, December 2006.
 *
 * The MWAIT_*(cpi) accessors take a pointer to a struct cpuid_info and read
 * the cached registers of standard functions 1 and 5.
 */
#define	MWAIT_SUPPORT		(0x00000001)	/* mwait supported */
#define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
#define	MWAIT_ECX_INT_ENABLE	(0x00000004)	/* ecx 1 extension supported */
#define	MWAIT_SUPPORTED(cpi)	((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
#define	MWAIT_INT_ENABLE(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x2)
#define	MWAIT_EXTENSION(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x1)
#define	MWAIT_SIZE_MIN(cpi)	BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
#define	MWAIT_SIZE_MAX(cpi)	BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
/*
 * Number of sub-cstates for a given c-state.
 *
 * Extracts the 4-bit field of fn 5 %edx that starts at bit `c_state'.
 * `c_state' is parenthesized so that an expression argument is not
 * re-associated with the "+ 3".
 */
#define	MWAIT_NUM_SUBC_STATES(cpi, c_state)			\
	BITX((cpi)->cpi_std[5].cp_edx, (c_state) + 3, (c_state))
447 
/*
 * XSAVE leaf 0xD enumeration
 *
 * NOTE(review): presumably the expected byte offset and size of the ymm
 * (AVX) component within the xsave area -- confirm against the users of
 * these constants.
 */
#define	CPUID_LEAFD_2_YMM_OFFSET	576
#define	CPUID_LEAFD_2_YMM_SIZE		256
453 
/*
 * Functions we consume from cpuid_subr.c;  don't publish these in a header
 * file to try and keep people using the expected cpuid_* interfaces.
 */
extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
463 
/*
 * Apply various platform-dependent restrictions where the
 * underlying platform restrictions mean the CPU can be marked
 * as less capable than its cpuid instruction would imply.
 *
 * platform_cpuid_mangle(vendor, eax, cp) edits, in place, the registers in
 * `cp' that were returned for cpuid function `eax' on a processor of the
 * given `vendor' (X86_VENDOR_*).  Only the __xpv build does any work; in
 * every other build the call expands to nothing.
 */
#if defined(__xpv)
static void
platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
{
	/*
	 * Vendor-independent masking of the feature words.
	 */
	switch (eax) {
	case 1: {
		/* MCA stays visible only in the initial (control) domain */
		uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
		    0 : CPUID_INTC_EDX_MCA;
		cp->cp_edx &=
		    ~(mcamask |
		    CPUID_INTC_EDX_PSE |
		    CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
		    CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
		    CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
		    CPUID_AMD_EDX_SYSC |
		    CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
		break;
	}

	case 0x80000001:
		cp->cp_edx &=
		    ~(CPUID_AMD_EDX_PSE |
		    CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
		    CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
		    CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
		    CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
		    CPUID_AMD_EDX_TSCP);
		cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
		break;
	default:
		break;
	}

	/*
	 * Vendor-specific masking.
	 */
	switch (vendor) {
	case X86_VENDOR_Intel:
		switch (eax) {
		case 4:
			/*
			 * Zero out the (ncores-per-chip - 1) field,
			 * %eax[31:26] (see CPI_NUM_CORES).
			 */
			cp->cp_eax &= 0x03ffffff;
			break;
		default:
			break;
		}
		break;
	case X86_VENDOR_AMD:
		switch (eax) {

		case 0x80000001:
			cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
			break;

		case 0x80000008:
			/*
			 * Zero out the (ncores-per-chip - 1) field,
			 * %ecx[7:0]
			 */
			cp->cp_ecx &= 0xffffff00;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
}
#else
#define	platform_cpuid_mangle(vendor, eax, cp)	/* nothing */
#endif
539 
/*
 * Some undocumented ways of patching the results of the cpuid
 * instruction to permit running Solaris 10 on future cpus that
 * we don't currently support.  Could be set to non-zero values
 * via settings in eeprom.
 *
 * NOTE(review): the code that applies these masks is outside the visible
 * part of this file; presumably the "include" bits are OR-ed into, and the
 * "exclude" bits cleared from, the fn 1 %ecx/%edx feature words -- confirm.
 */

uint32_t cpuid_feature_ecx_include;
uint32_t cpuid_feature_ecx_exclude;
uint32_t cpuid_feature_edx_include;
uint32_t cpuid_feature_edx_exclude;
551 
552 /*
553  * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
554  */
555 void
556 cpuid_alloc_space(cpu_t *cpu)
557 {
558 	/*
559 	 * By convention, cpu0 is the boot cpu, which is set up
560 	 * before memory allocation is available.  All other cpus get
561 	 * their cpuid_info struct allocated here.
562 	 */
563 	ASSERT(cpu->cpu_id != 0);
564 	ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
565 	cpu->cpu_m.mcpu_cpi =
566 	    kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
567 }
568 
569 void
570 cpuid_free_space(cpu_t *cpu)
571 {
572 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
573 	int i;
574 
575 	ASSERT(cpi != NULL);
576 	ASSERT(cpi != &cpuid_info0);
577 
578 	/*
579 	 * Free up any function 4 related dynamic storage
580 	 */
581 	for (i = 1; i < cpi->cpi_std_4_size; i++)
582 		kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
583 	if (cpi->cpi_std_4_size > 0)
584 		kmem_free(cpi->cpi_std_4,
585 		    cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
586 
587 	kmem_free(cpi, sizeof (*cpi));
588 	cpu->cpu_m.mcpu_cpi = NULL;
589 }
590 
591 #if !defined(__xpv)
592 
593 static void
594 determine_platform()
595 {
596 	struct cpuid_regs cp;
597 	char *xen_str;
598 	uint32_t xen_signature[4], base;
599 
600 	platform_type = HW_NATIVE;
601 
602 	if (!enable_platform_detection)
603 		return;
604 
605 	/*
606 	 * In a fully virtualized domain, Xen's pseudo-cpuid function
607 	 * returns a string representing the Xen signature in %ebx, %ecx,
608 	 * and %edx. %eax contains the maximum supported cpuid function.
609 	 * We need at least a (base + 2) leaf value to do what we want
610 	 * to do. Try different base values, since the hypervisor might
611 	 * use a different one depending on whether hyper-v emulation
612 	 * is switched on by default or not.
613 	 */
614 	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
615 		cp.cp_eax = base;
616 		(void) __cpuid_insn(&cp);
617 		xen_signature[0] = cp.cp_ebx;
618 		xen_signature[1] = cp.cp_ecx;
619 		xen_signature[2] = cp.cp_edx;
620 		xen_signature[3] = 0;
621 		xen_str = (char *)xen_signature;
622 		if (strcmp("XenVMMXenVMM", xen_str) == 0 &&
623 		    cp.cp_eax >= (base + 2)) {
624 			platform_type = HW_XEN_HVM;
625 			return;
626 		}
627 	}
628 
629 	if (vmware_platform()) /* running under vmware hypervisor? */
630 		platform_type = HW_VMWARE;
631 }
632 
633 int
634 get_hwenv(void)
635 {
636 	if (platform_type == -1)
637 		determine_platform();
638 
639 	return (platform_type);
640 }
641 
/*
 * Non-xpv build: always reports "not a control domain" (0).
 */
int
is_controldom(void)
{
	const int in_control_domain = 0;

	return (in_control_domain);
}
647 
648 #else
649 
650 int
651 get_hwenv(void)
652 {
653 	return (HW_XEN_PV);
654 }
655 
656 int
657 is_controldom(void)
658 {
659 	return (DOMAIN_IS_INITDOMAIN(xen_info));
660 }
661 
662 #endif	/* __xpv */
663 
664 static void
665 cpuid_intel_getids(cpu_t *cpu, void *feature)
666 {
667 	uint_t i;
668 	uint_t chipid_shift = 0;
669 	uint_t coreid_shift = 0;
670 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
671 
672 	for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
673 		chipid_shift++;
674 
675 	cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
676 	cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
677 
678 	if (is_x86_feature(feature, X86FSET_CMP)) {
679 		/*
680 		 * Multi-core (and possibly multi-threaded)
681 		 * processors.
682 		 */
683 		uint_t ncpu_per_core;
684 		if (cpi->cpi_ncore_per_chip == 1)
685 			ncpu_per_core = cpi->cpi_ncpu_per_chip;
686 		else if (cpi->cpi_ncore_per_chip > 1)
687 			ncpu_per_core = cpi->cpi_ncpu_per_chip /
688 			    cpi->cpi_ncore_per_chip;
689 		/*
690 		 * 8bit APIC IDs on dual core Pentiums
691 		 * look like this:
692 		 *
693 		 * +-----------------------+------+------+
694 		 * | Physical Package ID   |  MC  |  HT  |
695 		 * +-----------------------+------+------+
696 		 * <------- chipid -------->
697 		 * <------- coreid --------------->
698 		 *			   <--- clogid -->
699 		 *			   <------>
700 		 *			   pkgcoreid
701 		 *
702 		 * Where the number of bits necessary to
703 		 * represent MC and HT fields together equals
704 		 * to the minimum number of bits necessary to
705 		 * store the value of cpi->cpi_ncpu_per_chip.
706 		 * Of those bits, the MC part uses the number
707 		 * of bits necessary to store the value of
708 		 * cpi->cpi_ncore_per_chip.
709 		 */
710 		for (i = 1; i < ncpu_per_core; i <<= 1)
711 			coreid_shift++;
712 		cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
713 		cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
714 	} else if (is_x86_feature(feature, X86FSET_HTT)) {
715 		/*
716 		 * Single-core multi-threaded processors.
717 		 */
718 		cpi->cpi_coreid = cpi->cpi_chipid;
719 		cpi->cpi_pkgcoreid = 0;
720 	}
721 	cpi->cpi_procnodeid = cpi->cpi_chipid;
722 }
723 
/*
 * Derive the AMD chip, core and processor-node identifiers for `cpu' from
 * its APIC id and cached cpuid data, and store them in the cpu's cpuid_info
 * (cpi_coreid, cpi_ncore_per_chip, cpi_clogid, cpi_pkgcoreid,
 * cpi_ncpu_per_chip, cpi_procnodeid, cpi_chipid and, for multi-node parts,
 * cpi_procnodes_per_pkg).
 */
static void
cpuid_amd_getids(cpu_t *cpu)
{
	int i, first_half, coreidsz;
	uint32_t nb_caps_reg;
	uint_t node2_1;
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

	/*
	 * AMD CMP chips currently have a single thread per core.
	 *
	 * Since no two cpus share a core we must assign a distinct coreid
	 * per cpu, and we do this by using the cpu_id.  This scheme does not,
	 * however, guarantee that sibling cores of a chip will have sequential
	 * coreids starting at a multiple of the number of cores per chip -
	 * that is usually the case, but if the ACPI MADT table is presented
	 * in a different order then we need to perform a few more gymnastics
	 * for the pkgcoreid.
	 *
	 * All processors in the system have the same number of enabled
	 * cores. Cores within a processor are always numbered sequentially
	 * from 0 regardless of how many or which are disabled, and there
	 * is no way for operating system to discover the real core id when some
	 * are disabled.
	 */

	cpi->cpi_coreid = cpu->cpu_id;

	if (cpi->cpi_xmaxeax >= 0x80000008) {

		/* width, in bits, of the core id field of the APIC id */
		coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);

		/*
		 * In AMD parlance chip is really a node while Solaris
		 * sees chip as equivalent to socket/package.
		 */
		cpi->cpi_ncore_per_chip =
		    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
		if (coreidsz == 0) {
			/*
			 * Use legacy method: the number of bits needed to
			 * number all cores, but never fewer than one.
			 */
			for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
				coreidsz++;
			if (coreidsz == 0)
				coreidsz = 1;
		}
	} else {
		/* Assume single-core part */
		cpi->cpi_ncore_per_chip = 1;
		coreidsz = 1;
	}

	/* The low coreidsz bits of the APIC id number the core in a package */
	cpi->cpi_clogid = cpi->cpi_pkgcoreid =
	    cpi->cpi_apicid & ((1<<coreidsz) - 1);
	cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;

	/* Get nodeID */
	if (cpi->cpi_family == 0xf) {
		/* three APIC id bits directly above the core id field */
		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
		cpi->cpi_chipid = cpi->cpi_procnodeid;
	} else if (cpi->cpi_family == 0x10) {
		/*
		 * See if we are a multi-node processor.
		 * All processors in the system have the same number of nodes
		 *
		 * NOTE(review): presumably this reads offset 0xe8 of PCI
		 * bus 0, device 24, function 3 ("F3xe8" below) -- confirm
		 * against pci_getl_func().
		 */
		nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
		if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
			/* Single-node */
			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
			    coreidsz);
			cpi->cpi_chipid = cpi->cpi_procnodeid;
		} else {

			/*
			 * Multi-node revision D (2 nodes per package
			 * are supported)
			 */
			cpi->cpi_procnodes_per_pkg = 2;

			/* non-zero if this core is in the lower half */
			first_half = (cpi->cpi_pkgcoreid <=
			    (cpi->cpi_ncore_per_chip/2 - 1));

			/* APIC id has no bits set above the core id field */
			if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
				/* We are BSP */
				cpi->cpi_procnodeid = (first_half ? 0 : 1);
				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
			} else {

				/* We are AP */
				/* NodeId[2:1] bits to use for reading F3xe8 */
				node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;

				nb_caps_reg =
				    pci_getl_func(0, 24 + node2_1, 3, 0xe8);

				/*
				 * Check IntNodeNum bit (31:30, but bit 31 is
				 * always 0 on dual-node processors)
				 */
				if (BITX(nb_caps_reg, 30, 30) == 0)
					cpi->cpi_procnodeid = node2_1 +
					    !first_half;
				else
					cpi->cpi_procnodeid = node2_1 +
					    first_half;

				/* two nodes per package: drop the low bit */
				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
			}
		}
	} else if (cpi->cpi_family >= 0x11) {
		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
		cpi->cpi_chipid = cpi->cpi_procnodeid;
	} else {
		/* families below 0xf: a single node, numbered 0 */
		cpi->cpi_procnodeid = 0;
		cpi->cpi_chipid = cpi->cpi_procnodeid;
	}
}
840 
841 /*
842  * Setup XFeature_Enabled_Mask register. Required by xsave feature.
843  */
844 void
845 setup_xfem(void)
846 {
847 	uint64_t flags = XFEATURE_LEGACY_FP;
848 
849 	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
850 
851 	if (is_x86_feature(x86_featureset, X86FSET_SSE))
852 		flags |= XFEATURE_SSE;
853 
854 	if (is_x86_feature(x86_featureset, X86FSET_AVX))
855 		flags |= XFEATURE_AVX;
856 
857 	set_xcr(XFEATURE_ENABLED_MASK, flags);
858 
859 	xsave_bv_all = flags;
860 }
861 
862 void
863 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
864 {
865 	uint32_t mask_ecx, mask_edx;
866 	struct cpuid_info *cpi;
867 	struct cpuid_regs *cp;
868 	int xcpuid;
869 #if !defined(__xpv)
870 	extern int idle_cpu_prefer_mwait;
871 #endif
872 
873 #if !defined(__xpv)
874 	determine_platform();
875 #endif
876 	/*
877 	 * Space statically allocated for BSP, ensure pointer is set
878 	 */
879 	if (cpu->cpu_id == 0) {
880 		if (cpu->cpu_m.mcpu_cpi == NULL)
881 			cpu->cpu_m.mcpu_cpi = &cpuid_info0;
882 	}
883 
884 	add_x86_feature(featureset, X86FSET_CPUID);
885 
886 	cpi = cpu->cpu_m.mcpu_cpi;
887 	ASSERT(cpi != NULL);
888 	cp = &cpi->cpi_std[0];
889 	cp->cp_eax = 0;
890 	cpi->cpi_maxeax = __cpuid_insn(cp);
891 	{
892 		uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
893 		*iptr++ = cp->cp_ebx;
894 		*iptr++ = cp->cp_edx;
895 		*iptr++ = cp->cp_ecx;
896 		*(char *)&cpi->cpi_vendorstr[12] = '\0';
897 	}
898 
899 	cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
900 	x86_vendor = cpi->cpi_vendor; /* for compatibility */
901 
902 	/*
903 	 * Limit the range in case of weird hardware
904 	 */
905 	if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
906 		cpi->cpi_maxeax = CPI_MAXEAX_MAX;
907 	if (cpi->cpi_maxeax < 1)
908 		goto pass1_done;
909 
910 	cp = &cpi->cpi_std[1];
911 	cp->cp_eax = 1;
912 	(void) __cpuid_insn(cp);
913 
914 	/*
915 	 * Extract identifying constants for easy access.
916 	 */
917 	cpi->cpi_model = CPI_MODEL(cpi);
918 	cpi->cpi_family = CPI_FAMILY(cpi);
919 
920 	if (cpi->cpi_family == 0xf)
921 		cpi->cpi_family += CPI_FAMILY_XTD(cpi);
922 
923 	/*
924 	 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
925 	 * Intel, and presumably everyone else, uses model == 0xf, as
926 	 * one would expect (max value means possible overflow).  Sigh.
927 	 */
928 
929 	switch (cpi->cpi_vendor) {
930 	case X86_VENDOR_Intel:
931 		if (IS_EXTENDED_MODEL_INTEL(cpi))
932 			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
933 		break;
934 	case X86_VENDOR_AMD:
935 		if (CPI_FAMILY(cpi) == 0xf)
936 			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
937 		break;
938 	default:
939 		if (cpi->cpi_model == 0xf)
940 			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
941 		break;
942 	}
943 
944 	cpi->cpi_step = CPI_STEP(cpi);
945 	cpi->cpi_brandid = CPI_BRANDID(cpi);
946 
947 	/*
948 	 * *default* assumptions:
949 	 * - believe %edx feature word
950 	 * - ignore %ecx feature word
951 	 * - 32-bit virtual and physical addressing
952 	 */
953 	mask_edx = 0xffffffff;
954 	mask_ecx = 0;
955 
956 	cpi->cpi_pabits = cpi->cpi_vabits = 32;
957 
958 	switch (cpi->cpi_vendor) {
959 	case X86_VENDOR_Intel:
960 		if (cpi->cpi_family == 5)
961 			x86_type = X86_TYPE_P5;
962 		else if (IS_LEGACY_P6(cpi)) {
963 			x86_type = X86_TYPE_P6;
964 			pentiumpro_bug4046376 = 1;
965 			pentiumpro_bug4064495 = 1;
966 			/*
967 			 * Clear the SEP bit when it was set erroneously
968 			 */
969 			if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
970 				cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
971 		} else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
972 			x86_type = X86_TYPE_P4;
973 			/*
974 			 * We don't currently depend on any of the %ecx
975 			 * features until Prescott, so we'll only check
976 			 * this from P4 onwards.  We might want to revisit
977 			 * that idea later.
978 			 */
979 			mask_ecx = 0xffffffff;
980 		} else if (cpi->cpi_family > 0xf)
981 			mask_ecx = 0xffffffff;
982 		/*
983 		 * We don't support MONITOR/MWAIT if leaf 5 is not available
984 		 * to obtain the monitor linesize.
985 		 */
986 		if (cpi->cpi_maxeax < 5)
987 			mask_ecx &= ~CPUID_INTC_ECX_MON;
988 		break;
989 	case X86_VENDOR_IntelClone:
990 	default:
991 		break;
992 	case X86_VENDOR_AMD:
993 #if defined(OPTERON_ERRATUM_108)
994 		if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
995 			cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
996 			cpi->cpi_model = 0xc;
997 		} else
998 #endif
999 		if (cpi->cpi_family == 5) {
1000 			/*
1001 			 * AMD K5 and K6
1002 			 *
1003 			 * These CPUs have an incomplete implementation
1004 			 * of MCA/MCE which we mask away.
1005 			 */
1006 			mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1007 
1008 			/*
1009 			 * Model 0 uses the wrong (APIC) bit
1010 			 * to indicate PGE.  Fix it here.
1011 			 */
1012 			if (cpi->cpi_model == 0) {
1013 				if (cp->cp_edx & 0x200) {
1014 					cp->cp_edx &= ~0x200;
1015 					cp->cp_edx |= CPUID_INTC_EDX_PGE;
1016 				}
1017 			}
1018 
1019 			/*
1020 			 * Early models had problems w/ MMX; disable.
1021 			 */
1022 			if (cpi->cpi_model < 6)
1023 				mask_edx &= ~CPUID_INTC_EDX_MMX;
1024 		}
1025 
1026 		/*
1027 		 * For newer families, SSE3 and CX16, at least, are valid;
1028 		 * enable all
1029 		 */
1030 		if (cpi->cpi_family >= 0xf)
1031 			mask_ecx = 0xffffffff;
1032 		/*
1033 		 * We don't support MONITOR/MWAIT if leaf 5 is not available
1034 		 * to obtain the monitor linesize.
1035 		 */
1036 		if (cpi->cpi_maxeax < 5)
1037 			mask_ecx &= ~CPUID_INTC_ECX_MON;
1038 
1039 #if !defined(__xpv)
1040 		/*
1041 		 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1042 		 * processors.  AMD does not intend MWAIT to be used in the cpu
1043 		 * idle loop on current and future processors.  10h and future
1044 		 * AMD processors use more power in MWAIT than HLT.
1045 		 * Pre-family-10h Opterons do not have the MWAIT instruction.
1046 		 */
1047 		idle_cpu_prefer_mwait = 0;
1048 #endif
1049 
1050 		break;
1051 	case X86_VENDOR_TM:
1052 		/*
1053 		 * workaround the NT workaround in CMS 4.1
1054 		 */
1055 		if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1056 		    (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1057 			cp->cp_edx |= CPUID_INTC_EDX_CX8;
1058 		break;
1059 	case X86_VENDOR_Centaur:
1060 		/*
1061 		 * workaround the NT workarounds again
1062 		 */
1063 		if (cpi->cpi_family == 6)
1064 			cp->cp_edx |= CPUID_INTC_EDX_CX8;
1065 		break;
1066 	case X86_VENDOR_Cyrix:
1067 		/*
1068 		 * We rely heavily on the probing in locore
1069 		 * to actually figure out what parts, if any,
1070 		 * of the Cyrix cpuid instruction to believe.
1071 		 */
1072 		switch (x86_type) {
1073 		case X86_TYPE_CYRIX_486:
1074 			mask_edx = 0;
1075 			break;
1076 		case X86_TYPE_CYRIX_6x86:
1077 			mask_edx = 0;
1078 			break;
1079 		case X86_TYPE_CYRIX_6x86L:
1080 			mask_edx =
1081 			    CPUID_INTC_EDX_DE |
1082 			    CPUID_INTC_EDX_CX8;
1083 			break;
1084 		case X86_TYPE_CYRIX_6x86MX:
1085 			mask_edx =
1086 			    CPUID_INTC_EDX_DE |
1087 			    CPUID_INTC_EDX_MSR |
1088 			    CPUID_INTC_EDX_CX8 |
1089 			    CPUID_INTC_EDX_PGE |
1090 			    CPUID_INTC_EDX_CMOV |
1091 			    CPUID_INTC_EDX_MMX;
1092 			break;
1093 		case X86_TYPE_CYRIX_GXm:
1094 			mask_edx =
1095 			    CPUID_INTC_EDX_MSR |
1096 			    CPUID_INTC_EDX_CX8 |
1097 			    CPUID_INTC_EDX_CMOV |
1098 			    CPUID_INTC_EDX_MMX;
1099 			break;
1100 		case X86_TYPE_CYRIX_MediaGX:
1101 			break;
1102 		case X86_TYPE_CYRIX_MII:
1103 		case X86_TYPE_VIA_CYRIX_III:
1104 			mask_edx =
1105 			    CPUID_INTC_EDX_DE |
1106 			    CPUID_INTC_EDX_TSC |
1107 			    CPUID_INTC_EDX_MSR |
1108 			    CPUID_INTC_EDX_CX8 |
1109 			    CPUID_INTC_EDX_PGE |
1110 			    CPUID_INTC_EDX_CMOV |
1111 			    CPUID_INTC_EDX_MMX;
1112 			break;
1113 		default:
1114 			break;
1115 		}
1116 		break;
1117 	}
1118 
1119 #if defined(__xpv)
1120 	/*
1121 	 * Do not support MONITOR/MWAIT under a hypervisor
1122 	 */
1123 	mask_ecx &= ~CPUID_INTC_ECX_MON;
1124 	/*
1125 	 * Do not support XSAVE under a hypervisor for now
1126 	 */
1127 	xsave_force_disable = B_TRUE;
1128 
1129 #endif	/* __xpv */
1130 
1131 	if (xsave_force_disable) {
1132 		mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1133 		mask_ecx &= ~CPUID_INTC_ECX_AVX;
1134 	}
1135 
1136 	/*
1137 	 * Now we've figured out the masks that determine
1138 	 * which bits we choose to believe, apply the masks
1139 	 * to the feature words, then map the kernel's view
1140 	 * of these feature words into its feature word.
1141 	 */
1142 	cp->cp_edx &= mask_edx;
1143 	cp->cp_ecx &= mask_ecx;
1144 
1145 	/*
1146 	 * apply any platform restrictions (we don't call this
1147 	 * immediately after __cpuid_insn here, because we need the
1148 	 * workarounds applied above first)
1149 	 */
1150 	platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1151 
1152 	/*
1153 	 * fold in overrides from the "eeprom" mechanism
1154 	 */
1155 	cp->cp_edx |= cpuid_feature_edx_include;
1156 	cp->cp_edx &= ~cpuid_feature_edx_exclude;
1157 
1158 	cp->cp_ecx |= cpuid_feature_ecx_include;
1159 	cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1160 
1161 	if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1162 		add_x86_feature(featureset, X86FSET_LARGEPAGE);
1163 	}
1164 	if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1165 		add_x86_feature(featureset, X86FSET_TSC);
1166 	}
1167 	if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1168 		add_x86_feature(featureset, X86FSET_MSR);
1169 	}
1170 	if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1171 		add_x86_feature(featureset, X86FSET_MTRR);
1172 	}
1173 	if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1174 		add_x86_feature(featureset, X86FSET_PGE);
1175 	}
1176 	if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1177 		add_x86_feature(featureset, X86FSET_CMOV);
1178 	}
1179 	if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1180 		add_x86_feature(featureset, X86FSET_MMX);
1181 	}
1182 	if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1183 	    (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1184 		add_x86_feature(featureset, X86FSET_MCA);
1185 	}
1186 	if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1187 		add_x86_feature(featureset, X86FSET_PAE);
1188 	}
1189 	if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1190 		add_x86_feature(featureset, X86FSET_CX8);
1191 	}
1192 	if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1193 		add_x86_feature(featureset, X86FSET_CX16);
1194 	}
1195 	if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1196 		add_x86_feature(featureset, X86FSET_PAT);
1197 	}
1198 	if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1199 		add_x86_feature(featureset, X86FSET_SEP);
1200 	}
1201 	if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1202 		/*
1203 		 * In our implementation, fxsave/fxrstor
1204 		 * are prerequisites before we'll even
1205 		 * try and do SSE things.
1206 		 */
1207 		if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1208 			add_x86_feature(featureset, X86FSET_SSE);
1209 		}
1210 		if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1211 			add_x86_feature(featureset, X86FSET_SSE2);
1212 		}
1213 		if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1214 			add_x86_feature(featureset, X86FSET_SSE3);
1215 		}
1216 		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
1217 			if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1218 				add_x86_feature(featureset, X86FSET_SSSE3);
1219 			}
1220 			if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1221 				add_x86_feature(featureset, X86FSET_SSE4_1);
1222 			}
1223 			if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1224 				add_x86_feature(featureset, X86FSET_SSE4_2);
1225 			}
1226 			if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1227 				add_x86_feature(featureset, X86FSET_AES);
1228 			}
1229 			if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1230 				add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1231 			}
1232 
1233 			if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1234 				add_x86_feature(featureset, X86FSET_XSAVE);
1235 				/* We only test AVX when there is XSAVE */
1236 				if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1237 					add_x86_feature(featureset,
1238 					    X86FSET_AVX);
1239 				}
1240 			}
1241 		}
1242 	}
1243 	if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1244 		add_x86_feature(featureset, X86FSET_DE);
1245 	}
1246 #if !defined(__xpv)
1247 	if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1248 
1249 		/*
1250 		 * We require the CLFLUSH instruction for erratum workaround
1251 		 * to use MONITOR/MWAIT.
1252 		 */
1253 		if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1254 			cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1255 			add_x86_feature(featureset, X86FSET_MWAIT);
1256 		} else {
1257 			extern int idle_cpu_assert_cflush_monitor;
1258 
1259 			/*
1260 			 * All processors we are aware of which have
1261 			 * MONITOR/MWAIT also have CLFLUSH.
1262 			 */
1263 			if (idle_cpu_assert_cflush_monitor) {
1264 				ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1265 				    (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1266 			}
1267 		}
1268 	}
1269 #endif	/* __xpv */
1270 
1271 	if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
1272 		add_x86_feature(featureset, X86FSET_VMX);
1273 	}
1274 
1275 	/*
1276 	 * Only need it first time, rest of the cpus would follow suit.
1277 	 * we only capture this for the bootcpu.
1278 	 */
1279 	if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1280 		add_x86_feature(featureset, X86FSET_CLFSH);
1281 		x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
1282 	}
1283 	if (is_x86_feature(featureset, X86FSET_PAE))
1284 		cpi->cpi_pabits = 36;
1285 
1286 	/*
1287 	 * Hyperthreading configuration is slightly tricky on Intel
1288 	 * and pure clones, and even trickier on AMD.
1289 	 *
1290 	 * (AMD chose to set the HTT bit on their CMP processors,
1291 	 * even though they're not actually hyperthreaded.  Thus it
1292 	 * takes a bit more work to figure out what's really going
1293 	 * on ... see the handling of the CMP_LGCY bit below)
1294 	 */
1295 	if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1296 		cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1297 		if (cpi->cpi_ncpu_per_chip > 1)
1298 			add_x86_feature(featureset, X86FSET_HTT);
1299 	} else {
1300 		cpi->cpi_ncpu_per_chip = 1;
1301 	}
1302 
1303 	/*
1304 	 * Work on the "extended" feature information, doing
1305 	 * some basic initialization for cpuid_pass2()
1306 	 */
1307 	xcpuid = 0;
1308 	switch (cpi->cpi_vendor) {
1309 	case X86_VENDOR_Intel:
1310 		if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
1311 			xcpuid++;
1312 		break;
1313 	case X86_VENDOR_AMD:
1314 		if (cpi->cpi_family > 5 ||
1315 		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1316 			xcpuid++;
1317 		break;
1318 	case X86_VENDOR_Cyrix:
1319 		/*
1320 		 * Only these Cyrix CPUs are -known- to support
1321 		 * extended cpuid operations.
1322 		 */
1323 		if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1324 		    x86_type == X86_TYPE_CYRIX_GXm)
1325 			xcpuid++;
1326 		break;
1327 	case X86_VENDOR_Centaur:
1328 	case X86_VENDOR_TM:
1329 	default:
1330 		xcpuid++;
1331 		break;
1332 	}
1333 
1334 	if (xcpuid) {
1335 		cp = &cpi->cpi_extd[0];
1336 		cp->cp_eax = 0x80000000;
1337 		cpi->cpi_xmaxeax = __cpuid_insn(cp);
1338 	}
1339 
1340 	if (cpi->cpi_xmaxeax & 0x80000000) {
1341 
1342 		if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1343 			cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1344 
1345 		switch (cpi->cpi_vendor) {
1346 		case X86_VENDOR_Intel:
1347 		case X86_VENDOR_AMD:
1348 			if (cpi->cpi_xmaxeax < 0x80000001)
1349 				break;
1350 			cp = &cpi->cpi_extd[1];
1351 			cp->cp_eax = 0x80000001;
1352 			(void) __cpuid_insn(cp);
1353 
1354 			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1355 			    cpi->cpi_family == 5 &&
1356 			    cpi->cpi_model == 6 &&
1357 			    cpi->cpi_step == 6) {
1358 				/*
1359 				 * K6 model 6 uses bit 10 to indicate SYSC
1360 				 * Later models use bit 11. Fix it here.
1361 				 */
1362 				if (cp->cp_edx & 0x400) {
1363 					cp->cp_edx &= ~0x400;
1364 					cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1365 				}
1366 			}
1367 
1368 			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1369 
1370 			/*
1371 			 * Compute the additions to the kernel's feature word.
1372 			 */
1373 			if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1374 				add_x86_feature(featureset, X86FSET_NX);
1375 			}
1376 
1377 			/*
1378 			 * Regardless whether or not we boot 64-bit,
1379 			 * we should have a way to identify whether
1380 			 * the CPU is capable of running 64-bit.
1381 			 */
1382 			if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1383 				add_x86_feature(featureset, X86FSET_64);
1384 			}
1385 
1386 #if defined(__amd64)
1387 			/* 1 GB large page - enable only for 64 bit kernel */
1388 			if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1389 				add_x86_feature(featureset, X86FSET_1GPG);
1390 			}
1391 #endif
1392 
1393 			if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1394 			    (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1395 			    (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1396 				add_x86_feature(featureset, X86FSET_SSE4A);
1397 			}
1398 
1399 			/*
1400 			 * If both the HTT and CMP_LGCY bits are set,
1401 			 * then we're not actually HyperThreaded.  Read
1402 			 * "AMD CPUID Specification" for more details.
1403 			 */
1404 			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1405 			    is_x86_feature(featureset, X86FSET_HTT) &&
1406 			    (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1407 				remove_x86_feature(featureset, X86FSET_HTT);
1408 				add_x86_feature(featureset, X86FSET_CMP);
1409 			}
1410 #if defined(__amd64)
1411 			/*
1412 			 * It's really tricky to support syscall/sysret in
1413 			 * the i386 kernel; we rely on sysenter/sysexit
1414 			 * instead.  In the amd64 kernel, things are -way-
1415 			 * better.
1416 			 */
1417 			if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1418 				add_x86_feature(featureset, X86FSET_ASYSC);
1419 			}
1420 
1421 			/*
1422 			 * While we're thinking about system calls, note
1423 			 * that AMD processors don't support sysenter
1424 			 * in long mode at all, so don't try to program them.
1425 			 */
1426 			if (x86_vendor == X86_VENDOR_AMD) {
1427 				remove_x86_feature(featureset, X86FSET_SEP);
1428 			}
1429 #endif
1430 			if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1431 				add_x86_feature(featureset, X86FSET_TSCP);
1432 			}
1433 
1434 			if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1435 				add_x86_feature(featureset, X86FSET_SVM);
1436 			}
1437 			break;
1438 		default:
1439 			break;
1440 		}
1441 
1442 		/*
1443 		 * Get CPUID data about processor cores and hyperthreads.
1444 		 */
1445 		switch (cpi->cpi_vendor) {
1446 		case X86_VENDOR_Intel:
1447 			if (cpi->cpi_maxeax >= 4) {
1448 				cp = &cpi->cpi_std[4];
1449 				cp->cp_eax = 4;
1450 				cp->cp_ecx = 0;
1451 				(void) __cpuid_insn(cp);
1452 				platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1453 			}
1454 			/*FALLTHROUGH*/
1455 		case X86_VENDOR_AMD:
1456 			if (cpi->cpi_xmaxeax < 0x80000008)
1457 				break;
1458 			cp = &cpi->cpi_extd[8];
1459 			cp->cp_eax = 0x80000008;
1460 			(void) __cpuid_insn(cp);
1461 			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1462 
1463 			/*
1464 			 * Virtual and physical address limits from
1465 			 * cpuid override previously guessed values.
1466 			 */
1467 			cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1468 			cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1469 			break;
1470 		default:
1471 			break;
1472 		}
1473 
1474 		/*
1475 		 * Derive the number of cores per chip
1476 		 */
1477 		switch (cpi->cpi_vendor) {
1478 		case X86_VENDOR_Intel:
1479 			if (cpi->cpi_maxeax < 4) {
1480 				cpi->cpi_ncore_per_chip = 1;
1481 				break;
1482 			} else {
1483 				cpi->cpi_ncore_per_chip =
1484 				    BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1485 			}
1486 			break;
1487 		case X86_VENDOR_AMD:
1488 			if (cpi->cpi_xmaxeax < 0x80000008) {
1489 				cpi->cpi_ncore_per_chip = 1;
1490 				break;
1491 			} else {
1492 				/*
1493 				 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is
1494 				 * 1 less than the number of physical cores on
1495 				 * the chip.  In family 0x10 this value can
1496 				 * be affected by "downcoring" - it reflects
1497 				 * 1 less than the number of cores actually
1498 				 * enabled on this node.
1499 				 */
1500 				cpi->cpi_ncore_per_chip =
1501 				    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1502 			}
1503 			break;
1504 		default:
1505 			cpi->cpi_ncore_per_chip = 1;
1506 			break;
1507 		}
1508 
1509 		/*
1510 		 * Get CPUID data about TSC Invariance in Deep C-State.
1511 		 */
1512 		switch (cpi->cpi_vendor) {
1513 		case X86_VENDOR_Intel:
1514 			if (cpi->cpi_maxeax >= 7) {
1515 				cp = &cpi->cpi_extd[7];
1516 				cp->cp_eax = 0x80000007;
1517 				cp->cp_ecx = 0;
1518 				(void) __cpuid_insn(cp);
1519 			}
1520 			break;
1521 		default:
1522 			break;
1523 		}
1524 	} else {
1525 		cpi->cpi_ncore_per_chip = 1;
1526 	}
1527 
1528 	/*
1529 	 * If more than one core, then this processor is CMP.
1530 	 */
1531 	if (cpi->cpi_ncore_per_chip > 1) {
1532 		add_x86_feature(featureset, X86FSET_CMP);
1533 	}
1534 
1535 	/*
1536 	 * If the number of cores is the same as the number
1537 	 * of CPUs, then we cannot have HyperThreading.
1538 	 */
1539 	if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1540 		remove_x86_feature(featureset, X86FSET_HTT);
1541 	}
1542 
1543 	cpi->cpi_apicid = CPI_APIC_ID(cpi);
1544 	cpi->cpi_procnodes_per_pkg = 1;
1545 	if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1546 	    is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1547 		/*
1548 		 * Single-core single-threaded processors.
1549 		 */
1550 		cpi->cpi_chipid = -1;
1551 		cpi->cpi_clogid = 0;
1552 		cpi->cpi_coreid = cpu->cpu_id;
1553 		cpi->cpi_pkgcoreid = 0;
1554 		if (cpi->cpi_vendor == X86_VENDOR_AMD)
1555 			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1556 		else
1557 			cpi->cpi_procnodeid = cpi->cpi_chipid;
1558 	} else if (cpi->cpi_ncpu_per_chip > 1) {
1559 		if (cpi->cpi_vendor == X86_VENDOR_Intel)
1560 			cpuid_intel_getids(cpu, featureset);
1561 		else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1562 			cpuid_amd_getids(cpu);
1563 		else {
1564 			/*
1565 			 * All other processors are currently
1566 			 * assumed to have single cores.
1567 			 */
1568 			cpi->cpi_coreid = cpi->cpi_chipid;
1569 			cpi->cpi_pkgcoreid = 0;
1570 			cpi->cpi_procnodeid = cpi->cpi_chipid;
1571 		}
1572 	}
1573 
1574 	/*
1575 	 * Synthesize chip "revision" and socket type
1576 	 */
1577 	cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1578 	    cpi->cpi_model, cpi->cpi_step);
1579 	cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1580 	    cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1581 	cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1582 	    cpi->cpi_model, cpi->cpi_step);
1583 
1584 pass1_done:
1585 	cpi->cpi_pass = 1;
1586 }
1587 
1588 /*
1589  * Make copies of the cpuid table entries we depend on, in
1590  * part for ease of parsing now, in part so that we have only
1591  * one place to correct any of it, in part for ease of
1592  * later export to userland, and in part so we can look at
1593  * this stuff in a crash dump.
1594  */
1595 
1596 /*ARGSUSED*/
1597 void
1598 cpuid_pass2(cpu_t *cpu)
1599 {
1600 	uint_t n, nmax;
1601 	int i;
1602 	struct cpuid_regs *cp;
1603 	uint8_t *dp;
1604 	uint32_t *iptr;
1605 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1606 
1607 	ASSERT(cpi->cpi_pass == 1);
1608 
1609 	if (cpi->cpi_maxeax < 1)
1610 		goto pass2_done;
1611 
1612 	if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1613 		nmax = NMAX_CPI_STD;
1614 	/*
1615 	 * (We already handled n == 0 and n == 1 in pass 1)
1616 	 */
1617 	for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1618 		cp->cp_eax = n;
1619 
1620 		/*
1621 		 * CPUID function 4 expects %ecx to be initialized
1622 		 * with an index which indicates which cache to return
1623 		 * information about. The OS is expected to call function 4
1624 		 * with %ecx set to 0, 1, 2, ... until it returns with
1625 		 * EAX[4:0] set to 0, which indicates there are no more
1626 		 * caches.
1627 		 *
1628 		 * Here, populate cpi_std[4] with the information returned by
1629 		 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1630 		 * when dynamic memory allocation becomes available.
1631 		 *
1632 		 * Note: we need to explicitly initialize %ecx here, since
1633 		 * function 4 may have been previously invoked.
1634 		 */
1635 		if (n == 4)
1636 			cp->cp_ecx = 0;
1637 
1638 		(void) __cpuid_insn(cp);
1639 		platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1640 		switch (n) {
1641 		case 2:
1642 			/*
1643 			 * "the lower 8 bits of the %eax register
1644 			 * contain a value that identifies the number
1645 			 * of times the cpuid [instruction] has to be
1646 			 * executed to obtain a complete image of the
1647 			 * processor's caching systems."
1648 			 *
1649 			 * How *do* they make this stuff up?
1650 			 */
1651 			cpi->cpi_ncache = sizeof (*cp) *
1652 			    BITX(cp->cp_eax, 7, 0);
1653 			if (cpi->cpi_ncache == 0)
1654 				break;
1655 			cpi->cpi_ncache--;	/* skip count byte */
1656 
1657 			/*
1658 			 * Well, for now, rather than attempt to implement
1659 			 * this slightly dubious algorithm, we just look
1660 			 * at the first 15 ..
1661 			 */
1662 			if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1663 				cpi->cpi_ncache = sizeof (*cp) - 1;
1664 
1665 			dp = cpi->cpi_cacheinfo;
1666 			if (BITX(cp->cp_eax, 31, 31) == 0) {
1667 				uint8_t *p = (void *)&cp->cp_eax;
1668 				for (i = 1; i < 4; i++)
1669 					if (p[i] != 0)
1670 						*dp++ = p[i];
1671 			}
1672 			if (BITX(cp->cp_ebx, 31, 31) == 0) {
1673 				uint8_t *p = (void *)&cp->cp_ebx;
1674 				for (i = 0; i < 4; i++)
1675 					if (p[i] != 0)
1676 						*dp++ = p[i];
1677 			}
1678 			if (BITX(cp->cp_ecx, 31, 31) == 0) {
1679 				uint8_t *p = (void *)&cp->cp_ecx;
1680 				for (i = 0; i < 4; i++)
1681 					if (p[i] != 0)
1682 						*dp++ = p[i];
1683 			}
1684 			if (BITX(cp->cp_edx, 31, 31) == 0) {
1685 				uint8_t *p = (void *)&cp->cp_edx;
1686 				for (i = 0; i < 4; i++)
1687 					if (p[i] != 0)
1688 						*dp++ = p[i];
1689 			}
1690 			break;
1691 
1692 		case 3:	/* Processor serial number, if PSN supported */
1693 			break;
1694 
1695 		case 4:	/* Deterministic cache parameters */
1696 			break;
1697 
1698 		case 5:	/* Monitor/Mwait parameters */
1699 		{
1700 			size_t mwait_size;
1701 
1702 			/*
1703 			 * check cpi_mwait.support which was set in cpuid_pass1
1704 			 */
1705 			if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1706 				break;
1707 
1708 			/*
1709 			 * Protect ourself from insane mwait line size.
1710 			 * Workaround for incomplete hardware emulator(s).
1711 			 */
1712 			mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1713 			if (mwait_size < sizeof (uint32_t) ||
1714 			    !ISP2(mwait_size)) {
1715 #if DEBUG
1716 				cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1717 				    "size %ld", cpu->cpu_id, (long)mwait_size);
1718 #endif
1719 				break;
1720 			}
1721 
1722 			cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1723 			cpi->cpi_mwait.mon_max = mwait_size;
1724 			if (MWAIT_EXTENSION(cpi)) {
1725 				cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1726 				if (MWAIT_INT_ENABLE(cpi))
1727 					cpi->cpi_mwait.support |=
1728 					    MWAIT_ECX_INT_ENABLE;
1729 			}
1730 			break;
1731 		}
1732 		default:
1733 			break;
1734 		}
1735 	}
1736 
1737 	if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1738 		struct cpuid_regs regs;
1739 
1740 		cp = &regs;
1741 		cp->cp_eax = 0xB;
1742 		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1743 
1744 		(void) __cpuid_insn(cp);
1745 
1746 		/*
1747 		 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1748 		 * indicates that the extended topology enumeration leaf is
1749 		 * available.
1750 		 */
1751 		if (cp->cp_ebx) {
1752 			uint32_t x2apic_id;
1753 			uint_t coreid_shift = 0;
1754 			uint_t ncpu_per_core = 1;
1755 			uint_t chipid_shift = 0;
1756 			uint_t ncpu_per_chip = 1;
1757 			uint_t i;
1758 			uint_t level;
1759 
1760 			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1761 				cp->cp_eax = 0xB;
1762 				cp->cp_ecx = i;
1763 
1764 				(void) __cpuid_insn(cp);
1765 				level = CPI_CPU_LEVEL_TYPE(cp);
1766 
1767 				if (level == 1) {
1768 					x2apic_id = cp->cp_edx;
1769 					coreid_shift = BITX(cp->cp_eax, 4, 0);
1770 					ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1771 				} else if (level == 2) {
1772 					x2apic_id = cp->cp_edx;
1773 					chipid_shift = BITX(cp->cp_eax, 4, 0);
1774 					ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1775 				}
1776 			}
1777 
1778 			cpi->cpi_apicid = x2apic_id;
1779 			cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1780 			cpi->cpi_ncore_per_chip = ncpu_per_chip /
1781 			    ncpu_per_core;
1782 			cpi->cpi_chipid = x2apic_id >> chipid_shift;
1783 			cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1784 			cpi->cpi_coreid = x2apic_id >> coreid_shift;
1785 			cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1786 		}
1787 
1788 		/* Make cp NULL so that we don't stumble on others */
1789 		cp = NULL;
1790 	}
1791 
1792 	/*
1793 	 * XSAVE enumeration
1794 	 */
1795 	if (cpi->cpi_maxeax >= 0xD && cpi->cpi_vendor == X86_VENDOR_Intel) {
1796 		struct cpuid_regs regs;
1797 		boolean_t cpuid_d_valid = B_TRUE;
1798 
1799 		cp = &regs;
1800 		cp->cp_eax = 0xD;
1801 		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1802 
1803 		(void) __cpuid_insn(cp);
1804 
1805 		/*
1806 		 * Sanity checks for debug
1807 		 */
1808 		if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1809 		    (cp->cp_eax & XFEATURE_SSE) == 0) {
1810 			cpuid_d_valid = B_FALSE;
1811 		}
1812 
1813 		cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1814 		cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1815 		cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1816 
1817 		/*
1818 		 * If the hw supports AVX, get the size and offset in the save
1819 		 * area for the ymm state.
1820 		 */
1821 		if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1822 			cp->cp_eax = 0xD;
1823 			cp->cp_ecx = 2;
1824 			cp->cp_edx = cp->cp_ebx = 0;
1825 
1826 			(void) __cpuid_insn(cp);
1827 
1828 			if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
1829 			    cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
1830 				cpuid_d_valid = B_FALSE;
1831 			}
1832 
1833 			cpi->cpi_xsave.ymm_size = cp->cp_eax;
1834 			cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
1835 		}
1836 
1837 		if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
1838 			xsave_state_size = 0;
1839 		} else if (cpuid_d_valid) {
1840 			xsave_state_size = cpi->cpi_xsave.xsav_max_size;
1841 		} else {
1842 			/* Broken CPUID 0xD, probably in HVM */
1843 			cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
1844 			    "value: hw_low = %d, hw_high = %d, xsave_size = %d"
1845 			    ", ymm_size = %d, ymm_offset = %d\n",
1846 			    cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
1847 			    cpi->cpi_xsave.xsav_hw_features_high,
1848 			    (int)cpi->cpi_xsave.xsav_max_size,
1849 			    (int)cpi->cpi_xsave.ymm_size,
1850 			    (int)cpi->cpi_xsave.ymm_offset);
1851 
1852 			if (xsave_state_size != 0) {
1853 				/*
1854 				 * This must be a non-boot CPU. We cannot
1855 				 * continue, because boot cpu has already
1856 				 * enabled XSAVE.
1857 				 */
1858 				ASSERT(cpu->cpu_id != 0);
1859 				cmn_err(CE_PANIC, "cpu%d: we have already "
1860 				    "enabled XSAVE on boot cpu, cannot "
1861 				    "continue.", cpu->cpu_id);
1862 			} else {
1863 				/*
1864 				 * Must be from boot CPU, OK to disable XSAVE.
1865 				 */
1866 				ASSERT(cpu->cpu_id == 0);
1867 				remove_x86_feature(x86_featureset,
1868 				    X86FSET_XSAVE);
1869 				remove_x86_feature(x86_featureset, X86FSET_AVX);
1870 				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
1871 				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
1872 				xsave_force_disable = B_TRUE;
1873 			}
1874 		}
1875 	}
1876 
1877 
1878 	if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1879 		goto pass2_done;
1880 
1881 	if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1882 		nmax = NMAX_CPI_EXTD;
1883 	/*
1884 	 * Copy the extended properties, fixing them as we go.
1885 	 * (We already handled n == 0 and n == 1 in pass 1)
1886 	 */
1887 	iptr = (void *)cpi->cpi_brandstr;
1888 	for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1889 		cp->cp_eax = 0x80000000 + n;
1890 		(void) __cpuid_insn(cp);
1891 		platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1892 		switch (n) {
1893 		case 2:
1894 		case 3:
1895 		case 4:
1896 			/*
1897 			 * Extract the brand string
1898 			 */
1899 			*iptr++ = cp->cp_eax;
1900 			*iptr++ = cp->cp_ebx;
1901 			*iptr++ = cp->cp_ecx;
1902 			*iptr++ = cp->cp_edx;
1903 			break;
1904 		case 5:
1905 			switch (cpi->cpi_vendor) {
1906 			case X86_VENDOR_AMD:
1907 				/*
1908 				 * The Athlon and Duron were the first
1909 				 * parts to report the sizes of the
1910 				 * TLB for large pages. Before then,
1911 				 * we don't trust the data.
1912 				 */
1913 				if (cpi->cpi_family < 6 ||
1914 				    (cpi->cpi_family == 6 &&
1915 				    cpi->cpi_model < 1))
1916 					cp->cp_eax = 0;
1917 				break;
1918 			default:
1919 				break;
1920 			}
1921 			break;
1922 		case 6:
1923 			switch (cpi->cpi_vendor) {
1924 			case X86_VENDOR_AMD:
1925 				/*
1926 				 * The Athlon and Duron were the first
1927 				 * AMD parts with L2 TLB's.
1928 				 * Before then, don't trust the data.
1929 				 */
1930 				if (cpi->cpi_family < 6 ||
1931 				    cpi->cpi_family == 6 &&
1932 				    cpi->cpi_model < 1)
1933 					cp->cp_eax = cp->cp_ebx = 0;
1934 				/*
1935 				 * AMD Duron rev A0 reports L2
1936 				 * cache size incorrectly as 1K
1937 				 * when it is really 64K
1938 				 */
1939 				if (cpi->cpi_family == 6 &&
1940 				    cpi->cpi_model == 3 &&
1941 				    cpi->cpi_step == 0) {
1942 					cp->cp_ecx &= 0xffff;
1943 					cp->cp_ecx |= 0x400000;
1944 				}
1945 				break;
1946 			case X86_VENDOR_Cyrix:	/* VIA C3 */
1947 				/*
1948 				 * VIA C3 processors are a bit messed
1949 				 * up w.r.t. encoding cache sizes in %ecx
1950 				 */
1951 				if (cpi->cpi_family != 6)
1952 					break;
1953 				/*
1954 				 * model 7 and 8 were incorrectly encoded
1955 				 *
1956 				 * xxx is model 8 really broken?
1957 				 */
1958 				if (cpi->cpi_model == 7 ||
1959 				    cpi->cpi_model == 8)
1960 					cp->cp_ecx =
1961 					    BITX(cp->cp_ecx, 31, 24) << 16 |
1962 					    BITX(cp->cp_ecx, 23, 16) << 12 |
1963 					    BITX(cp->cp_ecx, 15, 8) << 8 |
1964 					    BITX(cp->cp_ecx, 7, 0);
1965 				/*
1966 				 * model 9 stepping 1 has wrong associativity
1967 				 */
1968 				if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
1969 					cp->cp_ecx |= 8 << 12;
1970 				break;
1971 			case X86_VENDOR_Intel:
1972 				/*
1973 				 * Extended L2 Cache features function.
1974 				 * First appeared on Prescott.
1975 				 */
1976 			default:
1977 				break;
1978 			}
1979 			break;
1980 		default:
1981 			break;
1982 		}
1983 	}
1984 
1985 pass2_done:
1986 	cpi->cpi_pass = 2;
1987 }
1988 
1989 static const char *
1990 intel_cpubrand(const struct cpuid_info *cpi)
1991 {
1992 	int i;
1993 
1994 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
1995 	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
1996 		return ("i486");
1997 
1998 	switch (cpi->cpi_family) {
1999 	case 5:
2000 		return ("Intel Pentium(r)");
2001 	case 6:
2002 		switch (cpi->cpi_model) {
2003 			uint_t celeron, xeon;
2004 			const struct cpuid_regs *cp;
2005 		case 0:
2006 		case 1:
2007 		case 2:
2008 			return ("Intel Pentium(r) Pro");
2009 		case 3:
2010 		case 4:
2011 			return ("Intel Pentium(r) II");
2012 		case 6:
2013 			return ("Intel Celeron(r)");
2014 		case 5:
2015 		case 7:
2016 			celeron = xeon = 0;
2017 			cp = &cpi->cpi_std[2];	/* cache info */
2018 
2019 			for (i = 1; i < 4; i++) {
2020 				uint_t tmp;
2021 
2022 				tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2023 				if (tmp == 0x40)
2024 					celeron++;
2025 				if (tmp >= 0x44 && tmp <= 0x45)
2026 					xeon++;
2027 			}
2028 
2029 			for (i = 0; i < 2; i++) {
2030 				uint_t tmp;
2031 
2032 				tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2033 				if (tmp == 0x40)
2034 					celeron++;
2035 				else if (tmp >= 0x44 && tmp <= 0x45)
2036 					xeon++;
2037 			}
2038 
2039 			for (i = 0; i < 4; i++) {
2040 				uint_t tmp;
2041 
2042 				tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2043 				if (tmp == 0x40)
2044 					celeron++;
2045 				else if (tmp >= 0x44 && tmp <= 0x45)
2046 					xeon++;
2047 			}
2048 
2049 			for (i = 0; i < 4; i++) {
2050 				uint_t tmp;
2051 
2052 				tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2053 				if (tmp == 0x40)
2054 					celeron++;
2055 				else if (tmp >= 0x44 && tmp <= 0x45)
2056 					xeon++;
2057 			}
2058 
2059 			if (celeron)
2060 				return ("Intel Celeron(r)");
2061 			if (xeon)
2062 				return (cpi->cpi_model == 5 ?
2063 				    "Intel Pentium(r) II Xeon(tm)" :
2064 				    "Intel Pentium(r) III Xeon(tm)");
2065 			return (cpi->cpi_model == 5 ?
2066 			    "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2067 			    "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2068 		default:
2069 			break;
2070 		}
2071 	default:
2072 		break;
2073 	}
2074 
2075 	/* BrandID is present if the field is nonzero */
2076 	if (cpi->cpi_brandid != 0) {
2077 		static const struct {
2078 			uint_t bt_bid;
2079 			const char *bt_str;
2080 		} brand_tbl[] = {
2081 			{ 0x1,	"Intel(r) Celeron(r)" },
2082 			{ 0x2,	"Intel(r) Pentium(r) III" },
2083 			{ 0x3,	"Intel(r) Pentium(r) III Xeon(tm)" },
2084 			{ 0x4,	"Intel(r) Pentium(r) III" },
2085 			{ 0x6,	"Mobile Intel(r) Pentium(r) III" },
2086 			{ 0x7,	"Mobile Intel(r) Celeron(r)" },
2087 			{ 0x8,	"Intel(r) Pentium(r) 4" },
2088 			{ 0x9,	"Intel(r) Pentium(r) 4" },
2089 			{ 0xa,	"Intel(r) Celeron(r)" },
2090 			{ 0xb,	"Intel(r) Xeon(tm)" },
2091 			{ 0xc,	"Intel(r) Xeon(tm) MP" },
2092 			{ 0xe,	"Mobile Intel(r) Pentium(r) 4" },
2093 			{ 0xf,	"Mobile Intel(r) Celeron(r)" },
2094 			{ 0x11, "Mobile Genuine Intel(r)" },
2095 			{ 0x12, "Intel(r) Celeron(r) M" },
2096 			{ 0x13, "Mobile Intel(r) Celeron(r)" },
2097 			{ 0x14, "Intel(r) Celeron(r)" },
2098 			{ 0x15, "Mobile Genuine Intel(r)" },
2099 			{ 0x16,	"Intel(r) Pentium(r) M" },
2100 			{ 0x17, "Mobile Intel(r) Celeron(r)" }
2101 		};
2102 		uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2103 		uint_t sgn;
2104 
2105 		sgn = (cpi->cpi_family << 8) |
2106 		    (cpi->cpi_model << 4) | cpi->cpi_step;
2107 
2108 		for (i = 0; i < btblmax; i++)
2109 			if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2110 				break;
2111 		if (i < btblmax) {
2112 			if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2113 				return ("Intel(r) Celeron(r)");
2114 			if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2115 				return ("Intel(r) Xeon(tm) MP");
2116 			if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2117 				return ("Intel(r) Xeon(tm)");
2118 			return (brand_tbl[i].bt_str);
2119 		}
2120 	}
2121 
2122 	return (NULL);
2123 }
2124 
2125 static const char *
2126 amd_cpubrand(const struct cpuid_info *cpi)
2127 {
2128 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2129 	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2130 		return ("i486 compatible");
2131 
2132 	switch (cpi->cpi_family) {
2133 	case 5:
2134 		switch (cpi->cpi_model) {
2135 		case 0:
2136 		case 1:
2137 		case 2:
2138 		case 3:
2139 		case 4:
2140 		case 5:
2141 			return ("AMD-K5(r)");
2142 		case 6:
2143 		case 7:
2144 			return ("AMD-K6(r)");
2145 		case 8:
2146 			return ("AMD-K6(r)-2");
2147 		case 9:
2148 			return ("AMD-K6(r)-III");
2149 		default:
2150 			return ("AMD (family 5)");
2151 		}
2152 	case 6:
2153 		switch (cpi->cpi_model) {
2154 		case 1:
2155 			return ("AMD-K7(tm)");
2156 		case 0:
2157 		case 2:
2158 		case 4:
2159 			return ("AMD Athlon(tm)");
2160 		case 3:
2161 		case 7:
2162 			return ("AMD Duron(tm)");
2163 		case 6:
2164 		case 8:
2165 		case 10:
2166 			/*
2167 			 * Use the L2 cache size to distinguish
2168 			 */
2169 			return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2170 			    "AMD Athlon(tm)" : "AMD Duron(tm)");
2171 		default:
2172 			return ("AMD (family 6)");
2173 		}
2174 	default:
2175 		break;
2176 	}
2177 
2178 	if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2179 	    cpi->cpi_brandid != 0) {
2180 		switch (BITX(cpi->cpi_brandid, 7, 5)) {
2181 		case 3:
2182 			return ("AMD Opteron(tm) UP 1xx");
2183 		case 4:
2184 			return ("AMD Opteron(tm) DP 2xx");
2185 		case 5:
2186 			return ("AMD Opteron(tm) MP 8xx");
2187 		default:
2188 			return ("AMD Opteron(tm)");
2189 		}
2190 	}
2191 
2192 	return (NULL);
2193 }
2194 
2195 static const char *
2196 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2197 {
2198 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2199 	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2200 	    type == X86_TYPE_CYRIX_486)
2201 		return ("i486 compatible");
2202 
2203 	switch (type) {
2204 	case X86_TYPE_CYRIX_6x86:
2205 		return ("Cyrix 6x86");
2206 	case X86_TYPE_CYRIX_6x86L:
2207 		return ("Cyrix 6x86L");
2208 	case X86_TYPE_CYRIX_6x86MX:
2209 		return ("Cyrix 6x86MX");
2210 	case X86_TYPE_CYRIX_GXm:
2211 		return ("Cyrix GXm");
2212 	case X86_TYPE_CYRIX_MediaGX:
2213 		return ("Cyrix MediaGX");
2214 	case X86_TYPE_CYRIX_MII:
2215 		return ("Cyrix M2");
2216 	case X86_TYPE_VIA_CYRIX_III:
2217 		return ("VIA Cyrix M3");
2218 	default:
2219 		/*
2220 		 * Have another wild guess ..
2221 		 */
2222 		if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2223 			return ("Cyrix 5x86");
2224 		else if (cpi->cpi_family == 5) {
2225 			switch (cpi->cpi_model) {
2226 			case 2:
2227 				return ("Cyrix 6x86");	/* Cyrix M1 */
2228 			case 4:
2229 				return ("Cyrix MediaGX");
2230 			default:
2231 				break;
2232 			}
2233 		} else if (cpi->cpi_family == 6) {
2234 			switch (cpi->cpi_model) {
2235 			case 0:
2236 				return ("Cyrix 6x86MX"); /* Cyrix M2? */
2237 			case 5:
2238 			case 6:
2239 			case 7:
2240 			case 8:
2241 			case 9:
2242 				return ("VIA C3");
2243 			default:
2244 				break;
2245 			}
2246 		}
2247 		break;
2248 	}
2249 	return (NULL);
2250 }
2251 
2252 /*
2253  * This only gets called in the case that the CPU extended
2254  * feature brand string (0x80000002, 0x80000003, 0x80000004)
2255  * aren't available, or contain null bytes for some reason.
2256  */
2257 static void
2258 fabricate_brandstr(struct cpuid_info *cpi)
2259 {
2260 	const char *brand = NULL;
2261 
2262 	switch (cpi->cpi_vendor) {
2263 	case X86_VENDOR_Intel:
2264 		brand = intel_cpubrand(cpi);
2265 		break;
2266 	case X86_VENDOR_AMD:
2267 		brand = amd_cpubrand(cpi);
2268 		break;
2269 	case X86_VENDOR_Cyrix:
2270 		brand = cyrix_cpubrand(cpi, x86_type);
2271 		break;
2272 	case X86_VENDOR_NexGen:
2273 		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2274 			brand = "NexGen Nx586";
2275 		break;
2276 	case X86_VENDOR_Centaur:
2277 		if (cpi->cpi_family == 5)
2278 			switch (cpi->cpi_model) {
2279 			case 4:
2280 				brand = "Centaur C6";
2281 				break;
2282 			case 8:
2283 				brand = "Centaur C2";
2284 				break;
2285 			case 9:
2286 				brand = "Centaur C3";
2287 				break;
2288 			default:
2289 				break;
2290 			}
2291 		break;
2292 	case X86_VENDOR_Rise:
2293 		if (cpi->cpi_family == 5 &&
2294 		    (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2295 			brand = "Rise mP6";
2296 		break;
2297 	case X86_VENDOR_SiS:
2298 		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2299 			brand = "SiS 55x";
2300 		break;
2301 	case X86_VENDOR_TM:
2302 		if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2303 			brand = "Transmeta Crusoe TM3x00 or TM5x00";
2304 		break;
2305 	case X86_VENDOR_NSC:
2306 	case X86_VENDOR_UMC:
2307 	default:
2308 		break;
2309 	}
2310 	if (brand) {
2311 		(void) strcpy((char *)cpi->cpi_brandstr, brand);
2312 		return;
2313 	}
2314 
2315 	/*
2316 	 * If all else fails ...
2317 	 */
2318 	(void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2319 	    "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2320 	    cpi->cpi_model, cpi->cpi_step);
2321 }
2322 
2323 /*
2324  * This routine is called just after kernel memory allocation
2325  * becomes available on cpu0, and as part of mp_startup() on
2326  * the other cpus.
2327  *
2328  * Fixup the brand string, and collect any information from cpuid
2329  * that requires dynamicically allocated storage to represent.
2330  */
2331 /*ARGSUSED*/
2332 void
2333 cpuid_pass3(cpu_t *cpu)
2334 {
2335 	int	i, max, shft, level, size;
2336 	struct cpuid_regs regs;
2337 	struct cpuid_regs *cp;
2338 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2339 
2340 	ASSERT(cpi->cpi_pass == 2);
2341 
2342 	/*
2343 	 * Function 4: Deterministic cache parameters
2344 	 *
2345 	 * Take this opportunity to detect the number of threads
2346 	 * sharing the last level cache, and construct a corresponding
2347 	 * cache id. The respective cpuid_info members are initialized
2348 	 * to the default case of "no last level cache sharing".
2349 	 */
2350 	cpi->cpi_ncpu_shr_last_cache = 1;
2351 	cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2352 
2353 	if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2354 
2355 		/*
2356 		 * Find the # of elements (size) returned by fn 4, and along
2357 		 * the way detect last level cache sharing details.
2358 		 */
2359 		bzero(&regs, sizeof (regs));
2360 		cp = &regs;
2361 		for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2362 			cp->cp_eax = 4;
2363 			cp->cp_ecx = i;
2364 
2365 			(void) __cpuid_insn(cp);
2366 
2367 			if (CPI_CACHE_TYPE(cp) == 0)
2368 				break;
2369 			level = CPI_CACHE_LVL(cp);
2370 			if (level > max) {
2371 				max = level;
2372 				cpi->cpi_ncpu_shr_last_cache =
2373 				    CPI_NTHR_SHR_CACHE(cp) + 1;
2374 			}
2375 		}
2376 		cpi->cpi_std_4_size = size = i;
2377 
2378 		/*
2379 		 * Allocate the cpi_std_4 array. The first element
2380 		 * references the regs for fn 4, %ecx == 0, which
2381 		 * cpuid_pass2() stashed in cpi->cpi_std[4].
2382 		 */
2383 		if (size > 0) {
2384 			cpi->cpi_std_4 =
2385 			    kmem_alloc(size * sizeof (cp), KM_SLEEP);
2386 			cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2387 
2388 			/*
2389 			 * Allocate storage to hold the additional regs
2390 			 * for function 4, %ecx == 1 .. cpi_std_4_size.
2391 			 *
2392 			 * The regs for fn 4, %ecx == 0 has already
2393 			 * been allocated as indicated above.
2394 			 */
2395 			for (i = 1; i < size; i++) {
2396 				cp = cpi->cpi_std_4[i] =
2397 				    kmem_zalloc(sizeof (regs), KM_SLEEP);
2398 				cp->cp_eax = 4;
2399 				cp->cp_ecx = i;
2400 
2401 				(void) __cpuid_insn(cp);
2402 			}
2403 		}
2404 		/*
2405 		 * Determine the number of bits needed to represent
2406 		 * the number of CPUs sharing the last level cache.
2407 		 *
2408 		 * Shift off that number of bits from the APIC id to
2409 		 * derive the cache id.
2410 		 */
2411 		shft = 0;
2412 		for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2413 			shft++;
2414 		cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2415 	}
2416 
2417 	/*
2418 	 * Now fixup the brand string
2419 	 */
2420 	if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2421 		fabricate_brandstr(cpi);
2422 	} else {
2423 
2424 		/*
2425 		 * If we successfully extracted a brand string from the cpuid
2426 		 * instruction, clean it up by removing leading spaces and
2427 		 * similar junk.
2428 		 */
2429 		if (cpi->cpi_brandstr[0]) {
2430 			size_t maxlen = sizeof (cpi->cpi_brandstr);
2431 			char *src, *dst;
2432 
2433 			dst = src = (char *)cpi->cpi_brandstr;
2434 			src[maxlen - 1] = '\0';
2435 			/*
2436 			 * strip leading spaces
2437 			 */
2438 			while (*src == ' ')
2439 				src++;
2440 			/*
2441 			 * Remove any 'Genuine' or "Authentic" prefixes
2442 			 */
2443 			if (strncmp(src, "Genuine ", 8) == 0)
2444 				src += 8;
2445 			if (strncmp(src, "Authentic ", 10) == 0)
2446 				src += 10;
2447 
2448 			/*
2449 			 * Now do an in-place copy.
2450 			 * Map (R) to (r) and (TM) to (tm).
2451 			 * The era of teletypes is long gone, and there's
2452 			 * -really- no need to shout.
2453 			 */
2454 			while (*src != '\0') {
2455 				if (src[0] == '(') {
2456 					if (strncmp(src + 1, "R)", 2) == 0) {
2457 						(void) strncpy(dst, "(r)", 3);
2458 						src += 3;
2459 						dst += 3;
2460 						continue;
2461 					}
2462 					if (strncmp(src + 1, "TM)", 3) == 0) {
2463 						(void) strncpy(dst, "(tm)", 4);
2464 						src += 4;
2465 						dst += 4;
2466 						continue;
2467 					}
2468 				}
2469 				*dst++ = *src++;
2470 			}
2471 			*dst = '\0';
2472 
2473 			/*
2474 			 * Finally, remove any trailing spaces
2475 			 */
2476 			while (--dst > cpi->cpi_brandstr)
2477 				if (*dst == ' ')
2478 					*dst = '\0';
2479 				else
2480 					break;
2481 		} else
2482 			fabricate_brandstr(cpi);
2483 	}
2484 	cpi->cpi_pass = 3;
2485 }
2486 
2487 /*
2488  * This routine is called out of bind_hwcap() much later in the life
2489  * of the kernel (post_startup()).  The job of this routine is to resolve
2490  * the hardware feature support and kernel support for those features into
2491  * what we're actually going to tell applications via the aux vector.
2492  */
2493 uint_t
2494 cpuid_pass4(cpu_t *cpu)
2495 {
2496 	struct cpuid_info *cpi;
2497 	uint_t hwcap_flags = 0;
2498 
2499 	if (cpu == NULL)
2500 		cpu = CPU;
2501 	cpi = cpu->cpu_m.mcpu_cpi;
2502 
2503 	ASSERT(cpi->cpi_pass == 3);
2504 
2505 	if (cpi->cpi_maxeax >= 1) {
2506 		uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2507 		uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2508 
2509 		*edx = CPI_FEATURES_EDX(cpi);
2510 		*ecx = CPI_FEATURES_ECX(cpi);
2511 
2512 		/*
2513 		 * [these require explicit kernel support]
2514 		 */
2515 		if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2516 			*edx &= ~CPUID_INTC_EDX_SEP;
2517 
2518 		if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2519 			*edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2520 		if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2521 			*edx &= ~CPUID_INTC_EDX_SSE2;
2522 
2523 		if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2524 			*edx &= ~CPUID_INTC_EDX_HTT;
2525 
2526 		if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2527 			*ecx &= ~CPUID_INTC_ECX_SSE3;
2528 
2529 		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2530 			if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2531 				*ecx &= ~CPUID_INTC_ECX_SSSE3;
2532 			if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2533 				*ecx &= ~CPUID_INTC_ECX_SSE4_1;
2534 			if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2535 				*ecx &= ~CPUID_INTC_ECX_SSE4_2;
2536 			if (!is_x86_feature(x86_featureset, X86FSET_AES))
2537 				*ecx &= ~CPUID_INTC_ECX_AES;
2538 			if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2539 				*ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2540 			if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2541 				*ecx &= ~(CPUID_INTC_ECX_XSAVE |
2542 				    CPUID_INTC_ECX_OSXSAVE);
2543 			if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2544 				*ecx &= ~CPUID_INTC_ECX_AVX;
2545 		}
2546 
2547 		/*
2548 		 * [no explicit support required beyond x87 fp context]
2549 		 */
2550 		if (!fpu_exists)
2551 			*edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2552 
2553 		/*
2554 		 * Now map the supported feature vector to things that we
2555 		 * think userland will care about.
2556 		 */
2557 		if (*edx & CPUID_INTC_EDX_SEP)
2558 			hwcap_flags |= AV_386_SEP;
2559 		if (*edx & CPUID_INTC_EDX_SSE)
2560 			hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2561 		if (*edx & CPUID_INTC_EDX_SSE2)
2562 			hwcap_flags |= AV_386_SSE2;
2563 		if (*ecx & CPUID_INTC_ECX_SSE3)
2564 			hwcap_flags |= AV_386_SSE3;
2565 		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2566 			if (*ecx & CPUID_INTC_ECX_SSSE3)
2567 				hwcap_flags |= AV_386_SSSE3;
2568 			if (*ecx & CPUID_INTC_ECX_SSE4_1)
2569 				hwcap_flags |= AV_386_SSE4_1;
2570 			if (*ecx & CPUID_INTC_ECX_SSE4_2)
2571 				hwcap_flags |= AV_386_SSE4_2;
2572 			if (*ecx & CPUID_INTC_ECX_MOVBE)
2573 				hwcap_flags |= AV_386_MOVBE;
2574 			if (*ecx & CPUID_INTC_ECX_AES)
2575 				hwcap_flags |= AV_386_AES;
2576 			if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2577 				hwcap_flags |= AV_386_PCLMULQDQ;
2578 			if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2579 			    (*ecx & CPUID_INTC_ECX_OSXSAVE))
2580 				hwcap_flags |= AV_386_XSAVE;
2581 		}
2582 		if (*ecx & CPUID_INTC_ECX_VMX)
2583 			hwcap_flags |= AV_386_VMX;
2584 		if (*ecx & CPUID_INTC_ECX_POPCNT)
2585 			hwcap_flags |= AV_386_POPCNT;
2586 		if (*edx & CPUID_INTC_EDX_FPU)
2587 			hwcap_flags |= AV_386_FPU;
2588 		if (*edx & CPUID_INTC_EDX_MMX)
2589 			hwcap_flags |= AV_386_MMX;
2590 
2591 		if (*edx & CPUID_INTC_EDX_TSC)
2592 			hwcap_flags |= AV_386_TSC;
2593 		if (*edx & CPUID_INTC_EDX_CX8)
2594 			hwcap_flags |= AV_386_CX8;
2595 		if (*edx & CPUID_INTC_EDX_CMOV)
2596 			hwcap_flags |= AV_386_CMOV;
2597 		if (*ecx & CPUID_INTC_ECX_CX16)
2598 			hwcap_flags |= AV_386_CX16;
2599 	}
2600 
2601 	if (cpi->cpi_xmaxeax < 0x80000001)
2602 		goto pass4_done;
2603 
2604 	switch (cpi->cpi_vendor) {
2605 		struct cpuid_regs cp;
2606 		uint32_t *edx, *ecx;
2607 
2608 	case X86_VENDOR_Intel:
2609 		/*
2610 		 * Seems like Intel duplicated what we necessary
2611 		 * here to make the initial crop of 64-bit OS's work.
2612 		 * Hopefully, those are the only "extended" bits
2613 		 * they'll add.
2614 		 */
2615 		/*FALLTHROUGH*/
2616 
2617 	case X86_VENDOR_AMD:
2618 		edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2619 		ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2620 
2621 		*edx = CPI_FEATURES_XTD_EDX(cpi);
2622 		*ecx = CPI_FEATURES_XTD_ECX(cpi);
2623 
2624 		/*
2625 		 * [these features require explicit kernel support]
2626 		 */
2627 		switch (cpi->cpi_vendor) {
2628 		case X86_VENDOR_Intel:
2629 			if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2630 				*edx &= ~CPUID_AMD_EDX_TSCP;
2631 			break;
2632 
2633 		case X86_VENDOR_AMD:
2634 			if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2635 				*edx &= ~CPUID_AMD_EDX_TSCP;
2636 			if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2637 				*ecx &= ~CPUID_AMD_ECX_SSE4A;
2638 			break;
2639 
2640 		default:
2641 			break;
2642 		}
2643 
2644 		/*
2645 		 * [no explicit support required beyond
2646 		 * x87 fp context and exception handlers]
2647 		 */
2648 		if (!fpu_exists)
2649 			*edx &= ~(CPUID_AMD_EDX_MMXamd |
2650 			    CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2651 
2652 		if (!is_x86_feature(x86_featureset, X86FSET_NX))
2653 			*edx &= ~CPUID_AMD_EDX_NX;
2654 #if !defined(__amd64)
2655 		*edx &= ~CPUID_AMD_EDX_LM;
2656 #endif
2657 		/*
2658 		 * Now map the supported feature vector to
2659 		 * things that we think userland will care about.
2660 		 */
2661 #if defined(__amd64)
2662 		if (*edx & CPUID_AMD_EDX_SYSC)
2663 			hwcap_flags |= AV_386_AMD_SYSC;
2664 #endif
2665 		if (*edx & CPUID_AMD_EDX_MMXamd)
2666 			hwcap_flags |= AV_386_AMD_MMX;
2667 		if (*edx & CPUID_AMD_EDX_3DNow)
2668 			hwcap_flags |= AV_386_AMD_3DNow;
2669 		if (*edx & CPUID_AMD_EDX_3DNowx)
2670 			hwcap_flags |= AV_386_AMD_3DNowx;
2671 		if (*ecx & CPUID_AMD_ECX_SVM)
2672 			hwcap_flags |= AV_386_AMD_SVM;
2673 
2674 		switch (cpi->cpi_vendor) {
2675 		case X86_VENDOR_AMD:
2676 			if (*edx & CPUID_AMD_EDX_TSCP)
2677 				hwcap_flags |= AV_386_TSCP;
2678 			if (*ecx & CPUID_AMD_ECX_AHF64)
2679 				hwcap_flags |= AV_386_AHF;
2680 			if (*ecx & CPUID_AMD_ECX_SSE4A)
2681 				hwcap_flags |= AV_386_AMD_SSE4A;
2682 			if (*ecx & CPUID_AMD_ECX_LZCNT)
2683 				hwcap_flags |= AV_386_AMD_LZCNT;
2684 			break;
2685 
2686 		case X86_VENDOR_Intel:
2687 			if (*edx & CPUID_AMD_EDX_TSCP)
2688 				hwcap_flags |= AV_386_TSCP;
2689 			/*
2690 			 * Aarrgh.
2691 			 * Intel uses a different bit in the same word.
2692 			 */
2693 			if (*ecx & CPUID_INTC_ECX_AHF64)
2694 				hwcap_flags |= AV_386_AHF;
2695 			break;
2696 
2697 		default:
2698 			break;
2699 		}
2700 		break;
2701 
2702 	case X86_VENDOR_TM:
2703 		cp.cp_eax = 0x80860001;
2704 		(void) __cpuid_insn(&cp);
2705 		cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2706 		break;
2707 
2708 	default:
2709 		break;
2710 	}
2711 
2712 pass4_done:
2713 	cpi->cpi_pass = 4;
2714 	return (hwcap_flags);
2715 }
2716 
2717 
2718 /*
2719  * Simulate the cpuid instruction using the data we previously
2720  * captured about this CPU.  We try our best to return the truth
2721  * about the hardware, independently of kernel support.
2722  */
2723 uint32_t
2724 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2725 {
2726 	struct cpuid_info *cpi;
2727 	struct cpuid_regs *xcp;
2728 
2729 	if (cpu == NULL)
2730 		cpu = CPU;
2731 	cpi = cpu->cpu_m.mcpu_cpi;
2732 
2733 	ASSERT(cpuid_checkpass(cpu, 3));
2734 
2735 	/*
2736 	 * CPUID data is cached in two separate places: cpi_std for standard
2737 	 * CPUID functions, and cpi_extd for extended CPUID functions.
2738 	 */
2739 	if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2740 		xcp = &cpi->cpi_std[cp->cp_eax];
2741 	else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2742 	    cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2743 		xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2744 	else
2745 		/*
2746 		 * The caller is asking for data from an input parameter which
2747 		 * the kernel has not cached.  In this case we go fetch from
2748 		 * the hardware and return the data directly to the user.
2749 		 */
2750 		return (__cpuid_insn(cp));
2751 
2752 	cp->cp_eax = xcp->cp_eax;
2753 	cp->cp_ebx = xcp->cp_ebx;
2754 	cp->cp_ecx = xcp->cp_ecx;
2755 	cp->cp_edx = xcp->cp_edx;
2756 	return (cp->cp_eax);
2757 }
2758 
2759 int
2760 cpuid_checkpass(cpu_t *cpu, int pass)
2761 {
2762 	return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2763 	    cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2764 }
2765 
2766 int
2767 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2768 {
2769 	ASSERT(cpuid_checkpass(cpu, 3));
2770 
2771 	return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2772 }
2773 
2774 int
2775 cpuid_is_cmt(cpu_t *cpu)
2776 {
2777 	if (cpu == NULL)
2778 		cpu = CPU;
2779 
2780 	ASSERT(cpuid_checkpass(cpu, 1));
2781 
2782 	return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2783 }
2784 
2785 /*
2786  * AMD and Intel both implement the 64-bit variant of the syscall
2787  * instruction (syscallq), so if there's -any- support for syscall,
2788  * cpuid currently says "yes, we support this".
2789  *
2790  * However, Intel decided to -not- implement the 32-bit variant of the
2791  * syscall instruction, so we provide a predicate to allow our caller
2792  * to test that subtlety here.
2793  *
2794  * XXPV	Currently, 32-bit syscall instructions don't work via the hypervisor,
2795  *	even in the case where the hardware would in fact support it.
2796  */
2797 /*ARGSUSED*/
2798 int
2799 cpuid_syscall32_insn(cpu_t *cpu)
2800 {
2801 	ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2802 
2803 #if !defined(__xpv)
2804 	if (cpu == NULL)
2805 		cpu = CPU;
2806 
2807 	/*CSTYLED*/
2808 	{
2809 		struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2810 
2811 		if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2812 		    cpi->cpi_xmaxeax >= 0x80000001 &&
2813 		    (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2814 			return (1);
2815 	}
2816 #endif
2817 	return (0);
2818 }
2819 
2820 int
2821 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2822 {
2823 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2824 
2825 	static const char fmt[] =
2826 	    "x86 (%s %X family %d model %d step %d clock %d MHz)";
2827 	static const char fmt_ht[] =
2828 	    "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2829 
2830 	ASSERT(cpuid_checkpass(cpu, 1));
2831 
2832 	if (cpuid_is_cmt(cpu))
2833 		return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2834 		    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2835 		    cpi->cpi_family, cpi->cpi_model,
2836 		    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2837 	return (snprintf(s, n, fmt,
2838 	    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2839 	    cpi->cpi_family, cpi->cpi_model,
2840 	    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2841 }
2842 
2843 const char *
2844 cpuid_getvendorstr(cpu_t *cpu)
2845 {
2846 	ASSERT(cpuid_checkpass(cpu, 1));
2847 	return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2848 }
2849 
2850 uint_t
2851 cpuid_getvendor(cpu_t *cpu)
2852 {
2853 	ASSERT(cpuid_checkpass(cpu, 1));
2854 	return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2855 }
2856 
2857 uint_t
2858 cpuid_getfamily(cpu_t *cpu)
2859 {
2860 	ASSERT(cpuid_checkpass(cpu, 1));
2861 	return (cpu->cpu_m.mcpu_cpi->cpi_family);
2862 }
2863 
2864 uint_t
2865 cpuid_getmodel(cpu_t *cpu)
2866 {
2867 	ASSERT(cpuid_checkpass(cpu, 1));
2868 	return (cpu->cpu_m.mcpu_cpi->cpi_model);
2869 }
2870 
2871 uint_t
2872 cpuid_get_ncpu_per_chip(cpu_t *cpu)
2873 {
2874 	ASSERT(cpuid_checkpass(cpu, 1));
2875 	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2876 }
2877 
2878 uint_t
2879 cpuid_get_ncore_per_chip(cpu_t *cpu)
2880 {
2881 	ASSERT(cpuid_checkpass(cpu, 1));
2882 	return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2883 }
2884 
2885 uint_t
2886 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2887 {
2888 	ASSERT(cpuid_checkpass(cpu, 2));
2889 	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2890 }
2891 
2892 id_t
2893 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2894 {
2895 	ASSERT(cpuid_checkpass(cpu, 2));
2896 	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2897 }
2898 
2899 uint_t
2900 cpuid_getstep(cpu_t *cpu)
2901 {
2902 	ASSERT(cpuid_checkpass(cpu, 1));
2903 	return (cpu->cpu_m.mcpu_cpi->cpi_step);
2904 }
2905 
2906 uint_t
2907 cpuid_getsig(struct cpu *cpu)
2908 {
2909 	ASSERT(cpuid_checkpass(cpu, 1));
2910 	return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
2911 }
2912 
2913 uint32_t
2914 cpuid_getchiprev(struct cpu *cpu)
2915 {
2916 	ASSERT(cpuid_checkpass(cpu, 1));
2917 	return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
2918 }
2919 
2920 const char *
2921 cpuid_getchiprevstr(struct cpu *cpu)
2922 {
2923 	ASSERT(cpuid_checkpass(cpu, 1));
2924 	return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
2925 }
2926 
2927 uint32_t
2928 cpuid_getsockettype(struct cpu *cpu)
2929 {
2930 	ASSERT(cpuid_checkpass(cpu, 1));
2931 	return (cpu->cpu_m.mcpu_cpi->cpi_socket);
2932 }
2933 
2934 const char *
2935 cpuid_getsocketstr(cpu_t *cpu)
2936 {
2937 	static const char *socketstr = NULL;
2938 	struct cpuid_info *cpi;
2939 
2940 	ASSERT(cpuid_checkpass(cpu, 1));
2941 	cpi = cpu->cpu_m.mcpu_cpi;
2942 
2943 	/* Assume that socket types are the same across the system */
2944 	if (socketstr == NULL)
2945 		socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
2946 		    cpi->cpi_model, cpi->cpi_step);
2947 
2948 
2949 	return (socketstr);
2950 }
2951 
2952 int
2953 cpuid_get_chipid(cpu_t *cpu)
2954 {
2955 	ASSERT(cpuid_checkpass(cpu, 1));
2956 
2957 	if (cpuid_is_cmt(cpu))
2958 		return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
2959 	return (cpu->cpu_id);
2960 }
2961 
2962 id_t
2963 cpuid_get_coreid(cpu_t *cpu)
2964 {
2965 	ASSERT(cpuid_checkpass(cpu, 1));
2966 	return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
2967 }
2968 
2969 int
2970 cpuid_get_pkgcoreid(cpu_t *cpu)
2971 {
2972 	ASSERT(cpuid_checkpass(cpu, 1));
2973 	return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
2974 }
2975 
2976 int
2977 cpuid_get_clogid(cpu_t *cpu)
2978 {
2979 	ASSERT(cpuid_checkpass(cpu, 1));
2980 	return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
2981 }
2982 
2983 int
2984 cpuid_get_cacheid(cpu_t *cpu)
2985 {
2986 	ASSERT(cpuid_checkpass(cpu, 1));
2987 	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2988 }
2989 
2990 uint_t
2991 cpuid_get_procnodeid(cpu_t *cpu)
2992 {
2993 	ASSERT(cpuid_checkpass(cpu, 1));
2994 	return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
2995 }
2996 
2997 uint_t
2998 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
2999 {
3000 	ASSERT(cpuid_checkpass(cpu, 1));
3001 	return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3002 }
3003 
3004 /*ARGSUSED*/
3005 int
3006 cpuid_have_cr8access(cpu_t *cpu)
3007 {
3008 #if defined(__amd64)
3009 	return (1);
3010 #else
3011 	struct cpuid_info *cpi;
3012 
3013 	ASSERT(cpu != NULL);
3014 	cpi = cpu->cpu_m.mcpu_cpi;
3015 	if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3016 	    (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3017 		return (1);
3018 	return (0);
3019 #endif
3020 }
3021 
3022 uint32_t
3023 cpuid_get_apicid(cpu_t *cpu)
3024 {
3025 	ASSERT(cpuid_checkpass(cpu, 1));
3026 	if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3027 		return (UINT32_MAX);
3028 	} else {
3029 		return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3030 	}
3031 }
3032 
3033 void
3034 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3035 {
3036 	struct cpuid_info *cpi;
3037 
3038 	if (cpu == NULL)
3039 		cpu = CPU;
3040 	cpi = cpu->cpu_m.mcpu_cpi;
3041 
3042 	ASSERT(cpuid_checkpass(cpu, 1));
3043 
3044 	if (pabits)
3045 		*pabits = cpi->cpi_pabits;
3046 	if (vabits)
3047 		*vabits = cpi->cpi_vabits;
3048 }
3049 
3050 /*
3051  * Returns the number of data TLB entries for a corresponding
3052  * pagesize.  If it can't be computed, or isn't known, the
3053  * routine returns zero.  If you ask about an architecturally
3054  * impossible pagesize, the routine will panic (so that the
3055  * hat implementor knows that things are inconsistent.)
3056  */
3057 uint_t
3058 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3059 {
3060 	struct cpuid_info *cpi;
3061 	uint_t dtlb_nent = 0;
3062 
3063 	if (cpu == NULL)
3064 		cpu = CPU;
3065 	cpi = cpu->cpu_m.mcpu_cpi;
3066 
3067 	ASSERT(cpuid_checkpass(cpu, 1));
3068 
3069 	/*
3070 	 * Check the L2 TLB info
3071 	 */
3072 	if (cpi->cpi_xmaxeax >= 0x80000006) {
3073 		struct cpuid_regs *cp = &cpi->cpi_extd[6];
3074 
3075 		switch (pagesize) {
3076 
3077 		case 4 * 1024:
3078 			/*
3079 			 * All zero in the top 16 bits of the register
3080 			 * indicates a unified TLB. Size is in low 16 bits.
3081 			 */
3082 			if ((cp->cp_ebx & 0xffff0000) == 0)
3083 				dtlb_nent = cp->cp_ebx & 0x0000ffff;
3084 			else
3085 				dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3086 			break;
3087 
3088 		case 2 * 1024 * 1024:
3089 			if ((cp->cp_eax & 0xffff0000) == 0)
3090 				dtlb_nent = cp->cp_eax & 0x0000ffff;
3091 			else
3092 				dtlb_nent = BITX(cp->cp_eax, 27, 16);
3093 			break;
3094 
3095 		default:
3096 			panic("unknown L2 pagesize");
3097 			/*NOTREACHED*/
3098 		}
3099 	}
3100 
3101 	if (dtlb_nent != 0)
3102 		return (dtlb_nent);
3103 
3104 	/*
3105 	 * No L2 TLB support for this size, try L1.
3106 	 */
3107 	if (cpi->cpi_xmaxeax >= 0x80000005) {
3108 		struct cpuid_regs *cp = &cpi->cpi_extd[5];
3109 
3110 		switch (pagesize) {
3111 		case 4 * 1024:
3112 			dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3113 			break;
3114 		case 2 * 1024 * 1024:
3115 			dtlb_nent = BITX(cp->cp_eax, 23, 16);
3116 			break;
3117 		default:
3118 			panic("unknown L1 d-TLB pagesize");
3119 			/*NOTREACHED*/
3120 		}
3121 	}
3122 
3123 	return (dtlb_nent);
3124 }
3125 
3126 /*
3127  * Return 0 if the erratum is not present or not applicable, positive
3128  * if it is, and negative if the status of the erratum is unknown.
3129  *
3130  * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3131  * Processors" #25759, Rev 3.57, August 2005
3132  */
3133 int
3134 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3135 {
3136 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3137 	uint_t eax;
3138 
3139 	/*
3140 	 * Bail out if this CPU isn't an AMD CPU, or if it's
3141 	 * a legacy (32-bit) AMD CPU.
3142 	 */
3143 	if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3144 	    cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3145 	    cpi->cpi_family == 6)
3146 
3147 		return (0);
3148 
3149 	eax = cpi->cpi_std[1].cp_eax;
3150 
3151 #define	SH_B0(eax)	(eax == 0xf40 || eax == 0xf50)
3152 #define	SH_B3(eax) 	(eax == 0xf51)
3153 #define	B(eax)		(SH_B0(eax) || SH_B3(eax))
3154 
3155 #define	SH_C0(eax)	(eax == 0xf48 || eax == 0xf58)
3156 
3157 #define	SH_CG(eax)	(eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3158 #define	DH_CG(eax)	(eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3159 #define	CH_CG(eax)	(eax == 0xf82 || eax == 0xfb2)
3160 #define	CG(eax)		(SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3161 
3162 #define	SH_D0(eax)	(eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3163 #define	DH_D0(eax)	(eax == 0x10fc0 || eax == 0x10ff0)
3164 #define	CH_D0(eax)	(eax == 0x10f80 || eax == 0x10fb0)
3165 #define	D0(eax)		(SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3166 
3167 #define	SH_E0(eax)	(eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3168 #define	JH_E1(eax)	(eax == 0x20f10)	/* JH8_E0 had 0x20f30 */
3169 #define	DH_E3(eax)	(eax == 0x20fc0 || eax == 0x20ff0)
3170 #define	SH_E4(eax)	(eax == 0x20f51 || eax == 0x20f71)
3171 #define	BH_E4(eax)	(eax == 0x20fb1)
3172 #define	SH_E5(eax)	(eax == 0x20f42)
3173 #define	DH_E6(eax)	(eax == 0x20ff2 || eax == 0x20fc2)
3174 #define	JH_E6(eax)	(eax == 0x20f12 || eax == 0x20f32)
3175 #define	EX(eax)		(SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3176 			    SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3177 			    DH_E6(eax) || JH_E6(eax))
3178 
3179 #define	DR_AX(eax)	(eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3180 #define	DR_B0(eax)	(eax == 0x100f20)
3181 #define	DR_B1(eax)	(eax == 0x100f21)
3182 #define	DR_BA(eax)	(eax == 0x100f2a)
3183 #define	DR_B2(eax)	(eax == 0x100f22)
3184 #define	DR_B3(eax)	(eax == 0x100f23)
3185 #define	RB_C0(eax)	(eax == 0x100f40)
3186 
3187 	switch (erratum) {
3188 	case 1:
3189 		return (cpi->cpi_family < 0x10);
3190 	case 51:	/* what does the asterisk mean? */
3191 		return (B(eax) || SH_C0(eax) || CG(eax));
3192 	case 52:
3193 		return (B(eax));
3194 	case 57:
3195 		return (cpi->cpi_family <= 0x11);
3196 	case 58:
3197 		return (B(eax));
3198 	case 60:
3199 		return (cpi->cpi_family <= 0x11);
3200 	case 61:
3201 	case 62:
3202 	case 63:
3203 	case 64:
3204 	case 65:
3205 	case 66:
3206 	case 68:
3207 	case 69:
3208 	case 70:
3209 	case 71:
3210 		return (B(eax));
3211 	case 72:
3212 		return (SH_B0(eax));
3213 	case 74:
3214 		return (B(eax));
3215 	case 75:
3216 		return (cpi->cpi_family < 0x10);
3217 	case 76:
3218 		return (B(eax));
3219 	case 77:
3220 		return (cpi->cpi_family <= 0x11);
3221 	case 78:
3222 		return (B(eax) || SH_C0(eax));
3223 	case 79:
3224 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3225 	case 80:
3226 	case 81:
3227 	case 82:
3228 		return (B(eax));
3229 	case 83:
3230 		return (B(eax) || SH_C0(eax) || CG(eax));
3231 	case 85:
3232 		return (cpi->cpi_family < 0x10);
3233 	case 86:
3234 		return (SH_C0(eax) || CG(eax));
3235 	case 88:
3236 #if !defined(__amd64)
3237 		return (0);
3238 #else
3239 		return (B(eax) || SH_C0(eax));
3240 #endif
3241 	case 89:
3242 		return (cpi->cpi_family < 0x10);
3243 	case 90:
3244 		return (B(eax) || SH_C0(eax) || CG(eax));
3245 	case 91:
3246 	case 92:
3247 		return (B(eax) || SH_C0(eax));
3248 	case 93:
3249 		return (SH_C0(eax));
3250 	case 94:
3251 		return (B(eax) || SH_C0(eax) || CG(eax));
3252 	case 95:
3253 #if !defined(__amd64)
3254 		return (0);
3255 #else
3256 		return (B(eax) || SH_C0(eax));
3257 #endif
3258 	case 96:
3259 		return (B(eax) || SH_C0(eax) || CG(eax));
3260 	case 97:
3261 	case 98:
3262 		return (SH_C0(eax) || CG(eax));
3263 	case 99:
3264 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3265 	case 100:
3266 		return (B(eax) || SH_C0(eax));
3267 	case 101:
3268 	case 103:
3269 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3270 	case 104:
3271 		return (SH_C0(eax) || CG(eax) || D0(eax));
3272 	case 105:
3273 	case 106:
3274 	case 107:
3275 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3276 	case 108:
3277 		return (DH_CG(eax));
3278 	case 109:
3279 		return (SH_C0(eax) || CG(eax) || D0(eax));
3280 	case 110:
3281 		return (D0(eax) || EX(eax));
3282 	case 111:
3283 		return (CG(eax));
3284 	case 112:
3285 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3286 	case 113:
3287 		return (eax == 0x20fc0);
3288 	case 114:
3289 		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3290 	case 115:
3291 		return (SH_E0(eax) || JH_E1(eax));
3292 	case 116:
3293 		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3294 	case 117:
3295 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3296 	case 118:
3297 		return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3298 		    JH_E6(eax));
3299 	case 121:
3300 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3301 	case 122:
3302 		return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3303 	case 123:
3304 		return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3305 	case 131:
3306 		return (cpi->cpi_family < 0x10);
3307 	case 6336786:
3308 		/*
3309 		 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3310 		 * if this is a K8 family or newer processor
3311 		 */
3312 		if (CPI_FAMILY(cpi) == 0xf) {
3313 			struct cpuid_regs regs;
3314 			regs.cp_eax = 0x80000007;
3315 			(void) __cpuid_insn(&regs);
3316 			return (!(regs.cp_edx & 0x100));
3317 		}
3318 		return (0);
3319 	case 6323525:
3320 		return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3321 		    (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3322 
3323 	case 6671130:
3324 		/*
3325 		 * check for processors (pre-Shanghai) that do not provide
3326 		 * optimal management of 1gb ptes in its tlb.
3327 		 */
3328 		return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3329 
3330 	case 298:
3331 		return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3332 		    DR_B2(eax) || RB_C0(eax));
3333 
3334 	default:
3335 		return (-1);
3336 
3337 	}
3338 }
3339 
3340 /*
3341  * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3342  * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3343  */
3344 int
3345 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3346 {
3347 	struct cpuid_info	*cpi;
3348 	uint_t			osvwid;
3349 	static int		osvwfeature = -1;
3350 	uint64_t		osvwlength;
3351 
3352 
3353 	cpi = cpu->cpu_m.mcpu_cpi;
3354 
3355 	/* confirm OSVW supported */
3356 	if (osvwfeature == -1) {
3357 		osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3358 	} else {
3359 		/* assert that osvw feature setting is consistent on all cpus */
3360 		ASSERT(osvwfeature ==
3361 		    (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3362 	}
3363 	if (!osvwfeature)
3364 		return (-1);
3365 
3366 	osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3367 
3368 	switch (erratum) {
3369 	case 298:	/* osvwid is 0 */
3370 		osvwid = 0;
3371 		if (osvwlength <= (uint64_t)osvwid) {
3372 			/* osvwid 0 is unknown */
3373 			return (-1);
3374 		}
3375 
3376 		/*
3377 		 * Check the OSVW STATUS MSR to determine the state
3378 		 * of the erratum where:
3379 		 *   0 - fixed by HW
3380 		 *   1 - BIOS has applied the workaround when BIOS
3381 		 *   workaround is available. (Or for other errata,
3382 		 *   OS workaround is required.)
3383 		 * For a value of 1, caller will confirm that the
3384 		 * erratum 298 workaround has indeed been applied by BIOS.
3385 		 *
3386 		 * A 1 may be set in cpus that have a HW fix
3387 		 * in a mixed cpu system. Regarding erratum 298:
3388 		 *   In a multiprocessor platform, the workaround above
3389 		 *   should be applied to all processors regardless of
3390 		 *   silicon revision when an affected processor is
3391 		 *   present.
3392 		 */
3393 
3394 		return (rdmsr(MSR_AMD_OSVW_STATUS +
3395 		    (osvwid / OSVW_ID_CNT_PER_MSR)) &
3396 		    (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3397 
3398 	default:
3399 		return (-1);
3400 	}
3401 }
3402 
3403 static const char assoc_str[] = "associativity";
3404 static const char line_str[] = "line-size";
3405 static const char size_str[] = "size";
3406 
3407 static void
3408 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3409     uint32_t val)
3410 {
3411 	char buf[128];
3412 
3413 	/*
3414 	 * ndi_prop_update_int() is used because it is desirable for
3415 	 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3416 	 */
3417 	if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3418 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3419 }
3420 
/*
 * Intel-style cache/tlb description
 *
 * Standard cpuid level 2 gives a randomly ordered
 * selection of tags that index into a table that describes
 * cache and tlb properties.
 */

static const char l1_icache_str[] = "l1-icache";
static const char l1_dcache_str[] = "l1-dcache";
static const char l2_cache_str[] = "l2-cache";
static const char l3_cache_str[] = "l3-cache";
static const char itlb4k_str[] = "itlb-4K";
static const char dtlb4k_str[] = "dtlb-4K";
static const char itlb2M_str[] = "itlb-2M";
static const char itlb4M_str[] = "itlb-4M";
static const char dtlb4M_str[] = "dtlb-4M";
static const char dtlb24_str[] = "dtlb0-2M-4M";
static const char itlb424_str[] = "itlb-4K-2M-4M";
static const char itlb24_str[] = "itlb-2M-4M";
static const char dtlb44_str[] = "dtlb-4K-4M";
static const char sl1_dcache_str[] = "sectored-l1-dcache";
static const char sl2_cache_str[] = "sectored-l2-cache";
static const char itrace_str[] = "itrace-cache";
static const char sl3_cache_str[] = "sectored-l3-cache";
static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";

/*
 * One row per cpuid descriptor byte.
 *
 *	ct_code		descriptor byte; 0 terminates a table
 *	ct_assoc	associativity (ways); rows for fully associative
 *			structures carry 0 here
 *	ct_line_size	line size in bytes; 0 when the structure has no
 *			line size (tlb and trace-cache rows)
 *	ct_size		cache size in bytes, or the entry count for tlb
 *			rows
 *	ct_label	property-name prefix for this structure
 */
static const struct cachetab {
	uint8_t 	ct_code;
	uint8_t		ct_assoc;
	uint16_t 	ct_line_size;
	size_t		ct_size;
	const char	*ct_label;
} intel_ctab[] = {
	/*
	 * maintain descending order!
	 *
	 * Codes ignored - Reason
	 * ----------------------
	 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
	 * f0H/f1H - Currently we do not interpret prefetch size by design
	 */
	{ 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
	{ 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
	{ 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
	{ 0xde, 12, 64, 6*1024*1024, l3_cache_str},
	{ 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
	{ 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
	{ 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
	{ 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
	{ 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
	{ 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
	{ 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
	{ 0xd0, 4, 64, 512*1024, l3_cache_str},
	{ 0xca, 4, 0, 512, sh_l2_tlb4k_str},
	{ 0xc0, 4, 0, 8, dtlb44_str },
	{ 0xba, 4, 0, 64, dtlb4k_str },
	{ 0xb4, 4, 0, 256, dtlb4k_str },
	{ 0xb3, 4, 0, 128, dtlb4k_str },
	{ 0xb2, 4, 0, 64, itlb4k_str },
	{ 0xb0, 4, 0, 128, itlb4k_str },
	{ 0x87, 8, 64, 1024*1024, l2_cache_str},
	{ 0x86, 4, 64, 512*1024, l2_cache_str},
	{ 0x85, 8, 32, 2*1024*1024, l2_cache_str},
	{ 0x84, 8, 32, 1024*1024, l2_cache_str},
	{ 0x83, 8, 32, 512*1024, l2_cache_str},
	{ 0x82, 8, 32, 256*1024, l2_cache_str},
	{ 0x80, 8, 64, 512*1024, l2_cache_str},
	{ 0x7f, 2, 64, 512*1024, l2_cache_str},
	{ 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
	{ 0x7c, 8, 64, 1024*1024, sl2_cache_str},
	{ 0x7b, 8, 64, 512*1024, sl2_cache_str},
	{ 0x7a, 8, 64, 256*1024, sl2_cache_str},
	{ 0x79, 8, 64, 128*1024, sl2_cache_str},
	/* 78H is documented by Intel as 4-way set associative */
	{ 0x78, 4, 64, 1024*1024, l2_cache_str},
	{ 0x73, 8, 0, 64*1024, itrace_str},
	{ 0x72, 8, 0, 32*1024, itrace_str},
	{ 0x71, 8, 0, 16*1024, itrace_str},
	{ 0x70, 8, 0, 12*1024, itrace_str},
	{ 0x68, 4, 64, 32*1024, sl1_dcache_str},
	{ 0x67, 4, 64, 16*1024, sl1_dcache_str},
	{ 0x66, 4, 64, 8*1024, sl1_dcache_str},
	{ 0x60, 8, 64, 16*1024, sl1_dcache_str},
	{ 0x5d, 0, 0, 256, dtlb44_str},
	{ 0x5c, 0, 0, 128, dtlb44_str},
	{ 0x5b, 0, 0, 64, dtlb44_str},
	{ 0x5a, 4, 0, 32, dtlb24_str},
	{ 0x59, 0, 0, 16, dtlb4k_str},
	{ 0x57, 4, 0, 16, dtlb4k_str},
	{ 0x56, 4, 0, 16, dtlb4M_str},
	{ 0x55, 0, 0, 7, itlb24_str},
	{ 0x52, 0, 0, 256, itlb424_str},
	{ 0x51, 0, 0, 128, itlb424_str},
	{ 0x50, 0, 0, 64, itlb424_str},
	{ 0x4f, 0, 0, 32, itlb4k_str},
	{ 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
	{ 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
	{ 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
	{ 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
	{ 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
	{ 0x49, 16, 64, 4*1024*1024, l3_cache_str},
	{ 0x48, 12, 64, 3*1024*1024, l2_cache_str},
	{ 0x47, 8, 64, 8*1024*1024, l3_cache_str},
	{ 0x46, 4, 64, 4*1024*1024, l3_cache_str},
	{ 0x45, 4, 32, 2*1024*1024, l2_cache_str},
	{ 0x44, 4, 32, 1024*1024, l2_cache_str},
	{ 0x43, 4, 32, 512*1024, l2_cache_str},
	{ 0x42, 4, 32, 256*1024, l2_cache_str},
	{ 0x41, 4, 32, 128*1024, l2_cache_str},
	{ 0x3e, 4, 64, 512*1024, sl2_cache_str},
	{ 0x3d, 6, 64, 384*1024, sl2_cache_str},
	{ 0x3c, 4, 64, 256*1024, sl2_cache_str},
	{ 0x3b, 2, 64, 128*1024, sl2_cache_str},
	{ 0x3a, 6, 64, 192*1024, sl2_cache_str},
	{ 0x39, 4, 64, 128*1024, sl2_cache_str},
	{ 0x30, 8, 64, 32*1024, l1_icache_str},
	{ 0x2c, 8, 64, 32*1024, l1_dcache_str},
	{ 0x29, 8, 64, 4096*1024, sl3_cache_str},
	{ 0x25, 8, 64, 2048*1024, sl3_cache_str},
	{ 0x23, 8, 64, 1024*1024, sl3_cache_str},
	{ 0x22, 4, 64, 512*1024, sl3_cache_str},
	{ 0x0e, 6, 64, 24*1024, l1_dcache_str},
	/* 0DH differs from 0CH only in its 64 byte line size */
	{ 0x0d, 4, 64, 16*1024, l1_dcache_str},
	{ 0x0c, 4, 32, 16*1024, l1_dcache_str},
	{ 0x0b, 4, 0, 4, itlb4M_str},
	{ 0x0a, 2, 32, 8*1024, l1_dcache_str},
	{ 0x08, 4, 32, 16*1024, l1_icache_str},
	{ 0x06, 4, 32, 8*1024, l1_icache_str},
	{ 0x05, 4, 0, 32, dtlb4M_str},
	{ 0x04, 4, 0, 8, dtlb4M_str},
	{ 0x03, 4, 0, 64, dtlb4k_str},
	{ 0x02, 4, 0, 2, itlb4M_str},
	{ 0x01, 4, 0, 32, itlb4k_str},
	{ 0 }
};
3556 
3557 static const struct cachetab cyrix_ctab[] = {
3558 	{ 0x70, 4, 0, 32, "tlb-4K" },
3559 	{ 0x80, 4, 16, 16*1024, "l1-cache" },
3560 	{ 0 }
3561 };
3562 
3563 /*
3564  * Search a cache table for a matching entry
3565  */
3566 static const struct cachetab *
3567 find_cacheent(const struct cachetab *ct, uint_t code)
3568 {
3569 	if (code != 0) {
3570 		for (; ct->ct_code != 0; ct++)
3571 			if (ct->ct_code <= code)
3572 				break;
3573 		if (ct->ct_code == code)
3574 			return (ct);
3575 	}
3576 	return (NULL);
3577 }
3578 
3579 /*
3580  * Populate cachetab entry with L2 or L3 cache-information using
3581  * cpuid function 4. This function is called from intel_walk_cacheinfo()
3582  * when descriptor 0x49 is encountered. It returns 0 if no such cache
3583  * information is found.
3584  */
3585 static int
3586 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3587 {
3588 	uint32_t level, i;
3589 	int ret = 0;
3590 
3591 	for (i = 0; i < cpi->cpi_std_4_size; i++) {
3592 		level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3593 
3594 		if (level == 2 || level == 3) {
3595 			ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3596 			ct->ct_line_size =
3597 			    CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
3598 			ct->ct_size = ct->ct_assoc *
3599 			    (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3600 			    ct->ct_line_size *
3601 			    (cpi->cpi_std_4[i]->cp_ecx + 1);
3602 
3603 			if (level == 2) {
3604 				ct->ct_label = l2_cache_str;
3605 			} else if (level == 3) {
3606 				ct->ct_label = l3_cache_str;
3607 			}
3608 			ret = 1;
3609 		}
3610 	}
3611 
3612 	return (ret);
3613 }
3614 
3615 /*
3616  * Walk the cacheinfo descriptor, applying 'func' to every valid element
3617  * The walk is terminated if the walker returns non-zero.
3618  */
3619 static void
3620 intel_walk_cacheinfo(struct cpuid_info *cpi,
3621     void *arg, int (*func)(void *, const struct cachetab *))
3622 {
3623 	const struct cachetab *ct;
3624 	struct cachetab des_49_ct, des_b1_ct;
3625 	uint8_t *dp;
3626 	int i;
3627 
3628 	if ((dp = cpi->cpi_cacheinfo) == NULL)
3629 		return;
3630 	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3631 		/*
3632 		 * For overloaded descriptor 0x49 we use cpuid function 4
3633 		 * if supported by the current processor, to create
3634 		 * cache information.
3635 		 * For overloaded descriptor 0xb1 we use X86_PAE flag
3636 		 * to disambiguate the cache information.
3637 		 */
3638 		if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3639 		    intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3640 				ct = &des_49_ct;
3641 		} else if (*dp == 0xb1) {
3642 			des_b1_ct.ct_code = 0xb1;
3643 			des_b1_ct.ct_assoc = 4;
3644 			des_b1_ct.ct_line_size = 0;
3645 			if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3646 				des_b1_ct.ct_size = 8;
3647 				des_b1_ct.ct_label = itlb2M_str;
3648 			} else {
3649 				des_b1_ct.ct_size = 4;
3650 				des_b1_ct.ct_label = itlb4M_str;
3651 			}
3652 			ct = &des_b1_ct;
3653 		} else {
3654 			if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3655 				continue;
3656 			}
3657 		}
3658 
3659 		if (func(arg, ct) != 0) {
3660 			break;
3661 		}
3662 	}
3663 }
3664 
3665 /*
3666  * (Like the Intel one, except for Cyrix CPUs)
3667  */
3668 static void
3669 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3670     void *arg, int (*func)(void *, const struct cachetab *))
3671 {
3672 	const struct cachetab *ct;
3673 	uint8_t *dp;
3674 	int i;
3675 
3676 	if ((dp = cpi->cpi_cacheinfo) == NULL)
3677 		return;
3678 	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3679 		/*
3680 		 * Search Cyrix-specific descriptor table first ..
3681 		 */
3682 		if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3683 			if (func(arg, ct) != 0)
3684 				break;
3685 			continue;
3686 		}
3687 		/*
3688 		 * .. else fall back to the Intel one
3689 		 */
3690 		if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3691 			if (func(arg, ct) != 0)
3692 				break;
3693 			continue;
3694 		}
3695 	}
3696 }
3697 
3698 /*
3699  * A cacheinfo walker that adds associativity, line-size, and size properties
3700  * to the devinfo node it is passed as an argument.
3701  */
3702 static int
3703 add_cacheent_props(void *arg, const struct cachetab *ct)
3704 {
3705 	dev_info_t *devi = arg;
3706 
3707 	add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3708 	if (ct->ct_line_size != 0)
3709 		add_cache_prop(devi, ct->ct_label, line_str,
3710 		    ct->ct_line_size);
3711 	add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3712 	return (0);
3713 }
3714 
3715 
3716 static const char fully_assoc[] = "fully-associative?";
3717 
3718 /*
3719  * AMD style cache/tlb description
3720  *
3721  * Extended functions 5 and 6 directly describe properties of
3722  * tlbs and various cache levels.
3723  */
3724 static void
3725 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3726 {
3727 	switch (assoc) {
3728 	case 0:	/* reserved; ignore */
3729 		break;
3730 	default:
3731 		add_cache_prop(devi, label, assoc_str, assoc);
3732 		break;
3733 	case 0xff:
3734 		add_cache_prop(devi, label, fully_assoc, 1);
3735 		break;
3736 	}
3737 }
3738 
3739 static void
3740 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3741 {
3742 	if (size == 0)
3743 		return;
3744 	add_cache_prop(devi, label, size_str, size);
3745 	add_amd_assoc(devi, label, assoc);
3746 }
3747 
3748 static void
3749 add_amd_cache(dev_info_t *devi, const char *label,
3750     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3751 {
3752 	if (size == 0 || line_size == 0)
3753 		return;
3754 	add_amd_assoc(devi, label, assoc);
3755 	/*
3756 	 * Most AMD parts have a sectored cache. Multiple cache lines are
3757 	 * associated with each tag. A sector consists of all cache lines
3758 	 * associated with a tag. For example, the AMD K6-III has a sector
3759 	 * size of 2 cache lines per tag.
3760 	 */
3761 	if (lines_per_tag != 0)
3762 		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3763 	add_cache_prop(devi, label, line_str, line_size);
3764 	add_cache_prop(devi, label, size_str, size * 1024);
3765 }
3766 
3767 static void
3768 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3769 {
3770 	switch (assoc) {
3771 	case 0:	/* off */
3772 		break;
3773 	case 1:
3774 	case 2:
3775 	case 4:
3776 		add_cache_prop(devi, label, assoc_str, assoc);
3777 		break;
3778 	case 6:
3779 		add_cache_prop(devi, label, assoc_str, 8);
3780 		break;
3781 	case 8:
3782 		add_cache_prop(devi, label, assoc_str, 16);
3783 		break;
3784 	case 0xf:
3785 		add_cache_prop(devi, label, fully_assoc, 1);
3786 		break;
3787 	default: /* reserved; ignore */
3788 		break;
3789 	}
3790 }
3791 
3792 static void
3793 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3794 {
3795 	if (size == 0 || assoc == 0)
3796 		return;
3797 	add_amd_l2_assoc(devi, label, assoc);
3798 	add_cache_prop(devi, label, size_str, size);
3799 }
3800 
3801 static void
3802 add_amd_l2_cache(dev_info_t *devi, const char *label,
3803     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3804 {
3805 	if (size == 0 || assoc == 0 || line_size == 0)
3806 		return;
3807 	add_amd_l2_assoc(devi, label, assoc);
3808 	if (lines_per_tag != 0)
3809 		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3810 	add_cache_prop(devi, label, line_str, line_size);
3811 	add_cache_prop(devi, label, size_str, size * 1024);
3812 }
3813 
3814 static void
3815 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
3816 {
3817 	struct cpuid_regs *cp;
3818 
3819 	if (cpi->cpi_xmaxeax < 0x80000005)
3820 		return;
3821 	cp = &cpi->cpi_extd[5];
3822 
3823 	/*
3824 	 * 4M/2M L1 TLB configuration
3825 	 *
3826 	 * We report the size for 2M pages because AMD uses two
3827 	 * TLB entries for one 4M page.
3828 	 */
3829 	add_amd_tlb(devi, "dtlb-2M",
3830 	    BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
3831 	add_amd_tlb(devi, "itlb-2M",
3832 	    BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
3833 
3834 	/*
3835 	 * 4K L1 TLB configuration
3836 	 */
3837 
3838 	switch (cpi->cpi_vendor) {
3839 		uint_t nentries;
3840 	case X86_VENDOR_TM:
3841 		if (cpi->cpi_family >= 5) {
3842 			/*
3843 			 * Crusoe processors have 256 TLB entries, but
3844 			 * cpuid data format constrains them to only
3845 			 * reporting 255 of them.
3846 			 */
3847 			if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
3848 				nentries = 256;
3849 			/*
3850 			 * Crusoe processors also have a unified TLB
3851 			 */
3852 			add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
3853 			    nentries);
3854 			break;
3855 		}
3856 		/*FALLTHROUGH*/
3857 	default:
3858 		add_amd_tlb(devi, itlb4k_str,
3859 		    BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
3860 		add_amd_tlb(devi, dtlb4k_str,
3861 		    BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
3862 		break;
3863 	}
3864 
3865 	/*
3866 	 * data L1 cache configuration
3867 	 */
3868 
3869 	add_amd_cache(devi, l1_dcache_str,
3870 	    BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
3871 	    BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
3872 
3873 	/*
3874 	 * code L1 cache configuration
3875 	 */
3876 
3877 	add_amd_cache(devi, l1_icache_str,
3878 	    BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
3879 	    BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
3880 
3881 	if (cpi->cpi_xmaxeax < 0x80000006)
3882 		return;
3883 	cp = &cpi->cpi_extd[6];
3884 
3885 	/* Check for a unified L2 TLB for large pages */
3886 
3887 	if (BITX(cp->cp_eax, 31, 16) == 0)
3888 		add_amd_l2_tlb(devi, "l2-tlb-2M",
3889 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3890 	else {
3891 		add_amd_l2_tlb(devi, "l2-dtlb-2M",
3892 		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
3893 		add_amd_l2_tlb(devi, "l2-itlb-2M",
3894 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3895 	}
3896 
3897 	/* Check for a unified L2 TLB for 4K pages */
3898 
3899 	if (BITX(cp->cp_ebx, 31, 16) == 0) {
3900 		add_amd_l2_tlb(devi, "l2-tlb-4K",
3901 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3902 	} else {
3903 		add_amd_l2_tlb(devi, "l2-dtlb-4K",
3904 		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
3905 		add_amd_l2_tlb(devi, "l2-itlb-4K",
3906 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3907 	}
3908 
3909 	add_amd_l2_cache(devi, l2_cache_str,
3910 	    BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
3911 	    BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
3912 }
3913 
3914 /*
3915  * There are two basic ways that the x86 world describes it cache
3916  * and tlb architecture - Intel's way and AMD's way.
3917  *
3918  * Return which flavor of cache architecture we should use
3919  */
3920 static int
3921 x86_which_cacheinfo(struct cpuid_info *cpi)
3922 {
3923 	switch (cpi->cpi_vendor) {
3924 	case X86_VENDOR_Intel:
3925 		if (cpi->cpi_maxeax >= 2)
3926 			return (X86_VENDOR_Intel);
3927 		break;
3928 	case X86_VENDOR_AMD:
3929 		/*
3930 		 * The K5 model 1 was the first part from AMD that reported
3931 		 * cache sizes via extended cpuid functions.
3932 		 */
3933 		if (cpi->cpi_family > 5 ||
3934 		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3935 			return (X86_VENDOR_AMD);
3936 		break;
3937 	case X86_VENDOR_TM:
3938 		if (cpi->cpi_family >= 5)
3939 			return (X86_VENDOR_AMD);
3940 		/*FALLTHROUGH*/
3941 	default:
3942 		/*
3943 		 * If they have extended CPU data for 0x80000005
3944 		 * then we assume they have AMD-format cache
3945 		 * information.
3946 		 *
3947 		 * If not, and the vendor happens to be Cyrix,
3948 		 * then try our-Cyrix specific handler.
3949 		 *
3950 		 * If we're not Cyrix, then assume we're using Intel's
3951 		 * table-driven format instead.
3952 		 */
3953 		if (cpi->cpi_xmaxeax >= 0x80000005)
3954 			return (X86_VENDOR_AMD);
3955 		else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
3956 			return (X86_VENDOR_Cyrix);
3957 		else if (cpi->cpi_maxeax >= 2)
3958 			return (X86_VENDOR_Intel);
3959 		break;
3960 	}
3961 	return (-1);
3962 }
3963 
3964 void
3965 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
3966     struct cpuid_info *cpi)
3967 {
3968 	dev_info_t *cpu_devi;
3969 	int create;
3970 
3971 	cpu_devi = (dev_info_t *)dip;
3972 
3973 	/* device_type */
3974 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3975 	    "device_type", "cpu");
3976 
3977 	/* reg */
3978 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3979 	    "reg", cpu_id);
3980 
3981 	/* cpu-mhz, and clock-frequency */
3982 	if (cpu_freq > 0) {
3983 		long long mul;
3984 
3985 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3986 		    "cpu-mhz", cpu_freq);
3987 		if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
3988 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3989 			    "clock-frequency", (int)mul);
3990 	}
3991 
3992 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
3993 		return;
3994 	}
3995 
3996 	/* vendor-id */
3997 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3998 	    "vendor-id", cpi->cpi_vendorstr);
3999 
4000 	if (cpi->cpi_maxeax == 0) {
4001 		return;
4002 	}
4003 
4004 	/*
4005 	 * family, model, and step
4006 	 */
4007 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4008 	    "family", CPI_FAMILY(cpi));
4009 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4010 	    "cpu-model", CPI_MODEL(cpi));
4011 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4012 	    "stepping-id", CPI_STEP(cpi));
4013 
4014 	/* type */
4015 	switch (cpi->cpi_vendor) {
4016 	case X86_VENDOR_Intel:
4017 		create = 1;
4018 		break;
4019 	default:
4020 		create = 0;
4021 		break;
4022 	}
4023 	if (create)
4024 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4025 		    "type", CPI_TYPE(cpi));
4026 
4027 	/* ext-family */
4028 	switch (cpi->cpi_vendor) {
4029 	case X86_VENDOR_Intel:
4030 	case X86_VENDOR_AMD:
4031 		create = cpi->cpi_family >= 0xf;
4032 		break;
4033 	default:
4034 		create = 0;
4035 		break;
4036 	}
4037 	if (create)
4038 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4039 		    "ext-family", CPI_FAMILY_XTD(cpi));
4040 
4041 	/* ext-model */
4042 	switch (cpi->cpi_vendor) {
4043 	case X86_VENDOR_Intel:
4044 		create = IS_EXTENDED_MODEL_INTEL(cpi);
4045 		break;
4046 	case X86_VENDOR_AMD:
4047 		create = CPI_FAMILY(cpi) == 0xf;
4048 		break;
4049 	default:
4050 		create = 0;
4051 		break;
4052 	}
4053 	if (create)
4054 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4055 		    "ext-model", CPI_MODEL_XTD(cpi));
4056 
4057 	/* generation */
4058 	switch (cpi->cpi_vendor) {
4059 	case X86_VENDOR_AMD:
4060 		/*
4061 		 * AMD K5 model 1 was the first part to support this
4062 		 */
4063 		create = cpi->cpi_xmaxeax >= 0x80000001;
4064 		break;
4065 	default:
4066 		create = 0;
4067 		break;
4068 	}
4069 	if (create)
4070 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4071 		    "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4072 
4073 	/* brand-id */
4074 	switch (cpi->cpi_vendor) {
4075 	case X86_VENDOR_Intel:
4076 		/*
4077 		 * brand id first appeared on Pentium III Xeon model 8,
4078 		 * and Celeron model 8 processors and Opteron
4079 		 */
4080 		create = cpi->cpi_family > 6 ||
4081 		    (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4082 		break;
4083 	case X86_VENDOR_AMD:
4084 		create = cpi->cpi_family >= 0xf;
4085 		break;
4086 	default:
4087 		create = 0;
4088 		break;
4089 	}
4090 	if (create && cpi->cpi_brandid != 0) {
4091 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4092 		    "brand-id", cpi->cpi_brandid);
4093 	}
4094 
4095 	/* chunks, and apic-id */
4096 	switch (cpi->cpi_vendor) {
4097 		/*
4098 		 * first available on Pentium IV and Opteron (K8)
4099 		 */
4100 	case X86_VENDOR_Intel:
4101 		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4102 		break;
4103 	case X86_VENDOR_AMD:
4104 		create = cpi->cpi_family >= 0xf;
4105 		break;
4106 	default:
4107 		create = 0;
4108 		break;
4109 	}
4110 	if (create) {
4111 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4112 		    "chunks", CPI_CHUNKS(cpi));
4113 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4114 		    "apic-id", cpi->cpi_apicid);
4115 		if (cpi->cpi_chipid >= 0) {
4116 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4117 			    "chip#", cpi->cpi_chipid);
4118 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4119 			    "clog#", cpi->cpi_clogid);
4120 		}
4121 	}
4122 
4123 	/* cpuid-features */
4124 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4125 	    "cpuid-features", CPI_FEATURES_EDX(cpi));
4126 
4127 
4128 	/* cpuid-features-ecx */
4129 	switch (cpi->cpi_vendor) {
4130 	case X86_VENDOR_Intel:
4131 		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4132 		break;
4133 	default:
4134 		create = 0;
4135 		break;
4136 	}
4137 	if (create)
4138 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4139 		    "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4140 
4141 	/* ext-cpuid-features */
4142 	switch (cpi->cpi_vendor) {
4143 	case X86_VENDOR_Intel:
4144 	case X86_VENDOR_AMD:
4145 	case X86_VENDOR_Cyrix:
4146 	case X86_VENDOR_TM:
4147 	case X86_VENDOR_Centaur:
4148 		create = cpi->cpi_xmaxeax >= 0x80000001;
4149 		break;
4150 	default:
4151 		create = 0;
4152 		break;
4153 	}
4154 	if (create) {
4155 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4156 		    "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4157 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4158 		    "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4159 	}
4160 
4161 	/*
4162 	 * Brand String first appeared in Intel Pentium IV, AMD K5
4163 	 * model 1, and Cyrix GXm.  On earlier models we try and
4164 	 * simulate something similar .. so this string should always
4165 	 * same -something- about the processor, however lame.
4166 	 */
4167 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4168 	    "brand-string", cpi->cpi_brandstr);
4169 
4170 	/*
4171 	 * Finally, cache and tlb information
4172 	 */
4173 	switch (x86_which_cacheinfo(cpi)) {
4174 	case X86_VENDOR_Intel:
4175 		intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4176 		break;
4177 	case X86_VENDOR_Cyrix:
4178 		cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4179 		break;
4180 	case X86_VENDOR_AMD:
4181 		amd_cache_info(cpi, cpu_devi);
4182 		break;
4183 	default:
4184 		break;
4185 	}
4186 }
4187 
/*
 * Request/result record for an L2 cache query.  The three pointers name
 * optional output locations; any of them may be NULL, in which case the
 * corresponding value is simply not stored.
 */
struct l2info {
	int *l2i_csz;		/* out: cache size, or NULL if not wanted */
	int *l2i_lsz;		/* out: line size, or NULL if not wanted */
	int *l2i_assoc;		/* out: associativity, or NULL if not wanted */
	int l2i_ret;		/* cache size found by the lookup */
};
4194 
4195 /*
4196  * A cacheinfo walker that fetches the size, line-size and associativity
4197  * of the L2 cache
4198  */
4199 static int
4200 intel_l2cinfo(void *arg, const struct cachetab *ct)
4201 {
4202 	struct l2info *l2i = arg;
4203 	int *ip;
4204 
4205 	if (ct->ct_label != l2_cache_str &&
4206 	    ct->ct_label != sl2_cache_str)
4207 		return (0);	/* not an L2 -- keep walking */
4208 
4209 	if ((ip = l2i->l2i_csz) != NULL)
4210 		*ip = ct->ct_size;
4211 	if ((ip = l2i->l2i_lsz) != NULL)
4212 		*ip = ct->ct_line_size;
4213 	if ((ip = l2i->l2i_assoc) != NULL)
4214 		*ip = ct->ct_assoc;
4215 	l2i->l2i_ret = ct->ct_size;
4216 	return (1);		/* was an L2 -- terminate walk */
4217 }
4218 
/*
 * AMD L2/L3 cache and TLB associativity encoding.
 *
 *	For the L1 cache and TLB the 8-bit field reported by the processor
 *	is the associativity itself.  For the L2 cache and TLB the field is
 *	only 4 bits wide and holds an encoded value: use it as an index into
 *	amd_afd[] to obtain the actual associativity.
 *
 *	An entry of -1 marks an undefined encoding; an entry of 0 means
 *	fully associative.
 */

static int amd_afd[] = {
	/* 0x0 - 0x3 */	-1, 1, 2, -1,
	/* 0x4 - 0x7 */	4, -1, 8, -1,
	/* 0x8 - 0xb */	16, -1, 32, 48,
	/* 0xc - 0xf */	64, 96, 128, 0
};
4231 
4232 static void
4233 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4234 {
4235 	struct cpuid_regs *cp;
4236 	uint_t size, assoc;
4237 	int i;
4238 	int *ip;
4239 
4240 	if (cpi->cpi_xmaxeax < 0x80000006)
4241 		return;
4242 	cp = &cpi->cpi_extd[6];
4243 
4244 	if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4245 	    (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4246 		uint_t cachesz = size * 1024;
4247 		assoc = amd_afd[i];
4248 
4249 		ASSERT(assoc != -1);
4250 
4251 		if ((ip = l2i->l2i_csz) != NULL)
4252 			*ip = cachesz;
4253 		if ((ip = l2i->l2i_lsz) != NULL)
4254 			*ip = BITX(cp->cp_ecx, 7, 0);
4255 		if ((ip = l2i->l2i_assoc) != NULL)
4256 			*ip = assoc;
4257 		l2i->l2i_ret = cachesz;
4258 	}
4259 }
4260 
4261 int
4262 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4263 {
4264 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4265 	struct l2info __l2info, *l2i = &__l2info;
4266 
4267 	l2i->l2i_csz = csz;
4268 	l2i->l2i_lsz = lsz;
4269 	l2i->l2i_assoc = assoc;
4270 	l2i->l2i_ret = -1;
4271 
4272 	switch (x86_which_cacheinfo(cpi)) {
4273 	case X86_VENDOR_Intel:
4274 		intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4275 		break;
4276 	case X86_VENDOR_Cyrix:
4277 		cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4278 		break;
4279 	case X86_VENDOR_AMD:
4280 		amd_l2cacheinfo(cpi, l2i);
4281 		break;
4282 	default:
4283 		break;
4284 	}
4285 	return (l2i->l2i_ret);
4286 }
4287 
4288 #if !defined(__xpv)
4289 
4290 uint32_t *
4291 cpuid_mwait_alloc(cpu_t *cpu)
4292 {
4293 	uint32_t	*ret;
4294 	size_t		mwait_size;
4295 
4296 	ASSERT(cpuid_checkpass(CPU, 2));
4297 
4298 	mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4299 	if (mwait_size == 0)
4300 		return (NULL);
4301 
4302 	/*
4303 	 * kmem_alloc() returns cache line size aligned data for mwait_size
4304 	 * allocations.  mwait_size is currently cache line sized.  Neither
4305 	 * of these implementation details are guarantied to be true in the
4306 	 * future.
4307 	 *
4308 	 * First try allocating mwait_size as kmem_alloc() currently returns
4309 	 * correctly aligned memory.  If kmem_alloc() does not return
4310 	 * mwait_size aligned memory, then use mwait_size ROUNDUP.
4311 	 *
4312 	 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4313 	 * decide to free this memory.
4314 	 */
4315 	ret = kmem_zalloc(mwait_size, KM_SLEEP);
4316 	if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4317 		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4318 		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4319 		*ret = MWAIT_RUNNING;
4320 		return (ret);
4321 	} else {
4322 		kmem_free(ret, mwait_size);
4323 		ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4324 		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4325 		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4326 		ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4327 		*ret = MWAIT_RUNNING;
4328 		return (ret);
4329 	}
4330 }
4331 
4332 void
4333 cpuid_mwait_free(cpu_t *cpu)
4334 {
4335 	if (cpu->cpu_m.mcpu_cpi == NULL) {
4336 		return;
4337 	}
4338 
4339 	if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4340 	    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4341 		kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4342 		    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4343 	}
4344 
4345 	cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4346 	cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4347 }
4348 
4349 void
4350 patch_tsc_read(int flag)
4351 {
4352 	size_t cnt;
4353 
4354 	switch (flag) {
4355 	case X86_NO_TSC:
4356 		cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4357 		(void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4358 		break;
4359 	case X86_HAVE_TSCP:
4360 		cnt = &_tscp_end - &_tscp_start;
4361 		(void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4362 		break;
4363 	case X86_TSC_MFENCE:
4364 		cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4365 		(void) memcpy((void *)tsc_read,
4366 		    (void *)&_tsc_mfence_start, cnt);
4367 		break;
4368 	case X86_TSC_LFENCE:
4369 		cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4370 		(void) memcpy((void *)tsc_read,
4371 		    (void *)&_tsc_lfence_start, cnt);
4372 		break;
4373 	default:
4374 		break;
4375 	}
4376 }
4377 
4378 int
4379 cpuid_deep_cstates_supported(void)
4380 {
4381 	struct cpuid_info *cpi;
4382 	struct cpuid_regs regs;
4383 
4384 	ASSERT(cpuid_checkpass(CPU, 1));
4385 
4386 	cpi = CPU->cpu_m.mcpu_cpi;
4387 
4388 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4389 		return (0);
4390 
4391 	switch (cpi->cpi_vendor) {
4392 	case X86_VENDOR_Intel:
4393 		if (cpi->cpi_xmaxeax < 0x80000007)
4394 			return (0);
4395 
4396 		/*
4397 		 * TSC run at a constant rate in all ACPI C-states?
4398 		 */
4399 		regs.cp_eax = 0x80000007;
4400 		(void) __cpuid_insn(&regs);
4401 		return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4402 
4403 	default:
4404 		return (0);
4405 	}
4406 }
4407 
4408 #endif	/* !__xpv */
4409 
4410 void
4411 post_startup_cpu_fixups(void)
4412 {
4413 #ifndef __xpv
4414 	/*
4415 	 * Some AMD processors support C1E state. Entering this state will
4416 	 * cause the local APIC timer to stop, which we can't deal with at
4417 	 * this time.
4418 	 */
4419 	if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4420 		on_trap_data_t otd;
4421 		uint64_t reg;
4422 
4423 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
4424 			reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4425 			/* Disable C1E state if it is enabled by BIOS */
4426 			if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4427 			    AMD_ACTONCMPHALT_MASK) {
4428 				reg &= ~(AMD_ACTONCMPHALT_MASK <<
4429 				    AMD_ACTONCMPHALT_SHIFT);
4430 				wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4431 			}
4432 		}
4433 		no_trap();
4434 	}
4435 #endif	/* !__xpv */
4436 }
4437 
4438 /*
4439  * Setup necessary registers to enable XSAVE feature on this processor.
4440  * This function needs to be called early enough, so that no xsave/xrstor
4441  * ops will execute on the processor before the MSRs are properly set up.
4442  *
4443  * Current implementation has the following assumption:
4444  * - cpuid_pass1() is done, so that X86 features are known.
4445  * - fpu_probe() is done, so that fp_save_mech is chosen.
4446  */
4447 void
4448 xsave_setup_msr(cpu_t *cpu)
4449 {
4450 	ASSERT(fp_save_mech == FP_XSAVE);
4451 	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4452 
4453 	/* Enable OSXSAVE in CR4. */
4454 	setcr4(getcr4() | CR4_OSXSAVE);
4455 	/*
4456 	 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4457 	 * correct value.
4458 	 */
4459 	cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4460 	setup_xfem();
4461 }
4462 
4463 /*
4464  * Starting with the Westmere processor the local
4465  * APIC timer will continue running in all C-states,
4466  * including the deepest C-states.
4467  */
4468 int
4469 cpuid_arat_supported(void)
4470 {
4471 	struct cpuid_info *cpi;
4472 	struct cpuid_regs regs;
4473 
4474 	ASSERT(cpuid_checkpass(CPU, 1));
4475 	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4476 
4477 	cpi = CPU->cpu_m.mcpu_cpi;
4478 
4479 	switch (cpi->cpi_vendor) {
4480 	case X86_VENDOR_Intel:
4481 		/*
4482 		 * Always-running Local APIC Timer is
4483 		 * indicated by CPUID.6.EAX[2].
4484 		 */
4485 		if (cpi->cpi_maxeax >= 6) {
4486 			regs.cp_eax = 6;
4487 			(void) cpuid_insn(NULL, &regs);
4488 			return (regs.cp_eax & CPUID_CSTATE_ARAT);
4489 		} else {
4490 			return (0);
4491 		}
4492 	default:
4493 		return (0);
4494 	}
4495 }
4496 
4497 /*
4498  * Check support for Intel ENERGY_PERF_BIAS feature
4499  */
4500 int
4501 cpuid_iepb_supported(struct cpu *cp)
4502 {
4503 	struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4504 	struct cpuid_regs regs;
4505 
4506 	ASSERT(cpuid_checkpass(cp, 1));
4507 
4508 	if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4509 	    !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4510 		return (0);
4511 	}
4512 
4513 	/*
4514 	 * Intel ENERGY_PERF_BIAS MSR is indicated by
4515 	 * capability bit CPUID.6.ECX.3
4516 	 */
4517 	if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4518 		return (0);
4519 
4520 	regs.cp_eax = 0x6;
4521 	(void) cpuid_insn(NULL, &regs);
4522 	return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4523 }
4524 
4525 /*
4526  * Check support for TSC deadline timer
4527  *
4528  * TSC deadline timer provides a superior software programming
4529  * model over local APIC timer that eliminates "time drifts".
4530  * Instead of specifying a relative time, software specifies an
4531  * absolute time as the target at which the processor should
4532  * generate a timer event.
4533  */
4534 int
4535 cpuid_deadline_tsc_supported(void)
4536 {
4537 	struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4538 	struct cpuid_regs regs;
4539 
4540 	ASSERT(cpuid_checkpass(CPU, 1));
4541 	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4542 
4543 	switch (cpi->cpi_vendor) {
4544 	case X86_VENDOR_Intel:
4545 		if (cpi->cpi_maxeax >= 1) {
4546 			regs.cp_eax = 1;
4547 			(void) cpuid_insn(NULL, &regs);
4548 			return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4549 		} else {
4550 			return (0);
4551 		}
4552 	default:
4553 		return (0);
4554 	}
4555 }
4556 
4557 #if defined(__amd64) && !defined(__xpv)
4558 /*
4559  * Patch in versions of bcopy for high performance Intel Nhm processors
4560  * and later...
4561  */
4562 void
4563 patch_memops(uint_t vendor)
4564 {
4565 	size_t cnt, i;
4566 	caddr_t to, from;
4567 
4568 	if ((vendor == X86_VENDOR_Intel) &&
4569 	    is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4570 		cnt = &bcopy_patch_end - &bcopy_patch_start;
4571 		to = &bcopy_ck_size;
4572 		from = &bcopy_patch_start;
4573 		for (i = 0; i < cnt; i++) {
4574 			*to++ = *from++;
4575 		}
4576 	}
4577 }
4578 #endif  /* __amd64 && !__xpv */
4579 
4580 /*
4581  * This function finds the number of bits to represent the number of cores per
4582  * chip and the number of strands per core for the Intel platforms.
4583  * It re-uses the x2APIC cpuid code of the cpuid_pass2().
4584  */
4585 void
4586 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4587 {
4588 	struct cpuid_regs regs;
4589 	struct cpuid_regs *cp = &regs;
4590 
4591 	if (vendor != X86_VENDOR_Intel) {
4592 		return;
4593 	}
4594 
4595 	/* if the cpuid level is 0xB, extended topo is available. */
4596 	cp->cp_eax = 0;
4597 	if (__cpuid_insn(cp) >= 0xB) {
4598 
4599 		cp->cp_eax = 0xB;
4600 		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4601 		(void) __cpuid_insn(cp);
4602 
4603 		/*
4604 		 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4605 		 * indicates that the extended topology enumeration leaf is
4606 		 * available.
4607 		 */
4608 		if (cp->cp_ebx) {
4609 			uint_t coreid_shift = 0;
4610 			uint_t chipid_shift = 0;
4611 			uint_t i;
4612 			uint_t level;
4613 
4614 			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4615 				cp->cp_eax = 0xB;
4616 				cp->cp_ecx = i;
4617 
4618 				(void) __cpuid_insn(cp);
4619 				level = CPI_CPU_LEVEL_TYPE(cp);
4620 
4621 				if (level == 1) {
4622 					/*
4623 					 * Thread level processor topology
4624 					 * Number of bits shift right APIC ID
4625 					 * to get the coreid.
4626 					 */
4627 					coreid_shift = BITX(cp->cp_eax, 4, 0);
4628 				} else if (level == 2) {
4629 					/*
4630 					 * Core level processor topology
4631 					 * Number of bits shift right APIC ID
4632 					 * to get the chipid.
4633 					 */
4634 					chipid_shift = BITX(cp->cp_eax, 4, 0);
4635 				}
4636 			}
4637 
4638 			if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4639 				*strand_nbits = coreid_shift;
4640 				*core_nbits = chipid_shift - coreid_shift;
4641 			}
4642 		}
4643 	}
4644 }
4645