xref: /titanic_51/usr/src/uts/i86pc/os/cpuid.c (revision 651c0131ccc65381cbda174bee44a4fd7a518d6b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Various routines to handle identification
30  * and classification of x86 processors.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/archsystm.h>
35 #include <sys/x86_archext.h>
36 #include <sys/kmem.h>
37 #include <sys/systm.h>
38 #include <sys/cmn_err.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/cpuvar.h>
42 #include <sys/processor.h>
43 #include <sys/pg.h>
44 #include <sys/fp.h>
45 #include <sys/controlregs.h>
46 #include <sys/auxv_386.h>
47 #include <sys/bitmap.h>
48 #include <sys/memnode.h>
49 
50 /*
51  * Pass 0 of cpuid feature analysis happens in locore. It contains special code
52  * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
53  * them accordingly. For most modern processors, feature detection occurs here
54  * in pass 1.
55  *
56  * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
57  * for the boot CPU and does the basic analysis that the early kernel needs.
58  * x86_feature is set based on the return value of cpuid_pass1() of the boot
59  * CPU.
60  *
61  * Pass 1 includes:
62  *
63  *	o Determining vendor/model/family/stepping and setting x86_type and
64  *	  x86_vendor accordingly.
65  *	o Processing the feature flags returned by the cpuid instruction while
66  *	  applying any workarounds or tricks for the specific processor.
67  *	o Mapping the feature flags into Solaris feature bits (X86_*).
68  *	o Processing extended feature flags if supported by the processor,
69  *	  again while applying specific processor knowledge.
70  *	o Determining the CMT characteristics of the system.
71  *
72  * Pass 1 is done on non-boot CPUs during their initialization and the results
73  * are used only as a meager attempt at ensuring that all processors within the
74  * system support the same features.
75  *
76  * Pass 2 of cpuid feature analysis happens just at the beginning
77  * of startup().  It just copies in and corrects the remainder
78  * of the cpuid data we depend on: standard cpuid functions that we didn't
79  * need for pass1 feature analysis, and extended cpuid functions beyond the
80  * simple feature processing done in pass1.
81  *
82  * Pass 3 of cpuid analysis is invoked after basic kernel services; in
83  * particular kernel memory allocation has been made available. It creates a
84  * readable brand string based on the data collected in the first two passes.
85  *
86  * Pass 4 of cpuid analysis is invoked after post_startup() when all
87  * the support infrastructure for various hardware features has been
88  * initialized. It determines which processor features will be reported
89  * to userland via the aux vector.
90  *
91  * All passes are executed on all CPUs, but only the boot CPU determines what
92  * features the kernel will use.
93  *
94  * Much of the worst junk in this file is for the support of processors
95  * that didn't really implement the cpuid instruction properly.
96  *
97  * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
98  * the pass numbers.  Accordingly, changes to the pass code may require changes
99  * to the accessor code.
100  */
101 
/*
 * Kernel-wide view of the processor.  As described above, x86_feature is
 * set from the boot CPU's cpuid_pass1() return value.  cpuid_pass1() itself
 * stores the detected vendor in x86_vendor and, for Intel parts, the
 * processor class in x86_type.
 */
102 uint_t x86_feature = 0;
103 uint_t x86_vendor = X86_VENDOR_IntelClone;
104 uint_t x86_type = X86_TYPE_OTHER;
105 
/* Both are set to 1 by cpuid_pass1() when an Intel "legacy P6" is found. */
106 uint_t pentiumpro_bug4046376;
107 uint_t pentiumpro_bug4064495;
108 
/* NOTE(review): not referenced in the visible part of this file. */
109 uint_t enable486;
110 
111 /*
112  * This string is for processors rumored to support the cpuid
113  * instruction, and is used by locore.s to figure out how to set x86_vendor.
 * cpuid_pass1() also compares the cpuid vendor string against it to
 * select X86_VENDOR_Cyrix.
114  */
115 const char CyrixInstead[] = "CyrixInstead";
116 
117 /*
118  * monitor/mwait info, kept per CPU in cpuid_info.cpi_mwait.
 *
 * support is a mask of the MWAIT_* flag bits; cpuid_pass1() sets
 * MWAIT_SUPPORT in it when the MON feature bit survives masking.
 * NOTE(review): mon_min/mon_max are presumably filled from cpuid
 * function 5 (see MWAIT_SIZE_MIN/MWAIT_SIZE_MAX); the code that does
 * so is not in the visible part of this file.
119  */
120 struct mwait_info {
121 	size_t		mon_min;	/* min size to avoid missed wakeups */
122 	size_t		mon_max;	/* size to avoid false wakeups */
123 	uint32_t	support;	/* processor support of monitor/mwait */
124 };
125 
126 /*
127  * These constants determine how many of the elements of the
128  * cpuid we cache in the cpuid_info data structure; the
129  * remaining elements are accessible via the cpuid instruction.
 * They are the dimensions of the cpi_std[] and cpi_extd[] arrays.
130  */
131 
132 #define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
133 #define	NMAX_CPI_EXTD	9		/* eax = 0x80000000 .. 0x80000008 */
134 
/*
 * Per-CPU cache of raw cpuid results plus the values derived from them.
 * cpu0 uses the statically allocated cpuid_info0; every other cpu gets a
 * zero-filled instance from cpuid_alloc_space().
 */
135 struct cpuid_info {
136 	uint_t cpi_pass;		/* last pass completed */
137 	/*
138 	 * standard function information
139 	 */
140 	uint_t cpi_maxeax;		/* fn 0: %eax */
141 	char cpi_vendorstr[13];		/* fn 0: %ebx:%edx:%ecx, NUL-terminated */
142 	uint_t cpi_vendor;		/* enum of cpi_vendorstr */
143 
144 	uint_t cpi_family;		/* fn 1: extended family */
145 	uint_t cpi_model;		/* fn 1: extended model */
146 	uint_t cpi_step;		/* fn 1: stepping */
147 	chipid_t cpi_chipid;		/* fn 1: %ebx: chip # on ht cpus */
148 	uint_t cpi_brandid;		/* fn 1: %ebx: brand ID */
149 	int cpi_clogid;			/* fn 1: %ebx: thread # */
150 	uint_t cpi_ncpu_per_chip;	/* fn 1: %ebx: logical cpu count */
151 	uint8_t cpi_cacheinfo[16];	/* fn 2: intel-style cache desc */
152 	uint_t cpi_ncache;		/* fn 2: number of elements */
153 	uint_t cpi_ncpu_shr_last_cache;	/* fn 4: %eax: ncpus sharing cache */
154 	id_t cpi_last_lvl_cacheid;	/* fn 4: %eax: derived cache id */
155 	uint_t cpi_std_4_size;		/* fn 4: number of fn 4 elements */
156 	struct cpuid_regs **cpi_std_4;	/* fn 4: %ecx == 0 .. fn4_size */
157 	struct cpuid_regs cpi_std[NMAX_CPI_STD];	/* 0 .. 5 */
158 	/*
159 	 * extended function information
160 	 */
161 	uint_t cpi_xmaxeax;		/* fn 0x80000000: %eax */
162 	char cpi_brandstr[49];		/* fn 0x8000000[234] */
163 	uint8_t cpi_pabits;		/* fn 0x80000008: %eax[7-0] */
164 	uint8_t cpi_vabits;		/* fn 0x80000008: %eax[15-8] */
165 	struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x8000000[0-8] */
166 	id_t cpi_coreid;		/* core id, assigned in cpuid_pass1() */
167 	uint_t cpi_ncore_per_chip;	/* AMD: fn 0x80000008: %ecx[7-0] */
168 					/* Intel: fn 4: %eax[31-26] */
169 	/*
170 	 * supported feature information
	 * (cpi_support[] is indexed by the *_FEATURES constants below)
171 	 */
172 	uint32_t cpi_support[5];
173 #define	STD_EDX_FEATURES	0
174 #define	AMD_EDX_FEATURES	1
175 #define	TM_EDX_FEATURES		2
176 #define	STD_ECX_FEATURES	3
177 #define	AMD_ECX_FEATURES	4
178 	/*
179 	 * Synthesized information, where known.
180 	 */
181 	uint32_t cpi_chiprev;		/* See X86_CHIPREV_* in x86_archext.h */
182 	const char *cpi_chiprevstr;	/* synth_info() defaults to "Unknown" */
183 	uint32_t cpi_socket;		/* Chip package/socket type */
184 
185 	struct mwait_info cpi_mwait;	/* fn 5: monitor/mwait info */
186 };
187 
188 
/*
 * Storage for the boot cpu (cpu_id 0), which is set up before memory
 * allocation is available; cpuid_pass1() points cpu0 at this.
 */
189 static struct cpuid_info cpuid_info0;
190 
191 /*
192  * These bit fields are defined by the Intel Application Note AP-485
193  * "Intel Processor Identification and the CPUID Instruction"
 *
 * Each one extracts a field of the cached function 1 %eax (cpi_std[1]).
194  */
195 #define	CPI_FAMILY_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
196 #define	CPI_MODEL_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
197 #define	CPI_TYPE(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
198 #define	CPI_FAMILY(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
199 #define	CPI_STEP(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
200 #define	CPI_MODEL(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
201 
/*
 * Feature words: function 1 %edx/%ecx (cpi_std[1]) and function
 * 0x80000001 %edx/%ecx (cpi_extd[1]).  These are lvalues; cpuid_pass1()
 * masks and patches the cached function 1 words in place.
 */
202 #define	CPI_FEATURES_EDX(cpi)		((cpi)->cpi_std[1].cp_edx)
203 #define	CPI_FEATURES_ECX(cpi)		((cpi)->cpi_std[1].cp_ecx)
204 #define	CPI_FEATURES_XTD_EDX(cpi)	((cpi)->cpi_extd[1].cp_edx)
205 #define	CPI_FEATURES_XTD_ECX(cpi)	((cpi)->cpi_extd[1].cp_ecx)
206 
/*
 * Fields of the cached function 1 %ebx (cpi_std[1]):
 *
 *	[7:0]	brand index
 *	[15:8]	CLFLUSH line size, in 8-byte chunks
 *	[23:16]	logical processor count
 *	[31:24]	initial APIC id
 *
 * The four fields are adjacent and must not overlap: CPI_CHUNKS starts
 * at bit 8, directly above the brand index.
 */
#define	CPI_BRANDID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
#define	CPI_CHUNKS(cpi)		BITX((cpi)->cpi_std[1].cp_ebx, 15, 8)
#define	CPI_CPU_COUNT(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
#define	CPI_APIC_ID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
211 
/*
 * Sanity limits.  cpuid_pass1() clamps cpi_maxeax and cpi_xmaxeax to the
 * first two "in case of weird hardware".
 */
212 #define	CPI_MAXEAX_MAX		0x100		/* sanity control */
213 #define	CPI_XMAXEAX_MAX		0x80000100	/* ditto, extended fns */
214 #define	CPI_FN4_ECX_MAX		0x20		/* sanity: max fn 4 levels */
215 
/*
 * Function 4 (Deterministic Cache Parameters) macros
 * Defined by Intel Application Note AP-485
 *
 * Each macro takes a pointer to the struct cpuid_regs holding one
 * function 4 result and extracts a single field from it.
 */
#define	CPI_NUM_CORES(regs)		BITX((regs)->cp_eax, 31, 26)
#define	CPI_NTHR_SHR_CACHE(regs)	BITX((regs)->cp_eax, 25, 14)
#define	CPI_FULL_ASSOC_CACHE(regs)	BITX((regs)->cp_eax, 9, 9)
#define	CPI_SELF_INIT_CACHE(regs)	BITX((regs)->cp_eax, 8, 8)
#define	CPI_CACHE_LVL(regs)		BITX((regs)->cp_eax, 7, 5)
#define	CPI_CACHE_TYPE(regs)		BITX((regs)->cp_eax, 4, 0)

#define	CPI_CACHE_WAYS(regs)		BITX((regs)->cp_ebx, 31, 22)
#define	CPI_CACHE_PARTS(regs)		BITX((regs)->cp_ebx, 21, 12)
#define	CPI_CACHE_COH_LN_SZ(regs)	BITX((regs)->cp_ebx, 11, 0)

/*
 * The set count occupies all 32 bits of %ecx, so there is nothing to
 * mask off and the register is read directly.
 * NOTE(review): BITX() is defined outside this file.  If it builds its
 * mask as (1 << width) - 1 in a 32-bit type, asking it for bits 31..0
 * would shift by the full word width, which C leaves undefined; the
 * direct read does not depend on that.
 */
#define	CPI_CACHE_SETS(regs)		((regs)->cp_ecx)

#define	CPI_PREFCH_STRIDE(regs)		BITX((regs)->cp_edx, 9, 0)
234 
235 
/*
 * A couple of shorthand macros to identify "later" P6-family chips
 * like the Pentium M and Core.  First, the "older" P6-based stuff
 * (loosely defined as "pre-Pentium-4"):
 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
 *
 * Both macros take a pointer to a struct cpuid_info and look only at
 * its cpi_family and cpi_model members.  The argument is parenthesized
 * at every use so that any pointer expression (&info, p + 1, ...) can
 * be passed.
 */

#define	IS_LEGACY_P6(cpi) (			\
	(cpi)->cpi_family == 6 &&		\
		((cpi)->cpi_model == 1 ||	\
		(cpi)->cpi_model == 3 ||	\
		(cpi)->cpi_model == 5 ||	\
		(cpi)->cpi_model == 6 ||	\
		(cpi)->cpi_model == 7 ||	\
		(cpi)->cpi_model == 8 ||	\
		(cpi)->cpi_model == 0xA ||	\
		(cpi)->cpi_model == 0xB)	\
)

/* A "new F6" is everything with family 6 that's not the above */
#define	IS_NEW_F6(cpi) (((cpi)->cpi_family == 6) && !IS_LEGACY_P6(cpi))
257 
258 /*
259  * AMD family 0xf socket types.
260  * First index is 0 for revs B thru E, 1 for F and G.
261  * Second index by (model & 0x3)
 *
 * synth_amd_info() looks this up as amd_skts[rm_sktidx][model & 0x3],
 * with rm_sktidx taken from the matching amd_revmap entry.
262  */
263 static uint32_t amd_skts[2][4] = {
264 	{
265 		X86_SOCKET_754,		/* 0b00 */
266 		X86_SOCKET_940,		/* 0b01 */
267 		X86_SOCKET_754,		/* 0b10 */
268 		X86_SOCKET_939		/* 0b11 */
269 	},
270 	{
271 		X86_SOCKET_S1g1,	/* 0b00 */
272 		X86_SOCKET_F1207,	/* 0b01 */
273 		X86_SOCKET_UNKNOWN,	/* 0b10 */
274 		X86_SOCKET_AM2		/* 0b11 */
275 	}
276 };
277 
278 /*
279  * Table for mapping AMD Family 0xf model/stepping combination to
280  * chip "revision" and socket type.  Only rm_family 0xf is used at the
281  * moment, but AMD family 0x10 will extend the existing revision names
282  * so will likely also use this table.
283  *
284  * The first member of this array that matches a given family, extended model
285  * plus model range, and stepping range will be considered a match.
 * Order therefore matters: the narrow rev B and C0 entries must stay
 * ahead of the catch-all rev CG entry that covers the same models.
286  */
287 static const struct amd_rev_mapent {
288 	uint_t rm_family;		/* family to match exactly */
289 	uint_t rm_modello;		/* lowest model matched (inclusive) */
290 	uint_t rm_modelhi;		/* highest model matched (inclusive) */
291 	uint_t rm_steplo;		/* lowest stepping (inclusive) */
292 	uint_t rm_stephi;		/* highest stepping (inclusive) */
293 	uint32_t rm_chiprev;		/* stored in cpi_chiprev on a match */
294 	const char *rm_chiprevstr;	/* stored in cpi_chiprevstr */
295 	int rm_sktidx;			/* first index into amd_skts[][] */
296 } amd_revmap[] = {
297 	/*
298 	 * Rev B includes model 0x4 stepping 0 and model 0x5 stepping 0 and 1.
299 	 */
300 	{ 0xf, 0x04, 0x04, 0x0, 0x0, X86_CHIPREV_AMD_F_REV_B, "B", 0 },
301 	{ 0xf, 0x05, 0x05, 0x0, 0x1, X86_CHIPREV_AMD_F_REV_B, "B", 0 },
302 	/*
303 	 * Rev C0 includes model 0x4 stepping 8 and model 0x5 stepping 8
304 	 */
305 	{ 0xf, 0x04, 0x05, 0x8, 0x8, X86_CHIPREV_AMD_F_REV_C0, "C0", 0 },
306 	/*
307 	 * Rev CG is the rest of extended model 0x0 - i.e., everything
308 	 * but the rev B and C0 combinations covered above.
309 	 */
310 	{ 0xf, 0x00, 0x0f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_CG, "CG", 0 },
311 	/*
312 	 * Rev D has extended model 0x1.
313 	 */
314 	{ 0xf, 0x10, 0x1f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_D, "D", 0 },
315 	/*
316 	 * Rev E has extended model 0x2.
317 	 * Extended model 0x3 is unused but available to grow into.
318 	 */
319 	{ 0xf, 0x20, 0x3f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_E, "E", 0 },
320 	/*
321 	 * Rev F has extended models 0x4 and 0x5.
322 	 */
323 	{ 0xf, 0x40, 0x5f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_F, "F", 1 },
324 	/*
325 	 * Rev G has extended model 0x6.
326 	 */
327 	{ 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", 1 },
328 };
329 
/*
 * Info for monitor/mwait idle loop.
 *
 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
 * 2006.
 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
 * Documentation Updates" #33633, Rev 2.05, December 2006.
 *
 * The MWAIT_SUPPORT/MWAIT_EXTENSIONS/MWAIT_ECX_INT_ENABLE values are flag
 * bits for mwait_info.support.  The remaining macros read the cached
 * cpuid results: function 1 %ecx for MWAIT_SUPPORTED, function 5
 * (cpi_std[5]) for everything else.
 */
#define	MWAIT_SUPPORT		(0x00000001)	/* mwait supported */
#define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
#define	MWAIT_ECX_INT_ENABLE	(0x00000004)	/* ecx 1 extension supported */
#define	MWAIT_SUPPORTED(cpi)	((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
#define	MWAIT_INT_ENABLE(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x2)
#define	MWAIT_EXTENSION(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x1)
#define	MWAIT_SIZE_MIN(cpi)	BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
#define	MWAIT_SIZE_MAX(cpi)	BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
/*
 * Number of sub-cstates for a given c-state.
 *
 * c_state is used directly as the low bit position of a 4-bit field in
 * function 5 %edx.  It is parenthesized so that an expression argument
 * (e.g. "n << 2") is evaluated before the "+ 3" is applied.
 */
#define	MWAIT_NUM_SUBC_STATES(cpi, c_state)			\
	BITX((cpi)->cpi_std[5].cp_edx, (c_state) + 3, (c_state))
352 
353 static void
354 synth_amd_info(struct cpuid_info *cpi)
355 {
356 	const struct amd_rev_mapent *rmp;
357 	uint_t family, model, step;
358 	int i;
359 
360 	/*
361 	 * Currently only AMD family 0xf uses these fields.
362 	 */
363 	if (cpi->cpi_family != 0xf)
364 		return;
365 
366 	family = cpi->cpi_family;
367 	model = cpi->cpi_model;
368 	step = cpi->cpi_step;
369 
370 	for (i = 0, rmp = amd_revmap; i < sizeof (amd_revmap) / sizeof (*rmp);
371 	    i++, rmp++) {
372 		if (family == rmp->rm_family &&
373 		    model >= rmp->rm_modello && model <= rmp->rm_modelhi &&
374 		    step >= rmp->rm_steplo && step <= rmp->rm_stephi) {
375 			cpi->cpi_chiprev = rmp->rm_chiprev;
376 			cpi->cpi_chiprevstr = rmp->rm_chiprevstr;
377 			cpi->cpi_socket = amd_skts[rmp->rm_sktidx][model & 0x3];
378 			return;
379 		}
380 	}
381 }
382 
383 static void
384 synth_info(struct cpuid_info *cpi)
385 {
386 	cpi->cpi_chiprev = X86_CHIPREV_UNKNOWN;
387 	cpi->cpi_chiprevstr = "Unknown";
388 	cpi->cpi_socket = X86_SOCKET_UNKNOWN;
389 
390 	switch (cpi->cpi_vendor) {
391 	case X86_VENDOR_AMD:
392 		synth_amd_info(cpi);
393 		break;
394 
395 	default:
396 		break;
397 
398 	}
399 }
400 
401 /*
402  * Apply various platform-dependent restrictions where the
403  * underlying platform restrictions mean the CPU can be marked
404  * as less capable than its cpuid instruction would imply.
 *
 * Here the hook expands to nothing, so its call sites are no-ops.
405  */
406 
407 #define	platform_cpuid_mangle(vendor, eax, cp)	/* nothing */
408 
409 /*
410  *  Some undocumented ways of patching the results of the cpuid
411  *  instruction to permit running Solaris 10 on future cpus that
412  *  we don't currently support.  Could be set to non-zero values
413  *  via settings in eeprom.
 *
 *  cpuid_pass1() ORs the "include" words into the function 1 feature
 *  words and then clears the "exclude" bits, so an excluded bit wins
 *  over an included one.
414  */
415 
416 uint32_t cpuid_feature_ecx_include;
417 uint32_t cpuid_feature_ecx_exclude;
418 uint32_t cpuid_feature_edx_include;
419 uint32_t cpuid_feature_edx_exclude;
420 
421 void
422 cpuid_alloc_space(cpu_t *cpu)
423 {
424 	/*
425 	 * By convention, cpu0 is the boot cpu, which is set up
426 	 * before memory allocation is available.  All other cpus get
427 	 * their cpuid_info struct allocated here.
428 	 */
429 	ASSERT(cpu->cpu_id != 0);
430 	cpu->cpu_m.mcpu_cpi =
431 	    kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
432 }
433 
434 void
435 cpuid_free_space(cpu_t *cpu)
436 {
437 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
438 	int i;
439 
440 	ASSERT(cpu->cpu_id != 0);
441 
442 	/*
443 	 * Free up any function 4 related dynamic storage
444 	 */
445 	for (i = 1; i < cpi->cpi_std_4_size; i++)
446 		kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
447 	if (cpi->cpi_std_4_size > 0)
448 		kmem_free(cpi->cpi_std_4,
449 		    cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
450 
451 	kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi));
452 }
453 
454 uint_t
455 cpuid_pass1(cpu_t *cpu)
456 {
457 	uint32_t mask_ecx, mask_edx;
458 	uint_t feature = X86_CPUID;
459 	struct cpuid_info *cpi;
460 	struct cpuid_regs *cp;
461 	int xcpuid;
462 
463 
464 	/*
465 	 * Space statically allocated for cpu0, ensure pointer is set
466 	 */
467 	if (cpu->cpu_id == 0)
468 		cpu->cpu_m.mcpu_cpi = &cpuid_info0;
469 	cpi = cpu->cpu_m.mcpu_cpi;
470 	ASSERT(cpi != NULL);
471 	cp = &cpi->cpi_std[0];
472 	cp->cp_eax = 0;
473 	cpi->cpi_maxeax = __cpuid_insn(cp);
474 	{
475 		uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
476 		*iptr++ = cp->cp_ebx;
477 		*iptr++ = cp->cp_edx;
478 		*iptr++ = cp->cp_ecx;
479 		*(char *)&cpi->cpi_vendorstr[12] = '\0';
480 	}
481 
482 	/*
483 	 * Map the vendor string to a type code
484 	 */
485 	if (strcmp(cpi->cpi_vendorstr, "GenuineIntel") == 0)
486 		cpi->cpi_vendor = X86_VENDOR_Intel;
487 	else if (strcmp(cpi->cpi_vendorstr, "AuthenticAMD") == 0)
488 		cpi->cpi_vendor = X86_VENDOR_AMD;
489 	else if (strcmp(cpi->cpi_vendorstr, "GenuineTMx86") == 0)
490 		cpi->cpi_vendor = X86_VENDOR_TM;
491 	else if (strcmp(cpi->cpi_vendorstr, CyrixInstead) == 0)
492 		/*
493 		 * CyrixInstead is a variable used by the Cyrix detection code
494 		 * in locore.
495 		 */
496 		cpi->cpi_vendor = X86_VENDOR_Cyrix;
497 	else if (strcmp(cpi->cpi_vendorstr, "UMC UMC UMC ") == 0)
498 		cpi->cpi_vendor = X86_VENDOR_UMC;
499 	else if (strcmp(cpi->cpi_vendorstr, "NexGenDriven") == 0)
500 		cpi->cpi_vendor = X86_VENDOR_NexGen;
501 	else if (strcmp(cpi->cpi_vendorstr, "CentaurHauls") == 0)
502 		cpi->cpi_vendor = X86_VENDOR_Centaur;
503 	else if (strcmp(cpi->cpi_vendorstr, "RiseRiseRise") == 0)
504 		cpi->cpi_vendor = X86_VENDOR_Rise;
505 	else if (strcmp(cpi->cpi_vendorstr, "SiS SiS SiS ") == 0)
506 		cpi->cpi_vendor = X86_VENDOR_SiS;
507 	else if (strcmp(cpi->cpi_vendorstr, "Geode by NSC") == 0)
508 		cpi->cpi_vendor = X86_VENDOR_NSC;
509 	else
510 		cpi->cpi_vendor = X86_VENDOR_IntelClone;
511 
512 	x86_vendor = cpi->cpi_vendor; /* for compatibility */
513 
514 	/*
515 	 * Limit the range in case of weird hardware
516 	 */
517 	if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
518 		cpi->cpi_maxeax = CPI_MAXEAX_MAX;
519 	if (cpi->cpi_maxeax < 1)
520 		goto pass1_done;
521 
522 	cp = &cpi->cpi_std[1];
523 	cp->cp_eax = 1;
524 	(void) __cpuid_insn(cp);
525 
526 	/*
527 	 * Extract identifying constants for easy access.
528 	 */
529 	cpi->cpi_model = CPI_MODEL(cpi);
530 	cpi->cpi_family = CPI_FAMILY(cpi);
531 
532 	if (cpi->cpi_family == 0xf)
533 		cpi->cpi_family += CPI_FAMILY_XTD(cpi);
534 
535 	/*
536 	 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
537 	 * Intel, and presumably everyone else, uses model == 0xf, as
538 	 * one would expect (max value means possible overflow).  Sigh.
539 	 */
540 
541 	switch (cpi->cpi_vendor) {
542 	case X86_VENDOR_AMD:
543 		if (CPI_FAMILY(cpi) == 0xf)
544 			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
545 		break;
546 	default:
547 		if (cpi->cpi_model == 0xf)
548 			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
549 		break;
550 	}
551 
552 	cpi->cpi_step = CPI_STEP(cpi);
553 	cpi->cpi_brandid = CPI_BRANDID(cpi);
554 
555 	/*
556 	 * *default* assumptions:
557 	 * - believe %edx feature word
558 	 * - ignore %ecx feature word
559 	 * - 32-bit virtual and physical addressing
560 	 */
561 	mask_edx = 0xffffffff;
562 	mask_ecx = 0;
563 
564 	cpi->cpi_pabits = cpi->cpi_vabits = 32;
565 
566 	switch (cpi->cpi_vendor) {
567 	case X86_VENDOR_Intel:
568 		if (cpi->cpi_family == 5)
569 			x86_type = X86_TYPE_P5;
570 		else if (IS_LEGACY_P6(cpi)) {
571 			x86_type = X86_TYPE_P6;
572 			pentiumpro_bug4046376 = 1;
573 			pentiumpro_bug4064495 = 1;
574 			/*
575 			 * Clear the SEP bit when it was set erroneously
576 			 */
577 			if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
578 				cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
579 		} else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
580 			x86_type = X86_TYPE_P4;
581 			/*
582 			 * We don't currently depend on any of the %ecx
583 			 * features until Prescott, so we'll only check
584 			 * this from P4 onwards.  We might want to revisit
585 			 * that idea later.
586 			 */
587 			mask_ecx = 0xffffffff;
588 		} else if (cpi->cpi_family > 0xf)
589 			mask_ecx = 0xffffffff;
590 		/*
591 		 * We don't support MONITOR/MWAIT if leaf 5 is not available
592 		 * to obtain the monitor linesize.
593 		 */
594 		if (cpi->cpi_maxeax < 5)
595 			mask_ecx &= ~CPUID_INTC_ECX_MON;
596 		break;
597 	case X86_VENDOR_IntelClone:
598 	default:
599 		break;
600 	case X86_VENDOR_AMD:
601 #if defined(OPTERON_ERRATUM_108)
602 		if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
603 			cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
604 			cpi->cpi_model = 0xc;
605 		} else
606 #endif
607 		if (cpi->cpi_family == 5) {
608 			/*
609 			 * AMD K5 and K6
610 			 *
611 			 * These CPUs have an incomplete implementation
612 			 * of MCA/MCE which we mask away.
613 			 */
614 			mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
615 
616 			/*
617 			 * Model 0 uses the wrong (APIC) bit
618 			 * to indicate PGE.  Fix it here.
619 			 */
620 			if (cpi->cpi_model == 0) {
621 				if (cp->cp_edx & 0x200) {
622 					cp->cp_edx &= ~0x200;
623 					cp->cp_edx |= CPUID_INTC_EDX_PGE;
624 				}
625 			}
626 
627 			/*
628 			 * Early models had problems w/ MMX; disable.
629 			 */
630 			if (cpi->cpi_model < 6)
631 				mask_edx &= ~CPUID_INTC_EDX_MMX;
632 		}
633 
634 		/*
635 		 * For newer families, SSE3 and CX16, at least, are valid;
636 		 * enable all
637 		 */
638 		if (cpi->cpi_family >= 0xf)
639 			mask_ecx = 0xffffffff;
640 		/*
641 		 * We don't support MONITOR/MWAIT if leaf 5 is not available
642 		 * to obtain the monitor linesize.
643 		 */
644 		if (cpi->cpi_maxeax < 5)
645 			mask_ecx &= ~CPUID_INTC_ECX_MON;
646 		break;
647 	case X86_VENDOR_TM:
648 		/*
649 		 * workaround the NT workaround in CMS 4.1
650 		 */
651 		if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
652 		    (cpi->cpi_step == 2 || cpi->cpi_step == 3))
653 			cp->cp_edx |= CPUID_INTC_EDX_CX8;
654 		break;
655 	case X86_VENDOR_Centaur:
656 		/*
657 		 * workaround the NT workarounds again
658 		 */
659 		if (cpi->cpi_family == 6)
660 			cp->cp_edx |= CPUID_INTC_EDX_CX8;
661 		break;
662 	case X86_VENDOR_Cyrix:
663 		/*
664 		 * We rely heavily on the probing in locore
665 		 * to actually figure out what parts, if any,
666 		 * of the Cyrix cpuid instruction to believe.
667 		 */
668 		switch (x86_type) {
669 		case X86_TYPE_CYRIX_486:
670 			mask_edx = 0;
671 			break;
672 		case X86_TYPE_CYRIX_6x86:
673 			mask_edx = 0;
674 			break;
675 		case X86_TYPE_CYRIX_6x86L:
676 			mask_edx =
677 			    CPUID_INTC_EDX_DE |
678 			    CPUID_INTC_EDX_CX8;
679 			break;
680 		case X86_TYPE_CYRIX_6x86MX:
681 			mask_edx =
682 			    CPUID_INTC_EDX_DE |
683 			    CPUID_INTC_EDX_MSR |
684 			    CPUID_INTC_EDX_CX8 |
685 			    CPUID_INTC_EDX_PGE |
686 			    CPUID_INTC_EDX_CMOV |
687 			    CPUID_INTC_EDX_MMX;
688 			break;
689 		case X86_TYPE_CYRIX_GXm:
690 			mask_edx =
691 			    CPUID_INTC_EDX_MSR |
692 			    CPUID_INTC_EDX_CX8 |
693 			    CPUID_INTC_EDX_CMOV |
694 			    CPUID_INTC_EDX_MMX;
695 			break;
696 		case X86_TYPE_CYRIX_MediaGX:
697 			break;
698 		case X86_TYPE_CYRIX_MII:
699 		case X86_TYPE_VIA_CYRIX_III:
700 			mask_edx =
701 			    CPUID_INTC_EDX_DE |
702 			    CPUID_INTC_EDX_TSC |
703 			    CPUID_INTC_EDX_MSR |
704 			    CPUID_INTC_EDX_CX8 |
705 			    CPUID_INTC_EDX_PGE |
706 			    CPUID_INTC_EDX_CMOV |
707 			    CPUID_INTC_EDX_MMX;
708 			break;
709 		default:
710 			break;
711 		}
712 		break;
713 	}
714 
715 	/*
716 	 * Now we've figured out the masks that determine
717 	 * which bits we choose to believe, apply the masks
718 	 * to the feature words, then map the kernel's view
719 	 * of these feature words into its feature word.
720 	 */
721 	cp->cp_edx &= mask_edx;
722 	cp->cp_ecx &= mask_ecx;
723 
724 	/*
725 	 * apply any platform restrictions (we don't call this
726 	 * immediately after __cpuid_insn here, because we need the
727 	 * workarounds applied above first)
728 	 */
729 	platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
730 
731 	/*
732 	 * fold in overrides from the "eeprom" mechanism
733 	 */
734 	cp->cp_edx |= cpuid_feature_edx_include;
735 	cp->cp_edx &= ~cpuid_feature_edx_exclude;
736 
737 	cp->cp_ecx |= cpuid_feature_ecx_include;
738 	cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
739 
740 	if (cp->cp_edx & CPUID_INTC_EDX_PSE)
741 		feature |= X86_LARGEPAGE;
742 	if (cp->cp_edx & CPUID_INTC_EDX_TSC)
743 		feature |= X86_TSC;
744 	if (cp->cp_edx & CPUID_INTC_EDX_MSR)
745 		feature |= X86_MSR;
746 	if (cp->cp_edx & CPUID_INTC_EDX_MTRR)
747 		feature |= X86_MTRR;
748 	if (cp->cp_edx & CPUID_INTC_EDX_PGE)
749 		feature |= X86_PGE;
750 	if (cp->cp_edx & CPUID_INTC_EDX_CMOV)
751 		feature |= X86_CMOV;
752 	if (cp->cp_edx & CPUID_INTC_EDX_MMX)
753 		feature |= X86_MMX;
754 	if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
755 	    (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0)
756 		feature |= X86_MCA;
757 	if (cp->cp_edx & CPUID_INTC_EDX_PAE)
758 		feature |= X86_PAE;
759 	if (cp->cp_edx & CPUID_INTC_EDX_CX8)
760 		feature |= X86_CX8;
761 	if (cp->cp_ecx & CPUID_INTC_ECX_CX16)
762 		feature |= X86_CX16;
763 	if (cp->cp_edx & CPUID_INTC_EDX_PAT)
764 		feature |= X86_PAT;
765 	if (cp->cp_edx & CPUID_INTC_EDX_SEP)
766 		feature |= X86_SEP;
767 	if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
768 		/*
769 		 * In our implementation, fxsave/fxrstor
770 		 * are prerequisites before we'll even
771 		 * try and do SSE things.
772 		 */
773 		if (cp->cp_edx & CPUID_INTC_EDX_SSE)
774 			feature |= X86_SSE;
775 		if (cp->cp_edx & CPUID_INTC_EDX_SSE2)
776 			feature |= X86_SSE2;
777 		if (cp->cp_ecx & CPUID_INTC_ECX_SSE3)
778 			feature |= X86_SSE3;
779 	}
780 	if (cp->cp_edx & CPUID_INTC_EDX_DE)
781 		feature |= X86_DE;
782 	if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
783 		cpi->cpi_mwait.support |= MWAIT_SUPPORT;
784 		feature |= X86_MWAIT;
785 	}
786 
787 	if (feature & X86_PAE)
788 		cpi->cpi_pabits = 36;
789 
790 	/*
791 	 * Hyperthreading configuration is slightly tricky on Intel
792 	 * and pure clones, and even trickier on AMD.
793 	 *
794 	 * (AMD chose to set the HTT bit on their CMP processors,
795 	 * even though they're not actually hyperthreaded.  Thus it
796 	 * takes a bit more work to figure out what's really going
797 	 * on ... see the handling of the CMP_LGCY bit below)
798 	 */
799 	if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
800 		cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
801 		if (cpi->cpi_ncpu_per_chip > 1)
802 			feature |= X86_HTT;
803 	} else {
804 		cpi->cpi_ncpu_per_chip = 1;
805 	}
806 
807 	/*
808 	 * Work on the "extended" feature information, doing
809 	 * some basic initialization for cpuid_pass2()
810 	 */
811 	xcpuid = 0;
812 	switch (cpi->cpi_vendor) {
813 	case X86_VENDOR_Intel:
814 		if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
815 			xcpuid++;
816 		break;
817 	case X86_VENDOR_AMD:
818 		if (cpi->cpi_family > 5 ||
819 		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
820 			xcpuid++;
821 		break;
822 	case X86_VENDOR_Cyrix:
823 		/*
824 		 * Only these Cyrix CPUs are -known- to support
825 		 * extended cpuid operations.
826 		 */
827 		if (x86_type == X86_TYPE_VIA_CYRIX_III ||
828 		    x86_type == X86_TYPE_CYRIX_GXm)
829 			xcpuid++;
830 		break;
831 	case X86_VENDOR_Centaur:
832 	case X86_VENDOR_TM:
833 	default:
834 		xcpuid++;
835 		break;
836 	}
837 
838 	if (xcpuid) {
839 		cp = &cpi->cpi_extd[0];
840 		cp->cp_eax = 0x80000000;
841 		cpi->cpi_xmaxeax = __cpuid_insn(cp);
842 	}
843 
844 	if (cpi->cpi_xmaxeax & 0x80000000) {
845 
846 		if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
847 			cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
848 
849 		switch (cpi->cpi_vendor) {
850 		case X86_VENDOR_Intel:
851 		case X86_VENDOR_AMD:
852 			if (cpi->cpi_xmaxeax < 0x80000001)
853 				break;
854 			cp = &cpi->cpi_extd[1];
855 			cp->cp_eax = 0x80000001;
856 			(void) __cpuid_insn(cp);
857 
858 			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
859 			    cpi->cpi_family == 5 &&
860 			    cpi->cpi_model == 6 &&
861 			    cpi->cpi_step == 6) {
862 				/*
863 				 * K6 model 6 uses bit 10 to indicate SYSC
864 				 * Later models use bit 11. Fix it here.
865 				 */
866 				if (cp->cp_edx & 0x400) {
867 					cp->cp_edx &= ~0x400;
868 					cp->cp_edx |= CPUID_AMD_EDX_SYSC;
869 				}
870 			}
871 
872 			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
873 
874 			/*
875 			 * Compute the additions to the kernel's feature word.
876 			 */
877 			if (cp->cp_edx & CPUID_AMD_EDX_NX)
878 				feature |= X86_NX;
879 
880 			if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
881 			    (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
882 			    (cp->cp_ecx & CPUID_AMD_ECX_SSE4A))
883 				feature |= X86_SSE4A;
884 
885 			/*
886 			 * If both the HTT and CMP_LGCY bits are set,
887 			 * then we're not actually HyperThreaded.  Read
888 			 * "AMD CPUID Specification" for more details.
889 			 */
890 			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
891 			    (feature & X86_HTT) &&
892 			    (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
893 				feature &= ~X86_HTT;
894 				feature |= X86_CMP;
895 			}
896 #if defined(__amd64)
897 			/*
898 			 * It's really tricky to support syscall/sysret in
899 			 * the i386 kernel; we rely on sysenter/sysexit
900 			 * instead.  In the amd64 kernel, things are -way-
901 			 * better.
902 			 */
903 			if (cp->cp_edx & CPUID_AMD_EDX_SYSC)
904 				feature |= X86_ASYSC;
905 
906 			/*
907 			 * While we're thinking about system calls, note
908 			 * that AMD processors don't support sysenter
909 			 * in long mode at all, so don't try to program them.
910 			 */
911 			if (x86_vendor == X86_VENDOR_AMD)
912 				feature &= ~X86_SEP;
913 #endif
914 			if (cp->cp_edx & CPUID_AMD_EDX_TSCP)
915 				feature |= X86_TSCP;
916 			break;
917 		default:
918 			break;
919 		}
920 
921 		/*
922 		 * Get CPUID data about processor cores and hyperthreads.
923 		 */
924 		switch (cpi->cpi_vendor) {
925 		case X86_VENDOR_Intel:
926 			if (cpi->cpi_maxeax >= 4) {
927 				cp = &cpi->cpi_std[4];
928 				cp->cp_eax = 4;
929 				cp->cp_ecx = 0;
930 				(void) __cpuid_insn(cp);
931 				platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
932 			}
933 			/*FALLTHROUGH*/
934 		case X86_VENDOR_AMD:
935 			if (cpi->cpi_xmaxeax < 0x80000008)
936 				break;
937 			cp = &cpi->cpi_extd[8];
938 			cp->cp_eax = 0x80000008;
939 			(void) __cpuid_insn(cp);
940 			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
941 
942 			/*
943 			 * Virtual and physical address limits from
944 			 * cpuid override previously guessed values.
945 			 */
946 			cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
947 			cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
948 			break;
949 		default:
950 			break;
951 		}
952 
953 		/*
954 		 * Derive the number of cores per chip
955 		 */
956 		switch (cpi->cpi_vendor) {
957 		case X86_VENDOR_Intel:
958 			if (cpi->cpi_maxeax < 4) {
959 				cpi->cpi_ncore_per_chip = 1;
960 				break;
961 			} else {
962 				cpi->cpi_ncore_per_chip =
963 				    BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
964 			}
965 			break;
966 		case X86_VENDOR_AMD:
967 			if (cpi->cpi_xmaxeax < 0x80000008) {
968 				cpi->cpi_ncore_per_chip = 1;
969 				break;
970 			} else {
971 				cpi->cpi_ncore_per_chip =
972 				    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
973 			}
974 			break;
975 		default:
976 			cpi->cpi_ncore_per_chip = 1;
977 			break;
978 		}
979 	}
980 
981 	/*
982 	 * If more than one core, then this processor is CMP.
983 	 */
984 	if (cpi->cpi_ncore_per_chip > 1)
985 		feature |= X86_CMP;
986 
987 	/*
988 	 * If the number of cores is the same as the number
989 	 * of CPUs, then we cannot have HyperThreading.
990 	 */
991 	if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip)
992 		feature &= ~X86_HTT;
993 
994 	if ((feature & (X86_HTT | X86_CMP)) == 0) {
995 		/*
996 		 * Single-core single-threaded processors.
997 		 */
998 		cpi->cpi_chipid = -1;
999 		cpi->cpi_clogid = 0;
1000 		cpi->cpi_coreid = cpu->cpu_id;
1001 	} else if (cpi->cpi_ncpu_per_chip > 1) {
1002 		uint_t i;
1003 		uint_t chipid_shift = 0;
1004 		uint_t coreid_shift = 0;
1005 		uint_t apic_id = CPI_APIC_ID(cpi);
1006 
1007 		for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1008 			chipid_shift++;
1009 		cpi->cpi_chipid = apic_id >> chipid_shift;
1010 		cpi->cpi_clogid = apic_id & ((1 << chipid_shift) - 1);
1011 
1012 		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
1013 			if (feature & X86_CMP) {
1014 				/*
1015 				 * Multi-core (and possibly multi-threaded)
1016 				 * processors.
1017 				 */
1018 				uint_t ncpu_per_core;
1019 				if (cpi->cpi_ncore_per_chip == 1)
1020 					ncpu_per_core = cpi->cpi_ncpu_per_chip;
1021 				else if (cpi->cpi_ncore_per_chip > 1)
1022 					ncpu_per_core = cpi->cpi_ncpu_per_chip /
1023 					    cpi->cpi_ncore_per_chip;
1024 				/*
1025 				 * 8bit APIC IDs on dual core Pentiums
1026 				 * look like this:
1027 				 *
1028 				 * +-----------------------+------+------+
1029 				 * | Physical Package ID   |  MC  |  HT  |
1030 				 * +-----------------------+------+------+
1031 				 * <------- chipid -------->
1032 				 * <------- coreid --------------->
1033 				 *			   <--- clogid -->
1034 				 *
1035 				 * Where the number of bits necessary to
1036 				 * represent MC and HT fields together equals
1037 				 * to the minimum number of bits necessary to
1038 				 * store the value of cpi->cpi_ncpu_per_chip.
1039 				 * Of those bits, the MC part uses the number
1040 				 * of bits necessary to store the value of
1041 				 * cpi->cpi_ncore_per_chip.
1042 				 */
1043 				for (i = 1; i < ncpu_per_core; i <<= 1)
1044 					coreid_shift++;
1045 				cpi->cpi_coreid = apic_id >> coreid_shift;
1046 			} else if (feature & X86_HTT) {
1047 				/*
1048 				 * Single-core multi-threaded processors.
1049 				 */
1050 				cpi->cpi_coreid = cpi->cpi_chipid;
1051 			}
1052 		} else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
1053 			/*
1054 			 * AMD currently only has dual-core processors with
1055 			 * single-threaded cores.  If they ever release
1056 			 * multi-threaded processors, then this code
1057 			 * will have to be updated.
1058 			 */
1059 			cpi->cpi_coreid = cpu->cpu_id;
1060 		} else {
1061 			/*
1062 			 * All other processors are currently
1063 			 * assumed to have single cores.
1064 			 */
1065 			cpi->cpi_coreid = cpi->cpi_chipid;
1066 		}
1067 	}
1068 
1069 	/*
1070 	 * Synthesize chip "revision" and socket type
1071 	 */
1072 	synth_info(cpi);
1073 
1074 pass1_done:
1075 	cpi->cpi_pass = 1;
1076 	return (feature);
1077 }
1078 
1079 /*
1080  * Make copies of the cpuid table entries we depend on, in
1081  * part for ease of parsing now, in part so that we have only
1082  * one place to correct any of it, in part for ease of
1083  * later export to userland, and in part so we can look at
1084  * this stuff in a crash dump.
1085  */
1086 
1087 /*ARGSUSED*/
1088 void
1089 cpuid_pass2(cpu_t *cpu)
1090 {
1091 	uint_t n, nmax;
1092 	int i;
1093 	struct cpuid_regs *cp;
1094 	uint8_t *dp;
1095 	uint32_t *iptr;
1096 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1097 
1098 	ASSERT(cpi->cpi_pass == 1);
1099 
1100 	if (cpi->cpi_maxeax < 1)
1101 		goto pass2_done;
1102 
1103 	if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1104 		nmax = NMAX_CPI_STD;
1105 	/*
1106 	 * (We already handled n == 0 and n == 1 in pass 1)
1107 	 */
1108 	for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1109 		cp->cp_eax = n;
1110 
1111 		/*
1112 		 * CPUID function 4 expects %ecx to be initialized
1113 		 * with an index which indicates which cache to return
1114 		 * information about. The OS is expected to call function 4
1115 		 * with %ecx set to 0, 1, 2, ... until it returns with
1116 		 * EAX[4:0] set to 0, which indicates there are no more
1117 		 * caches.
1118 		 *
1119 		 * Here, populate cpi_std[4] with the information returned by
1120 		 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1121 		 * when dynamic memory allocation becomes available.
1122 		 *
1123 		 * Note: we need to explicitly initialize %ecx here, since
1124 		 * function 4 may have been previously invoked.
1125 		 */
1126 		if (n == 4)
1127 			cp->cp_ecx = 0;
1128 
1129 		(void) __cpuid_insn(cp);
1130 		platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1131 		switch (n) {
1132 		case 2:
1133 			/*
1134 			 * "the lower 8 bits of the %eax register
1135 			 * contain a value that identifies the number
1136 			 * of times the cpuid [instruction] has to be
1137 			 * executed to obtain a complete image of the
1138 			 * processor's caching systems."
1139 			 *
1140 			 * How *do* they make this stuff up?
1141 			 */
1142 			cpi->cpi_ncache = sizeof (*cp) *
1143 			    BITX(cp->cp_eax, 7, 0);
1144 			if (cpi->cpi_ncache == 0)
1145 				break;
1146 			cpi->cpi_ncache--;	/* skip count byte */
1147 
1148 			/*
1149 			 * Well, for now, rather than attempt to implement
1150 			 * this slightly dubious algorithm, we just look
1151 			 * at the first 15 ..
1152 			 */
1153 			if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1154 				cpi->cpi_ncache = sizeof (*cp) - 1;
1155 
1156 			dp = cpi->cpi_cacheinfo;
1157 			if (BITX(cp->cp_eax, 31, 31) == 0) {
1158 				uint8_t *p = (void *)&cp->cp_eax;
1159 				for (i = 1; i < 3; i++)
1160 					if (p[i] != 0)
1161 						*dp++ = p[i];
1162 			}
1163 			if (BITX(cp->cp_ebx, 31, 31) == 0) {
1164 				uint8_t *p = (void *)&cp->cp_ebx;
1165 				for (i = 0; i < 4; i++)
1166 					if (p[i] != 0)
1167 						*dp++ = p[i];
1168 			}
1169 			if (BITX(cp->cp_ecx, 31, 31) == 0) {
1170 				uint8_t *p = (void *)&cp->cp_ecx;
1171 				for (i = 0; i < 4; i++)
1172 					if (p[i] != 0)
1173 						*dp++ = p[i];
1174 			}
1175 			if (BITX(cp->cp_edx, 31, 31) == 0) {
1176 				uint8_t *p = (void *)&cp->cp_edx;
1177 				for (i = 0; i < 4; i++)
1178 					if (p[i] != 0)
1179 						*dp++ = p[i];
1180 			}
1181 			break;
1182 
1183 		case 3:	/* Processor serial number, if PSN supported */
1184 			break;
1185 
1186 		case 4:	/* Deterministic cache parameters */
1187 			break;
1188 
1189 		case 5:	/* Monitor/Mwait parameters */
1190 
1191 			/*
1192 			 * check cpi_mwait.support which was set in cpuid_pass1
1193 			 */
1194 			if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1195 				break;
1196 
1197 			cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1198 			cpi->cpi_mwait.mon_max = (size_t)MWAIT_SIZE_MAX(cpi);
1199 			if (MWAIT_EXTENSION(cpi)) {
1200 				cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1201 				if (MWAIT_INT_ENABLE(cpi))
1202 					cpi->cpi_mwait.support |=
1203 					    MWAIT_ECX_INT_ENABLE;
1204 			}
1205 			break;
1206 		default:
1207 			break;
1208 		}
1209 	}
1210 
1211 	if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1212 		goto pass2_done;
1213 
1214 	if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1215 		nmax = NMAX_CPI_EXTD;
1216 	/*
1217 	 * Copy the extended properties, fixing them as we go.
1218 	 * (We already handled n == 0 and n == 1 in pass 1)
1219 	 */
1220 	iptr = (void *)cpi->cpi_brandstr;
1221 	for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1222 		cp->cp_eax = 0x80000000 + n;
1223 		(void) __cpuid_insn(cp);
1224 		platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1225 		switch (n) {
1226 		case 2:
1227 		case 3:
1228 		case 4:
1229 			/*
1230 			 * Extract the brand string
1231 			 */
1232 			*iptr++ = cp->cp_eax;
1233 			*iptr++ = cp->cp_ebx;
1234 			*iptr++ = cp->cp_ecx;
1235 			*iptr++ = cp->cp_edx;
1236 			break;
1237 		case 5:
1238 			switch (cpi->cpi_vendor) {
1239 			case X86_VENDOR_AMD:
1240 				/*
1241 				 * The Athlon and Duron were the first
1242 				 * parts to report the sizes of the
1243 				 * TLB for large pages. Before then,
1244 				 * we don't trust the data.
1245 				 */
1246 				if (cpi->cpi_family < 6 ||
1247 				    (cpi->cpi_family == 6 &&
1248 				    cpi->cpi_model < 1))
1249 					cp->cp_eax = 0;
1250 				break;
1251 			default:
1252 				break;
1253 			}
1254 			break;
1255 		case 6:
1256 			switch (cpi->cpi_vendor) {
1257 			case X86_VENDOR_AMD:
1258 				/*
1259 				 * The Athlon and Duron were the first
1260 				 * AMD parts with L2 TLB's.
1261 				 * Before then, don't trust the data.
1262 				 */
1263 				if (cpi->cpi_family < 6 ||
1264 				    cpi->cpi_family == 6 &&
1265 				    cpi->cpi_model < 1)
1266 					cp->cp_eax = cp->cp_ebx = 0;
1267 				/*
1268 				 * AMD Duron rev A0 reports L2
1269 				 * cache size incorrectly as 1K
1270 				 * when it is really 64K
1271 				 */
1272 				if (cpi->cpi_family == 6 &&
1273 				    cpi->cpi_model == 3 &&
1274 				    cpi->cpi_step == 0) {
1275 					cp->cp_ecx &= 0xffff;
1276 					cp->cp_ecx |= 0x400000;
1277 				}
1278 				break;
1279 			case X86_VENDOR_Cyrix:	/* VIA C3 */
1280 				/*
1281 				 * VIA C3 processors are a bit messed
1282 				 * up w.r.t. encoding cache sizes in %ecx
1283 				 */
1284 				if (cpi->cpi_family != 6)
1285 					break;
1286 				/*
1287 				 * model 7 and 8 were incorrectly encoded
1288 				 *
1289 				 * xxx is model 8 really broken?
1290 				 */
1291 				if (cpi->cpi_model == 7 ||
1292 				    cpi->cpi_model == 8)
1293 					cp->cp_ecx =
1294 					    BITX(cp->cp_ecx, 31, 24) << 16 |
1295 					    BITX(cp->cp_ecx, 23, 16) << 12 |
1296 					    BITX(cp->cp_ecx, 15, 8) << 8 |
1297 					    BITX(cp->cp_ecx, 7, 0);
1298 				/*
1299 				 * model 9 stepping 1 has wrong associativity
1300 				 */
1301 				if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
1302 					cp->cp_ecx |= 8 << 12;
1303 				break;
1304 			case X86_VENDOR_Intel:
1305 				/*
1306 				 * Extended L2 Cache features function.
1307 				 * First appeared on Prescott.
1308 				 */
1309 			default:
1310 				break;
1311 			}
1312 			break;
1313 		default:
1314 			break;
1315 		}
1316 	}
1317 
1318 pass2_done:
1319 	cpi->cpi_pass = 2;
1320 }
1321 
1322 static const char *
1323 intel_cpubrand(const struct cpuid_info *cpi)
1324 {
1325 	int i;
1326 
1327 	if ((x86_feature & X86_CPUID) == 0 ||
1328 	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
1329 		return ("i486");
1330 
1331 	switch (cpi->cpi_family) {
1332 	case 5:
1333 		return ("Intel Pentium(r)");
1334 	case 6:
1335 		switch (cpi->cpi_model) {
1336 			uint_t celeron, xeon;
1337 			const struct cpuid_regs *cp;
1338 		case 0:
1339 		case 1:
1340 		case 2:
1341 			return ("Intel Pentium(r) Pro");
1342 		case 3:
1343 		case 4:
1344 			return ("Intel Pentium(r) II");
1345 		case 6:
1346 			return ("Intel Celeron(r)");
1347 		case 5:
1348 		case 7:
1349 			celeron = xeon = 0;
1350 			cp = &cpi->cpi_std[2];	/* cache info */
1351 
1352 			for (i = 1; i < 3; i++) {
1353 				uint_t tmp;
1354 
1355 				tmp = (cp->cp_eax >> (8 * i)) & 0xff;
1356 				if (tmp == 0x40)
1357 					celeron++;
1358 				if (tmp >= 0x44 && tmp <= 0x45)
1359 					xeon++;
1360 			}
1361 
1362 			for (i = 0; i < 2; i++) {
1363 				uint_t tmp;
1364 
1365 				tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
1366 				if (tmp == 0x40)
1367 					celeron++;
1368 				else if (tmp >= 0x44 && tmp <= 0x45)
1369 					xeon++;
1370 			}
1371 
1372 			for (i = 0; i < 4; i++) {
1373 				uint_t tmp;
1374 
1375 				tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
1376 				if (tmp == 0x40)
1377 					celeron++;
1378 				else if (tmp >= 0x44 && tmp <= 0x45)
1379 					xeon++;
1380 			}
1381 
1382 			for (i = 0; i < 4; i++) {
1383 				uint_t tmp;
1384 
1385 				tmp = (cp->cp_edx >> (8 * i)) & 0xff;
1386 				if (tmp == 0x40)
1387 					celeron++;
1388 				else if (tmp >= 0x44 && tmp <= 0x45)
1389 					xeon++;
1390 			}
1391 
1392 			if (celeron)
1393 				return ("Intel Celeron(r)");
1394 			if (xeon)
1395 				return (cpi->cpi_model == 5 ?
1396 				    "Intel Pentium(r) II Xeon(tm)" :
1397 				    "Intel Pentium(r) III Xeon(tm)");
1398 			return (cpi->cpi_model == 5 ?
1399 			    "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
1400 			    "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
1401 		default:
1402 			break;
1403 		}
1404 	default:
1405 		break;
1406 	}
1407 
1408 	/* BrandID is present if the field is nonzero */
1409 	if (cpi->cpi_brandid != 0) {
1410 		static const struct {
1411 			uint_t bt_bid;
1412 			const char *bt_str;
1413 		} brand_tbl[] = {
1414 			{ 0x1,	"Intel(r) Celeron(r)" },
1415 			{ 0x2,	"Intel(r) Pentium(r) III" },
1416 			{ 0x3,	"Intel(r) Pentium(r) III Xeon(tm)" },
1417 			{ 0x4,	"Intel(r) Pentium(r) III" },
1418 			{ 0x6,	"Mobile Intel(r) Pentium(r) III" },
1419 			{ 0x7,	"Mobile Intel(r) Celeron(r)" },
1420 			{ 0x8,	"Intel(r) Pentium(r) 4" },
1421 			{ 0x9,	"Intel(r) Pentium(r) 4" },
1422 			{ 0xa,	"Intel(r) Celeron(r)" },
1423 			{ 0xb,	"Intel(r) Xeon(tm)" },
1424 			{ 0xc,	"Intel(r) Xeon(tm) MP" },
1425 			{ 0xe,	"Mobile Intel(r) Pentium(r) 4" },
1426 			{ 0xf,	"Mobile Intel(r) Celeron(r)" },
1427 			{ 0x11, "Mobile Genuine Intel(r)" },
1428 			{ 0x12, "Intel(r) Celeron(r) M" },
1429 			{ 0x13, "Mobile Intel(r) Celeron(r)" },
1430 			{ 0x14, "Intel(r) Celeron(r)" },
1431 			{ 0x15, "Mobile Genuine Intel(r)" },
1432 			{ 0x16,	"Intel(r) Pentium(r) M" },
1433 			{ 0x17, "Mobile Intel(r) Celeron(r)" }
1434 		};
1435 		uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
1436 		uint_t sgn;
1437 
1438 		sgn = (cpi->cpi_family << 8) |
1439 		    (cpi->cpi_model << 4) | cpi->cpi_step;
1440 
1441 		for (i = 0; i < btblmax; i++)
1442 			if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
1443 				break;
1444 		if (i < btblmax) {
1445 			if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
1446 				return ("Intel(r) Celeron(r)");
1447 			if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
1448 				return ("Intel(r) Xeon(tm) MP");
1449 			if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
1450 				return ("Intel(r) Xeon(tm)");
1451 			return (brand_tbl[i].bt_str);
1452 		}
1453 	}
1454 
1455 	return (NULL);
1456 }
1457 
1458 static const char *
1459 amd_cpubrand(const struct cpuid_info *cpi)
1460 {
1461 	if ((x86_feature & X86_CPUID) == 0 ||
1462 	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
1463 		return ("i486 compatible");
1464 
1465 	switch (cpi->cpi_family) {
1466 	case 5:
1467 		switch (cpi->cpi_model) {
1468 		case 0:
1469 		case 1:
1470 		case 2:
1471 		case 3:
1472 		case 4:
1473 		case 5:
1474 			return ("AMD-K5(r)");
1475 		case 6:
1476 		case 7:
1477 			return ("AMD-K6(r)");
1478 		case 8:
1479 			return ("AMD-K6(r)-2");
1480 		case 9:
1481 			return ("AMD-K6(r)-III");
1482 		default:
1483 			return ("AMD (family 5)");
1484 		}
1485 	case 6:
1486 		switch (cpi->cpi_model) {
1487 		case 1:
1488 			return ("AMD-K7(tm)");
1489 		case 0:
1490 		case 2:
1491 		case 4:
1492 			return ("AMD Athlon(tm)");
1493 		case 3:
1494 		case 7:
1495 			return ("AMD Duron(tm)");
1496 		case 6:
1497 		case 8:
1498 		case 10:
1499 			/*
1500 			 * Use the L2 cache size to distinguish
1501 			 */
1502 			return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
1503 			    "AMD Athlon(tm)" : "AMD Duron(tm)");
1504 		default:
1505 			return ("AMD (family 6)");
1506 		}
1507 	default:
1508 		break;
1509 	}
1510 
1511 	if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
1512 	    cpi->cpi_brandid != 0) {
1513 		switch (BITX(cpi->cpi_brandid, 7, 5)) {
1514 		case 3:
1515 			return ("AMD Opteron(tm) UP 1xx");
1516 		case 4:
1517 			return ("AMD Opteron(tm) DP 2xx");
1518 		case 5:
1519 			return ("AMD Opteron(tm) MP 8xx");
1520 		default:
1521 			return ("AMD Opteron(tm)");
1522 		}
1523 	}
1524 
1525 	return (NULL);
1526 }
1527 
1528 static const char *
1529 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
1530 {
1531 	if ((x86_feature & X86_CPUID) == 0 ||
1532 	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
1533 	    type == X86_TYPE_CYRIX_486)
1534 		return ("i486 compatible");
1535 
1536 	switch (type) {
1537 	case X86_TYPE_CYRIX_6x86:
1538 		return ("Cyrix 6x86");
1539 	case X86_TYPE_CYRIX_6x86L:
1540 		return ("Cyrix 6x86L");
1541 	case X86_TYPE_CYRIX_6x86MX:
1542 		return ("Cyrix 6x86MX");
1543 	case X86_TYPE_CYRIX_GXm:
1544 		return ("Cyrix GXm");
1545 	case X86_TYPE_CYRIX_MediaGX:
1546 		return ("Cyrix MediaGX");
1547 	case X86_TYPE_CYRIX_MII:
1548 		return ("Cyrix M2");
1549 	case X86_TYPE_VIA_CYRIX_III:
1550 		return ("VIA Cyrix M3");
1551 	default:
1552 		/*
1553 		 * Have another wild guess ..
1554 		 */
1555 		if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
1556 			return ("Cyrix 5x86");
1557 		else if (cpi->cpi_family == 5) {
1558 			switch (cpi->cpi_model) {
1559 			case 2:
1560 				return ("Cyrix 6x86");	/* Cyrix M1 */
1561 			case 4:
1562 				return ("Cyrix MediaGX");
1563 			default:
1564 				break;
1565 			}
1566 		} else if (cpi->cpi_family == 6) {
1567 			switch (cpi->cpi_model) {
1568 			case 0:
1569 				return ("Cyrix 6x86MX"); /* Cyrix M2? */
1570 			case 5:
1571 			case 6:
1572 			case 7:
1573 			case 8:
1574 			case 9:
1575 				return ("VIA C3");
1576 			default:
1577 				break;
1578 			}
1579 		}
1580 		break;
1581 	}
1582 	return (NULL);
1583 }
1584 
1585 /*
1586  * This only gets called in the case that the CPU extended
1587  * feature brand string (0x80000002, 0x80000003, 0x80000004)
1588  * aren't available, or contain null bytes for some reason.
1589  */
1590 static void
1591 fabricate_brandstr(struct cpuid_info *cpi)
1592 {
1593 	const char *brand = NULL;
1594 
1595 	switch (cpi->cpi_vendor) {
1596 	case X86_VENDOR_Intel:
1597 		brand = intel_cpubrand(cpi);
1598 		break;
1599 	case X86_VENDOR_AMD:
1600 		brand = amd_cpubrand(cpi);
1601 		break;
1602 	case X86_VENDOR_Cyrix:
1603 		brand = cyrix_cpubrand(cpi, x86_type);
1604 		break;
1605 	case X86_VENDOR_NexGen:
1606 		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
1607 			brand = "NexGen Nx586";
1608 		break;
1609 	case X86_VENDOR_Centaur:
1610 		if (cpi->cpi_family == 5)
1611 			switch (cpi->cpi_model) {
1612 			case 4:
1613 				brand = "Centaur C6";
1614 				break;
1615 			case 8:
1616 				brand = "Centaur C2";
1617 				break;
1618 			case 9:
1619 				brand = "Centaur C3";
1620 				break;
1621 			default:
1622 				break;
1623 			}
1624 		break;
1625 	case X86_VENDOR_Rise:
1626 		if (cpi->cpi_family == 5 &&
1627 		    (cpi->cpi_model == 0 || cpi->cpi_model == 2))
1628 			brand = "Rise mP6";
1629 		break;
1630 	case X86_VENDOR_SiS:
1631 		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
1632 			brand = "SiS 55x";
1633 		break;
1634 	case X86_VENDOR_TM:
1635 		if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
1636 			brand = "Transmeta Crusoe TM3x00 or TM5x00";
1637 		break;
1638 	case X86_VENDOR_NSC:
1639 	case X86_VENDOR_UMC:
1640 	default:
1641 		break;
1642 	}
1643 	if (brand) {
1644 		(void) strcpy((char *)cpi->cpi_brandstr, brand);
1645 		return;
1646 	}
1647 
1648 	/*
1649 	 * If all else fails ...
1650 	 */
1651 	(void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
1652 	    "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
1653 	    cpi->cpi_model, cpi->cpi_step);
1654 }
1655 
1656 /*
1657  * This routine is called just after kernel memory allocation
1658  * becomes available on cpu0, and as part of mp_startup() on
1659  * the other cpus.
1660  *
1661  * Fixup the brand string, and collect any information from cpuid
1662  * that requires dynamicically allocated storage to represent.
1663  */
1664 /*ARGSUSED*/
1665 void
1666 cpuid_pass3(cpu_t *cpu)
1667 {
1668 	int	i, max, shft, level, size;
1669 	struct cpuid_regs regs;
1670 	struct cpuid_regs *cp;
1671 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1672 
1673 	ASSERT(cpi->cpi_pass == 2);
1674 
1675 	/*
1676 	 * Function 4: Deterministic cache parameters
1677 	 *
1678 	 * Take this opportunity to detect the number of threads
1679 	 * sharing the last level cache, and construct a corresponding
1680 	 * cache id. The respective cpuid_info members are initialized
1681 	 * to the default case of "no last level cache sharing".
1682 	 */
1683 	cpi->cpi_ncpu_shr_last_cache = 1;
1684 	cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
1685 
1686 	if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
1687 
1688 		/*
1689 		 * Find the # of elements (size) returned by fn 4, and along
1690 		 * the way detect last level cache sharing details.
1691 		 */
1692 		bzero(&regs, sizeof (regs));
1693 		cp = &regs;
1694 		for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
1695 			cp->cp_eax = 4;
1696 			cp->cp_ecx = i;
1697 
1698 			(void) __cpuid_insn(cp);
1699 
1700 			if (CPI_CACHE_TYPE(cp) == 0)
1701 				break;
1702 			level = CPI_CACHE_LVL(cp);
1703 			if (level > max) {
1704 				max = level;
1705 				cpi->cpi_ncpu_shr_last_cache =
1706 				    CPI_NTHR_SHR_CACHE(cp) + 1;
1707 			}
1708 		}
1709 		cpi->cpi_std_4_size = size = i;
1710 
1711 		/*
1712 		 * Allocate the cpi_std_4 array. The first element
1713 		 * references the regs for fn 4, %ecx == 0, which
1714 		 * cpuid_pass2() stashed in cpi->cpi_std[4].
1715 		 */
1716 		if (size > 0) {
1717 			cpi->cpi_std_4 =
1718 			    kmem_alloc(size * sizeof (cp), KM_SLEEP);
1719 			cpi->cpi_std_4[0] = &cpi->cpi_std[4];
1720 
1721 			/*
1722 			 * Allocate storage to hold the additional regs
1723 			 * for function 4, %ecx == 1 .. cpi_std_4_size.
1724 			 *
1725 			 * The regs for fn 4, %ecx == 0 has already
1726 			 * been allocated as indicated above.
1727 			 */
1728 			for (i = 1; i < size; i++) {
1729 				cp = cpi->cpi_std_4[i] =
1730 				    kmem_zalloc(sizeof (regs), KM_SLEEP);
1731 				cp->cp_eax = 4;
1732 				cp->cp_ecx = i;
1733 
1734 				(void) __cpuid_insn(cp);
1735 			}
1736 		}
1737 		/*
1738 		 * Determine the number of bits needed to represent
1739 		 * the number of CPUs sharing the last level cache.
1740 		 *
1741 		 * Shift off that number of bits from the APIC id to
1742 		 * derive the cache id.
1743 		 */
1744 		shft = 0;
1745 		for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
1746 			shft++;
1747 		cpi->cpi_last_lvl_cacheid = CPI_APIC_ID(cpi) >> shft;
1748 	}
1749 
1750 	/*
1751 	 * Now fixup the brand string
1752 	 */
1753 	if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
1754 		fabricate_brandstr(cpi);
1755 	} else {
1756 
1757 		/*
1758 		 * If we successfully extracted a brand string from the cpuid
1759 		 * instruction, clean it up by removing leading spaces and
1760 		 * similar junk.
1761 		 */
1762 		if (cpi->cpi_brandstr[0]) {
1763 			size_t maxlen = sizeof (cpi->cpi_brandstr);
1764 			char *src, *dst;
1765 
1766 			dst = src = (char *)cpi->cpi_brandstr;
1767 			src[maxlen - 1] = '\0';
1768 			/*
1769 			 * strip leading spaces
1770 			 */
1771 			while (*src == ' ')
1772 				src++;
1773 			/*
1774 			 * Remove any 'Genuine' or "Authentic" prefixes
1775 			 */
1776 			if (strncmp(src, "Genuine ", 8) == 0)
1777 				src += 8;
1778 			if (strncmp(src, "Authentic ", 10) == 0)
1779 				src += 10;
1780 
1781 			/*
1782 			 * Now do an in-place copy.
1783 			 * Map (R) to (r) and (TM) to (tm).
1784 			 * The era of teletypes is long gone, and there's
1785 			 * -really- no need to shout.
1786 			 */
1787 			while (*src != '\0') {
1788 				if (src[0] == '(') {
1789 					if (strncmp(src + 1, "R)", 2) == 0) {
1790 						(void) strncpy(dst, "(r)", 3);
1791 						src += 3;
1792 						dst += 3;
1793 						continue;
1794 					}
1795 					if (strncmp(src + 1, "TM)", 3) == 0) {
1796 						(void) strncpy(dst, "(tm)", 4);
1797 						src += 4;
1798 						dst += 4;
1799 						continue;
1800 					}
1801 				}
1802 				*dst++ = *src++;
1803 			}
1804 			*dst = '\0';
1805 
1806 			/*
1807 			 * Finally, remove any trailing spaces
1808 			 */
1809 			while (--dst > cpi->cpi_brandstr)
1810 				if (*dst == ' ')
1811 					*dst = '\0';
1812 				else
1813 					break;
1814 		} else
1815 			fabricate_brandstr(cpi);
1816 	}
1817 	cpi->cpi_pass = 3;
1818 }
1819 
1820 /*
1821  * This routine is called out of bind_hwcap() much later in the life
1822  * of the kernel (post_startup()).  The job of this routine is to resolve
1823  * the hardware feature support and kernel support for those features into
1824  * what we're actually going to tell applications via the aux vector.
1825  */
1826 uint_t
1827 cpuid_pass4(cpu_t *cpu)
1828 {
1829 	struct cpuid_info *cpi;
1830 	uint_t hwcap_flags = 0;
1831 
1832 	if (cpu == NULL)
1833 		cpu = CPU;
1834 	cpi = cpu->cpu_m.mcpu_cpi;
1835 
1836 	ASSERT(cpi->cpi_pass == 3);
1837 
1838 	if (cpi->cpi_maxeax >= 1) {
1839 		uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
1840 		uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
1841 
1842 		*edx = CPI_FEATURES_EDX(cpi);
1843 		*ecx = CPI_FEATURES_ECX(cpi);
1844 
1845 		/*
1846 		 * [these require explicit kernel support]
1847 		 */
1848 		if ((x86_feature & X86_SEP) == 0)
1849 			*edx &= ~CPUID_INTC_EDX_SEP;
1850 
1851 		if ((x86_feature & X86_SSE) == 0)
1852 			*edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
1853 		if ((x86_feature & X86_SSE2) == 0)
1854 			*edx &= ~CPUID_INTC_EDX_SSE2;
1855 
1856 		if ((x86_feature & X86_HTT) == 0)
1857 			*edx &= ~CPUID_INTC_EDX_HTT;
1858 
1859 		if ((x86_feature & X86_SSE3) == 0)
1860 			*ecx &= ~CPUID_INTC_ECX_SSE3;
1861 
1862 		/*
1863 		 * [no explicit support required beyond x87 fp context]
1864 		 */
1865 		if (!fpu_exists)
1866 			*edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
1867 
1868 		/*
1869 		 * Now map the supported feature vector to things that we
1870 		 * think userland will care about.
1871 		 */
1872 		if (*edx & CPUID_INTC_EDX_SEP)
1873 			hwcap_flags |= AV_386_SEP;
1874 		if (*edx & CPUID_INTC_EDX_SSE)
1875 			hwcap_flags |= AV_386_FXSR | AV_386_SSE;
1876 		if (*edx & CPUID_INTC_EDX_SSE2)
1877 			hwcap_flags |= AV_386_SSE2;
1878 		if (*ecx & CPUID_INTC_ECX_SSE3)
1879 			hwcap_flags |= AV_386_SSE3;
1880 		if (*ecx & CPUID_INTC_ECX_POPCNT)
1881 			hwcap_flags |= AV_386_POPCNT;
1882 		if (*edx & CPUID_INTC_EDX_FPU)
1883 			hwcap_flags |= AV_386_FPU;
1884 		if (*edx & CPUID_INTC_EDX_MMX)
1885 			hwcap_flags |= AV_386_MMX;
1886 
1887 		if (*edx & CPUID_INTC_EDX_TSC)
1888 			hwcap_flags |= AV_386_TSC;
1889 		if (*edx & CPUID_INTC_EDX_CX8)
1890 			hwcap_flags |= AV_386_CX8;
1891 		if (*edx & CPUID_INTC_EDX_CMOV)
1892 			hwcap_flags |= AV_386_CMOV;
1893 		if (*ecx & CPUID_INTC_ECX_MON)
1894 			hwcap_flags |= AV_386_MON;
1895 		if (*ecx & CPUID_INTC_ECX_CX16)
1896 			hwcap_flags |= AV_386_CX16;
1897 	}
1898 
1899 	if (x86_feature & X86_HTT)
1900 		hwcap_flags |= AV_386_PAUSE;
1901 
1902 	if (cpi->cpi_xmaxeax < 0x80000001)
1903 		goto pass4_done;
1904 
1905 	switch (cpi->cpi_vendor) {
1906 		struct cpuid_regs cp;
1907 		uint32_t *edx, *ecx;
1908 
1909 	case X86_VENDOR_Intel:
1910 		/*
1911 		 * Seems like Intel duplicated what we necessary
1912 		 * here to make the initial crop of 64-bit OS's work.
1913 		 * Hopefully, those are the only "extended" bits
1914 		 * they'll add.
1915 		 */
1916 		/*FALLTHROUGH*/
1917 
1918 	case X86_VENDOR_AMD:
1919 		edx = &cpi->cpi_support[AMD_EDX_FEATURES];
1920 		ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
1921 
1922 		*edx = CPI_FEATURES_XTD_EDX(cpi);
1923 		*ecx = CPI_FEATURES_XTD_ECX(cpi);
1924 
1925 		/*
1926 		 * [these features require explicit kernel support]
1927 		 */
1928 		switch (cpi->cpi_vendor) {
1929 		case X86_VENDOR_Intel:
1930 			break;
1931 
1932 		case X86_VENDOR_AMD:
1933 			if ((x86_feature & X86_TSCP) == 0)
1934 				*edx &= ~CPUID_AMD_EDX_TSCP;
1935 			if ((x86_feature & X86_SSE4A) == 0)
1936 				*ecx &= ~CPUID_AMD_ECX_SSE4A;
1937 			break;
1938 
1939 		default:
1940 			break;
1941 		}
1942 
1943 		/*
1944 		 * [no explicit support required beyond
1945 		 * x87 fp context and exception handlers]
1946 		 */
1947 		if (!fpu_exists)
1948 			*edx &= ~(CPUID_AMD_EDX_MMXamd |
1949 			    CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
1950 
1951 		if ((x86_feature & X86_NX) == 0)
1952 			*edx &= ~CPUID_AMD_EDX_NX;
1953 #if !defined(__amd64)
1954 		*edx &= ~CPUID_AMD_EDX_LM;
1955 #endif
1956 		/*
1957 		 * Now map the supported feature vector to
1958 		 * things that we think userland will care about.
1959 		 */
1960 #if defined(__amd64)
1961 		if (*edx & CPUID_AMD_EDX_SYSC)
1962 			hwcap_flags |= AV_386_AMD_SYSC;
1963 #endif
1964 		if (*edx & CPUID_AMD_EDX_MMXamd)
1965 			hwcap_flags |= AV_386_AMD_MMX;
1966 		if (*edx & CPUID_AMD_EDX_3DNow)
1967 			hwcap_flags |= AV_386_AMD_3DNow;
1968 		if (*edx & CPUID_AMD_EDX_3DNowx)
1969 			hwcap_flags |= AV_386_AMD_3DNowx;
1970 
1971 		switch (cpi->cpi_vendor) {
1972 		case X86_VENDOR_AMD:
1973 			if (*edx & CPUID_AMD_EDX_TSCP)
1974 				hwcap_flags |= AV_386_TSCP;
1975 			if (*ecx & CPUID_AMD_ECX_AHF64)
1976 				hwcap_flags |= AV_386_AHF;
1977 			if (*ecx & CPUID_AMD_ECX_SSE4A)
1978 				hwcap_flags |= AV_386_AMD_SSE4A;
1979 			if (*ecx & CPUID_AMD_ECX_LZCNT)
1980 				hwcap_flags |= AV_386_AMD_LZCNT;
1981 			break;
1982 
1983 		case X86_VENDOR_Intel:
1984 			/*
1985 			 * Aarrgh.
1986 			 * Intel uses a different bit in the same word.
1987 			 */
1988 			if (*ecx & CPUID_INTC_ECX_AHF64)
1989 				hwcap_flags |= AV_386_AHF;
1990 			break;
1991 
1992 		default:
1993 			break;
1994 		}
1995 		break;
1996 
1997 	case X86_VENDOR_TM:
1998 		cp.cp_eax = 0x80860001;
1999 		(void) __cpuid_insn(&cp);
2000 		cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2001 		break;
2002 
2003 	default:
2004 		break;
2005 	}
2006 
2007 pass4_done:
2008 	cpi->cpi_pass = 4;
2009 	return (hwcap_flags);
2010 }
2011 
2012 
2013 /*
2014  * Simulate the cpuid instruction using the data we previously
2015  * captured about this CPU.  We try our best to return the truth
2016  * about the hardware, independently of kernel support.
2017  */
2018 uint32_t
2019 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2020 {
2021 	struct cpuid_info *cpi;
2022 	struct cpuid_regs *xcp;
2023 
2024 	if (cpu == NULL)
2025 		cpu = CPU;
2026 	cpi = cpu->cpu_m.mcpu_cpi;
2027 
2028 	ASSERT(cpuid_checkpass(cpu, 3));
2029 
2030 	/*
2031 	 * CPUID data is cached in two separate places: cpi_std for standard
2032 	 * CPUID functions, and cpi_extd for extended CPUID functions.
2033 	 */
2034 	if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2035 		xcp = &cpi->cpi_std[cp->cp_eax];
2036 	else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2037 	    cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2038 		xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2039 	else
2040 		/*
2041 		 * The caller is asking for data from an input parameter which
2042 		 * the kernel has not cached.  In this case we go fetch from
2043 		 * the hardware and return the data directly to the user.
2044 		 */
2045 		return (__cpuid_insn(cp));
2046 
2047 	cp->cp_eax = xcp->cp_eax;
2048 	cp->cp_ebx = xcp->cp_ebx;
2049 	cp->cp_ecx = xcp->cp_ecx;
2050 	cp->cp_edx = xcp->cp_edx;
2051 	return (cp->cp_eax);
2052 }
2053 
2054 int
2055 cpuid_checkpass(cpu_t *cpu, int pass)
2056 {
2057 	return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2058 	    cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2059 }
2060 
2061 int
2062 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2063 {
2064 	ASSERT(cpuid_checkpass(cpu, 3));
2065 
2066 	return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2067 }
2068 
2069 int
2070 cpuid_is_cmt(cpu_t *cpu)
2071 {
2072 	if (cpu == NULL)
2073 		cpu = CPU;
2074 
2075 	ASSERT(cpuid_checkpass(cpu, 1));
2076 
2077 	return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2078 }
2079 
2080 /*
2081  * AMD and Intel both implement the 64-bit variant of the syscall
2082  * instruction (syscallq), so if there's -any- support for syscall,
2083  * cpuid currently says "yes, we support this".
2084  *
2085  * However, Intel decided to -not- implement the 32-bit variant of the
2086  * syscall instruction, so we provide a predicate to allow our caller
2087  * to test that subtlety here.
2088  */
2089 /*ARGSUSED*/
2090 int
2091 cpuid_syscall32_insn(cpu_t *cpu)
2092 {
2093 	ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2094 
2095 	if (cpu == NULL)
2096 		cpu = CPU;
2097 
2098 	/*CSTYLED*/
2099 	{
2100 		struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2101 
2102 		if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2103 		    cpi->cpi_xmaxeax >= 0x80000001 &&
2104 		    (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2105 			return (1);
2106 	}
2107 	return (0);
2108 }
2109 
2110 int
2111 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2112 {
2113 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2114 
2115 	static const char fmt[] =
2116 	    "x86 (%s %X family %d model %d step %d clock %d MHz)";
2117 	static const char fmt_ht[] =
2118 	    "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2119 
2120 	ASSERT(cpuid_checkpass(cpu, 1));
2121 
2122 	if (cpuid_is_cmt(cpu))
2123 		return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2124 		    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2125 		    cpi->cpi_family, cpi->cpi_model,
2126 		    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2127 	return (snprintf(s, n, fmt,
2128 	    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2129 	    cpi->cpi_family, cpi->cpi_model,
2130 	    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2131 }
2132 
2133 const char *
2134 cpuid_getvendorstr(cpu_t *cpu)
2135 {
2136 	ASSERT(cpuid_checkpass(cpu, 1));
2137 	return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2138 }
2139 
2140 uint_t
2141 cpuid_getvendor(cpu_t *cpu)
2142 {
2143 	ASSERT(cpuid_checkpass(cpu, 1));
2144 	return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2145 }
2146 
2147 uint_t
2148 cpuid_getfamily(cpu_t *cpu)
2149 {
2150 	ASSERT(cpuid_checkpass(cpu, 1));
2151 	return (cpu->cpu_m.mcpu_cpi->cpi_family);
2152 }
2153 
2154 uint_t
2155 cpuid_getmodel(cpu_t *cpu)
2156 {
2157 	ASSERT(cpuid_checkpass(cpu, 1));
2158 	return (cpu->cpu_m.mcpu_cpi->cpi_model);
2159 }
2160 
2161 uint_t
2162 cpuid_get_ncpu_per_chip(cpu_t *cpu)
2163 {
2164 	ASSERT(cpuid_checkpass(cpu, 1));
2165 	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2166 }
2167 
2168 uint_t
2169 cpuid_get_ncore_per_chip(cpu_t *cpu)
2170 {
2171 	ASSERT(cpuid_checkpass(cpu, 1));
2172 	return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2173 }
2174 
2175 uint_t
2176 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2177 {
2178 	ASSERT(cpuid_checkpass(cpu, 2));
2179 	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2180 }
2181 
2182 id_t
2183 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2184 {
2185 	ASSERT(cpuid_checkpass(cpu, 2));
2186 	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2187 }
2188 
2189 uint_t
2190 cpuid_getstep(cpu_t *cpu)
2191 {
2192 	ASSERT(cpuid_checkpass(cpu, 1));
2193 	return (cpu->cpu_m.mcpu_cpi->cpi_step);
2194 }
2195 
2196 uint_t
2197 cpuid_getsig(struct cpu *cpu)
2198 {
2199 	ASSERT(cpuid_checkpass(cpu, 1));
2200 	return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
2201 }
2202 
2203 uint32_t
2204 cpuid_getchiprev(struct cpu *cpu)
2205 {
2206 	ASSERT(cpuid_checkpass(cpu, 1));
2207 	return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
2208 }
2209 
2210 const char *
2211 cpuid_getchiprevstr(struct cpu *cpu)
2212 {
2213 	ASSERT(cpuid_checkpass(cpu, 1));
2214 	return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
2215 }
2216 
2217 uint32_t
2218 cpuid_getsockettype(struct cpu *cpu)
2219 {
2220 	ASSERT(cpuid_checkpass(cpu, 1));
2221 	return (cpu->cpu_m.mcpu_cpi->cpi_socket);
2222 }
2223 
2224 int
2225 cpuid_get_chipid(cpu_t *cpu)
2226 {
2227 	ASSERT(cpuid_checkpass(cpu, 1));
2228 
2229 	if (cpuid_is_cmt(cpu))
2230 		return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
2231 	return (cpu->cpu_id);
2232 }
2233 
2234 id_t
2235 cpuid_get_coreid(cpu_t *cpu)
2236 {
2237 	ASSERT(cpuid_checkpass(cpu, 1));
2238 	return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
2239 }
2240 
2241 int
2242 cpuid_get_clogid(cpu_t *cpu)
2243 {
2244 	ASSERT(cpuid_checkpass(cpu, 1));
2245 	return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
2246 }
2247 
2248 void
2249 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
2250 {
2251 	struct cpuid_info *cpi;
2252 
2253 	if (cpu == NULL)
2254 		cpu = CPU;
2255 	cpi = cpu->cpu_m.mcpu_cpi;
2256 
2257 	ASSERT(cpuid_checkpass(cpu, 1));
2258 
2259 	if (pabits)
2260 		*pabits = cpi->cpi_pabits;
2261 	if (vabits)
2262 		*vabits = cpi->cpi_vabits;
2263 }
2264 
2265 /*
2266  * Returns the number of data TLB entries for a corresponding
2267  * pagesize.  If it can't be computed, or isn't known, the
2268  * routine returns zero.  If you ask about an architecturally
2269  * impossible pagesize, the routine will panic (so that the
2270  * hat implementor knows that things are inconsistent.)
2271  */
2272 uint_t
2273 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
2274 {
2275 	struct cpuid_info *cpi;
2276 	uint_t dtlb_nent = 0;
2277 
2278 	if (cpu == NULL)
2279 		cpu = CPU;
2280 	cpi = cpu->cpu_m.mcpu_cpi;
2281 
2282 	ASSERT(cpuid_checkpass(cpu, 1));
2283 
2284 	/*
2285 	 * Check the L2 TLB info
2286 	 */
2287 	if (cpi->cpi_xmaxeax >= 0x80000006) {
2288 		struct cpuid_regs *cp = &cpi->cpi_extd[6];
2289 
2290 		switch (pagesize) {
2291 
2292 		case 4 * 1024:
2293 			/*
2294 			 * All zero in the top 16 bits of the register
2295 			 * indicates a unified TLB. Size is in low 16 bits.
2296 			 */
2297 			if ((cp->cp_ebx & 0xffff0000) == 0)
2298 				dtlb_nent = cp->cp_ebx & 0x0000ffff;
2299 			else
2300 				dtlb_nent = BITX(cp->cp_ebx, 27, 16);
2301 			break;
2302 
2303 		case 2 * 1024 * 1024:
2304 			if ((cp->cp_eax & 0xffff0000) == 0)
2305 				dtlb_nent = cp->cp_eax & 0x0000ffff;
2306 			else
2307 				dtlb_nent = BITX(cp->cp_eax, 27, 16);
2308 			break;
2309 
2310 		default:
2311 			panic("unknown L2 pagesize");
2312 			/*NOTREACHED*/
2313 		}
2314 	}
2315 
2316 	if (dtlb_nent != 0)
2317 		return (dtlb_nent);
2318 
2319 	/*
2320 	 * No L2 TLB support for this size, try L1.
2321 	 */
2322 	if (cpi->cpi_xmaxeax >= 0x80000005) {
2323 		struct cpuid_regs *cp = &cpi->cpi_extd[5];
2324 
2325 		switch (pagesize) {
2326 		case 4 * 1024:
2327 			dtlb_nent = BITX(cp->cp_ebx, 23, 16);
2328 			break;
2329 		case 2 * 1024 * 1024:
2330 			dtlb_nent = BITX(cp->cp_eax, 23, 16);
2331 			break;
2332 		default:
2333 			panic("unknown L1 d-TLB pagesize");
2334 			/*NOTREACHED*/
2335 		}
2336 	}
2337 
2338 	return (dtlb_nent);
2339 }
2340 
/*
 * Return 0 if the erratum is not present or not applicable, positive
 * if it is, and negative if the status of the erratum is unknown.
 *
 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
 * Processors" #25759, Rev 3.57, August 2005
 *
 * Most errata are decided by comparing the cached %eax value of
 * standard cpuid function 1 against the lists of values encoded in the
 * macros defined below; a few are decided from the family number alone.
 * Any erratum number without a case label yields -1.
 */
int
cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
{
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
	uint_t eax;

	/*
	 * Bail out if this CPU isn't an AMD CPU, or if it's
	 * a legacy (32-bit) AMD CPU.
	 */
	if (cpi->cpi_vendor != X86_VENDOR_AMD ||
	    cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
	    cpi->cpi_family == 6)

		return (0);

	eax = cpi->cpi_std[1].cp_eax;

	/*
	 * Each macro below is true when its argument equals one of a fixed
	 * list of function 1 %eax values; the macro names presumably follow
	 * the revision names used in the AMD guide cited above (confirm
	 * against that document).
	 *
	 * NOTE(review): the macro arguments are not parenthesized and the
	 * macros are not #undef'd within this function, so they stay
	 * defined for the remainder of the file.
	 */
#define	SH_B0(eax)	(eax == 0xf40 || eax == 0xf50)
#define	SH_B3(eax) 	(eax == 0xf51)
#define	B(eax)		(SH_B0(eax) || SH_B3(eax))

#define	SH_C0(eax)	(eax == 0xf48 || eax == 0xf58)

#define	SH_CG(eax)	(eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
#define	DH_CG(eax)	(eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
#define	CH_CG(eax)	(eax == 0xf82 || eax == 0xfb2)
#define	CG(eax)		(SH_CG(eax) || DH_CG(eax) || CH_CG(eax))

#define	SH_D0(eax)	(eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
#define	DH_D0(eax)	(eax == 0x10fc0 || eax == 0x10ff0)
#define	CH_D0(eax)	(eax == 0x10f80 || eax == 0x10fb0)
#define	D0(eax)		(SH_D0(eax) || DH_D0(eax) || CH_D0(eax))

#define	SH_E0(eax)	(eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
#define	JH_E1(eax)	(eax == 0x20f10)	/* JH8_E0 had 0x20f30 */
#define	DH_E3(eax)	(eax == 0x20fc0 || eax == 0x20ff0)
#define	SH_E4(eax)	(eax == 0x20f51 || eax == 0x20f71)
#define	BH_E4(eax)	(eax == 0x20fb1)
#define	SH_E5(eax)	(eax == 0x20f42)
#define	DH_E6(eax)	(eax == 0x20ff2 || eax == 0x20fc2)
#define	JH_E6(eax)	(eax == 0x20f12 || eax == 0x20f32)
#define	EX(eax)		(SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
			    SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
			    DH_E6(eax) || JH_E6(eax))

	switch (erratum) {
	case 1:
		return (cpi->cpi_family < 0x10);
	case 51:	/* what does the asterisk mean? */
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 52:
		return (B(eax));
	case 57:
		return (cpi->cpi_family <= 0x10);
	case 58:
		return (B(eax));
	case 60:
		return (cpi->cpi_family <= 0x10);
	case 61:
	case 62:
	case 63:
	case 64:
	case 65:
	case 66:
	case 68:
	case 69:
	case 70:
	case 71:
		return (B(eax));
	case 72:
		return (SH_B0(eax));
	case 74:
		return (B(eax));
	case 75:
		return (cpi->cpi_family < 0x10);
	case 76:
		return (B(eax));
	case 77:
		return (cpi->cpi_family <= 0x10);
	case 78:
		return (B(eax) || SH_C0(eax));
	case 79:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
	case 80:
	case 81:
	case 82:
		return (B(eax));
	case 83:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 85:
		return (cpi->cpi_family < 0x10);
	case 86:
		return (SH_C0(eax) || CG(eax));
	case 88:
		/* Reported as not applicable unless built for __amd64 */
#if !defined(__amd64)
		return (0);
#else
		return (B(eax) || SH_C0(eax));
#endif
	case 89:
		return (cpi->cpi_family < 0x10);
	case 90:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 91:
	case 92:
		return (B(eax) || SH_C0(eax));
	case 93:
		return (SH_C0(eax));
	case 94:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 95:
		/* Reported as not applicable unless built for __amd64 */
#if !defined(__amd64)
		return (0);
#else
		return (B(eax) || SH_C0(eax));
#endif
	case 96:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 97:
	case 98:
		return (SH_C0(eax) || CG(eax));
	case 99:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 100:
		return (B(eax) || SH_C0(eax));
	case 101:
	case 103:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 104:
		return (SH_C0(eax) || CG(eax) || D0(eax));
	case 105:
	case 106:
	case 107:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 108:
		return (DH_CG(eax));
	case 109:
		return (SH_C0(eax) || CG(eax) || D0(eax));
	case 110:
		return (D0(eax) || EX(eax));
	case 111:
		return (CG(eax));
	case 112:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
	case 113:
		return (eax == 0x20fc0);
	case 114:
		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
	case 115:
		return (SH_E0(eax) || JH_E1(eax));
	case 116:
		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
	case 117:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 118:
		return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
		    JH_E6(eax));
	case 121:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
	case 122:
		return (cpi->cpi_family < 0x10);
	case 123:
		return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
	case 131:
		return (cpi->cpi_family < 0x10);
	/*
	 * NOTE(review): the two 7-digit case values below are presumably
	 * internal bug ids rather than AMD erratum numbers -- confirm.
	 */
	case 6336786:
		/*
		 * Test for AdvPowerMgmtInfo.TscPStateInvariant
		 * if this is a K8 family or newer processor
		 *
		 * The instruction is executed directly for function
		 * 0x80000007 rather than read from the cached data; the
		 * erratum is reported present when bit 8 (0x100) of %edx
		 * is clear.
		 */
		if (CPI_FAMILY(cpi) == 0xf) {
			struct cpuid_regs regs;
			regs.cp_eax = 0x80000007;
			(void) __cpuid_insn(&regs);
			return (!(regs.cp_edx & 0x100));
		}
		return (0);
	case 6323525:
		/*
		 * Fold %eax bits 27:20 plus bits 11:8 into the upper byte
		 * and bits 19:16 with bits 7:4 into the lower byte, then
		 * report the erratum when the result is below 0xf40.
		 */
		return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
		    (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);

	default:
		return (-1);
	}
}
2534 
/*
 * Property-name suffixes.  These are passed as the 'type' argument of
 * add_cache_prop(), which joins them to a label as "<label>-<type>".
 */
static const char assoc_str[] = "associativity";
static const char line_str[] = "line-size";
static const char size_str[] = "size";
2538 
2539 static void
2540 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
2541     uint32_t val)
2542 {
2543 	char buf[128];
2544 
2545 	/*
2546 	 * ndi_prop_update_int() is used because it is desirable for
2547 	 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
2548 	 */
2549 	if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
2550 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
2551 }
2552 
2553 /*
2554  * Intel-style cache/tlb description
2555  *
2556  * Standard cpuid level 2 gives a randomly ordered
2557  * selection of tags that index into a table that describes
2558  * cache and tlb properties.
2559  */
2560 
/*
 * Labels for the cache and TLB kinds reported as devinfo properties.
 * They are used as the ct_label of cachetab entries and as the 'label'
 * argument when cache properties are added.
 */
static const char l1_icache_str[] = "l1-icache";
static const char l1_dcache_str[] = "l1-dcache";
static const char l2_cache_str[] = "l2-cache";
static const char l3_cache_str[] = "l3-cache";
static const char itlb4k_str[] = "itlb-4K";
static const char dtlb4k_str[] = "dtlb-4K";
static const char itlb4M_str[] = "itlb-4M";
static const char dtlb4M_str[] = "dtlb-4M";
static const char itlb424_str[] = "itlb-4K-2M-4M";
static const char dtlb44_str[] = "dtlb-4K-4M";
static const char sl1_dcache_str[] = "sectored-l1-dcache";
static const char sl2_cache_str[] = "sectored-l2-cache";
static const char itrace_str[] = "itrace-cache";
static const char sl3_cache_str[] = "sectored-l3-cache";
2575 
/*
 * One entry per known descriptor byte.  find_cacheent() matches ct_code
 * against a descriptor; add_cacheent_props() publishes the remaining
 * fields as "<ct_label>-associativity", "<ct_label>-line-size" (only if
 * ct_line_size is non-zero) and "<ct_label>-size" properties.
 *
 * A table is terminated by an entry whose ct_code is 0.
 */
static const struct cachetab {
	uint8_t 	ct_code;	/* descriptor byte this entry matches */
	uint8_t		ct_assoc;	/* published as associativity */
	uint16_t 	ct_line_size;	/* published as line-size; 0 = omit */
	size_t		ct_size;	/* published as size */
	const char	*ct_label;	/* property name prefix */
} intel_ctab[] = {
	/*
	 * maintain descending order!
	 * (find_cacheent() stops at the first entry whose ct_code is
	 * less than or equal to the code being looked up.)
	 */
	{ 0xb4, 4, 0, 256, dtlb4k_str },
	{ 0xb3, 4, 0, 128, dtlb4k_str },
	{ 0xb0, 4, 0, 128, itlb4k_str },
	{ 0x87, 8, 64, 1024*1024, l2_cache_str},
	{ 0x86, 4, 64, 512*1024, l2_cache_str},
	{ 0x85, 8, 32, 2*1024*1024, l2_cache_str},
	{ 0x84, 8, 32, 1024*1024, l2_cache_str},
	{ 0x83, 8, 32, 512*1024, l2_cache_str},
	{ 0x82, 8, 32, 256*1024, l2_cache_str},
	{ 0x7f, 2, 64, 512*1024, l2_cache_str},
	{ 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
	{ 0x7c, 8, 64, 1024*1024, sl2_cache_str},
	{ 0x7b, 8, 64, 512*1024, sl2_cache_str},
	{ 0x7a, 8, 64, 256*1024, sl2_cache_str},
	{ 0x79, 8, 64, 128*1024, sl2_cache_str},
	{ 0x78, 8, 64, 1024*1024, l2_cache_str},
	{ 0x73, 8, 0, 64*1024, itrace_str},
	{ 0x72, 8, 0, 32*1024, itrace_str},
	{ 0x71, 8, 0, 16*1024, itrace_str},
	{ 0x70, 8, 0, 12*1024, itrace_str},
	{ 0x68, 4, 64, 32*1024, sl1_dcache_str},
	{ 0x67, 4, 64, 16*1024, sl1_dcache_str},
	{ 0x66, 4, 64, 8*1024, sl1_dcache_str},
	{ 0x60, 8, 64, 16*1024, sl1_dcache_str},
	{ 0x5d, 0, 0, 256, dtlb44_str},
	{ 0x5c, 0, 0, 128, dtlb44_str},
	{ 0x5b, 0, 0, 64, dtlb44_str},
	{ 0x52, 0, 0, 256, itlb424_str},
	{ 0x51, 0, 0, 128, itlb424_str},
	{ 0x50, 0, 0, 64, itlb424_str},
	{ 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
	{ 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
	{ 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
	{ 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
	{ 0x49, 16, 64, 4*1024*1024, l3_cache_str},
	{ 0x47, 8, 64, 8*1024*1024, l3_cache_str},
	{ 0x46, 4, 64, 4*1024*1024, l3_cache_str},
	{ 0x45, 4, 32, 2*1024*1024, l2_cache_str},
	{ 0x44, 4, 32, 1024*1024, l2_cache_str},
	{ 0x43, 4, 32, 512*1024, l2_cache_str},
	{ 0x42, 4, 32, 256*1024, l2_cache_str},
	{ 0x41, 4, 32, 128*1024, l2_cache_str},
	{ 0x3e, 4, 64, 512*1024, sl2_cache_str},
	{ 0x3d, 6, 64, 384*1024, sl2_cache_str},
	{ 0x3c, 4, 64, 256*1024, sl2_cache_str},
	{ 0x3b, 2, 64, 128*1024, sl2_cache_str},
	{ 0x3a, 6, 64, 192*1024, sl2_cache_str},
	{ 0x39, 4, 64, 128*1024, sl2_cache_str},
	{ 0x30, 8, 64, 32*1024, l1_icache_str},
	{ 0x2c, 8, 64, 32*1024, l1_dcache_str},
	{ 0x29, 8, 64, 4096*1024, sl3_cache_str},
	{ 0x25, 8, 64, 2048*1024, sl3_cache_str},
	{ 0x23, 8, 64, 1024*1024, sl3_cache_str},
	{ 0x22, 4, 64, 512*1024, sl3_cache_str},
	{ 0x0c, 4, 32, 16*1024, l1_dcache_str},
	{ 0x0b, 4, 0, 4, itlb4M_str},
	{ 0x0a, 2, 32, 8*1024, l1_dcache_str},
	{ 0x08, 4, 32, 16*1024, l1_icache_str},
	{ 0x06, 4, 32, 8*1024, l1_icache_str},
	{ 0x04, 4, 0, 8, dtlb4M_str},
	{ 0x03, 4, 0, 64, dtlb4k_str},
	{ 0x02, 4, 0, 2, itlb4M_str},
	{ 0x01, 4, 0, 32, itlb4k_str},
	{ 0 }
};
2649 
2650 static const struct cachetab cyrix_ctab[] = {
2651 	{ 0x70, 4, 0, 32, "tlb-4K" },
2652 	{ 0x80, 4, 16, 16*1024, "l1-cache" },
2653 	{ 0 }
2654 };
2655 
2656 /*
2657  * Search a cache table for a matching entry
2658  */
2659 static const struct cachetab *
2660 find_cacheent(const struct cachetab *ct, uint_t code)
2661 {
2662 	if (code != 0) {
2663 		for (; ct->ct_code != 0; ct++)
2664 			if (ct->ct_code <= code)
2665 				break;
2666 		if (ct->ct_code == code)
2667 			return (ct);
2668 	}
2669 	return (NULL);
2670 }
2671 
2672 /*
2673  * Walk the cacheinfo descriptor, applying 'func' to every valid element
2674  * The walk is terminated if the walker returns non-zero.
2675  */
2676 static void
2677 intel_walk_cacheinfo(struct cpuid_info *cpi,
2678     void *arg, int (*func)(void *, const struct cachetab *))
2679 {
2680 	const struct cachetab *ct;
2681 	uint8_t *dp;
2682 	int i;
2683 
2684 	if ((dp = cpi->cpi_cacheinfo) == NULL)
2685 		return;
2686 	for (i = 0; i < cpi->cpi_ncache; i++, dp++)
2687 		if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
2688 			if (func(arg, ct) != 0)
2689 				break;
2690 		}
2691 }
2692 
2693 /*
2694  * (Like the Intel one, except for Cyrix CPUs)
2695  */
2696 static void
2697 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
2698     void *arg, int (*func)(void *, const struct cachetab *))
2699 {
2700 	const struct cachetab *ct;
2701 	uint8_t *dp;
2702 	int i;
2703 
2704 	if ((dp = cpi->cpi_cacheinfo) == NULL)
2705 		return;
2706 	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
2707 		/*
2708 		 * Search Cyrix-specific descriptor table first ..
2709 		 */
2710 		if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
2711 			if (func(arg, ct) != 0)
2712 				break;
2713 			continue;
2714 		}
2715 		/*
2716 		 * .. else fall back to the Intel one
2717 		 */
2718 		if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
2719 			if (func(arg, ct) != 0)
2720 				break;
2721 			continue;
2722 		}
2723 	}
2724 }
2725 
2726 /*
2727  * A cacheinfo walker that adds associativity, line-size, and size properties
2728  * to the devinfo node it is passed as an argument.
2729  */
2730 static int
2731 add_cacheent_props(void *arg, const struct cachetab *ct)
2732 {
2733 	dev_info_t *devi = arg;
2734 
2735 	add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
2736 	if (ct->ct_line_size != 0)
2737 		add_cache_prop(devi, ct->ct_label, line_str,
2738 		    ct->ct_line_size);
2739 	add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
2740 	return (0);
2741 }
2742 
/*
 * Property-name suffix used instead of assoc_str when the AMD-format
 * associativity field holds its "fully associative" encoding.
 */
static const char fully_assoc[] = "fully-associative?";
2744 
2745 /*
2746  * AMD style cache/tlb description
2747  *
2748  * Extended functions 5 and 6 directly describe properties of
2749  * tlbs and various cache levels.
2750  */
2751 static void
2752 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
2753 {
2754 	switch (assoc) {
2755 	case 0:	/* reserved; ignore */
2756 		break;
2757 	default:
2758 		add_cache_prop(devi, label, assoc_str, assoc);
2759 		break;
2760 	case 0xff:
2761 		add_cache_prop(devi, label, fully_assoc, 1);
2762 		break;
2763 	}
2764 }
2765 
2766 static void
2767 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
2768 {
2769 	if (size == 0)
2770 		return;
2771 	add_cache_prop(devi, label, size_str, size);
2772 	add_amd_assoc(devi, label, assoc);
2773 }
2774 
2775 static void
2776 add_amd_cache(dev_info_t *devi, const char *label,
2777     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
2778 {
2779 	if (size == 0 || line_size == 0)
2780 		return;
2781 	add_amd_assoc(devi, label, assoc);
2782 	/*
2783 	 * Most AMD parts have a sectored cache. Multiple cache lines are
2784 	 * associated with each tag. A sector consists of all cache lines
2785 	 * associated with a tag. For example, the AMD K6-III has a sector
2786 	 * size of 2 cache lines per tag.
2787 	 */
2788 	if (lines_per_tag != 0)
2789 		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
2790 	add_cache_prop(devi, label, line_str, line_size);
2791 	add_cache_prop(devi, label, size_str, size * 1024);
2792 }
2793 
2794 static void
2795 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
2796 {
2797 	switch (assoc) {
2798 	case 0:	/* off */
2799 		break;
2800 	case 1:
2801 	case 2:
2802 	case 4:
2803 		add_cache_prop(devi, label, assoc_str, assoc);
2804 		break;
2805 	case 6:
2806 		add_cache_prop(devi, label, assoc_str, 8);
2807 		break;
2808 	case 8:
2809 		add_cache_prop(devi, label, assoc_str, 16);
2810 		break;
2811 	case 0xf:
2812 		add_cache_prop(devi, label, fully_assoc, 1);
2813 		break;
2814 	default: /* reserved; ignore */
2815 		break;
2816 	}
2817 }
2818 
2819 static void
2820 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
2821 {
2822 	if (size == 0 || assoc == 0)
2823 		return;
2824 	add_amd_l2_assoc(devi, label, assoc);
2825 	add_cache_prop(devi, label, size_str, size);
2826 }
2827 
2828 static void
2829 add_amd_l2_cache(dev_info_t *devi, const char *label,
2830     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
2831 {
2832 	if (size == 0 || assoc == 0 || line_size == 0)
2833 		return;
2834 	add_amd_l2_assoc(devi, label, assoc);
2835 	if (lines_per_tag != 0)
2836 		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
2837 	add_cache_prop(devi, label, line_str, line_size);
2838 	add_cache_prop(devi, label, size_str, size * 1024);
2839 }
2840 
2841 static void
2842 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
2843 {
2844 	struct cpuid_regs *cp;
2845 
2846 	if (cpi->cpi_xmaxeax < 0x80000005)
2847 		return;
2848 	cp = &cpi->cpi_extd[5];
2849 
2850 	/*
2851 	 * 4M/2M L1 TLB configuration
2852 	 *
2853 	 * We report the size for 2M pages because AMD uses two
2854 	 * TLB entries for one 4M page.
2855 	 */
2856 	add_amd_tlb(devi, "dtlb-2M",
2857 	    BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
2858 	add_amd_tlb(devi, "itlb-2M",
2859 	    BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
2860 
2861 	/*
2862 	 * 4K L1 TLB configuration
2863 	 */
2864 
2865 	switch (cpi->cpi_vendor) {
2866 		uint_t nentries;
2867 	case X86_VENDOR_TM:
2868 		if (cpi->cpi_family >= 5) {
2869 			/*
2870 			 * Crusoe processors have 256 TLB entries, but
2871 			 * cpuid data format constrains them to only
2872 			 * reporting 255 of them.
2873 			 */
2874 			if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
2875 				nentries = 256;
2876 			/*
2877 			 * Crusoe processors also have a unified TLB
2878 			 */
2879 			add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
2880 			    nentries);
2881 			break;
2882 		}
2883 		/*FALLTHROUGH*/
2884 	default:
2885 		add_amd_tlb(devi, itlb4k_str,
2886 		    BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
2887 		add_amd_tlb(devi, dtlb4k_str,
2888 		    BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
2889 		break;
2890 	}
2891 
2892 	/*
2893 	 * data L1 cache configuration
2894 	 */
2895 
2896 	add_amd_cache(devi, l1_dcache_str,
2897 	    BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
2898 	    BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
2899 
2900 	/*
2901 	 * code L1 cache configuration
2902 	 */
2903 
2904 	add_amd_cache(devi, l1_icache_str,
2905 	    BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
2906 	    BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
2907 
2908 	if (cpi->cpi_xmaxeax < 0x80000006)
2909 		return;
2910 	cp = &cpi->cpi_extd[6];
2911 
2912 	/* Check for a unified L2 TLB for large pages */
2913 
2914 	if (BITX(cp->cp_eax, 31, 16) == 0)
2915 		add_amd_l2_tlb(devi, "l2-tlb-2M",
2916 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
2917 	else {
2918 		add_amd_l2_tlb(devi, "l2-dtlb-2M",
2919 		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
2920 		add_amd_l2_tlb(devi, "l2-itlb-2M",
2921 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
2922 	}
2923 
2924 	/* Check for a unified L2 TLB for 4K pages */
2925 
2926 	if (BITX(cp->cp_ebx, 31, 16) == 0) {
2927 		add_amd_l2_tlb(devi, "l2-tlb-4K",
2928 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
2929 	} else {
2930 		add_amd_l2_tlb(devi, "l2-dtlb-4K",
2931 		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
2932 		add_amd_l2_tlb(devi, "l2-itlb-4K",
2933 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
2934 	}
2935 
2936 	add_amd_l2_cache(devi, l2_cache_str,
2937 	    BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
2938 	    BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
2939 }
2940 
2941 /*
2942  * There are two basic ways that the x86 world describes it cache
2943  * and tlb architecture - Intel's way and AMD's way.
2944  *
2945  * Return which flavor of cache architecture we should use
2946  */
2947 static int
2948 x86_which_cacheinfo(struct cpuid_info *cpi)
2949 {
2950 	switch (cpi->cpi_vendor) {
2951 	case X86_VENDOR_Intel:
2952 		if (cpi->cpi_maxeax >= 2)
2953 			return (X86_VENDOR_Intel);
2954 		break;
2955 	case X86_VENDOR_AMD:
2956 		/*
2957 		 * The K5 model 1 was the first part from AMD that reported
2958 		 * cache sizes via extended cpuid functions.
2959 		 */
2960 		if (cpi->cpi_family > 5 ||
2961 		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
2962 			return (X86_VENDOR_AMD);
2963 		break;
2964 	case X86_VENDOR_TM:
2965 		if (cpi->cpi_family >= 5)
2966 			return (X86_VENDOR_AMD);
2967 		/*FALLTHROUGH*/
2968 	default:
2969 		/*
2970 		 * If they have extended CPU data for 0x80000005
2971 		 * then we assume they have AMD-format cache
2972 		 * information.
2973 		 *
2974 		 * If not, and the vendor happens to be Cyrix,
2975 		 * then try our-Cyrix specific handler.
2976 		 *
2977 		 * If we're not Cyrix, then assume we're using Intel's
2978 		 * table-driven format instead.
2979 		 */
2980 		if (cpi->cpi_xmaxeax >= 0x80000005)
2981 			return (X86_VENDOR_AMD);
2982 		else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
2983 			return (X86_VENDOR_Cyrix);
2984 		else if (cpi->cpi_maxeax >= 2)
2985 			return (X86_VENDOR_Intel);
2986 		break;
2987 	}
2988 	return (-1);
2989 }
2990 
2991 /*
2992  * create a node for the given cpu under the prom root node.
2993  * Also, create a cpu node in the device tree.
2994  */
/* The "cpus" nexus node; allocated by add_cpunode2devtree() on first use */
static dev_info_t *cpu_nex_devi = NULL;
/* Held by add_cpunode2devtree() for the duration of each call */
static kmutex_t cpu_node_lock;
2997 
2998 /*
2999  * Called from post_startup() and mp_startup()
3000  */
3001 void
3002 add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi)
3003 {
3004 	dev_info_t *cpu_devi;
3005 	int create;
3006 
3007 	mutex_enter(&cpu_node_lock);
3008 
3009 	/*
3010 	 * create a nexus node for all cpus identified as 'cpu_id' under
3011 	 * the root node.
3012 	 */
3013 	if (cpu_nex_devi == NULL) {
3014 		if (ndi_devi_alloc(ddi_root_node(), "cpus",
3015 		    (pnode_t)DEVI_SID_NODEID, &cpu_nex_devi) != NDI_SUCCESS) {
3016 			mutex_exit(&cpu_node_lock);
3017 			return;
3018 		}
3019 		(void) ndi_devi_online(cpu_nex_devi, 0);
3020 	}
3021 
3022 	/*
3023 	 * create a child node for cpu identified as 'cpu_id'
3024 	 */
3025 	cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID,
3026 	    cpu_id);
3027 	if (cpu_devi == NULL) {
3028 		mutex_exit(&cpu_node_lock);
3029 		return;
3030 	}
3031 
3032 	/* device_type */
3033 
3034 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3035 	    "device_type", "cpu");
3036 
3037 	/* reg */
3038 
3039 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3040 	    "reg", cpu_id);
3041 
3042 	/* cpu-mhz, and clock-frequency */
3043 
3044 	if (cpu_freq > 0) {
3045 		long long mul;
3046 
3047 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3048 		    "cpu-mhz", cpu_freq);
3049 
3050 		if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
3051 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3052 			    "clock-frequency", (int)mul);
3053 	}
3054 
3055 	(void) ndi_devi_online(cpu_devi, 0);
3056 
3057 	if ((x86_feature & X86_CPUID) == 0) {
3058 		mutex_exit(&cpu_node_lock);
3059 		return;
3060 	}
3061 
3062 	/* vendor-id */
3063 
3064 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3065 	    "vendor-id", cpi->cpi_vendorstr);
3066 
3067 	if (cpi->cpi_maxeax == 0) {
3068 		mutex_exit(&cpu_node_lock);
3069 		return;
3070 	}
3071 
3072 	/*
3073 	 * family, model, and step
3074 	 */
3075 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3076 	    "family", CPI_FAMILY(cpi));
3077 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3078 	    "cpu-model", CPI_MODEL(cpi));
3079 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3080 	    "stepping-id", CPI_STEP(cpi));
3081 
3082 	/* type */
3083 
3084 	switch (cpi->cpi_vendor) {
3085 	case X86_VENDOR_Intel:
3086 		create = 1;
3087 		break;
3088 	default:
3089 		create = 0;
3090 		break;
3091 	}
3092 	if (create)
3093 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3094 		    "type", CPI_TYPE(cpi));
3095 
3096 	/* ext-family */
3097 
3098 	switch (cpi->cpi_vendor) {
3099 	case X86_VENDOR_Intel:
3100 	case X86_VENDOR_AMD:
3101 		create = cpi->cpi_family >= 0xf;
3102 		break;
3103 	default:
3104 		create = 0;
3105 		break;
3106 	}
3107 	if (create)
3108 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3109 		    "ext-family", CPI_FAMILY_XTD(cpi));
3110 
3111 	/* ext-model */
3112 
3113 	switch (cpi->cpi_vendor) {
3114 	case X86_VENDOR_Intel:
3115 		create = CPI_MODEL(cpi) == 0xf;
3116 		break;
3117 	case X86_VENDOR_AMD:
3118 		create = CPI_FAMILY(cpi) == 0xf;
3119 		break;
3120 	default:
3121 		create = 0;
3122 		break;
3123 	}
3124 	if (create)
3125 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3126 		    "ext-model", CPI_MODEL_XTD(cpi));
3127 
3128 	/* generation */
3129 
3130 	switch (cpi->cpi_vendor) {
3131 	case X86_VENDOR_AMD:
3132 		/*
3133 		 * AMD K5 model 1 was the first part to support this
3134 		 */
3135 		create = cpi->cpi_xmaxeax >= 0x80000001;
3136 		break;
3137 	default:
3138 		create = 0;
3139 		break;
3140 	}
3141 	if (create)
3142 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3143 		    "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
3144 
3145 	/* brand-id */
3146 
3147 	switch (cpi->cpi_vendor) {
3148 	case X86_VENDOR_Intel:
3149 		/*
3150 		 * brand id first appeared on Pentium III Xeon model 8,
3151 		 * and Celeron model 8 processors and Opteron
3152 		 */
3153 		create = cpi->cpi_family > 6 ||
3154 		    (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
3155 		break;
3156 	case X86_VENDOR_AMD:
3157 		create = cpi->cpi_family >= 0xf;
3158 		break;
3159 	default:
3160 		create = 0;
3161 		break;
3162 	}
3163 	if (create && cpi->cpi_brandid != 0) {
3164 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3165 		    "brand-id", cpi->cpi_brandid);
3166 	}
3167 
3168 	/* chunks, and apic-id */
3169 
3170 	switch (cpi->cpi_vendor) {
3171 		/*
3172 		 * first available on Pentium IV and Opteron (K8)
3173 		 */
3174 	case X86_VENDOR_Intel:
3175 		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
3176 		break;
3177 	case X86_VENDOR_AMD:
3178 		create = cpi->cpi_family >= 0xf;
3179 		break;
3180 	default:
3181 		create = 0;
3182 		break;
3183 	}
3184 	if (create) {
3185 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3186 		    "chunks", CPI_CHUNKS(cpi));
3187 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3188 		    "apic-id", CPI_APIC_ID(cpi));
3189 		if (cpi->cpi_chipid >= 0) {
3190 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3191 			    "chip#", cpi->cpi_chipid);
3192 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3193 			    "clog#", cpi->cpi_clogid);
3194 		}
3195 	}
3196 
3197 	/* cpuid-features */
3198 
3199 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3200 	    "cpuid-features", CPI_FEATURES_EDX(cpi));
3201 
3202 
3203 	/* cpuid-features-ecx */
3204 
3205 	switch (cpi->cpi_vendor) {
3206 	case X86_VENDOR_Intel:
3207 		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
3208 		break;
3209 	default:
3210 		create = 0;
3211 		break;
3212 	}
3213 	if (create)
3214 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3215 		    "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
3216 
3217 	/* ext-cpuid-features */
3218 
3219 	switch (cpi->cpi_vendor) {
3220 	case X86_VENDOR_Intel:
3221 	case X86_VENDOR_AMD:
3222 	case X86_VENDOR_Cyrix:
3223 	case X86_VENDOR_TM:
3224 	case X86_VENDOR_Centaur:
3225 		create = cpi->cpi_xmaxeax >= 0x80000001;
3226 		break;
3227 	default:
3228 		create = 0;
3229 		break;
3230 	}
3231 	if (create) {
3232 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3233 		    "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
3234 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3235 		    "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
3236 	}
3237 
3238 	/*
3239 	 * Brand String first appeared in Intel Pentium IV, AMD K5
3240 	 * model 1, and Cyrix GXm.  On earlier models we try and
3241 	 * simulate something similar .. so this string should always
3242 	 * say -something- about the processor, however lame.
3243 	 */
3244 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3245 	    "brand-string", cpi->cpi_brandstr);
3246 
3247 	/*
3248 	 * Finally, cache and tlb information
3249 	 */
3250 	switch (x86_which_cacheinfo(cpi)) {
3251 	case X86_VENDOR_Intel:
3252 		intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
3253 		break;
3254 	case X86_VENDOR_Cyrix:
3255 		cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
3256 		break;
3257 	case X86_VENDOR_AMD:
3258 		amd_cache_info(cpi, cpu_devi);
3259 		break;
3260 	default:
3261 		break;
3262 	}
3263 
3264 	mutex_exit(&cpu_node_lock);
3265 }
3266 
/*
 * Request/result record passed between getl2cacheinfo() and the
 * per-vendor L2 lookup routines.  Each of the three pointers may be
 * NULL, in which case the corresponding attribute is not stored.
 */
struct l2info {
	int *l2i_csz;		/* where to store the L2 cache size */
	int *l2i_lsz;		/* where to store the L2 line size */
	int *l2i_assoc;		/* where to store the L2 associativity */
	int l2i_ret;		/* value getl2cacheinfo() returns */
};
3273 
3274 /*
3275  * A cacheinfo walker that fetches the size, line-size and associativity
3276  * of the L2 cache
3277  */
3278 static int
3279 intel_l2cinfo(void *arg, const struct cachetab *ct)
3280 {
3281 	struct l2info *l2i = arg;
3282 	int *ip;
3283 
3284 	if (ct->ct_label != l2_cache_str &&
3285 	    ct->ct_label != sl2_cache_str)
3286 		return (0);	/* not an L2 -- keep walking */
3287 
3288 	if ((ip = l2i->l2i_csz) != NULL)
3289 		*ip = ct->ct_size;
3290 	if ((ip = l2i->l2i_lsz) != NULL)
3291 		*ip = ct->ct_line_size;
3292 	if ((ip = l2i->l2i_assoc) != NULL)
3293 		*ip = ct->ct_assoc;
3294 	l2i->l2i_ret = ct->ct_size;
3295 	return (1);		/* was an L2 -- terminate walk */
3296 }
3297 
3298 static void
3299 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
3300 {
3301 	struct cpuid_regs *cp;
3302 	uint_t size, assoc;
3303 	int *ip;
3304 
3305 	if (cpi->cpi_xmaxeax < 0x80000006)
3306 		return;
3307 	cp = &cpi->cpi_extd[6];
3308 
3309 	if ((assoc = BITX(cp->cp_ecx, 15, 12)) != 0 &&
3310 	    (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
3311 		uint_t cachesz = size * 1024;
3312 
3313 
3314 		if ((ip = l2i->l2i_csz) != NULL)
3315 			*ip = cachesz;
3316 		if ((ip = l2i->l2i_lsz) != NULL)
3317 			*ip = BITX(cp->cp_ecx, 7, 0);
3318 		if ((ip = l2i->l2i_assoc) != NULL)
3319 			*ip = assoc;
3320 		l2i->l2i_ret = cachesz;
3321 	}
3322 }
3323 
3324 int
3325 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
3326 {
3327 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3328 	struct l2info __l2info, *l2i = &__l2info;
3329 
3330 	l2i->l2i_csz = csz;
3331 	l2i->l2i_lsz = lsz;
3332 	l2i->l2i_assoc = assoc;
3333 	l2i->l2i_ret = -1;
3334 
3335 	switch (x86_which_cacheinfo(cpi)) {
3336 	case X86_VENDOR_Intel:
3337 		intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
3338 		break;
3339 	case X86_VENDOR_Cyrix:
3340 		cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
3341 		break;
3342 	case X86_VENDOR_AMD:
3343 		amd_l2cacheinfo(cpi, l2i);
3344 		break;
3345 	default:
3346 		break;
3347 	}
3348 	return (l2i->l2i_ret);
3349 }
3350 
3351 size_t
3352 cpuid_get_mwait_size(cpu_t *cpu)
3353 {
3354 	ASSERT(cpuid_checkpass(cpu, 2));
3355 	return (cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max);
3356 }
3357