xref: /linux/arch/x86/kernel/cpu/cacheinfo.c (revision b60a5b8dcf49af9f2c60ae82e0383ee8e62a9a52)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *	Routines to identify caches on Intel CPU.
4  *
5  *	Changes:
6  *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
7  *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
8  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
9  */
10 
11 #include <linux/slab.h>
12 #include <linux/cacheinfo.h>
13 #include <linux/cpu.h>
14 #include <linux/sched.h>
15 #include <linux/capability.h>
16 #include <linux/sysfs.h>
17 #include <linux/pci.h>
18 
19 #include <asm/cpufeature.h>
20 #include <asm/cacheinfo.h>
21 #include <asm/amd_nb.h>
22 #include <asm/smp.h>
23 
24 #include "cpu.h"
25 
/* Cache classes used to tag the entries of cache_table[] below. */
#define LVL_1_INST	1	/* level 1 instruction cache */
#define LVL_1_DATA	2	/* level 1 data cache */
#define LVL_2		3	/* level 2 cache */
#define LVL_3		4	/* level 3 cache */
#define LVL_TRACE	5	/* trace cache */
31 
/* One row of the CPUID(2) descriptor lookup table. */
struct _cache_table {
	unsigned char descriptor;	/* descriptor byte to match */
	char cache_type;		/* one of the LVL_* classes */
	short size;			/* size added to the per-class total */
};
37 
/* Express a size given in megabytes in the table's kilobyte unit. */
#define MB(x)	(1024 * (x))
39 
40 /* All the cache descriptor types we care about (no TLB or
41    trace cache entries) */
42 
43 static const struct _cache_table cache_table[] =
44 {
45 	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
46 	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
47 	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
48 	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
49 	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
50 	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
51 	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
52 	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
53 	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
54 	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
55 	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
56 	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
57 	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
58 	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
59 	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
60 	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
61 	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
62 	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
63 	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
64 	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
65 	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
66 	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
67 	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
68 	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
69 	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
70 	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
71 	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
72 	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
73 	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
74 	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
75 	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
76 	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
77 	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
78 	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
79 	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
80 	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
81 	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
82 	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
83 	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
84 	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
85 	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
86 	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
87 	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
88 	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
89 	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
90 	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
91 	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
92 	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
93 	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
94 	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
95 	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
96 	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
97 	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
98 	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
99 	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
100 	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
101 	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
102 	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
103 	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
104 	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
105 	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
106 	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
107 	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
108 	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
109 	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
110 	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
111 	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
112 	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
113 	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
114 	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
115 	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
116 	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
117 	{ 0x00, 0, 0}
118 };
119 
120 
/* Cache type values as they appear in the 'type' field of CPUID(4) EAX. */
enum _cache_type {
	CTYPE_NULL = 0,		/* no (more) caches */
	CTYPE_DATA = 1,		/* data cache */
	CTYPE_INST = 2,		/* instruction cache */
	CTYPE_UNIFIED = 3	/* unified cache */
};
127 
128 union _cpuid4_leaf_eax {
129 	struct {
130 		enum _cache_type	type:5;
131 		unsigned int		level:3;
132 		unsigned int		is_self_initializing:1;
133 		unsigned int		is_fully_associative:1;
134 		unsigned int		reserved:4;
135 		unsigned int		num_threads_sharing:12;
136 		unsigned int		num_cores_on_die:6;
137 	} split;
138 	u32 full;
139 };
140 
141 union _cpuid4_leaf_ebx {
142 	struct {
143 		unsigned int		coherency_line_size:12;
144 		unsigned int		physical_line_partition:10;
145 		unsigned int		ways_of_associativity:10;
146 	} split;
147 	u32 full;
148 };
149 
150 union _cpuid4_leaf_ecx {
151 	struct {
152 		unsigned int		number_of_sets:32;
153 	} split;
154 	u32 full;
155 };
156 
157 struct _cpuid4_info_regs {
158 	union _cpuid4_leaf_eax eax;
159 	union _cpuid4_leaf_ebx ebx;
160 	union _cpuid4_leaf_ecx ecx;
161 	unsigned int id;
162 	unsigned long size;
163 	struct amd_northbridge *nb;
164 };
165 
/*
 * Number of cache leaves to enumerate; written by the init_*_cacheinfo()
 * functions and read by __init_cache_level() and init_intel_cacheinfo().
 */
166 static unsigned short num_cache_leaves;
167 
/*
 * AMD doesn't have CPUID4, so it is emulated here in order to report the
 * same information to the user.  The emulation makes some assumptions about
 * the machine (L2 not shared, no SMT, etc.) that are currently true on AMD
 * CPUs.
 *
 * In theory the TLBs could be reported as a fake type too (their registers
 * are read into "dummy" in amd_cpuid4()).  Maybe later.
 */

/* One L1 cache register of CPUID 0x80000005 (ECX for L1D, EDX for L1I). */
union l1_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:8;
		unsigned int assoc:8;
		unsigned int size_in_kb:8;
	};
	unsigned int val;
};
183 
/* L2 cache register (ECX) of CPUID 0x80000006. */
union l2_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded, see assocs[] */
		unsigned int size_in_kb:16;
	};
	unsigned int val;
};
193 
/* L3 cache register (EDX) of CPUID 0x80000006. */
union l3_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded, see assocs[] */
		unsigned int res:2;
		unsigned int size_encoded:14;	/* in units of 512 KB */
	};
	unsigned int val;
};
204 
/*
 * Decode table for the 4-bit L2/L3 associativity field of
 * CPUID 0x80000006: index is the encoded value, entry is the number of
 * ways.  Encodings without an entry decode to 0.
 *
 * Encodings 3 (3-way) and 5 (6-way) used to be missing, so caches using
 * them were reported with a bogus way count.
 */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
218 
/*
 * Cache level and CTYPE_* type reported by amd_cpuid4() for each emulated
 * leaf index (0..3).
 */
static const unsigned char levels[] = { [0] = 1, [1] = 1, [2] = 2, [3] = 3 };
static const unsigned char types[]  = { [0] = 1, [1] = 2, [2] = 3, [3] = 3 };
221 
/*
 * Translates the CTYPE_* value found in a cache leaf into the generic
 * enum cache_type stored in struct cacheinfo (see ci_leaf_init()).
 */
222 static const enum cache_type cache_type_map[] = {
223 	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
224 	[CTYPE_DATA] = CACHE_TYPE_DATA,
225 	[CTYPE_INST] = CACHE_TYPE_INST,
226 	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
227 };
228 
229 static void
230 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
231 		     union _cpuid4_leaf_ebx *ebx,
232 		     union _cpuid4_leaf_ecx *ecx)
233 {
234 	unsigned dummy;
235 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
236 	union l1_cache l1i, l1d;
237 	union l2_cache l2;
238 	union l3_cache l3;
239 	union l1_cache *l1 = &l1d;
240 
241 	eax->full = 0;
242 	ebx->full = 0;
243 	ecx->full = 0;
244 
245 	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
246 	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
247 
248 	switch (leaf) {
249 	case 1:
250 		l1 = &l1i;
251 		/* fall through */
252 	case 0:
253 		if (!l1->val)
254 			return;
255 		assoc = assocs[l1->assoc];
256 		line_size = l1->line_size;
257 		lines_per_tag = l1->lines_per_tag;
258 		size_in_kb = l1->size_in_kb;
259 		break;
260 	case 2:
261 		if (!l2.val)
262 			return;
263 		assoc = assocs[l2.assoc];
264 		line_size = l2.line_size;
265 		lines_per_tag = l2.lines_per_tag;
266 		/* cpu_data has errata corrections for K7 applied */
267 		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
268 		break;
269 	case 3:
270 		if (!l3.val)
271 			return;
272 		assoc = assocs[l3.assoc];
273 		line_size = l3.line_size;
274 		lines_per_tag = l3.lines_per_tag;
275 		size_in_kb = l3.size_encoded * 512;
276 		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
277 			size_in_kb = size_in_kb >> 1;
278 			assoc = assoc >> 1;
279 		}
280 		break;
281 	default:
282 		return;
283 	}
284 
285 	eax->split.is_self_initializing = 1;
286 	eax->split.type = types[leaf];
287 	eax->split.level = levels[leaf];
288 	eax->split.num_threads_sharing = 0;
289 	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
290 
291 
292 	if (assoc == 0xffff)
293 		eax->split.is_fully_associative = 1;
294 	ebx->split.coherency_line_size = line_size - 1;
295 	ebx->split.ways_of_associativity = assoc - 1;
296 	ebx->split.physical_line_partition = lines_per_tag - 1;
297 	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
298 		(ebx->split.ways_of_associativity + 1) - 1;
299 }
300 
301 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
302 
303 /*
304  * L3 cache descriptors
305  */
306 static void amd_calc_l3_indices(struct amd_northbridge *nb)
307 {
308 	struct amd_l3_cache *l3 = &nb->l3_cache;
309 	unsigned int sc0, sc1, sc2, sc3;
310 	u32 val = 0;
311 
312 	pci_read_config_dword(nb->misc, 0x1C4, &val);
313 
314 	/* calculate subcache sizes */
315 	l3->subcaches[0] = sc0 = !(val & BIT(0));
316 	l3->subcaches[1] = sc1 = !(val & BIT(4));
317 
318 	if (boot_cpu_data.x86 == 0x15) {
319 		l3->subcaches[0] = sc0 += !(val & BIT(1));
320 		l3->subcaches[1] = sc1 += !(val & BIT(5));
321 	}
322 
323 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
324 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
325 
326 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
327 }
328 
329 /*
330  * check whether a slot used for disabling an L3 index is occupied.
331  * @l3: L3 cache descriptor
332  * @slot: slot number (0..1)
333  *
334  * @returns: the disabled index if used or negative value if slot free.
335  */
336 static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
337 {
338 	unsigned int reg = 0;
339 
340 	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
341 
342 	/* check whether this slot is activated already */
343 	if (reg & (3UL << 30))
344 		return reg & 0xfff;
345 
346 	return -1;
347 }
348 
349 static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
350 				  unsigned int slot)
351 {
352 	int index;
353 	struct amd_northbridge *nb = this_leaf->priv;
354 
355 	index = amd_get_l3_disable_slot(nb, slot);
356 	if (index >= 0)
357 		return sprintf(buf, "%d\n", index);
358 
359 	return sprintf(buf, "FREE\n");
360 }
361 
362 #define SHOW_CACHE_DISABLE(slot)					\
363 static ssize_t								\
364 cache_disable_##slot##_show(struct device *dev,				\
365 			    struct device_attribute *attr, char *buf)	\
366 {									\
367 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
368 	return show_cache_disable(this_leaf, buf, slot);		\
369 }
370 SHOW_CACHE_DISABLE(0)
371 SHOW_CACHE_DISABLE(1)
372 
373 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
374 				 unsigned slot, unsigned long idx)
375 {
376 	int i;
377 
378 	idx |= BIT(30);
379 
380 	/*
381 	 *  disable index in all 4 subcaches
382 	 */
383 	for (i = 0; i < 4; i++) {
384 		u32 reg = idx | (i << 20);
385 
386 		if (!nb->l3_cache.subcaches[i])
387 			continue;
388 
389 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
390 
391 		/*
392 		 * We need to WBINVD on a core on the node containing the L3
393 		 * cache which indices we disable therefore a simple wbinvd()
394 		 * is not sufficient.
395 		 */
396 		wbinvd_on_cpu(cpu);
397 
398 		reg |= BIT(31);
399 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
400 	}
401 }
402 
403 /*
404  * disable a L3 cache index by using a disable-slot
405  *
406  * @l3:    L3 cache descriptor
407  * @cpu:   A CPU on the node containing the L3 cache
408  * @slot:  slot number (0..1)
409  * @index: index to disable
410  *
411  * @return: 0 on success, error status on failure
412  */
413 static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
414 			    unsigned slot, unsigned long index)
415 {
416 	int ret = 0;
417 
418 	/*  check if @slot is already used or the index is already disabled */
419 	ret = amd_get_l3_disable_slot(nb, slot);
420 	if (ret >= 0)
421 		return -EEXIST;
422 
423 	if (index > nb->l3_cache.indices)
424 		return -EINVAL;
425 
426 	/* check whether the other slot has disabled the same index already */
427 	if (index == amd_get_l3_disable_slot(nb, !slot))
428 		return -EEXIST;
429 
430 	amd_l3_disable_index(nb, cpu, slot, index);
431 
432 	return 0;
433 }
434 
435 static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
436 				   const char *buf, size_t count,
437 				   unsigned int slot)
438 {
439 	unsigned long val = 0;
440 	int cpu, err = 0;
441 	struct amd_northbridge *nb = this_leaf->priv;
442 
443 	if (!capable(CAP_SYS_ADMIN))
444 		return -EPERM;
445 
446 	cpu = cpumask_first(&this_leaf->shared_cpu_map);
447 
448 	if (kstrtoul(buf, 10, &val) < 0)
449 		return -EINVAL;
450 
451 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
452 	if (err) {
453 		if (err == -EEXIST)
454 			pr_warn("L3 slot %d in use/index already disabled!\n",
455 				   slot);
456 		return err;
457 	}
458 	return count;
459 }
460 
461 #define STORE_CACHE_DISABLE(slot)					\
462 static ssize_t								\
463 cache_disable_##slot##_store(struct device *dev,			\
464 			     struct device_attribute *attr,		\
465 			     const char *buf, size_t count)		\
466 {									\
467 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
468 	return store_cache_disable(this_leaf, buf, count, slot);	\
469 }
470 STORE_CACHE_DISABLE(0)
471 STORE_CACHE_DISABLE(1)
472 
473 static ssize_t subcaches_show(struct device *dev,
474 			      struct device_attribute *attr, char *buf)
475 {
476 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
477 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
478 
479 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
480 }
481 
482 static ssize_t subcaches_store(struct device *dev,
483 			       struct device_attribute *attr,
484 			       const char *buf, size_t count)
485 {
486 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
487 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
488 	unsigned long val;
489 
490 	if (!capable(CAP_SYS_ADMIN))
491 		return -EPERM;
492 
493 	if (kstrtoul(buf, 16, &val) < 0)
494 		return -EINVAL;
495 
496 	if (amd_set_subcaches(cpu, val))
497 		return -EINVAL;
498 
499 	return count;
500 }
501 
/*
 * Device attributes backed by the *_show()/*_store() handlers above.  They
 * are referenced below as dev_attr_cache_disable_0, dev_attr_cache_disable_1
 * and dev_attr_subcaches.
 */
502 static DEVICE_ATTR_RW(cache_disable_0);
503 static DEVICE_ATTR_RW(cache_disable_1);
504 static DEVICE_ATTR_RW(subcaches);
505 
506 static umode_t
507 cache_private_attrs_is_visible(struct kobject *kobj,
508 			       struct attribute *attr, int unused)
509 {
510 	struct device *dev = kobj_to_dev(kobj);
511 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
512 	umode_t mode = attr->mode;
513 
514 	if (!this_leaf->priv)
515 		return 0;
516 
517 	if ((attr == &dev_attr_subcaches.attr) &&
518 	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
519 		return mode;
520 
521 	if ((attr == &dev_attr_cache_disable_0.attr ||
522 	     attr == &dev_attr_cache_disable_1.attr) &&
523 	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
524 		return mode;
525 
526 	return 0;
527 }
528 
/*
 * Attribute group returned by cache_get_priv_group().  Its .attrs array is
 * filled in at run time by init_amd_l3_attrs().
 */
529 static struct attribute_group cache_private_group = {
530 	.is_visible = cache_private_attrs_is_visible,
531 };
532 
533 static void init_amd_l3_attrs(void)
534 {
535 	int n = 1;
536 	static struct attribute **amd_l3_attrs;
537 
538 	if (amd_l3_attrs) /* already initialized */
539 		return;
540 
541 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
542 		n += 2;
543 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
544 		n += 1;
545 
546 	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
547 	if (!amd_l3_attrs)
548 		return;
549 
550 	n = 0;
551 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
552 		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
553 		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
554 	}
555 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
556 		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
557 
558 	cache_private_group.attrs = amd_l3_attrs;
559 }
560 
561 const struct attribute_group *
562 cache_get_priv_group(struct cacheinfo *this_leaf)
563 {
564 	struct amd_northbridge *nb = this_leaf->priv;
565 
566 	if (this_leaf->level < 3 || !nb)
567 		return NULL;
568 
569 	if (nb && nb->l3_cache.indices)
570 		init_amd_l3_attrs();
571 
572 	return &cache_private_group;
573 }
574 
575 static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
576 {
577 	int node;
578 
579 	/* only for L3, and not in virtualized environments */
580 	if (index < 3)
581 		return;
582 
583 	node = amd_get_nb_id(smp_processor_id());
584 	this_leaf->nb = node_to_amd_nb(node);
585 	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
586 		amd_calc_l3_indices(this_leaf->nb);
587 }
588 #else
/* Without CONFIG_AMD_NB and CONFIG_SYSFS the L3 setup expands to nothing. */
589 #define amd_init_l3_cache(x, y)
590 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
591 
592 static int
593 cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
594 {
595 	union _cpuid4_leaf_eax	eax;
596 	union _cpuid4_leaf_ebx	ebx;
597 	union _cpuid4_leaf_ecx	ecx;
598 	unsigned		edx;
599 
600 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
601 		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
602 			cpuid_count(0x8000001d, index, &eax.full,
603 				    &ebx.full, &ecx.full, &edx);
604 		else
605 			amd_cpuid4(index, &eax, &ebx, &ecx);
606 		amd_init_l3_cache(this_leaf, index);
607 	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
608 		cpuid_count(0x8000001d, index, &eax.full,
609 			    &ebx.full, &ecx.full, &edx);
610 		amd_init_l3_cache(this_leaf, index);
611 	} else {
612 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
613 	}
614 
615 	if (eax.split.type == CTYPE_NULL)
616 		return -EIO; /* better error ? */
617 
618 	this_leaf->eax = eax;
619 	this_leaf->ebx = ebx;
620 	this_leaf->ecx = ecx;
621 	this_leaf->size = (ecx.split.number_of_sets          + 1) *
622 			  (ebx.split.coherency_line_size     + 1) *
623 			  (ebx.split.physical_line_partition + 1) *
624 			  (ebx.split.ways_of_associativity   + 1);
625 	return 0;
626 }
627 
628 static int find_num_cache_leaves(struct cpuinfo_x86 *c)
629 {
630 	unsigned int		eax, ebx, ecx, edx, op;
631 	union _cpuid4_leaf_eax	cache_eax;
632 	int 			i = -1;
633 
634 	if (c->x86_vendor == X86_VENDOR_AMD ||
635 	    c->x86_vendor == X86_VENDOR_HYGON)
636 		op = 0x8000001d;
637 	else
638 		op = 4;
639 
640 	do {
641 		++i;
642 		/* Do cpuid(op) loop to find out num_cache_leaves */
643 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
644 		cache_eax.full = eax;
645 	} while (cache_eax.split.type != CTYPE_NULL);
646 	return i;
647 }
648 
649 void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
650 {
651 	/*
652 	 * We may have multiple LLCs if L3 caches exist, so check if we
653 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
654 	 */
655 	if (!cpuid_edx(0x80000006))
656 		return;
657 
658 	if (c->x86 < 0x17) {
659 		/* LLC is at the node level. */
660 		per_cpu(cpu_llc_id, cpu) = node_id;
661 	} else if (c->x86 == 0x17 &&
662 		   c->x86_model >= 0 && c->x86_model <= 0x1F) {
663 		/*
664 		 * LLC is at the core complex level.
665 		 * Core complex ID is ApicId[3] for these processors.
666 		 */
667 		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
668 	} else {
669 		/*
670 		 * LLC ID is calculated from the number of threads sharing the
671 		 * cache.
672 		 * */
673 		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
674 		u32 llc_index = find_num_cache_leaves(c) - 1;
675 
676 		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
677 		if (eax)
678 			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
679 
680 		if (num_sharing_cache) {
681 			int bits = get_count_order(num_sharing_cache);
682 
683 			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
684 		}
685 	}
686 }
687 
688 void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
689 {
690 	/*
691 	 * We may have multiple LLCs if L3 caches exist, so check if we
692 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
693 	 */
694 	if (!cpuid_edx(0x80000006))
695 		return;
696 
697 	/*
698 	 * LLC is at the core complex level.
699 	 * Core complex ID is ApicId[3] for these processors.
700 	 */
701 	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
702 }
703 
704 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
705 {
706 
707 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
708 		num_cache_leaves = find_num_cache_leaves(c);
709 	} else if (c->extended_cpuid_level >= 0x80000006) {
710 		if (cpuid_edx(0x80000006) & 0xf000)
711 			num_cache_leaves = 4;
712 		else
713 			num_cache_leaves = 3;
714 	}
715 }
716 
/* Determine num_cache_leaves on Hygon by counting the CPUID cache leaves. */
717 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
718 {
719 	num_cache_leaves = find_num_cache_leaves(c);
720 }
721 
722 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
723 {
724 	/* Cache sizes */
725 	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
726 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
727 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
728 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
729 #ifdef CONFIG_SMP
730 	unsigned int cpu = c->cpu_index;
731 #endif
732 
733 	if (c->cpuid_level > 3) {
734 		static int is_initialized;
735 
736 		if (is_initialized == 0) {
737 			/* Init num_cache_leaves from boot CPU */
738 			num_cache_leaves = find_num_cache_leaves(c);
739 			is_initialized++;
740 		}
741 
742 		/*
743 		 * Whenever possible use cpuid(4), deterministic cache
744 		 * parameters cpuid leaf to find the cache details
745 		 */
746 		for (i = 0; i < num_cache_leaves; i++) {
747 			struct _cpuid4_info_regs this_leaf = {};
748 			int retval;
749 
750 			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
751 			if (retval < 0)
752 				continue;
753 
754 			switch (this_leaf.eax.split.level) {
755 			case 1:
756 				if (this_leaf.eax.split.type == CTYPE_DATA)
757 					new_l1d = this_leaf.size/1024;
758 				else if (this_leaf.eax.split.type == CTYPE_INST)
759 					new_l1i = this_leaf.size/1024;
760 				break;
761 			case 2:
762 				new_l2 = this_leaf.size/1024;
763 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
764 				index_msb = get_count_order(num_threads_sharing);
765 				l2_id = c->apicid & ~((1 << index_msb) - 1);
766 				break;
767 			case 3:
768 				new_l3 = this_leaf.size/1024;
769 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
770 				index_msb = get_count_order(num_threads_sharing);
771 				l3_id = c->apicid & ~((1 << index_msb) - 1);
772 				break;
773 			default:
774 				break;
775 			}
776 		}
777 	}
778 	/*
779 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
780 	 * trace cache
781 	 */
782 	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
783 		/* supports eax=2  call */
784 		int j, n;
785 		unsigned int regs[4];
786 		unsigned char *dp = (unsigned char *)regs;
787 		int only_trace = 0;
788 
789 		if (num_cache_leaves != 0 && c->x86 == 15)
790 			only_trace = 1;
791 
792 		/* Number of times to iterate */
793 		n = cpuid_eax(2) & 0xFF;
794 
795 		for (i = 0 ; i < n ; i++) {
796 			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
797 
798 			/* If bit 31 is set, this is an unknown format */
799 			for (j = 0 ; j < 3 ; j++)
800 				if (regs[j] & (1 << 31))
801 					regs[j] = 0;
802 
803 			/* Byte 0 is level count, not a descriptor */
804 			for (j = 1 ; j < 16 ; j++) {
805 				unsigned char des = dp[j];
806 				unsigned char k = 0;
807 
808 				/* look up this descriptor in the table */
809 				while (cache_table[k].descriptor != 0) {
810 					if (cache_table[k].descriptor == des) {
811 						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
812 							break;
813 						switch (cache_table[k].cache_type) {
814 						case LVL_1_INST:
815 							l1i += cache_table[k].size;
816 							break;
817 						case LVL_1_DATA:
818 							l1d += cache_table[k].size;
819 							break;
820 						case LVL_2:
821 							l2 += cache_table[k].size;
822 							break;
823 						case LVL_3:
824 							l3 += cache_table[k].size;
825 							break;
826 						case LVL_TRACE:
827 							trace += cache_table[k].size;
828 							break;
829 						}
830 
831 						break;
832 					}
833 
834 					k++;
835 				}
836 			}
837 		}
838 	}
839 
840 	if (new_l1d)
841 		l1d = new_l1d;
842 
843 	if (new_l1i)
844 		l1i = new_l1i;
845 
846 	if (new_l2) {
847 		l2 = new_l2;
848 #ifdef CONFIG_SMP
849 		per_cpu(cpu_llc_id, cpu) = l2_id;
850 #endif
851 	}
852 
853 	if (new_l3) {
854 		l3 = new_l3;
855 #ifdef CONFIG_SMP
856 		per_cpu(cpu_llc_id, cpu) = l3_id;
857 #endif
858 	}
859 
860 #ifdef CONFIG_SMP
861 	/*
862 	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
863 	 * turns means that the only possibility is SMT (as indicated in
864 	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
865 	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
866 	 * c->phys_proc_id.
867 	 */
868 	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
869 		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
870 #endif
871 
872 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
873 
874 	if (!l2)
875 		cpu_detect_cache_sizes(c);
876 }
877 
878 static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
879 				    struct _cpuid4_info_regs *base)
880 {
881 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
882 	struct cacheinfo *this_leaf;
883 	int i, sibling;
884 
885 	/*
886 	 * For L3, always use the pre-calculated cpu_llc_shared_mask
887 	 * to derive shared_cpu_map.
888 	 */
889 	if (index == 3) {
890 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
891 			this_cpu_ci = get_cpu_cacheinfo(i);
892 			if (!this_cpu_ci->info_list)
893 				continue;
894 			this_leaf = this_cpu_ci->info_list + index;
895 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
896 				if (!cpu_online(sibling))
897 					continue;
898 				cpumask_set_cpu(sibling,
899 						&this_leaf->shared_cpu_map);
900 			}
901 		}
902 	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
903 		unsigned int apicid, nshared, first, last;
904 
905 		nshared = base->eax.split.num_threads_sharing + 1;
906 		apicid = cpu_data(cpu).apicid;
907 		first = apicid - (apicid % nshared);
908 		last = first + nshared - 1;
909 
910 		for_each_online_cpu(i) {
911 			this_cpu_ci = get_cpu_cacheinfo(i);
912 			if (!this_cpu_ci->info_list)
913 				continue;
914 
915 			apicid = cpu_data(i).apicid;
916 			if ((apicid < first) || (apicid > last))
917 				continue;
918 
919 			this_leaf = this_cpu_ci->info_list + index;
920 
921 			for_each_online_cpu(sibling) {
922 				apicid = cpu_data(sibling).apicid;
923 				if ((apicid < first) || (apicid > last))
924 					continue;
925 				cpumask_set_cpu(sibling,
926 						&this_leaf->shared_cpu_map);
927 			}
928 		}
929 	} else
930 		return 0;
931 
932 	return 1;
933 }
934 
935 static void __cache_cpumap_setup(unsigned int cpu, int index,
936 				 struct _cpuid4_info_regs *base)
937 {
938 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
939 	struct cacheinfo *this_leaf, *sibling_leaf;
940 	unsigned long num_threads_sharing;
941 	int index_msb, i;
942 	struct cpuinfo_x86 *c = &cpu_data(cpu);
943 
944 	if (c->x86_vendor == X86_VENDOR_AMD ||
945 	    c->x86_vendor == X86_VENDOR_HYGON) {
946 		if (__cache_amd_cpumap_setup(cpu, index, base))
947 			return;
948 	}
949 
950 	this_leaf = this_cpu_ci->info_list + index;
951 	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
952 
953 	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
954 	if (num_threads_sharing == 1)
955 		return;
956 
957 	index_msb = get_count_order(num_threads_sharing);
958 
959 	for_each_online_cpu(i)
960 		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
961 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
962 
963 			if (i == cpu || !sib_cpu_ci->info_list)
964 				continue;/* skip if itself or no cacheinfo */
965 			sibling_leaf = sib_cpu_ci->info_list + index;
966 			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
967 			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
968 		}
969 }
970 
971 static void ci_leaf_init(struct cacheinfo *this_leaf,
972 			 struct _cpuid4_info_regs *base)
973 {
974 	this_leaf->id = base->id;
975 	this_leaf->attributes = CACHE_ID;
976 	this_leaf->level = base->eax.split.level;
977 	this_leaf->type = cache_type_map[base->eax.split.type];
978 	this_leaf->coherency_line_size =
979 				base->ebx.split.coherency_line_size + 1;
980 	this_leaf->ways_of_associativity =
981 				base->ebx.split.ways_of_associativity + 1;
982 	this_leaf->size = base->size;
983 	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
984 	this_leaf->physical_line_partition =
985 				base->ebx.split.physical_line_partition + 1;
986 	this_leaf->priv = base->nb;
987 }
988 
989 static int __init_cache_level(unsigned int cpu)
990 {
991 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
992 
993 	if (!num_cache_leaves)
994 		return -ENOENT;
995 	if (!this_cpu_ci)
996 		return -EINVAL;
997 	this_cpu_ci->num_levels = 3;
998 	this_cpu_ci->num_leaves = num_cache_leaves;
999 	return 0;
1000 }
1001 
1002 /*
1003  * The max shared threads number comes from CPUID.4:EAX[25-14] with input
1004  * ECX as cache index. Then right shift apicid by the number's order to get
1005  * cache id for this cache node.
1006  */
1007 static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1008 {
1009 	struct cpuinfo_x86 *c = &cpu_data(cpu);
1010 	unsigned long num_threads_sharing;
1011 	int index_msb;
1012 
1013 	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1014 	index_msb = get_count_order(num_threads_sharing);
1015 	id4_regs->id = c->apicid >> index_msb;
1016 }
1017 
1018 static int __populate_cache_leaves(unsigned int cpu)
1019 {
1020 	unsigned int idx, ret;
1021 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1022 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1023 	struct _cpuid4_info_regs id4_regs = {};
1024 
1025 	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1026 		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1027 		if (ret)
1028 			return ret;
1029 		get_cache_id(cpu, &id4_regs);
1030 		ci_leaf_init(this_leaf++, &id4_regs);
1031 		__cache_cpumap_setup(cpu, idx, &id4_regs);
1032 	}
1033 	this_cpu_ci->cpu_map_populated = true;
1034 
1035 	return 0;
1036 }
1037 
/*
 * NOTE(review): presumably these expand to the public init_cache_level()
 * and populate_cache_leaves() entry points wrapping the __-prefixed
 * functions above; the macro is defined outside this file, so confirm
 * against its definition.
 */
1038 DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
1039 DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
1040