xref: /linux/arch/x86/kernel/cpu/cacheinfo.c (revision c924c5e9b8c65b3a479a90e5e37d74cc8cd9fe0a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *	Routines to identify caches on Intel CPU.
4  *
5  *	Changes:
6  *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
7  *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
8  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
9  */
10 
11 #include <linux/cacheinfo.h>
12 #include <linux/capability.h>
13 #include <linux/cpu.h>
14 #include <linux/cpuhotplug.h>
15 #include <linux/pci.h>
16 #include <linux/stop_machine.h>
17 #include <linux/sysfs.h>
18 
19 #include <asm/amd_nb.h>
20 #include <asm/cacheinfo.h>
21 #include <asm/cpufeature.h>
22 #include <asm/mtrr.h>
23 #include <asm/smp.h>
24 #include <asm/tlbflush.h>
25 
26 #include "cpu.h"
27 
28 #define LVL_1_INST	1
29 #define LVL_1_DATA	2
30 #define LVL_2		3
31 #define LVL_3		4
32 
33 /* Shared last level cache maps */
34 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
35 
36 /* Shared L2 cache maps */
37 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
38 
39 static cpumask_var_t cpu_cacheinfo_mask;
40 
41 /* Kernel controls MTRR and/or PAT MSRs. */
42 unsigned int memory_caching_control __ro_after_init;
43 
44 struct _cache_table {
45 	unsigned char descriptor;	/* descriptor byte returned by CPUID(2) */
46 	char cache_type;		/* LVL_1_INST, LVL_1_DATA, LVL_2 or LVL_3 */
47 	short size;			/* cache size in KB */
48 };
49 
50 #define MB(x)	((x) * 1024)
51 
52 /* All the cache descriptor types we care about (no TLB or
53    trace cache entries) */
54 
55 static const struct _cache_table cache_table[] =
56 {
57 	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
58 	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
59 	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
60 	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
61 	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
62 	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
63 	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
64 	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
65 	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
66 	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
67 	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
68 	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
69 	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
70 	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
71 	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
72 	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
73 	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
74 	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
75 	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
76 	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
77 	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
78 	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
79 	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
80 	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
81 	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
82 	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
83 	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
84 	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
85 	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
86 	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
87 	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
88 	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
89 	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
90 	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
91 	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
92 	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
93 	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
94 	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
95 	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
96 	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
97 	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
98 	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
99 	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
100 	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
101 	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
102 	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
103 	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
104 	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
105 	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
106 	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
107 	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
108 	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
109 	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
110 	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
111 	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
112 	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
113 	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
114 	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
115 	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
116 	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
117 	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
118 	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
119 	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
120 	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
121 	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
122 	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
123 	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
124 	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
125 	{ 0x00, 0, 0}
126 };
127 
128 
129 enum _cache_type {
130 	CTYPE_NULL = 0,
131 	CTYPE_DATA = 1,
132 	CTYPE_INST = 2,
133 	CTYPE_UNIFIED = 3
134 };
135 
136 union _cpuid4_leaf_eax {
137 	struct {
138 		enum _cache_type	type:5;
139 		unsigned int		level:3;
140 		unsigned int		is_self_initializing:1;
141 		unsigned int		is_fully_associative:1;
142 		unsigned int		reserved:4;
143 		unsigned int		num_threads_sharing:12;
144 		unsigned int		num_cores_on_die:6;
145 	} split;
146 	u32 full;
147 };
148 
149 union _cpuid4_leaf_ebx {
150 	struct {
151 		unsigned int		coherency_line_size:12;
152 		unsigned int		physical_line_partition:10;
153 		unsigned int		ways_of_associativity:10;
154 	} split;
155 	u32 full;
156 };
157 
158 union _cpuid4_leaf_ecx {
159 	struct {
160 		unsigned int		number_of_sets:32;
161 	} split;
162 	u32 full;
163 };
164 
165 struct _cpuid4_info_regs {
166 	union _cpuid4_leaf_eax eax;
167 	union _cpuid4_leaf_ebx ebx;
168 	union _cpuid4_leaf_ecx ecx;
169 	unsigned int id;
170 	unsigned long size;
171 	struct amd_northbridge *nb;
172 };
173 
174 /* AMD doesn't have CPUID4. Emulate it here to report the same
175    information to the user.  This makes some assumptions about the machine:
176    L2 not shared, no SMT etc. that is currently true on AMD CPUs.
177 
178    In theory the TLBs could be reported as fake type (they are in "dummy").
179    Maybe later */
180 union l1_cache {
181 	struct {
182 		unsigned line_size:8;
183 		unsigned lines_per_tag:8;
184 		unsigned assoc:8;
185 		unsigned size_in_kb:8;
186 	};
187 	unsigned val;
188 };
189 
190 union l2_cache {
191 	struct {
192 		unsigned line_size:8;
193 		unsigned lines_per_tag:4;
194 		unsigned assoc:4;
195 		unsigned size_in_kb:16;
196 	};
197 	unsigned val;
198 };
199 
200 union l3_cache {
201 	struct {
202 		unsigned line_size:8;
203 		unsigned lines_per_tag:4;
204 		unsigned assoc:4;
205 		unsigned res:2;
206 		unsigned size_encoded:14;
207 	};
208 	unsigned val;
209 };
210 
211 static const unsigned short assocs[] = {
212 	[1] = 1,
213 	[2] = 2,
214 	[4] = 4,
215 	[6] = 8,
216 	[8] = 16,
217 	[0xa] = 32,
218 	[0xb] = 48,
219 	[0xc] = 64,
220 	[0xd] = 96,
221 	[0xe] = 128,
222 	[0xf] = 0xffff /* fully associative - no way to show this currently */
223 };
224 
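/*
 * Map amd_cpuid4()'s leaf index (0 = L1D, 1 = L1I, 2 = L2, 3 = L3) to the
 * CPUID(4)-style cache level and type (CTYPE_DATA/CTYPE_INST/CTYPE_UNIFIED).
 */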
225 static const unsigned char levels[] = { 1, 1, 2, 3 };
226 static const unsigned char types[] = { 1, 2, 3, 3 };
227 
228 static const enum cache_type cache_type_map[] = {
229 	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
230 	[CTYPE_DATA] = CACHE_TYPE_DATA,
231 	[CTYPE_INST] = CACHE_TYPE_INST,
232 	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
233 };
234 
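/*
 * Illustrative example (assumed values, not taken from any particular CPU):
 * a 512 KB, 16-way L2 with 64-byte lines is encoded below as
 * ways_of_associativity = 15, coherency_line_size = 63 and
 * number_of_sets = 512 * 1024 / 64 / 16 - 1 = 511.
 */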
235 static void
236 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
237 		     union _cpuid4_leaf_ebx *ebx,
238 		     union _cpuid4_leaf_ecx *ecx)
239 {
240 	unsigned dummy;
241 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
242 	union l1_cache l1i, l1d;
243 	union l2_cache l2;
244 	union l3_cache l3;
245 	union l1_cache *l1 = &l1d;
246 
247 	eax->full = 0;
248 	ebx->full = 0;
249 	ecx->full = 0;
250 
251 	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
252 	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
253 
254 	switch (leaf) {
255 	case 1:
256 		l1 = &l1i;
257 		fallthrough;
258 	case 0:
259 		if (!l1->val)
260 			return;
261 		assoc = assocs[l1->assoc];
262 		line_size = l1->line_size;
263 		lines_per_tag = l1->lines_per_tag;
264 		size_in_kb = l1->size_in_kb;
265 		break;
266 	case 2:
267 		if (!l2.val)
268 			return;
269 		assoc = assocs[l2.assoc];
270 		line_size = l2.line_size;
271 		lines_per_tag = l2.lines_per_tag;
272 		/* cpu_data has errata corrections for K7 applied */
273 		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
274 		break;
275 	case 3:
276 		if (!l3.val)
277 			return;
278 		assoc = assocs[l3.assoc];
279 		line_size = l3.line_size;
280 		lines_per_tag = l3.lines_per_tag;
281 		size_in_kb = l3.size_encoded * 512;
282 		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
283 			size_in_kb = size_in_kb >> 1;
284 			assoc = assoc >> 1;
285 		}
286 		break;
287 	default:
288 		return;
289 	}
290 
291 	eax->split.is_self_initializing = 1;
292 	eax->split.type = types[leaf];
293 	eax->split.level = levels[leaf];
294 	eax->split.num_threads_sharing = 0;
295 	eax->split.num_cores_on_die = topology_num_cores_per_package();
296 
297 
298 	if (assoc == 0xffff)
299 		eax->split.is_fully_associative = 1;
300 	ebx->split.coherency_line_size = line_size - 1;
301 	ebx->split.ways_of_associativity = assoc - 1;
302 	ebx->split.physical_line_partition = lines_per_tag - 1;
303 	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
304 		(ebx->split.ways_of_associativity + 1) - 1;
305 }
306 
307 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
308 
309 /*
310  * L3 cache descriptors
311  */
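/*
 * Summary of the calculation below (derived from the code, not from a
 * hardware specification): each of the four subcaches contributes 0, 1 or 2
 * depending on which bits of the register at PCI config offset 0x1C4 are
 * clear, and l3->indices = (largest contribution << 10) - 1.
 */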
312 static void amd_calc_l3_indices(struct amd_northbridge *nb)
313 {
314 	struct amd_l3_cache *l3 = &nb->l3_cache;
315 	unsigned int sc0, sc1, sc2, sc3;
316 	u32 val = 0;
317 
318 	pci_read_config_dword(nb->misc, 0x1C4, &val);
319 
320 	/* calculate subcache sizes */
321 	l3->subcaches[0] = sc0 = !(val & BIT(0));
322 	l3->subcaches[1] = sc1 = !(val & BIT(4));
323 
324 	if (boot_cpu_data.x86 == 0x15) {
325 		l3->subcaches[0] = sc0 += !(val & BIT(1));
326 		l3->subcaches[1] = sc1 += !(val & BIT(5));
327 	}
328 
329 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
330 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
331 
332 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
333 }
334 
335 /*
336  * check whether a slot used for disabling an L3 index is occupied.
337  * @l3: L3 cache descriptor
338  * @slot: slot number (0..1)
339  *
340  * @returns: the disabled index if used or negative value if slot free.
341  */
342 static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
343 {
344 	unsigned int reg = 0;
345 
346 	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
347 
348 	/* check whether this slot is activated already */
349 	if (reg & (3UL << 30))
350 		return reg & 0xfff;
351 
352 	return -1;
353 }
354 
355 static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
356 				  unsigned int slot)
357 {
358 	int index;
359 	struct amd_northbridge *nb = this_leaf->priv;
360 
361 	index = amd_get_l3_disable_slot(nb, slot);
362 	if (index >= 0)
363 		return sprintf(buf, "%d\n", index);
364 
365 	return sprintf(buf, "FREE\n");
366 }
367 
368 #define SHOW_CACHE_DISABLE(slot)					\
369 static ssize_t								\
370 cache_disable_##slot##_show(struct device *dev,				\
371 			    struct device_attribute *attr, char *buf)	\
372 {									\
373 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
374 	return show_cache_disable(this_leaf, buf, slot);		\
375 }
376 SHOW_CACHE_DISABLE(0)
377 SHOW_CACHE_DISABLE(1)
378 
379 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
380 				 unsigned slot, unsigned long idx)
381 {
382 	int i;
383 
384 	idx |= BIT(30);
385 
386 	/*
387 	 *  disable index in all 4 subcaches
388 	 */
389 	for (i = 0; i < 4; i++) {
390 		u32 reg = idx | (i << 20);
391 
392 		if (!nb->l3_cache.subcaches[i])
393 			continue;
394 
395 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
396 
397 		/*
398 		 * We need to WBINVD on a core on the node containing the L3
399 		 * cache whose indices we disable; a simple wbinvd() is
400 		 * therefore not sufficient.
401 		 */
402 		wbinvd_on_cpu(cpu);
403 
404 		reg |= BIT(31);
405 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
406 	}
407 }
408 
409 /*
410  * disable an L3 cache index by using a disable-slot
411  *
412  * @l3:    L3 cache descriptor
413  * @cpu:   A CPU on the node containing the L3 cache
414  * @slot:  slot number (0..1)
415  * @index: index to disable
416  *
417  * @return: 0 on success, error status on failure
418  */
419 static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
420 			    unsigned slot, unsigned long index)
421 {
422 	int ret = 0;
423 
424 	/*  check if @slot is already used or the index is already disabled */
425 	ret = amd_get_l3_disable_slot(nb, slot);
426 	if (ret >= 0)
427 		return -EEXIST;
428 
429 	if (index > nb->l3_cache.indices)
430 		return -EINVAL;
431 
432 	/* check whether the other slot has disabled the same index already */
433 	if (index == amd_get_l3_disable_slot(nb, !slot))
434 		return -EEXIST;
435 
436 	amd_l3_disable_index(nb, cpu, slot, index);
437 
438 	return 0;
439 }
440 
441 static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
442 				   const char *buf, size_t count,
443 				   unsigned int slot)
444 {
445 	unsigned long val = 0;
446 	int cpu, err = 0;
447 	struct amd_northbridge *nb = this_leaf->priv;
448 
449 	if (!capable(CAP_SYS_ADMIN))
450 		return -EPERM;
451 
452 	cpu = cpumask_first(&this_leaf->shared_cpu_map);
453 
454 	if (kstrtoul(buf, 10, &val) < 0)
455 		return -EINVAL;
456 
457 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
458 	if (err) {
459 		if (err == -EEXIST)
460 			pr_warn("L3 slot %d in use/index already disabled!\n",
461 				   slot);
462 		return err;
463 	}
464 	return count;
465 }
466 
467 #define STORE_CACHE_DISABLE(slot)					\
468 static ssize_t								\
469 cache_disable_##slot##_store(struct device *dev,			\
470 			     struct device_attribute *attr,		\
471 			     const char *buf, size_t count)		\
472 {									\
473 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
474 	return store_cache_disable(this_leaf, buf, count, slot);	\
475 }
476 STORE_CACHE_DISABLE(0)
477 STORE_CACHE_DISABLE(1)
478 
479 static ssize_t subcaches_show(struct device *dev,
480 			      struct device_attribute *attr, char *buf)
481 {
482 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
483 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
484 
485 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
486 }
487 
488 static ssize_t subcaches_store(struct device *dev,
489 			       struct device_attribute *attr,
490 			       const char *buf, size_t count)
491 {
492 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
493 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
494 	unsigned long val;
495 
496 	if (!capable(CAP_SYS_ADMIN))
497 		return -EPERM;
498 
499 	if (kstrtoul(buf, 16, &val) < 0)
500 		return -EINVAL;
501 
502 	if (amd_set_subcaches(cpu, val))
503 		return -EINVAL;
504 
505 	return count;
506 }
507 
508 static DEVICE_ATTR_RW(cache_disable_0);
509 static DEVICE_ATTR_RW(cache_disable_1);
510 static DEVICE_ATTR_RW(subcaches);
511 
512 static umode_t
513 cache_private_attrs_is_visible(struct kobject *kobj,
514 			       struct attribute *attr, int unused)
515 {
516 	struct device *dev = kobj_to_dev(kobj);
517 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
518 	umode_t mode = attr->mode;
519 
520 	if (!this_leaf->priv)
521 		return 0;
522 
523 	if ((attr == &dev_attr_subcaches.attr) &&
524 	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
525 		return mode;
526 
527 	if ((attr == &dev_attr_cache_disable_0.attr ||
528 	     attr == &dev_attr_cache_disable_1.attr) &&
529 	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
530 		return mode;
531 
532 	return 0;
533 }
534 
535 static struct attribute_group cache_private_group = {
536 	.is_visible = cache_private_attrs_is_visible,
537 };
538 
539 static void init_amd_l3_attrs(void)
540 {
541 	int n = 1;
542 	static struct attribute **amd_l3_attrs;
543 
544 	if (amd_l3_attrs) /* already initialized */
545 		return;
546 
547 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
548 		n += 2;
549 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
550 		n += 1;
551 
552 	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
553 	if (!amd_l3_attrs)
554 		return;
555 
556 	n = 0;
557 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
558 		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
559 		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
560 	}
561 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
562 		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
563 
564 	cache_private_group.attrs = amd_l3_attrs;
565 }
566 
567 const struct attribute_group *
568 cache_get_priv_group(struct cacheinfo *this_leaf)
569 {
570 	struct amd_northbridge *nb = this_leaf->priv;
571 
572 	if (this_leaf->level < 3 || !nb)
573 		return NULL;
574 
575 	if (nb && nb->l3_cache.indices)
576 		init_amd_l3_attrs();
577 
578 	return &cache_private_group;
579 }
580 
581 static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
582 {
583 	int node;
584 
585 	/* only for L3, and not in virtualized environments */
586 	if (index < 3)
587 		return;
588 
589 	node = topology_amd_node_id(smp_processor_id());
590 	this_leaf->nb = node_to_amd_nb(node);
591 	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
592 		amd_calc_l3_indices(this_leaf->nb);
593 }
594 #else
595 #define amd_init_l3_cache(x, y)
596 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
597 
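/*
 * Worked example (illustrative values): a CPUID(4) leaf reporting
 * ways_of_associativity = 7, physical_line_partition = 0,
 * coherency_line_size = 63 and number_of_sets = 8191 describes an 8-way
 * cache with 64-byte lines and 8192 sets, so the size computed below is
 * (8191 + 1) * (63 + 1) * (0 + 1) * (7 + 1) = 4 MB.
 */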
598 static int
599 cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
600 {
601 	union _cpuid4_leaf_eax	eax;
602 	union _cpuid4_leaf_ebx	ebx;
603 	union _cpuid4_leaf_ecx	ecx;
604 	unsigned		edx;
605 
606 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
607 		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
608 			cpuid_count(0x8000001d, index, &eax.full,
609 				    &ebx.full, &ecx.full, &edx);
610 		else
611 			amd_cpuid4(index, &eax, &ebx, &ecx);
612 		amd_init_l3_cache(this_leaf, index);
613 	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
614 		cpuid_count(0x8000001d, index, &eax.full,
615 			    &ebx.full, &ecx.full, &edx);
616 		amd_init_l3_cache(this_leaf, index);
617 	} else {
618 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
619 	}
620 
621 	if (eax.split.type == CTYPE_NULL)
622 		return -EIO; /* better error ? */
623 
624 	this_leaf->eax = eax;
625 	this_leaf->ebx = ebx;
626 	this_leaf->ecx = ecx;
627 	this_leaf->size = (ecx.split.number_of_sets          + 1) *
628 			  (ebx.split.coherency_line_size     + 1) *
629 			  (ebx.split.physical_line_partition + 1) *
630 			  (ebx.split.ways_of_associativity   + 1);
631 	return 0;
632 }
633 
634 static int find_num_cache_leaves(struct cpuinfo_x86 *c)
635 {
636 	unsigned int		eax, ebx, ecx, edx, op;
637 	union _cpuid4_leaf_eax	cache_eax;
638 	int 			i = -1;
639 
640 	if (c->x86_vendor == X86_VENDOR_AMD ||
641 	    c->x86_vendor == X86_VENDOR_HYGON)
642 		op = 0x8000001d;
643 	else
644 		op = 4;
645 
646 	do {
647 		++i;
648 		/* Do cpuid(op) loop to find out num_cache_leaves */
649 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
650 		cache_eax.full = eax;
651 	} while (cache_eax.split.type != CTYPE_NULL);
652 	return i;
653 }
654 
655 void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
656 {
657 	/*
658 	 * We may have multiple LLCs if L3 caches exist, so check if we
659 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
660 	 */
661 	if (!cpuid_edx(0x80000006))
662 		return;
663 
664 	if (c->x86 < 0x17) {
665 		/* LLC is at the node level. */
666 		c->topo.llc_id = die_id;
667 	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
668 		/*
669 		 * LLC is at the core complex level.
670 		 * Core complex ID is ApicId[3] for these processors.
671 		 */
672 		c->topo.llc_id = c->topo.apicid >> 3;
673 	} else {
674 		/*
675 		 * LLC ID is calculated from the number of threads sharing the
676 		 * cache.
677 		 */
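		/*
		 * Example (illustrative): if EAX[25:14] of the LLC leaf is 15,
		 * then 16 threads share the cache, bits = 4 and
		 * llc_id = apicid >> 4.
		 */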
678 		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
679 		u32 llc_index = find_num_cache_leaves(c) - 1;
680 
681 		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
682 		if (eax)
683 			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
684 
685 		if (num_sharing_cache) {
686 			int bits = get_count_order(num_sharing_cache);
687 
688 			c->topo.llc_id = c->topo.apicid >> bits;
689 		}
690 	}
691 }
692 
693 void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
694 {
695 	/*
696 	 * We may have multiple LLCs if L3 caches exist, so check if we
697 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
698 	 */
699 	if (!cpuid_edx(0x80000006))
700 		return;
701 
702 	/*
703 	 * LLC is at the core complex level.
704 	 * Core complex ID is ApicId[3] for these processors.
705 	 */
706 	c->topo.llc_id = c->topo.apicid >> 3;
707 }
708 
709 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
710 {
711 	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
712 
713 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
714 		ci->num_leaves = find_num_cache_leaves(c);
715 	} else if (c->extended_cpuid_level >= 0x80000006) {
716 		if (cpuid_edx(0x80000006) & 0xf000)
717 			ci->num_leaves = 4;
718 		else
719 			ci->num_leaves = 3;
720 	}
721 }
722 
723 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
724 {
725 	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
726 
727 	ci->num_leaves = find_num_cache_leaves(c);
728 }
729 
730 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
731 {
732 	/* Cache sizes */
733 	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
734 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
735 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
736 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
737 	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
738 
739 	if (c->cpuid_level > 3) {
740 		/*
741 		 * There should be at least one leaf. A non-zero value means
742 		 * that the number of leaves has been initialized.
743 		 */
744 		if (!ci->num_leaves)
745 			ci->num_leaves = find_num_cache_leaves(c);
746 
747 		/*
748 		 * Whenever possible use cpuid(4), deterministic cache
749 		 * parameters cpuid leaf to find the cache details
750 		 */
751 		for (i = 0; i < ci->num_leaves; i++) {
752 			struct _cpuid4_info_regs this_leaf = {};
753 			int retval;
754 
755 			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
756 			if (retval < 0)
757 				continue;
758 
759 			switch (this_leaf.eax.split.level) {
760 			case 1:
761 				if (this_leaf.eax.split.type == CTYPE_DATA)
762 					new_l1d = this_leaf.size/1024;
763 				else if (this_leaf.eax.split.type == CTYPE_INST)
764 					new_l1i = this_leaf.size/1024;
765 				break;
766 			case 2:
767 				new_l2 = this_leaf.size/1024;
768 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
769 				index_msb = get_count_order(num_threads_sharing);
770 				l2_id = c->topo.apicid & ~((1 << index_msb) - 1);
771 				break;
772 			case 3:
773 				new_l3 = this_leaf.size/1024;
774 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
775 				index_msb = get_count_order(num_threads_sharing);
776 				l3_id = c->topo.apicid & ~((1 << index_msb) - 1);
777 				break;
778 			default:
779 				break;
780 			}
781 		}
782 	}
783 
784 	/* Don't use CPUID(2) if CPUID(4) is supported. */
785 	if (!ci->num_leaves && c->cpuid_level > 1) {
786 		/* supports eax=2  call */
787 		int j, n;
788 		unsigned int regs[4];
789 		unsigned char *dp = (unsigned char *)regs;
790 
791 		/* Number of times to iterate */
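		/*
		 * Illustrative note: regs[] is scanned below as 16 descriptor
		 * bytes; byte 0 carries the iteration count and any register
		 * with bit 31 set is ignored. A descriptor such as 0x43 then
		 * adds 512 KB to the L2 total via cache_table[].
		 */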
792 		n = cpuid_eax(2) & 0xFF;
793 
794 		for (i = 0 ; i < n ; i++) {
795 			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
796 
797 			/* If bit 31 is set, this is an unknown format */
798 			for (j = 0 ; j < 4 ; j++)
799 				if (regs[j] & (1 << 31))
800 					regs[j] = 0;
801 
802 			/* Byte 0 is level count, not a descriptor */
803 			/* Byte 0 is the iteration count, not a descriptor */
804 				unsigned char des = dp[j];
805 				unsigned char k = 0;
806 
807 				/* look up this descriptor in the table */
808 				while (cache_table[k].descriptor != 0) {
809 					if (cache_table[k].descriptor == des) {
810 						switch (cache_table[k].cache_type) {
811 						case LVL_1_INST:
812 							l1i += cache_table[k].size;
813 							break;
814 						case LVL_1_DATA:
815 							l1d += cache_table[k].size;
816 							break;
817 						case LVL_2:
818 							l2 += cache_table[k].size;
819 							break;
820 						case LVL_3:
821 							l3 += cache_table[k].size;
822 							break;
823 						}
824 
825 						break;
826 					}
827 
828 					k++;
829 				}
830 			}
831 		}
832 	}
833 
834 	if (new_l1d)
835 		l1d = new_l1d;
836 
837 	if (new_l1i)
838 		l1i = new_l1i;
839 
840 	if (new_l2) {
841 		l2 = new_l2;
842 		c->topo.llc_id = l2_id;
843 		c->topo.l2c_id = l2_id;
844 	}
845 
846 	if (new_l3) {
847 		l3 = new_l3;
848 		c->topo.llc_id = l3_id;
849 	}
850 
851 	/*
852 	 * If llc_id is not yet set, this means cpuid_level < 4, which in
853 	 * turn means that the only possibility is SMT (as indicated in
854 	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
855 	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
856 	 * c->topo.pkg_id.
857 	 */
858 	if (c->topo.llc_id == BAD_APICID)
859 		c->topo.llc_id = c->topo.pkg_id;
860 
861 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
862 
863 	if (!l2)
864 		cpu_detect_cache_sizes(c);
865 }
866 
867 static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
868 				    struct _cpuid4_info_regs *base)
869 {
870 	struct cpu_cacheinfo *this_cpu_ci;
871 	struct cacheinfo *this_leaf;
872 	int i, sibling;
873 
874 	/*
875 	 * For L3, always use the pre-calculated cpu_llc_shared_mask
876 	 * to derive shared_cpu_map.
877 	 */
878 	if (index == 3) {
879 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
880 			this_cpu_ci = get_cpu_cacheinfo(i);
881 			if (!this_cpu_ci->info_list)
882 				continue;
883 			this_leaf = this_cpu_ci->info_list + index;
884 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
885 				if (!cpu_online(sibling))
886 					continue;
887 				cpumask_set_cpu(sibling,
888 						&this_leaf->shared_cpu_map);
889 			}
890 		}
891 	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
892 		unsigned int apicid, nshared, first, last;
893 
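		/*
		 * Example (illustrative): nshared = 4 and apicid = 6 give
		 * first = 4 and last = 7, i.e. APIC IDs 4-7 share this cache.
		 */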
894 		nshared = base->eax.split.num_threads_sharing + 1;
895 		apicid = cpu_data(cpu).topo.apicid;
896 		first = apicid - (apicid % nshared);
897 		last = first + nshared - 1;
898 
899 		for_each_online_cpu(i) {
900 			this_cpu_ci = get_cpu_cacheinfo(i);
901 			if (!this_cpu_ci->info_list)
902 				continue;
903 
904 			apicid = cpu_data(i).topo.apicid;
905 			if ((apicid < first) || (apicid > last))
906 				continue;
907 
908 			this_leaf = this_cpu_ci->info_list + index;
909 
910 			for_each_online_cpu(sibling) {
911 				apicid = cpu_data(sibling).topo.apicid;
912 				if ((apicid < first) || (apicid > last))
913 					continue;
914 				cpumask_set_cpu(sibling,
915 						&this_leaf->shared_cpu_map);
916 			}
917 		}
918 	} else
919 		return 0;
920 
921 	return 1;
922 }
923 
924 static void __cache_cpumap_setup(unsigned int cpu, int index,
925 				 struct _cpuid4_info_regs *base)
926 {
927 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
928 	struct cacheinfo *this_leaf, *sibling_leaf;
929 	unsigned long num_threads_sharing;
930 	int index_msb, i;
931 	struct cpuinfo_x86 *c = &cpu_data(cpu);
932 
933 	if (c->x86_vendor == X86_VENDOR_AMD ||
934 	    c->x86_vendor == X86_VENDOR_HYGON) {
935 		if (__cache_amd_cpumap_setup(cpu, index, base))
936 			return;
937 	}
938 
939 	this_leaf = this_cpu_ci->info_list + index;
940 	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
941 
942 	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
943 	if (num_threads_sharing == 1)
944 		return;
945 
946 	index_msb = get_count_order(num_threads_sharing);
947 
948 	for_each_online_cpu(i)
949 		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
950 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
951 
952 			if (i == cpu || !sib_cpu_ci->info_list)
953 				continue; /* skip if itself or no cacheinfo */
954 			sibling_leaf = sib_cpu_ci->info_list + index;
955 			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
956 			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
957 		}
958 }
959 
960 static void ci_leaf_init(struct cacheinfo *this_leaf,
961 			 struct _cpuid4_info_regs *base)
962 {
963 	this_leaf->id = base->id;
964 	this_leaf->attributes = CACHE_ID;
965 	this_leaf->level = base->eax.split.level;
966 	this_leaf->type = cache_type_map[base->eax.split.type];
967 	this_leaf->coherency_line_size =
968 				base->ebx.split.coherency_line_size + 1;
969 	this_leaf->ways_of_associativity =
970 				base->ebx.split.ways_of_associativity + 1;
971 	this_leaf->size = base->size;
972 	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
973 	this_leaf->physical_line_partition =
974 				base->ebx.split.physical_line_partition + 1;
975 	this_leaf->priv = base->nb;
976 }
977 
978 int init_cache_level(unsigned int cpu)
979 {
980 	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
981 
982 	/* There should be at least one leaf. */
983 	if (!ci->num_leaves)
984 		return -ENOENT;
985 
986 	return 0;
987 }
988 
989 /*
990  * The max shared threads number comes from CPUID.4:EAX[25-14] with input
991  * ECX as cache index. Then right shift apicid by the number's order to get
992  * cache id for this cache node.
993  */
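/*
 * Example (illustrative): EAX[25:14] = 11 means 12 threads share the cache,
 * get_count_order(12) = 4, and the cache id is apicid >> 4.
 */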
994 static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
995 {
996 	struct cpuinfo_x86 *c = &cpu_data(cpu);
997 	unsigned long num_threads_sharing;
998 	int index_msb;
999 
1000 	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1001 	index_msb = get_count_order(num_threads_sharing);
1002 	id4_regs->id = c->topo.apicid >> index_msb;
1003 }
1004 
1005 int populate_cache_leaves(unsigned int cpu)
1006 {
1007 	unsigned int idx, ret;
1008 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1009 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1010 	struct _cpuid4_info_regs id4_regs = {};
1011 
1012 	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1013 		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1014 		if (ret)
1015 			return ret;
1016 		get_cache_id(cpu, &id4_regs);
1017 		ci_leaf_init(this_leaf++, &id4_regs);
1018 		__cache_cpumap_setup(cpu, idx, &id4_regs);
1019 	}
1020 	this_cpu_ci->cpu_map_populated = true;
1021 
1022 	return 0;
1023 }
1024 
1025 /*
1026  * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
1027  *
1028  * Since we are disabling the cache, don't allow any interrupts;
1029  * they would run extremely slowly and would only increase the pain.
1030  *
1031  * The caller must ensure that local interrupts are disabled and
1032  * are reenabled after cache_enable() has been called.
1033  */
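/*
 * Typical usage, as done by cache_cpu_init() below:
 *
 *	local_irq_save(flags);
 *	cache_disable();
 *	... reprogram MTRRs and/or PAT ...
 *	cache_enable();
 *	local_irq_restore(flags);
 */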
1034 static unsigned long saved_cr4;
1035 static DEFINE_RAW_SPINLOCK(cache_disable_lock);
1036 
1037 void cache_disable(void) __acquires(cache_disable_lock)
1038 {
1039 	unsigned long cr0;
1040 
1041 	/*
1042 	 * Note that this is not ideal, since the cache is only
1043 	 * flushed/disabled for this CPU while the MTRRs are changed, but
1044 	 * changing this requires more invasive changes to the way the
1045 	 * kernel boots.
1046 	 */
1047 
1048 	raw_spin_lock(&cache_disable_lock);
1049 
1050 	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
1051 	cr0 = read_cr0() | X86_CR0_CD;
1052 	write_cr0(cr0);
1053 
1054 	/*
1055 	 * Cache flushing is the most time-consuming step when programming
1056 	 * the MTRRs. Fortunately, as per the Intel Software Development
1057 	 * Manual, we can skip it if the processor supports cache self-
1058 	 * snooping.
1059 	 */
1060 	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1061 		wbinvd();
1062 
1063 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
1064 	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
1065 		saved_cr4 = __read_cr4();
1066 		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
1067 	}
1068 
1069 	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
1070 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1071 	flush_tlb_local();
1072 
1073 	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1074 		mtrr_disable();
1075 
1076 	/* Again, only flush caches if we have to. */
1077 	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1078 		wbinvd();
1079 }
1080 
1081 void cache_enable(void) __releases(cache_disable_lock)
1082 {
1083 	/* Flush TLBs (no need to flush caches - they are disabled) */
1084 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1085 	flush_tlb_local();
1086 
1087 	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1088 		mtrr_enable();
1089 
1090 	/* Enable caches */
1091 	write_cr0(read_cr0() & ~X86_CR0_CD);
1092 
1093 	/* Restore value of CR4 */
1094 	if (cpu_feature_enabled(X86_FEATURE_PGE))
1095 		__write_cr4(saved_cr4);
1096 
1097 	raw_spin_unlock(&cache_disable_lock);
1098 }
1099 
1100 static void cache_cpu_init(void)
1101 {
1102 	unsigned long flags;
1103 
1104 	local_irq_save(flags);
1105 
1106 	if (memory_caching_control & CACHE_MTRR) {
1107 		cache_disable();
1108 		mtrr_generic_set_state();
1109 		cache_enable();
1110 	}
1111 
1112 	if (memory_caching_control & CACHE_PAT)
1113 		pat_cpu_init();
1114 
1115 	local_irq_restore(flags);
1116 }
1117 
1118 static bool cache_aps_delayed_init = true;
1119 
1120 void set_cache_aps_delayed_init(bool val)
1121 {
1122 	cache_aps_delayed_init = val;
1123 }
1124 
1125 bool get_cache_aps_delayed_init(void)
1126 {
1127 	return cache_aps_delayed_init;
1128 }
1129 
1130 static int cache_rendezvous_handler(void *unused)
1131 {
1132 	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
1133 		cache_cpu_init();
1134 
1135 	return 0;
1136 }
1137 
1138 void __init cache_bp_init(void)
1139 {
1140 	mtrr_bp_init();
1141 	pat_bp_init();
1142 
1143 	if (memory_caching_control)
1144 		cache_cpu_init();
1145 }
1146 
1147 void cache_bp_restore(void)
1148 {
1149 	if (memory_caching_control)
1150 		cache_cpu_init();
1151 }
1152 
1153 static int cache_ap_online(unsigned int cpu)
1154 {
1155 	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);
1156 
1157 	if (!memory_caching_control || get_cache_aps_delayed_init())
1158 		return 0;
1159 
1160 	/*
1161 	 * Ideally we should hold mtrr_mutex here to prevent MTRR entries
1162 	 * from being changed, but this routine is called at CPU boot time,
1163 	 * and holding the lock there breaks it.
1164 	 *
1165 	 * This routine is called in two cases:
1166 	 *
1167 	 *   1. very early in software resume, when there are absolutely
1168 	 *      no MTRR entry changes;
1169 	 *
1170 	 *   2. CPU hot-add time. We let mtrr_add/del_page hold the cpuhotplug
1171 	 *      lock to prevent MTRR entry changes.
1172 	 */
1173 	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
1174 				       cpu_cacheinfo_mask);
1175 
1176 	return 0;
1177 }
1178 
1179 static int cache_ap_offline(unsigned int cpu)
1180 {
1181 	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
1182 	return 0;
1183 }
1184 
1185 /*
1186  * Delayed cache initialization for all APs
1187  */
1188 void cache_aps_init(void)
1189 {
1190 	if (!memory_caching_control || !get_cache_aps_delayed_init())
1191 		return;
1192 
1193 	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
1194 	set_cache_aps_delayed_init(false);
1195 }
1196 
1197 static int __init cache_ap_register(void)
1198 {
1199 	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
1200 	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);
1201 
1202 	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
1203 				  "x86/cachectrl:starting",
1204 				  cache_ap_online, cache_ap_offline);
1205 	return 0;
1206 }
1207 early_initcall(cache_ap_register);
1208