xref: /linux/arch/x86/kernel/cpu/resctrl/core.c (revision 785cdec46e9227f9433884ed3b436471e944007c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Resource Director Technology(RDT)
4  * - Cache Allocation code.
5  *
6  * Copyright (C) 2016 Intel Corporation
7  *
8  * Authors:
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  *    Tony Luck <tony.luck@intel.com>
11  *    Vikas Shivappa <vikas.shivappa@intel.com>
12  *
13  * More information about RDT can be found in the Intel(R) x86 Architecture
14  * Software Developer Manual, June 2016, volume 3, section 17.17.
15  */
16 
17 #define pr_fmt(fmt)	"resctrl: " fmt
18 
19 #include <linux/cpu.h>
20 #include <linux/slab.h>
21 #include <linux/err.h>
22 #include <linux/cpuhotplug.h>
23 
24 #include <asm/cpu_device_id.h>
25 #include <asm/msr.h>
26 #include <asm/resctrl.h>
27 #include "internal.h"
28 
29 /*
30  * rdt_domain structures are kfree()d when their last CPU goes offline,
31  * and allocated when the first CPU in a new domain comes online.
32  * The rdt_resource's domain list is updated when this happens. Readers of
33  * the domain list must either take cpus_read_lock(), or rely on an RCU
34  * read-side critical section, to avoid observing concurrent modification.
35  * All writers take this mutex:
36  */
37 static DEFINE_MUTEX(domain_list_lock);
38 
39 /*
40  * The cached resctrl_pqr_state is strictly per CPU and can never be
41  * updated from a remote CPU. Functions which modify the state
42  * are called with interrupts disabled and no preemption, which
43  * is sufficient for the protection.
44  */
45 DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
46 
47 /*
48  * Global boolean which is true if allocation is
49  * supported by any resource.
50  */
51 bool rdt_alloc_capable;
52 
53 static void mba_wrmsr_intel(struct msr_param *m);
54 static void cat_wrmsr(struct msr_param *m);
55 static void mba_wrmsr_amd(struct msr_param *m);
56 
57 #define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
58 #define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)
59 
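/*
 * Note: the MBA and SMBA entries below deliberately leave .msr_base and
 * .msr_update unset. Those hooks are vendor specific and are filled in
 * later by rdt_init_res_defs_intel() / rdt_init_res_defs_amd().
 */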
60 struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
61 	[RDT_RESOURCE_L3] =
62 	{
63 		.r_resctrl = {
64 			.rid			= RDT_RESOURCE_L3,
65 			.name			= "L3",
66 			.ctrl_scope		= RESCTRL_L3_CACHE,
67 			.mon_scope		= RESCTRL_L3_CACHE,
68 			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_L3),
69 			.mon_domains		= mon_domain_init(RDT_RESOURCE_L3),
70 			.schema_fmt		= RESCTRL_SCHEMA_BITMAP,
71 		},
72 		.msr_base		= MSR_IA32_L3_CBM_BASE,
73 		.msr_update		= cat_wrmsr,
74 	},
75 	[RDT_RESOURCE_L2] =
76 	{
77 		.r_resctrl = {
78 			.rid			= RDT_RESOURCE_L2,
79 			.name			= "L2",
80 			.ctrl_scope		= RESCTRL_L2_CACHE,
81 			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_L2),
82 			.schema_fmt		= RESCTRL_SCHEMA_BITMAP,
83 		},
84 		.msr_base		= MSR_IA32_L2_CBM_BASE,
85 		.msr_update		= cat_wrmsr,
86 	},
87 	[RDT_RESOURCE_MBA] =
88 	{
89 		.r_resctrl = {
90 			.rid			= RDT_RESOURCE_MBA,
91 			.name			= "MB",
92 			.ctrl_scope		= RESCTRL_L3_CACHE,
93 			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_MBA),
94 			.schema_fmt		= RESCTRL_SCHEMA_RANGE,
95 		},
96 	},
97 	[RDT_RESOURCE_SMBA] =
98 	{
99 		.r_resctrl = {
100 			.rid			= RDT_RESOURCE_SMBA,
101 			.name			= "SMBA",
102 			.ctrl_scope		= RESCTRL_L3_CACHE,
103 			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_SMBA),
104 			.schema_fmt		= RESCTRL_SCHEMA_RANGE,
105 		},
106 	},
107 };
108 
109 u32 resctrl_arch_system_num_rmid_idx(void)
110 {
111 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
112 
113 	/* RMIDs are independent numbers on x86, so num_rmid_idx == num_rmid */
114 	return r->num_rmid;
115 }
116 
117 struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
118 {
119 	if (l >= RDT_NUM_RESOURCES)
120 		return NULL;
121 
122 	return &rdt_resources_all[l].r_resctrl;
123 }
124 
125 /*
126  * cache_alloc_hsw_probe() - Probe for Intel Haswell server CPUs, as they do
127  * not have CPUID enumeration support for Cache Allocation.
128  * The check for Vendor/Family/Model is not enough to guarantee that
129  * the MSRs won't #GP fault because only the following SKUs support
130  * CAT:
131  *	Intel(R) Xeon(R)  CPU E5-2658  v3  @  2.20GHz
132  *	Intel(R) Xeon(R)  CPU E5-2648L v3  @  1.80GHz
133  *	Intel(R) Xeon(R)  CPU E5-2628L v3  @  2.00GHz
134  *	Intel(R) Xeon(R)  CPU E5-2618L v3  @  2.30GHz
135  *	Intel(R) Xeon(R)  CPU E5-2608L v3  @  2.00GHz
136  *	Intel(R) Xeon(R)  CPU E5-2658A v3  @  2.20GHz
137  *
138  * Probe by trying to write the first of the L3 cache mask registers
139  * and checking that the bits stick. The maximum number of CLOSIDs is always 4
140  * and the maximum CBM length is always 20 on HSW server parts. The minimum
141  * cache bitmask length allowed for HSW server is always 2 bits. Hardcode all of them.
142  */
143 static inline void cache_alloc_hsw_probe(void)
144 {
145 	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
146 	struct rdt_resource *r  = &hw_res->r_resctrl;
147 	u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;
148 
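	/* A non-zero return means the write faulted, i.e. the L3 CBM MSRs do not exist */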
149 	if (wrmsrq_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
150 		return;
151 
152 	rdmsrq(MSR_IA32_L3_CBM_BASE, l3_cbm_0);
153 
154 	/* Bail out if the bits did not all stick - CAT is not supported */
155 	if (l3_cbm_0 != max_cbm)
156 		return;
157 
158 	hw_res->num_closid = 4;
159 	r->cache.cbm_len = 20;
160 	r->cache.shareable_bits = 0xc0000;
161 	r->cache.min_cbm_bits = 2;
162 	r->cache.arch_has_sparse_bitmasks = false;
163 	r->alloc_capable = true;
164 
165 	rdt_alloc_capable = true;
166 }
167 
168 bool is_mba_sc(struct rdt_resource *r)
169 {
170 	if (!r)
171 		r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
172 
173 	/*
174 	 * The software controller support is only applicable to MBA resource.
175 	 * Make sure to check for resource type.
176 	 */
177 	if (r->rid != RDT_RESOURCE_MBA)
178 		return false;
179 
180 	return r->membw.mba_sc;
181 }
182 
183 /*
184  * rdt_get_mb_table() - get a mapping between the bandwidth (b/w) percentage
185  * values exposed to the user interface and the h/w understandable delay values.
186  *
187  * The non-linear delay values have power-of-two granularity and the h/w
188  * does not guarantee a curve of configured delay values vs. the actual
189  * b/w enforced.
190  * Hence we need a pre-calibrated mapping so the user can
191  * express the memory b/w as a percentage value.
192  */
193 static inline bool rdt_get_mb_table(struct rdt_resource *r)
194 {
195 	/*
196 	 * There are currently no Intel SKUs that support non-linear delay values.
197 	 */
198 	pr_info("MBA b/w map not implemented for cpu:%d, model:%d\n",
199 		boot_cpu_data.x86, boot_cpu_data.x86_model);
200 
201 	return false;
202 }
203 
204 static __init bool __get_mem_config_intel(struct rdt_resource *r)
205 {
206 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
207 	union cpuid_0x10_3_eax eax;
208 	union cpuid_0x10_x_edx edx;
209 	u32 ebx, ecx, max_delay;
210 
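	/*
	 * CPUID.0x10.3: EAX enumerates the maximum MBA throttling (delay) value
	 * and EDX the highest supported COS; both are converted to counts by
	 * adding one below.
	 */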
211 	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
212 	hw_res->num_closid = edx.split.cos_max + 1;
213 	max_delay = eax.split.max_delay + 1;
214 	r->membw.max_bw = MAX_MBA_BW;
215 	r->membw.arch_needs_linear = true;
216 	if (ecx & MBA_IS_LINEAR) {
217 		r->membw.delay_linear = true;
218 		r->membw.min_bw = MAX_MBA_BW - max_delay;
219 		r->membw.bw_gran = MAX_MBA_BW - max_delay;
220 	} else {
221 		if (!rdt_get_mb_table(r))
222 			return false;
223 		r->membw.arch_needs_linear = false;
224 	}
225 
226 	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
227 		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
228 	else
229 		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
230 
231 	r->alloc_capable = true;
232 
233 	return true;
234 }
235 
236 static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
237 {
238 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
239 	u32 eax, ebx, ecx, edx, subleaf;
240 
241 	/*
242 	 * Query CPUID_Fn80000020_EDX_x01 for MBA and
243 	 * CPUID_Fn80000020_EDX_x02 for SMBA
244 	 */
245 	subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 :  1;
246 
247 	cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
248 	hw_res->num_closid = edx + 1;
249 	r->membw.max_bw = 1 << eax;
250 
251 	/* AMD does not use delay */
252 	r->membw.delay_linear = false;
253 	r->membw.arch_needs_linear = false;
254 
255 	/*
256 	 * AMD does not use the memory delay throttle model that Intel
257 	 * uses to control the allocation.
258 	 */
259 	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
260 	r->membw.min_bw = 0;
261 	r->membw.bw_gran = 1;
262 
263 	r->alloc_capable = true;
264 
265 	return true;
266 }
267 
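/*
 * rdt_get_cache_alloc_cfg() - Read the CAT enumeration for the L3 (subleaf 1)
 * or L2 (subleaf 2) cache from CPUID leaf 0x10: EAX gives the CBM length,
 * EBX the bitmask of shareable cache regions, ECX whether non-contiguous
 * (sparse) bitmasks are supported, and EDX the highest supported COS.
 */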
268 static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
269 {
270 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
271 	union cpuid_0x10_1_eax eax;
272 	union cpuid_0x10_x_ecx ecx;
273 	union cpuid_0x10_x_edx edx;
274 	u32 ebx, default_ctrl;
275 
276 	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
277 	hw_res->num_closid = edx.split.cos_max + 1;
278 	r->cache.cbm_len = eax.split.cbm_len + 1;
279 	default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
280 	r->cache.shareable_bits = ebx & default_ctrl;
281 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
282 		r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
283 	r->alloc_capable = true;
284 }
285 
286 static void rdt_get_cdp_config(int level)
287 {
288 	/*
289 	 * By default, CDP is disabled. CDP can be enabled by the "cdp" (L3) or
290 	 * "cdpl2" (L2) mount option when the resctrl filesystem is mounted.
291 	 */
292 	rdt_resources_all[level].cdp_enabled = false;
293 	rdt_resources_all[level].r_resctrl.cdp_capable = true;
294 }
295 
296 static void rdt_get_cdp_l3_config(void)
297 {
298 	rdt_get_cdp_config(RDT_RESOURCE_L3);
299 }
300 
301 static void rdt_get_cdp_l2_config(void)
302 {
303 	rdt_get_cdp_config(RDT_RESOURCE_L2);
304 }
305 
306 static void mba_wrmsr_amd(struct msr_param *m)
307 {
308 	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
309 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
310 	unsigned int i;
311 
312 	for (i = m->low; i < m->high; i++)
313 		wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
314 }
315 
316 /*
317  * Map the memory b/w percentage value to delay values
318  * that can be written to QOS_MSRs.
319  * There are currently no SKUs which support non-linear delay values.
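 * On parts with linear delay the mapping is simply delay = MAX_MBA_BW - bw;
 * e.g. a requested bandwidth of 90% is programmed as a delay value of 10
 * (assuming the usual 100% scale for MAX_MBA_BW).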
320  */
321 static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
322 {
323 	if (r->membw.delay_linear)
324 		return MAX_MBA_BW - bw;
325 
326 	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
327 	return MAX_MBA_BW;
328 }
329 
330 static void mba_wrmsr_intel(struct msr_param *m)
331 {
332 	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
333 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
334 	unsigned int i;
335 
336 	/* Write the delay values for MBA. */
337 	for (i = m->low; i < m->high; i++)
338 		wrmsrq(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
339 }
340 
341 static void cat_wrmsr(struct msr_param *m)
342 {
343 	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
344 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
345 	unsigned int i;
346 
347 	for (i = m->low; i < m->high; i++)
348 		wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
349 }
350 
351 u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
352 {
353 	return resctrl_to_arch_res(r)->num_closid;
354 }
355 
356 void rdt_ctrl_update(void *arg)
357 {
358 	struct rdt_hw_resource *hw_res;
359 	struct msr_param *m = arg;
360 
361 	hw_res = resctrl_to_arch_res(m->res);
362 	hw_res->msr_update(m);
363 }
364 
365 static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
366 {
367 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
368 	int i;
369 
370 	/*
371 	 * Initialize the Control MSRs to having no control:
372 	 * For Cache Allocation: set all bits in the CBM.
373 	 * For Memory Allocation: set the requested b/w to 100%.
374 	 */
375 	for (i = 0; i < hw_res->num_closid; i++, dc++)
376 		*dc = resctrl_get_default_ctrl(r);
377 }
378 
379 static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
380 {
381 	kfree(hw_dom->ctrl_val);
382 	kfree(hw_dom);
383 }
384 
385 static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
386 {
387 	kfree(hw_dom->arch_mbm_total);
388 	kfree(hw_dom->arch_mbm_local);
389 	kfree(hw_dom);
390 }
391 
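/*
 * Allocate the per-domain array of control values (one entry per CLOSID),
 * initialize every entry to the "no control" default and write the values
 * out to the domain's MSRs.
 */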
392 static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
393 {
394 	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
395 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
396 	struct msr_param m;
397 	u32 *dc;
398 
399 	dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
400 			   GFP_KERNEL);
401 	if (!dc)
402 		return -ENOMEM;
403 
404 	hw_dom->ctrl_val = dc;
405 	setup_default_ctrlval(r, dc);
406 
407 	m.res = r;
408 	m.dom = d;
409 	m.low = 0;
410 	m.high = hw_res->num_closid;
411 	hw_res->msr_update(&m);
412 	return 0;
413 }
414 
415 /**
416  * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters
417  * @num_rmid:	The size of the MBM counter array
418  * @hw_dom:	The domain that owns the allocated arrays
419  */
420 static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
421 {
422 	size_t tsize;
423 
424 	if (resctrl_arch_is_mbm_total_enabled()) {
425 		tsize = sizeof(*hw_dom->arch_mbm_total);
426 		hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
427 		if (!hw_dom->arch_mbm_total)
428 			return -ENOMEM;
429 	}
430 	if (resctrl_arch_is_mbm_local_enabled()) {
431 		tsize = sizeof(*hw_dom->arch_mbm_local);
432 		hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
433 		if (!hw_dom->arch_mbm_local) {
434 			kfree(hw_dom->arch_mbm_total);
435 			hw_dom->arch_mbm_total = NULL;
436 			return -ENOMEM;
437 		}
438 	}
439 
440 	return 0;
441 }
442 
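/*
 * Map a CPU to its domain id for the given scope: cache scoped resources use
 * the id of the shared cache instance, while node scope (used e.g. when
 * Sub-NUMA Clustering splits the L3 into node-scoped monitoring domains)
 * uses the NUMA node id.
 */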
443 static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
444 {
445 	switch (scope) {
446 	case RESCTRL_L2_CACHE:
447 	case RESCTRL_L3_CACHE:
448 		return get_cpu_cacheinfo_id(cpu, scope);
449 	case RESCTRL_L3_NODE:
450 		return cpu_to_node(cpu);
451 	default:
452 		break;
453 	}
454 
455 	return -EINVAL;
456 }
457 
458 static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
459 {
460 	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
461 	struct rdt_hw_ctrl_domain *hw_dom;
462 	struct list_head *add_pos = NULL;
463 	struct rdt_domain_hdr *hdr;
464 	struct rdt_ctrl_domain *d;
465 	int err;
466 
467 	lockdep_assert_held(&domain_list_lock);
468 
469 	if (id < 0) {
470 		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
471 			     cpu, r->ctrl_scope, r->name);
472 		return;
473 	}
474 
475 	hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos);
476 	if (hdr) {
477 		if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
478 			return;
479 		d = container_of(hdr, struct rdt_ctrl_domain, hdr);
480 
481 		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
482 		if (r->cache.arch_has_per_cpu_cfg)
483 			rdt_domain_reconfigure_cdp(r);
484 		return;
485 	}
486 
487 	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
488 	if (!hw_dom)
489 		return;
490 
491 	d = &hw_dom->d_resctrl;
492 	d->hdr.id = id;
493 	d->hdr.type = RESCTRL_CTRL_DOMAIN;
494 	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
495 
496 	rdt_domain_reconfigure_cdp(r);
497 
498 	if (domain_setup_ctrlval(r, d)) {
499 		ctrl_domain_free(hw_dom);
500 		return;
501 	}
502 
503 	list_add_tail_rcu(&d->hdr.list, add_pos);
504 
505 	err = resctrl_online_ctrl_domain(r, d);
506 	if (err) {
507 		list_del_rcu(&d->hdr.list);
508 		synchronize_rcu();
509 		ctrl_domain_free(hw_dom);
510 	}
511 }
512 
513 static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
514 {
515 	int id = get_domain_id_from_scope(cpu, r->mon_scope);
516 	struct list_head *add_pos = NULL;
517 	struct rdt_hw_mon_domain *hw_dom;
518 	struct rdt_domain_hdr *hdr;
519 	struct rdt_mon_domain *d;
520 	int err;
521 
522 	lockdep_assert_held(&domain_list_lock);
523 
524 	if (id < 0) {
525 		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
526 			     cpu, r->mon_scope, r->name);
527 		return;
528 	}
529 
530 	hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
531 	if (hdr) {
532 		if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
533 			return;
534 		d = container_of(hdr, struct rdt_mon_domain, hdr);
535 
536 		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
537 		return;
538 	}
539 
540 	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
541 	if (!hw_dom)
542 		return;
543 
544 	d = &hw_dom->d_resctrl;
545 	d->hdr.id = id;
546 	d->hdr.type = RESCTRL_MON_DOMAIN;
547 	d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
548 	if (!d->ci) {
549 		pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
550 		mon_domain_free(hw_dom);
551 		return;
552 	}
553 	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
554 
555 	arch_mon_domain_online(r, d);
556 
557 	if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
558 		mon_domain_free(hw_dom);
559 		return;
560 	}
561 
562 	list_add_tail_rcu(&d->hdr.list, add_pos);
563 
564 	err = resctrl_online_mon_domain(r, d);
565 	if (err) {
566 		list_del_rcu(&d->hdr.list);
567 		synchronize_rcu();
568 		mon_domain_free(hw_dom);
569 	}
570 }
571 
572 static void domain_add_cpu(int cpu, struct rdt_resource *r)
573 {
574 	if (r->alloc_capable)
575 		domain_add_cpu_ctrl(cpu, r);
576 	if (r->mon_capable)
577 		domain_add_cpu_mon(cpu, r);
578 }
579 
580 static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
581 {
582 	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
583 	struct rdt_hw_ctrl_domain *hw_dom;
584 	struct rdt_domain_hdr *hdr;
585 	struct rdt_ctrl_domain *d;
586 
587 	lockdep_assert_held(&domain_list_lock);
588 
589 	if (id < 0) {
590 		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
591 			     cpu, r->ctrl_scope, r->name);
592 		return;
593 	}
594 
595 	hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL);
596 	if (!hdr) {
597 		pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
598 			id, cpu, r->name);
599 		return;
600 	}
601 
602 	if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
603 		return;
604 
605 	d = container_of(hdr, struct rdt_ctrl_domain, hdr);
606 	hw_dom = resctrl_to_arch_ctrl_dom(d);
607 
608 	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
609 	if (cpumask_empty(&d->hdr.cpu_mask)) {
610 		resctrl_offline_ctrl_domain(r, d);
611 		list_del_rcu(&d->hdr.list);
612 		synchronize_rcu();
613 
614 		/*
615 		 * rdt_ctrl_domain "d" is going to be freed below, so clear
616 		 * its pointer from pseudo_lock_region struct.
617 		 */
618 		if (d->plr)
619 			d->plr->d = NULL;
620 		ctrl_domain_free(hw_dom);
621 
622 		return;
623 	}
624 }
625 
626 static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
627 {
628 	int id = get_domain_id_from_scope(cpu, r->mon_scope);
629 	struct rdt_hw_mon_domain *hw_dom;
630 	struct rdt_domain_hdr *hdr;
631 	struct rdt_mon_domain *d;
632 
633 	lockdep_assert_held(&domain_list_lock);
634 
635 	if (id < 0) {
636 		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
637 			     cpu, r->mon_scope, r->name);
638 		return;
639 	}
640 
641 	hdr = resctrl_find_domain(&r->mon_domains, id, NULL);
642 	if (!hdr) {
643 		pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
644 			id, cpu, r->name);
645 		return;
646 	}
647 
648 	if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
649 		return;
650 
651 	d = container_of(hdr, struct rdt_mon_domain, hdr);
652 	hw_dom = resctrl_to_arch_mon_dom(d);
653 
654 	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
655 	if (cpumask_empty(&d->hdr.cpu_mask)) {
656 		resctrl_offline_mon_domain(r, d);
657 		list_del_rcu(&d->hdr.list);
658 		synchronize_rcu();
659 		mon_domain_free(hw_dom);
660 
661 		return;
662 	}
663 }
664 
665 static void domain_remove_cpu(int cpu, struct rdt_resource *r)
666 {
667 	if (r->alloc_capable)
668 		domain_remove_cpu_ctrl(cpu, r);
669 	if (r->mon_capable)
670 		domain_remove_cpu_mon(cpu, r);
671 }
672 
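/*
 * Reset this CPU's cached CLOSID/RMID state and the hardware association.
 * MSR_IA32_PQR_ASSOC carries the active RMID in its low 32 bits and the
 * active CLOSID in its high 32 bits, hence the two-argument wrmsr() form
 * below with RMID as the low word and CLOSID as the high word.
 */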
673 static void clear_closid_rmid(int cpu)
674 {
675 	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
676 
677 	state->default_closid = RESCTRL_RESERVED_CLOSID;
678 	state->default_rmid = RESCTRL_RESERVED_RMID;
679 	state->cur_closid = RESCTRL_RESERVED_CLOSID;
680 	state->cur_rmid = RESCTRL_RESERVED_RMID;
681 	wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
682 	      RESCTRL_RESERVED_CLOSID);
683 }
684 
685 static int resctrl_arch_online_cpu(unsigned int cpu)
686 {
687 	struct rdt_resource *r;
688 
689 	mutex_lock(&domain_list_lock);
690 	for_each_capable_rdt_resource(r)
691 		domain_add_cpu(cpu, r);
692 	mutex_unlock(&domain_list_lock);
693 
694 	clear_closid_rmid(cpu);
695 	resctrl_online_cpu(cpu);
696 
697 	return 0;
698 }
699 
700 static int resctrl_arch_offline_cpu(unsigned int cpu)
701 {
702 	struct rdt_resource *r;
703 
704 	resctrl_offline_cpu(cpu);
705 
706 	mutex_lock(&domain_list_lock);
707 	for_each_capable_rdt_resource(r)
708 		domain_remove_cpu(cpu, r);
709 	mutex_unlock(&domain_list_lock);
710 
711 	clear_closid_rmid(cpu);
712 
713 	return 0;
714 }
715 
716 enum {
717 	RDT_FLAG_CMT,
718 	RDT_FLAG_MBM_TOTAL,
719 	RDT_FLAG_MBM_LOCAL,
720 	RDT_FLAG_L3_CAT,
721 	RDT_FLAG_L3_CDP,
722 	RDT_FLAG_L2_CAT,
723 	RDT_FLAG_L2_CDP,
724 	RDT_FLAG_MBA,
725 	RDT_FLAG_SMBA,
726 	RDT_FLAG_BMEC,
727 };
728 
729 #define RDT_OPT(idx, n, f)	\
730 [idx] = {			\
731 	.name = n,		\
732 	.flag = f		\
733 }
734 
735 struct rdt_options {
736 	char	*name;
737 	int	flag;
738 	bool	force_off, force_on;
739 };
740 
741 static struct rdt_options rdt_options[]  __initdata = {
742 	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
743 	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
744 	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
745 	RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",	X86_FEATURE_CAT_L3),
746 	RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",	X86_FEATURE_CDP_L3),
747 	RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",	X86_FEATURE_CAT_L2),
748 	RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",	X86_FEATURE_CDP_L2),
749 	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
750 	RDT_OPT(RDT_FLAG_SMBA,	    "smba",	X86_FEATURE_SMBA),
751 	RDT_OPT(RDT_FLAG_BMEC,	    "bmec",	X86_FEATURE_BMEC),
752 };
753 #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
754 
755 static int __init set_rdt_options(char *str)
756 {
757 	struct rdt_options *o;
758 	bool force_off;
759 	char *tok;
760 
761 	if (*str == '=')
762 		str++;
763 	while ((tok = strsep(&str, ",")) != NULL) {
764 		force_off = *tok == '!';
765 		if (force_off)
766 			tok++;
767 		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
768 			if (strcmp(tok, o->name) == 0) {
769 				if (force_off)
770 					o->force_off = true;
771 				else
772 					o->force_on = true;
773 				break;
774 			}
775 		}
776 	}
777 	return 1;
778 }
779 __setup("rdt", set_rdt_options);
780 
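/*
 * rdt_cpu_has() - like boot_cpu_has(), but honours the "rdt=" overrides above.
 * Example: booting with "rdt=!mba,cmt" force-disables MBA detection and keeps
 * CMT enabled even if a model quirk would otherwise turn it off; a feature
 * that CPUID does not enumerate at all cannot be forced on.
 */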
781 bool __init rdt_cpu_has(int flag)
782 {
783 	bool ret = boot_cpu_has(flag);
784 	struct rdt_options *o;
785 
786 	if (!ret)
787 		return ret;
788 
789 	for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
790 		if (flag == o->flag) {
791 			if (o->force_off)
792 				ret = false;
793 			if (o->force_on)
794 				ret = true;
795 			break;
796 		}
797 	}
798 	return ret;
799 }
800 
801 __init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
802 {
803 	if (!rdt_cpu_has(X86_FEATURE_BMEC))
804 		return false;
805 
806 	switch (evt) {
807 	case QOS_L3_MBM_TOTAL_EVENT_ID:
808 		return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL);
809 	case QOS_L3_MBM_LOCAL_EVENT_ID:
810 		return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL);
811 	default:
812 		return false;
813 	}
814 }
815 
816 static __init bool get_mem_config(void)
817 {
818 	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];
819 
820 	if (!rdt_cpu_has(X86_FEATURE_MBA))
821 		return false;
822 
823 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
824 		return __get_mem_config_intel(&hw_res->r_resctrl);
825 	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
826 		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
827 
828 	return false;
829 }
830 
831 static __init bool get_slow_mem_config(void)
832 {
833 	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];
834 
835 	if (!rdt_cpu_has(X86_FEATURE_SMBA))
836 		return false;
837 
838 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
839 		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
840 
841 	return false;
842 }
843 
844 static __init bool get_rdt_alloc_resources(void)
845 {
846 	struct rdt_resource *r;
847 	bool ret = false;
848 
849 	if (rdt_alloc_capable)
850 		return true;
851 
852 	if (!boot_cpu_has(X86_FEATURE_RDT_A))
853 		return false;
854 
855 	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
856 		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
857 		rdt_get_cache_alloc_cfg(1, r);
858 		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
859 			rdt_get_cdp_l3_config();
860 		ret = true;
861 	}
862 	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
863 		/* CPUID 0x10.2 fields have the same format as 0x10.1 */
864 		r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
865 		rdt_get_cache_alloc_cfg(2, r);
866 		if (rdt_cpu_has(X86_FEATURE_CDP_L2))
867 			rdt_get_cdp_l2_config();
868 		ret = true;
869 	}
870 
871 	if (get_mem_config())
872 		ret = true;
873 
874 	if (get_slow_mem_config())
875 		ret = true;
876 
877 	return ret;
878 }
879 
880 static __init bool get_rdt_mon_resources(void)
881 {
882 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
883 
884 	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
885 		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
886 	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
887 		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
888 	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
889 		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
890 
891 	if (!rdt_mon_features)
892 		return false;
893 
894 	return !rdt_get_mon_l3_config(r);
895 }
896 
897 static __init void __check_quirks_intel(void)
898 {
899 	switch (boot_cpu_data.x86_vfm) {
900 	case INTEL_HASWELL_X:
901 		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
902 			cache_alloc_hsw_probe();
903 		break;
904 	case INTEL_SKYLAKE_X:
905 		if (boot_cpu_data.x86_stepping <= 4)
906 			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
907 		else
908 			set_rdt_options("!l3cat");
909 		fallthrough;
910 	case INTEL_BROADWELL_X:
911 		intel_rdt_mbm_apply_quirk();
912 		break;
913 	}
914 }
915 
916 static __init void check_quirks(void)
917 {
918 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
919 		__check_quirks_intel();
920 }
921 
922 static __init bool get_rdt_resources(void)
923 {
924 	rdt_alloc_capable = get_rdt_alloc_resources();
925 	rdt_mon_capable = get_rdt_mon_resources();
926 
927 	return (rdt_mon_capable || rdt_alloc_capable);
928 }
929 
930 static __init void rdt_init_res_defs_intel(void)
931 {
932 	struct rdt_hw_resource *hw_res;
933 	struct rdt_resource *r;
934 
935 	for_each_rdt_resource(r) {
936 		hw_res = resctrl_to_arch_res(r);
937 
938 		if (r->rid == RDT_RESOURCE_L3 ||
939 		    r->rid == RDT_RESOURCE_L2) {
940 			r->cache.arch_has_per_cpu_cfg = false;
941 			r->cache.min_cbm_bits = 1;
942 		} else if (r->rid == RDT_RESOURCE_MBA) {
943 			hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
944 			hw_res->msr_update = mba_wrmsr_intel;
945 		}
946 	}
947 }
948 
949 static __init void rdt_init_res_defs_amd(void)
950 {
951 	struct rdt_hw_resource *hw_res;
952 	struct rdt_resource *r;
953 
954 	for_each_rdt_resource(r) {
955 		hw_res = resctrl_to_arch_res(r);
956 
957 		if (r->rid == RDT_RESOURCE_L3 ||
958 		    r->rid == RDT_RESOURCE_L2) {
959 			r->cache.arch_has_sparse_bitmasks = true;
960 			r->cache.arch_has_per_cpu_cfg = true;
961 			r->cache.min_cbm_bits = 0;
962 		} else if (r->rid == RDT_RESOURCE_MBA) {
963 			hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
964 			hw_res->msr_update = mba_wrmsr_amd;
965 		} else if (r->rid == RDT_RESOURCE_SMBA) {
966 			hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
967 			hw_res->msr_update = mba_wrmsr_amd;
968 		}
969 	}
970 }
971 
972 static __init void rdt_init_res_defs(void)
973 {
974 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
975 		rdt_init_res_defs_intel();
976 	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
977 		rdt_init_res_defs_amd();
978 }
979 
980 static enum cpuhp_state rdt_online;
981 
982 /* Runs once on the BSP during boot. */
983 void resctrl_cpu_detect(struct cpuinfo_x86 *c)
984 {
985 	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
986 		c->x86_cache_max_rmid  = -1;
987 		c->x86_cache_occ_scale = -1;
988 		c->x86_cache_mbm_width_offset = -1;
989 		return;
990 	}
991 
992 	/* will be overridden if occupancy monitoring exists */
993 	c->x86_cache_max_rmid = cpuid_ebx(0xf);
994 
995 	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
996 	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
997 	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
998 		u32 eax, ebx, ecx, edx;
999 
1000 		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
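		/* ECX: max RMID, EBX: occupancy scale (bytes per count), EAX[7:0]: MBM width offset */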
1001 		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
1002 
1003 		c->x86_cache_max_rmid  = ecx;
1004 		c->x86_cache_occ_scale = ebx;
1005 		c->x86_cache_mbm_width_offset = eax & 0xff;
1006 
1007 		if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
1008 			c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
1009 	}
1010 }
1011 
1012 static int __init resctrl_arch_late_init(void)
1013 {
1014 	struct rdt_resource *r;
1015 	int state, ret;
1016 
1017 	/*
1018 	 * Initialize functions (or definitions) that differ
1019 	 * between vendors here.
1020 	 */
1021 	rdt_init_res_defs();
1022 
1023 	check_quirks();
1024 
1025 	if (!get_rdt_resources())
1026 		return -ENODEV;
1027 
1028 	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
1029 				  "x86/resctrl/cat:online:",
1030 				  resctrl_arch_online_cpu,
1031 				  resctrl_arch_offline_cpu);
1032 	if (state < 0)
1033 		return state;
1034 
1035 	ret = resctrl_init();
1036 	if (ret) {
1037 		cpuhp_remove_state(state);
1038 		return ret;
1039 	}
1040 	rdt_online = state;
1041 
1042 	for_each_alloc_capable_rdt_resource(r)
1043 		pr_info("%s allocation detected\n", r->name);
1044 
1045 	for_each_mon_capable_rdt_resource(r)
1046 		pr_info("%s monitoring detected\n", r->name);
1047 
1048 	return 0;
1049 }
1050 
1051 late_initcall(resctrl_arch_late_init);
1052 
1053 static void __exit resctrl_arch_exit(void)
1054 {
1055 	cpuhp_remove_state(rdt_online);
1056 
1057 	resctrl_exit();
1058 }
1059 
1060 __exitcall(resctrl_arch_exit);
1061