// SPDX-License-Identifier: GPL-2.0
/*
 * Resource Director Technology (RDT)
 *
 * Pseudo-locking support built on top of Cache Allocation Technology (CAT)
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Reinette Chatre <reinette.chatre@intel.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/mman.h>
#include <linux/pm_qos.h>
#include <linux/resctrl.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#include "internal.h"

/*
 * Major number assigned to and shared by all devices exposing
 * pseudo-locked regions.
 */
static unsigned int pseudo_lock_major;

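/*
 * One bit per minor number: a set bit means the minor is still available.
 * pseudo_lock_minor_get() clears a bit when handing a minor out and
 * pseudo_lock_minor_release() sets it again.
 */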
static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);

static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
{
	const struct rdtgroup *rdtgrp;

	rdtgrp = dev_get_drvdata(dev);
	if (mode)
		*mode = 0600;
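	/*
	 * Take rdtgroup_mutex so the group's kernfs name cannot change while
	 * it is read; the guard drops the mutex again on return.
	 */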
	guard(mutex)(&rdtgroup_mutex);
	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdt_kn_name(rdtgrp->kn));
}

static const struct class pseudo_lock_class = {
	.name = "pseudo_lock",
	.devnode = pseudo_lock_devnode,
};

/**
 * pseudo_lock_minor_get - Obtain available minor number
 * @minor: Pointer to where new minor number will be stored
 *
 * A bitmask is used to track available minor numbers. Here the next free
 * minor number is marked as unavailable and returned.
 *
 * Return: 0 on success, <0 on failure.
 */
static int pseudo_lock_minor_get(unsigned int *minor)
{
	unsigned long first_bit;

	first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);

	if (first_bit == MINORBITS)
		return -ENOSPC;

	__clear_bit(first_bit, &pseudo_lock_minor_avail);
	*minor = first_bit;

	return 0;
}

/**
 * pseudo_lock_minor_release - Return minor number to available
 * @minor: The minor number made available
 */
static void pseudo_lock_minor_release(unsigned int minor)
{
	__set_bit(minor, &pseudo_lock_minor_avail);
}

/**
 * region_find_by_minor - Locate a pseudo-lock region by inode minor number
 * @minor: The minor number of the device representing pseudo-locked region
 *
 * When the character device is accessed we need to determine which
 * pseudo-locked region it belongs to. This is done by matching the minor
 * number of the device to the pseudo-locked region to which it belongs.
 *
 * Minor numbers are assigned at the time a pseudo-locked region is associated
 * with a cache instance.
 *
 * Return: On success return pointer to resource group owning the pseudo-locked
 *         region, NULL on failure.
 */
static struct rdtgroup *region_find_by_minor(unsigned int minor)
{
	struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;

	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
			rdtgrp_match = rdtgrp;
			break;
		}
	}
	return rdtgrp_match;
}

/**
 * struct pseudo_lock_pm_req - A power management QoS request list entry
 * @list: Entry within the @pm_reqs list for a pseudo-locked region
 * @req: PM QoS request
 */
struct pseudo_lock_pm_req {
	struct list_head list;
	struct dev_pm_qos_request req;
};

static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req, *next;

	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
		dev_pm_qos_remove_request(&pm_req->req);
		list_del(&pm_req->list);
		kfree(pm_req);
	}
}

/**
 * pseudo_lock_cstates_constrain - Restrict cores from entering C6
 * @plr: Pseudo-locked region
 *
 * To prevent the cache from being affected by power management, entering
 * C6 has to be avoided. This is accomplished by requesting a latency
 * requirement lower than the lowest C6 exit latency of all supported
 * platforms as found in the cpuidle state tables in the intel_idle driver.
 * At this time it is possible to do so with a single latency requirement
 * for all supported platforms.
 *
 * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
 * the ACPI latencies need to be considered while keeping in mind that C2
 * may be set to map to deeper sleep states. In this case the latency
 * requirement needs to prevent entering C2 also.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req;
	int cpu;
	int ret;

	for_each_cpu(cpu, &plr->d->hdr.cpu_mask) {
		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
		if (!pm_req) {
			rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
			ret = -ENOMEM;
			goto out_err;
		}
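		/*
		 * A 30us resume latency cap keeps the CPU out of C6 (and out
		 * of any deeper state that ACPI C2 may map to), as described
		 * above.
		 */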
		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
					     &pm_req->req,
					     DEV_PM_QOS_RESUME_LATENCY,
					     30);
		if (ret < 0) {
			rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
					    cpu);
			kfree(pm_req);
			ret = -1;
			goto out_err;
		}
		list_add(&pm_req->list, &plr->pm_reqs);
	}

	return 0;

out_err:
	pseudo_lock_cstates_relax(plr);
	return ret;
}

/**
 * pseudo_lock_region_clear - Reset pseudo-lock region data
 * @plr: pseudo-lock region
 *
 * All content of the pseudo-locked region is reset - any memory allocated
 * is freed.
 *
 * Return: void
 */
static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
{
	plr->size = 0;
	plr->line_size = 0;
	kfree(plr->kmem);
	plr->kmem = NULL;
	plr->s = NULL;
	if (plr->d)
		plr->d->plr = NULL;
	plr->d = NULL;
	plr->cbm = 0;
	plr->debugfs_dir = NULL;
}

/**
 * pseudo_lock_region_init - Initialize pseudo-lock region information
 * @plr: pseudo-lock region
 *
 * Called after the user provided a schemata to be pseudo-locked. On entry the
 * &struct pseudo_lock_region has already been initialized from the schemata
 * with the resource, domain, and capacity bitmask. Here the information
 * required for pseudo-locking is deduced from this data and the &struct
 * pseudo_lock_region is initialized further. This information includes:
 * - size in bytes of the region to be pseudo-locked
 * - cache line size to know the stride with which data needs to be accessed
 *   to be pseudo-locked
 * - a cpu associated with the cache instance on which the pseudo-locking
 *   flow can be executed
 *
 * Return: 0 on success, <0 on failure. Descriptive error will be written
 * to last_cmd_status buffer.
 */
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
	enum resctrl_scope scope = plr->s->res->ctrl_scope;
	struct cacheinfo *ci;
	int ret;

	if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
		return -ENODEV;

	/* Pick the first cpu we find that is associated with the cache. */
	plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask);

	if (!cpu_online(plr->cpu)) {
		rdt_last_cmd_printf("CPU %u associated with cache not online\n",
				    plr->cpu);
		ret = -ENODEV;
		goto out_region;
	}

	ci = get_cpu_cacheinfo_level(plr->cpu, scope);
	if (ci) {
		plr->line_size = ci->coherency_line_size;
		plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
		return 0;
	}

	ret = -1;
	rdt_last_cmd_puts("Unable to determine cache line size\n");
out_region:
	pseudo_lock_region_clear(plr);
	return ret;
}

/**
 * pseudo_lock_init - Initialize a pseudo-lock region
 * @rdtgrp: resource group to which new pseudo-locked region will belong
 *
 * A pseudo-locked region is associated with a resource group. When this
 * association is created the pseudo-locked region is initialized. The
 * details of the pseudo-locked region are not known at this time so only
 * allocation is done and association established.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_init(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr;

	plr = kzalloc(sizeof(*plr), GFP_KERNEL);
	if (!plr)
		return -ENOMEM;

	init_waitqueue_head(&plr->lock_thread_wq);
	INIT_LIST_HEAD(&plr->pm_reqs);
	rdtgrp->plr = plr;
	return 0;
}

/**
 * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
 * @plr: pseudo-lock region
 *
 * Initialize the details required to set up the pseudo-locked region and
 * allocate the contiguous memory that will be pseudo-locked to the cache.
 *
 * Return: 0 on success, <0 on failure. Descriptive error will be written
 * to last_cmd_status buffer.
 */
static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
{
	int ret;

	ret = pseudo_lock_region_init(plr);
	if (ret < 0)
		return ret;

	/*
	 * We do not yet support contiguous regions larger than
	 * KMALLOC_MAX_SIZE.
	 */
	if (plr->size > KMALLOC_MAX_SIZE) {
		rdt_last_cmd_puts("Requested region exceeds maximum size\n");
		ret = -E2BIG;
		goto out_region;
	}

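	/*
	 * kzalloc() returns physically contiguous, zeroed memory; the mmap
	 * handler depends on that when it maps the region with
	 * remap_pfn_range() starting from __pa(plr->kmem).
	 */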
	plr->kmem = kzalloc(plr->size, GFP_KERNEL);
	if (!plr->kmem) {
		rdt_last_cmd_puts("Unable to allocate memory\n");
		ret = -ENOMEM;
		goto out_region;
	}

	ret = 0;
	goto out;
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}

/**
 * pseudo_lock_free - Free a pseudo-locked region
 * @rdtgrp: resource group to which pseudo-locked region belonged
 *
 * The pseudo-locked region's resources have already been released, or not
 * yet created at this point. Now it can be freed and disassociated from the
 * resource group.
 *
 * Return: void
 */
static void pseudo_lock_free(struct rdtgroup *rdtgrp)
{
	pseudo_lock_region_clear(rdtgrp->plr);
	kfree(rdtgrp->plr);
	rdtgrp->plr = NULL;
}

/**
 * rdtgroup_monitor_in_progress - Test if monitoring in progress
 * @rdtgrp: resource group being queried
 *
 * Return: 1 if monitor groups have been created for this resource
 * group, 0 otherwise.
 */
static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
{
	return !list_empty(&rdtgrp->mon.crdtgrp_list);
}

/**
 * rdtgroup_locksetup_user_restrict - Restrict user access to group
 * @rdtgrp: resource group needing access restricted
 *
 * A resource group used for cache pseudo-locking cannot have cpus or tasks
 * assigned to it. This is communicated to the user by restricting access
 * to all the files that can be used to make such changes.
 *
 * Permissions restored with rdtgroup_locksetup_user_restore()
 *
 * Return: 0 on success, <0 on failure. If a failure occurs during the
 * restriction of access an attempt will be made to restore permissions but
 * the state of the mode of these files will be uncertain when a failure
 * occurs.
 */
static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
	if (ret)
		goto err_cpus;

	if (resctrl_arch_mon_capable()) {
		ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
err_cpus:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
err_tasks:
	rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
out:
	return ret;
}

/**
 * rdtgroup_locksetup_user_restore - Restore user access to group
 * @rdtgrp: resource group needing access restored
 *
 * Restore all file access previously removed using
 * rdtgroup_locksetup_user_restrict()
 *
 * Return: 0 on success, <0 on failure. If a failure occurs during the
 * restoration of access an attempt will be made to restrict permissions
 * again but the state of the mode of these files will be uncertain when
 * a failure occurs.
 */
static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
	if (ret)
		goto err_cpus;

	if (resctrl_arch_mon_capable()) {
		ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
err_cpus:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
err_tasks:
	rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
out:
	return ret;
}

/**
 * rdtgroup_locksetup_enter - Resource group enters locksetup mode
 * @rdtgrp: resource group requested to enter locksetup mode
 *
 * A resource group enters locksetup mode to reflect that it would be used
 * to represent a pseudo-locked region and is in the process of being set
 * up to do so. A resource group used for a pseudo-locked region would
 * lose the closid associated with it so we cannot allow it to have any
 * tasks or cpus assigned nor permit tasks or cpus to be assigned in the
 * future. Monitoring of a pseudo-locked region is not allowed either.
 *
 * The above and more restrictions on a pseudo-locked region are checked
 * for and enforced before the resource group enters the locksetup mode.
 *
 * Returns: 0 if the resource group successfully entered locksetup mode, <0
 * on failure. On failure the last_cmd_status buffer is updated with text to
 * communicate details of failure to the user.
 */
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
{
	int ret;

	/*
	 * The default resource group can neither be removed nor lose the
	 * default closid associated with it.
	 */
	if (rdtgrp == &rdtgroup_default) {
		rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
		return -EINVAL;
	}

	/*
	 * Cache Pseudo-locking not supported when CDP is enabled.
	 *
	 * Some things to consider if you would like to enable this
	 * support (using L3 CDP as example):
	 * - When CDP is enabled two separate resources are exposed,
	 *   L3DATA and L3CODE, but they are actually on the same cache.
	 *   The implication for pseudo-locking is that if a
	 *   pseudo-locked region is created on a domain of one
	 *   resource (eg. L3CODE), then a pseudo-locked region cannot
	 *   be created on that same domain of the other resource
	 *   (eg. L3DATA). This is because the creation of a
	 *   pseudo-locked region involves a call to wbinvd that will
	 *   affect all cache allocations on that particular domain.
	 * - Considering the previous, it may be possible to only
	 *   expose one of the CDP resources to pseudo-locking and
	 *   hide the other. For example, we could consider only
	 *   exposing L3DATA and, since the L3 cache is unified, it is
	 *   still possible to place instructions there and execute them.
	 * - If only one region is exposed to pseudo-locking we should
	 *   still keep in mind that availability of a portion of cache
	 *   for pseudo-locking should take into account both resources.
	 *   Similarly, if a pseudo-locked region is created in one
	 *   resource, the portion of cache used by it should be made
	 *   unavailable to all future allocations from both resources.
	 */
	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
	    resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
		rdt_last_cmd_puts("CDP enabled\n");
		return -EINVAL;
	}

	/*
	 * Not knowing the bits to disable prefetching implies that this
	 * platform does not support Cache Pseudo-Locking.
	 */
	if (resctrl_arch_get_prefetch_disable_bits() == 0) {
		rdt_last_cmd_puts("Pseudo-locking not supported\n");
		return -EINVAL;
	}

	if (rdtgroup_monitor_in_progress(rdtgrp)) {
		rdt_last_cmd_puts("Monitoring in progress\n");
		return -EINVAL;
	}

	if (rdtgroup_tasks_assigned(rdtgrp)) {
		rdt_last_cmd_puts("Tasks assigned to resource group\n");
		return -EINVAL;
	}

	if (!cpumask_empty(&rdtgrp->cpu_mask)) {
		rdt_last_cmd_puts("CPUs assigned to resource group\n");
		return -EINVAL;
	}

	if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
		rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
		return -EIO;
	}

	ret = pseudo_lock_init(rdtgrp);
	if (ret) {
		rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
		goto out_release;
	}

	/*
	 * If this system is capable of monitoring, an RMID would have been
	 * allocated when the control group was created. This is not needed
	 * anymore when this group is used for pseudo-locking. This is safe
	 * to call on platforms not capable of monitoring.
	 */
	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);

	ret = 0;
	goto out;

out_release:
	rdtgroup_locksetup_user_restore(rdtgrp);
out:
	return ret;
}

/**
 * rdtgroup_locksetup_exit - Resource group exits locksetup mode
 * @rdtgrp: resource group
 *
 * When a resource group exits locksetup mode the earlier restrictions are
 * lifted.
 *
 * Return: 0 on success, <0 on failure
 */
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
{
	int ret;

	if (resctrl_arch_mon_capable()) {
		ret = alloc_rmid(rdtgrp->closid);
		if (ret < 0) {
			rdt_last_cmd_puts("Out of RMIDs\n");
			return ret;
		}
		rdtgrp->mon.rmid = ret;
	}

	ret = rdtgroup_locksetup_user_restore(rdtgrp);
	if (ret) {
		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
		return ret;
	}

	pseudo_lock_free(rdtgrp);
	return 0;
}

/**
 * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
 * @d: RDT domain
 * @cbm: CBM to test
 *
 * @d represents a cache instance and @cbm a capacity bitmask that is
 * considered for it. Determine if @cbm overlaps with any existing
 * pseudo-locked region on @d.
 *
 * @cbm is unsigned long, even if only 32 bits are used, to make the
 * bitmap functions work correctly.
 *
 * Return: true if @cbm overlaps with pseudo-locked region on @d, false
 * otherwise.
 */
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
{
	unsigned int cbm_len;
	unsigned long cbm_b;

	if (d->plr) {
		cbm_len = d->plr->s->res->cache.cbm_len;
		cbm_b = d->plr->cbm;
		if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
			return true;
	}
	return false;
}

/**
 * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
 * @d: RDT domain under test
 *
 * The setup of a pseudo-locked region affects all cache instances within
 * the hierarchy of the region. It is thus essential to know if any
 * pseudo-locked regions exist within a cache hierarchy to prevent any
 * attempts to create new pseudo-locked regions in the same hierarchy.
 *
 * Return: true if a pseudo-locked region exists in the hierarchy of @d or
 * if it is not possible to test due to a memory allocation issue,
 * false otherwise.
 */
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
{
	struct rdt_ctrl_domain *d_i;
	cpumask_var_t cpu_with_psl;
	struct rdt_resource *r;
	bool ret = false;

	/* Walking r->domains, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
		return true;

	/*
	 * First determine which cpus have pseudo-locked regions
	 * associated with them.
	 */
	for_each_alloc_capable_rdt_resource(r) {
		list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) {
			if (d_i->plr)
				cpumask_or(cpu_with_psl, cpu_with_psl,
					   &d_i->hdr.cpu_mask);
		}
	}

	/*
	 * Next test if new pseudo-locked region would intersect with
	 * existing region.
	 */
	if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl))
		ret = true;

	free_cpumask_var(cpu_with_psl);
	return ret;
}

/**
 * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
 * @rdtgrp: Resource group to which the pseudo-locked region belongs.
 * @sel: Selector of which measurement to perform on a pseudo-locked region.
 *
 * The measurement of latency to access a pseudo-locked region should be
 * done from a cpu that is associated with that pseudo-locked region.
 * Determine which cpu is associated with this region and start a thread on
 * that cpu to perform the measurement, and wait for that thread to complete.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int cpu;
	int ret = -1;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out;
	}

	if (!plr->d) {
		ret = -ENODEV;
		goto out;
	}

	plr->thread_done = 0;
	cpu = cpumask_first(&plr->d->hdr.cpu_mask);
	if (!cpu_online(cpu)) {
		ret = -ENODEV;
		goto out;
	}

	plr->cpu = cpu;

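	/*
	 * sel selects the measurement: 1 = memory access latency,
	 * 2 = L2 cache residency, 3 = L3 cache residency.
	 */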
	if (sel == 1)
		thread = kthread_run_on_cpu(resctrl_arch_measure_cycles_lat_fn,
					    plr, cpu, "pseudo_lock_measure/%u");
	else if (sel == 2)
		thread = kthread_run_on_cpu(resctrl_arch_measure_l2_residency,
					    plr, cpu, "pseudo_lock_measure/%u");
	else if (sel == 3)
		thread = kthread_run_on_cpu(resctrl_arch_measure_l3_residency,
					    plr, cpu, "pseudo_lock_measure/%u");
	else
		goto out;

	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		goto out;
	}

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0)
		goto out;

	ret = 0;

out:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
	return ret;
}

static ssize_t pseudo_lock_measure_trigger(struct file *file,
					   const char __user *user_buf,
					   size_t count, loff_t *ppos)
{
	struct rdtgroup *rdtgrp = file->private_data;
	size_t buf_size;
	char buf[32];
	int ret;
	int sel;

	buf_size = min(count, (sizeof(buf) - 1));
	if (copy_from_user(buf, user_buf, buf_size))
		return -EFAULT;

	buf[buf_size] = '\0';
	ret = kstrtoint(buf, 10, &sel);
	if (ret == 0) {
		if (sel != 1 && sel != 2 && sel != 3)
			return -EINVAL;
		ret = debugfs_file_get(file->f_path.dentry);
		if (ret)
			return ret;
		ret = pseudo_lock_measure_cycles(rdtgrp, sel);
		if (ret == 0)
			ret = count;
		debugfs_file_put(file->f_path.dentry);
	}

	return ret;
}

static const struct file_operations pseudo_measure_fops = {
	.write = pseudo_lock_measure_trigger,
	.open = simple_open,
	.llseek = default_llseek,
};

/**
 * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
 * @rdtgrp: resource group to which pseudo-lock region belongs
 *
 * Called when a resource group in the pseudo-locksetup mode receives a
 * valid schemata that should be pseudo-locked. Since the resource group is
 * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
 * allocated and initialized with the essential information. If a failure
 * occurs the resource group remains in the pseudo-locksetup mode with the
 * &struct pseudo_lock_region associated with it, but cleared from all
 * information and ready for the user to re-attempt pseudo-locking by
 * writing the schemata again.
 *
 * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
 * on failure. Descriptive error will be written to last_cmd_status buffer.
 */
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int new_minor;
	struct device *dev;
	char *kn_name __free(kfree) = NULL;
	int ret;

	ret = pseudo_lock_region_alloc(plr);
	if (ret < 0)
		return ret;

	ret = pseudo_lock_cstates_constrain(plr);
	if (ret < 0) {
		ret = -EINVAL;
		goto out_region;
	}
	kn_name = kstrdup(rdt_kn_name(rdtgrp->kn), GFP_KERNEL);
	if (!kn_name) {
		ret = -ENOMEM;
		goto out_cstates;
	}

	plr->thread_done = 0;

	thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, plr,
				    plr->cpu, "pseudo_lock/%u");
	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
		goto out_cstates;
	}

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0) {
		/*
		 * If the thread does not get on the CPU for whatever
		 * reason and the process which sets up the region is
		 * interrupted then this will leave the thread in runnable
		 * state and once it gets on the CPU it will dereference
		 * the cleared, but not freed, plr struct resulting in an
		 * empty pseudo-locking loop.
		 */
		rdt_last_cmd_puts("Locking thread interrupted\n");
		goto out_cstates;
	}

	ret = pseudo_lock_minor_get(&new_minor);
	if (ret < 0) {
		rdt_last_cmd_puts("Unable to obtain a new minor number\n");
		goto out_cstates;
	}

	/*
	 * Unlock access but do not release the reference. The
	 * pseudo-locked region will still be here on return.
	 *
	 * The mutex has to be released temporarily to avoid a potential
	 * deadlock with the mm->mmap_lock which is obtained in the
	 * device_create() and debugfs_create_dir() callpath below as well as
	 * before the mmap() callback is called.
	 */
	mutex_unlock(&rdtgroup_mutex);

	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
		plr->debugfs_dir = debugfs_create_dir(kn_name, debugfs_resctrl);
		if (!IS_ERR_OR_NULL(plr->debugfs_dir))
			debugfs_create_file("pseudo_lock_measure", 0200,
					    plr->debugfs_dir, rdtgrp,
					    &pseudo_measure_fops);
	}

	dev = device_create(&pseudo_lock_class, NULL,
			    MKDEV(pseudo_lock_major, new_minor),
			    rdtgrp, "%s", kn_name);

	mutex_lock(&rdtgroup_mutex);

	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		rdt_last_cmd_printf("Failed to create character device: %d\n",
				    ret);
		goto out_debugfs;
	}

	/* We released the mutex - check if group was removed while we did so */
	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out_device;
	}

	plr->minor = new_minor;

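	/*
	 * The region is now locked: the closid is no longer needed and the
	 * cpus files become read-only so the CPU assignment cannot change.
	 */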
	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
	closid_free(rdtgrp->closid);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);

	ret = 0;
	goto out;

out_device:
	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
out_debugfs:
	debugfs_remove_recursive(plr->debugfs_dir);
	pseudo_lock_minor_release(new_minor);
out_cstates:
	pseudo_lock_cstates_relax(plr);
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}

/**
 * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
 * @rdtgrp: resource group to which the pseudo-locked region belongs
 *
 * The removal of a pseudo-locked region can be initiated when the resource
 * group is removed via a "rmdir" from userspace or the
 * unmount of the resctrl filesystem. On removal the resource group does
 * not go back to pseudo-locksetup mode before it is removed, instead it is
 * removed directly. There is thus asymmetry with the creation where the
 * &struct pseudo_lock_region is removed here while it was not created in
 * rdtgroup_pseudo_lock_create().
 *
 * Return: void
 */
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;

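	/*
	 * In locksetup mode the region was never locked: no device, debugfs
	 * entry, minor number or PM QoS requests exist yet, so only the
	 * closid and the pseudo_lock_region itself need to be freed.
	 */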
	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
		/*
		 * Default group cannot be a pseudo-locked region so we can
		 * free closid here.
		 */
		closid_free(rdtgrp->closid);
		goto free;
	}

	pseudo_lock_cstates_relax(plr);
	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
	pseudo_lock_minor_release(plr->minor);

free:
	pseudo_lock_free(rdtgrp);
}

static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = region_find_by_minor(iminor(inode));
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	filp->private_data = rdtgrp;
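	/*
	 * Hold a waitcount reference so the group is not freed while the
	 * device is open, even if the group is removed in the meantime.
	 */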
	atomic_inc(&rdtgrp->waitcount);
	/* Perform a non-seekable open - llseek is not supported */
	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);
	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}
	filp->private_data = NULL;
	atomic_dec(&rdtgrp->waitcount);
	mutex_unlock(&rdtgroup_mutex);
	return 0;
}

static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
{
	/* Not supported */
	return -EINVAL;
}

static const struct vm_operations_struct pseudo_mmap_ops = {
	.mremap = pseudo_lock_dev_mremap,
};

static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long vsize = vma->vm_end - vma->vm_start;
	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
	struct pseudo_lock_region *plr;
	struct rdtgroup *rdtgrp;
	unsigned long physical;
	unsigned long psize;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	plr = rdtgrp->plr;

	if (!plr->d) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	/*
	 * Task is required to run with affinity to the cpus associated
	 * with the pseudo-locked region. If this is not the case the task
	 * may be scheduled elsewhere and invalidate entries in the
	 * pseudo-locked region.
	 */
	if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	physical = __pa(plr->kmem) >> PAGE_SHIFT;
	psize = plr->size - off;

	if (off > plr->size) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

	/*
	 * Ensure changes are carried directly to the memory being mapped,
	 * do not allow copy-on-write mapping.
	 */
	if (!(vma->vm_flags & VM_SHARED)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	if (vsize > psize) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

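	/* Zero the portion being mapped so every new mapping starts clean. */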
	memset(plr->kmem + off, 0, vsize);

	if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
			    vsize, vma->vm_page_prot)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EAGAIN;
	}
	vma->vm_ops = &pseudo_mmap_ops;
	mutex_unlock(&rdtgroup_mutex);
	return 0;
}

static const struct file_operations pseudo_lock_dev_fops = {
	.owner =	THIS_MODULE,
	.read =		NULL,
	.write =	NULL,
	.open =		pseudo_lock_dev_open,
	.release =	pseudo_lock_dev_release,
	.mmap =		pseudo_lock_dev_mmap,
};

int rdt_pseudo_lock_init(void)
{
	int ret;

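	/* Major 0 requests a dynamically allocated major, returned on success. */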
	ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
	if (ret < 0)
		return ret;

	pseudo_lock_major = ret;

	ret = class_register(&pseudo_lock_class);
	if (ret) {
		unregister_chrdev(pseudo_lock_major, "pseudo_lock");
		return ret;
	}

	return 0;
}

void rdt_pseudo_lock_release(void)
{
	class_unregister(&pseudo_lock_class);
	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
	pseudo_lock_major = 0;
}