xref: /linux/arch/x86/kernel/cpu/resctrl/intel_aet.c (revision f4e0cd80d3e7c31327459008b01d63804838a89d)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Intel Application Energy Telemetry
 *
 * Copyright (C) 2025 Intel Corporation
 *
 * Author:
 *    Tony Luck <tony.luck@intel.com>
 */

#define pr_fmt(fmt)   "resctrl: " fmt

#include <linux/array_size.h>
#include <linux/bits.h>
#include <linux/compiler_types.h>
#include <linux/container_of.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp_types.h>
#include <linux/init.h>
#include <linux/intel_pmt_features.h>
#include <linux/intel_vsec.h>
#include <linux/io.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/resctrl.h>
#include <linux/resctrl_types.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/topology.h>
#include <linux/types.h>

#include "internal.h"

/**
 * struct pmt_event - Telemetry event.
 * @id:		Resctrl event id.
 * @idx:	Counter index within each per-RMID block of counters.
 * @bin_bits:	Zero for integer valued events, else the number of bits in
 *		the fraction part of the fixed-point value.
 */
struct pmt_event {
	enum resctrl_event_id	id;
	unsigned int		idx;
	unsigned int		bin_bits;
};
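
/*
 * A non-zero @bin_bits means the counter is a fixed-point value, e.g.
 * with @bin_bits == 18 a raw value v represents v / 2^18.
 */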

#define EVT(_id, _idx, _bits) { .id = _id, .idx = _idx, .bin_bits = _bits }

/**
 * struct event_group - Events with the same feature type ("energy" or "perf") and GUID.
 * @pfname:		PMT feature name ("energy" or "perf") of this event group.
 * @pfg:		Points to the aggregated telemetry space information
 *			returned by the intel_pmt_get_regions_by_feature()
 *			call to the INTEL_PMT_TELEMETRY driver that contains
 *			data for all telemetry regions of type @pfname.
 *			Valid if the system supports the event group,
 *			NULL otherwise.
 * @guid:		Unique number per XML description file.
 * @mmio_size:		Number of bytes of MMIO registers for this group.
 * @num_events:		Number of events in this group.
 * @evts:		Array of event descriptors.
 */
struct event_group {
	/* Data fields for additional structures to manage this group. */
	const char			*pfname;
	struct pmt_feature_group	*pfg;

	/* Remaining fields initialized from XML file. */
	u32				guid;
	size_t				mmio_size;
	unsigned int			num_events;
	struct pmt_event		evts[] __counted_by(num_events);
};

#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status) \
		      (((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64))
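
/*
 * E.g. the energy group below covers 576 RMIDs with two counters each
 * plus three extra status registers, so its region must be
 * XML_MMIO_SIZE(576, 2, 3) = (576 * 2 + 3) * 8 = 9240 bytes.
 */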

/*
 * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml
 */
static struct event_group energy_0x26696143 = {
	.pfname		= "energy",
	.guid		= 0x26696143,
	.mmio_size	= XML_MMIO_SIZE(576, 2, 3),
	.num_events	= 2,
	.evts		= {
		EVT(PMT_EVENT_ENERGY, 0, 18),
		EVT(PMT_EVENT_ACTIVITY, 1, 18),
	}
};

/*
 * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml
 */
static struct event_group perf_0x26557651 = {
	.pfname		= "perf",
	.guid		= 0x26557651,
	.mmio_size	= XML_MMIO_SIZE(576, 7, 3),
	.num_events	= 7,
	.evts		= {
		EVT(PMT_EVENT_STALLS_LLC_HIT, 0, 0),
		EVT(PMT_EVENT_C1_RES, 1, 0),
		EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, 2, 0),
		EVT(PMT_EVENT_STALLS_LLC_MISS, 3, 0),
		EVT(PMT_EVENT_AUTO_C6_RES, 4, 0),
		EVT(PMT_EVENT_UNHALTED_REF_CYCLES, 5, 0),
		EVT(PMT_EVENT_UOPS_RETIRED, 6, 0),
	}
};

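/*
 * All aggregator layouts known to this driver. Support for a new GUID
 * is added by defining its event_group above and listing it here.
 */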
static struct event_group *known_event_groups[] = {
	&energy_0x26696143,
	&perf_0x26557651,
};

#define for_each_event_group(_peg)						\
	for (_peg = known_event_groups;						\
	     _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)];	\
	     _peg++)

static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e)
{
	if (tr->guid != e->guid)
		return true;
	if (tr->plat_info.package_id >= topology_max_packages()) {
		pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id,
			tr->guid);
		return true;
	}
	if (tr->size != e->mmio_size) {
		pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n",
			tr->size, e->guid, e->mmio_size);
		return true;
	}

	return false;
}

static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p)
{
	bool usable_regions = false;

	for (int i = 0; i < p->count; i++) {
		if (skip_telem_region(&p->regions[i], e)) {
			/*
			 * Clear the address field of regions that did not pass
			 * the checks in skip_telem_region() so they will not be
			 * used by intel_aet_read_event(). This is safe because
			 * intel_pmt_get_regions_by_feature() allocates a new
			 * pmt_feature_group structure for each caller and only
			 * uses the pmt_feature_group::kref field once the
			 * structure is handed back via
			 * intel_pmt_put_feature_group().
			 */
			p->regions[i].addr = NULL;

			continue;
		}
		usable_regions = true;
	}

	return usable_regions;
}

static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
	int skipped_events = 0;

	if (!group_has_usable_regions(e, p))
		return false;

	for (int j = 0; j < e->num_events; j++) {
		if (!resctrl_enable_mon_event(e->evts[j].id, true,
					      e->evts[j].bin_bits, &e->evts[j]))
			skipped_events++;
	}
	if (e->num_events == skipped_events) {
		pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid);
		return false;
	}

	return true;
}

static enum pmt_feature_id lookup_pfid(const char *pfname)
{
	if (!strcmp(pfname, "energy"))
		return FEATURE_PER_RMID_ENERGY_TELEM;
	else if (!strcmp(pfname, "perf"))
		return FEATURE_PER_RMID_PERF_TELEM;

	pr_warn("Unknown PMT feature name '%s'\n", pfname);

	return FEATURE_INVALID;
}

/*
 * Request a copy of struct pmt_feature_group for each event group. If one is
 * available, the returned structure contains an array of telemetry_region
 * structures, each describing one telemetry aggregator. The telemetry
 * aggregators may have different GUIDs, so a separate struct
 * pmt_feature_group copy is obtained for event groups with the same feature
 * type but different GUIDs. Post-processing ensures an event group only uses
 * the telemetry aggregators that match its GUID. An event group keeps a
 * pointer to its struct pmt_feature_group to indicate that its events were
 * successfully enabled.
 */
bool intel_aet_get_events(void)
{
	struct pmt_feature_group *p;
	enum pmt_feature_id pfid;
	struct event_group **peg;
	bool ret = false;

	for_each_event_group(peg) {
		pfid = lookup_pfid((*peg)->pfname);
		p = intel_pmt_get_regions_by_feature(pfid);
		if (IS_ERR_OR_NULL(p))
			continue;
		if (enable_events(*peg, p)) {
			(*peg)->pfg = p;
			ret = true;
		} else {
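			/* Group unusable; drop the reference to this copy. */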
			intel_pmt_put_feature_group(p);
		}
	}

	return ret;
}

void __exit intel_aet_exit(void)
{
	struct event_group **peg;

	for_each_event_group(peg) {
		if ((*peg)->pfg) {
			intel_pmt_put_feature_group((*peg)->pfg);
			(*peg)->pfg = NULL;
		}
	}
}

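/* Each 64-bit counter register: bit 63 flags valid data, bits 62:0 hold the count. */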
#define DATA_VALID	BIT_ULL(63)
#define DATA_BITS	GENMASK_ULL(62, 0)

/*
 * Read the counter for an event on a domain (summing all aggregators on the
 * domain). If an aggregator hasn't received any data for a specific RMID,
 * the MMIO read indicates that the data is not valid. Return success if at
 * least one aggregator has valid data.
 */
int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val)
{
	struct pmt_event *pevt = arch_priv;
	struct event_group *e;
	bool valid = false;
	u64 total = 0;
	u64 evtcount;
	void *pevt0;
	u32 idx;

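	/*
	 * @arch_priv points at one entry of the enclosing event_group's
	 * evts[] array. Step back to the start of the array, then use
	 * container_of() to recover the event_group. Counters are laid
	 * out as one block of @num_events registers per RMID.
	 */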
	pevt0 = pevt - pevt->idx;
	e = container_of(pevt0, struct event_group, evts);
	idx = rmid * e->num_events;
	idx += pevt->idx;

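	/* The register offset must lie within the validated MMIO region. */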
	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
		pr_warn_once("MMIO index %u out of range\n", idx);
		return -EIO;
	}

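	/* Sum this RMID's counter across every usable aggregator on the package. */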
	for (int i = 0; i < e->pfg->count; i++) {
		if (!e->pfg->regions[i].addr)
			continue;
		if (e->pfg->regions[i].plat_info.package_id != domid)
			continue;
		evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64));
		if (!(evtcount & DATA_VALID))
			continue;
		total += evtcount & DATA_BITS;
		valid = true;
	}

	if (valid)
		*val = total;

	return valid ? 0 : -EINVAL;
}

void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
				struct list_head *add_pos)
{
	struct rdt_perf_pkg_mon_domain *d;
	int err;

	d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
	if (!d)
		return;

	d->hdr.id = id;
	d->hdr.type = RESCTRL_MON_DOMAIN;
	d->hdr.rid = RDT_RESOURCE_PERF_PKG;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_mon_domain(r, &d->hdr);
	if (err) {
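		/*
		 * Concurrent readers may still be walking the RCU-protected
		 * domain list; wait for a grace period before freeing.
		 */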
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		kfree(d);
	}
}
315