xref: /linux/drivers/iommu/intel/perfmon.c (revision add452d09a38c7a7c44aea55c1015392cebf9fa7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Support Intel IOMMU PerfMon
4  * Copyright(c) 2023 Intel Corporation.
5  */
6 #define pr_fmt(fmt)	"DMAR: " fmt
7 #define dev_fmt(fmt)	pr_fmt(fmt)
8 
9 #include <linux/dmar.h>
10 #include "iommu.h"
11 #include "perfmon.h"
12 
/*
 * Bit fields of the event "config" value, published through the PMU's
 * sysfs "format" directory.
 */
PMU_FORMAT_ATTR(event,		"config:0-27");		/* ES: Events Select */
PMU_FORMAT_ATTR(event_group,	"config:28-31");	/* EGI: Event Group Index */

/* NULL-terminated list of the two format attributes defined above. */
static struct attribute *iommu_pmu_format_attrs[] = {
	&format_attr_event_group.attr,
	&format_attr_event.attr,
	NULL
};
21 
/* Attribute group named "format" that carries iommu_pmu_format_attrs. */
static struct attribute_group iommu_pmu_format_attr_group = {
	.name = "format",
	.attrs = iommu_pmu_format_attrs,
};
26 
/*
 * The available events are added in attr_update later, so the base
 * "events" group starts out with no attributes at all.
 */
static struct attribute *attrs_empty[] = {
	NULL
};

/* Attribute group named "events", initially backed by the empty list. */
static struct attribute_group iommu_pmu_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};
36 
/*
 * NULL-terminated list of the base attribute groups; installed as
 * pmu.attr_groups when the PMU is registered.
 */
static const struct attribute_group *iommu_pmu_attr_groups[] = {
	&iommu_pmu_format_attr_group,
	&iommu_pmu_events_attr_group,
	NULL
};
42 
43 static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev)
44 {
45 	/*
46 	 * The perf_event creates its own dev for each PMU.
47 	 * See pmu_dev_alloc()
48 	 */
49 	return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu);
50 }
51 
/*
 * Define one conditionally visible "format" attribute.
 * Input: _name: attribute name; also names the generated attribute_group
 *        _format: format string shown in sysfs (e.g. "config1:0")
 *        _filter: filter capability bit(s) tested against iommu_pmu->filter
 *
 * The expansion emits the format attribute, a one-entry attribute list,
 * an is_visible() callback and an attribute_group called _name. The
 * callback returns the attribute's mode only when the PMU's filter mask
 * has a bit in common with _filter; otherwise it returns 0.
 */
#define IOMMU_PMU_ATTR(_name, _format, _filter)				\
	PMU_FORMAT_ATTR(_name, _format);				\
									\
static struct attribute *_name##_attr[] = {				\
	&format_attr_##_name.attr,					\
	NULL								\
};									\
									\
static umode_t								\
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)	\
{									\
	struct device *dev = kobj_to_dev(kobj);				\
	struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);		\
									\
	if (!iommu_pmu)							\
		return 0;						\
	return (iommu_pmu->filter & _filter) ? attr->mode : 0;		\
}									\
									\
static struct attribute_group _name = {					\
	.name		= "format",					\
	.attrs		= _name##_attr,					\
	.is_visible	= _name##_is_visible,				\
};
76 
/*
 * Filter format attributes. Each filter has a one-bit enable field in
 * config1 bits 0-4 plus a value field located in config1 or config2; both
 * fields of a filter are gated on the same IOMMU_PMU_FILTER_* capability.
 */
IOMMU_PMU_ATTR(filter_requester_id_en,	"config1:0",		IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain_en,	"config1:1",		IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid_en,		"config1:2",		IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats_en,		"config1:3",		IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table_en,	"config1:4",		IOMMU_PMU_FILTER_PAGE_TABLE);
IOMMU_PMU_ATTR(filter_requester_id,	"config1:16-31",	IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain,		"config1:32-47",	IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid,		"config2:0-21",		IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats,		"config2:24-28",	IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table,	"config2:32-36",	IOMMU_PMU_FILTER_PAGE_TABLE);
87 
/*
 * Decoders for the filter fields. The iommu_pmu_en_*() macros extract a
 * single enable bit (bits 0-4) and the iommu_pmu_get_*() macros extract a
 * filter value; the bit positions mirror the filter format attribute
 * strings declared in this file.
 */
#define iommu_pmu_en_requester_id(e)		((e) & 0x1)
#define iommu_pmu_en_domain(e)			(((e) >> 1) & 0x1)
#define iommu_pmu_en_pasid(e)			(((e) >> 2) & 0x1)
#define iommu_pmu_en_ats(e)			(((e) >> 3) & 0x1)
#define iommu_pmu_en_page_table(e)		(((e) >> 4) & 0x1)
#define iommu_pmu_get_requester_id(filter)	(((filter) >> 16) & 0xffff)
#define iommu_pmu_get_domain(filter)		(((filter) >> 32) & 0xffff)
#define iommu_pmu_get_pasid(filter)		((filter) & 0x3fffff)
#define iommu_pmu_get_ats(filter)		(((filter) >> 24) & 0x1f)
#define iommu_pmu_get_page_table(filter)	(((filter) >> 32) & 0x1f)
98 
/*
 * Program one filter register of counter _idx.
 * Input: _name: filter name, selects iommu_pmu_en_<name>() and
 *               iommu_pmu_get_<name>()
 *        _config: value the filter value is extracted from
 *        _filter: IOMMU_PMU_FILTER_* capability bit of this filter
 *        _idx: counter index
 *        _econfig: value the enable bit is extracted from
 *
 * The register is written only when the PMU advertises the filter and the
 * event enabled it. The written value is the filter value OR-ed with
 * IOMMU_PMU_FILTER_EN.
 *
 * The macro relies on a variable named "iommu_pmu" in the caller's scope.
 * It is wrapped in do { } while (0) so that it expands to exactly one
 * statement, and the arguments used in arithmetic are parenthesized.
 */
#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig)		\
do {										\
	if ((iommu_pmu->filter & (_filter)) &&					\
	    iommu_pmu_en_##_name(_econfig)) {					\
		dmar_writel(iommu_pmu->cfg_reg +				\
			    (_idx) * IOMMU_PMU_CFG_OFFSET +			\
			    IOMMU_PMU_CFG_SIZE +				\
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,	\
			    iommu_pmu_get_##_name(_config) |			\
			    IOMMU_PMU_FILTER_EN);				\
	}									\
} while (0)
108 
/*
 * Write 0 to one filter register of counter _idx.
 * Input: _filter: IOMMU_PMU_FILTER_* capability bit of the filter
 *        _idx: counter index
 *
 * Nothing is written when the PMU does not advertise the filter.
 *
 * The macro relies on a variable named "iommu_pmu" in the caller's scope.
 * It is wrapped in do { } while (0) so that it expands to exactly one
 * statement, and the arguments used in arithmetic are parenthesized.
 */
#define iommu_pmu_clear_filter(_filter, _idx)					\
do {										\
	if (iommu_pmu->filter & (_filter)) {					\
		dmar_writel(iommu_pmu->cfg_reg +				\
			    (_idx) * IOMMU_PMU_CFG_OFFSET +			\
			    IOMMU_PMU_CFG_SIZE +				\
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,	\
			    0);							\
	}									\
} while (0)
118 
/*
 * Define the event attr related functions
 * Input: _name: event attr name
 *        _string: string of the event in sysfs
 *        _g_idx: event group encoding
 *        _event: event encoding
 *
 * The expansion emits the event attribute, a one-entry attribute list,
 * an is_visible() callback and an attribute_group called _name that
 * belongs to the "events" directory. The callback returns the attribute's
 * mode only when iommu_pmu->evcap[_g_idx] has a bit in common with _event;
 * otherwise it returns 0.
 */
#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event)			\
	PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string)		\
										\
static struct attribute *_name##_attr[] = {					\
	&event_attr_##_name.attr.attr,						\
	NULL									\
};										\
										\
static umode_t									\
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)		\
{										\
	struct device *dev = kobj_to_dev(kobj);					\
	struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);			\
										\
	if (!iommu_pmu)								\
		return 0;							\
	return (iommu_pmu->evcap[_g_idx] & _event) ? attr->mode : 0;		\
}										\
										\
static struct attribute_group _name = {						\
	.name		= "events",						\
	.attrs		= _name##_attr,						\
	.is_visible	= _name##_is_visible,					\
};
150 
/*
 * Named events. The sysfs string of each entry spells out the same
 * event_group/event pair that is passed as the last two arguments, which
 * are used for the capability check in the generated is_visible().
 */
IOMMU_PMU_EVENT_ATTR(iommu_clocks,		"event_group=0x0,event=0x001", 0x0, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_requests,		"event_group=0x0,event=0x002", 0x0, 0x002)
IOMMU_PMU_EVENT_ATTR(pw_occupancy,		"event_group=0x0,event=0x004", 0x0, 0x004)
IOMMU_PMU_EVENT_ATTR(ats_blocked,		"event_group=0x0,event=0x008", 0x0, 0x008)
IOMMU_PMU_EVENT_ATTR(iommu_mrds,		"event_group=0x1,event=0x001", 0x1, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked,		"event_group=0x1,event=0x020", 0x1, 0x020)
IOMMU_PMU_EVENT_ATTR(pg_req_posted,		"event_group=0x1,event=0x040", 0x1, 0x040)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup,		"event_group=0x2,event=0x001", 0x2, 0x001)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit,		"event_group=0x2,event=0x002", 0x2, 0x002)
IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup,	"event_group=0x2,event=0x004", 0x2, 0x004)
IOMMU_PMU_EVENT_ATTR(pasid_cache_hit,		"event_group=0x2,event=0x008", 0x2, 0x008)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup,		"event_group=0x2,event=0x010", 0x2, 0x010)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit,		"event_group=0x2,event=0x020", 0x2, 0x020)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup,		"event_group=0x2,event=0x040", 0x2, 0x040)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit,		"event_group=0x2,event=0x080", 0x2, 0x080)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup,	"event_group=0x2,event=0x100", 0x2, 0x100)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit,		"event_group=0x2,event=0x200", 0x2, 0x200)
IOMMU_PMU_EVENT_ATTR(iotlb_lookup,		"event_group=0x3,event=0x001", 0x3, 0x001)
IOMMU_PMU_EVENT_ATTR(iotlb_hit,			"event_group=0x3,event=0x002", 0x3, 0x002)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup,		"event_group=0x3,event=0x004", 0x3, 0x004)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit,		"event_group=0x3,event=0x008", 0x3, 0x008)
IOMMU_PMU_EVENT_ATTR(int_cache_lookup,		"event_group=0x4,event=0x001", 0x4, 0x001)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted,	"event_group=0x4,event=0x002", 0x4, 0x002)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted,	"event_group=0x4,event=0x004", 0x4, 0x004)
175 
/*
 * NULL-terminated list of every conditionally visible attribute group:
 * first the filter format attributes, then the events. Installed as
 * pmu.attr_update when the PMU is registered; each group's is_visible()
 * callback decides whether its attribute is shown for a given PMU.
 */
static const struct attribute_group *iommu_pmu_attr_update[] = {
	&filter_requester_id_en,
	&filter_domain_en,
	&filter_pasid_en,
	&filter_ats_en,
	&filter_page_table_en,
	&filter_requester_id,
	&filter_domain,
	&filter_pasid,
	&filter_ats,
	&filter_page_table,
	&iommu_clocks,
	&iommu_requests,
	&pw_occupancy,
	&ats_blocked,
	&iommu_mrds,
	&iommu_mem_blocked,
	&pg_req_posted,
	&ctxt_cache_lookup,
	&ctxt_cache_hit,
	&pasid_cache_lookup,
	&pasid_cache_hit,
	&ss_nonleaf_lookup,
	&ss_nonleaf_hit,
	&fs_nonleaf_lookup,
	&fs_nonleaf_hit,
	&hpt_nonleaf_lookup,
	&hpt_nonleaf_hit,
	&iotlb_lookup,
	&iotlb_hit,
	&hpt_leaf_lookup,
	&hpt_leaf_hit,
	&int_cache_lookup,
	&int_cache_hit_nonposted,
	&int_cache_hit_posted,
	NULL
};
213 
214 static inline void __iomem *
215 iommu_event_base(struct iommu_pmu *iommu_pmu, int idx)
216 {
217 	return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride;
218 }
219 
220 static inline void __iomem *
221 iommu_config_base(struct iommu_pmu *iommu_pmu, int idx)
222 {
223 	return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET;
224 }
225 
226 static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event)
227 {
228 	return container_of(event->pmu, struct iommu_pmu, pmu);
229 }
230 
231 static inline u64 iommu_event_config(struct perf_event *event)
232 {
233 	u64 config = event->attr.config;
234 
235 	return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) |
236 	       (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) |
237 	       IOMMU_EVENT_CFG_INT;
238 }
239 
240 static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu,
241 				      struct perf_event *event)
242 {
243 	return event->pmu == &iommu_pmu->pmu;
244 }
245 
246 static int iommu_pmu_validate_event(struct perf_event *event)
247 {
248 	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
249 	u32 event_group = iommu_event_group(event->attr.config);
250 
251 	if (event_group >= iommu_pmu->num_eg)
252 		return -EINVAL;
253 
254 	return 0;
255 }
256 
257 static int iommu_pmu_validate_group(struct perf_event *event)
258 {
259 	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
260 	struct perf_event *sibling;
261 	int nr = 0;
262 
263 	/*
264 	 * All events in a group must be scheduled simultaneously.
265 	 * Check whether there is enough counters for all the events.
266 	 */
267 	for_each_sibling_event(sibling, event->group_leader) {
268 		if (!is_iommu_pmu_event(iommu_pmu, sibling) ||
269 		    sibling->state <= PERF_EVENT_STATE_OFF)
270 			continue;
271 
272 		if (++nr > iommu_pmu->num_cntr)
273 			return -EINVAL;
274 	}
275 
276 	return 0;
277 }
278 
279 static int iommu_pmu_event_init(struct perf_event *event)
280 {
281 	struct hw_perf_event *hwc = &event->hw;
282 
283 	if (event->attr.type != event->pmu->type)
284 		return -ENOENT;
285 
286 	/* sampling not supported */
287 	if (event->attr.sample_period)
288 		return -EINVAL;
289 
290 	if (event->cpu < 0)
291 		return -EINVAL;
292 
293 	if (iommu_pmu_validate_event(event))
294 		return -EINVAL;
295 
296 	hwc->config = iommu_event_config(event);
297 
298 	return iommu_pmu_validate_group(event);
299 }
300 
301 static void iommu_pmu_event_update(struct perf_event *event)
302 {
303 	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
304 	struct hw_perf_event *hwc = &event->hw;
305 	u64 prev_count, new_count, delta;
306 	int shift = 64 - iommu_pmu->cntr_width;
307 
308 again:
309 	prev_count = local64_read(&hwc->prev_count);
310 	new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
311 	if (local64_xchg(&hwc->prev_count, new_count) != prev_count)
312 		goto again;
313 
314 	/*
315 	 * The counter width is enumerated. Always shift the counter
316 	 * before using it.
317 	 */
318 	delta = (new_count << shift) - (prev_count << shift);
319 	delta >>= shift;
320 
321 	local64_add(delta, &event->count);
322 }
323 
324 static void iommu_pmu_start(struct perf_event *event, int flags)
325 {
326 	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
327 	struct intel_iommu *iommu = iommu_pmu->iommu;
328 	struct hw_perf_event *hwc = &event->hw;
329 	u64 count;
330 
331 	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
332 		return;
333 
334 	if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX))
335 		return;
336 
337 	if (flags & PERF_EF_RELOAD)
338 		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
339 
340 	hwc->state = 0;
341 
342 	/* Always reprogram the period */
343 	count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
344 	local64_set((&hwc->prev_count), count);
345 
346 	/*
347 	 * The error of ecmd will be ignored.
348 	 * - The existing perf_event subsystem doesn't handle the error.
349 	 *   Only IOMMU PMU returns runtime HW error. We don't want to
350 	 *   change the existing generic interfaces for the specific case.
351 	 * - It's a corner case caused by HW, which is very unlikely to
352 	 *   happen. There is nothing SW can do.
353 	 * - The worst case is that the user will get <not count> with
354 	 *   perf command, which can give the user some hints.
355 	 */
356 	ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0);
357 
358 	perf_event_update_userpage(event);
359 }
360 
361 static void iommu_pmu_stop(struct perf_event *event, int flags)
362 {
363 	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
364 	struct intel_iommu *iommu = iommu_pmu->iommu;
365 	struct hw_perf_event *hwc = &event->hw;
366 
367 	if (!(hwc->state & PERF_HES_STOPPED)) {
368 		ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0);
369 
370 		iommu_pmu_event_update(event);
371 
372 		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
373 	}
374 }
375 
376 static inline int
377 iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu,
378 				  int idx, struct perf_event *event)
379 {
380 	u32 event_group = iommu_event_group(event->attr.config);
381 	u32 select = iommu_event_select(event->attr.config);
382 
383 	if (!(iommu_pmu->cntr_evcap[idx][event_group] & select))
384 		return -EINVAL;
385 
386 	return 0;
387 }
388 
389 static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
390 				  struct perf_event *event)
391 {
392 	struct hw_perf_event *hwc = &event->hw;
393 	int idx;
394 
395 	/*
396 	 * The counters which support limited events are usually at the end.
397 	 * Schedule them first to accommodate more events.
398 	 */
399 	for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) {
400 		if (test_and_set_bit(idx, iommu_pmu->used_mask))
401 			continue;
402 		/* Check per-counter event capabilities */
403 		if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event))
404 			break;
405 		clear_bit(idx, iommu_pmu->used_mask);
406 	}
407 	if (idx < 0)
408 		return -EINVAL;
409 
410 	iommu_pmu->event_list[idx] = event;
411 	hwc->idx = idx;
412 
413 	/* config events */
414 	dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config);
415 
416 	iommu_pmu_set_filter(requester_id, event->attr.config1,
417 			     IOMMU_PMU_FILTER_REQUESTER_ID, idx,
418 			     event->attr.config1);
419 	iommu_pmu_set_filter(domain, event->attr.config1,
420 			     IOMMU_PMU_FILTER_DOMAIN, idx,
421 			     event->attr.config1);
422 	iommu_pmu_set_filter(pasid, event->attr.config2,
423 			     IOMMU_PMU_FILTER_PASID, idx,
424 			     event->attr.config1);
425 	iommu_pmu_set_filter(ats, event->attr.config2,
426 			     IOMMU_PMU_FILTER_ATS, idx,
427 			     event->attr.config1);
428 	iommu_pmu_set_filter(page_table, event->attr.config2,
429 			     IOMMU_PMU_FILTER_PAGE_TABLE, idx,
430 			     event->attr.config1);
431 
432 	return 0;
433 }
434 
435 static int iommu_pmu_add(struct perf_event *event, int flags)
436 {
437 	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
438 	struct hw_perf_event *hwc = &event->hw;
439 	int ret;
440 
441 	ret = iommu_pmu_assign_event(iommu_pmu, event);
442 	if (ret < 0)
443 		return ret;
444 
445 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
446 
447 	if (flags & PERF_EF_START)
448 		iommu_pmu_start(event, 0);
449 
450 	return 0;
451 }
452 
453 static void iommu_pmu_del(struct perf_event *event, int flags)
454 {
455 	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
456 	int idx = event->hw.idx;
457 
458 	iommu_pmu_stop(event, PERF_EF_UPDATE);
459 
460 	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx);
461 	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx);
462 	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx);
463 	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx);
464 	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx);
465 
466 	iommu_pmu->event_list[idx] = NULL;
467 	event->hw.idx = -1;
468 	clear_bit(idx, iommu_pmu->used_mask);
469 
470 	perf_event_update_userpage(event);
471 }
472 
473 static void iommu_pmu_enable(struct pmu *pmu)
474 {
475 	struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
476 	struct intel_iommu *iommu = iommu_pmu->iommu;
477 
478 	ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0);
479 }
480 
481 static void iommu_pmu_disable(struct pmu *pmu)
482 {
483 	struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
484 	struct intel_iommu *iommu = iommu_pmu->iommu;
485 
486 	ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0);
487 }
488 
489 static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
490 {
491 	struct perf_event *event;
492 	u64 status;
493 	int i;
494 
495 	/*
496 	 * Two counters may be overflowed very close. Always check
497 	 * whether there are more to handle.
498 	 */
499 	while ((status = dmar_readq(iommu_pmu->overflow))) {
500 		for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) {
501 			/*
502 			 * Find the assigned event of the counter.
503 			 * Accumulate the value into the event->count.
504 			 */
505 			event = iommu_pmu->event_list[i];
506 			if (!event) {
507 				pr_warn_once("Cannot find the assigned event for counter %d\n", i);
508 				continue;
509 			}
510 			iommu_pmu_event_update(event);
511 		}
512 
513 		dmar_writeq(iommu_pmu->overflow, status);
514 	}
515 }
516 
517 static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id)
518 {
519 	struct intel_iommu *iommu = dev_id;
520 
521 	if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG))
522 		return IRQ_NONE;
523 
524 	iommu_pmu_counter_overflow(iommu->pmu);
525 
526 	/* Clear the status bit */
527 	dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS);
528 
529 	return IRQ_HANDLED;
530 }
531 
532 static int __iommu_pmu_register(struct intel_iommu *iommu)
533 {
534 	struct iommu_pmu *iommu_pmu = iommu->pmu;
535 
536 	iommu_pmu->pmu.name		= iommu->name;
537 	iommu_pmu->pmu.task_ctx_nr	= perf_invalid_context;
538 	iommu_pmu->pmu.event_init	= iommu_pmu_event_init;
539 	iommu_pmu->pmu.pmu_enable	= iommu_pmu_enable;
540 	iommu_pmu->pmu.pmu_disable	= iommu_pmu_disable;
541 	iommu_pmu->pmu.add		= iommu_pmu_add;
542 	iommu_pmu->pmu.del		= iommu_pmu_del;
543 	iommu_pmu->pmu.start		= iommu_pmu_start;
544 	iommu_pmu->pmu.stop		= iommu_pmu_stop;
545 	iommu_pmu->pmu.read		= iommu_pmu_event_update;
546 	iommu_pmu->pmu.attr_groups	= iommu_pmu_attr_groups;
547 	iommu_pmu->pmu.attr_update	= iommu_pmu_attr_update;
548 	iommu_pmu->pmu.capabilities	= PERF_PMU_CAP_NO_EXCLUDE;
549 	iommu_pmu->pmu.scope		= PERF_PMU_SCOPE_SYS_WIDE;
550 	iommu_pmu->pmu.module		= THIS_MODULE;
551 
552 	return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
553 }
554 
555 static inline void __iomem *
556 get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
557 {
558 	u32 off = dmar_readl(iommu->reg + offset);
559 
560 	return iommu->reg + off;
561 }
562 
563 int alloc_iommu_pmu(struct intel_iommu *iommu)
564 {
565 	struct iommu_pmu *iommu_pmu;
566 	int i, j, ret;
567 	u64 perfcap;
568 	u32 cap;
569 
570 	if (!ecap_pms(iommu->ecap))
571 		return 0;
572 
573 	/* The IOMMU PMU requires the ECMD support as well */
574 	if (!cap_ecmds(iommu->cap))
575 		return -ENODEV;
576 
577 	perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);
578 	/* The performance monitoring is not supported. */
579 	if (!perfcap)
580 		return -ENODEV;
581 
582 	/* Sanity check for the number of the counters and event groups */
583 	if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap))
584 		return -ENODEV;
585 
586 	/* The interrupt on overflow is required */
587 	if (!pcap_interrupt(perfcap))
588 		return -ENODEV;
589 
590 	/* Check required Enhanced Command Capability */
591 	if (!ecmd_has_pmu_essential(iommu))
592 		return -ENODEV;
593 
594 	iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL);
595 	if (!iommu_pmu)
596 		return -ENOMEM;
597 
598 	iommu_pmu->num_cntr = pcap_num_cntr(perfcap);
599 	if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) {
600 		pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!",
601 			     iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX);
602 		iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX;
603 	}
604 
605 	iommu_pmu->cntr_width = pcap_cntr_width(perfcap);
606 	iommu_pmu->filter = pcap_filters_mask(perfcap);
607 	iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap);
608 	iommu_pmu->num_eg = pcap_num_event_group(perfcap);
609 
610 	iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL);
611 	if (!iommu_pmu->evcap) {
612 		ret = -ENOMEM;
613 		goto free_pmu;
614 	}
615 
616 	/* Parse event group capabilities */
617 	for (i = 0; i < iommu_pmu->num_eg; i++) {
618 		u64 pcap;
619 
620 		pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
621 				  i * IOMMU_PMU_CAP_REGS_STEP);
622 		iommu_pmu->evcap[i] = pecap_es(pcap);
623 	}
624 
625 	iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL);
626 	if (!iommu_pmu->cntr_evcap) {
627 		ret = -ENOMEM;
628 		goto free_pmu_evcap;
629 	}
630 	for (i = 0; i < iommu_pmu->num_cntr; i++) {
631 		iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL);
632 		if (!iommu_pmu->cntr_evcap[i]) {
633 			ret = -ENOMEM;
634 			goto free_pmu_cntr_evcap;
635 		}
636 		/*
637 		 * Set to the global capabilities, will adjust according
638 		 * to per-counter capabilities later.
639 		 */
640 		for (j = 0; j < iommu_pmu->num_eg; j++)
641 			iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j];
642 	}
643 
644 	iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG);
645 	iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG);
646 	iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG);
647 
648 	/*
649 	 * Check per-counter capabilities. All counters should have the
650 	 * same capabilities on Interrupt on Overflow Support and Counter
651 	 * Width.
652 	 */
653 	for (i = 0; i < iommu_pmu->num_cntr; i++) {
654 		cap = dmar_readl(iommu_pmu->cfg_reg +
655 				 i * IOMMU_PMU_CFG_OFFSET +
656 				 IOMMU_PMU_CFG_CNTRCAP_OFFSET);
657 		if (!iommu_cntrcap_pcc(cap))
658 			continue;
659 
660 		/*
661 		 * It's possible that some counters have a different
662 		 * capability because of e.g., HW bug. Check the corner
663 		 * case here and simply drop those counters.
664 		 */
665 		if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) ||
666 		    !iommu_cntrcap_ios(cap)) {
667 			iommu_pmu->num_cntr = i;
668 			pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n",
669 				iommu_pmu->num_cntr);
670 		}
671 
672 		/* Clear the pre-defined events group */
673 		for (j = 0; j < iommu_pmu->num_eg; j++)
674 			iommu_pmu->cntr_evcap[i][j] = 0;
675 
676 		/* Override with per-counter event capabilities */
677 		for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) {
678 			cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
679 					 IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
680 					 (j * IOMMU_PMU_OFF_REGS_STEP));
681 			iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap);
682 			/*
683 			 * Some events may only be supported by a specific counter.
684 			 * Track them in the evcap as well.
685 			 */
686 			iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap);
687 		}
688 	}
689 
690 	iommu_pmu->iommu = iommu;
691 	iommu->pmu = iommu_pmu;
692 
693 	return 0;
694 
695 free_pmu_cntr_evcap:
696 	for (i = 0; i < iommu_pmu->num_cntr; i++)
697 		kfree(iommu_pmu->cntr_evcap[i]);
698 	kfree(iommu_pmu->cntr_evcap);
699 free_pmu_evcap:
700 	kfree(iommu_pmu->evcap);
701 free_pmu:
702 	kfree(iommu_pmu);
703 
704 	return ret;
705 }
706 
707 void free_iommu_pmu(struct intel_iommu *iommu)
708 {
709 	struct iommu_pmu *iommu_pmu = iommu->pmu;
710 
711 	if (!iommu_pmu)
712 		return;
713 
714 	if (iommu_pmu->evcap) {
715 		int i;
716 
717 		for (i = 0; i < iommu_pmu->num_cntr; i++)
718 			kfree(iommu_pmu->cntr_evcap[i]);
719 		kfree(iommu_pmu->cntr_evcap);
720 	}
721 	kfree(iommu_pmu->evcap);
722 	kfree(iommu_pmu);
723 	iommu->pmu = NULL;
724 }
725 
726 static int iommu_pmu_set_interrupt(struct intel_iommu *iommu)
727 {
728 	struct iommu_pmu *iommu_pmu = iommu->pmu;
729 	int irq, ret;
730 
731 	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu);
732 	if (irq <= 0)
733 		return -EINVAL;
734 
735 	snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id);
736 
737 	iommu->perf_irq = irq;
738 	ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler,
739 				   IRQF_ONESHOT, iommu_pmu->irq_name, iommu);
740 	if (ret) {
741 		dmar_free_hwirq(irq);
742 		iommu->perf_irq = 0;
743 		return ret;
744 	}
745 	return 0;
746 }
747 
748 static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
749 {
750 	if (!iommu->perf_irq)
751 		return;
752 
753 	free_irq(iommu->perf_irq, iommu);
754 	dmar_free_hwirq(iommu->perf_irq);
755 	iommu->perf_irq = 0;
756 }
757 
758 void iommu_pmu_register(struct intel_iommu *iommu)
759 {
760 	struct iommu_pmu *iommu_pmu = iommu->pmu;
761 
762 	if (!iommu_pmu)
763 		return;
764 
765 	if (__iommu_pmu_register(iommu))
766 		goto err;
767 
768 	/* Set interrupt for overflow */
769 	if (iommu_pmu_set_interrupt(iommu))
770 		goto unregister;
771 
772 	return;
773 
774 unregister:
775 	perf_pmu_unregister(&iommu_pmu->pmu);
776 err:
777 	pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
778 	free_iommu_pmu(iommu);
779 }
780 
781 void iommu_pmu_unregister(struct intel_iommu *iommu)
782 {
783 	struct iommu_pmu *iommu_pmu = iommu->pmu;
784 
785 	if (!iommu_pmu)
786 		return;
787 
788 	iommu_pmu_unset_interrupt(iommu);
789 	perf_pmu_unregister(&iommu_pmu->pmu);
790 }
791