drivers/gpu/drm/xe/xe_pmu.c (revision 8cdcef1c2f82d207aa8b2a02298fbc17191c6261)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/xe_drm.h>

#include "regs/xe_gt_regs.h"
#include "xe_device.h"
#include "xe_gt_clock.h"
#include "xe_mmio.h"

static cpumask_t xe_pmu_cpumask;
static unsigned int xe_pmu_target_cpu = -1;

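/*
 * Event configs pack the target GT id into the bits at and above
 * __DRM_XE_PMU_GT_SHIFT and the counter id into the bits below it; the two
 * helpers below split a config back into those parts. Per the event table in
 * create_event_attributes(), counter 0 is render-group-busy, 1 copy, 2 media
 * and 3 any-engine-group-busy.
 */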
static unsigned int config_gt_id(const u64 config)
{
	return config >> __DRM_XE_PMU_GT_SHIFT;
}

static u64 config_counter(const u64 config)
{
	return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT);
}

static void xe_pmu_event_destroy(struct perf_event *event)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);

	drm_WARN_ON(&xe->drm, event->parent);

	drm_dev_put(&xe->drm);
}

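/*
 * Read the raw OAG group-busyness counter selected by @sample_type, scale it
 * by 16 to GT clock cycles and convert the result to nanoseconds.
 */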
static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type)
{
	u64 val;

	switch (sample_type) {
	case __XE_SAMPLE_RENDER_GROUP_BUSY:
		val = xe_mmio_read32(gt, XE_OAG_RENDER_BUSY_FREE);
		break;
	case __XE_SAMPLE_COPY_GROUP_BUSY:
		val = xe_mmio_read32(gt, XE_OAG_BLT_BUSY_FREE);
		break;
	case __XE_SAMPLE_MEDIA_GROUP_BUSY:
		val = xe_mmio_read32(gt, XE_OAG_ANY_MEDIA_FF_BUSY_FREE);
		break;
	case __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY:
		val = xe_mmio_read32(gt, XE_OAG_RC0_ANY_ENGINE_BUSY_FREE);
		break;
	default:
		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
		/* Don't convert an uninitialized value for unknown events. */
		return 0;
	}

	return xe_gt_clock_cycles_to_ns(gt, val * 16);
}

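/*
 * Return the current busyness value for @config. If the device is awake, the
 * value is read from the hardware under forcewake and cached; otherwise the
 * last cached sample is returned, so the counter stays stable while the GT
 * is suspended.
 */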
static u64 engine_group_busyness_read(struct xe_gt *gt, u64 config)
{
	int sample_type = config_counter(config);
	const unsigned int gt_id = gt->info.id;
	struct xe_device *xe = gt->tile->xe;
	struct xe_pmu *pmu = &xe->pmu;
	unsigned long flags;
	bool device_awake;
	u64 val;

	device_awake = xe_device_mem_access_get_if_ongoing(xe);
	if (device_awake) {
		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
		val = __engine_group_busyness_read(gt, sample_type);
		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
		xe_device_mem_access_put(xe);
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (device_awake)
		pmu->sample[gt_id][sample_type] = val;
	else
		val = pmu->sample[gt_id][sample_type];

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}

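/*
 * Snapshot all group-busyness counters into the PMU sample array. Called from
 * the GT suspend path so that reads done while the device is asleep return
 * the values from just before suspend.
 */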
static void engine_group_busyness_store(struct xe_gt *gt)
{
	struct xe_pmu *pmu = &gt->tile->xe->pmu;
	unsigned int gt_id = gt->info.id;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&pmu->lock, flags);

	for (i = __XE_SAMPLE_RENDER_GROUP_BUSY; i <= __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY; i++)
		pmu->sample[gt_id][i] = __engine_group_busyness_read(gt, i);

	spin_unlock_irqrestore(&pmu->lock, flags);
}

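/*
 * Validate an event config: the GT id must be in range and the selected
 * counter must exist on that GT (no render/copy/any-engine counters on a
 * media GT, and the media counter only where media engines are present).
 */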
static int
config_status(struct xe_device *xe, u64 config)
{
	unsigned int gt_id = config_gt_id(config);
	struct xe_gt *gt;

	if (gt_id >= XE_PMU_MAX_GT)
		return -ENOENT;

	gt = xe_device_get_gt(xe, gt_id);
	if (!gt)
		return -ENOENT;

	switch (config_counter(config)) {
	case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
	case DRM_XE_PMU_COPY_GROUP_BUSY(0):
	case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
		if (gt->info.type == XE_GT_TYPE_MEDIA)
			return -ENOENT;
		break;
	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
		if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0))))
			return -ENOENT;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

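/*
 * perf event_init callback: reject sampling modes, filters and CPUs outside
 * the PMU cpumask, validate the config, and take a device reference for the
 * lifetime of the event (dropped in xe_pmu_event_destroy()).
 */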
static int xe_pmu_event_init(struct perf_event *event)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	struct xe_pmu *pmu = &xe->pmu;
	int ret;

	if (pmu->closed)
		return -ENODEV;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
		return -EINVAL;

	ret = config_status(xe, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent) {
		drm_dev_get(&xe->drm);
		event->destroy = xe_pmu_event_destroy;
	}

	return 0;
}

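/* Resolve an event's config to the current absolute counter value. */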
static u64 __xe_pmu_event_read(struct perf_event *event)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	const unsigned int gt_id = config_gt_id(event->attr.config);
	const u64 config = event->attr.config;
	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
	u64 val;

	switch (config_counter(config)) {
	case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
	case DRM_XE_PMU_COPY_GROUP_BUSY(0):
	case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
		val = engine_group_busyness_read(gt, config);
		break;
	default:
		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
		/* Don't return an uninitialized value for unknown events. */
		val = 0;
	}

	return val;
}

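/*
 * perf read callback: sample the counter and fold the delta since the last
 * read into event->count. The cmpxchg loop makes concurrent readers agree on
 * a single prev_count so no delta is accounted twice.
 */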
static void xe_pmu_event_read(struct perf_event *event)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	struct hw_perf_event *hwc = &event->hw;
	struct xe_pmu *pmu = &xe->pmu;
	u64 prev, new;

	if (pmu->closed) {
		event->hw.state = PERF_HES_STOPPED;
		return;
	}
again:
	prev = local64_read(&hwc->prev_count);
	new = __xe_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

static void xe_pmu_enable(struct perf_event *event)
{
	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners, even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __xe_pmu_event_read(event));
}

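/*
 * Standard perf start/stop/add/del callbacks. There is no per-event hardware
 * state to program: starting an event only records a baseline sample and
 * stopping folds the final delta into the event count.
 */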
static void xe_pmu_event_start(struct perf_event *event, int flags)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	struct xe_pmu *pmu = &xe->pmu;

	if (pmu->closed)
		return;

	xe_pmu_enable(event);
	event->hw.state = 0;
}

static void xe_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		xe_pmu_event_read(event);

	event->hw.state = PERF_HES_STOPPED;
}

static int xe_pmu_event_add(struct perf_event *event, int flags)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	struct xe_pmu *pmu = &xe->pmu;

	if (pmu->closed)
		return -ENODEV;

	if (flags & PERF_EF_START)
		xe_pmu_event_start(event, flags);

	return 0;
}

static void xe_pmu_event_del(struct perf_event *event, int flags)
{
	xe_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int xe_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

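/*
 * sysfs plumbing: each exposed counter gets an xe_ext_attribute carrying its
 * config value, shown to userspace as "config=0x<val>" under the PMU's
 * events/ directory, plus the shared "cpumask" attribute below.
 */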
struct xe_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t xe_pmu_event_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct xe_ext_attribute *eattr;

	eattr = container_of(attr, struct xe_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static ssize_t cpumask_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &xe_pmu_cpumask);
}

static DEVICE_ATTR_RO(cpumask);

static struct attribute *xe_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group xe_pmu_cpumask_attr_group = {
	.attrs = xe_cpumask_attrs,
};

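/*
 * Shorthand initializers for the event table in create_event_attributes():
 * "global" events are exposed once for the whole device, the others once
 * per GT.
 */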
#define __event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = false, \
}

#define __global_event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = true, \
}

static struct xe_ext_attribute *
add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = xe_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

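/*
 * Build the "events" sysfs attribute group: for every counter supported on
 * every GT this creates one event attribute, named "<event>-gt<N>" (or just
 * "<event>" for global events), plus a matching ".unit" attribute.
 *
 * For illustration only (the exact PMU name depends on the device, see
 * xe_pmu_register()), the resulting events can be used from userspace along
 * the lines of:
 *
 *   perf stat -e xe_0000_03_00.0/render-group-busy-gt0/ -a sleep 1
 */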
static struct attribute **
create_event_attributes(struct xe_pmu *pmu)
{
	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
	static const struct {
		unsigned int counter;
		const char *name;
		const char *unit;
		bool global;
	} events[] = {
		__event(0, "render-group-busy", "ns"),
		__event(1, "copy-group-busy", "ns"),
		__event(2, "media-group-busy", "ns"),
		__event(3, "any-engine-group-busy", "ns"),
	};

	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct xe_ext_attribute *xe_attr = NULL, *xe_iter;
	struct attribute **attr = NULL, **attr_iter;
	unsigned int count = 0;
	unsigned int i, j;
	struct xe_gt *gt;

	/* Count how many counters we will be exposing. */
	for_each_gt(gt, xe, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter);

			if (!config_status(xe, config))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL);
	if (!xe_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	xe_iter = xe_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	for_each_gt(gt, xe, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter);
			char *str;

			if (config_status(xe, config))
				continue;

			if (events[i].global)
				str = kstrdup(events[i].name, GFP_KERNEL);
			else
				str = kasprintf(GFP_KERNEL, "%s-gt%u",
						events[i].name, j);
			if (!str)
				goto err;

			*attr_iter++ = &xe_iter->attr.attr;
			xe_iter = add_xe_attr(xe_iter, str, config);

			if (events[i].unit) {
				if (events[i].global)
					str = kasprintf(GFP_KERNEL, "%s.unit",
							events[i].name);
				else
					str = kasprintf(GFP_KERNEL, "%s-gt%u.unit",
							events[i].name, j);
				if (!str)
					goto err;

				*attr_iter++ = &pmu_iter->attr.attr;
				pmu_iter = add_pmu_attr(pmu_iter, str,
							events[i].unit);
			}
		}
	}

	pmu->xe_attr = xe_attr;
	pmu->pmu_attr = pmu_attr;

	return attr;

err:
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(xe_attr);
	kfree(pmu_attr);

	return NULL;
}

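/* Undo create_event_attributes(): free the name strings and the tables. */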
static void free_event_attributes(struct xe_pmu *pmu)
{
	struct attribute **attr_iter = pmu->events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(pmu->events_attr_group.attrs);
	kfree(pmu->xe_attr);
	kfree(pmu->pmu_attr);

	pmu->events_attr_group.attrs = NULL;
	pmu->xe_attr = NULL;
	pmu->pmu_attr = NULL;
}

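/*
 * CPU hotplug callbacks. Events are counted on a single designated CPU; when
 * that CPU goes offline, the perf context is migrated to another online CPU
 * and the exported cpumask is updated to match.
 */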
static int xe_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);

	/* Select the first online CPU as a designated reader. */
	if (cpumask_empty(&xe_pmu_cpumask))
		cpumask_set_cpu(cpu, &xe_pmu_cpumask);

	return 0;
}

static int xe_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
	unsigned int target = xe_pmu_target_cpu;

	/*
	 * Unregistering an instance generates a CPU offline event which we must
	 * ignore to avoid incorrectly modifying the shared xe_pmu_cpumask.
	 */
	if (pmu->closed)
		return 0;

	if (cpumask_test_and_clear_cpu(cpu, &xe_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);

		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &xe_pmu_cpumask);
			xe_pmu_target_cpu = target;
		}
	}

	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
		perf_pmu_migrate_context(&pmu->base, cpu, target);
		pmu->cpuhp.cpu = target;
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

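/*
 * Module-level init/exit: register a dynamic cpuhp state shared by all xe PMU
 * instances. Failure here is logged but not fatal; devices simply come up
 * without PMU support.
 */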
int xe_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/xe:online",
				      xe_pmu_cpu_online,
				      xe_pmu_cpu_offline);
	if (ret < 0)
		pr_notice("Failed to setup cpuhp state for xe PMU! (%d)\n",
			  ret);
	else
		cpuhp_slot = ret;

	return 0;
}

void xe_pmu_exit(void)
{
	if (cpuhp_slot != CPUHP_INVALID)
		cpuhp_remove_multi_state(cpuhp_slot);
}

static int xe_pmu_register_cpuhp_state(struct xe_pmu *pmu)
{
	if (cpuhp_slot == CPUHP_INVALID)
		return -EINVAL;

	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu)
{
	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}

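/*
 * xe_pmu_suspend - snapshot group-busyness counters before a GT suspends
 * @gt: GT about to be suspended
 *
 * Ensures that, while the GT is suspended, PMU reads return the last values
 * observed before suspend instead of touching powered-down hardware.
 */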
void xe_pmu_suspend(struct xe_gt *gt)
{
	engine_group_busyness_store(gt);
}

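/* drmm cleanup action: tear the PMU down when the DRM device goes away. */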
static void xe_pmu_unregister(struct drm_device *device, void *arg)
{
	struct xe_pmu *pmu = arg;

	if (!pmu->base.event_init)
		return;

	/*
	 * "Disconnect" the PMU callbacks - since all are atomic,
	 * synchronize_rcu() ensures all currently executing ones will have
	 * exited before we proceed with unregistration.
	 */
	pmu->closed = true;
	synchronize_rcu();

	xe_pmu_unregister_cpuhp_state(pmu);

	perf_pmu_unregister(&pmu->base);
	pmu->base.event_init = NULL;
	kfree(pmu->base.attr_groups);
	kfree(pmu->name);
	free_event_attributes(pmu);
}

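/*
 * xe_pmu_register - set up and register the perf PMU for an xe device
 * @pmu: the xe_pmu to register
 *
 * Builds the attribute groups, registers the PMU under the name
 * "xe_<device name>" and hooks up CPU hotplug handling. On failure the
 * device keeps working, just without PMU support.
 */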
void xe_pmu_register(struct xe_pmu *pmu)
{
	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
	const struct attribute_group *attr_groups[] = {
		&pmu->events_attr_group,
		&xe_pmu_cpumask_attr_group,
		NULL
	};

	int ret = -ENOMEM;

	spin_lock_init(&pmu->lock);
	pmu->cpuhp.cpu = -1;

	pmu->name = kasprintf(GFP_KERNEL,
			      "xe_%s",
			      dev_name(xe->drm.dev));
	if (!pmu->name)
		goto err;

	/* tools/perf reserves colons as special. */
	strreplace((char *)pmu->name, ':', '_');

	pmu->events_attr_group.name = "events";
	pmu->events_attr_group.attrs = create_event_attributes(pmu);
	if (!pmu->events_attr_group.attrs)
		goto err_name;

	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
					GFP_KERNEL);
	if (!pmu->base.attr_groups)
		goto err_attr;

	pmu->base.module	= THIS_MODULE;
	pmu->base.task_ctx_nr	= perf_invalid_context;
	pmu->base.event_init	= xe_pmu_event_init;
	pmu->base.add		= xe_pmu_event_add;
	pmu->base.del		= xe_pmu_event_del;
	pmu->base.start		= xe_pmu_event_start;
	pmu->base.stop		= xe_pmu_event_stop;
	pmu->base.read		= xe_pmu_event_read;
	pmu->base.event_idx	= xe_pmu_event_event_idx;

	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
	if (ret)
		goto err_groups;

	ret = xe_pmu_register_cpuhp_state(pmu);
	if (ret)
		goto err_unreg;

	ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu);
	if (ret)
		goto err_cpuhp;

	return;

err_cpuhp:
	xe_pmu_unregister_cpuhp_state(pmu);
err_unreg:
	perf_pmu_unregister(&pmu->base);
err_groups:
	kfree(pmu->base.attr_groups);
err_attr:
	pmu->base.event_init = NULL;
	free_event_attributes(pmu);
err_name:
	kfree(pmu->name);
err:
	drm_notice(&xe->drm, "Failed to register PMU!\n");
}