xref: /linux/drivers/gpu/drm/i915/i915_pmu.c (revision 7f81907b7e3f93dfed2e903af52659baa4944341)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6 
7 #include <linux/pm_runtime.h>
8 
9 #include "gt/intel_engine.h"
10 #include "gt/intel_engine_pm.h"
11 #include "gt/intel_engine_regs.h"
12 #include "gt/intel_engine_user.h"
13 #include "gt/intel_gt.h"
14 #include "gt/intel_gt_pm.h"
15 #include "gt/intel_gt_regs.h"
16 #include "gt/intel_rc6.h"
17 #include "gt/intel_rps.h"
18 
19 #include "i915_drv.h"
20 #include "i915_pmu.h"
21 
/* Frequency, in Hz, for the sampling timer for events which need it. */
#define FREQUENCY 200
/* Sampling timer period in nanoseconds, clamped to at least 10000 ns. */
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

/* Bitmask covering the engine sample types: busy, wait and semaphore. */
#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))
30 
31 static struct i915_pmu *event_to_pmu(struct perf_event *event)
32 {
33 	return container_of(event->pmu, struct i915_pmu, base);
34 }
35 
36 static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu)
37 {
38 	return container_of(pmu, struct drm_i915_private, pmu);
39 }
40 
41 static u8 engine_config_sample(u64 config)
42 {
43 	return config & I915_PMU_SAMPLE_MASK;
44 }
45 
46 static u8 engine_event_sample(struct perf_event *event)
47 {
48 	return engine_config_sample(event->attr.config);
49 }
50 
51 static u8 engine_event_class(struct perf_event *event)
52 {
53 	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
54 }
55 
56 static u8 engine_event_instance(struct perf_event *event)
57 {
58 	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
59 }
60 
61 static bool is_engine_config(const u64 config)
62 {
63 	return config < __I915_PMU_OTHER(0);
64 }
65 
66 static unsigned int config_gt_id(const u64 config)
67 {
68 	return config >> __I915_PMU_GT_SHIFT;
69 }
70 
71 static u64 config_counter(const u64 config)
72 {
73 	return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
74 }
75 
76 static unsigned int other_bit(const u64 config)
77 {
78 	unsigned int val;
79 
80 	switch (config_counter(config)) {
81 	case I915_PMU_ACTUAL_FREQUENCY:
82 		val =  __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
83 		break;
84 	case I915_PMU_REQUESTED_FREQUENCY:
85 		val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
86 		break;
87 	case I915_PMU_RC6_RESIDENCY:
88 		val = __I915_PMU_RC6_RESIDENCY_ENABLED;
89 		break;
90 	default:
91 		/*
92 		 * Events that do not require sampling, or tracking state
93 		 * transitions between enabled and disabled can be ignored.
94 		 */
95 		return -1;
96 	}
97 
98 	return I915_ENGINE_SAMPLE_COUNT +
99 	       config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
100 	       val;
101 }
102 
103 static unsigned int config_bit(const u64 config)
104 {
105 	if (is_engine_config(config))
106 		return engine_config_sample(config);
107 	else
108 		return other_bit(config);
109 }
110 
/*
 * Translate an event config into its single-bit mask within the PMU enable
 * bitmask.
 *
 * The bit index comes from config_bit() and is checked against the width of
 * the i915_pmu 'enable' member: with a build-time assertion when the compiler
 * reports the index as a constant, otherwise with a one-shot runtime warning.
 */
static u32 config_mask(const u64 config)
{
	unsigned int bit = config_bit(config);

	if (__builtin_constant_p(bit))
		BUILD_BUG_ON(bit >
			     BITS_PER_TYPE(typeof_member(struct i915_pmu,
							 enable)) - 1);
	else
		WARN_ON_ONCE(bit >
			     BITS_PER_TYPE(typeof_member(struct i915_pmu,
							 enable)) - 1);

	return BIT(bit);
}
126 
127 static bool is_engine_event(struct perf_event *event)
128 {
129 	return is_engine_config(event->attr.config);
130 }
131 
132 static unsigned int event_bit(struct perf_event *event)
133 {
134 	return config_bit(event->attr.config);
135 }
136 
137 static u32 frequency_enabled_mask(void)
138 {
139 	unsigned int i;
140 	u32 mask = 0;
141 
142 	for (i = 0; i < I915_PMU_MAX_GT; i++)
143 		mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
144 			config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
145 
146 	return mask;
147 }
148 
149 static bool pmu_needs_timer(struct i915_pmu *pmu)
150 {
151 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
152 	u32 enable;
153 
154 	/*
155 	 * Only some counters need the sampling timer.
156 	 *
157 	 * We start with a bitmask of all currently enabled events.
158 	 */
159 	enable = pmu->enable;
160 
161 	/*
162 	 * Mask out all the ones which do not need the timer, or in
163 	 * other words keep all the ones that could need the timer.
164 	 */
165 	enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
166 
167 	/*
168 	 * Also there is software busyness tracking available we do not
169 	 * need the timer for I915_SAMPLE_BUSY counter.
170 	 */
171 	if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
172 		enable &= ~BIT(I915_SAMPLE_BUSY);
173 
174 	/*
175 	 * If some bits remain it means we need the sampling timer running.
176 	 */
177 	return enable;
178 }
179 
180 static u64 __get_rc6(struct intel_gt *gt)
181 {
182 	struct drm_i915_private *i915 = gt->i915;
183 	u64 val;
184 
185 	val = intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6);
186 
187 	if (HAS_RC6p(i915))
188 		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6p);
189 
190 	if (HAS_RC6pp(i915))
191 		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6pp);
192 
193 	return val;
194 }
195 
196 static inline s64 ktime_since_raw(const ktime_t kt)
197 {
198 	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
199 }
200 
201 static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
202 {
203 	return pmu->sample[gt_id][sample].cur;
204 }
205 
206 static void
207 store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
208 {
209 	pmu->sample[gt_id][sample].cur = val;
210 }
211 
212 static void
213 add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
214 {
215 	pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
216 }
217 
218 static u64 get_rc6(struct intel_gt *gt)
219 {
220 	struct drm_i915_private *i915 = gt->i915;
221 	const unsigned int gt_id = gt->info.id;
222 	struct i915_pmu *pmu = &i915->pmu;
223 	intel_wakeref_t wakeref;
224 	unsigned long flags;
225 	u64 val;
226 
227 	wakeref = intel_gt_pm_get_if_awake(gt);
228 	if (wakeref) {
229 		val = __get_rc6(gt);
230 		intel_gt_pm_put_async(gt, wakeref);
231 	}
232 
233 	spin_lock_irqsave(&pmu->lock, flags);
234 
235 	if (wakeref) {
236 		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
237 	} else {
238 		/*
239 		 * We think we are runtime suspended.
240 		 *
241 		 * Report the delta from when the device was suspended to now,
242 		 * on top of the last known real value, as the approximated RC6
243 		 * counter value.
244 		 */
245 		val = ktime_since_raw(pmu->sleep_last[gt_id]);
246 		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
247 	}
248 
249 	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
250 		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
251 	else
252 		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
253 
254 	spin_unlock_irqrestore(&pmu->lock, flags);
255 
256 	return val;
257 }
258 
259 static void init_rc6(struct i915_pmu *pmu)
260 {
261 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
262 	struct intel_gt *gt;
263 	unsigned int i;
264 
265 	for_each_gt(gt, i915, i) {
266 		intel_wakeref_t wakeref;
267 
268 		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
269 			u64 val = __get_rc6(gt);
270 
271 			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
272 			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
273 				     val);
274 			pmu->sleep_last[i] = ktime_get_raw();
275 		}
276 	}
277 }
278 
279 static void park_rc6(struct intel_gt *gt)
280 {
281 	struct i915_pmu *pmu = &gt->i915->pmu;
282 
283 	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
284 	pmu->sleep_last[gt->info.id] = ktime_get_raw();
285 }
286 
287 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
288 {
289 	if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
290 		pmu->timer_enabled = true;
291 		pmu->timer_last = ktime_get();
292 		hrtimer_start_range_ns(&pmu->timer,
293 				       ns_to_ktime(PERIOD), 0,
294 				       HRTIMER_MODE_REL_PINNED);
295 	}
296 }
297 
298 void i915_pmu_gt_parked(struct intel_gt *gt)
299 {
300 	struct i915_pmu *pmu = &gt->i915->pmu;
301 
302 	if (!pmu->registered)
303 		return;
304 
305 	spin_lock_irq(&pmu->lock);
306 
307 	park_rc6(gt);
308 
309 	/*
310 	 * Signal sampling timer to stop if only engine events are enabled and
311 	 * GPU went idle.
312 	 */
313 	pmu->unparked &= ~BIT(gt->info.id);
314 	if (pmu->unparked == 0)
315 		pmu->timer_enabled = false;
316 
317 	spin_unlock_irq(&pmu->lock);
318 }
319 
320 void i915_pmu_gt_unparked(struct intel_gt *gt)
321 {
322 	struct i915_pmu *pmu = &gt->i915->pmu;
323 
324 	if (!pmu->registered)
325 		return;
326 
327 	spin_lock_irq(&pmu->lock);
328 
329 	/*
330 	 * Re-enable sampling timer when GPU goes active.
331 	 */
332 	if (pmu->unparked == 0)
333 		__i915_pmu_maybe_start_timer(pmu);
334 
335 	pmu->unparked |= BIT(gt->info.id);
336 
337 	spin_unlock_irq(&pmu->lock);
338 }
339 
340 static void
341 add_sample(struct i915_pmu_sample *sample, u32 val)
342 {
343 	sample->cur += val;
344 }
345 
346 static bool exclusive_mmio_access(const struct drm_i915_private *i915)
347 {
348 	/*
349 	 * We have to avoid concurrent mmio cache line access on gen7 or
350 	 * risk a machine hang. For a fun history lesson dig out the old
351 	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
352 	 */
353 	return GRAPHICS_VER(i915) == 7;
354 }
355 
356 static void gen3_engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
357 {
358 	struct intel_engine_pmu *pmu = &engine->pmu;
359 	bool busy;
360 	u32 val;
361 
362 	val = ENGINE_READ_FW(engine, RING_CTL);
363 	if (val == 0) /* powerwell off => engine idle */
364 		return;
365 
366 	if (val & RING_WAIT)
367 		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
368 	if (val & RING_WAIT_SEMAPHORE)
369 		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
370 
371 	/* No need to sample when busy stats are supported. */
372 	if (intel_engine_supports_stats(engine))
373 		return;
374 
375 	/*
376 	 * While waiting on a semaphore or event, MI_MODE reports the
377 	 * ring as idle. However, previously using the seqno, and with
378 	 * execlists sampling, we account for the ring waiting as the
379 	 * engine being busy. Therefore, we record the sample as being
380 	 * busy if either waiting or !idle.
381 	 */
382 	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
383 	if (!busy) {
384 		val = ENGINE_READ_FW(engine, RING_MI_MODE);
385 		busy = !(val & MODE_IDLE);
386 	}
387 	if (busy)
388 		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
389 }
390 
391 static void gen2_engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
392 {
393 	struct intel_engine_pmu *pmu = &engine->pmu;
394 	u32 tail, head, acthd;
395 
396 	tail = ENGINE_READ_FW(engine, RING_TAIL);
397 	head = ENGINE_READ_FW(engine, RING_HEAD);
398 	acthd = ENGINE_READ_FW(engine, ACTHD);
399 
400 	if (head & HEAD_WAIT_I8XX)
401 		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
402 
403 	if (head & HEAD_WAIT_I8XX || head != acthd ||
404 	    (head & HEAD_ADDR) != (tail & TAIL_ADDR))
405 		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
406 }
407 
408 static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
409 {
410 	if (GRAPHICS_VER(engine->i915) >= 3)
411 		gen3_engine_sample(engine, period_ns);
412 	else
413 		gen2_engine_sample(engine, period_ns);
414 }
415 
416 static void
417 engines_sample(struct intel_gt *gt, unsigned int period_ns)
418 {
419 	struct drm_i915_private *i915 = gt->i915;
420 	struct intel_engine_cs *engine;
421 	enum intel_engine_id id;
422 	unsigned long flags;
423 
424 	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
425 		return;
426 
427 	if (!intel_gt_pm_is_awake(gt))
428 		return;
429 
430 	for_each_engine(engine, gt, id) {
431 		if (!engine->pmu.enable)
432 			continue;
433 
434 		if (!intel_engine_pm_get_if_awake(engine))
435 			continue;
436 
437 		if (exclusive_mmio_access(i915)) {
438 			spin_lock_irqsave(&engine->uncore->lock, flags);
439 			engine_sample(engine, period_ns);
440 			spin_unlock_irqrestore(&engine->uncore->lock, flags);
441 		} else {
442 			engine_sample(engine, period_ns);
443 		}
444 
445 		intel_engine_pm_put_async(engine);
446 	}
447 }
448 
449 static bool
450 frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
451 {
452 	return pmu->enable &
453 	       (config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt)) |
454 		config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt)));
455 }
456 
457 static void
458 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
459 {
460 	struct drm_i915_private *i915 = gt->i915;
461 	const unsigned int gt_id = gt->info.id;
462 	struct i915_pmu *pmu = &i915->pmu;
463 	struct intel_rps *rps = &gt->rps;
464 	intel_wakeref_t wakeref;
465 
466 	if (!frequency_sampling_enabled(pmu, gt_id))
467 		return;
468 
469 	/* Report 0/0 (actual/requested) frequency while parked. */
470 	wakeref = intel_gt_pm_get_if_awake(gt);
471 	if (!wakeref)
472 		return;
473 
474 	if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
475 		u32 val;
476 
477 		/*
478 		 * We take a quick peek here without using forcewake
479 		 * so that we don't perturb the system under observation
480 		 * (forcewake => !rc6 => increased power use). We expect
481 		 * that if the read fails because it is outside of the
482 		 * mmio power well, then it will return 0 -- in which
483 		 * case we assume the system is running at the intended
484 		 * frequency. Fortunately, the read should rarely fail!
485 		 */
486 		val = intel_rps_read_actual_frequency_fw(rps);
487 		if (!val)
488 			val = intel_gpu_freq(rps, rps->cur_freq);
489 
490 		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
491 				val, period_ns / 1000);
492 	}
493 
494 	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
495 		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
496 				intel_rps_get_requested_frequency(rps),
497 				period_ns / 1000);
498 	}
499 
500 	intel_gt_pm_put_async(gt, wakeref);
501 }
502 
503 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
504 {
505 	struct i915_pmu *pmu = container_of(hrtimer, struct i915_pmu, timer);
506 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
507 	unsigned int period_ns;
508 	struct intel_gt *gt;
509 	unsigned int i;
510 	ktime_t now;
511 
512 	if (!READ_ONCE(pmu->timer_enabled))
513 		return HRTIMER_NORESTART;
514 
515 	now = ktime_get();
516 	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
517 	pmu->timer_last = now;
518 
519 	/*
520 	 * Strictly speaking the passed in period may not be 100% accurate for
521 	 * all internal calculation, since some amount of time can be spent on
522 	 * grabbing the forcewake. However the potential error from timer call-
523 	 * back delay greatly dominates this so we keep it simple.
524 	 */
525 
526 	for_each_gt(gt, i915, i) {
527 		if (!(pmu->unparked & BIT(i)))
528 			continue;
529 
530 		engines_sample(gt, period_ns);
531 		frequency_sample(gt, period_ns);
532 	}
533 
534 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
535 
536 	return HRTIMER_RESTART;
537 }
538 
539 static void i915_pmu_event_destroy(struct perf_event *event)
540 {
541 	struct i915_pmu *pmu = event_to_pmu(event);
542 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
543 
544 	drm_WARN_ON(&i915->drm, event->parent);
545 
546 	drm_dev_put(&i915->drm);
547 }
548 
549 static int
550 engine_event_status(struct intel_engine_cs *engine,
551 		    enum drm_i915_pmu_engine_sample sample)
552 {
553 	switch (sample) {
554 	case I915_SAMPLE_BUSY:
555 	case I915_SAMPLE_WAIT:
556 		break;
557 	case I915_SAMPLE_SEMA:
558 		if (GRAPHICS_VER(engine->i915) < 6)
559 			return -ENODEV;
560 		break;
561 	default:
562 		return -ENOENT;
563 	}
564 
565 	return 0;
566 }
567 
568 static int
569 config_status(struct drm_i915_private *i915, u64 config)
570 {
571 	struct intel_gt *gt = to_gt(i915);
572 
573 	unsigned int gt_id = config_gt_id(config);
574 	unsigned int max_gt_id = HAS_EXTRA_GT_LIST(i915) ? 1 : 0;
575 
576 	if (gt_id > max_gt_id)
577 		return -ENOENT;
578 
579 	switch (config_counter(config)) {
580 	case I915_PMU_ACTUAL_FREQUENCY:
581 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
582 			/* Requires a mutex for sampling! */
583 			return -ENODEV;
584 		fallthrough;
585 	case I915_PMU_REQUESTED_FREQUENCY:
586 		if (GRAPHICS_VER(i915) < 6)
587 			return -ENODEV;
588 		break;
589 	case I915_PMU_INTERRUPTS:
590 		if (gt_id)
591 			return -ENOENT;
592 		break;
593 	case I915_PMU_RC6_RESIDENCY:
594 		if (!gt->rc6.supported)
595 			return -ENODEV;
596 		break;
597 	case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
598 		break;
599 	default:
600 		return -ENOENT;
601 	}
602 
603 	return 0;
604 }
605 
606 static int engine_event_init(struct perf_event *event)
607 {
608 	struct i915_pmu *pmu = event_to_pmu(event);
609 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
610 	struct intel_engine_cs *engine;
611 
612 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
613 					  engine_event_instance(event));
614 	if (!engine)
615 		return -ENODEV;
616 
617 	return engine_event_status(engine, engine_event_sample(event));
618 }
619 
620 static int i915_pmu_event_init(struct perf_event *event)
621 {
622 	struct i915_pmu *pmu = event_to_pmu(event);
623 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
624 	int ret;
625 
626 	if (!pmu->registered)
627 		return -ENODEV;
628 
629 	if (event->attr.type != event->pmu->type)
630 		return -ENOENT;
631 
632 	/* unsupported modes and filters */
633 	if (event->attr.sample_period) /* no sampling */
634 		return -EINVAL;
635 
636 	if (has_branch_stack(event))
637 		return -EOPNOTSUPP;
638 
639 	if (event->cpu < 0)
640 		return -EINVAL;
641 
642 	if (is_engine_event(event))
643 		ret = engine_event_init(event);
644 	else
645 		ret = config_status(i915, event->attr.config);
646 	if (ret)
647 		return ret;
648 
649 	if (!event->parent) {
650 		drm_dev_get(&i915->drm);
651 		event->destroy = i915_pmu_event_destroy;
652 	}
653 
654 	return 0;
655 }
656 
657 static u64 __i915_pmu_event_read(struct perf_event *event)
658 {
659 	struct i915_pmu *pmu = event_to_pmu(event);
660 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
661 	u64 val = 0;
662 
663 	if (is_engine_event(event)) {
664 		u8 sample = engine_event_sample(event);
665 		struct intel_engine_cs *engine;
666 
667 		engine = intel_engine_lookup_user(i915,
668 						  engine_event_class(event),
669 						  engine_event_instance(event));
670 
671 		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
672 			/* Do nothing */
673 		} else if (sample == I915_SAMPLE_BUSY &&
674 			   intel_engine_supports_stats(engine)) {
675 			ktime_t unused;
676 
677 			val = ktime_to_ns(intel_engine_get_busy_time(engine,
678 								     &unused));
679 		} else {
680 			val = engine->pmu.sample[sample].cur;
681 		}
682 	} else {
683 		const unsigned int gt_id = config_gt_id(event->attr.config);
684 		const u64 config = config_counter(event->attr.config);
685 
686 		switch (config) {
687 		case I915_PMU_ACTUAL_FREQUENCY:
688 			val =
689 			   div_u64(read_sample(pmu, gt_id,
690 					       __I915_SAMPLE_FREQ_ACT),
691 				   USEC_PER_SEC /* to MHz */);
692 			break;
693 		case I915_PMU_REQUESTED_FREQUENCY:
694 			val =
695 			   div_u64(read_sample(pmu, gt_id,
696 					       __I915_SAMPLE_FREQ_REQ),
697 				   USEC_PER_SEC /* to MHz */);
698 			break;
699 		case I915_PMU_INTERRUPTS:
700 			val = READ_ONCE(pmu->irq_count);
701 			break;
702 		case I915_PMU_RC6_RESIDENCY:
703 			val = get_rc6(i915->gt[gt_id]);
704 			break;
705 		case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
706 			val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
707 			break;
708 		}
709 	}
710 
711 	return val;
712 }
713 
714 static void i915_pmu_event_read(struct perf_event *event)
715 {
716 	struct i915_pmu *pmu = event_to_pmu(event);
717 	struct hw_perf_event *hwc = &event->hw;
718 	u64 prev, new;
719 
720 	if (!pmu->registered) {
721 		event->hw.state = PERF_HES_STOPPED;
722 		return;
723 	}
724 
725 	prev = local64_read(&hwc->prev_count);
726 	do {
727 		new = __i915_pmu_event_read(event);
728 	} while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new));
729 
730 	local64_add(new - prev, &event->count);
731 }
732 
733 static void i915_pmu_enable(struct perf_event *event)
734 {
735 	struct i915_pmu *pmu = event_to_pmu(event);
736 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
737 	const unsigned int bit = event_bit(event);
738 	unsigned long flags;
739 
740 	if (bit == -1)
741 		goto update;
742 
743 	spin_lock_irqsave(&pmu->lock, flags);
744 
745 	/*
746 	 * Update the bitmask of enabled events and increment
747 	 * the event reference counter.
748 	 */
749 	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
750 	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
751 	GEM_BUG_ON(pmu->enable_count[bit] == ~0);
752 
753 	pmu->enable |= BIT(bit);
754 	pmu->enable_count[bit]++;
755 
756 	/*
757 	 * Start the sampling timer if needed and not already enabled.
758 	 */
759 	__i915_pmu_maybe_start_timer(pmu);
760 
761 	/*
762 	 * For per-engine events the bitmask and reference counting
763 	 * is stored per engine.
764 	 */
765 	if (is_engine_event(event)) {
766 		u8 sample = engine_event_sample(event);
767 		struct intel_engine_cs *engine;
768 
769 		engine = intel_engine_lookup_user(i915,
770 						  engine_event_class(event),
771 						  engine_event_instance(event));
772 
773 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
774 			     I915_ENGINE_SAMPLE_COUNT);
775 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
776 			     I915_ENGINE_SAMPLE_COUNT);
777 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
778 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
779 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
780 
781 		engine->pmu.enable |= BIT(sample);
782 		engine->pmu.enable_count[sample]++;
783 	}
784 
785 	spin_unlock_irqrestore(&pmu->lock, flags);
786 
787 update:
788 	/*
789 	 * Store the current counter value so we can report the correct delta
790 	 * for all listeners. Even when the event was already enabled and has
791 	 * an existing non-zero value.
792 	 */
793 	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
794 }
795 
796 static void i915_pmu_disable(struct perf_event *event)
797 {
798 	struct i915_pmu *pmu = event_to_pmu(event);
799 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
800 	const unsigned int bit = event_bit(event);
801 	unsigned long flags;
802 
803 	if (bit == -1)
804 		return;
805 
806 	spin_lock_irqsave(&pmu->lock, flags);
807 
808 	if (is_engine_event(event)) {
809 		u8 sample = engine_event_sample(event);
810 		struct intel_engine_cs *engine;
811 
812 		engine = intel_engine_lookup_user(i915,
813 						  engine_event_class(event),
814 						  engine_event_instance(event));
815 
816 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
817 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
818 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
819 
820 		/*
821 		 * Decrement the reference count and clear the enabled
822 		 * bitmask when the last listener on an event goes away.
823 		 */
824 		if (--engine->pmu.enable_count[sample] == 0)
825 			engine->pmu.enable &= ~BIT(sample);
826 	}
827 
828 	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
829 	GEM_BUG_ON(pmu->enable_count[bit] == 0);
830 	/*
831 	 * Decrement the reference count and clear the enabled
832 	 * bitmask when the last listener on an event goes away.
833 	 */
834 	if (--pmu->enable_count[bit] == 0) {
835 		pmu->enable &= ~BIT(bit);
836 		pmu->timer_enabled &= pmu_needs_timer(pmu);
837 	}
838 
839 	spin_unlock_irqrestore(&pmu->lock, flags);
840 }
841 
842 static void i915_pmu_event_start(struct perf_event *event, int flags)
843 {
844 	struct i915_pmu *pmu = event_to_pmu(event);
845 
846 	if (!pmu->registered)
847 		return;
848 
849 	i915_pmu_enable(event);
850 	event->hw.state = 0;
851 }
852 
853 static void i915_pmu_event_stop(struct perf_event *event, int flags)
854 {
855 	struct i915_pmu *pmu = event_to_pmu(event);
856 
857 	if (!pmu->registered)
858 		goto out;
859 
860 	if (flags & PERF_EF_UPDATE)
861 		i915_pmu_event_read(event);
862 
863 	i915_pmu_disable(event);
864 
865 out:
866 	event->hw.state = PERF_HES_STOPPED;
867 }
868 
869 static int i915_pmu_event_add(struct perf_event *event, int flags)
870 {
871 	struct i915_pmu *pmu = event_to_pmu(event);
872 
873 	if (!pmu->registered)
874 		return -ENODEV;
875 
876 	if (flags & PERF_EF_START)
877 		i915_pmu_event_start(event, flags);
878 
879 	return 0;
880 }
881 
882 static void i915_pmu_event_del(struct perf_event *event, int flags)
883 {
884 	i915_pmu_event_stop(event, PERF_EF_UPDATE);
885 }
886 
/* Device attribute paired with a constant string to show. */
struct i915_str_attribute {
	struct device_attribute attr;
	/* Text printed by i915_pmu_format_show(). */
	const char *str;
};
891 
892 static ssize_t i915_pmu_format_show(struct device *dev,
893 				    struct device_attribute *attr, char *buf)
894 {
895 	struct i915_str_attribute *eattr;
896 
897 	eattr = container_of(attr, struct i915_str_attribute, attr);
898 	return sprintf(buf, "%s\n", eattr->str);
899 }
900 
/*
 * Build a read-only (0444) format attribute named _name that shows the
 * string _config via i915_pmu_format_show(). Evaluates to a pointer to the
 * embedded struct attribute of an anonymous i915_str_attribute.
 */
#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)
906 
/* NULL-terminated format attribute list: i915_eventid is config bits 0-20. */
static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};
911 
/* Attribute group named "format" holding i915_pmu_format_attrs. */
static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};
916 
/* Device attribute paired with the event config value it advertises. */
struct i915_ext_attribute {
	struct device_attribute attr;
	/* Config value printed by i915_pmu_event_show(). */
	unsigned long val;
};
921 
922 static ssize_t i915_pmu_event_show(struct device *dev,
923 				   struct device_attribute *attr, char *buf)
924 {
925 	struct i915_ext_attribute *eattr;
926 
927 	eattr = container_of(attr, struct i915_ext_attribute, attr);
928 	return sprintf(buf, "config=0x%lx\n", eattr->val);
929 }
930 
/* Initializer for a non-engine event table entry that is not global. */
#define __event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = false, \
}

/* Initializer for a non-engine event table entry marked global. */
#define __global_event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = true, \
}

/* Initializer for an engine event table entry: sample type and name. */
#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}
952 
953 static struct i915_ext_attribute *
954 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
955 {
956 	sysfs_attr_init(&attr->attr.attr);
957 	attr->attr.attr.name = name;
958 	attr->attr.attr.mode = 0444;
959 	attr->attr.show = i915_pmu_event_show;
960 	attr->val = config;
961 
962 	return ++attr;
963 }
964 
965 static struct perf_pmu_events_attr *
966 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
967 	     const char *str)
968 {
969 	sysfs_attr_init(&attr->attr.attr);
970 	attr->attr.attr.name = name;
971 	attr->attr.attr.mode = 0444;
972 	attr->attr.show = perf_event_sysfs_show;
973 	attr->event_str = str;
974 
975 	return ++attr;
976 }
977 
978 static struct attribute **
979 create_event_attributes(struct i915_pmu *pmu)
980 {
981 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
982 	static const struct {
983 		unsigned int counter;
984 		const char *name;
985 		const char *unit;
986 		bool global;
987 	} events[] = {
988 		__event(0, "actual-frequency", "M"),
989 		__event(1, "requested-frequency", "M"),
990 		__global_event(2, "interrupts", NULL),
991 		__event(3, "rc6-residency", "ns"),
992 		__event(4, "software-gt-awake-time", "ns"),
993 	};
994 	static const struct {
995 		enum drm_i915_pmu_engine_sample sample;
996 		char *name;
997 	} engine_events[] = {
998 		__engine_event(I915_SAMPLE_BUSY, "busy"),
999 		__engine_event(I915_SAMPLE_SEMA, "sema"),
1000 		__engine_event(I915_SAMPLE_WAIT, "wait"),
1001 	};
1002 	unsigned int count = 0;
1003 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
1004 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
1005 	struct attribute **attr = NULL, **attr_iter;
1006 	struct intel_engine_cs *engine;
1007 	struct intel_gt *gt;
1008 	unsigned int i, j;
1009 
1010 	/* Count how many counters we will be exposing. */
1011 	for_each_gt(gt, i915, j) {
1012 		for (i = 0; i < ARRAY_SIZE(events); i++) {
1013 			u64 config = ___I915_PMU_OTHER(j, events[i].counter);
1014 
1015 			if (!config_status(i915, config))
1016 				count++;
1017 		}
1018 	}
1019 
1020 	for_each_uabi_engine(engine, i915) {
1021 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
1022 			if (!engine_event_status(engine,
1023 						 engine_events[i].sample))
1024 				count++;
1025 		}
1026 	}
1027 
1028 	/* Allocate attribute objects and table. */
1029 	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
1030 	if (!i915_attr)
1031 		goto err_alloc;
1032 
1033 	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
1034 	if (!pmu_attr)
1035 		goto err_alloc;
1036 
1037 	/* Max one pointer of each attribute type plus a termination entry. */
1038 	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
1039 	if (!attr)
1040 		goto err_alloc;
1041 
1042 	i915_iter = i915_attr;
1043 	pmu_iter = pmu_attr;
1044 	attr_iter = attr;
1045 
1046 	/* Initialize supported non-engine counters. */
1047 	for_each_gt(gt, i915, j) {
1048 		for (i = 0; i < ARRAY_SIZE(events); i++) {
1049 			u64 config = ___I915_PMU_OTHER(j, events[i].counter);
1050 			char *str;
1051 
1052 			if (config_status(i915, config))
1053 				continue;
1054 
1055 			if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
1056 				str = kstrdup(events[i].name, GFP_KERNEL);
1057 			else
1058 				str = kasprintf(GFP_KERNEL, "%s-gt%u",
1059 						events[i].name, j);
1060 			if (!str)
1061 				goto err;
1062 
1063 			*attr_iter++ = &i915_iter->attr.attr;
1064 			i915_iter = add_i915_attr(i915_iter, str, config);
1065 
1066 			if (events[i].unit) {
1067 				if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
1068 					str = kasprintf(GFP_KERNEL, "%s.unit",
1069 							events[i].name);
1070 				else
1071 					str = kasprintf(GFP_KERNEL, "%s-gt%u.unit",
1072 							events[i].name, j);
1073 				if (!str)
1074 					goto err;
1075 
1076 				*attr_iter++ = &pmu_iter->attr.attr;
1077 				pmu_iter = add_pmu_attr(pmu_iter, str,
1078 							events[i].unit);
1079 			}
1080 		}
1081 	}
1082 
1083 	/* Initialize supported engine counters. */
1084 	for_each_uabi_engine(engine, i915) {
1085 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
1086 			char *str;
1087 
1088 			if (engine_event_status(engine,
1089 						engine_events[i].sample))
1090 				continue;
1091 
1092 			str = kasprintf(GFP_KERNEL, "%s-%s",
1093 					engine->name, engine_events[i].name);
1094 			if (!str)
1095 				goto err;
1096 
1097 			*attr_iter++ = &i915_iter->attr.attr;
1098 			i915_iter =
1099 				add_i915_attr(i915_iter, str,
1100 					      __I915_PMU_ENGINE(engine->uabi_class,
1101 								engine->uabi_instance,
1102 								engine_events[i].sample));
1103 
1104 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
1105 					engine->name, engine_events[i].name);
1106 			if (!str)
1107 				goto err;
1108 
1109 			*attr_iter++ = &pmu_iter->attr.attr;
1110 			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
1111 		}
1112 	}
1113 
1114 	pmu->i915_attr = i915_attr;
1115 	pmu->pmu_attr = pmu_attr;
1116 
1117 	return attr;
1118 
1119 err:;
1120 	for (attr_iter = attr; *attr_iter; attr_iter++)
1121 		kfree((*attr_iter)->name);
1122 
1123 err_alloc:
1124 	kfree(attr);
1125 	kfree(i915_attr);
1126 	kfree(pmu_attr);
1127 
1128 	return NULL;
1129 }
1130 
1131 static void free_event_attributes(struct i915_pmu *pmu)
1132 {
1133 	struct attribute **attr_iter = pmu->events_attr_group.attrs;
1134 
1135 	for (; *attr_iter; attr_iter++)
1136 		kfree((*attr_iter)->name);
1137 
1138 	kfree(pmu->events_attr_group.attrs);
1139 	kfree(pmu->i915_attr);
1140 	kfree(pmu->pmu_attr);
1141 
1142 	pmu->events_attr_group.attrs = NULL;
1143 	pmu->i915_attr = NULL;
1144 	pmu->pmu_attr = NULL;
1145 }
1146 
1147 void i915_pmu_register(struct drm_i915_private *i915)
1148 {
1149 	struct i915_pmu *pmu = &i915->pmu;
1150 	const struct attribute_group *attr_groups[] = {
1151 		&i915_pmu_format_attr_group,
1152 		&pmu->events_attr_group,
1153 		NULL
1154 	};
1155 	int ret = -ENOMEM;
1156 
1157 	spin_lock_init(&pmu->lock);
1158 	hrtimer_setup(&pmu->timer, i915_sample, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1159 	init_rc6(pmu);
1160 
1161 	if (IS_DGFX(i915)) {
1162 		pmu->name = kasprintf(GFP_KERNEL,
1163 				      "i915_%s",
1164 				      dev_name(i915->drm.dev));
1165 		if (pmu->name) {
1166 			/* tools/perf reserves colons as special. */
1167 			strreplace((char *)pmu->name, ':', '_');
1168 		}
1169 	} else {
1170 		pmu->name = "i915";
1171 	}
1172 	if (!pmu->name)
1173 		goto err;
1174 
1175 	pmu->events_attr_group.name = "events";
1176 	pmu->events_attr_group.attrs = create_event_attributes(pmu);
1177 	if (!pmu->events_attr_group.attrs)
1178 		goto err_name;
1179 
1180 	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
1181 					GFP_KERNEL);
1182 	if (!pmu->base.attr_groups)
1183 		goto err_attr;
1184 
1185 	pmu->base.module	= THIS_MODULE;
1186 	pmu->base.task_ctx_nr	= perf_invalid_context;
1187 	pmu->base.scope		= PERF_PMU_SCOPE_SYS_WIDE;
1188 	pmu->base.event_init	= i915_pmu_event_init;
1189 	pmu->base.add		= i915_pmu_event_add;
1190 	pmu->base.del		= i915_pmu_event_del;
1191 	pmu->base.start		= i915_pmu_event_start;
1192 	pmu->base.stop		= i915_pmu_event_stop;
1193 	pmu->base.read		= i915_pmu_event_read;
1194 
1195 	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
1196 	if (ret)
1197 		goto err_groups;
1198 
1199 	pmu->registered = true;
1200 
1201 	return;
1202 
1203 err_groups:
1204 	kfree(pmu->base.attr_groups);
1205 err_attr:
1206 	free_event_attributes(pmu);
1207 err_name:
1208 	if (IS_DGFX(i915))
1209 		kfree(pmu->name);
1210 err:
1211 	drm_notice(&i915->drm, "Failed to register PMU!\n");
1212 }
1213 
1214 void i915_pmu_unregister(struct drm_i915_private *i915)
1215 {
1216 	struct i915_pmu *pmu = &i915->pmu;
1217 
1218 	if (!pmu->registered)
1219 		return;
1220 
1221 	/* Disconnect the PMU callbacks */
1222 	pmu->registered = false;
1223 
1224 	hrtimer_cancel(&pmu->timer);
1225 
1226 	perf_pmu_unregister(&pmu->base);
1227 	kfree(pmu->base.attr_groups);
1228 	if (IS_DGFX(i915))
1229 		kfree(pmu->name);
1230 	free_event_attributes(pmu);
1231 }
1232