xref: /linux/drivers/gpu/drm/i915/i915_pmu.c (revision ff40b5769a50fab654a70575ff0f49853b799b0e)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6 
7 #include <linux/pm_runtime.h>
8 
9 #include "gt/intel_engine.h"
10 #include "gt/intel_engine_pm.h"
11 #include "gt/intel_engine_regs.h"
12 #include "gt/intel_engine_user.h"
13 #include "gt/intel_gt_pm.h"
14 #include "gt/intel_gt_regs.h"
15 #include "gt/intel_rc6.h"
16 #include "gt/intel_rps.h"
17 
18 #include "i915_drv.h"
19 #include "i915_pmu.h"
20 
/*
 * Frequency for the sampling timer for events which need it, in samples
 * per second (PERIOD below is derived as NSEC_PER_SEC / FREQUENCY).
 */
#define FREQUENCY 200
/* Sampling timer period in nanoseconds, clamped to be at least 10000 ns. */
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

/* Bitmask covering the three per-engine sample types: busy, wait and sema. */
#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

/*
 * CPUs on which events may be opened: i915_pmu_event_init() rejects events
 * bound to any CPU outside this mask. File-scope, so shared by every PMU
 * instance handled by this file.
 */
static cpumask_t i915_pmu_cpumask;
/*
 * Most recent migration target picked by i915_pmu_cpu_offline(); starts out
 * as -1 (converted to unsigned) until a target has been chosen.
 */
static unsigned int i915_pmu_target_cpu = -1;
32 
33 static u8 engine_config_sample(u64 config)
34 {
35 	return config & I915_PMU_SAMPLE_MASK;
36 }
37 
38 static u8 engine_event_sample(struct perf_event *event)
39 {
40 	return engine_config_sample(event->attr.config);
41 }
42 
43 static u8 engine_event_class(struct perf_event *event)
44 {
45 	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
46 }
47 
48 static u8 engine_event_instance(struct perf_event *event)
49 {
50 	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
51 }
52 
53 static bool is_engine_config(u64 config)
54 {
55 	return config < __I915_PMU_OTHER(0);
56 }
57 
58 static unsigned int other_bit(const u64 config)
59 {
60 	unsigned int val;
61 
62 	switch (config) {
63 	case I915_PMU_ACTUAL_FREQUENCY:
64 		val =  __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
65 		break;
66 	case I915_PMU_REQUESTED_FREQUENCY:
67 		val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
68 		break;
69 	case I915_PMU_RC6_RESIDENCY:
70 		val = __I915_PMU_RC6_RESIDENCY_ENABLED;
71 		break;
72 	default:
73 		/*
74 		 * Events that do not require sampling, or tracking state
75 		 * transitions between enabled and disabled can be ignored.
76 		 */
77 		return -1;
78 	}
79 
80 	return I915_ENGINE_SAMPLE_COUNT + val;
81 }
82 
83 static unsigned int config_bit(const u64 config)
84 {
85 	if (is_engine_config(config))
86 		return engine_config_sample(config);
87 	else
88 		return other_bit(config);
89 }
90 
91 static u64 config_mask(u64 config)
92 {
93 	return BIT_ULL(config_bit(config));
94 }
95 
96 static bool is_engine_event(struct perf_event *event)
97 {
98 	return is_engine_config(event->attr.config);
99 }
100 
101 static unsigned int event_bit(struct perf_event *event)
102 {
103 	return config_bit(event->attr.config);
104 }
105 
106 static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
107 {
108 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
109 	u32 enable;
110 
111 	/*
112 	 * Only some counters need the sampling timer.
113 	 *
114 	 * We start with a bitmask of all currently enabled events.
115 	 */
116 	enable = pmu->enable;
117 
118 	/*
119 	 * Mask out all the ones which do not need the timer, or in
120 	 * other words keep all the ones that could need the timer.
121 	 */
122 	enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
123 		  config_mask(I915_PMU_REQUESTED_FREQUENCY) |
124 		  ENGINE_SAMPLE_MASK;
125 
126 	/*
127 	 * When the GPU is idle per-engine counters do not need to be
128 	 * running so clear those bits out.
129 	 */
130 	if (!gpu_active)
131 		enable &= ~ENGINE_SAMPLE_MASK;
132 	/*
133 	 * Also there is software busyness tracking available we do not
134 	 * need the timer for I915_SAMPLE_BUSY counter.
135 	 */
136 	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
137 		enable &= ~BIT(I915_SAMPLE_BUSY);
138 
139 	/*
140 	 * If some bits remain it means we need the sampling timer running.
141 	 */
142 	return enable;
143 }
144 
145 static u64 __get_rc6(struct intel_gt *gt)
146 {
147 	struct drm_i915_private *i915 = gt->i915;
148 	u64 val;
149 
150 	val = intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6);
151 
152 	if (HAS_RC6p(i915))
153 		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6p);
154 
155 	if (HAS_RC6pp(i915))
156 		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6pp);
157 
158 	return val;
159 }
160 
161 static inline s64 ktime_since_raw(const ktime_t kt)
162 {
163 	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
164 }
165 
166 static u64 get_rc6(struct intel_gt *gt)
167 {
168 	struct drm_i915_private *i915 = gt->i915;
169 	struct i915_pmu *pmu = &i915->pmu;
170 	unsigned long flags;
171 	bool awake = false;
172 	u64 val;
173 
174 	if (intel_gt_pm_get_if_awake(gt)) {
175 		val = __get_rc6(gt);
176 		intel_gt_pm_put_async(gt);
177 		awake = true;
178 	}
179 
180 	spin_lock_irqsave(&pmu->lock, flags);
181 
182 	if (awake) {
183 		pmu->sample[__I915_SAMPLE_RC6].cur = val;
184 	} else {
185 		/*
186 		 * We think we are runtime suspended.
187 		 *
188 		 * Report the delta from when the device was suspended to now,
189 		 * on top of the last known real value, as the approximated RC6
190 		 * counter value.
191 		 */
192 		val = ktime_since_raw(pmu->sleep_last);
193 		val += pmu->sample[__I915_SAMPLE_RC6].cur;
194 	}
195 
196 	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
197 		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
198 	else
199 		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
200 
201 	spin_unlock_irqrestore(&pmu->lock, flags);
202 
203 	return val;
204 }
205 
206 static void init_rc6(struct i915_pmu *pmu)
207 {
208 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
209 	intel_wakeref_t wakeref;
210 
211 	with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
212 		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
213 		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
214 					pmu->sample[__I915_SAMPLE_RC6].cur;
215 		pmu->sleep_last = ktime_get_raw();
216 	}
217 }
218 
219 static void park_rc6(struct drm_i915_private *i915)
220 {
221 	struct i915_pmu *pmu = &i915->pmu;
222 
223 	pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
224 	pmu->sleep_last = ktime_get_raw();
225 }
226 
227 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
228 {
229 	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
230 		pmu->timer_enabled = true;
231 		pmu->timer_last = ktime_get();
232 		hrtimer_start_range_ns(&pmu->timer,
233 				       ns_to_ktime(PERIOD), 0,
234 				       HRTIMER_MODE_REL_PINNED);
235 	}
236 }
237 
238 void i915_pmu_gt_parked(struct drm_i915_private *i915)
239 {
240 	struct i915_pmu *pmu = &i915->pmu;
241 
242 	if (!pmu->base.event_init)
243 		return;
244 
245 	spin_lock_irq(&pmu->lock);
246 
247 	park_rc6(i915);
248 
249 	/*
250 	 * Signal sampling timer to stop if only engine events are enabled and
251 	 * GPU went idle.
252 	 */
253 	pmu->timer_enabled = pmu_needs_timer(pmu, false);
254 
255 	spin_unlock_irq(&pmu->lock);
256 }
257 
258 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
259 {
260 	struct i915_pmu *pmu = &i915->pmu;
261 
262 	if (!pmu->base.event_init)
263 		return;
264 
265 	spin_lock_irq(&pmu->lock);
266 
267 	/*
268 	 * Re-enable sampling timer when GPU goes active.
269 	 */
270 	__i915_pmu_maybe_start_timer(pmu);
271 
272 	spin_unlock_irq(&pmu->lock);
273 }
274 
275 static void
276 add_sample(struct i915_pmu_sample *sample, u32 val)
277 {
278 	sample->cur += val;
279 }
280 
281 static bool exclusive_mmio_access(const struct drm_i915_private *i915)
282 {
283 	/*
284 	 * We have to avoid concurrent mmio cache line access on gen7 or
285 	 * risk a machine hang. For a fun history lesson dig out the old
286 	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
287 	 */
288 	return GRAPHICS_VER(i915) == 7;
289 }
290 
291 static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
292 {
293 	struct intel_engine_pmu *pmu = &engine->pmu;
294 	bool busy;
295 	u32 val;
296 
297 	val = ENGINE_READ_FW(engine, RING_CTL);
298 	if (val == 0) /* powerwell off => engine idle */
299 		return;
300 
301 	if (val & RING_WAIT)
302 		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
303 	if (val & RING_WAIT_SEMAPHORE)
304 		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
305 
306 	/* No need to sample when busy stats are supported. */
307 	if (intel_engine_supports_stats(engine))
308 		return;
309 
310 	/*
311 	 * While waiting on a semaphore or event, MI_MODE reports the
312 	 * ring as idle. However, previously using the seqno, and with
313 	 * execlists sampling, we account for the ring waiting as the
314 	 * engine being busy. Therefore, we record the sample as being
315 	 * busy if either waiting or !idle.
316 	 */
317 	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
318 	if (!busy) {
319 		val = ENGINE_READ_FW(engine, RING_MI_MODE);
320 		busy = !(val & MODE_IDLE);
321 	}
322 	if (busy)
323 		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
324 }
325 
326 static void
327 engines_sample(struct intel_gt *gt, unsigned int period_ns)
328 {
329 	struct drm_i915_private *i915 = gt->i915;
330 	struct intel_engine_cs *engine;
331 	enum intel_engine_id id;
332 	unsigned long flags;
333 
334 	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
335 		return;
336 
337 	if (!intel_gt_pm_is_awake(gt))
338 		return;
339 
340 	for_each_engine(engine, gt, id) {
341 		if (!intel_engine_pm_get_if_awake(engine))
342 			continue;
343 
344 		if (exclusive_mmio_access(i915)) {
345 			spin_lock_irqsave(&engine->uncore->lock, flags);
346 			engine_sample(engine, period_ns);
347 			spin_unlock_irqrestore(&engine->uncore->lock, flags);
348 		} else {
349 			engine_sample(engine, period_ns);
350 		}
351 
352 		intel_engine_pm_put_async(engine);
353 	}
354 }
355 
356 static void
357 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
358 {
359 	sample->cur += mul_u32_u32(val, mul);
360 }
361 
362 static bool frequency_sampling_enabled(struct i915_pmu *pmu)
363 {
364 	return pmu->enable &
365 	       (config_mask(I915_PMU_ACTUAL_FREQUENCY) |
366 		config_mask(I915_PMU_REQUESTED_FREQUENCY));
367 }
368 
369 static void
370 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
371 {
372 	struct drm_i915_private *i915 = gt->i915;
373 	struct i915_pmu *pmu = &i915->pmu;
374 	struct intel_rps *rps = &gt->rps;
375 
376 	if (!frequency_sampling_enabled(pmu))
377 		return;
378 
379 	/* Report 0/0 (actual/requested) frequency while parked. */
380 	if (!intel_gt_pm_get_if_awake(gt))
381 		return;
382 
383 	if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
384 		u32 val;
385 
386 		/*
387 		 * We take a quick peek here without using forcewake
388 		 * so that we don't perturb the system under observation
389 		 * (forcewake => !rc6 => increased power use). We expect
390 		 * that if the read fails because it is outside of the
391 		 * mmio power well, then it will return 0 -- in which
392 		 * case we assume the system is running at the intended
393 		 * frequency. Fortunately, the read should rarely fail!
394 		 */
395 		val = intel_rps_read_actual_frequency_fw(rps);
396 		if (!val)
397 			val = intel_gpu_freq(rps, rps->cur_freq);
398 
399 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
400 				val, period_ns / 1000);
401 	}
402 
403 	if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
404 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
405 				intel_rps_get_requested_frequency(rps),
406 				period_ns / 1000);
407 	}
408 
409 	intel_gt_pm_put_async(gt);
410 }
411 
412 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
413 {
414 	struct drm_i915_private *i915 =
415 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
416 	struct i915_pmu *pmu = &i915->pmu;
417 	struct intel_gt *gt = to_gt(i915);
418 	unsigned int period_ns;
419 	ktime_t now;
420 
421 	if (!READ_ONCE(pmu->timer_enabled))
422 		return HRTIMER_NORESTART;
423 
424 	now = ktime_get();
425 	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
426 	pmu->timer_last = now;
427 
428 	/*
429 	 * Strictly speaking the passed in period may not be 100% accurate for
430 	 * all internal calculation, since some amount of time can be spent on
431 	 * grabbing the forcewake. However the potential error from timer call-
432 	 * back delay greatly dominates this so we keep it simple.
433 	 */
434 	engines_sample(gt, period_ns);
435 	frequency_sample(gt, period_ns);
436 
437 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
438 
439 	return HRTIMER_RESTART;
440 }
441 
442 static void i915_pmu_event_destroy(struct perf_event *event)
443 {
444 	struct drm_i915_private *i915 =
445 		container_of(event->pmu, typeof(*i915), pmu.base);
446 
447 	drm_WARN_ON(&i915->drm, event->parent);
448 
449 	drm_dev_put(&i915->drm);
450 }
451 
452 static int
453 engine_event_status(struct intel_engine_cs *engine,
454 		    enum drm_i915_pmu_engine_sample sample)
455 {
456 	switch (sample) {
457 	case I915_SAMPLE_BUSY:
458 	case I915_SAMPLE_WAIT:
459 		break;
460 	case I915_SAMPLE_SEMA:
461 		if (GRAPHICS_VER(engine->i915) < 6)
462 			return -ENODEV;
463 		break;
464 	default:
465 		return -ENOENT;
466 	}
467 
468 	return 0;
469 }
470 
471 static int
472 config_status(struct drm_i915_private *i915, u64 config)
473 {
474 	struct intel_gt *gt = to_gt(i915);
475 
476 	switch (config) {
477 	case I915_PMU_ACTUAL_FREQUENCY:
478 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
479 			/* Requires a mutex for sampling! */
480 			return -ENODEV;
481 		fallthrough;
482 	case I915_PMU_REQUESTED_FREQUENCY:
483 		if (GRAPHICS_VER(i915) < 6)
484 			return -ENODEV;
485 		break;
486 	case I915_PMU_INTERRUPTS:
487 		break;
488 	case I915_PMU_RC6_RESIDENCY:
489 		if (!gt->rc6.supported)
490 			return -ENODEV;
491 		break;
492 	case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
493 		break;
494 	default:
495 		return -ENOENT;
496 	}
497 
498 	return 0;
499 }
500 
501 static int engine_event_init(struct perf_event *event)
502 {
503 	struct drm_i915_private *i915 =
504 		container_of(event->pmu, typeof(*i915), pmu.base);
505 	struct intel_engine_cs *engine;
506 
507 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
508 					  engine_event_instance(event));
509 	if (!engine)
510 		return -ENODEV;
511 
512 	return engine_event_status(engine, engine_event_sample(event));
513 }
514 
515 static int i915_pmu_event_init(struct perf_event *event)
516 {
517 	struct drm_i915_private *i915 =
518 		container_of(event->pmu, typeof(*i915), pmu.base);
519 	struct i915_pmu *pmu = &i915->pmu;
520 	int ret;
521 
522 	if (pmu->closed)
523 		return -ENODEV;
524 
525 	if (event->attr.type != event->pmu->type)
526 		return -ENOENT;
527 
528 	/* unsupported modes and filters */
529 	if (event->attr.sample_period) /* no sampling */
530 		return -EINVAL;
531 
532 	if (has_branch_stack(event))
533 		return -EOPNOTSUPP;
534 
535 	if (event->cpu < 0)
536 		return -EINVAL;
537 
538 	/* only allow running on one cpu at a time */
539 	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
540 		return -EINVAL;
541 
542 	if (is_engine_event(event))
543 		ret = engine_event_init(event);
544 	else
545 		ret = config_status(i915, event->attr.config);
546 	if (ret)
547 		return ret;
548 
549 	if (!event->parent) {
550 		drm_dev_get(&i915->drm);
551 		event->destroy = i915_pmu_event_destroy;
552 	}
553 
554 	return 0;
555 }
556 
557 static u64 __i915_pmu_event_read(struct perf_event *event)
558 {
559 	struct drm_i915_private *i915 =
560 		container_of(event->pmu, typeof(*i915), pmu.base);
561 	struct i915_pmu *pmu = &i915->pmu;
562 	u64 val = 0;
563 
564 	if (is_engine_event(event)) {
565 		u8 sample = engine_event_sample(event);
566 		struct intel_engine_cs *engine;
567 
568 		engine = intel_engine_lookup_user(i915,
569 						  engine_event_class(event),
570 						  engine_event_instance(event));
571 
572 		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
573 			/* Do nothing */
574 		} else if (sample == I915_SAMPLE_BUSY &&
575 			   intel_engine_supports_stats(engine)) {
576 			ktime_t unused;
577 
578 			val = ktime_to_ns(intel_engine_get_busy_time(engine,
579 								     &unused));
580 		} else {
581 			val = engine->pmu.sample[sample].cur;
582 		}
583 	} else {
584 		switch (event->attr.config) {
585 		case I915_PMU_ACTUAL_FREQUENCY:
586 			val =
587 			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
588 				   USEC_PER_SEC /* to MHz */);
589 			break;
590 		case I915_PMU_REQUESTED_FREQUENCY:
591 			val =
592 			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
593 				   USEC_PER_SEC /* to MHz */);
594 			break;
595 		case I915_PMU_INTERRUPTS:
596 			val = READ_ONCE(pmu->irq_count);
597 			break;
598 		case I915_PMU_RC6_RESIDENCY:
599 			val = get_rc6(to_gt(i915));
600 			break;
601 		case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
602 			val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
603 			break;
604 		}
605 	}
606 
607 	return val;
608 }
609 
610 static void i915_pmu_event_read(struct perf_event *event)
611 {
612 	struct drm_i915_private *i915 =
613 		container_of(event->pmu, typeof(*i915), pmu.base);
614 	struct hw_perf_event *hwc = &event->hw;
615 	struct i915_pmu *pmu = &i915->pmu;
616 	u64 prev, new;
617 
618 	if (pmu->closed) {
619 		event->hw.state = PERF_HES_STOPPED;
620 		return;
621 	}
622 again:
623 	prev = local64_read(&hwc->prev_count);
624 	new = __i915_pmu_event_read(event);
625 
626 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
627 		goto again;
628 
629 	local64_add(new - prev, &event->count);
630 }
631 
632 static void i915_pmu_enable(struct perf_event *event)
633 {
634 	struct drm_i915_private *i915 =
635 		container_of(event->pmu, typeof(*i915), pmu.base);
636 	struct i915_pmu *pmu = &i915->pmu;
637 	unsigned long flags;
638 	unsigned int bit;
639 
640 	bit = event_bit(event);
641 	if (bit == -1)
642 		goto update;
643 
644 	spin_lock_irqsave(&pmu->lock, flags);
645 
646 	/*
647 	 * Update the bitmask of enabled events and increment
648 	 * the event reference counter.
649 	 */
650 	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
651 	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
652 	GEM_BUG_ON(pmu->enable_count[bit] == ~0);
653 
654 	pmu->enable |= BIT_ULL(bit);
655 	pmu->enable_count[bit]++;
656 
657 	/*
658 	 * Start the sampling timer if needed and not already enabled.
659 	 */
660 	__i915_pmu_maybe_start_timer(pmu);
661 
662 	/*
663 	 * For per-engine events the bitmask and reference counting
664 	 * is stored per engine.
665 	 */
666 	if (is_engine_event(event)) {
667 		u8 sample = engine_event_sample(event);
668 		struct intel_engine_cs *engine;
669 
670 		engine = intel_engine_lookup_user(i915,
671 						  engine_event_class(event),
672 						  engine_event_instance(event));
673 
674 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
675 			     I915_ENGINE_SAMPLE_COUNT);
676 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
677 			     I915_ENGINE_SAMPLE_COUNT);
678 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
679 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
680 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
681 
682 		engine->pmu.enable |= BIT(sample);
683 		engine->pmu.enable_count[sample]++;
684 	}
685 
686 	spin_unlock_irqrestore(&pmu->lock, flags);
687 
688 update:
689 	/*
690 	 * Store the current counter value so we can report the correct delta
691 	 * for all listeners. Even when the event was already enabled and has
692 	 * an existing non-zero value.
693 	 */
694 	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
695 }
696 
697 static void i915_pmu_disable(struct perf_event *event)
698 {
699 	struct drm_i915_private *i915 =
700 		container_of(event->pmu, typeof(*i915), pmu.base);
701 	unsigned int bit = event_bit(event);
702 	struct i915_pmu *pmu = &i915->pmu;
703 	unsigned long flags;
704 
705 	if (bit == -1)
706 		return;
707 
708 	spin_lock_irqsave(&pmu->lock, flags);
709 
710 	if (is_engine_event(event)) {
711 		u8 sample = engine_event_sample(event);
712 		struct intel_engine_cs *engine;
713 
714 		engine = intel_engine_lookup_user(i915,
715 						  engine_event_class(event),
716 						  engine_event_instance(event));
717 
718 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
719 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
720 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
721 
722 		/*
723 		 * Decrement the reference count and clear the enabled
724 		 * bitmask when the last listener on an event goes away.
725 		 */
726 		if (--engine->pmu.enable_count[sample] == 0)
727 			engine->pmu.enable &= ~BIT(sample);
728 	}
729 
730 	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
731 	GEM_BUG_ON(pmu->enable_count[bit] == 0);
732 	/*
733 	 * Decrement the reference count and clear the enabled
734 	 * bitmask when the last listener on an event goes away.
735 	 */
736 	if (--pmu->enable_count[bit] == 0) {
737 		pmu->enable &= ~BIT_ULL(bit);
738 		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
739 	}
740 
741 	spin_unlock_irqrestore(&pmu->lock, flags);
742 }
743 
744 static void i915_pmu_event_start(struct perf_event *event, int flags)
745 {
746 	struct drm_i915_private *i915 =
747 		container_of(event->pmu, typeof(*i915), pmu.base);
748 	struct i915_pmu *pmu = &i915->pmu;
749 
750 	if (pmu->closed)
751 		return;
752 
753 	i915_pmu_enable(event);
754 	event->hw.state = 0;
755 }
756 
757 static void i915_pmu_event_stop(struct perf_event *event, int flags)
758 {
759 	if (flags & PERF_EF_UPDATE)
760 		i915_pmu_event_read(event);
761 	i915_pmu_disable(event);
762 	event->hw.state = PERF_HES_STOPPED;
763 }
764 
765 static int i915_pmu_event_add(struct perf_event *event, int flags)
766 {
767 	struct drm_i915_private *i915 =
768 		container_of(event->pmu, typeof(*i915), pmu.base);
769 	struct i915_pmu *pmu = &i915->pmu;
770 
771 	if (pmu->closed)
772 		return -ENODEV;
773 
774 	if (flags & PERF_EF_START)
775 		i915_pmu_event_start(event, flags);
776 
777 	return 0;
778 }
779 
780 static void i915_pmu_event_del(struct perf_event *event, int flags)
781 {
782 	i915_pmu_event_stop(event, PERF_EF_UPDATE);
783 }
784 
/* perf event_idx callback: every event reports index 0. */
static int i915_pmu_event_event_idx(struct perf_event *event)
{
	const int idx = 0;

	return idx;
}
789 
/*
 * Device attribute carrying a fixed string, which i915_pmu_format_show()
 * prints followed by a newline.
 */
struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};
794 
795 static ssize_t i915_pmu_format_show(struct device *dev,
796 				    struct device_attribute *attr, char *buf)
797 {
798 	struct i915_str_attribute *eattr;
799 
800 	eattr = container_of(attr, struct i915_str_attribute, attr);
801 	return sprintf(buf, "%s\n", eattr->str);
802 }
803 
/*
 * Build an anonymous i915_str_attribute named @_name which shows the string
 * @_config through i915_pmu_format_show(), and evaluate to a pointer to its
 * embedded struct attribute so it can be placed in an attribute table.
 */
#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

/* Entries of the "format" group: a single i915_eventid field, config:0-20. */
static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

/* The "format" sysfs attribute group of the PMU. */
static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};
819 
/*
 * Device attribute carrying an event config value, which
 * i915_pmu_event_show() prints as "config=0x...".
 */
struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};
824 
825 static ssize_t i915_pmu_event_show(struct device *dev,
826 				   struct device_attribute *attr, char *buf)
827 {
828 	struct i915_ext_attribute *eattr;
829 
830 	eattr = container_of(attr, struct i915_ext_attribute, attr);
831 	return sprintf(buf, "config=0x%lx\n", eattr->val);
832 }
833 
834 static ssize_t cpumask_show(struct device *dev,
835 			    struct device_attribute *attr, char *buf)
836 {
837 	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
838 }
839 
/* Read-only "cpumask" device attribute (dev_attr_cpumask). */
static DEVICE_ATTR_RO(cpumask);

/* NULL-terminated attribute table holding only the cpumask attribute. */
static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

/* Unnamed attribute group exposing the cpumask attribute. */
static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};
850 
/* Initializer for an entry of the events[] table in create_event_attributes(). */
#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

/* Initializer for an entry of the engine_events[] table in create_event_attributes(). */
#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}
863 
864 static struct i915_ext_attribute *
865 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
866 {
867 	sysfs_attr_init(&attr->attr.attr);
868 	attr->attr.attr.name = name;
869 	attr->attr.attr.mode = 0444;
870 	attr->attr.show = i915_pmu_event_show;
871 	attr->val = config;
872 
873 	return ++attr;
874 }
875 
876 static struct perf_pmu_events_attr *
877 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
878 	     const char *str)
879 {
880 	sysfs_attr_init(&attr->attr.attr);
881 	attr->attr.attr.name = name;
882 	attr->attr.attr.mode = 0444;
883 	attr->attr.show = perf_event_sysfs_show;
884 	attr->event_str = str;
885 
886 	return ++attr;
887 }
888 
889 static struct attribute **
890 create_event_attributes(struct i915_pmu *pmu)
891 {
892 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
893 	static const struct {
894 		u64 config;
895 		const char *name;
896 		const char *unit;
897 	} events[] = {
898 		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
899 		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
900 		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
901 		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
902 		__event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"),
903 	};
904 	static const struct {
905 		enum drm_i915_pmu_engine_sample sample;
906 		char *name;
907 	} engine_events[] = {
908 		__engine_event(I915_SAMPLE_BUSY, "busy"),
909 		__engine_event(I915_SAMPLE_SEMA, "sema"),
910 		__engine_event(I915_SAMPLE_WAIT, "wait"),
911 	};
912 	unsigned int count = 0;
913 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
914 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
915 	struct attribute **attr = NULL, **attr_iter;
916 	struct intel_engine_cs *engine;
917 	unsigned int i;
918 
919 	/* Count how many counters we will be exposing. */
920 	for (i = 0; i < ARRAY_SIZE(events); i++) {
921 		if (!config_status(i915, events[i].config))
922 			count++;
923 	}
924 
925 	for_each_uabi_engine(engine, i915) {
926 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
927 			if (!engine_event_status(engine,
928 						 engine_events[i].sample))
929 				count++;
930 		}
931 	}
932 
933 	/* Allocate attribute objects and table. */
934 	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
935 	if (!i915_attr)
936 		goto err_alloc;
937 
938 	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
939 	if (!pmu_attr)
940 		goto err_alloc;
941 
942 	/* Max one pointer of each attribute type plus a termination entry. */
943 	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
944 	if (!attr)
945 		goto err_alloc;
946 
947 	i915_iter = i915_attr;
948 	pmu_iter = pmu_attr;
949 	attr_iter = attr;
950 
951 	/* Initialize supported non-engine counters. */
952 	for (i = 0; i < ARRAY_SIZE(events); i++) {
953 		char *str;
954 
955 		if (config_status(i915, events[i].config))
956 			continue;
957 
958 		str = kstrdup(events[i].name, GFP_KERNEL);
959 		if (!str)
960 			goto err;
961 
962 		*attr_iter++ = &i915_iter->attr.attr;
963 		i915_iter = add_i915_attr(i915_iter, str, events[i].config);
964 
965 		if (events[i].unit) {
966 			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
967 			if (!str)
968 				goto err;
969 
970 			*attr_iter++ = &pmu_iter->attr.attr;
971 			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
972 		}
973 	}
974 
975 	/* Initialize supported engine counters. */
976 	for_each_uabi_engine(engine, i915) {
977 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
978 			char *str;
979 
980 			if (engine_event_status(engine,
981 						engine_events[i].sample))
982 				continue;
983 
984 			str = kasprintf(GFP_KERNEL, "%s-%s",
985 					engine->name, engine_events[i].name);
986 			if (!str)
987 				goto err;
988 
989 			*attr_iter++ = &i915_iter->attr.attr;
990 			i915_iter =
991 				add_i915_attr(i915_iter, str,
992 					      __I915_PMU_ENGINE(engine->uabi_class,
993 								engine->uabi_instance,
994 								engine_events[i].sample));
995 
996 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
997 					engine->name, engine_events[i].name);
998 			if (!str)
999 				goto err;
1000 
1001 			*attr_iter++ = &pmu_iter->attr.attr;
1002 			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
1003 		}
1004 	}
1005 
1006 	pmu->i915_attr = i915_attr;
1007 	pmu->pmu_attr = pmu_attr;
1008 
1009 	return attr;
1010 
1011 err:;
1012 	for (attr_iter = attr; *attr_iter; attr_iter++)
1013 		kfree((*attr_iter)->name);
1014 
1015 err_alloc:
1016 	kfree(attr);
1017 	kfree(i915_attr);
1018 	kfree(pmu_attr);
1019 
1020 	return NULL;
1021 }
1022 
1023 static void free_event_attributes(struct i915_pmu *pmu)
1024 {
1025 	struct attribute **attr_iter = pmu->events_attr_group.attrs;
1026 
1027 	for (; *attr_iter; attr_iter++)
1028 		kfree((*attr_iter)->name);
1029 
1030 	kfree(pmu->events_attr_group.attrs);
1031 	kfree(pmu->i915_attr);
1032 	kfree(pmu->pmu_attr);
1033 
1034 	pmu->events_attr_group.attrs = NULL;
1035 	pmu->i915_attr = NULL;
1036 	pmu->pmu_attr = NULL;
1037 }
1038 
1039 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
1040 {
1041 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1042 
1043 	GEM_BUG_ON(!pmu->base.event_init);
1044 
1045 	/* Select the first online CPU as a designated reader. */
1046 	if (cpumask_empty(&i915_pmu_cpumask))
1047 		cpumask_set_cpu(cpu, &i915_pmu_cpumask);
1048 
1049 	return 0;
1050 }
1051 
1052 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
1053 {
1054 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1055 	unsigned int target = i915_pmu_target_cpu;
1056 
1057 	GEM_BUG_ON(!pmu->base.event_init);
1058 
1059 	/*
1060 	 * Unregistering an instance generates a CPU offline event which we must
1061 	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
1062 	 */
1063 	if (pmu->closed)
1064 		return 0;
1065 
1066 	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
1067 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
1068 
1069 		/* Migrate events if there is a valid target */
1070 		if (target < nr_cpu_ids) {
1071 			cpumask_set_cpu(target, &i915_pmu_cpumask);
1072 			i915_pmu_target_cpu = target;
1073 		}
1074 	}
1075 
1076 	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
1077 		perf_pmu_migrate_context(&pmu->base, cpu, target);
1078 		pmu->cpuhp.cpu = target;
1079 	}
1080 
1081 	return 0;
1082 }
1083 
/*
 * CPU hotplug state returned by cpuhp_setup_state_multi() in i915_pmu_init();
 * stays CPUHP_INVALID if that setup failed or has not run.
 */
static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1085 
1086 int i915_pmu_init(void)
1087 {
1088 	int ret;
1089 
1090 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1091 				      "perf/x86/intel/i915:online",
1092 				      i915_pmu_cpu_online,
1093 				      i915_pmu_cpu_offline);
1094 	if (ret < 0)
1095 		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
1096 			  ret);
1097 	else
1098 		cpuhp_slot = ret;
1099 
1100 	return 0;
1101 }
1102 
1103 void i915_pmu_exit(void)
1104 {
1105 	if (cpuhp_slot != CPUHP_INVALID)
1106 		cpuhp_remove_multi_state(cpuhp_slot);
1107 }
1108 
1109 static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
1110 {
1111 	if (cpuhp_slot == CPUHP_INVALID)
1112 		return -EINVAL;
1113 
1114 	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
1115 }
1116 
1117 static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
1118 {
1119 	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
1120 }
1121 
1122 static bool is_igp(struct drm_i915_private *i915)
1123 {
1124 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1125 
1126 	/* IGP is 0000:00:02.0 */
1127 	return pci_domain_nr(pdev->bus) == 0 &&
1128 	       pdev->bus->number == 0 &&
1129 	       PCI_SLOT(pdev->devfn) == 2 &&
1130 	       PCI_FUNC(pdev->devfn) == 0;
1131 }
1132 
1133 void i915_pmu_register(struct drm_i915_private *i915)
1134 {
1135 	struct i915_pmu *pmu = &i915->pmu;
1136 	const struct attribute_group *attr_groups[] = {
1137 		&i915_pmu_format_attr_group,
1138 		&pmu->events_attr_group,
1139 		&i915_pmu_cpumask_attr_group,
1140 		NULL
1141 	};
1142 
1143 	int ret = -ENOMEM;
1144 
1145 	if (GRAPHICS_VER(i915) <= 2) {
1146 		drm_info(&i915->drm, "PMU not supported for this GPU.");
1147 		return;
1148 	}
1149 
1150 	spin_lock_init(&pmu->lock);
1151 	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1152 	pmu->timer.function = i915_sample;
1153 	pmu->cpuhp.cpu = -1;
1154 	init_rc6(pmu);
1155 
1156 	if (!is_igp(i915)) {
1157 		pmu->name = kasprintf(GFP_KERNEL,
1158 				      "i915_%s",
1159 				      dev_name(i915->drm.dev));
1160 		if (pmu->name) {
1161 			/* tools/perf reserves colons as special. */
1162 			strreplace((char *)pmu->name, ':', '_');
1163 		}
1164 	} else {
1165 		pmu->name = "i915";
1166 	}
1167 	if (!pmu->name)
1168 		goto err;
1169 
1170 	pmu->events_attr_group.name = "events";
1171 	pmu->events_attr_group.attrs = create_event_attributes(pmu);
1172 	if (!pmu->events_attr_group.attrs)
1173 		goto err_name;
1174 
1175 	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
1176 					GFP_KERNEL);
1177 	if (!pmu->base.attr_groups)
1178 		goto err_attr;
1179 
1180 	pmu->base.module	= THIS_MODULE;
1181 	pmu->base.task_ctx_nr	= perf_invalid_context;
1182 	pmu->base.event_init	= i915_pmu_event_init;
1183 	pmu->base.add		= i915_pmu_event_add;
1184 	pmu->base.del		= i915_pmu_event_del;
1185 	pmu->base.start		= i915_pmu_event_start;
1186 	pmu->base.stop		= i915_pmu_event_stop;
1187 	pmu->base.read		= i915_pmu_event_read;
1188 	pmu->base.event_idx	= i915_pmu_event_event_idx;
1189 
1190 	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
1191 	if (ret)
1192 		goto err_groups;
1193 
1194 	ret = i915_pmu_register_cpuhp_state(pmu);
1195 	if (ret)
1196 		goto err_unreg;
1197 
1198 	return;
1199 
1200 err_unreg:
1201 	perf_pmu_unregister(&pmu->base);
1202 err_groups:
1203 	kfree(pmu->base.attr_groups);
1204 err_attr:
1205 	pmu->base.event_init = NULL;
1206 	free_event_attributes(pmu);
1207 err_name:
1208 	if (!is_igp(i915))
1209 		kfree(pmu->name);
1210 err:
1211 	drm_notice(&i915->drm, "Failed to register PMU!\n");
1212 }
1213 
1214 void i915_pmu_unregister(struct drm_i915_private *i915)
1215 {
1216 	struct i915_pmu *pmu = &i915->pmu;
1217 
1218 	if (!pmu->base.event_init)
1219 		return;
1220 
1221 	/*
1222 	 * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu
1223 	 * ensures all currently executing ones will have exited before we
1224 	 * proceed with unregistration.
1225 	 */
1226 	pmu->closed = true;
1227 	synchronize_rcu();
1228 
1229 	hrtimer_cancel(&pmu->timer);
1230 
1231 	i915_pmu_unregister_cpuhp_state(pmu);
1232 
1233 	perf_pmu_unregister(&pmu->base);
1234 	pmu->base.event_init = NULL;
1235 	kfree(pmu->base.attr_groups);
1236 	if (!is_igp(i915))
1237 		kfree(pmu->name);
1238 	free_event_attributes(pmu);
1239 }
1240