xref: /linux/drivers/gpu/drm/i915/i915_pmu.c (revision b0f84a84fff180718995b1269da2988e5b28be42)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6 
7 #include <linux/irq.h>
8 #include <linux/pm_runtime.h>
9 
10 #include "gt/intel_engine.h"
11 
12 #include "i915_drv.h"
13 #include "i915_pmu.h"
14 #include "intel_pm.h"
15 
16 /* Frequency for the sampling timer for events which need it. */
17 #define FREQUENCY 200
18 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
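/*
 * With FREQUENCY set to 200 the nominal sampling period works out to
 * NSEC_PER_SEC / 200 = 5,000,000 ns (5 ms); the max_t() clamp only
 * matters for frequencies above 100 kHz, where it enforces a 10 us
 * floor on the period.
 */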
19 
20 #define ENGINE_SAMPLE_MASK \
21 	(BIT(I915_SAMPLE_BUSY) | \
22 	 BIT(I915_SAMPLE_WAIT) | \
23 	 BIT(I915_SAMPLE_SEMA))
24 
25 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
26 
27 static cpumask_t i915_pmu_cpumask;
28 
29 static u8 engine_config_sample(u64 config)
30 {
31 	return config & I915_PMU_SAMPLE_MASK;
32 }
33 
34 static u8 engine_event_sample(struct perf_event *event)
35 {
36 	return engine_config_sample(event->attr.config);
37 }
38 
39 static u8 engine_event_class(struct perf_event *event)
40 {
41 	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
42 }
43 
44 static u8 engine_event_instance(struct perf_event *event)
45 {
46 	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
47 }
48 
49 static bool is_engine_config(u64 config)
50 {
51 	return config < __I915_PMU_OTHER(0);
52 }
53 
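/*
 * Event configs follow the encoding from the i915 uapi: the low
 * I915_PMU_SAMPLE_BITS bits select the sample type, the next eight
 * bits the engine instance and the bits from I915_PMU_CLASS_SHIFT
 * upwards the engine class (see the decoders above). Non-engine
 * ("other") events are numbered from __I915_PMU_OTHER(0) upwards,
 * which is what lets is_engine_config() tell the two kinds apart
 * with a plain comparison.
 */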
54 static unsigned int config_enabled_bit(u64 config)
55 {
56 	if (is_engine_config(config))
57 		return engine_config_sample(config);
58 	else
59 		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
60 }
61 
62 static u64 config_enabled_mask(u64 config)
63 {
64 	return BIT_ULL(config_enabled_bit(config));
65 }
66 
67 static bool is_engine_event(struct perf_event *event)
68 {
69 	return is_engine_config(event->attr.config);
70 }
71 
72 static unsigned int event_enabled_bit(struct perf_event *event)
73 {
74 	return config_enabled_bit(event->attr.config);
75 }
76 
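/*
 * i915->pmu.enable is a bitmap built from config_enabled_bit(): the
 * low ENGINE_SAMPLE_BITS bits stand for the engine sample types
 * (shared by all engines), while the bits above them stand for the
 * individual "other" events such as the frequency and RC6 counters.
 * Both pmu_needs_timer() and the enable/disable paths below key off
 * this layout.
 */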
77 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
78 {
79 	u64 enable;
80 
81 	/*
82 	 * Only some counters need the sampling timer.
83 	 *
84 	 * We start with a bitmask of all currently enabled events.
85 	 */
86 	enable = i915->pmu.enable;
87 
88 	/*
89 	 * Mask out all the ones which do not need the timer, or in
90 	 * other words keep all the ones that could need the timer.
91 	 */
92 	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
93 		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
94 		  ENGINE_SAMPLE_MASK;
95 
96 	/*
97 	 * When the GPU is idle, per-engine counters do not need to be
98 	 * running, so clear those bits out.
99 	 */
100 	if (!gpu_active)
101 		enable &= ~ENGINE_SAMPLE_MASK;
102 	/*
103 	 * Also, if software busyness tracking is available we do not
104 	 * need the timer for the I915_SAMPLE_BUSY counter.
105 	 *
106 	 * Use RCS as proxy for all engines.
107 	 */
108 	else if (intel_engine_supports_stats(i915->engine[RCS0]))
109 		enable &= ~BIT(I915_SAMPLE_BUSY);
110 
111 	/*
112 	 * If some bits remain it means we need the sampling timer running.
113 	 */
114 	return enable;
115 }
116 
117 void i915_pmu_gt_parked(struct drm_i915_private *i915)
118 {
119 	if (!i915->pmu.base.event_init)
120 		return;
121 
122 	spin_lock_irq(&i915->pmu.lock);
123 	/*
124 	 * Signal the sampling timer to stop if only engine events are
125 	 * enabled and the GPU went idle.
126 	 */
127 	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
128 	spin_unlock_irq(&i915->pmu.lock);
129 }
130 
131 static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
132 {
133 	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
134 		i915->pmu.timer_enabled = true;
135 		i915->pmu.timer_last = ktime_get();
136 		hrtimer_start_range_ns(&i915->pmu.timer,
137 				       ns_to_ktime(PERIOD), 0,
138 				       HRTIMER_MODE_REL_PINNED);
139 	}
140 }
141 
142 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
143 {
144 	if (!i915->pmu.base.event_init)
145 		return;
146 
147 	spin_lock_irq(&i915->pmu.lock);
148 	/*
149 	 * Re-enable sampling timer when GPU goes active.
150 	 */
151 	__i915_pmu_maybe_start_timer(i915);
152 	spin_unlock_irq(&i915->pmu.lock);
153 }
154 
155 static void
156 add_sample(struct i915_pmu_sample *sample, u32 val)
157 {
158 	sample->cur += val;
159 }
160 
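/*
 * Engine busyness sampling: each timer tick credits the full elapsed
 * period to every state (busy, waiting, semaphore) the engine is
 * observed in at that instant, so these MMIO-based counters are
 * statistical estimates which converge on the real values over many
 * sampling periods.
 */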
161 static void
162 engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
163 {
164 	struct intel_engine_cs *engine;
165 	enum intel_engine_id id;
166 	intel_wakeref_t wakeref;
167 	unsigned long flags;
168 
169 	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
170 		return;
171 
172 	wakeref = 0;
173 	if (READ_ONCE(dev_priv->gt.awake))
174 		wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
175 	if (!wakeref)
176 		return;
177 
178 	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
179 	for_each_engine(engine, dev_priv, id) {
180 		struct intel_engine_pmu *pmu = &engine->pmu;
181 		bool busy;
182 		u32 val;
183 
184 		val = I915_READ_FW(RING_CTL(engine->mmio_base));
185 		if (val == 0) /* powerwell off => engine idle */
186 			continue;
187 
188 		if (val & RING_WAIT)
189 			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
190 		if (val & RING_WAIT_SEMAPHORE)
191 			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
192 
193 		/*
194 		 * While waiting on a semaphore or event, MI_MODE reports the
195 		 * ring as idle. However, previously using the seqno, and with
196 		 * execlists sampling, we account for the ring waiting as the
197 		 * engine being busy. Therefore, we record the sample as being
198 		 * busy if either waiting or !idle.
199 		 * busy if either waiting or !idle.
		 */
200 		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
201 		if (!busy) {
202 			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
203 			busy = !(val & MODE_IDLE);
204 		}
205 		if (busy)
206 			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
207 	}
208 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
209 
210 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
211 }
212 
213 static void
214 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
215 {
216 	sample->cur += mul_u32_u32(val, mul);
217 }
218 
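/*
 * Frequency samples are accumulated as MHz * microseconds (note the
 * period_ns / 1000 factor below), so after __i915_pmu_event_read()
 * divides by USEC_PER_SEC the reported counter grows by the average
 * frequency in MHz for every second of sampling.
 */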
219 static void
220 frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
221 {
222 	if (dev_priv->pmu.enable &
223 	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
224 		u32 val;
225 
226 		val = dev_priv->gt_pm.rps.cur_freq;
227 		if (dev_priv->gt.awake) {
228 			intel_wakeref_t wakeref;
229 
230 			with_intel_runtime_pm_if_in_use(&dev_priv->runtime_pm,
231 							wakeref) {
232 				val = intel_uncore_read_notrace(&dev_priv->uncore,
233 								GEN6_RPSTAT1);
234 				val = intel_get_cagf(dev_priv, val);
235 			}
236 		}
237 
238 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
239 				intel_gpu_freq(dev_priv, val),
240 				period_ns / 1000);
241 	}
242 
243 	if (dev_priv->pmu.enable &
244 	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
245 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
246 				intel_gpu_freq(dev_priv,
247 					       dev_priv->gt_pm.rps.cur_freq),
248 				period_ns / 1000);
249 	}
250 }
251 
252 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
253 {
254 	struct drm_i915_private *i915 =
255 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
256 	unsigned int period_ns;
257 	ktime_t now;
258 
259 	if (!READ_ONCE(i915->pmu.timer_enabled))
260 		return HRTIMER_NORESTART;
261 
262 	now = ktime_get();
263 	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
264 	i915->pmu.timer_last = now;
265 
266 	/*
267 	 * Strictly speaking the passed-in period may not be 100% accurate for
268 	 * all internal calculations, since some amount of time can be spent
269 	 * on grabbing the forcewake. However, the potential error from timer
270 	 * callback delay greatly dominates this, so we keep it simple.
271 	 */
272 	engines_sample(i915, period_ns);
273 	frequency_sample(i915, period_ns);
274 
275 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
276 
277 	return HRTIMER_RESTART;
278 }
279 
280 static u64 count_interrupts(struct drm_i915_private *i915)
281 {
282 	/* open-coded kstat_irqs() */
283 	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
284 	u64 sum = 0;
285 	int cpu;
286 
287 	if (!desc || !desc->kstat_irqs)
288 		return 0;
289 
290 	for_each_possible_cpu(cpu)
291 		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
292 
293 	return sum;
294 }
295 
296 static void engine_event_destroy(struct perf_event *event)
297 {
298 	struct drm_i915_private *i915 =
299 		container_of(event->pmu, typeof(*i915), pmu.base);
300 	struct intel_engine_cs *engine;
301 
302 	engine = intel_engine_lookup_user(i915,
303 					  engine_event_class(event),
304 					  engine_event_instance(event));
305 	if (WARN_ON_ONCE(!engine))
306 		return;
307 
308 	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
309 	    intel_engine_supports_stats(engine))
310 		intel_disable_engine_stats(engine);
311 }
312 
313 static void i915_pmu_event_destroy(struct perf_event *event)
314 {
315 	WARN_ON(event->parent);
316 
317 	if (is_engine_event(event))
318 		engine_event_destroy(event);
319 }
320 
321 static int
322 engine_event_status(struct intel_engine_cs *engine,
323 		    enum drm_i915_pmu_engine_sample sample)
324 {
325 	switch (sample) {
326 	case I915_SAMPLE_BUSY:
327 	case I915_SAMPLE_WAIT:
328 		break;
329 	case I915_SAMPLE_SEMA:
330 		if (INTEL_GEN(engine->i915) < 6)
331 			return -ENODEV;
332 		break;
333 	default:
334 		return -ENOENT;
335 	}
336 
337 	return 0;
338 }
339 
340 static int
341 config_status(struct drm_i915_private *i915, u64 config)
342 {
343 	switch (config) {
344 	case I915_PMU_ACTUAL_FREQUENCY:
345 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
346 			/* Requires a mutex for sampling! */
347 			return -ENODEV;
348 		/* Fall-through. */
349 	case I915_PMU_REQUESTED_FREQUENCY:
350 		if (INTEL_GEN(i915) < 6)
351 			return -ENODEV;
352 		break;
353 	case I915_PMU_INTERRUPTS:
354 		break;
355 	case I915_PMU_RC6_RESIDENCY:
356 		if (!HAS_RC6(i915))
357 			return -ENODEV;
358 		break;
359 	default:
360 		return -ENOENT;
361 	}
362 
363 	return 0;
364 }
365 
366 static int engine_event_init(struct perf_event *event)
367 {
368 	struct drm_i915_private *i915 =
369 		container_of(event->pmu, typeof(*i915), pmu.base);
370 	struct intel_engine_cs *engine;
371 	u8 sample;
372 	int ret;
373 
374 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
375 					  engine_event_instance(event));
376 	if (!engine)
377 		return -ENODEV;
378 
379 	sample = engine_event_sample(event);
380 	ret = engine_event_status(engine, sample);
381 	if (ret)
382 		return ret;
383 
384 	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
385 		ret = intel_enable_engine_stats(engine);
386 
387 	return ret;
388 }
389 
390 static int i915_pmu_event_init(struct perf_event *event)
391 {
392 	struct drm_i915_private *i915 =
393 		container_of(event->pmu, typeof(*i915), pmu.base);
394 	int ret;
395 
396 	if (event->attr.type != event->pmu->type)
397 		return -ENOENT;
398 
399 	/* unsupported modes and filters */
400 	if (event->attr.sample_period) /* no sampling */
401 		return -EINVAL;
402 
403 	if (has_branch_stack(event))
404 		return -EOPNOTSUPP;
405 
406 	if (event->cpu < 0)
407 		return -EINVAL;
408 
409 	/* only allow running on one cpu at a time */
410 	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
411 		return -EINVAL;
412 
413 	if (is_engine_event(event))
414 		ret = engine_event_init(event);
415 	else
416 		ret = config_status(i915, event->attr.config);
417 	if (ret)
418 		return ret;
419 
420 	if (!event->parent)
421 		event->destroy = i915_pmu_event_destroy;
422 
423 	return 0;
424 }
425 
426 static u64 __get_rc6(struct drm_i915_private *i915)
427 {
428 	u64 val;
429 
430 	val = intel_rc6_residency_ns(i915,
431 				     IS_VALLEYVIEW(i915) ?
432 				     VLV_GT_RENDER_RC6 :
433 				     GEN6_GT_GFX_RC6);
434 
435 	if (HAS_RC6p(i915))
436 		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
437 
438 	if (HAS_RC6pp(i915))
439 		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
440 
441 	return val;
442 }
443 
444 static u64 get_rc6(struct drm_i915_private *i915)
445 {
446 #if IS_ENABLED(CONFIG_PM)
447 	struct intel_runtime_pm *rpm = &i915->runtime_pm;
448 	intel_wakeref_t wakeref;
449 	unsigned long flags;
450 	u64 val;
451 
452 	wakeref = intel_runtime_pm_get_if_in_use(rpm);
453 	if (wakeref) {
454 		val = __get_rc6(i915);
455 		intel_runtime_pm_put(rpm, wakeref);
456 
457 		/*
458 		 * If we are coming back from being runtime suspended we must
459 		 * be careful not to report a larger value than returned
460 		 * previously.
461 		 */
462 
463 		spin_lock_irqsave(&i915->pmu.lock, flags);
464 
465 		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
466 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
467 			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
468 		} else {
469 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
470 		}
471 
472 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
473 	} else {
474 		struct device *kdev = rpm->kdev;
475 
476 		/*
477 		 * We are runtime suspended.
478 		 *
479 		 * Report the delta from when the device was suspended to now,
480 		 * on top of the last known real value, as the approximated RC6
481 		 * counter value.
482 		 */
483 		spin_lock_irqsave(&i915->pmu.lock, flags);
484 
485 		/*
486 		 * Even though intel_runtime_pm_get_if_in_use failed in the branch
487 		 * above, we cannot assume we are in runtime suspend, since we can
488 		 * either: a) race with the device coming out of it before we took
489 		 * the power.lock, or b) be in a runtime PM state other than
490 		 * suspended which can also bring us here.
491 		 *
492 		 * We need to double-check that we are indeed currently runtime
493 		 * suspended and if not we cannot do better than report the last
494 		 * known RC6 value.
495 		 */
496 		if (pm_runtime_status_suspended(kdev)) {
497 			val = pm_runtime_suspended_time(kdev);
498 
499 			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
500 				i915->pmu.suspended_time_last = val;
501 
502 			val -= i915->pmu.suspended_time_last;
503 			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
504 
505 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
506 		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
507 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
508 		} else {
509 			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
510 		}
511 
512 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
513 	}
514 
515 	return val;
516 #else
517 	return __get_rc6(i915);
518 #endif
519 }
520 
521 static u64 __i915_pmu_event_read(struct perf_event *event)
522 {
523 	struct drm_i915_private *i915 =
524 		container_of(event->pmu, typeof(*i915), pmu.base);
525 	u64 val = 0;
526 
527 	if (is_engine_event(event)) {
528 		u8 sample = engine_event_sample(event);
529 		struct intel_engine_cs *engine;
530 
531 		engine = intel_engine_lookup_user(i915,
532 						  engine_event_class(event),
533 						  engine_event_instance(event));
534 
535 		if (WARN_ON_ONCE(!engine)) {
536 			/* Do nothing */
537 		} else if (sample == I915_SAMPLE_BUSY &&
538 			   intel_engine_supports_stats(engine)) {
539 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
540 		} else {
541 			val = engine->pmu.sample[sample].cur;
542 		}
543 	} else {
544 		switch (event->attr.config) {
545 		case I915_PMU_ACTUAL_FREQUENCY:
546 			val =
547 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
548 				   USEC_PER_SEC /* to MHz */);
549 			break;
550 		case I915_PMU_REQUESTED_FREQUENCY:
551 			val =
552 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
553 				   USEC_PER_SEC /* to MHz */);
554 			break;
555 		case I915_PMU_INTERRUPTS:
556 			val = count_interrupts(i915);
557 			break;
558 		case I915_PMU_RC6_RESIDENCY:
559 			val = get_rc6(i915);
560 			break;
561 		}
562 	}
563 
564 	return val;
565 }
566 
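/*
 * perf read path: re-sample the counter and publish the new snapshot
 * with a cmpxchg retry loop, so concurrent readers agree on
 * prev_count and the same delta is never added to event->count twice.
 */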
567 static void i915_pmu_event_read(struct perf_event *event)
568 {
569 	struct hw_perf_event *hwc = &event->hw;
570 	u64 prev, new;
571 
572 again:
573 	prev = local64_read(&hwc->prev_count);
574 	new = __i915_pmu_event_read(event);
575 
576 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
577 		goto again;
578 
579 	local64_add(new - prev, &event->count);
580 }
581 
582 static void i915_pmu_enable(struct perf_event *event)
583 {
584 	struct drm_i915_private *i915 =
585 		container_of(event->pmu, typeof(*i915), pmu.base);
586 	unsigned int bit = event_enabled_bit(event);
587 	unsigned long flags;
588 
589 	spin_lock_irqsave(&i915->pmu.lock, flags);
590 
591 	/*
592 	 * Update the bitmask of enabled events and increment
593 	 * the event reference counter.
594 	 */
595 	BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS);
596 	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
597 	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
598 	i915->pmu.enable |= BIT_ULL(bit);
599 	i915->pmu.enable_count[bit]++;
600 
601 	/*
602 	 * Start the sampling timer if needed and not already enabled.
603 	 */
604 	__i915_pmu_maybe_start_timer(i915);
605 
606 	/*
607 	 * For per-engine events the bitmask and reference counting
608 	 * is stored per engine.
609 	 */
610 	if (is_engine_event(event)) {
611 		u8 sample = engine_event_sample(event);
612 		struct intel_engine_cs *engine;
613 
614 		engine = intel_engine_lookup_user(i915,
615 						  engine_event_class(event),
616 						  engine_event_instance(event));
617 
618 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
619 			     I915_ENGINE_SAMPLE_COUNT);
620 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
621 			     I915_ENGINE_SAMPLE_COUNT);
622 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
623 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
624 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
625 
626 		engine->pmu.enable |= BIT(sample);
627 		engine->pmu.enable_count[sample]++;
628 	}
629 
630 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
631 
632 	/*
633 	 * Store the current counter value so we can report the correct delta
634 	 * for all listeners. Even when the event was already enabled and has
635 	 * an existing non-zero value.
636 	 */
637 	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
638 }
639 
640 static void i915_pmu_disable(struct perf_event *event)
641 {
642 	struct drm_i915_private *i915 =
643 		container_of(event->pmu, typeof(*i915), pmu.base);
644 	unsigned int bit = event_enabled_bit(event);
645 	unsigned long flags;
646 
647 	spin_lock_irqsave(&i915->pmu.lock, flags);
648 
649 	if (is_engine_event(event)) {
650 		u8 sample = engine_event_sample(event);
651 		struct intel_engine_cs *engine;
652 
653 		engine = intel_engine_lookup_user(i915,
654 						  engine_event_class(event),
655 						  engine_event_instance(event));
656 
657 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
658 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
659 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
660 
661 		/*
662 		 * Decrement the reference count and clear the enabled
663 		 * bitmask when the last listener on an event goes away.
664 		 */
665 		if (--engine->pmu.enable_count[sample] == 0)
666 			engine->pmu.enable &= ~BIT(sample);
667 	}
668 
669 	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
670 	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
671 	/*
672 	 * Decrement the reference count and clear the enabled
673 	 * bitmask when the last listener on an event goes away.
674 	 */
675 	if (--i915->pmu.enable_count[bit] == 0) {
676 		i915->pmu.enable &= ~BIT_ULL(bit);
677 		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
678 	}
679 
680 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
681 }
682 
683 static void i915_pmu_event_start(struct perf_event *event, int flags)
684 {
685 	i915_pmu_enable(event);
686 	event->hw.state = 0;
687 }
688 
689 static void i915_pmu_event_stop(struct perf_event *event, int flags)
690 {
691 	if (flags & PERF_EF_UPDATE)
692 		i915_pmu_event_read(event);
693 	i915_pmu_disable(event);
694 	event->hw.state = PERF_HES_STOPPED;
695 }
696 
697 static int i915_pmu_event_add(struct perf_event *event, int flags)
698 {
699 	if (flags & PERF_EF_START)
700 		i915_pmu_event_start(event, flags);
701 
702 	return 0;
703 }
704 
705 static void i915_pmu_event_del(struct perf_event *event, int flags)
706 {
707 	i915_pmu_event_stop(event, PERF_EF_UPDATE);
708 }
709 
710 static int i915_pmu_event_event_idx(struct perf_event *event)
711 {
712 	return 0;
713 }
714 
715 struct i915_str_attribute {
716 	struct device_attribute attr;
717 	const char *str;
718 };
719 
720 static ssize_t i915_pmu_format_show(struct device *dev,
721 				    struct device_attribute *attr, char *buf)
722 {
723 	struct i915_str_attribute *eattr;
724 
725 	eattr = container_of(attr, struct i915_str_attribute, attr);
726 	return sprintf(buf, "%s\n", eattr->str);
727 }
728 
729 #define I915_PMU_FORMAT_ATTR(_name, _config) \
730 	(&((struct i915_str_attribute[]) { \
731 		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
732 		  .str = _config, } \
733 	})[0].attr.attr)
734 
735 static struct attribute *i915_pmu_format_attrs[] = {
736 	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
737 	NULL,
738 };
739 
740 static const struct attribute_group i915_pmu_format_attr_group = {
741 	.name = "format",
742 	.attrs = i915_pmu_format_attrs,
743 };
744 
745 struct i915_ext_attribute {
746 	struct device_attribute attr;
747 	unsigned long val;
748 };
749 
750 static ssize_t i915_pmu_event_show(struct device *dev,
751 				   struct device_attribute *attr, char *buf)
752 {
753 	struct i915_ext_attribute *eattr;
754 
755 	eattr = container_of(attr, struct i915_ext_attribute, attr);
756 	return sprintf(buf, "config=0x%lx\n", eattr->val);
757 }
758 
759 static struct attribute_group i915_pmu_events_attr_group = {
760 	.name = "events",
761 	/* Patch in attrs at runtime. */
762 };
763 
764 static ssize_t
765 i915_pmu_get_attr_cpumask(struct device *dev,
766 			  struct device_attribute *attr,
767 			  char *buf)
768 {
769 	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
770 }
771 
772 static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
773 
774 static struct attribute *i915_cpumask_attrs[] = {
775 	&dev_attr_cpumask.attr,
776 	NULL,
777 };
778 
779 static const struct attribute_group i915_pmu_cpumask_attr_group = {
780 	.attrs = i915_cpumask_attrs,
781 };
782 
783 static const struct attribute_group *i915_pmu_attr_groups[] = {
784 	&i915_pmu_format_attr_group,
785 	&i915_pmu_events_attr_group,
786 	&i915_pmu_cpumask_attr_group,
787 	NULL
788 };
789 
790 #define __event(__config, __name, __unit) \
791 { \
792 	.config = (__config), \
793 	.name = (__name), \
794 	.unit = (__unit), \
795 }
796 
797 #define __engine_event(__sample, __name) \
798 { \
799 	.sample = (__sample), \
800 	.name = (__name), \
801 }
802 
803 static struct i915_ext_attribute *
804 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
805 {
806 	sysfs_attr_init(&attr->attr.attr);
807 	attr->attr.attr.name = name;
808 	attr->attr.attr.mode = 0444;
809 	attr->attr.show = i915_pmu_event_show;
810 	attr->val = config;
811 
812 	return ++attr;
813 }
814 
815 static struct perf_pmu_events_attr *
816 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
817 	     const char *str)
818 {
819 	sysfs_attr_init(&attr->attr.attr);
820 	attr->attr.attr.name = name;
821 	attr->attr.attr.mode = 0444;
822 	attr->attr.show = perf_event_sysfs_show;
823 	attr->event_str = str;
824 
825 	return ++attr;
826 }
827 
828 static struct attribute **
829 create_event_attributes(struct drm_i915_private *i915)
830 {
831 	static const struct {
832 		u64 config;
833 		const char *name;
834 		const char *unit;
835 	} events[] = {
836 		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
837 		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
838 		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
839 		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
840 	};
841 	static const struct {
842 		enum drm_i915_pmu_engine_sample sample;
843 		char *name;
844 	} engine_events[] = {
845 		__engine_event(I915_SAMPLE_BUSY, "busy"),
846 		__engine_event(I915_SAMPLE_SEMA, "sema"),
847 		__engine_event(I915_SAMPLE_WAIT, "wait"),
848 	};
849 	unsigned int count = 0;
850 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
851 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
852 	struct attribute **attr = NULL, **attr_iter;
853 	struct intel_engine_cs *engine;
854 	enum intel_engine_id id;
855 	unsigned int i;
856 
857 	/* Count how many counters we will be exposing. */
858 	for (i = 0; i < ARRAY_SIZE(events); i++) {
859 		if (!config_status(i915, events[i].config))
860 			count++;
861 	}
862 
863 	for_each_engine(engine, i915, id) {
864 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
865 			if (!engine_event_status(engine,
866 						 engine_events[i].sample))
867 				count++;
868 		}
869 	}
870 
871 	/* Allocate attribute objects and table. */
872 	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
873 	if (!i915_attr)
874 		goto err_alloc;
875 
876 	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
877 	if (!pmu_attr)
878 		goto err_alloc;
879 
880 	/* Max one pointer of each attribute type plus a termination entry. */
881 	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
882 	if (!attr)
883 		goto err_alloc;
884 
885 	i915_iter = i915_attr;
886 	pmu_iter = pmu_attr;
887 	attr_iter = attr;
888 
889 	/* Initialize supported non-engine counters. */
890 	for (i = 0; i < ARRAY_SIZE(events); i++) {
891 		char *str;
892 
893 		if (config_status(i915, events[i].config))
894 			continue;
895 
896 		str = kstrdup(events[i].name, GFP_KERNEL);
897 		if (!str)
898 			goto err;
899 
900 		*attr_iter++ = &i915_iter->attr.attr;
901 		i915_iter = add_i915_attr(i915_iter, str, events[i].config);
902 
903 		if (events[i].unit) {
904 			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
905 			if (!str)
906 				goto err;
907 
908 			*attr_iter++ = &pmu_iter->attr.attr;
909 			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
910 		}
911 	}
912 
913 	/* Initialize supported engine counters. */
914 	for_each_engine(engine, i915, id) {
915 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
916 			char *str;
917 
918 			if (engine_event_status(engine,
919 						engine_events[i].sample))
920 				continue;
921 
922 			str = kasprintf(GFP_KERNEL, "%s-%s",
923 					engine->name, engine_events[i].name);
924 			if (!str)
925 				goto err;
926 
927 			*attr_iter++ = &i915_iter->attr.attr;
928 			i915_iter =
929 				add_i915_attr(i915_iter, str,
930 					      __I915_PMU_ENGINE(engine->uabi_class,
931 								engine->instance,
932 								engine_events[i].sample));
933 
934 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
935 					engine->name, engine_events[i].name);
936 			if (!str)
937 				goto err;
938 
939 			*attr_iter++ = &pmu_iter->attr.attr;
940 			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
941 		}
942 	}
943 
944 	i915->pmu.i915_attr = i915_attr;
945 	i915->pmu.pmu_attr = pmu_attr;
946 
947 	return attr;
948 
949 err:;
950 	for (attr_iter = attr; *attr_iter; attr_iter++)
951 		kfree((*attr_iter)->name);
952 
953 err_alloc:
954 	kfree(attr);
955 	kfree(i915_attr);
956 	kfree(pmu_attr);
957 
958 	return NULL;
959 }
960 
961 static void free_event_attributes(struct drm_i915_private *i915)
962 {
963 	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
964 
965 	for (; *attr_iter; attr_iter++)
966 		kfree((*attr_iter)->name);
967 
968 	kfree(i915_pmu_events_attr_group.attrs);
969 	kfree(i915->pmu.i915_attr);
970 	kfree(i915->pmu.pmu_attr);
971 
972 	i915_pmu_events_attr_group.attrs = NULL;
973 	i915->pmu.i915_attr = NULL;
974 	i915->pmu.pmu_attr = NULL;
975 }
976 
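/*
 * The i915 PMU is a system-wide (uncore style) PMU, so perf reads it
 * from a single designated CPU. i915_pmu_cpumask holds that CPU; the
 * hotplug callbacks below nominate the first CPU to come online and
 * migrate the perf context to a sibling when the nominated CPU goes
 * away.
 */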
977 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
978 {
979 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
980 
981 	GEM_BUG_ON(!pmu->base.event_init);
982 
983 	/* Select the first online CPU as a designated reader. */
984 	if (!cpumask_weight(&i915_pmu_cpumask))
985 		cpumask_set_cpu(cpu, &i915_pmu_cpumask);
986 
987 	return 0;
988 }
989 
990 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
991 {
992 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
993 	unsigned int target;
994 
995 	GEM_BUG_ON(!pmu->base.event_init);
996 
997 	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
998 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
999 		/* Migrate events if there is a valid target */
1000 		if (target < nr_cpu_ids) {
1001 			cpumask_set_cpu(target, &i915_pmu_cpumask);
1002 			perf_pmu_migrate_context(&pmu->base, cpu, target);
1003 		}
1004 	}
1005 
1006 	return 0;
1007 }
1008 
1009 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1010 
1011 static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
1012 {
1013 	enum cpuhp_state slot;
1014 	int ret;
1015 
1016 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1017 				      "perf/x86/intel/i915:online",
1018 				      i915_pmu_cpu_online,
1019 				      i915_pmu_cpu_offline);
1020 	if (ret < 0)
1021 		return ret;
1022 
1023 	slot = ret;
1024 	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
1025 	if (ret) {
1026 		cpuhp_remove_multi_state(slot);
1027 		return ret;
1028 	}
1029 
1030 	cpuhp_slot = slot;
1031 	return 0;
1032 }
1033 
1034 static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
1035 {
1036 	WARN_ON(cpuhp_slot == CPUHP_INVALID);
1037 	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
1038 	cpuhp_remove_multi_state(cpuhp_slot);
1039 }
1040 
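/*
 * Registration exposes the counters through the perf subsystem: the
 * PMU appears as /sys/bus/event_source/devices/i915/ and the events
 * listed there can be read system-wide with the perf tool, for
 * example (exact engine names such as "rcs0" depend on the engines
 * present on the platform):
 *
 *   perf stat -e i915/actual-frequency/,i915/rc6-residency/ \
 *             -e i915/rcs0-busy/ -a sleep 1
 */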
1041 void i915_pmu_register(struct drm_i915_private *i915)
1042 {
1043 	int ret;
1044 
1045 	if (INTEL_GEN(i915) <= 2) {
1046 		DRM_INFO("PMU not supported for this GPU.");
1047 		return;
1048 	}
1049 
1050 	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
1051 	if (!i915_pmu_events_attr_group.attrs) {
1052 		ret = -ENOMEM;
1053 		goto err;
1054 	}
1055 
1056 	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
1057 	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
1058 	i915->pmu.base.event_init	= i915_pmu_event_init;
1059 	i915->pmu.base.add		= i915_pmu_event_add;
1060 	i915->pmu.base.del		= i915_pmu_event_del;
1061 	i915->pmu.base.start		= i915_pmu_event_start;
1062 	i915->pmu.base.stop		= i915_pmu_event_stop;
1063 	i915->pmu.base.read		= i915_pmu_event_read;
1064 	i915->pmu.base.event_idx	= i915_pmu_event_event_idx;
1065 
1066 	spin_lock_init(&i915->pmu.lock);
1067 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1068 	i915->pmu.timer.function = i915_sample;
1069 
1070 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
1071 	if (ret)
1072 		goto err;
1073 
1074 	ret = i915_pmu_register_cpuhp_state(i915);
1075 	if (ret)
1076 		goto err_unreg;
1077 
1078 	return;
1079 
1080 err_unreg:
1081 	perf_pmu_unregister(&i915->pmu.base);
1082 err:
1083 	i915->pmu.base.event_init = NULL;
1084 	free_event_attributes(i915);
1085 	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
1086 }
1087 
1088 void i915_pmu_unregister(struct drm_i915_private *i915)
1089 {
1090 	if (!i915->pmu.base.event_init)
1091 		return;
1092 
1093 	WARN_ON(i915->pmu.enable);
1094 
1095 	hrtimer_cancel(&i915->pmu.timer);
1096 
1097 	i915_pmu_unregister_cpuhp_state(i915);
1098 
1099 	perf_pmu_unregister(&i915->pmu.base);
1100 	i915->pmu.base.event_init = NULL;
1101 	free_event_attributes(i915);
1102 }
1103