xref: /linux/drivers/gpu/drm/i915/i915_pmu.c (revision 9abdb50cda0ffe33bbb2e40cbad97b32fb7ff892)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6 
7 #include <linux/irq.h>
8 #include <linux/pm_runtime.h>
9 #include "i915_pmu.h"
10 #include "intel_ringbuffer.h"
11 #include "i915_drv.h"
12 
13 /* Frequency for the sampling timer for events which need it. */
14 #define FREQUENCY 200
15 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
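/*
 * With FREQUENCY at 200Hz the nominal sampling period is NSEC_PER_SEC / 200,
 * i.e. 5,000,000ns (5ms), comfortably above the 10us floor enforced by the
 * max_t() above.
 */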
16 
17 #define ENGINE_SAMPLE_MASK \
18 	(BIT(I915_SAMPLE_BUSY) | \
19 	 BIT(I915_SAMPLE_WAIT) | \
20 	 BIT(I915_SAMPLE_SEMA))
21 
22 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
23 
24 static cpumask_t i915_pmu_cpumask;
25 
26 static u8 engine_config_sample(u64 config)
27 {
28 	return config & I915_PMU_SAMPLE_MASK;
29 }
30 
31 static u8 engine_event_sample(struct perf_event *event)
32 {
33 	return engine_config_sample(event->attr.config);
34 }
35 
36 static u8 engine_event_class(struct perf_event *event)
37 {
38 	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
39 }
40 
41 static u8 engine_event_instance(struct perf_event *event)
42 {
43 	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
44 }
45 
46 static bool is_engine_config(u64 config)
47 {
48 	return config < __I915_PMU_OTHER(0);
49 }
50 
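/*
 * Layout of the global enable bitmask: bits below ENGINE_SAMPLE_BITS track
 * the engine sample types (busy/wait/sema), while the remaining events
 * (frequencies, interrupts, RC6) occupy consecutive bits starting at
 * ENGINE_SAMPLE_BITS, in the order they follow __I915_PMU_OTHER(0).
 */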
51 static unsigned int config_enabled_bit(u64 config)
52 {
53 	if (is_engine_config(config))
54 		return engine_config_sample(config);
55 	else
56 		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
57 }
58 
59 static u64 config_enabled_mask(u64 config)
60 {
61 	return BIT_ULL(config_enabled_bit(config));
62 }
63 
64 static bool is_engine_event(struct perf_event *event)
65 {
66 	return is_engine_config(event->attr.config);
67 }
68 
69 static unsigned int event_enabled_bit(struct perf_event *event)
70 {
71 	return config_enabled_bit(event->attr.config);
72 }
73 
74 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
75 {
76 	u64 enable;
77 
78 	/*
79 	 * Only some counters need the sampling timer.
80 	 *
81 	 * We start with a bitmask of all currently enabled events.
82 	 */
83 	enable = i915->pmu.enable;
84 
85 	/*
86 	 * Mask out all the ones which do not need the timer, or in
87 	 * other words keep all the ones that could need the timer.
88 	 */
89 	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
90 		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
91 		  ENGINE_SAMPLE_MASK;
92 
93 	/*
94 	 * When the GPU is idle per-engine counters do not need to be
95 	 * running so clear those bits out.
96 	 */
97 	if (!gpu_active)
98 		enable &= ~ENGINE_SAMPLE_MASK;
99 	/*
100 	 * Also, if software busyness tracking is available we do not
101 	 * need the timer for the I915_SAMPLE_BUSY counter.
102 	 *
103 	 * Use RCS as proxy for all engines.
104 	 */
105 	else if (intel_engine_supports_stats(i915->engine[RCS]))
106 		enable &= ~BIT(I915_SAMPLE_BUSY);
107 
108 	/*
109 	 * If some bits remain it means we need the sampling timer running.
110 	 */
111 	return enable;
112 }
113 
114 void i915_pmu_gt_parked(struct drm_i915_private *i915)
115 {
116 	if (!i915->pmu.base.event_init)
117 		return;
118 
119 	spin_lock_irq(&i915->pmu.lock);
120 	/*
121 	 * Signal sampling timer to stop if only engine events are enabled and
122 	 * GPU went idle.
123 	 */
124 	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
125 	spin_unlock_irq(&i915->pmu.lock);
126 }
127 
128 static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
129 {
130 	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
131 		i915->pmu.timer_enabled = true;
132 		i915->pmu.timer_last = ktime_get();
133 		hrtimer_start_range_ns(&i915->pmu.timer,
134 				       ns_to_ktime(PERIOD), 0,
135 				       HRTIMER_MODE_REL_PINNED);
136 	}
137 }
138 
139 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
140 {
141 	if (!i915->pmu.base.event_init)
142 		return;
143 
144 	spin_lock_irq(&i915->pmu.lock);
145 	/*
146 	 * Re-enable sampling timer when GPU goes active.
147 	 */
148 	__i915_pmu_maybe_start_timer(i915);
149 	spin_unlock_irq(&i915->pmu.lock);
150 }
151 
152 static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
153 {
154 	if (!fw)
155 		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
156 
157 	return true;
158 }
159 
160 static void
161 add_sample(struct i915_pmu_sample *sample, u32 val)
162 {
163 	sample->cur += val;
164 }
165 
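/*
 * Software sampling of engine state: on every timer tick an engine is
 * treated as busy if its last submitted seqno has not yet been completed
 * by the hardware, while the WAIT/SEMA states are read from RING_CTL
 * (grabbing forcewake only when needed). Each observed state accumulates
 * the elapsed period into its sample counter.
 */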
166 static void
167 engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
168 {
169 	struct intel_engine_cs *engine;
170 	enum intel_engine_id id;
171 	bool fw = false;
172 
173 	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
174 		return;
175 
176 	if (!dev_priv->gt.awake)
177 		return;
178 
179 	if (!intel_runtime_pm_get_if_in_use(dev_priv))
180 		return;
181 
182 	for_each_engine(engine, dev_priv, id) {
183 		u32 current_seqno = intel_engine_get_seqno(engine);
184 		u32 last_seqno = intel_engine_last_submit(engine);
185 		u32 val;
186 
187 		val = !i915_seqno_passed(current_seqno, last_seqno);
188 
189 		if (val)
190 			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
191 				   period_ns);
192 
193 		if (val && (engine->pmu.enable &
194 		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
195 			fw = grab_forcewake(dev_priv, fw);
196 
197 			val = I915_READ_FW(RING_CTL(engine->mmio_base));
198 		} else {
199 			val = 0;
200 		}
201 
202 		if (val & RING_WAIT)
203 			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
204 				   period_ns);
205 
206 		if (val & RING_WAIT_SEMAPHORE)
207 			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
208 				   period_ns);
209 	}
210 
211 	if (fw)
212 		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
213 
214 	intel_runtime_pm_put(dev_priv);
215 }
216 
217 static void
218 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
219 {
220 	sample->cur += mul_u32_u32(val, mul);
221 }
222 
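/*
 * Frequency counters accumulate the frequency in MHz weighted by the sample
 * period in microseconds. The actual frequency is read back through the CAGF
 * field of GEN6_RPSTAT1 while the GT is awake and powered, otherwise the
 * last requested frequency is used as an approximation.
 */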
223 static void
224 frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
225 {
226 	if (dev_priv->pmu.enable &
227 	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
228 		u32 val;
229 
230 		val = dev_priv->gt_pm.rps.cur_freq;
231 		if (dev_priv->gt.awake &&
232 		    intel_runtime_pm_get_if_in_use(dev_priv)) {
233 			val = intel_get_cagf(dev_priv,
234 					     I915_READ_NOTRACE(GEN6_RPSTAT1));
235 			intel_runtime_pm_put(dev_priv);
236 		}
237 
238 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
239 				intel_gpu_freq(dev_priv, val),
240 				period_ns / 1000);
241 	}
242 
243 	if (dev_priv->pmu.enable &
244 	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
245 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
246 				intel_gpu_freq(dev_priv,
247 					       dev_priv->gt_pm.rps.cur_freq),
248 				period_ns / 1000);
249 	}
250 }
251 
252 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
253 {
254 	struct drm_i915_private *i915 =
255 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
256 	unsigned int period_ns;
257 	ktime_t now;
258 
259 	if (!READ_ONCE(i915->pmu.timer_enabled))
260 		return HRTIMER_NORESTART;
261 
262 	now = ktime_get();
263 	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
264 	i915->pmu.timer_last = now;
265 
266 	/*
267 	 * Strictly speaking the passed in period may not be 100% accurate for
268 	 * all internal calculations, since some amount of time can be spent on
269 	 * grabbing the forcewake. However the potential error from the timer
270 	 * callback delay greatly dominates this, so we keep it simple.
271 	 */
272 	engines_sample(i915, period_ns);
273 	frequency_sample(i915, period_ns);
274 
275 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
276 
277 	return HRTIMER_RESTART;
278 }
279 
280 static u64 count_interrupts(struct drm_i915_private *i915)
281 {
282 	/* open-coded kstat_irqs() */
283 	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
284 	u64 sum = 0;
285 	int cpu;
286 
287 	if (!desc || !desc->kstat_irqs)
288 		return 0;
289 
290 	for_each_possible_cpu(cpu)
291 		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
292 
293 	return sum;
294 }
295 
296 static void engine_event_destroy(struct perf_event *event)
297 {
298 	struct drm_i915_private *i915 =
299 		container_of(event->pmu, typeof(*i915), pmu.base);
300 	struct intel_engine_cs *engine;
301 
302 	engine = intel_engine_lookup_user(i915,
303 					  engine_event_class(event),
304 					  engine_event_instance(event));
305 	if (WARN_ON_ONCE(!engine))
306 		return;
307 
308 	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
309 	    intel_engine_supports_stats(engine))
310 		intel_disable_engine_stats(engine);
311 }
312 
313 static void i915_pmu_event_destroy(struct perf_event *event)
314 {
315 	WARN_ON(event->parent);
316 
317 	if (is_engine_event(event))
318 		engine_event_destroy(event);
319 }
320 
321 static int
322 engine_event_status(struct intel_engine_cs *engine,
323 		    enum drm_i915_pmu_engine_sample sample)
324 {
325 	switch (sample) {
326 	case I915_SAMPLE_BUSY:
327 	case I915_SAMPLE_WAIT:
328 		break;
329 	case I915_SAMPLE_SEMA:
330 		if (INTEL_GEN(engine->i915) < 6)
331 			return -ENODEV;
332 		break;
333 	default:
334 		return -ENOENT;
335 	}
336 
337 	return 0;
338 }
339 
340 static int
341 config_status(struct drm_i915_private *i915, u64 config)
342 {
343 	switch (config) {
344 	case I915_PMU_ACTUAL_FREQUENCY:
345 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
346 			/* Requires a mutex for sampling! */
347 			return -ENODEV;
348 		/* Fall-through. */
349 	case I915_PMU_REQUESTED_FREQUENCY:
350 		if (INTEL_GEN(i915) < 6)
351 			return -ENODEV;
352 		break;
353 	case I915_PMU_INTERRUPTS:
354 		break;
355 	case I915_PMU_RC6_RESIDENCY:
356 		if (!HAS_RC6(i915))
357 			return -ENODEV;
358 		break;
359 	default:
360 		return -ENOENT;
361 	}
362 
363 	return 0;
364 }
365 
366 static int engine_event_init(struct perf_event *event)
367 {
368 	struct drm_i915_private *i915 =
369 		container_of(event->pmu, typeof(*i915), pmu.base);
370 	struct intel_engine_cs *engine;
371 	u8 sample;
372 	int ret;
373 
374 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
375 					  engine_event_instance(event));
376 	if (!engine)
377 		return -ENODEV;
378 
379 	sample = engine_event_sample(event);
380 	ret = engine_event_status(engine, sample);
381 	if (ret)
382 		return ret;
383 
384 	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
385 		ret = intel_enable_engine_stats(engine);
386 
387 	return ret;
388 }
389 
390 static int i915_pmu_event_init(struct perf_event *event)
391 {
392 	struct drm_i915_private *i915 =
393 		container_of(event->pmu, typeof(*i915), pmu.base);
394 	int ret;
395 
396 	if (event->attr.type != event->pmu->type)
397 		return -ENOENT;
398 
399 	/* unsupported modes and filters */
400 	if (event->attr.sample_period) /* no sampling */
401 		return -EINVAL;
402 
403 	if (has_branch_stack(event))
404 		return -EOPNOTSUPP;
405 
406 	if (event->cpu < 0)
407 		return -EINVAL;
408 
409 	/* only allow running on one cpu at a time */
410 	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
411 		return -EINVAL;
412 
413 	if (is_engine_event(event))
414 		ret = engine_event_init(event);
415 	else
416 		ret = config_status(i915, event->attr.config);
417 	if (ret)
418 		return ret;
419 
420 	if (!event->parent)
421 		event->destroy = i915_pmu_event_destroy;
422 
423 	return 0;
424 }
425 
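/*
 * Raw RC6 residency: the base RC6 counter (VLV_GT_RENDER_RC6 on Valleyview,
 * GEN6_GT_GFX_RC6 otherwise) plus the deeper RC6p/RC6pp counters where the
 * platform has them.
 */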
426 static u64 __get_rc6(struct drm_i915_private *i915)
427 {
428 	u64 val;
429 
430 	val = intel_rc6_residency_ns(i915,
431 				     IS_VALLEYVIEW(i915) ?
432 				     VLV_GT_RENDER_RC6 :
433 				     GEN6_GT_GFX_RC6);
434 
435 	if (HAS_RC6p(i915))
436 		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
437 
438 	if (HAS_RC6pp(i915))
439 		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
440 
441 	return val;
442 }
443 
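/*
 * get_rc6() has to cope with the device being runtime suspended, in which
 * case the residency registers cannot be read. While suspended the counter
 * is estimated as the last real value plus the time spent suspended since,
 * and after resume the estimate is only dropped once the real counter has
 * caught up with it, so the reported value never goes backwards.
 */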
444 static u64 get_rc6(struct drm_i915_private *i915)
445 {
446 #if IS_ENABLED(CONFIG_PM)
447 	unsigned long flags;
448 	u64 val;
449 
450 	if (intel_runtime_pm_get_if_in_use(i915)) {
451 		val = __get_rc6(i915);
452 		intel_runtime_pm_put(i915);
453 
454 		/*
455 		 * If we are coming back from being runtime suspended we must
456 		 * be careful not to report a smaller value than was returned
457 		 * previously.
458 		 */
459 
460 		spin_lock_irqsave(&i915->pmu.lock, flags);
461 
462 		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
463 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
464 			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
465 		} else {
466 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
467 		}
468 
469 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
470 	} else {
471 		struct pci_dev *pdev = i915->drm.pdev;
472 		struct device *kdev = &pdev->dev;
473 
474 		/*
475 		 * We are runtime suspended.
476 		 *
477 		 * Report the delta from when the device was suspended to now,
478 		 * on top of the last known real value, as the approximated RC6
479 		 * counter value.
480 		 */
481 		spin_lock_irqsave(&i915->pmu.lock, flags);
482 
483 		/*
484 		 * Even though in the branch above intel_runtime_pm_get_if_in_use
485 		 * failed to get the runtime PM reference, we cannot assume we are
486 		 * in runtime suspend, since we can either: a) race with the device
487 		 * coming out of it before we took the power.lock, or b) be in one
488 		 * of the other states which can bring us here.
489 		 *
490 		 * We need to double-check that we are indeed currently runtime
491 		 * suspended and if not we cannot do better than report the last
492 		 * known RC6 value.
493 		 */
494 		if (pm_runtime_status_suspended(kdev)) {
495 			val = pm_runtime_suspended_time(kdev);
496 
497 			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
498 				i915->pmu.suspended_time_last = val;
499 
500 			val -= i915->pmu.suspended_time_last;
501 			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
502 
503 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
504 		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
505 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
506 		} else {
507 			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
508 		}
509 
510 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
511 	}
512 
513 	return val;
514 #else
515 	return __get_rc6(i915);
516 #endif
517 }
518 
519 static u64 __i915_pmu_event_read(struct perf_event *event)
520 {
521 	struct drm_i915_private *i915 =
522 		container_of(event->pmu, typeof(*i915), pmu.base);
523 	u64 val = 0;
524 
525 	if (is_engine_event(event)) {
526 		u8 sample = engine_event_sample(event);
527 		struct intel_engine_cs *engine;
528 
529 		engine = intel_engine_lookup_user(i915,
530 						  engine_event_class(event),
531 						  engine_event_instance(event));
532 
533 		if (WARN_ON_ONCE(!engine)) {
534 			/* Do nothing */
535 		} else if (sample == I915_SAMPLE_BUSY &&
536 			   intel_engine_supports_stats(engine)) {
537 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
538 		} else {
539 			val = engine->pmu.sample[sample].cur;
540 		}
541 	} else {
542 		switch (event->attr.config) {
543 		case I915_PMU_ACTUAL_FREQUENCY:
544 			val =
545 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
546 				   USEC_PER_SEC /* to MHz */);
547 			break;
548 		case I915_PMU_REQUESTED_FREQUENCY:
549 			val =
550 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
551 				   USEC_PER_SEC /* to MHz */);
552 			break;
553 		case I915_PMU_INTERRUPTS:
554 			val = count_interrupts(i915);
555 			break;
556 		case I915_PMU_RC6_RESIDENCY:
557 			val = get_rc6(i915);
558 			break;
559 		}
560 	}
561 
562 	return val;
563 }
564 
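/*
 * Event reads are lock free: the previous snapshot is swapped with a
 * cmpxchg() loop so that concurrent readers each add exactly the delta
 * between two consistent snapshots to event->count.
 */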
565 static void i915_pmu_event_read(struct perf_event *event)
566 {
567 	struct hw_perf_event *hwc = &event->hw;
568 	u64 prev, new;
569 
570 again:
571 	prev = local64_read(&hwc->prev_count);
572 	new = __i915_pmu_event_read(event);
573 
574 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
575 		goto again;
576 
577 	local64_add(new - prev, &event->count);
578 }
579 
580 static void i915_pmu_enable(struct perf_event *event)
581 {
582 	struct drm_i915_private *i915 =
583 		container_of(event->pmu, typeof(*i915), pmu.base);
584 	unsigned int bit = event_enabled_bit(event);
585 	unsigned long flags;
586 
587 	spin_lock_irqsave(&i915->pmu.lock, flags);
588 
589 	/*
590 	 * Update the bitmask of enabled events and increment
591 	 * the event reference counter.
592 	 */
593 	BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS);
594 	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
595 	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
596 	i915->pmu.enable |= BIT_ULL(bit);
597 	i915->pmu.enable_count[bit]++;
598 
599 	/*
600 	 * Start the sampling timer if needed and not already enabled.
601 	 */
602 	__i915_pmu_maybe_start_timer(i915);
603 
604 	/*
605 	 * For per-engine events the bitmask and reference counts
606 	 * are stored per engine.
607 	 */
608 	if (is_engine_event(event)) {
609 		u8 sample = engine_event_sample(event);
610 		struct intel_engine_cs *engine;
611 
612 		engine = intel_engine_lookup_user(i915,
613 						  engine_event_class(event),
614 						  engine_event_instance(event));
615 
616 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
617 			     I915_ENGINE_SAMPLE_COUNT);
618 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
619 			     I915_ENGINE_SAMPLE_COUNT);
620 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
621 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
622 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
623 
624 		engine->pmu.enable |= BIT(sample);
625 		engine->pmu.enable_count[sample]++;
626 	}
627 
628 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
629 
630 	/*
631 	 * Store the current counter value so we can report the correct delta
632 	 * for all listeners, even when the event was already enabled and has
633 	 * an existing non-zero value.
634 	 */
635 	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
636 }
637 
638 static void i915_pmu_disable(struct perf_event *event)
639 {
640 	struct drm_i915_private *i915 =
641 		container_of(event->pmu, typeof(*i915), pmu.base);
642 	unsigned int bit = event_enabled_bit(event);
643 	unsigned long flags;
644 
645 	spin_lock_irqsave(&i915->pmu.lock, flags);
646 
647 	if (is_engine_event(event)) {
648 		u8 sample = engine_event_sample(event);
649 		struct intel_engine_cs *engine;
650 
651 		engine = intel_engine_lookup_user(i915,
652 						  engine_event_class(event),
653 						  engine_event_instance(event));
654 
655 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
656 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
657 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
658 
659 		/*
660 		 * Decrement the reference count and clear the enabled
661 		 * bitmask when the last listener on an event goes away.
662 		 */
663 		if (--engine->pmu.enable_count[sample] == 0)
664 			engine->pmu.enable &= ~BIT(sample);
665 	}
666 
667 	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
668 	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
669 	/*
670 	 * Decrement the reference count and clear the enabled
671 	 * bitmask when the last listener on an event goes away.
672 	 */
673 	if (--i915->pmu.enable_count[bit] == 0) {
674 		i915->pmu.enable &= ~BIT_ULL(bit);
675 		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
676 	}
677 
678 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
679 }
680 
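/*
 * The perf core callbacks below map add/start and stop/del onto the
 * reference counted enable masks above; i915 exposes free running
 * counters with no per-task context.
 */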
681 static void i915_pmu_event_start(struct perf_event *event, int flags)
682 {
683 	i915_pmu_enable(event);
684 	event->hw.state = 0;
685 }
686 
687 static void i915_pmu_event_stop(struct perf_event *event, int flags)
688 {
689 	if (flags & PERF_EF_UPDATE)
690 		i915_pmu_event_read(event);
691 	i915_pmu_disable(event);
692 	event->hw.state = PERF_HES_STOPPED;
693 }
694 
695 static int i915_pmu_event_add(struct perf_event *event, int flags)
696 {
697 	if (flags & PERF_EF_START)
698 		i915_pmu_event_start(event, flags);
699 
700 	return 0;
701 }
702 
703 static void i915_pmu_event_del(struct perf_event *event, int flags)
704 {
705 	i915_pmu_event_stop(event, PERF_EF_UPDATE);
706 }
707 
708 static int i915_pmu_event_event_idx(struct perf_event *event)
709 {
710 	return 0;
711 }
712 
713 struct i915_str_attribute {
714 	struct device_attribute attr;
715 	const char *str;
716 };
717 
718 static ssize_t i915_pmu_format_show(struct device *dev,
719 				    struct device_attribute *attr, char *buf)
720 {
721 	struct i915_str_attribute *eattr;
722 
723 	eattr = container_of(attr, struct i915_str_attribute, attr);
724 	return sprintf(buf, "%s\n", eattr->str);
725 }
726 
727 #define I915_PMU_FORMAT_ATTR(_name, _config) \
728 	(&((struct i915_str_attribute[]) { \
729 		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
730 		  .str = _config, } \
731 	})[0].attr.attr)
732 
733 static struct attribute *i915_pmu_format_attrs[] = {
734 	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
735 	NULL,
736 };
737 
738 static const struct attribute_group i915_pmu_format_attr_group = {
739 	.name = "format",
740 	.attrs = i915_pmu_format_attrs,
741 };
742 
743 struct i915_ext_attribute {
744 	struct device_attribute attr;
745 	unsigned long val;
746 };
747 
748 static ssize_t i915_pmu_event_show(struct device *dev,
749 				   struct device_attribute *attr, char *buf)
750 {
751 	struct i915_ext_attribute *eattr;
752 
753 	eattr = container_of(attr, struct i915_ext_attribute, attr);
754 	return sprintf(buf, "config=0x%lx\n", eattr->val);
755 }
756 
757 static struct attribute_group i915_pmu_events_attr_group = {
758 	.name = "events",
759 	/* Patch in attrs at runtime. */
760 };
761 
762 static ssize_t
763 i915_pmu_get_attr_cpumask(struct device *dev,
764 			  struct device_attribute *attr,
765 			  char *buf)
766 {
767 	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
768 }
769 
770 static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
771 
772 static struct attribute *i915_cpumask_attrs[] = {
773 	&dev_attr_cpumask.attr,
774 	NULL,
775 };
776 
777 static const struct attribute_group i915_pmu_cpumask_attr_group = {
778 	.attrs = i915_cpumask_attrs,
779 };
780 
781 static const struct attribute_group *i915_pmu_attr_groups[] = {
782 	&i915_pmu_format_attr_group,
783 	&i915_pmu_events_attr_group,
784 	&i915_pmu_cpumask_attr_group,
785 	NULL
786 };
787 
788 #define __event(__config, __name, __unit) \
789 { \
790 	.config = (__config), \
791 	.name = (__name), \
792 	.unit = (__unit), \
793 }
794 
795 #define __engine_event(__sample, __name) \
796 { \
797 	.sample = (__sample), \
798 	.name = (__name), \
799 }
800 
801 static struct i915_ext_attribute *
802 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
803 {
804 	sysfs_attr_init(&attr->attr.attr);
805 	attr->attr.attr.name = name;
806 	attr->attr.attr.mode = 0444;
807 	attr->attr.show = i915_pmu_event_show;
808 	attr->val = config;
809 
810 	return ++attr;
811 }
812 
813 static struct perf_pmu_events_attr *
814 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
815 	     const char *str)
816 {
817 	sysfs_attr_init(&attr->attr.attr);
818 	attr->attr.attr.name = name;
819 	attr->attr.attr.mode = 0444;
820 	attr->attr.show = perf_event_sysfs_show;
821 	attr->event_str = str;
822 
823 	return ++attr;
824 }
825 
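/*
 * Build the contents of the sysfs "events" group at runtime: one config
 * attribute for every counter which config_status()/engine_event_status()
 * report as supported, plus a matching ".unit" attribute where a unit
 * string is defined.
 */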
826 static struct attribute **
827 create_event_attributes(struct drm_i915_private *i915)
828 {
829 	static const struct {
830 		u64 config;
831 		const char *name;
832 		const char *unit;
833 	} events[] = {
834 		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
835 		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
836 		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
837 		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
838 	};
839 	static const struct {
840 		enum drm_i915_pmu_engine_sample sample;
841 		char *name;
842 	} engine_events[] = {
843 		__engine_event(I915_SAMPLE_BUSY, "busy"),
844 		__engine_event(I915_SAMPLE_SEMA, "sema"),
845 		__engine_event(I915_SAMPLE_WAIT, "wait"),
846 	};
847 	unsigned int count = 0;
848 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
849 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
850 	struct attribute **attr = NULL, **attr_iter;
851 	struct intel_engine_cs *engine;
852 	enum intel_engine_id id;
853 	unsigned int i;
854 
855 	/* Count how many counters we will be exposing. */
856 	for (i = 0; i < ARRAY_SIZE(events); i++) {
857 		if (!config_status(i915, events[i].config))
858 			count++;
859 	}
860 
861 	for_each_engine(engine, i915, id) {
862 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
863 			if (!engine_event_status(engine,
864 						 engine_events[i].sample))
865 				count++;
866 		}
867 	}
868 
869 	/* Allocate attribute objects and table. */
870 	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
871 	if (!i915_attr)
872 		goto err_alloc;
873 
874 	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
875 	if (!pmu_attr)
876 		goto err_alloc;
877 
878 	/* At most one pointer of each attribute type per counter, plus a termination entry. */
879 	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
880 	if (!attr)
881 		goto err_alloc;
882 
883 	i915_iter = i915_attr;
884 	pmu_iter = pmu_attr;
885 	attr_iter = attr;
886 
887 	/* Initialize supported non-engine counters. */
888 	for (i = 0; i < ARRAY_SIZE(events); i++) {
889 		char *str;
890 
891 		if (config_status(i915, events[i].config))
892 			continue;
893 
894 		str = kstrdup(events[i].name, GFP_KERNEL);
895 		if (!str)
896 			goto err;
897 
898 		*attr_iter++ = &i915_iter->attr.attr;
899 		i915_iter = add_i915_attr(i915_iter, str, events[i].config);
900 
901 		if (events[i].unit) {
902 			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
903 			if (!str)
904 				goto err;
905 
906 			*attr_iter++ = &pmu_iter->attr.attr;
907 			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
908 		}
909 	}
910 
911 	/* Initialize supported engine counters. */
912 	for_each_engine(engine, i915, id) {
913 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
914 			char *str;
915 
916 			if (engine_event_status(engine,
917 						engine_events[i].sample))
918 				continue;
919 
920 			str = kasprintf(GFP_KERNEL, "%s-%s",
921 					engine->name, engine_events[i].name);
922 			if (!str)
923 				goto err;
924 
925 			*attr_iter++ = &i915_iter->attr.attr;
926 			i915_iter =
927 				add_i915_attr(i915_iter, str,
928 					      __I915_PMU_ENGINE(engine->uabi_class,
929 								engine->instance,
930 								engine_events[i].sample));
931 
932 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
933 					engine->name, engine_events[i].name);
934 			if (!str)
935 				goto err;
936 
937 			*attr_iter++ = &pmu_iter->attr.attr;
938 			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
939 		}
940 	}
941 
942 	i915->pmu.i915_attr = i915_attr;
943 	i915->pmu.pmu_attr = pmu_attr;
944 
945 	return attr;
946 
947 err:;
948 	for (attr_iter = attr; *attr_iter; attr_iter++)
949 		kfree((*attr_iter)->name);
950 
951 err_alloc:
952 	kfree(attr);
953 	kfree(i915_attr);
954 	kfree(pmu_attr);
955 
956 	return NULL;
957 }
958 
959 static void free_event_attributes(struct drm_i915_private *i915)
960 {
961 	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
962 
963 	for (; *attr_iter; attr_iter++)
964 		kfree((*attr_iter)->name);
965 
966 	kfree(i915_pmu_events_attr_group.attrs);
967 	kfree(i915->pmu.i915_attr);
968 	kfree(i915->pmu.pmu_attr);
969 
970 	i915_pmu_events_attr_group.attrs = NULL;
971 	i915->pmu.i915_attr = NULL;
972 	i915->pmu.pmu_attr = NULL;
973 }
974 
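/*
 * CPU hotplug callbacks: i915 events are uncore-style and are exposed on a
 * single CPU, published through the "cpumask" attribute. If that CPU goes
 * offline the perf context is migrated to a sibling CPU when one is
 * available.
 */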
975 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
976 {
977 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
978 
979 	GEM_BUG_ON(!pmu->base.event_init);
980 
981 	/* Select the first online CPU as a designated reader. */
982 	if (!cpumask_weight(&i915_pmu_cpumask))
983 		cpumask_set_cpu(cpu, &i915_pmu_cpumask);
984 
985 	return 0;
986 }
987 
988 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
989 {
990 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
991 	unsigned int target;
992 
993 	GEM_BUG_ON(!pmu->base.event_init);
994 
995 	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
996 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
997 		/* Migrate events if there is a valid target */
998 		if (target < nr_cpu_ids) {
999 			cpumask_set_cpu(target, &i915_pmu_cpumask);
1000 			perf_pmu_migrate_context(&pmu->base, cpu, target);
1001 		}
1002 	}
1003 
1004 	return 0;
1005 }
1006 
1007 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1008 
1009 static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
1010 {
1011 	enum cpuhp_state slot;
1012 	int ret;
1013 
1014 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1015 				      "perf/x86/intel/i915:online",
1016 				      i915_pmu_cpu_online,
1017 				      i915_pmu_cpu_offline);
1018 	if (ret < 0)
1019 		return ret;
1020 
1021 	slot = ret;
1022 	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
1023 	if (ret) {
1024 		cpuhp_remove_multi_state(slot);
1025 		return ret;
1026 	}
1027 
1028 	cpuhp_slot = slot;
1029 	return 0;
1030 }
1031 
1032 static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
1033 {
1034 	WARN_ON(cpuhp_slot == CPUHP_INVALID);
1035 	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
1036 	cpuhp_remove_multi_state(cpuhp_slot);
1037 }
1038 
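/*
 * Registration builds the event attributes first, then registers the PMU
 * with perf and hooks up the CPU hotplug state. pmu.base.event_init doubles
 * as the "PMU is registered" flag checked by the other entry points, so it
 * is cleared again on any failure.
 */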
1039 void i915_pmu_register(struct drm_i915_private *i915)
1040 {
1041 	int ret;
1042 
1043 	if (INTEL_GEN(i915) <= 2) {
1044 		DRM_INFO("PMU not supported for this GPU.\n");
1045 		return;
1046 	}
1047 
1048 	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
1049 	if (!i915_pmu_events_attr_group.attrs) {
1050 		ret = -ENOMEM;
1051 		goto err;
1052 	}
1053 
1054 	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
1055 	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
1056 	i915->pmu.base.event_init	= i915_pmu_event_init;
1057 	i915->pmu.base.add		= i915_pmu_event_add;
1058 	i915->pmu.base.del		= i915_pmu_event_del;
1059 	i915->pmu.base.start		= i915_pmu_event_start;
1060 	i915->pmu.base.stop		= i915_pmu_event_stop;
1061 	i915->pmu.base.read		= i915_pmu_event_read;
1062 	i915->pmu.base.event_idx	= i915_pmu_event_event_idx;
1063 
1064 	spin_lock_init(&i915->pmu.lock);
1065 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1066 	i915->pmu.timer.function = i915_sample;
1067 
1068 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
1069 	if (ret)
1070 		goto err;
1071 
1072 	ret = i915_pmu_register_cpuhp_state(i915);
1073 	if (ret)
1074 		goto err_unreg;
1075 
1076 	return;
1077 
1078 err_unreg:
1079 	perf_pmu_unregister(&i915->pmu.base);
1080 err:
1081 	i915->pmu.base.event_init = NULL;
1082 	free_event_attributes(i915);
1083 	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
1084 }
1085 
1086 void i915_pmu_unregister(struct drm_i915_private *i915)
1087 {
1088 	if (!i915->pmu.base.event_init)
1089 		return;
1090 
1091 	WARN_ON(i915->pmu.enable);
1092 
1093 	hrtimer_cancel(&i915->pmu.timer);
1094 
1095 	i915_pmu_unregister_cpuhp_state(i915);
1096 
1097 	perf_pmu_unregister(&i915->pmu.base);
1098 	i915->pmu.base.event_init = NULL;
1099 	free_event_attributes(i915);
1100 }
1101