xref: /linux/drivers/gpu/drm/i915/gt/selftest_rps.c (revision 0b364cf53b20204e92bac7c6ebd1ee7d3ec62931)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5 
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8 
9 #include "gem/i915_gem_internal.h"
10 
11 #include "i915_reg.h"
12 #include "intel_engine_heartbeat.h"
13 #include "intel_engine_pm.h"
14 #include "intel_engine_regs.h"
15 #include "intel_gpu_commands.h"
16 #include "intel_gt_clock_utils.h"
17 #include "intel_gt_pm.h"
18 #include "intel_rc6.h"
19 #include "selftest_engine_heartbeat.h"
20 #include "selftest_rps.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_spinner.h"
23 #include "selftests/librapl.h"
24 
25 /* Try to isolate the impact of cstates from determining frequency response */
26 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
27 
/*
 * Do-nothing work handler. The live tests in this file install it as
 * rps->work.func while they run and restore the saved handler afterwards.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
	/* intentionally empty */
}
31 
32 static int cmp_u64(const void *A, const void *B)
33 {
34 	const u64 *a = A, *b = B;
35 
36 	if (*a < *b)
37 		return -1;
38 	else if (*a > *b)
39 		return 1;
40 	else
41 		return 0;
42 }
43 
44 static int cmp_u32(const void *A, const void *B)
45 {
46 	const u32 *a = A, *b = B;
47 
48 	if (*a < *b)
49 		return -1;
50 	else if (*a > *b)
51 		return 1;
52 	else
53 		return 0;
54 }
55 
56 static struct i915_vma *
57 create_spin_counter(struct intel_engine_cs *engine,
58 		    struct i915_address_space *vm,
59 		    bool srm,
60 		    u32 **cancel,
61 		    u32 **counter)
62 {
63 	enum {
64 		COUNT,
65 		INC,
66 		__NGPR__,
67 	};
68 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
69 	struct drm_i915_gem_object *obj;
70 	struct i915_vma *vma;
71 	unsigned long end;
72 	u32 *base, *cs;
73 	int loop, i;
74 	int err;
75 
76 	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
77 	if (IS_ERR(obj))
78 		return ERR_CAST(obj);
79 
80 	end = obj->base.size / sizeof(u32) - 1;
81 
82 	vma = i915_vma_instance(obj, vm, NULL);
83 	if (IS_ERR(vma)) {
84 		err = PTR_ERR(vma);
85 		goto err_put;
86 	}
87 
88 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
89 	if (err)
90 		goto err_unlock;
91 
92 	i915_vma_lock(vma);
93 
94 	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
95 	if (IS_ERR(base)) {
96 		err = PTR_ERR(base);
97 		goto err_unpin;
98 	}
99 	cs = base;
100 
101 	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
102 	for (i = 0; i < __NGPR__; i++) {
103 		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
104 		*cs++ = 0;
105 		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
106 		*cs++ = 0;
107 	}
108 
109 	*cs++ = MI_LOAD_REGISTER_IMM(1);
110 	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
111 	*cs++ = 1;
112 
113 	loop = cs - base;
114 
115 	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
116 	for (i = 0; i < 1024; i++) {
117 		*cs++ = MI_MATH(4);
118 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
119 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
120 		*cs++ = MI_MATH_ADD;
121 		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
122 
123 		if (srm) {
124 			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
125 			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
126 			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
127 			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
128 		}
129 	}
130 
131 	*cs++ = MI_BATCH_BUFFER_START_GEN8;
132 	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
133 	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
134 	GEM_BUG_ON(cs - base > end);
135 
136 	i915_gem_object_flush_map(obj);
137 
138 	*cancel = base + loop;
139 	*counter = srm ? memset32(base + end, 0, 1) : NULL;
140 	return vma;
141 
142 err_unpin:
143 	i915_vma_unpin(vma);
144 err_unlock:
145 	i915_vma_unlock(vma);
146 err_put:
147 	i915_gem_object_put(obj);
148 	return ERR_PTR(err);
149 }
150 
151 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
152 {
153 	u8 history[64], i;
154 	unsigned long end;
155 	int sleep;
156 
157 	i = 0;
158 	memset(history, freq, sizeof(history));
159 	sleep = 20;
160 
161 	/* The PCU does not change instantly, but drifts towards the goal? */
162 	end = jiffies + msecs_to_jiffies(timeout_ms);
163 	do {
164 		u8 act;
165 
166 		act = read_cagf(rps);
167 		if (time_after(jiffies, end))
168 			return act;
169 
170 		/* Target acquired */
171 		if (act == freq)
172 			return act;
173 
174 		/* Any change within the last N samples? */
175 		if (!memchr_inv(history, act, sizeof(history)))
176 			return act;
177 
178 		history[i] = act;
179 		i = (i + 1) % ARRAY_SIZE(history);
180 
181 		usleep_range(sleep, 2 * sleep);
182 		sleep *= 2;
183 		if (sleep > timeout_ms * 20)
184 			sleep = timeout_ms * 20;
185 	} while (1);
186 }
187 
188 static u8 rps_set_check(struct intel_rps *rps, u8 freq)
189 {
190 	mutex_lock(&rps->lock);
191 	GEM_BUG_ON(!intel_rps_is_active(rps));
192 	if (wait_for(!intel_rps_set(rps, freq), 50)) {
193 		mutex_unlock(&rps->lock);
194 		return 0;
195 	}
196 	GEM_BUG_ON(rps->last_freq != freq);
197 	mutex_unlock(&rps->lock);
198 
199 	return wait_for_freq(rps, freq, 50);
200 }
201 
202 static void show_pstate_limits(struct intel_rps *rps)
203 {
204 	struct drm_i915_private *i915 = rps_to_i915(rps);
205 
206 	if (IS_BROXTON(i915)) {
207 		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
208 			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
209 			intel_uncore_read(rps_to_uncore(rps),
210 					  BXT_RP_STATE_CAP));
211 	} else if (GRAPHICS_VER(i915) == 9) {
212 		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
213 			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
214 			intel_uncore_read(rps_to_uncore(rps),
215 					  GEN9_RP_STATE_LIMITS));
216 	}
217 }
218 
219 int live_rps_clock_interval(void *arg)
220 {
221 	struct intel_gt *gt = arg;
222 	struct intel_rps *rps = &gt->rps;
223 	void (*saved_work)(struct work_struct *wrk);
224 	struct intel_engine_cs *engine;
225 	enum intel_engine_id id;
226 	struct igt_spinner spin;
227 	intel_wakeref_t wakeref;
228 	int err = 0;
229 
230 	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
231 		return 0;
232 
233 	if (igt_spinner_init(&spin, gt))
234 		return -ENOMEM;
235 
236 	intel_gt_pm_wait_for_idle(gt);
237 	saved_work = rps->work.func;
238 	rps->work.func = dummy_rps_work;
239 
240 	wakeref = intel_gt_pm_get(gt);
241 	intel_rps_disable(&gt->rps);
242 
243 	intel_gt_check_clock_frequency(gt);
244 
245 	for_each_engine(engine, gt, id) {
246 		struct i915_request *rq;
247 		u32 cycles;
248 		u64 dt;
249 
250 		if (!intel_engine_can_store_dword(engine))
251 			continue;
252 
253 		st_engine_heartbeat_disable(engine);
254 
255 		rq = igt_spinner_create_request(&spin,
256 						engine->kernel_context,
257 						MI_NOOP);
258 		if (IS_ERR(rq)) {
259 			st_engine_heartbeat_enable(engine);
260 			err = PTR_ERR(rq);
261 			break;
262 		}
263 
264 		i915_request_add(rq);
265 
266 		if (!igt_wait_for_spinner(&spin, rq)) {
267 			pr_err("%s: RPS spinner did not start\n",
268 			       engine->name);
269 			igt_spinner_end(&spin);
270 			st_engine_heartbeat_enable(engine);
271 			intel_gt_set_wedged(engine->gt);
272 			err = -EIO;
273 			break;
274 		}
275 
276 		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
277 
278 		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
279 
280 		/* Set the evaluation interval to infinity! */
281 		intel_uncore_write_fw(gt->uncore,
282 				      GEN6_RP_UP_EI, 0xffffffff);
283 		intel_uncore_write_fw(gt->uncore,
284 				      GEN6_RP_UP_THRESHOLD, 0xffffffff);
285 
286 		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
287 				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
288 
289 		if (wait_for(intel_uncore_read_fw(gt->uncore,
290 						  GEN6_RP_CUR_UP_EI),
291 			     10)) {
292 			/* Just skip the test; assume lack of HW support */
293 			pr_notice("%s: rps evaluation interval not ticking\n",
294 				  engine->name);
295 			err = -ENODEV;
296 		} else {
297 			ktime_t dt_[5];
298 			u32 cycles_[5];
299 			int i;
300 
301 			for (i = 0; i < 5; i++) {
302 				preempt_disable();
303 
304 				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
305 				dt_[i] = ktime_get();
306 
307 				udelay(1000);
308 
309 				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
310 				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
311 
312 				preempt_enable();
313 			}
314 
315 			/* Use the median of both cycle/dt; close enough */
316 			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
317 			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
318 			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
319 			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
320 		}
321 
322 		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
323 		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
324 
325 		igt_spinner_end(&spin);
326 		st_engine_heartbeat_enable(engine);
327 
328 		if (err == 0) {
329 			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
330 			u32 expected =
331 				intel_gt_ns_to_pm_interval(gt, dt);
332 
333 			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
334 				engine->name, cycles, time, dt, expected,
335 				gt->clock_frequency / 1000);
336 
337 			if (10 * time < 8 * dt ||
338 			    8 * time > 10 * dt) {
339 				pr_err("%s: rps clock time does not match walltime!\n",
340 				       engine->name);
341 				err = -EINVAL;
342 			}
343 
344 			if (10 * expected < 8 * cycles ||
345 			    8 * expected > 10 * cycles) {
346 				pr_err("%s: walltime does not match rps clock ticks!\n",
347 				       engine->name);
348 				err = -EINVAL;
349 			}
350 		}
351 
352 		if (igt_flush_test(gt->i915))
353 			err = -EIO;
354 
355 		break; /* once is enough */
356 	}
357 
358 	intel_rps_enable(&gt->rps);
359 	intel_gt_pm_put(gt, wakeref);
360 
361 	igt_spinner_fini(&spin);
362 
363 	intel_gt_pm_wait_for_idle(gt);
364 	rps->work.func = saved_work;
365 
366 	if (err == -ENODEV) /* skipped, don't report a fail */
367 		err = 0;
368 
369 	return err;
370 }
371 
372 int live_rps_control(void *arg)
373 {
374 	struct intel_gt *gt = arg;
375 	struct intel_rps *rps = &gt->rps;
376 	void (*saved_work)(struct work_struct *wrk);
377 	struct intel_engine_cs *engine;
378 	enum intel_engine_id id;
379 	struct igt_spinner spin;
380 	intel_wakeref_t wakeref;
381 	int err = 0;
382 
383 	/*
384 	 * Check that the actual frequency matches our requested frequency,
385 	 * to verify our control mechanism. We have to be careful that the
386 	 * PCU may throttle the GPU in which case the actual frequency used
387 	 * will be lowered than requested.
388 	 */
389 
390 	if (!intel_rps_is_enabled(rps))
391 		return 0;
392 
393 	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
394 		return 0;
395 
396 	if (igt_spinner_init(&spin, gt))
397 		return -ENOMEM;
398 
399 	intel_gt_pm_wait_for_idle(gt);
400 	saved_work = rps->work.func;
401 	rps->work.func = dummy_rps_work;
402 
403 	wakeref = intel_gt_pm_get(gt);
404 	for_each_engine(engine, gt, id) {
405 		struct i915_request *rq;
406 		ktime_t min_dt, max_dt;
407 		int f, limit;
408 		int min, max;
409 
410 		if (!intel_engine_can_store_dword(engine))
411 			continue;
412 
413 		st_engine_heartbeat_disable(engine);
414 
415 		rq = igt_spinner_create_request(&spin,
416 						engine->kernel_context,
417 						MI_NOOP);
418 		if (IS_ERR(rq)) {
419 			err = PTR_ERR(rq);
420 			break;
421 		}
422 
423 		i915_request_add(rq);
424 
425 		if (!igt_wait_for_spinner(&spin, rq)) {
426 			pr_err("%s: RPS spinner did not start\n",
427 			       engine->name);
428 			igt_spinner_end(&spin);
429 			st_engine_heartbeat_enable(engine);
430 			intel_gt_set_wedged(engine->gt);
431 			err = -EIO;
432 			break;
433 		}
434 
435 		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
436 			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
437 			       engine->name, rps->min_freq, read_cagf(rps));
438 			igt_spinner_end(&spin);
439 			st_engine_heartbeat_enable(engine);
440 			show_pstate_limits(rps);
441 			err = -EINVAL;
442 			break;
443 		}
444 
445 		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
446 			if (rps_set_check(rps, f) < f)
447 				break;
448 		}
449 
450 		limit = rps_set_check(rps, f);
451 
452 		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
453 			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
454 			       engine->name, rps->min_freq, read_cagf(rps));
455 			igt_spinner_end(&spin);
456 			st_engine_heartbeat_enable(engine);
457 			show_pstate_limits(rps);
458 			err = -EINVAL;
459 			break;
460 		}
461 
462 		max_dt = ktime_get();
463 		max = rps_set_check(rps, limit);
464 		max_dt = ktime_sub(ktime_get(), max_dt);
465 
466 		min_dt = ktime_get();
467 		min = rps_set_check(rps, rps->min_freq);
468 		min_dt = ktime_sub(ktime_get(), min_dt);
469 
470 		igt_spinner_end(&spin);
471 		st_engine_heartbeat_enable(engine);
472 
473 		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
474 			engine->name,
475 			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
476 			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
477 			limit, intel_gpu_freq(rps, limit),
478 			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
479 
480 		if (limit == rps->min_freq) {
481 			pr_err("%s: GPU throttled to minimum!\n",
482 			       engine->name);
483 			show_pstate_limits(rps);
484 			err = -ENODEV;
485 			break;
486 		}
487 
488 		if (igt_flush_test(gt->i915)) {
489 			err = -EIO;
490 			break;
491 		}
492 	}
493 	intel_gt_pm_put(gt, wakeref);
494 
495 	igt_spinner_fini(&spin);
496 
497 	intel_gt_pm_wait_for_idle(gt);
498 	rps->work.func = saved_work;
499 
500 	return err;
501 }
502 
503 static void show_pcu_config(struct intel_rps *rps)
504 {
505 	struct drm_i915_private *i915 = rps_to_i915(rps);
506 	unsigned int max_gpu_freq, min_gpu_freq;
507 	intel_wakeref_t wakeref;
508 	int gpu_freq;
509 
510 	if (!HAS_LLC(i915))
511 		return;
512 
513 	min_gpu_freq = rps->min_freq;
514 	max_gpu_freq = rps->max_freq;
515 	if (GRAPHICS_VER(i915) >= 9) {
516 		/* Convert GT frequency to 50 HZ units */
517 		min_gpu_freq /= GEN9_FREQ_SCALER;
518 		max_gpu_freq /= GEN9_FREQ_SCALER;
519 	}
520 
521 	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
522 
523 	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
524 	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
525 		int ia_freq = gpu_freq;
526 
527 		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
528 			       &ia_freq, NULL);
529 
530 		pr_info("%5d  %5d  %5d\n",
531 			gpu_freq * 50,
532 			((ia_freq >> 0) & 0xff) * 100,
533 			((ia_freq >> 8) & 0xff) * 100);
534 	}
535 
536 	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
537 }
538 
539 static u64 __measure_frequency(u32 *cntr, int duration_ms)
540 {
541 	u64 dc, dt;
542 
543 	dc = READ_ONCE(*cntr);
544 	dt = ktime_get();
545 	usleep_range(1000 * duration_ms, 2000 * duration_ms);
546 	dc = READ_ONCE(*cntr) - dc;
547 	dt = ktime_get() - dt;
548 
549 	return div64_u64(1000 * 1000 * dc, dt);
550 }
551 
552 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
553 {
554 	u64 x[5];
555 	int i;
556 
557 	*freq = rps_set_check(rps, *freq);
558 	for (i = 0; i < 5; i++)
559 		x[i] = __measure_frequency(cntr, 2);
560 	*freq = (*freq + read_cagf(rps)) / 2;
561 
562 	/* A simple triangle filter for better result stability */
563 	sort(x, 5, sizeof(*x), cmp_u64, NULL);
564 	return div_u64(x[1] + 2 * x[2] + x[3], 4);
565 }
566 
567 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
568 				  int duration_ms)
569 {
570 	u64 dc, dt;
571 
572 	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
573 	dt = ktime_get();
574 	usleep_range(1000 * duration_ms, 2000 * duration_ms);
575 	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
576 	dt = ktime_get() - dt;
577 
578 	return div64_u64(1000 * 1000 * dc, dt);
579 }
580 
581 static u64 measure_cs_frequency_at(struct intel_rps *rps,
582 				   struct intel_engine_cs *engine,
583 				   int *freq)
584 {
585 	u64 x[5];
586 	int i;
587 
588 	*freq = rps_set_check(rps, *freq);
589 	for (i = 0; i < 5; i++)
590 		x[i] = __measure_cs_frequency(engine, 2);
591 	*freq = (*freq + read_cagf(rps)) / 2;
592 
593 	/* A simple triangle filter for better result stability */
594 	sort(x, 5, sizeof(*x), cmp_u64, NULL);
595 	return div_u64(x[1] + 2 * x[2] + x[3], 4);
596 }
597 
598 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
599 {
600 	return f_d * x > f_n * y && f_n * x < f_d * y;
601 }
602 
603 int live_rps_frequency_cs(void *arg)
604 {
605 	void (*saved_work)(struct work_struct *wrk);
606 	struct intel_gt *gt = arg;
607 	struct intel_rps *rps = &gt->rps;
608 	struct intel_engine_cs *engine;
609 	struct pm_qos_request qos;
610 	enum intel_engine_id id;
611 	int err = 0;
612 
613 	/*
614 	 * The premise is that the GPU does change frequency at our behest.
615 	 * Let's check there is a correspondence between the requested
616 	 * frequency, the actual frequency, and the observed clock rate.
617 	 */
618 
619 	if (!intel_rps_is_enabled(rps))
620 		return 0;
621 
622 	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
623 		return 0;
624 
625 	if (CPU_LATENCY >= 0)
626 		cpu_latency_qos_add_request(&qos, CPU_LATENCY);
627 
628 	intel_gt_pm_wait_for_idle(gt);
629 	saved_work = rps->work.func;
630 	rps->work.func = dummy_rps_work;
631 
632 	for_each_engine(engine, gt, id) {
633 		struct i915_request *rq;
634 		struct i915_vma *vma;
635 		u32 *cancel, *cntr;
636 		struct {
637 			u64 count;
638 			int freq;
639 		} min, max;
640 
641 		st_engine_heartbeat_disable(engine);
642 
643 		vma = create_spin_counter(engine,
644 					  engine->kernel_context->vm, false,
645 					  &cancel, &cntr);
646 		if (IS_ERR(vma)) {
647 			err = PTR_ERR(vma);
648 			st_engine_heartbeat_enable(engine);
649 			break;
650 		}
651 
652 		rq = intel_engine_create_kernel_request(engine);
653 		if (IS_ERR(rq)) {
654 			err = PTR_ERR(rq);
655 			goto err_vma;
656 		}
657 
658 		err = i915_vma_move_to_active(vma, rq, 0);
659 		if (!err)
660 			err = rq->engine->emit_bb_start(rq,
661 							i915_vma_offset(vma),
662 							PAGE_SIZE, 0);
663 		i915_request_add(rq);
664 		if (err)
665 			goto err_vma;
666 
667 		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
668 			     10)) {
669 			pr_err("%s: timed loop did not start\n",
670 			       engine->name);
671 			goto err_vma;
672 		}
673 
674 		min.freq = rps->min_freq;
675 		min.count = measure_cs_frequency_at(rps, engine, &min.freq);
676 
677 		max.freq = rps->max_freq;
678 		max.count = measure_cs_frequency_at(rps, engine, &max.freq);
679 
680 		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
681 			engine->name,
682 			min.count, intel_gpu_freq(rps, min.freq),
683 			max.count, intel_gpu_freq(rps, max.freq),
684 			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
685 						     max.freq * min.count));
686 
687 		if (!scaled_within(max.freq * min.count,
688 				   min.freq * max.count,
689 				   2, 3)) {
690 			int f;
691 
692 			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
693 			       engine->name,
694 			       max.freq * min.count,
695 			       min.freq * max.count);
696 			show_pcu_config(rps);
697 
698 			for (f = min.freq + 1; f <= rps->max_freq; f++) {
699 				int act = f;
700 				u64 count;
701 
702 				count = measure_cs_frequency_at(rps, engine, &act);
703 				if (act < f)
704 					break;
705 
706 				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
707 					engine->name,
708 					act, intel_gpu_freq(rps, act), count,
709 					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
710 								     act * min.count));
711 
712 				f = act; /* may skip ahead [pcu granularity] */
713 			}
714 
715 			err = -EINTR; /* ignore error, continue on with test */
716 		}
717 
718 err_vma:
719 		*cancel = MI_BATCH_BUFFER_END;
720 		i915_gem_object_flush_map(vma->obj);
721 		i915_gem_object_unpin_map(vma->obj);
722 		i915_vma_unpin(vma);
723 		i915_vma_unlock(vma);
724 		i915_vma_put(vma);
725 
726 		st_engine_heartbeat_enable(engine);
727 		if (igt_flush_test(gt->i915))
728 			err = -EIO;
729 		if (err)
730 			break;
731 	}
732 
733 	intel_gt_pm_wait_for_idle(gt);
734 	rps->work.func = saved_work;
735 
736 	if (CPU_LATENCY >= 0)
737 		cpu_latency_qos_remove_request(&qos);
738 
739 	return err;
740 }
741 
742 int live_rps_frequency_srm(void *arg)
743 {
744 	void (*saved_work)(struct work_struct *wrk);
745 	struct intel_gt *gt = arg;
746 	struct intel_rps *rps = &gt->rps;
747 	struct intel_engine_cs *engine;
748 	struct pm_qos_request qos;
749 	enum intel_engine_id id;
750 	int err = 0;
751 
752 	/*
753 	 * The premise is that the GPU does change frequency at our behest.
754 	 * Let's check there is a correspondence between the requested
755 	 * frequency, the actual frequency, and the observed clock rate.
756 	 */
757 
758 	if (!intel_rps_is_enabled(rps))
759 		return 0;
760 
761 	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
762 		return 0;
763 
764 	if (CPU_LATENCY >= 0)
765 		cpu_latency_qos_add_request(&qos, CPU_LATENCY);
766 
767 	intel_gt_pm_wait_for_idle(gt);
768 	saved_work = rps->work.func;
769 	rps->work.func = dummy_rps_work;
770 
771 	for_each_engine(engine, gt, id) {
772 		struct i915_request *rq;
773 		struct i915_vma *vma;
774 		u32 *cancel, *cntr;
775 		struct {
776 			u64 count;
777 			int freq;
778 		} min, max;
779 
780 		st_engine_heartbeat_disable(engine);
781 
782 		vma = create_spin_counter(engine,
783 					  engine->kernel_context->vm, true,
784 					  &cancel, &cntr);
785 		if (IS_ERR(vma)) {
786 			err = PTR_ERR(vma);
787 			st_engine_heartbeat_enable(engine);
788 			break;
789 		}
790 
791 		rq = intel_engine_create_kernel_request(engine);
792 		if (IS_ERR(rq)) {
793 			err = PTR_ERR(rq);
794 			goto err_vma;
795 		}
796 
797 		err = i915_vma_move_to_active(vma, rq, 0);
798 		if (!err)
799 			err = rq->engine->emit_bb_start(rq,
800 							i915_vma_offset(vma),
801 							PAGE_SIZE, 0);
802 		i915_request_add(rq);
803 		if (err)
804 			goto err_vma;
805 
806 		if (wait_for(READ_ONCE(*cntr), 10)) {
807 			pr_err("%s: timed loop did not start\n",
808 			       engine->name);
809 			goto err_vma;
810 		}
811 
812 		min.freq = rps->min_freq;
813 		min.count = measure_frequency_at(rps, cntr, &min.freq);
814 
815 		max.freq = rps->max_freq;
816 		max.count = measure_frequency_at(rps, cntr, &max.freq);
817 
818 		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
819 			engine->name,
820 			min.count, intel_gpu_freq(rps, min.freq),
821 			max.count, intel_gpu_freq(rps, max.freq),
822 			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
823 						     max.freq * min.count));
824 
825 		if (!scaled_within(max.freq * min.count,
826 				   min.freq * max.count,
827 				   1, 2)) {
828 			int f;
829 
830 			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
831 			       engine->name,
832 			       max.freq * min.count,
833 			       min.freq * max.count);
834 			show_pcu_config(rps);
835 
836 			for (f = min.freq + 1; f <= rps->max_freq; f++) {
837 				int act = f;
838 				u64 count;
839 
840 				count = measure_frequency_at(rps, cntr, &act);
841 				if (act < f)
842 					break;
843 
844 				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
845 					engine->name,
846 					act, intel_gpu_freq(rps, act), count,
847 					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
848 								     act * min.count));
849 
850 				f = act; /* may skip ahead [pcu granularity] */
851 			}
852 
853 			err = -EINTR; /* ignore error, continue on with test */
854 		}
855 
856 err_vma:
857 		*cancel = MI_BATCH_BUFFER_END;
858 		i915_gem_object_flush_map(vma->obj);
859 		i915_gem_object_unpin_map(vma->obj);
860 		i915_vma_unpin(vma);
861 		i915_vma_unlock(vma);
862 		i915_vma_put(vma);
863 
864 		st_engine_heartbeat_enable(engine);
865 		if (igt_flush_test(gt->i915))
866 			err = -EIO;
867 		if (err)
868 			break;
869 	}
870 
871 	intel_gt_pm_wait_for_idle(gt);
872 	rps->work.func = saved_work;
873 
874 	if (CPU_LATENCY >= 0)
875 		cpu_latency_qos_remove_request(&qos);
876 
877 	return err;
878 }
879 
880 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
881 {
882 	/* Flush any previous EI */
883 	usleep_range(timeout_us, 2 * timeout_us);
884 
885 	/* Reset the interrupt status */
886 	rps_disable_interrupts(rps);
887 	GEM_BUG_ON(rps->pm_iir);
888 	rps_enable_interrupts(rps);
889 
890 	/* And then wait for the timeout, for real this time */
891 	usleep_range(2 * timeout_us, 3 * timeout_us);
892 }
893 
894 static int __rps_up_interrupt(struct intel_rps *rps,
895 			      struct intel_engine_cs *engine,
896 			      struct igt_spinner *spin)
897 {
898 	struct intel_uncore *uncore = engine->uncore;
899 	struct i915_request *rq;
900 	u32 timeout;
901 
902 	if (!intel_engine_can_store_dword(engine))
903 		return 0;
904 
905 	rps_set_check(rps, rps->min_freq);
906 
907 	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
908 	if (IS_ERR(rq))
909 		return PTR_ERR(rq);
910 
911 	i915_request_get(rq);
912 	i915_request_add(rq);
913 
914 	if (!igt_wait_for_spinner(spin, rq)) {
915 		pr_err("%s: RPS spinner did not start\n",
916 		       engine->name);
917 		i915_request_put(rq);
918 		intel_gt_set_wedged(engine->gt);
919 		return -EIO;
920 	}
921 
922 	if (!intel_rps_is_active(rps)) {
923 		pr_err("%s: RPS not enabled on starting spinner\n",
924 		       engine->name);
925 		igt_spinner_end(spin);
926 		i915_request_put(rq);
927 		return -EINVAL;
928 	}
929 
930 	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
931 		pr_err("%s: RPS did not register UP interrupt\n",
932 		       engine->name);
933 		i915_request_put(rq);
934 		return -EINVAL;
935 	}
936 
937 	if (rps->last_freq != rps->min_freq) {
938 		pr_err("%s: RPS did not program min frequency\n",
939 		       engine->name);
940 		i915_request_put(rq);
941 		return -EINVAL;
942 	}
943 
944 	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
945 	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
946 	timeout = DIV_ROUND_UP(timeout, 1000);
947 
948 	sleep_for_ei(rps, timeout);
949 	GEM_BUG_ON(i915_request_completed(rq));
950 
951 	igt_spinner_end(spin);
952 	i915_request_put(rq);
953 
954 	if (rps->cur_freq != rps->min_freq) {
955 		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
956 		       engine->name, intel_rps_read_actual_frequency(rps));
957 		return -EINVAL;
958 	}
959 
960 	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
961 		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
962 		       engine->name, rps->pm_iir,
963 		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
964 		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
965 		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
966 		return -EINVAL;
967 	}
968 
969 	return 0;
970 }
971 
972 static int __rps_down_interrupt(struct intel_rps *rps,
973 				struct intel_engine_cs *engine)
974 {
975 	struct intel_uncore *uncore = engine->uncore;
976 	u32 timeout;
977 
978 	rps_set_check(rps, rps->max_freq);
979 
980 	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
981 		pr_err("%s: RPS did not register DOWN interrupt\n",
982 		       engine->name);
983 		return -EINVAL;
984 	}
985 
986 	if (rps->last_freq != rps->max_freq) {
987 		pr_err("%s: RPS did not program max frequency\n",
988 		       engine->name);
989 		return -EINVAL;
990 	}
991 
992 	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
993 	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
994 	timeout = DIV_ROUND_UP(timeout, 1000);
995 
996 	sleep_for_ei(rps, timeout);
997 
998 	if (rps->cur_freq != rps->max_freq) {
999 		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
1000 		       engine->name,
1001 		       intel_rps_read_actual_frequency(rps));
1002 		return -EINVAL;
1003 	}
1004 
1005 	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1006 		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1007 		       engine->name, rps->pm_iir,
1008 		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1009 		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1010 		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1011 		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1012 		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1013 		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
1014 		return -EINVAL;
1015 	}
1016 
1017 	return 0;
1018 }
1019 
1020 int live_rps_interrupt(void *arg)
1021 {
1022 	struct intel_gt *gt = arg;
1023 	struct intel_rps *rps = &gt->rps;
1024 	void (*saved_work)(struct work_struct *wrk);
1025 	struct intel_engine_cs *engine;
1026 	enum intel_engine_id id;
1027 	struct igt_spinner spin;
1028 	intel_wakeref_t wakeref;
1029 	u32 pm_events;
1030 	int err = 0;
1031 
1032 	/*
1033 	 * First, let's check whether or not we are receiving interrupts.
1034 	 */
1035 
1036 	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1037 		return 0;
1038 
1039 	pm_events = 0;
1040 	with_intel_gt_pm(gt, wakeref)
1041 		pm_events = rps->pm_events;
1042 	if (!pm_events) {
1043 		pr_err("No RPS PM events registered, but RPS is enabled?\n");
1044 		return -ENODEV;
1045 	}
1046 
1047 	if (igt_spinner_init(&spin, gt))
1048 		return -ENOMEM;
1049 
1050 	intel_gt_pm_wait_for_idle(gt);
1051 	saved_work = rps->work.func;
1052 	rps->work.func = dummy_rps_work;
1053 
1054 	for_each_engine(engine, gt, id) {
1055 		/* Keep the engine busy with a spinner; expect an UP! */
1056 		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1057 			intel_gt_pm_wait_for_idle(engine->gt);
1058 			GEM_BUG_ON(intel_rps_is_active(rps));
1059 
1060 			st_engine_heartbeat_disable(engine);
1061 
1062 			err = __rps_up_interrupt(rps, engine, &spin);
1063 
1064 			st_engine_heartbeat_enable(engine);
1065 			if (err)
1066 				goto out;
1067 
1068 			intel_gt_pm_wait_for_idle(engine->gt);
1069 		}
1070 
1071 		/* Keep the engine awake but idle and check for DOWN */
1072 		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1073 			st_engine_heartbeat_disable(engine);
1074 			intel_rc6_disable(&gt->rc6);
1075 
1076 			err = __rps_down_interrupt(rps, engine);
1077 
1078 			intel_rc6_enable(&gt->rc6);
1079 			st_engine_heartbeat_enable(engine);
1080 			if (err)
1081 				goto out;
1082 		}
1083 	}
1084 
1085 out:
1086 	if (igt_flush_test(gt->i915))
1087 		err = -EIO;
1088 
1089 	igt_spinner_fini(&spin);
1090 
1091 	intel_gt_pm_wait_for_idle(gt);
1092 	rps->work.func = saved_work;
1093 
1094 	return err;
1095 }
1096 
1097 static u64 __measure_power(int duration_ms)
1098 {
1099 	u64 dE, dt;
1100 
1101 	dE = librapl_energy_uJ();
1102 	dt = ktime_get();
1103 	usleep_range(1000 * duration_ms, 2000 * duration_ms);
1104 	dE = librapl_energy_uJ() - dE;
1105 	dt = ktime_get() - dt;
1106 
1107 	return div64_u64(1000 * 1000 * dE, dt);
1108 }
1109 
1110 static u64 measure_power(struct intel_rps *rps, int *freq)
1111 {
1112 	u64 x[5];
1113 	int i;
1114 
1115 	for (i = 0; i < 5; i++)
1116 		x[i] = __measure_power(5);
1117 
1118 	*freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
1119 
1120 	/* A simple triangle filter for better result stability */
1121 	sort(x, 5, sizeof(*x), cmp_u64, NULL);
1122 	return div_u64(x[1] + 2 * x[2] + x[3], 4);
1123 }
1124 
1125 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1126 {
1127 	*freq = rps_set_check(rps, *freq);
1128 	return measure_power(rps, freq);
1129 }
1130 
1131 int live_rps_power(void *arg)
1132 {
1133 	struct intel_gt *gt = arg;
1134 	struct intel_rps *rps = &gt->rps;
1135 	void (*saved_work)(struct work_struct *wrk);
1136 	struct intel_engine_cs *engine;
1137 	enum intel_engine_id id;
1138 	struct igt_spinner spin;
1139 	int err = 0;
1140 
1141 	/*
1142 	 * Our fundamental assumption is that running at lower frequency
1143 	 * actually saves power. Let's see if our RAPL measurement support
1144 	 * that theory.
1145 	 */
1146 
1147 	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1148 		return 0;
1149 
1150 	if (!librapl_supported(gt->i915))
1151 		return 0;
1152 
1153 	if (igt_spinner_init(&spin, gt))
1154 		return -ENOMEM;
1155 
1156 	intel_gt_pm_wait_for_idle(gt);
1157 	saved_work = rps->work.func;
1158 	rps->work.func = dummy_rps_work;
1159 
1160 	for_each_engine(engine, gt, id) {
1161 		struct i915_request *rq;
1162 		struct {
1163 			u64 power;
1164 			int freq;
1165 		} min, max;
1166 
1167 		if (!intel_engine_can_store_dword(engine))
1168 			continue;
1169 
1170 		st_engine_heartbeat_disable(engine);
1171 
1172 		rq = igt_spinner_create_request(&spin,
1173 						engine->kernel_context,
1174 						MI_NOOP);
1175 		if (IS_ERR(rq)) {
1176 			st_engine_heartbeat_enable(engine);
1177 			err = PTR_ERR(rq);
1178 			break;
1179 		}
1180 
1181 		i915_request_add(rq);
1182 
1183 		if (!igt_wait_for_spinner(&spin, rq)) {
1184 			pr_err("%s: RPS spinner did not start\n",
1185 			       engine->name);
1186 			igt_spinner_end(&spin);
1187 			st_engine_heartbeat_enable(engine);
1188 			intel_gt_set_wedged(engine->gt);
1189 			err = -EIO;
1190 			break;
1191 		}
1192 
1193 		max.freq = rps->max_freq;
1194 		max.power = measure_power_at(rps, &max.freq);
1195 
1196 		min.freq = rps->min_freq;
1197 		min.power = measure_power_at(rps, &min.freq);
1198 
1199 		igt_spinner_end(&spin);
1200 		st_engine_heartbeat_enable(engine);
1201 
1202 		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1203 			engine->name,
1204 			min.power, intel_gpu_freq(rps, min.freq),
1205 			max.power, intel_gpu_freq(rps, max.freq));
1206 
1207 		if (10 * min.freq >= 9 * max.freq) {
1208 			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1209 				  min.freq, intel_gpu_freq(rps, min.freq),
1210 				  max.freq, intel_gpu_freq(rps, max.freq));
1211 			continue;
1212 		}
1213 
1214 		if (11 * min.power > 10 * max.power) {
1215 			pr_err("%s: did not conserve power when setting lower frequency!\n",
1216 			       engine->name);
1217 			err = -EINVAL;
1218 			break;
1219 		}
1220 
1221 		if (igt_flush_test(gt->i915)) {
1222 			err = -EIO;
1223 			break;
1224 		}
1225 	}
1226 
1227 	igt_spinner_fini(&spin);
1228 
1229 	intel_gt_pm_wait_for_idle(gt);
1230 	rps->work.func = saved_work;
1231 
1232 	return err;
1233 }
1234 
1235 int live_rps_dynamic(void *arg)
1236 {
1237 	struct intel_gt *gt = arg;
1238 	struct intel_rps *rps = &gt->rps;
1239 	struct intel_engine_cs *engine;
1240 	enum intel_engine_id id;
1241 	struct igt_spinner spin;
1242 	int err = 0;
1243 
1244 	/*
1245 	 * We've looked at the bascs, and have established that we
1246 	 * can change the clock frequency and that the HW will generate
1247 	 * interrupts based on load. Now we check how we integrate those
1248 	 * moving parts into dynamic reclocking based on load.
1249 	 */
1250 
1251 	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1252 		return 0;
1253 
1254 	if (igt_spinner_init(&spin, gt))
1255 		return -ENOMEM;
1256 
1257 	if (intel_rps_has_interrupts(rps))
1258 		pr_info("RPS has interrupt support\n");
1259 	if (intel_rps_uses_timer(rps))
1260 		pr_info("RPS has timer support\n");
1261 
1262 	for_each_engine(engine, gt, id) {
1263 		struct i915_request *rq;
1264 		struct {
1265 			ktime_t dt;
1266 			u8 freq;
1267 		} min, max;
1268 
1269 		if (!intel_engine_can_store_dword(engine))
1270 			continue;
1271 
1272 		intel_gt_pm_wait_for_idle(gt);
1273 		GEM_BUG_ON(intel_rps_is_active(rps));
1274 		rps->cur_freq = rps->min_freq;
1275 
1276 		intel_engine_pm_get(engine);
1277 		intel_rc6_disable(&gt->rc6);
1278 		GEM_BUG_ON(rps->last_freq != rps->min_freq);
1279 
1280 		rq = igt_spinner_create_request(&spin,
1281 						engine->kernel_context,
1282 						MI_NOOP);
1283 		if (IS_ERR(rq)) {
1284 			err = PTR_ERR(rq);
1285 			goto err;
1286 		}
1287 
1288 		i915_request_add(rq);
1289 
1290 		max.dt = ktime_get();
1291 		max.freq = wait_for_freq(rps, rps->max_freq, 500);
1292 		max.dt = ktime_sub(ktime_get(), max.dt);
1293 
1294 		igt_spinner_end(&spin);
1295 
1296 		min.dt = ktime_get();
1297 		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1298 		min.dt = ktime_sub(ktime_get(), min.dt);
1299 
1300 		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1301 			engine->name,
1302 			max.freq, intel_gpu_freq(rps, max.freq),
1303 			ktime_to_ns(max.dt),
1304 			min.freq, intel_gpu_freq(rps, min.freq),
1305 			ktime_to_ns(min.dt));
1306 		if (min.freq >= max.freq) {
1307 			pr_err("%s: dynamic reclocking of spinner failed\n!",
1308 			       engine->name);
1309 			err = -EINVAL;
1310 		}
1311 
1312 err:
1313 		intel_rc6_enable(&gt->rc6);
1314 		intel_engine_pm_put(engine);
1315 
1316 		if (igt_flush_test(gt->i915))
1317 			err = -EIO;
1318 		if (err)
1319 			break;
1320 	}
1321 
1322 	igt_spinner_fini(&spin);
1323 
1324 	return err;
1325 }
1326