xref: /linux/drivers/gpu/drm/i915/gt/intel_rps.c (revision a1ff5a7d78a036d6c2178ee5acd6ba4946243800)
1  // SPDX-License-Identifier: MIT
2  /*
3   * Copyright © 2019 Intel Corporation
4   */
5  
6  #include <linux/string_helpers.h>
7  
8  #include <drm/intel/i915_drm.h>
9  
10  #include "display/intel_display.h"
11  #include "display/intel_display_irq.h"
12  #include "i915_drv.h"
13  #include "i915_irq.h"
14  #include "i915_reg.h"
15  #include "intel_breadcrumbs.h"
16  #include "intel_gt.h"
17  #include "intel_gt_clock_utils.h"
18  #include "intel_gt_irq.h"
19  #include "intel_gt_pm.h"
20  #include "intel_gt_pm_irq.h"
21  #include "intel_gt_print.h"
22  #include "intel_gt_regs.h"
23  #include "intel_mchbar_regs.h"
24  #include "intel_pcode.h"
25  #include "intel_rps.h"
26  #include "vlv_sideband.h"
27  #include "../../../platform/x86/intel_ips.h"
28  
29  #define BUSY_MAX_EI	20u /* ms */
30  
31  /*
32   * Lock protecting IPS related data structures
33   */
34  static DEFINE_SPINLOCK(mchdev_lock);
35  
36  static struct intel_gt *rps_to_gt(struct intel_rps *rps)
37  {
38  	return container_of(rps, struct intel_gt, rps);
39  }
40  
41  static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
42  {
43  	return rps_to_gt(rps)->i915;
44  }
45  
46  static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
47  {
48  	return rps_to_gt(rps)->uncore;
49  }
50  
51  static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
52  {
53  	struct intel_gt *gt = rps_to_gt(rps);
54  
55  	return &gt_to_guc(gt)->slpc;
56  }
57  
58  static bool rps_uses_slpc(struct intel_rps *rps)
59  {
60  	struct intel_gt *gt = rps_to_gt(rps);
61  
62  	return intel_uc_uses_guc_slpc(&gt->uc);
63  }
64  
65  static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
66  {
67  	return mask & ~rps->pm_intrmsk_mbz;
68  }
69  
70  static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
71  {
72  	intel_uncore_write_fw(uncore, reg, val);
73  }
74  
75  static void rps_timer(struct timer_list *t)
76  {
77  	struct intel_rps *rps = from_timer(rps, t, timer);
78  	struct intel_gt *gt = rps_to_gt(rps);
79  	struct intel_engine_cs *engine;
80  	ktime_t dt, last, timestamp;
81  	enum intel_engine_id id;
82  	s64 max_busy[3] = {};
83  
84  	timestamp = 0;
85  	for_each_engine(engine, gt, id) {
86  		s64 busy;
87  		int i;
88  
89  		dt = intel_engine_get_busy_time(engine, &timestamp);
90  		last = engine->stats.rps;
91  		engine->stats.rps = dt;
92  
93  		busy = ktime_to_ns(ktime_sub(dt, last));
94  		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
95  			if (busy > max_busy[i])
96  				swap(busy, max_busy[i]);
97  		}
98  	}
99  	last = rps->pm_timestamp;
100  	rps->pm_timestamp = timestamp;
101  
102  	if (intel_rps_is_active(rps)) {
103  		s64 busy;
104  		int i;
105  
106  		dt = ktime_sub(timestamp, last);
107  
108  		/*
109  		 * Our goal is to evaluate each engine independently, so we run
110  		 * at the lowest clocks required to sustain the heaviest
111  		 * workload. However, a task may be split into sequential
112  		 * dependent operations across a set of engines, such that
113  		 * the independent contributions do not account for high load,
114  		 * but overall the task is GPU bound. For example, consider
115  		 * video decode on vcs followed by colour post-processing
116  		 * on vecs, followed by general post-processing on rcs.
117  		 * Since multiple engines being active may indicate a single
118  		 * continuous workload across all engines, we hedge our
119  		 * bets by only contributing a fraction of the distributed
120  		 * load into our busyness calculation.
121  		 */
122  		busy = max_busy[0];
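		/* Weight the 2nd and 3rd busiest engines by 1/2 and 1/4 (the "1 << i" divisor below) */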
123  		for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
124  			if (!max_busy[i])
125  				break;
126  
127  			busy += div_u64(max_busy[i], 1 << i);
128  		}
129  		GT_TRACE(gt,
130  			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
131  			 busy, (int)div64_u64(100 * busy, dt),
132  			 max_busy[0], max_busy[1], max_busy[2],
133  			 rps->pm_interval);
134  
135  		if (100 * busy > rps->power.up_threshold * dt &&
136  		    rps->cur_freq < rps->max_freq_softlimit) {
137  			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
138  			rps->pm_interval = 1;
139  			queue_work(gt->i915->unordered_wq, &rps->work);
140  		} else if (100 * busy < rps->power.down_threshold * dt &&
141  			   rps->cur_freq > rps->min_freq_softlimit) {
142  			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
143  			rps->pm_interval = 1;
144  			queue_work(gt->i915->unordered_wq, &rps->work);
145  		} else {
146  			rps->last_adj = 0;
147  		}
148  
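		/* Re-arm the sampler, backing off the interval (doubled each pass, capped at BUSY_MAX_EI ms) */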
149  		mod_timer(&rps->timer,
150  			  jiffies + msecs_to_jiffies(rps->pm_interval));
151  		rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
152  	}
153  }
154  
155  static void rps_start_timer(struct intel_rps *rps)
156  {
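	/* Re-base pm_timestamp so that time spent parked is excluded from the next busyness sample */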
157  	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
158  	rps->pm_interval = 1;
159  	mod_timer(&rps->timer, jiffies + 1);
160  }
161  
162  static void rps_stop_timer(struct intel_rps *rps)
163  {
164  	del_timer_sync(&rps->timer);
165  	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
166  	cancel_work_sync(&rps->work);
167  }
168  
169  static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
170  {
171  	u32 mask = 0;
172  
173  	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
174  	if (val > rps->min_freq_softlimit)
175  		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
176  			 GEN6_PM_RP_DOWN_THRESHOLD |
177  			 GEN6_PM_RP_DOWN_TIMEOUT);
178  
179  	if (val < rps->max_freq_softlimit)
180  		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
181  
182  	mask &= rps->pm_events;
183  
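	/* GEN6_PMINTRMSK is a disable mask (a set bit masks that interrupt), hence the inversion */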
184  	return rps_pm_sanitize_mask(rps, ~mask);
185  }
186  
187  static void rps_reset_ei(struct intel_rps *rps)
188  {
189  	memset(&rps->ei, 0, sizeof(rps->ei));
190  }
191  
192  static void rps_enable_interrupts(struct intel_rps *rps)
193  {
194  	struct intel_gt *gt = rps_to_gt(rps);
195  
196  	GEM_BUG_ON(rps_uses_slpc(rps));
197  
198  	GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
199  		 rps->pm_events, rps_pm_mask(rps, rps->last_freq));
200  
201  	rps_reset_ei(rps);
202  
203  	spin_lock_irq(gt->irq_lock);
204  	gen6_gt_pm_enable_irq(gt, rps->pm_events);
205  	spin_unlock_irq(gt->irq_lock);
206  
207  	intel_uncore_write(gt->uncore,
208  			   GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
209  }
210  
211  static void gen6_rps_reset_interrupts(struct intel_rps *rps)
212  {
213  	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
214  }
215  
216  static void gen11_rps_reset_interrupts(struct intel_rps *rps)
217  {
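	/* Keep acking until no GTPM IIR bits remain pending */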
218  	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
219  		;
220  }
221  
222  static void rps_reset_interrupts(struct intel_rps *rps)
223  {
224  	struct intel_gt *gt = rps_to_gt(rps);
225  
226  	spin_lock_irq(gt->irq_lock);
227  	if (GRAPHICS_VER(gt->i915) >= 11)
228  		gen11_rps_reset_interrupts(rps);
229  	else
230  		gen6_rps_reset_interrupts(rps);
231  
232  	rps->pm_iir = 0;
233  	spin_unlock_irq(gt->irq_lock);
234  }
235  
236  static void rps_disable_interrupts(struct intel_rps *rps)
237  {
238  	struct intel_gt *gt = rps_to_gt(rps);
239  
240  	intel_uncore_write(gt->uncore,
241  			   GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
242  
243  	spin_lock_irq(gt->irq_lock);
244  	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
245  	spin_unlock_irq(gt->irq_lock);
246  
247  	intel_synchronize_irq(gt->i915);
248  
249  	/*
250  	 * Now that we will not be generating any more work, flush any
251  	 * outstanding tasks. As we are called on the RPS idle path,
252  	 * we will reset the GPU to minimum frequencies, so the current
253  	 * state of the worker can be discarded.
254  	 */
255  	cancel_work_sync(&rps->work);
256  
257  	rps_reset_interrupts(rps);
258  	GT_TRACE(gt, "interrupts:off\n");
259  }
260  
261  static const struct cparams {
262  	u16 i;
263  	u16 t;
264  	u16 m;
265  	u16 c;
266  } cparams[] = {
267  	{ 1, 1333, 301, 28664 },
268  	{ 1, 1067, 294, 24460 },
269  	{ 1, 800, 294, 25192 },
270  	{ 0, 1333, 276, 27605 },
271  	{ 0, 1067, 276, 27605 },
272  	{ 0, 800, 231, 23784 },
273  };
274  
275  static void gen5_rps_init(struct intel_rps *rps)
276  {
277  	struct drm_i915_private *i915 = rps_to_i915(rps);
278  	struct intel_uncore *uncore = rps_to_uncore(rps);
279  	u8 fmax, fmin, fstart;
280  	u32 rgvmodectl;
281  	int c_m, i;
282  
283  	if (i915->fsb_freq <= 3200000)
284  		c_m = 0;
285  	else if (i915->fsb_freq <= 4800000)
286  		c_m = 1;
287  	else
288  		c_m = 2;
289  
290  	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
291  		if (cparams[i].i == c_m &&
292  		    cparams[i].t == DIV_ROUND_CLOSEST(i915->mem_freq, 1000)) {
293  			rps->ips.m = cparams[i].m;
294  			rps->ips.c = cparams[i].c;
295  			break;
296  		}
297  	}
298  
299  	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
300  
301  	/* Set up min, max, and cur for interrupt handling */
302  	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
303  	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
304  	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
305  		MEMMODE_FSTART_SHIFT;
306  	drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
307  		fmax, fmin, fstart);
308  
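	/* The ilk bin encoding is inverted (a lower bin selects a higher frequency), so fmax/fmin are deliberately swapped; see gen5_invert_freq() */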
309  	rps->min_freq = fmax;
310  	rps->efficient_freq = fstart;
311  	rps->max_freq = fmin;
312  }
313  
314  static unsigned long
315  __ips_chipset_val(struct intel_ips *ips)
316  {
317  	struct intel_uncore *uncore =
318  		rps_to_uncore(container_of(ips, struct intel_rps, ips));
319  	unsigned long now = jiffies_to_msecs(jiffies), dt;
320  	unsigned long result;
321  	u64 total, delta;
322  
323  	lockdep_assert_held(&mchdev_lock);
324  
325  	/*
326  	 * Prevent division-by-zero if we are asking too fast.
327  	 * Also, we don't get interesting results if we are polling
328  	 * faster than once in 10ms, so just return the saved value
329  	 * in such cases.
330  	 */
331  	dt = now - ips->last_time1;
332  	if (dt <= 10)
333  		return ips->chipset_power;
334  
335  	/* FIXME: handle per-counter overflow */
336  	total = intel_uncore_read(uncore, DMIEC);
337  	total += intel_uncore_read(uncore, DDREC);
338  	total += intel_uncore_read(uncore, CSIEC);
339  
340  	delta = total - ips->last_count1;
341  
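	/* Apply the platform slope/intercept (m, c) picked from cparams[] in gen5_rps_init() */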
342  	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
343  
344  	ips->last_count1 = total;
345  	ips->last_time1 = now;
346  
347  	ips->chipset_power = result;
348  
349  	return result;
350  }
351  
352  static unsigned long ips_mch_val(struct intel_uncore *uncore)
353  {
354  	unsigned int m, x, b;
355  	u32 tsfs;
356  
357  	tsfs = intel_uncore_read(uncore, TSFS);
358  	x = intel_uncore_read8(uncore, TR1);
359  
360  	b = tsfs & TSFS_INTR_MASK;
361  	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
362  
363  	return m * x / 127 - b;
364  }
365  
366  static int _pxvid_to_vd(u8 pxvid)
367  {
368  	if (pxvid == 0)
369  		return 0;
370  
371  	if (pxvid >= 8 && pxvid < 31)
372  		pxvid = 31;
373  
374  	return (pxvid + 2) * 125;
375  }
376  
377  static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
378  {
379  	const int vd = _pxvid_to_vd(pxvid);
380  
381  	if (INTEL_INFO(i915)->is_mobile)
382  		return max(vd - 1125, 0);
383  
384  	return vd;
385  }
386  
387  static void __gen5_ips_update(struct intel_ips *ips)
388  {
389  	struct intel_uncore *uncore =
390  		rps_to_uncore(container_of(ips, struct intel_rps, ips));
391  	u64 now, delta, dt;
392  	u32 count;
393  
394  	lockdep_assert_held(&mchdev_lock);
395  
396  	now = ktime_get_raw_ns();
397  	dt = now - ips->last_time2;
398  	do_div(dt, NSEC_PER_MSEC);
399  
400  	/* Don't divide by 0 */
401  	if (dt <= 10)
402  		return;
403  
404  	count = intel_uncore_read(uncore, GFXEC);
405  	delta = count - ips->last_count2;
406  
407  	ips->last_count2 = count;
408  	ips->last_time2 = now;
409  
410  	/* More magic constants... */
411  	ips->gfx_power = div_u64(delta * 1181, dt * 10);
412  }
413  
414  static void gen5_rps_update(struct intel_rps *rps)
415  {
416  	spin_lock_irq(&mchdev_lock);
417  	__gen5_ips_update(&rps->ips);
418  	spin_unlock_irq(&mchdev_lock);
419  }
420  
421  static unsigned int gen5_invert_freq(struct intel_rps *rps,
422  				     unsigned int val)
423  {
424  	/* Invert the frequency bin into an ips delay */
425  	val = rps->max_freq - val;
426  	val = rps->min_freq + val;
427  
428  	return val;
429  }
430  
431  static int __gen5_rps_set(struct intel_rps *rps, u8 val)
432  {
433  	struct intel_uncore *uncore = rps_to_uncore(rps);
434  	u16 rgvswctl;
435  
436  	lockdep_assert_held(&mchdev_lock);
437  
438  	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
439  	if (rgvswctl & MEMCTL_CMD_STS) {
440  		drm_dbg(&rps_to_i915(rps)->drm,
441  			"gpu busy, RCS change rejected\n");
442  		return -EBUSY; /* still busy with another command */
443  	}
444  
445  	/* Invert the frequency bin into an ips delay */
446  	val = gen5_invert_freq(rps, val);
447  
448  	rgvswctl =
449  		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
450  		(val << MEMCTL_FREQ_SHIFT) |
451  		MEMCTL_SFCAVM;
452  	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
453  	intel_uncore_posting_read16(uncore, MEMSWCTL);
454  
455  	rgvswctl |= MEMCTL_CMD_STS;
456  	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
457  
458  	return 0;
459  }
460  
461  static int gen5_rps_set(struct intel_rps *rps, u8 val)
462  {
463  	int err;
464  
465  	spin_lock_irq(&mchdev_lock);
466  	err = __gen5_rps_set(rps, val);
467  	spin_unlock_irq(&mchdev_lock);
468  
469  	return err;
470  }
471  
472  static unsigned long intel_pxfreq(u32 vidfreq)
473  {
474  	int div = (vidfreq & 0x3f0000) >> 16;
475  	int post = (vidfreq & 0x3000) >> 12;
476  	int pre = (vidfreq & 0x7);
477  
478  	if (!pre)
479  		return 0;
480  
481  	return div * 133333 / (pre << post);
482  }
483  
484  static unsigned int init_emon(struct intel_uncore *uncore)
485  {
486  	u8 pxw[16];
487  	int i;
488  
489  	/* Disable to program */
490  	intel_uncore_write(uncore, ECR, 0);
491  	intel_uncore_posting_read(uncore, ECR);
492  
493  	/* Program energy weights for various events */
494  	intel_uncore_write(uncore, SDEW, 0x15040d00);
495  	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
496  	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
497  	intel_uncore_write(uncore, CSIEW2, 0x04000004);
498  
499  	for (i = 0; i < 5; i++)
500  		intel_uncore_write(uncore, PEW(i), 0);
501  	for (i = 0; i < 3; i++)
502  		intel_uncore_write(uncore, DEW(i), 0);
503  
504  	/* Program P-state weights to account for frequency power adjustment */
505  	for (i = 0; i < 16; i++) {
506  		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
507  		unsigned int freq = intel_pxfreq(pxvidfreq);
508  		unsigned int vid =
509  			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
510  		unsigned int val;
511  
512  		val = vid * vid * freq / 1000 * 255;
513  		val /= 127 * 127 * 900;
514  
515  		pxw[i] = val;
516  	}
517  	/* Render standby states get 0 weight */
518  	pxw[14] = 0;
519  	pxw[15] = 0;
520  
521  	for (i = 0; i < 4; i++) {
522  		intel_uncore_write(uncore, PXW(i),
523  				   pxw[i * 4 + 0] << 24 |
524  				   pxw[i * 4 + 1] << 16 |
525  				   pxw[i * 4 + 2] <<  8 |
526  				   pxw[i * 4 + 3] <<  0);
527  	}
528  
529  	/* Adjust magic regs to magic values (more experimental results) */
530  	intel_uncore_write(uncore, OGW0, 0);
531  	intel_uncore_write(uncore, OGW1, 0);
532  	intel_uncore_write(uncore, EG0, 0x00007f00);
533  	intel_uncore_write(uncore, EG1, 0x0000000e);
534  	intel_uncore_write(uncore, EG2, 0x000e0000);
535  	intel_uncore_write(uncore, EG3, 0x68000300);
536  	intel_uncore_write(uncore, EG4, 0x42000000);
537  	intel_uncore_write(uncore, EG5, 0x00140031);
538  	intel_uncore_write(uncore, EG6, 0);
539  	intel_uncore_write(uncore, EG7, 0);
540  
541  	for (i = 0; i < 8; i++)
542  		intel_uncore_write(uncore, PXWL(i), 0);
543  
544  	/* Enable PMON + select events */
545  	intel_uncore_write(uncore, ECR, 0x80000019);
546  
547  	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
548  }
549  
550  static bool gen5_rps_enable(struct intel_rps *rps)
551  {
552  	struct drm_i915_private *i915 = rps_to_i915(rps);
553  	struct intel_uncore *uncore = rps_to_uncore(rps);
554  	u8 fstart, vstart;
555  	u32 rgvmodectl;
556  
557  	spin_lock_irq(&mchdev_lock);
558  
559  	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
560  
561  	/* Enable temp reporting */
562  	intel_uncore_write16(uncore, PMMISC,
563  			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
564  	intel_uncore_write16(uncore, TSC1,
565  			     intel_uncore_read16(uncore, TSC1) | TSE);
566  
567  	/* 100ms RC evaluation intervals */
568  	intel_uncore_write(uncore, RCUPEI, 100000);
569  	intel_uncore_write(uncore, RCDNEI, 100000);
570  
571  	/* Set max/min thresholds to 90ms and 80ms respectively */
572  	intel_uncore_write(uncore, RCBMAXAVG, 90000);
573  	intel_uncore_write(uncore, RCBMINAVG, 80000);
574  
575  	intel_uncore_write(uncore, MEMIHYST, 1);
576  
577  	/* Set up min, max, and cur for interrupt handling */
578  	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
579  		MEMMODE_FSTART_SHIFT;
580  
581  	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
582  		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
583  
584  	intel_uncore_write(uncore,
585  			   MEMINTREN,
586  			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
587  
588  	intel_uncore_write(uncore, VIDSTART, vstart);
589  	intel_uncore_posting_read(uncore, VIDSTART);
590  
591  	rgvmodectl |= MEMMODE_SWMODE_EN;
592  	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
593  
594  	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
595  			     MEMCTL_CMD_STS) == 0, 10))
596  		drm_err(&uncore->i915->drm,
597  			"stuck trying to change perf mode\n");
598  	mdelay(1);
599  
600  	__gen5_rps_set(rps, rps->cur_freq);
601  
602  	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
603  	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
604  	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
605  	rps->ips.last_time1 = jiffies_to_msecs(jiffies);
606  
607  	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
608  	rps->ips.last_time2 = ktime_get_raw_ns();
609  
610  	spin_lock(&i915->irq_lock);
611  	ilk_enable_display_irq(i915, DE_PCU_EVENT);
612  	spin_unlock(&i915->irq_lock);
613  
614  	spin_unlock_irq(&mchdev_lock);
615  
616  	rps->ips.corr = init_emon(uncore);
617  
618  	return true;
619  }
620  
621  static void gen5_rps_disable(struct intel_rps *rps)
622  {
623  	struct drm_i915_private *i915 = rps_to_i915(rps);
624  	struct intel_uncore *uncore = rps_to_uncore(rps);
625  	u16 rgvswctl;
626  
627  	spin_lock_irq(&mchdev_lock);
628  
629  	spin_lock(&i915->irq_lock);
630  	ilk_disable_display_irq(i915, DE_PCU_EVENT);
631  	spin_unlock(&i915->irq_lock);
632  
633  	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
634  
635  	/* Ack interrupts, disable EFC interrupt */
636  	intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0);
637  	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
638  
639  	/* Go back to the starting frequency */
640  	__gen5_rps_set(rps, rps->idle_freq);
641  	mdelay(1);
642  	rgvswctl |= MEMCTL_CMD_STS;
643  	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
644  	mdelay(1);
645  
646  	spin_unlock_irq(&mchdev_lock);
647  }
648  
649  static u32 rps_limits(struct intel_rps *rps, u8 val)
650  {
651  	u32 limits;
652  
653  	/*
654  	 * Only set the down limit when we've reached the lowest level to avoid
655  	 * getting more interrupts, otherwise leave this clear. This prevents a
656  	 * race in the hw when coming out of rc6: There's a tiny window where
657  	 * the hw runs at the minimal clock before selecting the desired
658  	 * frequency; if the down threshold expires in that window we will not
659  	 * receive a down interrupt.
660  	 */
661  	if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
662  		limits = rps->max_freq_softlimit << 23;
663  		if (val <= rps->min_freq_softlimit)
664  			limits |= rps->min_freq_softlimit << 14;
665  	} else {
666  		limits = rps->max_freq_softlimit << 24;
667  		if (val <= rps->min_freq_softlimit)
668  			limits |= rps->min_freq_softlimit << 16;
669  	}
670  
671  	return limits;
672  }
673  
674  static void rps_set_power(struct intel_rps *rps, int new_power)
675  {
676  	struct intel_gt *gt = rps_to_gt(rps);
677  	struct intel_uncore *uncore = gt->uncore;
678  	u32 ei_up = 0, ei_down = 0;
679  
680  	lockdep_assert_held(&rps->power.mutex);
681  
682  	if (new_power == rps->power.mode)
683  		return;
684  
685  	/* Note the units here are not exactly 1us, but 1280ns. */
686  	switch (new_power) {
687  	case LOW_POWER:
688  		ei_up = 16000;
689  		ei_down = 32000;
690  		break;
691  
692  	case BETWEEN:
693  		ei_up = 13000;
694  		ei_down = 32000;
695  		break;
696  
697  	case HIGH_POWER:
698  		ei_up = 10000;
699  		ei_down = 32000;
700  		break;
701  	}
702  
703  	/* Once byt can survive dynamic sw freq adjustments without
704  	 * hanging the system, this restriction can be lifted.
705  	 */
706  	if (IS_VALLEYVIEW(gt->i915))
707  		goto skip_hw_write;
708  
709  	GT_TRACE(gt,
710  		 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
711  		 new_power,
712  		 rps->power.up_threshold, ei_up,
713  		 rps->power.down_threshold, ei_down);
714  
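	/* The thresholds are percentages of the EI: ei (us) * threshold * 10 == ei (ns) * threshold / 100 */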
715  	set(uncore, GEN6_RP_UP_EI,
716  	    intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
717  	set(uncore, GEN6_RP_UP_THRESHOLD,
718  	    intel_gt_ns_to_pm_interval(gt,
719  				       ei_up * rps->power.up_threshold * 10));
720  
721  	set(uncore, GEN6_RP_DOWN_EI,
722  	    intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
723  	set(uncore, GEN6_RP_DOWN_THRESHOLD,
724  	    intel_gt_ns_to_pm_interval(gt,
725  				       ei_down *
726  				       rps->power.down_threshold * 10));
727  
728  	set(uncore, GEN6_RP_CONTROL,
729  	    (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
730  	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
731  	    GEN6_RP_MEDIA_IS_GFX |
732  	    GEN6_RP_ENABLE |
733  	    GEN6_RP_UP_BUSY_AVG |
734  	    GEN6_RP_DOWN_IDLE_AVG);
735  
736  skip_hw_write:
737  	rps->power.mode = new_power;
738  }
739  
740  static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
741  {
742  	int new_power;
743  
744  	new_power = rps->power.mode;
745  	switch (rps->power.mode) {
746  	case LOW_POWER:
747  		if (val > rps->efficient_freq + 1 &&
748  		    val > rps->cur_freq)
749  			new_power = BETWEEN;
750  		break;
751  
752  	case BETWEEN:
753  		if (val <= rps->efficient_freq &&
754  		    val < rps->cur_freq)
755  			new_power = LOW_POWER;
756  		else if (val >= rps->rp0_freq &&
757  			 val > rps->cur_freq)
758  			new_power = HIGH_POWER;
759  		break;
760  
761  	case HIGH_POWER:
762  		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
763  		    val < rps->cur_freq)
764  			new_power = BETWEEN;
765  		break;
766  	}
767  	/* Max/min bins are special */
768  	if (val <= rps->min_freq_softlimit)
769  		new_power = LOW_POWER;
770  	if (val >= rps->max_freq_softlimit)
771  		new_power = HIGH_POWER;
772  
773  	mutex_lock(&rps->power.mutex);
774  	if (rps->power.interactive)
775  		new_power = HIGH_POWER;
776  	rps_set_power(rps, new_power);
777  	mutex_unlock(&rps->power.mutex);
778  }
779  
780  void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
781  {
782  	GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n",
783  		 str_yes_no(interactive));
784  
785  	mutex_lock(&rps->power.mutex);
786  	if (interactive) {
787  		if (!rps->power.interactive++ && intel_rps_is_active(rps))
788  			rps_set_power(rps, HIGH_POWER);
789  	} else {
790  		GEM_BUG_ON(!rps->power.interactive);
791  		rps->power.interactive--;
792  	}
793  	mutex_unlock(&rps->power.mutex);
794  }
795  
796  static int gen6_rps_set(struct intel_rps *rps, u8 val)
797  {
798  	struct intel_uncore *uncore = rps_to_uncore(rps);
799  	struct drm_i915_private *i915 = rps_to_i915(rps);
800  	u32 swreq;
801  
802  	GEM_BUG_ON(rps_uses_slpc(rps));
803  
804  	if (GRAPHICS_VER(i915) >= 9)
805  		swreq = GEN9_FREQUENCY(val);
806  	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
807  		swreq = HSW_FREQUENCY(val);
808  	else
809  		swreq = (GEN6_FREQUENCY(val) |
810  			 GEN6_OFFSET(0) |
811  			 GEN6_AGGRESSIVE_TURBO);
812  	set(uncore, GEN6_RPNSWREQ, swreq);
813  
814  	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
815  		 val, intel_gpu_freq(rps, val), swreq);
816  
817  	return 0;
818  }
819  
820  static int vlv_rps_set(struct intel_rps *rps, u8 val)
821  {
822  	struct drm_i915_private *i915 = rps_to_i915(rps);
823  	int err;
824  
825  	vlv_punit_get(i915);
826  	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
827  	vlv_punit_put(i915);
828  
829  	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
830  		 val, intel_gpu_freq(rps, val));
831  
832  	return err;
833  }
834  
835  static int rps_set(struct intel_rps *rps, u8 val, bool update)
836  {
837  	struct drm_i915_private *i915 = rps_to_i915(rps);
838  	int err;
839  
840  	if (val == rps->last_freq)
841  		return 0;
842  
843  	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
844  		err = vlv_rps_set(rps, val);
845  	else if (GRAPHICS_VER(i915) >= 6)
846  		err = gen6_rps_set(rps, val);
847  	else
848  		err = gen5_rps_set(rps, val);
849  	if (err)
850  		return err;
851  
852  	if (update && GRAPHICS_VER(i915) >= 6)
853  		gen6_rps_set_thresholds(rps, val);
854  	rps->last_freq = val;
855  
856  	return 0;
857  }
858  
859  void intel_rps_unpark(struct intel_rps *rps)
860  {
861  	if (!intel_rps_is_enabled(rps))
862  		return;
863  
864  	GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
865  
866  	/*
867  	 * Use the user's desired frequency as a guide, but for better
868  	 * performance, jump directly to RPe as our starting frequency.
869  	 */
870  	mutex_lock(&rps->lock);
871  
872  	intel_rps_set_active(rps);
873  	intel_rps_set(rps,
874  		      clamp(rps->cur_freq,
875  			    rps->min_freq_softlimit,
876  			    rps->max_freq_softlimit));
877  
878  	mutex_unlock(&rps->lock);
879  
880  	rps->pm_iir = 0;
881  	if (intel_rps_has_interrupts(rps))
882  		rps_enable_interrupts(rps);
883  	if (intel_rps_uses_timer(rps))
884  		rps_start_timer(rps);
885  
886  	if (GRAPHICS_VER(rps_to_i915(rps)) == 5)
887  		gen5_rps_update(rps);
888  }
889  
890  void intel_rps_park(struct intel_rps *rps)
891  {
892  	int adj;
893  
894  	if (!intel_rps_is_enabled(rps))
895  		return;
896  
897  	if (!intel_rps_clear_active(rps))
898  		return;
899  
900  	if (intel_rps_uses_timer(rps))
901  		rps_stop_timer(rps);
902  	if (intel_rps_has_interrupts(rps))
903  		rps_disable_interrupts(rps);
904  
905  	if (rps->last_freq <= rps->idle_freq)
906  		return;
907  
908  	/*
909  	 * The punit delays the write of the frequency and voltage until it
910  	 * determines the GPU is awake. During normal usage we don't want to
911  	 * waste power changing the frequency if the GPU is sleeping (rc6).
912  	 * However, the GPU and driver are now idle and we do not want to delay
913  	 * switching to minimum voltage (reducing power whilst idle) as we do
914  	 * not expect to be woken in the near future and so must flush the
915  	 * change by waking the device.
916  	 *
917  	 * We choose to take the media powerwell (either would do to trick the
918  	 * punit into committing the voltage change) as that takes a lot less
919  	 * power than the render powerwell.
920  	 */
921  	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
922  	rps_set(rps, rps->idle_freq, false);
923  	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
924  
925  	/*
926  	 * Since we will try and restart from the previously requested
927  	 * frequency on unparking, treat this idle point as a downclock
928  	 * interrupt and reduce the frequency for resume. If we park/unpark
929  	 * more frequently than the rps worker can run, we will not respond
930  	 * to any EI and never see a change in frequency.
931  	 *
932  	 * (Note we accommodate Cherryview's limitation of only using an
933  	 * even bin by applying it to all.)
934  	 */
935  	adj = rps->last_adj;
936  	if (adj < 0)
937  		adj *= 2;
938  	else /* CHV needs even encode values */
939  		adj = -2;
940  	rps->last_adj = adj;
941  	rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
942  	if (rps->cur_freq < rps->efficient_freq) {
943  		rps->cur_freq = rps->efficient_freq;
944  		rps->last_adj = 0;
945  	}
946  
947  	GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
948  }
949  
950  u32 intel_rps_get_boost_frequency(struct intel_rps *rps)
951  {
952  	struct intel_guc_slpc *slpc;
953  
954  	if (rps_uses_slpc(rps)) {
955  		slpc = rps_to_slpc(rps);
956  
957  		return slpc->boost_freq;
958  	} else {
959  		return intel_gpu_freq(rps, rps->boost_freq);
960  	}
961  }
962  
963  static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
964  {
965  	bool boost = false;
966  
967  	/* Validate against (static) hardware limits */
968  	val = intel_freq_opcode(rps, val);
969  	if (val < rps->min_freq || val > rps->max_freq)
970  		return -EINVAL;
971  
972  	mutex_lock(&rps->lock);
973  	if (val != rps->boost_freq) {
974  		rps->boost_freq = val;
975  		boost = atomic_read(&rps->num_waiters);
976  	}
977  	mutex_unlock(&rps->lock);
978  	if (boost)
979  		queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
980  
981  	return 0;
982  }
983  
984  int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
985  {
986  	struct intel_guc_slpc *slpc;
987  
988  	if (rps_uses_slpc(rps)) {
989  		slpc = rps_to_slpc(rps);
990  
991  		return intel_guc_slpc_set_boost_freq(slpc, freq);
992  	} else {
993  		return rps_set_boost_freq(rps, freq);
994  	}
995  }
996  
997  void intel_rps_dec_waiters(struct intel_rps *rps)
998  {
999  	struct intel_guc_slpc *slpc;
1000  
1001  	if (rps_uses_slpc(rps)) {
1002  		slpc = rps_to_slpc(rps);
1003  
1004  		intel_guc_slpc_dec_waiters(slpc);
1005  	} else {
1006  		atomic_dec(&rps->num_waiters);
1007  	}
1008  }
1009  
1010  void intel_rps_boost(struct i915_request *rq)
1011  {
1012  	struct intel_guc_slpc *slpc;
1013  
1014  	if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
1015  		return;
1016  
1017  	/* Waitboost is not needed for contexts marked with a Freq hint */
1018  	if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags))
1019  		return;
1020  
1021  	/* Serializes with i915_request_retire() */
1022  	if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
1023  		struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
1024  
1025  		if (rps_uses_slpc(rps)) {
1026  			slpc = rps_to_slpc(rps);
1027  
1028  			if (slpc->min_freq_softlimit >= slpc->boost_freq)
1029  				return;
1030  
1031  			/* Only queue the boost work for the first waiter (old count was zero) */
1032  			if (!atomic_fetch_inc(&slpc->num_waiters)) {
1033  				GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
1034  					 rq->fence.context, rq->fence.seqno);
1035  				queue_work(rps_to_gt(rps)->i915->unordered_wq,
1036  					   &slpc->boost_work);
1037  			}
1038  
1039  			return;
1040  		}
1041  
1042  		if (atomic_fetch_inc(&rps->num_waiters))
1043  			return;
1044  
1045  		if (!intel_rps_is_active(rps))
1046  			return;
1047  
1048  		GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
1049  			 rq->fence.context, rq->fence.seqno);
1050  
1051  		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
1052  			queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
1053  
1054  		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
1055  	}
1056  }
1057  
1058  int intel_rps_set(struct intel_rps *rps, u8 val)
1059  {
1060  	int err;
1061  
1062  	lockdep_assert_held(&rps->lock);
1063  	GEM_BUG_ON(val > rps->max_freq);
1064  	GEM_BUG_ON(val < rps->min_freq);
1065  
1066  	if (intel_rps_is_active(rps)) {
1067  		err = rps_set(rps, val, true);
1068  		if (err)
1069  			return err;
1070  
1071  		/*
1072  		 * Make sure we continue to get interrupts
1073  		 * until we hit the minimum or maximum frequencies.
1074  		 */
1075  		if (intel_rps_has_interrupts(rps)) {
1076  			struct intel_uncore *uncore = rps_to_uncore(rps);
1077  
1078  			set(uncore,
1079  			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
1080  
1081  			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
1082  		}
1083  	}
1084  
1085  	rps->cur_freq = val;
1086  	return 0;
1087  }
1088  
1089  static u32 intel_rps_read_state_cap(struct intel_rps *rps)
1090  {
1091  	struct drm_i915_private *i915 = rps_to_i915(rps);
1092  	struct intel_uncore *uncore = rps_to_uncore(rps);
1093  
1094  	if (IS_GEN9_LP(i915))
1095  		return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
1096  	else
1097  		return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
1098  }
1099  
1100  static void
1101  mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
1102  {
1103  	struct intel_uncore *uncore = rps_to_uncore(rps);
1104  	u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ?
1105  				intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) :
1106  				intel_uncore_read(uncore, MTL_RP_STATE_CAP);
1107  	u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ?
1108  			intel_uncore_read(uncore, MTL_MPE_FREQUENCY) :
1109  			intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);
1110  
1111  	/* MTL values are in units of 16.67 MHz */
1112  	caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap);
1113  	caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap);
1114  	caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe);
1115  }
1116  
1117  static void
1118  __gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
1119  {
1120  	struct drm_i915_private *i915 = rps_to_i915(rps);
1121  	u32 rp_state_cap;
1122  
1123  	rp_state_cap = intel_rps_read_state_cap(rps);
1124  
1125  	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
1126  	if (IS_GEN9_LP(i915)) {
1127  		caps->rp0_freq = (rp_state_cap >> 16) & 0xff;
1128  		caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
1129  		caps->min_freq = (rp_state_cap >>  0) & 0xff;
1130  	} else {
1131  		caps->rp0_freq = (rp_state_cap >>  0) & 0xff;
1132  		if (GRAPHICS_VER(i915) >= 10)
1133  			caps->rp1_freq = REG_FIELD_GET(RPE_MASK,
1134  						       intel_uncore_read(to_gt(i915)->uncore,
1135  						       GEN10_FREQ_INFO_REC));
1136  		else
1137  			caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
1138  		caps->min_freq = (rp_state_cap >> 16) & 0xff;
1139  	}
1140  
1141  	if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
1142  		/*
1143  		 * In this case rp_state_cap register reports frequencies in
1144  		 * units of 50 MHz. Convert these to the actual "hw unit", i.e.
1145  		 * units of 16.67 MHz
1146  		 */
1147  		caps->rp0_freq *= GEN9_FREQ_SCALER;
1148  		caps->rp1_freq *= GEN9_FREQ_SCALER;
1149  		caps->min_freq *= GEN9_FREQ_SCALER;
1150  	}
1151  }
1152  
1153  /**
1154   * gen6_rps_get_freq_caps - Get freq caps exposed by HW
1155   * @rps: the intel_rps structure
1156   * @caps: returned freq caps
1157   *
1158   * Returned "caps" frequencies should be converted to MHz using
1159   * intel_gpu_freq()
1160   */
1161  void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
1162  {
1163  	struct drm_i915_private *i915 = rps_to_i915(rps);
1164  
1165  	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
1166  		return mtl_get_freq_caps(rps, caps);
1167  	else
1168  		return __gen6_rps_get_freq_caps(rps, caps);
1169  }
1170  
1171  static void gen6_rps_init(struct intel_rps *rps)
1172  {
1173  	struct drm_i915_private *i915 = rps_to_i915(rps);
1174  	struct intel_rps_freq_caps caps;
1175  
1176  	gen6_rps_get_freq_caps(rps, &caps);
1177  	rps->rp0_freq = caps.rp0_freq;
1178  	rps->rp1_freq = caps.rp1_freq;
1179  	rps->min_freq = caps.min_freq;
1180  
1181  	/* hw_max = RP0 until we check for overclocking */
1182  	rps->max_freq = rps->rp0_freq;
1183  
1184  	rps->efficient_freq = rps->rp1_freq;
1185  	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
1186  	    IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
1187  		u32 ddcc_status = 0;
1188  		u32 mult = 1;
1189  
1190  		if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11)
1191  			mult = GEN9_FREQ_SCALER;
1192  		if (snb_pcode_read(rps_to_gt(rps)->uncore,
1193  				   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
1194  				   &ddcc_status, NULL) == 0)
1195  			rps->efficient_freq =
1196  				clamp_t(u32,
1197  					((ddcc_status >> 8) & 0xff) * mult,
1198  					rps->min_freq,
1199  					rps->max_freq);
1200  	}
1201  }
1202  
1203  static bool rps_reset(struct intel_rps *rps)
1204  {
1205  	struct drm_i915_private *i915 = rps_to_i915(rps);
1206  
1207  	/* force a reset */
1208  	rps->power.mode = -1;
1209  	rps->last_freq = -1;
1210  
1211  	if (rps_set(rps, rps->min_freq, true)) {
1212  		drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
1213  		return false;
1214  	}
1215  
1216  	rps->cur_freq = rps->min_freq;
1217  	return true;
1218  }
1219  
1220  /* See the Gen9_GT_PM_Programming_Guide doc for the below */
1221  static bool gen9_rps_enable(struct intel_rps *rps)
1222  {
1223  	struct intel_gt *gt = rps_to_gt(rps);
1224  	struct intel_uncore *uncore = gt->uncore;
1225  
1226  	/* Program defaults and thresholds for RPS */
1227  	if (GRAPHICS_VER(gt->i915) == 9)
1228  		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1229  				      GEN9_FREQUENCY(rps->rp1_freq));
1230  
1231  	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
1232  
1233  	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1234  
1235  	return rps_reset(rps);
1236  }
1237  
1238  static bool gen8_rps_enable(struct intel_rps *rps)
1239  {
1240  	struct intel_uncore *uncore = rps_to_uncore(rps);
1241  
1242  	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1243  			      HSW_FREQUENCY(rps->rp1_freq));
1244  
1245  	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1246  
1247  	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1248  
1249  	return rps_reset(rps);
1250  }
1251  
1252  static bool gen6_rps_enable(struct intel_rps *rps)
1253  {
1254  	struct intel_uncore *uncore = rps_to_uncore(rps);
1255  
1256  	/* Power down if completely idle for over 50ms */
1257  	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
1258  	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1259  
1260  	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1261  			  GEN6_PM_RP_DOWN_THRESHOLD |
1262  			  GEN6_PM_RP_DOWN_TIMEOUT);
1263  
1264  	return rps_reset(rps);
1265  }
1266  
1267  static int chv_rps_max_freq(struct intel_rps *rps)
1268  {
1269  	struct drm_i915_private *i915 = rps_to_i915(rps);
1270  	struct intel_gt *gt = rps_to_gt(rps);
1271  	u32 val;
1272  
1273  	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1274  
1275  	switch (gt->info.sseu.eu_total) {
1276  	case 8:
1277  		/* (2 * 4) config */
1278  		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
1279  		break;
1280  	case 12:
1281  		/* (2 * 6) config */
1282  		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
1283  		break;
1284  	case 16:
1285  		/* (2 * 8) config */
1286  	default:
1287  		/* Setting (2 * 8) Min RP0 for any other combination */
1288  		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
1289  		break;
1290  	}
1291  
1292  	return val & FB_GFX_FREQ_FUSE_MASK;
1293  }
1294  
1295  static int chv_rps_rpe_freq(struct intel_rps *rps)
1296  {
1297  	struct drm_i915_private *i915 = rps_to_i915(rps);
1298  	u32 val;
1299  
1300  	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
1301  	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
1302  
1303  	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
1304  }
1305  
1306  static int chv_rps_guar_freq(struct intel_rps *rps)
1307  {
1308  	struct drm_i915_private *i915 = rps_to_i915(rps);
1309  	u32 val;
1310  
1311  	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1312  
1313  	return val & FB_GFX_FREQ_FUSE_MASK;
1314  }
1315  
1316  static u32 chv_rps_min_freq(struct intel_rps *rps)
1317  {
1318  	struct drm_i915_private *i915 = rps_to_i915(rps);
1319  	u32 val;
1320  
1321  	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
1322  	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
1323  
1324  	return val & FB_GFX_FREQ_FUSE_MASK;
1325  }
1326  
1327  static bool chv_rps_enable(struct intel_rps *rps)
1328  {
1329  	struct intel_uncore *uncore = rps_to_uncore(rps);
1330  	struct drm_i915_private *i915 = rps_to_i915(rps);
1331  	u32 val;
1332  
1333  	/* 1: Program defaults and thresholds for RPS */
1334  	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1335  	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1336  	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1337  	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1338  	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1339  
1340  	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1341  
1342  	/* 2: Enable RPS */
1343  	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1344  			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
1345  			      GEN6_RP_MEDIA_IS_GFX |
1346  			      GEN6_RP_ENABLE |
1347  			      GEN6_RP_UP_BUSY_AVG |
1348  			      GEN6_RP_DOWN_IDLE_AVG);
1349  
1350  	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1351  			  GEN6_PM_RP_DOWN_THRESHOLD |
1352  			  GEN6_PM_RP_DOWN_TIMEOUT);
1353  
1354  	/* Setting Fixed Bias */
1355  	vlv_punit_get(i915);
1356  
1357  	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
1358  	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1359  
1360  	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1361  
1362  	vlv_punit_put(i915);
1363  
1364  	/* RPS code assumes GPLL is used */
1365  	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1366  		      "GPLL not enabled\n");
1367  
1368  	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
1369  		str_yes_no(val & GPLLENABLE));
1370  	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1371  
1372  	return rps_reset(rps);
1373  }
1374  
1375  static int vlv_rps_guar_freq(struct intel_rps *rps)
1376  {
1377  	struct drm_i915_private *i915 = rps_to_i915(rps);
1378  	u32 val, rp1;
1379  
1380  	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1381  
1382  	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
1383  	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
1384  
1385  	return rp1;
1386  }
1387  
1388  static int vlv_rps_max_freq(struct intel_rps *rps)
1389  {
1390  	struct drm_i915_private *i915 = rps_to_i915(rps);
1391  	u32 val, rp0;
1392  
1393  	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1394  
1395  	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
1396  	/* Clamp to max */
1397  	rp0 = min_t(u32, rp0, 0xea);
1398  
1399  	return rp0;
1400  }
1401  
1402  static int vlv_rps_rpe_freq(struct intel_rps *rps)
1403  {
1404  	struct drm_i915_private *i915 = rps_to_i915(rps);
1405  	u32 val, rpe;
1406  
1407  	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
1408  	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
1409  	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
1410  	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
1411  
1412  	return rpe;
1413  }
1414  
1415  static int vlv_rps_min_freq(struct intel_rps *rps)
1416  {
1417  	struct drm_i915_private *i915 = rps_to_i915(rps);
1418  	u32 val;
1419  
1420  	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
1421  	/*
1422  	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
1423  	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
1424  	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
1425  	 * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
1426  	 * to make sure it matches what Punit accepts.
1427  	 */
1428  	return max_t(u32, val, 0xc0);
1429  }
1430  
1431  static bool vlv_rps_enable(struct intel_rps *rps)
1432  {
1433  	struct intel_uncore *uncore = rps_to_uncore(rps);
1434  	struct drm_i915_private *i915 = rps_to_i915(rps);
1435  	u32 val;
1436  
1437  	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1438  	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1439  	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1440  	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1441  	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1442  
1443  	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1444  
1445  	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1446  			      GEN6_RP_MEDIA_TURBO |
1447  			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
1448  			      GEN6_RP_MEDIA_IS_GFX |
1449  			      GEN6_RP_ENABLE |
1450  			      GEN6_RP_UP_BUSY_AVG |
1451  			      GEN6_RP_DOWN_IDLE_CONT);
1452  
1453  	/* WaGsvRC0ResidencyMethod:vlv */
1454  	rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
1455  
1456  	vlv_punit_get(i915);
1457  
1458  	/* Setting Fixed Bias */
1459  	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
1460  	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1461  
1462  	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1463  
1464  	vlv_punit_put(i915);
1465  
1466  	/* RPS code assumes GPLL is used */
1467  	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1468  		      "GPLL not enabled\n");
1469  
1470  	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
1471  		str_yes_no(val & GPLLENABLE));
1472  	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1473  
1474  	return rps_reset(rps);
1475  }
1476  
1477  static unsigned long __ips_gfx_val(struct intel_ips *ips)
1478  {
1479  	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
1480  	struct intel_uncore *uncore = rps_to_uncore(rps);
1481  	unsigned int t, state1, state2;
1482  	u32 pxvid, ext_v;
1483  	u64 corr, corr2;
1484  
1485  	lockdep_assert_held(&mchdev_lock);
1486  
1487  	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
1488  	pxvid = (pxvid >> 24) & 0x7f;
1489  	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
1490  
1491  	state1 = ext_v;
1492  
1493  	/* Revel in the empirically derived constants */
1494  
1495  	/* Correction factor in 1/100000 units */
1496  	t = ips_mch_val(uncore);
1497  	if (t > 80)
1498  		corr = t * 2349 + 135940;
1499  	else if (t >= 50)
1500  		corr = t * 964 + 29317;
1501  	else /* < 50 */
1502  		corr = t * 301 + 1004;
1503  
1504  	corr = div_u64(corr * 150142 * state1, 10000) - 78642;
1505  	corr2 = div_u64(corr, 100000) * ips->corr;
1506  
1507  	state2 = div_u64(corr2 * state1, 10000);
1508  	state2 /= 100; /* convert to mW */
1509  
1510  	__gen5_ips_update(ips);
1511  
1512  	return ips->gfx_power + state2;
1513  }
1514  
1515  static bool has_busy_stats(struct intel_rps *rps)
1516  {
1517  	struct intel_engine_cs *engine;
1518  	enum intel_engine_id id;
1519  
1520  	for_each_engine(engine, rps_to_gt(rps), id) {
1521  		if (!intel_engine_supports_stats(engine))
1522  			return false;
1523  	}
1524  
1525  	return true;
1526  }
1527  
1528  void intel_rps_enable(struct intel_rps *rps)
1529  {
1530  	struct drm_i915_private *i915 = rps_to_i915(rps);
1531  	struct intel_uncore *uncore = rps_to_uncore(rps);
1532  	bool enabled = false;
1533  
1534  	if (!HAS_RPS(i915))
1535  		return;
1536  
1537  	if (rps_uses_slpc(rps))
1538  		return;
1539  
1540  	intel_gt_check_clock_frequency(rps_to_gt(rps));
1541  
1542  	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
1543  	if (rps->max_freq <= rps->min_freq)
1544  		/* leave disabled, no room for dynamic reclocking */;
1545  	else if (IS_CHERRYVIEW(i915))
1546  		enabled = chv_rps_enable(rps);
1547  	else if (IS_VALLEYVIEW(i915))
1548  		enabled = vlv_rps_enable(rps);
1549  	else if (GRAPHICS_VER(i915) >= 9)
1550  		enabled = gen9_rps_enable(rps);
1551  	else if (GRAPHICS_VER(i915) >= 8)
1552  		enabled = gen8_rps_enable(rps);
1553  	else if (GRAPHICS_VER(i915) >= 6)
1554  		enabled = gen6_rps_enable(rps);
1555  	else if (IS_IRONLAKE_M(i915))
1556  		enabled = gen5_rps_enable(rps);
1557  	else
1558  		MISSING_CASE(GRAPHICS_VER(i915));
1559  	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
1560  	if (!enabled)
1561  		return;
1562  
1563  	GT_TRACE(rps_to_gt(rps),
1564  		 "min:%x, max:%x, freq:[%d, %d], thresholds:[%u, %u]\n",
1565  		 rps->min_freq, rps->max_freq,
1566  		 intel_gpu_freq(rps, rps->min_freq),
1567  		 intel_gpu_freq(rps, rps->max_freq),
1568  		 rps->power.up_threshold,
1569  		 rps->power.down_threshold);
1570  
1571  	GEM_BUG_ON(rps->max_freq < rps->min_freq);
1572  	GEM_BUG_ON(rps->idle_freq > rps->max_freq);
1573  
1574  	GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
1575  	GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
1576  
1577  	if (has_busy_stats(rps))
1578  		intel_rps_set_timer(rps);
1579  	else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11)
1580  		intel_rps_set_interrupts(rps);
1581  	else
1582  		/* Ironlake currently uses intel_ips.ko */ {}
1583  
1584  	intel_rps_set_enabled(rps);
1585  }
1586  
1587  static void gen6_rps_disable(struct intel_rps *rps)
1588  {
1589  	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
1590  }
1591  
1592  void intel_rps_disable(struct intel_rps *rps)
1593  {
1594  	struct drm_i915_private *i915 = rps_to_i915(rps);
1595  
1596  	if (!intel_rps_is_enabled(rps))
1597  		return;
1598  
1599  	intel_rps_clear_enabled(rps);
1600  	intel_rps_clear_interrupts(rps);
1601  	intel_rps_clear_timer(rps);
1602  
1603  	if (GRAPHICS_VER(i915) >= 6)
1604  		gen6_rps_disable(rps);
1605  	else if (IS_IRONLAKE_M(i915))
1606  		gen5_rps_disable(rps);
1607  }
1608  
1609  static int byt_gpu_freq(struct intel_rps *rps, int val)
1610  {
1611  	/*
1612  	 * N = val - 0xb7
1613  	 * Slow = Fast = GPLL ref * N
1614  	 */
1615  	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
1616  }
1617  
1618  static int byt_freq_opcode(struct intel_rps *rps, int val)
1619  {
1620  	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
1621  }
1622  
1623  static int chv_gpu_freq(struct intel_rps *rps, int val)
1624  {
1625  	/*
1626  	 * N = val / 2
1627  	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
1628  	 */
1629  	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
1630  }
1631  
1632  static int chv_freq_opcode(struct intel_rps *rps, int val)
1633  {
1634  	/* CHV needs even values */
1635  	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
1636  }
1637  
1638  int intel_gpu_freq(struct intel_rps *rps, int val)
1639  {
1640  	struct drm_i915_private *i915 = rps_to_i915(rps);
1641  
1642  	if (GRAPHICS_VER(i915) >= 9)
1643  		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
1644  					 GEN9_FREQ_SCALER);
1645  	else if (IS_CHERRYVIEW(i915))
1646  		return chv_gpu_freq(rps, val);
1647  	else if (IS_VALLEYVIEW(i915))
1648  		return byt_gpu_freq(rps, val);
1649  	else if (GRAPHICS_VER(i915) >= 6)
1650  		return val * GT_FREQUENCY_MULTIPLIER;
1651  	else
1652  		return val;
1653  }
1654  
1655  int intel_freq_opcode(struct intel_rps *rps, int val)
1656  {
1657  	struct drm_i915_private *i915 = rps_to_i915(rps);
1658  
1659  	if (GRAPHICS_VER(i915) >= 9)
1660  		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
1661  					 GT_FREQUENCY_MULTIPLIER);
1662  	else if (IS_CHERRYVIEW(i915))
1663  		return chv_freq_opcode(rps, val);
1664  	else if (IS_VALLEYVIEW(i915))
1665  		return byt_freq_opcode(rps, val);
1666  	else if (GRAPHICS_VER(i915) >= 6)
1667  		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
1668  	else
1669  		return val;
1670  }
1671  
1672  static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
1673  {
1674  	struct drm_i915_private *i915 = rps_to_i915(rps);
1675  
1676  	rps->gpll_ref_freq =
1677  		vlv_get_cck_clock(i915, "GPLL ref",
1678  				  CCK_GPLL_CLOCK_CONTROL,
1679  				  i915->czclk_freq);
1680  
1681  	drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
1682  		rps->gpll_ref_freq);
1683  }
1684  
1685  static void vlv_rps_init(struct intel_rps *rps)
1686  {
1687  	struct drm_i915_private *i915 = rps_to_i915(rps);
1688  
1689  	vlv_iosf_sb_get(i915,
1690  			BIT(VLV_IOSF_SB_PUNIT) |
1691  			BIT(VLV_IOSF_SB_NC) |
1692  			BIT(VLV_IOSF_SB_CCK));
1693  
1694  	vlv_init_gpll_ref_freq(rps);
1695  
1696  	rps->max_freq = vlv_rps_max_freq(rps);
1697  	rps->rp0_freq = rps->max_freq;
1698  	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1699  		intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1700  
1701  	rps->efficient_freq = vlv_rps_rpe_freq(rps);
1702  	drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1703  		intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1704  
1705  	rps->rp1_freq = vlv_rps_guar_freq(rps);
1706  	drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
1707  		intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1708  
1709  	rps->min_freq = vlv_rps_min_freq(rps);
1710  	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1711  		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1712  
1713  	vlv_iosf_sb_put(i915,
1714  			BIT(VLV_IOSF_SB_PUNIT) |
1715  			BIT(VLV_IOSF_SB_NC) |
1716  			BIT(VLV_IOSF_SB_CCK));
1717  }
1718  
1719  static void chv_rps_init(struct intel_rps *rps)
1720  {
1721  	struct drm_i915_private *i915 = rps_to_i915(rps);
1722  
1723  	vlv_iosf_sb_get(i915,
1724  			BIT(VLV_IOSF_SB_PUNIT) |
1725  			BIT(VLV_IOSF_SB_NC) |
1726  			BIT(VLV_IOSF_SB_CCK));
1727  
1728  	vlv_init_gpll_ref_freq(rps);
1729  
1730  	rps->max_freq = chv_rps_max_freq(rps);
1731  	rps->rp0_freq = rps->max_freq;
1732  	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1733  		intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1734  
1735  	rps->efficient_freq = chv_rps_rpe_freq(rps);
1736  	drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1737  		intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1738  
1739  	rps->rp1_freq = chv_rps_guar_freq(rps);
1740  	drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
1741  		intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1742  
1743  	rps->min_freq = chv_rps_min_freq(rps);
1744  	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1745  		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1746  
1747  	vlv_iosf_sb_put(i915,
1748  			BIT(VLV_IOSF_SB_PUNIT) |
1749  			BIT(VLV_IOSF_SB_NC) |
1750  			BIT(VLV_IOSF_SB_CCK));
1751  
1752  	drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq |
1753  				   rps->rp1_freq | rps->min_freq) & 1,
1754  		      "Odd GPU freq values\n");
1755  }
1756  
1757  static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
1758  {
1759  	ei->ktime = ktime_get_raw();
1760  	ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
1761  	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
1762  }
1763  
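/*
 * Software evaluation-interval handling for VLV/CHV (sketch of the intent):
 * rather than relying on the hardware up/down threshold interrupts, we sample
 * the render/media C0 residency counters each time the EI-expired interrupt
 * fires and derive the up/down events ourselves in vlv_wa_c0_ei() below.
 */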
1764  static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
1765  {
1766  	struct intel_uncore *uncore = rps_to_uncore(rps);
1767  	const struct intel_rps_ei *prev = &rps->ei;
1768  	struct intel_rps_ei now;
1769  	u32 events = 0;
1770  
1771  	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
1772  		return 0;
1773  
1774  	vlv_c0_read(uncore, &now);
1775  
1776  	if (prev->ktime) {
1777  		u64 time, c0;
1778  		u32 render, media;
1779  
1780  		time = ktime_us_delta(now.ktime, prev->ktime);
1781  
1782  		time *= rps_to_i915(rps)->czclk_freq;
1783  
1784  		/*
1785  		 * Workload can be split between render + media, e.g. SwapBuffers
1786  		 * being blitted in X after being rendered in mesa. To account for
1787  		 * this we need to combine both engines into our activity counter.
1788  		 */
1789  		render = now.render_c0 - prev->render_c0;
1790  		media = now.media_c0 - prev->media_c0;
1791  		c0 = max(render, media);
1792  		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
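		/*
		 * After the scaling above, comparing c0 against
		 * time * threshold amounts to comparing the busyness
		 * percentage of the busiest engine against the up/down
		 * thresholds, which are stored as percentages.
		 */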
1793  
1794  		if (c0 > time * rps->power.up_threshold)
1795  			events = GEN6_PM_RP_UP_THRESHOLD;
1796  		else if (c0 < time * rps->power.down_threshold)
1797  			events = GEN6_PM_RP_DOWN_THRESHOLD;
1798  	}
1799  
1800  	rps->ei = now;
1801  	return events;
1802  }
1803  
1804  static void rps_work(struct work_struct *work)
1805  {
1806  	struct intel_rps *rps = container_of(work, typeof(*rps), work);
1807  	struct intel_gt *gt = rps_to_gt(rps);
1808  	struct drm_i915_private *i915 = rps_to_i915(rps);
1809  	bool client_boost = false;
1810  	int new_freq, adj, min, max;
1811  	u32 pm_iir = 0;
1812  
1813  	spin_lock_irq(gt->irq_lock);
1814  	pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
1815  	client_boost = atomic_read(&rps->num_waiters);
1816  	spin_unlock_irq(gt->irq_lock);
1817  
1818  	/* Make sure we didn't queue anything we're not going to process. */
1819  	if (!pm_iir && !client_boost)
1820  		goto out;
1821  
1822  	mutex_lock(&rps->lock);
1823  	if (!intel_rps_is_active(rps)) {
1824  		mutex_unlock(&rps->lock);
1825  		return;
1826  	}
1827  
1828  	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
1829  
1830  	adj = rps->last_adj;
1831  	new_freq = rps->cur_freq;
1832  	min = rps->min_freq_softlimit;
1833  	max = rps->max_freq_softlimit;
1834  	if (client_boost)
1835  		max = rps->max_freq;
1836  
1837  	GT_TRACE(gt,
1838  		 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
1839  		 pm_iir, str_yes_no(client_boost),
1840  		 adj, new_freq, min, max);
1841  
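	/*
	 * Frequency selection policy (summary of the chain below): a client
	 * boost jumps straight to the boost frequency; repeated up/down
	 * threshold events double the previous step, starting from +/-1
	 * (+/-2 on CHV, which needs even values); a down-timeout drops us
	 * back to RPe or the soft minimum; anything else leaves the
	 * frequency alone.
	 */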
1842  	if (client_boost && new_freq < rps->boost_freq) {
1843  		new_freq = rps->boost_freq;
1844  		adj = 0;
1845  	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
1846  		if (adj > 0)
1847  			adj *= 2;
1848  		else /* CHV needs even encode values */
1849  			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
1850  
1851  		if (new_freq >= rps->max_freq_softlimit)
1852  			adj = 0;
1853  	} else if (client_boost) {
1854  		adj = 0;
1855  	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
1856  		if (rps->cur_freq > rps->efficient_freq)
1857  			new_freq = rps->efficient_freq;
1858  		else if (rps->cur_freq > rps->min_freq_softlimit)
1859  			new_freq = rps->min_freq_softlimit;
1860  		adj = 0;
1861  	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
1862  		if (adj < 0)
1863  			adj *= 2;
1864  		else /* CHV needs even encode values */
1865  			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
1866  
1867  		if (new_freq <= rps->min_freq_softlimit)
1868  			adj = 0;
1869  	} else { /* unknown event */
1870  		adj = 0;
1871  	}
1872  
1873  	/*
1874  	 * sysfs frequency limits may have snuck in while
1875  	 * servicing the interrupt
1876  	 */
1877  	new_freq += adj;
1878  	new_freq = clamp_t(int, new_freq, min, max);
1879  
1880  	if (intel_rps_set(rps, new_freq)) {
1881  		drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
1882  		adj = 0;
1883  	}
1884  	rps->last_adj = adj;
1885  
1886  	mutex_unlock(&rps->lock);
1887  
1888  out:
1889  	spin_lock_irq(gt->irq_lock);
1890  	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
1891  	spin_unlock_irq(gt->irq_lock);
1892  }
1893  
1894  void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1895  {
1896  	struct intel_gt *gt = rps_to_gt(rps);
1897  	const u32 events = rps->pm_events & pm_iir;
1898  
1899  	lockdep_assert_held(gt->irq_lock);
1900  
1901  	if (unlikely(!events))
1902  		return;
1903  
1904  	GT_TRACE(gt, "irq events:%x\n", events);
1905  
1906  	gen6_gt_pm_mask_irq(gt, events);
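	/* The events stay masked until rps_work() has processed and unmasked them. */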
1907  
1908  	rps->pm_iir |= events;
1909  	queue_work(gt->i915->unordered_wq, &rps->work);
1910  }
1911  
1912  void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1913  {
1914  	struct intel_gt *gt = rps_to_gt(rps);
1915  	u32 events;
1916  
1917  	events = pm_iir & rps->pm_events;
1918  	if (events) {
1919  		spin_lock(gt->irq_lock);
1920  
1921  		GT_TRACE(gt, "irq events:%x\n", events);
1922  
1923  		gen6_gt_pm_mask_irq(gt, events);
1924  		rps->pm_iir |= events;
1925  
1926  		queue_work(gt->i915->unordered_wq, &rps->work);
1927  		spin_unlock(gt->irq_lock);
1928  	}
1929  
1930  	if (GRAPHICS_VER(gt->i915) >= 8)
1931  		return;
1932  
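	/*
	 * The VEBOX interrupt bits live at bit 10 and above in pm_iir;
	 * shift them down so they become engine-relative bits.
	 */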
1933  	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
1934  		intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);
1935  
1936  	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
1937  		drm_dbg(&rps_to_i915(rps)->drm,
1938  			"Command parser error, pm_iir 0x%08x\n", pm_iir);
1939  }
1940  
1941  void gen5_rps_irq_handler(struct intel_rps *rps)
1942  {
1943  	struct intel_uncore *uncore = rps_to_uncore(rps);
1944  	u32 busy_up, busy_down, max_avg, min_avg;
1945  	u8 new_freq;
1946  
1947  	spin_lock(&mchdev_lock);
1948  
1949  	intel_uncore_write16(uncore,
1950  			     MEMINTRSTS,
1951  			     intel_uncore_read(uncore, MEMINTRSTS));
1952  
1953  	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
1954  	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
1955  	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
1956  	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
1957  	min_avg = intel_uncore_read(uncore, RCBMINAVG);
1958  
1959  	/* Handle RCS change request from hw */
1960  	new_freq = rps->cur_freq;
1961  	if (busy_up > max_avg)
1962  		new_freq++;
1963  	else if (busy_down < min_avg)
1964  		new_freq--;
1965  	new_freq = clamp(new_freq,
1966  			 rps->min_freq_softlimit,
1967  			 rps->max_freq_softlimit);
1968  
1969  	if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
1970  		rps->cur_freq = new_freq;
1971  
1972  	spin_unlock(&mchdev_lock);
1973  }
1974  
1975  void intel_rps_init_early(struct intel_rps *rps)
1976  {
1977  	mutex_init(&rps->lock);
1978  	mutex_init(&rps->power.mutex);
1979  
1980  	INIT_WORK(&rps->work, rps_work);
1981  	timer_setup(&rps->timer, rps_timer, 0);
1982  
1983  	atomic_set(&rps->num_waiters, 0);
1984  }
1985  
1986  void intel_rps_init(struct intel_rps *rps)
1987  {
1988  	struct drm_i915_private *i915 = rps_to_i915(rps);
1989  
1990  	if (rps_uses_slpc(rps))
1991  		return;
1992  
1993  	if (IS_CHERRYVIEW(i915))
1994  		chv_rps_init(rps);
1995  	else if (IS_VALLEYVIEW(i915))
1996  		vlv_rps_init(rps);
1997  	else if (GRAPHICS_VER(i915) >= 6)
1998  		gen6_rps_init(rps);
1999  	else if (IS_IRONLAKE_M(i915))
2000  		gen5_rps_init(rps);
2001  
2002  	/* Derive initial user preferences/limits from the hardware limits */
2003  	rps->max_freq_softlimit = rps->max_freq;
2004  	rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit;
2005  	rps->min_freq_softlimit = rps->min_freq;
2006  	rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit;
2007  
2008  	/* After setting max-softlimit, find the overclock max freq */
2009  	if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
2010  		u32 params = 0;
2011  
2012  		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_READ_OC_PARAMS, &params, NULL);
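		/*
		 * Illustrative decode (made-up value, not from real hw):
		 * params = 0x8000002e -> bit 31 says overclocking is
		 * supported and bits 7:0 give the OC limit as a 50 MHz
		 * opcode, i.e. 0x2e * 50 = 2300 MHz.
		 */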
2013  		if (params & BIT(31)) { /* OC supported */
2014  			drm_dbg(&i915->drm,
2015  				"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
2016  				(rps->max_freq & 0xff) * 50,
2017  				(params & 0xff) * 50);
2018  			rps->max_freq = params & 0xff;
2019  		}
2020  	}
2021  
2022  	/* Set default thresholds in % */
2023  	rps->power.up_threshold = 95;
2024  	rps_to_gt(rps)->defaults.rps_up_threshold = rps->power.up_threshold;
2025  	rps->power.down_threshold = 85;
2026  	rps_to_gt(rps)->defaults.rps_down_threshold = rps->power.down_threshold;
2027  
2028  	/* Finally allow us to boost to max by default */
2029  	rps->boost_freq = rps->max_freq;
2030  	rps->idle_freq = rps->min_freq;
2031  
2032  	/* Start in the middle, from here we will autotune based on workload */
2033  	rps->cur_freq = rps->efficient_freq;
2034  
2035  	rps->pm_intrmsk_mbz = 0;
2036  
2037  	/*
2038  	 * SNB,IVB,HSW can, while VLV,CHV may, hard hang on a looping batchbuffer
2039  	 * if GEN6_PM_UP_EI_EXPIRED is masked.
2040  	 *
2041  	 * TODO: verify if this can be reproduced on VLV,CHV.
2042  	 */
2043  	if (GRAPHICS_VER(i915) <= 7)
2044  		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
2045  
2046  	if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11)
2047  		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
2048  
2049  	/* GuC needs ARAT expired interrupt unmasked */
2050  	if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc))
2051  		rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
2052  }
2053  
2054  void intel_rps_sanitize(struct intel_rps *rps)
2055  {
2056  	if (rps_uses_slpc(rps))
2057  		return;
2058  
2059  	if (GRAPHICS_VER(rps_to_i915(rps)) >= 6)
2060  		rps_disable_interrupts(rps);
2061  }
2062  
2063  u32 intel_rps_read_rpstat(struct intel_rps *rps)
2064  {
2065  	struct drm_i915_private *i915 = rps_to_i915(rps);
2066  	i915_reg_t rpstat;
2067  
2068  	rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
2069  
2070  	return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
2071  }
2072  
2073  static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
2074  {
2075  	struct drm_i915_private *i915 = rps_to_i915(rps);
2076  	u32 cagf;
2077  
2078  	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
2079  		cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat);
2080  	else if (GRAPHICS_VER(i915) >= 12)
2081  		cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
2082  	else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
2083  		cagf = REG_FIELD_GET(RPE_MASK, rpstat);
2084  	else if (GRAPHICS_VER(i915) >= 9)
2085  		cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
2086  	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
2087  		cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat);
2088  	else if (GRAPHICS_VER(i915) >= 6)
2089  		cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat);
2090  	else
2091  		cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat));
2092  
2093  	return cagf;
2094  }
2095  
2096  static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
2097  {
2098  	struct drm_i915_private *i915 = rps_to_i915(rps);
2099  	struct intel_uncore *uncore = rps_to_uncore(rps);
2100  	i915_reg_t r = INVALID_MMIO_REG;
2101  	u32 freq;
2102  
2103  	/*
2104  	 * For GEN12+, reading the frequency from HW does not require forcewake;
2105  	 * the registers simply report a frequency of 0 while the GT is in RC6.
2106  	 */
2107  	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
2108  		r = MTL_MIRROR_TARGET_WP1;
2109  	} else if (GRAPHICS_VER(i915) >= 12) {
2110  		r = GEN12_RPSTAT1;
2111  	} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
2112  		vlv_punit_get(i915);
2113  		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
2114  		vlv_punit_put(i915);
2115  	} else if (GRAPHICS_VER(i915) >= 6) {
2116  		r = GEN6_RPSTAT1;
2117  	} else {
2118  		r = MEMSTAT_ILK;
2119  	}
2120  
2121  	if (i915_mmio_reg_valid(r))
2122  		freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r);
2123  
2124  	return intel_rps_get_cagf(rps, freq);
2125  }
2126  
2127  static u32 read_cagf(struct intel_rps *rps)
2128  {
2129  	return __read_cagf(rps, true);
2130  }
2131  
2132  u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
2133  {
2134  	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
2135  	intel_wakeref_t wakeref;
2136  	u32 freq = 0;
2137  
2138  	with_intel_runtime_pm_if_in_use(rpm, wakeref)
2139  		freq = intel_gpu_freq(rps, read_cagf(rps));
2140  
2141  	return freq;
2142  }
2143  
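/*
 * Note: the _fw variant below skips runtime-pm and forcewake handling, so it
 * is only suitable for callers that already guarantee the device is awake
 * (e.g. sampling paths that hold the wakeref themselves).
 */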
2144  u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
2145  {
2146  	return intel_gpu_freq(rps, __read_cagf(rps, false));
2147  }
2148  
2149  static u32 intel_rps_read_punit_req(struct intel_rps *rps)
2150  {
2151  	struct intel_uncore *uncore = rps_to_uncore(rps);
2152  	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
2153  	intel_wakeref_t wakeref;
2154  	u32 freq = 0;
2155  
2156  	with_intel_runtime_pm_if_in_use(rpm, wakeref)
2157  		freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);
2158  
2159  	return freq;
2160  }
2161  
2162  static u32 intel_rps_get_req(u32 pureq)
2163  {
2164  	u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;
2165  
2166  	return req;
2167  }
2168  
2169  u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
2170  {
2171  	u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));
2172  
2173  	return intel_gpu_freq(rps, freq);
2174  }
2175  
2176  u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
2177  {
2178  	if (rps_uses_slpc(rps))
2179  		return intel_rps_read_punit_req_frequency(rps);
2180  	else
2181  		return intel_gpu_freq(rps, rps->cur_freq);
2182  }
2183  
2184  u32 intel_rps_get_max_frequency(struct intel_rps *rps)
2185  {
2186  	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2187  
2188  	if (rps_uses_slpc(rps))
2189  		return slpc->max_freq_softlimit;
2190  	else
2191  		return intel_gpu_freq(rps, rps->max_freq_softlimit);
2192  }
2193  
2194  /**
2195   * intel_rps_get_max_raw_freq - returns the max frequency in raw hw units.
2196   * @rps: the intel_rps structure
2197   *
2198   * Returns the max frequency in raw hardware units. On newer platforms the
2199   * raw value is expressed in units of 50 MHz.
2200   */
2201  u32 intel_rps_get_max_raw_freq(struct intel_rps *rps)
2202  {
2203  	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2204  	u32 freq;
2205  
2206  	if (rps_uses_slpc(rps)) {
2207  		return DIV_ROUND_CLOSEST(slpc->rp0_freq,
2208  					 GT_FREQUENCY_MULTIPLIER);
2209  	} else {
2210  		freq = rps->max_freq;
2211  		if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
2212  			/* Convert GT frequency to 50 MHz units */
2213  			freq /= GEN9_FREQ_SCALER;
2214  		}
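			/*
			 * e.g. (assuming GEN9_FREQ_SCALER == 3) a max_freq of
			 * 36 in 16.67 MHz units becomes 12 in 50 MHz units,
			 * i.e. 600 MHz either way.
			 */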
2215  		return freq;
2216  	}
2217  }
2218  
2219  u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
2220  {
2221  	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2222  
2223  	if (rps_uses_slpc(rps))
2224  		return slpc->rp0_freq;
2225  	else
2226  		return intel_gpu_freq(rps, rps->rp0_freq);
2227  }
2228  
2229  u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
2230  {
2231  	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2232  
2233  	if (rps_uses_slpc(rps))
2234  		return slpc->rp1_freq;
2235  	else
2236  		return intel_gpu_freq(rps, rps->rp1_freq);
2237  }
2238  
2239  u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
2240  {
2241  	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2242  
2243  	if (rps_uses_slpc(rps))
2244  		return slpc->min_freq;
2245  	else
2246  		return intel_gpu_freq(rps, rps->min_freq);
2247  }
2248  
2249  static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
2250  {
2251  	struct intel_gt *gt = rps_to_gt(rps);
2252  	struct drm_i915_private *i915 = gt->i915;
2253  	struct intel_uncore *uncore = gt->uncore;
2254  	struct intel_rps_freq_caps caps;
2255  	u32 rp_state_limits;
2256  	u32 gt_perf_status;
2257  	u32 rpmodectl, rpinclimit, rpdeclimit;
2258  	u32 rpstat, cagf, reqf;
2259  	u32 rpcurupei, rpcurup, rpprevup;
2260  	u32 rpcurdownei, rpcurdown, rpprevdown;
2261  	u32 rpupei, rpupt, rpdownei, rpdownt;
2262  	u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
2263  
2264  	rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
2265  	gen6_rps_get_freq_caps(rps, &caps);
2266  	if (IS_GEN9_LP(i915))
2267  		gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
2268  	else
2269  		gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
2270  
2271  	/* RPSTAT1 is in the GT power well */
2272  	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
2273  
2274  	reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
2275  	if (GRAPHICS_VER(i915) >= 9) {
2276  		reqf >>= 23;
2277  	} else {
2278  		reqf &= ~GEN6_TURBO_DISABLE;
2279  		if (IS_HASWELL(i915) || IS_BROADWELL(i915))
2280  			reqf >>= 24;
2281  		else
2282  			reqf >>= 25;
2283  	}
2284  	reqf = intel_gpu_freq(rps, reqf);
2285  
2286  	rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
2287  	rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
2288  	rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
2289  
2290  	rpstat = intel_rps_read_rpstat(rps);
2291  	rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
2292  	rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
2293  	rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
2294  	rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
2295  	rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
2296  	rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
2297  
2298  	rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
2299  	rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
2300  
2301  	rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
2302  	rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
2303  
2304  	cagf = intel_rps_read_actual_frequency(rps);
2305  
2306  	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
2307  
2308  	if (GRAPHICS_VER(i915) >= 11) {
2309  		pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
2310  		pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
2311  		/*
2312  		 * The equivalent to the PM ISR & IIR cannot be read
2313  		 * without affecting the current state of the system
2314  		 */
2315  		pm_isr = 0;
2316  		pm_iir = 0;
2317  	} else if (GRAPHICS_VER(i915) >= 8) {
2318  		pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
2319  		pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
2320  		pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
2321  		pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
2322  	} else {
2323  		pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
2324  		pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
2325  		pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
2326  		pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
2327  	}
2328  	pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
2329  
2330  	drm_printf(p, "Video Turbo Mode: %s\n",
2331  		   str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
2332  	drm_printf(p, "HW control enabled: %s\n",
2333  		   str_yes_no(rpmodectl & GEN6_RP_ENABLE));
2334  	drm_printf(p, "SW control enabled: %s\n",
2335  		   str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
2336  
2337  	drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
2338  		   pm_ier, pm_imr, pm_mask);
2339  	if (GRAPHICS_VER(i915) <= 10)
2340  		drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
2341  			   pm_isr, pm_iir);
2342  	drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
2343  		   rps->pm_intrmsk_mbz);
2344  	drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
2345  	drm_printf(p, "Render p-state ratio: %d\n",
2346  		   (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
2347  	drm_printf(p, "Render p-state VID: %d\n",
2348  		   gt_perf_status & 0xff);
2349  	drm_printf(p, "Render p-state limit: %d\n",
2350  		   rp_state_limits & 0xff);
2351  	drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
2352  	drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
2353  	drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
2354  	drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
2355  	drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
2356  	drm_printf(p, "CAGF: %dMHz\n", cagf);
2357  	drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
2358  		   rpcurupei,
2359  		   intel_gt_pm_interval_to_ns(gt, rpcurupei));
2360  	drm_printf(p, "RP CUR UP: %d (%lldns)\n",
2361  		   rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
2362  	drm_printf(p, "RP PREV UP: %d (%lldns)\n",
2363  		   rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
2364  	drm_printf(p, "Up threshold: %d%%\n",
2365  		   rps->power.up_threshold);
2366  	drm_printf(p, "RP UP EI: %d (%lldns)\n",
2367  		   rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
2368  	drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
2369  		   rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
2370  
2371  	drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
2372  		   rpcurdownei,
2373  		   intel_gt_pm_interval_to_ns(gt, rpcurdownei));
2374  	drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
2375  		   rpcurdown,
2376  		   intel_gt_pm_interval_to_ns(gt, rpcurdown));
2377  	drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
2378  		   rpprevdown,
2379  		   intel_gt_pm_interval_to_ns(gt, rpprevdown));
2380  	drm_printf(p, "Down threshold: %d%%\n",
2381  		   rps->power.down_threshold);
2382  	drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
2383  		   rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
2384  	drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
2385  		   rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
2386  
2387  	drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
2388  		   intel_gpu_freq(rps, caps.min_freq));
2389  	drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
2390  		   intel_gpu_freq(rps, caps.rp1_freq));
2391  	drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
2392  		   intel_gpu_freq(rps, caps.rp0_freq));
2393  	drm_printf(p, "Max overclocked frequency: %dMHz\n",
2394  		   intel_gpu_freq(rps, rps->max_freq));
2395  
2396  	drm_printf(p, "Current freq: %d MHz\n",
2397  		   intel_gpu_freq(rps, rps->cur_freq));
2398  	drm_printf(p, "Actual freq: %d MHz\n", cagf);
2399  	drm_printf(p, "Idle freq: %d MHz\n",
2400  		   intel_gpu_freq(rps, rps->idle_freq));
2401  	drm_printf(p, "Min freq: %d MHz\n",
2402  		   intel_gpu_freq(rps, rps->min_freq));
2403  	drm_printf(p, "Boost freq: %d MHz\n",
2404  		   intel_gpu_freq(rps, rps->boost_freq));
2405  	drm_printf(p, "Max freq: %d MHz\n",
2406  		   intel_gpu_freq(rps, rps->max_freq));
2407  	drm_printf(p,
2408  		   "efficient (RPe) frequency: %d MHz\n",
2409  		   intel_gpu_freq(rps, rps->efficient_freq));
2410  }
2411  
2412  static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
2413  {
2414  	struct intel_gt *gt = rps_to_gt(rps);
2415  	struct intel_uncore *uncore = gt->uncore;
2416  	struct intel_rps_freq_caps caps;
2417  	u32 pm_mask;
2418  
2419  	gen6_rps_get_freq_caps(rps, &caps);
2420  	pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
2421  
2422  	drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
2423  	drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
2424  		   rps->pm_intrmsk_mbz);
2425  	drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
2426  	drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
2427  	drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
2428  		   intel_gpu_freq(rps, caps.min_freq));
2429  	drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
2430  		   intel_gpu_freq(rps, caps.rp1_freq));
2431  	drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
2432  		   intel_gpu_freq(rps, caps.rp0_freq));
2433  	drm_printf(p, "Current freq: %d MHz\n",
2434  		   intel_rps_get_requested_frequency(rps));
2435  	drm_printf(p, "Actual freq: %d MHz\n",
2436  		   intel_rps_read_actual_frequency(rps));
2437  	drm_printf(p, "Min freq: %d MHz\n",
2438  		   intel_rps_get_min_frequency(rps));
2439  	drm_printf(p, "Boost freq: %d MHz\n",
2440  		   intel_rps_get_boost_frequency(rps));
2441  	drm_printf(p, "Max freq: %d MHz\n",
2442  		   intel_rps_get_max_frequency(rps));
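	/*
	 * The SLPC freq caps do not carry RPe separately, so RP1 is reported
	 * below as an approximation of the efficient frequency.
	 */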
2443  	drm_printf(p,
2444  		   "efficient (RPe) frequency: %d MHz\n",
2445  		   intel_gpu_freq(rps, caps.rp1_freq));
2446  }
2447  
2448  void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
2449  {
2450  	if (rps_uses_slpc(rps))
2451  		return slpc_frequency_dump(rps, p);
2452  	else
2453  		return rps_frequency_dump(rps, p);
2454  }
2455  
2456  static int set_max_freq(struct intel_rps *rps, u32 val)
2457  {
2458  	struct drm_i915_private *i915 = rps_to_i915(rps);
2459  	int ret = 0;
2460  
2461  	mutex_lock(&rps->lock);
2462  
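	/*
	 * val arrives in MHz (e.g. from the gt_max_freq_mhz sysfs interface)
	 * and is converted to a hw opcode before the range checks below.
	 */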
2463  	val = intel_freq_opcode(rps, val);
2464  	if (val < rps->min_freq ||
2465  	    val > rps->max_freq ||
2466  	    val < rps->min_freq_softlimit) {
2467  		ret = -EINVAL;
2468  		goto unlock;
2469  	}
2470  
2471  	if (val > rps->rp0_freq)
2472  		drm_dbg(&i915->drm, "User requested overclocking to %d\n",
2473  			intel_gpu_freq(rps, val));
2474  
2475  	rps->max_freq_softlimit = val;
2476  
2477  	val = clamp_t(int, rps->cur_freq,
2478  		      rps->min_freq_softlimit,
2479  		      rps->max_freq_softlimit);
2480  
2481  	/*
2482  	 * We still need *_set_rps to process the new max_delay and
2483  	 * update the interrupt limits and PMINTRMSK even though
2484  	 * frequency request may be unchanged.
2485  	 */
2486  	intel_rps_set(rps, val);
2487  
2488  unlock:
2489  	mutex_unlock(&rps->lock);
2490  
2491  	return ret;
2492  }
2493  
2494  int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val)
2495  {
2496  	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2497  
2498  	if (rps_uses_slpc(rps))
2499  		return intel_guc_slpc_set_max_freq(slpc, val);
2500  	else
2501  		return set_max_freq(rps, val);
2502  }
2503  
2504  u32 intel_rps_get_min_frequency(struct intel_rps *rps)
2505  {
2506  	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2507  
2508  	if (rps_uses_slpc(rps))
2509  		return slpc->min_freq_softlimit;
2510  	else
2511  		return intel_gpu_freq(rps, rps->min_freq_softlimit);
2512  }
2513  
2514  /**
2515   * intel_rps_get_min_raw_freq - returns the min frequency in raw hw units.
2516   * @rps: the intel_rps structure
2517   *
2518   * Returns the min frequency in raw hardware units. On newer platforms the
2519   * raw value is expressed in units of 50 MHz.
2520   */
2521  u32 intel_rps_get_min_raw_freq(struct intel_rps *rps)
2522  {
2523  	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2524  	u32 freq;
2525  
2526  	if (rps_uses_slpc(rps)) {
2527  		return DIV_ROUND_CLOSEST(slpc->min_freq,
2528  					 GT_FREQUENCY_MULTIPLIER);
2529  	} else {
2530  		freq = rps->min_freq;
2531  		if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
2532  			/* Convert GT frequency to 50 MHz units */
2533  			freq /= GEN9_FREQ_SCALER;
2534  		}
2535  		return freq;
2536  	}
2537  }
2538  
2539  static int set_min_freq(struct intel_rps *rps, u32 val)
2540  {
2541  	int ret = 0;
2542  
2543  	mutex_lock(&rps->lock);
2544  
2545  	val = intel_freq_opcode(rps, val);
2546  	if (val < rps->min_freq ||
2547  	    val > rps->max_freq ||
2548  	    val > rps->max_freq_softlimit) {
2549  		ret = -EINVAL;
2550  		goto unlock;
2551  	}
2552  
2553  	rps->min_freq_softlimit = val;
2554  
2555  	val = clamp_t(int, rps->cur_freq,
2556  		      rps->min_freq_softlimit,
2557  		      rps->max_freq_softlimit);
2558  
2559  	/*
2560  	 * We still need *_set_rps to process the new min_delay and
2561  	 * update the interrupt limits and PMINTRMSK even though
2562  	 * frequency request may be unchanged.
2563  	 */
2564  	intel_rps_set(rps, val);
2565  
2566  unlock:
2567  	mutex_unlock(&rps->lock);
2568  
2569  	return ret;
2570  }
2571  
2572  int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
2573  {
2574  	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2575  
2576  	if (rps_uses_slpc(rps))
2577  		return intel_guc_slpc_set_min_freq(slpc, val);
2578  	else
2579  		return set_min_freq(rps, val);
2580  }
2581  
2582  u8 intel_rps_get_up_threshold(struct intel_rps *rps)
2583  {
2584  	return rps->power.up_threshold;
2585  }
2586  
2587  static int rps_set_threshold(struct intel_rps *rps, u8 *threshold, u8 val)
2588  {
2589  	int ret;
2590  
2591  	if (val > 100)
2592  		return -EINVAL;
2593  
2594  	ret = mutex_lock_interruptible(&rps->lock);
2595  	if (ret)
2596  		return ret;
2597  
2598  	if (*threshold == val)
2599  		goto out_unlock;
2600  
2601  	*threshold = val;
2602  
2603  	/* Force the next intel_rps_set() to reprogram the power thresholds. */
2604  	rps->last_freq = -1;
2605  	mutex_lock(&rps->power.mutex);
2606  	rps->power.mode = -1;
2607  	mutex_unlock(&rps->power.mutex);
2608  
2609  	intel_rps_set(rps, clamp(rps->cur_freq,
2610  				 rps->min_freq_softlimit,
2611  				 rps->max_freq_softlimit));
2612  
2613  out_unlock:
2614  	mutex_unlock(&rps->lock);
2615  
2616  	return ret;
2617  }
2618  
2619  int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold)
2620  {
2621  	return rps_set_threshold(rps, &rps->power.up_threshold, threshold);
2622  }
2623  
2624  u8 intel_rps_get_down_threshold(struct intel_rps *rps)
2625  {
2626  	return rps->power.down_threshold;
2627  }
2628  
2629  int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold)
2630  {
2631  	return rps_set_threshold(rps, &rps->power.down_threshold, threshold);
2632  }
2633  
2634  static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
2635  {
2636  	struct intel_uncore *uncore = rps_to_uncore(rps);
2637  	u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE;
2638  
2639  	/* Toggle manual mode so the punit honours direct software freq requests */
2640  	intel_uncore_write(uncore, GEN6_RP_CONTROL, state);
2641  }
2642  
2643  void intel_rps_raise_unslice(struct intel_rps *rps)
2644  {
2645  	struct intel_uncore *uncore = rps_to_uncore(rps);
2646  
2647  	mutex_lock(&rps->lock);
2648  
2649  	if (rps_uses_slpc(rps)) {
2650  		/* RP limits have not been initialized yet for SLPC path */
2651  		struct intel_rps_freq_caps caps;
2652  
2653  		gen6_rps_get_freq_caps(rps, &caps);
2654  
2655  		intel_rps_set_manual(rps, true);
2656  		intel_uncore_write(uncore, GEN6_RPNSWREQ,
2657  				   ((caps.rp0_freq <<
2658  				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
2659  				   GEN9_IGNORE_SLICE_RATIO));
2660  		intel_rps_set_manual(rps, false);
2661  	} else {
2662  		intel_rps_set(rps, rps->rp0_freq);
2663  	}
2664  
2665  	mutex_unlock(&rps->lock);
2666  }
2667  
2668  void intel_rps_lower_unslice(struct intel_rps *rps)
2669  {
2670  	struct intel_uncore *uncore = rps_to_uncore(rps);
2671  
2672  	mutex_lock(&rps->lock);
2673  
2674  	if (rps_uses_slpc(rps)) {
2675  		/* RP limits have not been initialized yet for SLPC path */
2676  		struct intel_rps_freq_caps caps;
2677  
2678  		gen6_rps_get_freq_caps(rps, &caps);
2679  
2680  		intel_rps_set_manual(rps, true);
2681  		intel_uncore_write(uncore, GEN6_RPNSWREQ,
2682  				   ((caps.min_freq <<
2683  				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
2684  				   GEN9_IGNORE_SLICE_RATIO));
2685  		intel_rps_set_manual(rps, false);
2686  	} else {
2687  		intel_rps_set(rps, rps->min_freq);
2688  	}
2689  
2690  	mutex_unlock(&rps->lock);
2691  }
2692  
2693  static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32)
2694  {
2695  	struct intel_gt *gt = rps_to_gt(rps);
2696  	intel_wakeref_t wakeref;
2697  	u32 val;
2698  
2699  	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
2700  		val = intel_uncore_read(gt->uncore, reg32);
2701  
2702  	return val;
2703  }
2704  
2705  bool rps_read_mask_mmio(struct intel_rps *rps,
2706  			i915_reg_t reg32, u32 mask)
2707  {
2708  	return rps_read_mmio(rps, reg32) & mask;
2709  }
2710  
2711  /* External interface for intel_ips.ko */
2712  
2713  static struct drm_i915_private __rcu *ips_mchdev;
2714  
2715  /*
2716   * Tells the intel_ips driver that the i915 driver is now loaded, if
2717   * IPS got loaded first.
2718   *
2719   * This awkward dance is so that neither module has to depend on the
2720   * other in order for IPS to do the appropriate communication of
2721   * GPU turbo limits to i915.
2722   */
2723  static void
2724  ips_ping_for_i915_load(void)
2725  {
2726  	void (*link)(void);
2727  
2728  	link = symbol_get(ips_link_to_i915_driver);
2729  	if (link) {
2730  		link();
2731  		symbol_put(ips_link_to_i915_driver);
2732  	}
2733  }
2734  
2735  void intel_rps_driver_register(struct intel_rps *rps)
2736  {
2737  	struct intel_gt *gt = rps_to_gt(rps);
2738  
2739  	/*
2740  	 * We only register the i915 ips part with intel-ips once everything is
2741  	 * set up, to avoid intel-ips sneaking in and reading bogus values.
2742  	 */
2743  	if (GRAPHICS_VER(gt->i915) == 5) {
2744  		GEM_BUG_ON(ips_mchdev);
2745  		rcu_assign_pointer(ips_mchdev, gt->i915);
2746  		ips_ping_for_i915_load();
2747  	}
2748  }
2749  
2750  void intel_rps_driver_unregister(struct intel_rps *rps)
2751  {
2752  	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
2753  		rcu_assign_pointer(ips_mchdev, NULL);
2754  }
2755  
2756  static struct drm_i915_private *mchdev_get(void)
2757  {
2758  	struct drm_i915_private *i915;
2759  
2760  	rcu_read_lock();
2761  	i915 = rcu_dereference(ips_mchdev);
2762  	if (i915 && !kref_get_unless_zero(&i915->drm.ref))
2763  		i915 = NULL;
2764  	rcu_read_unlock();
2765  
2766  	return i915;
2767  }
2768  
2769  /**
2770   * i915_read_mch_val - return value for IPS use
2771   *
2772   * Calculate and return a value for the IPS driver to use when deciding whether
2773   * we have thermal and power headroom to increase CPU or GPU power budget.
2774   */
2775  unsigned long i915_read_mch_val(void)
2776  {
2777  	struct drm_i915_private *i915;
2778  	unsigned long chipset_val = 0;
2779  	unsigned long graphics_val = 0;
2780  	intel_wakeref_t wakeref;
2781  
2782  	i915 = mchdev_get();
2783  	if (!i915)
2784  		return 0;
2785  
2786  	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
2787  		struct intel_ips *ips = &to_gt(i915)->rps.ips;
2788  
2789  		spin_lock_irq(&mchdev_lock);
2790  		chipset_val = __ips_chipset_val(ips);
2791  		graphics_val = __ips_gfx_val(ips);
2792  		spin_unlock_irq(&mchdev_lock);
2793  	}
2794  
2795  	drm_dev_put(&i915->drm);
2796  	return chipset_val + graphics_val;
2797  }
2798  EXPORT_SYMBOL_GPL(i915_read_mch_val);
2799  
2800  /**
2801   * i915_gpu_raise - raise GPU frequency limit
2802   *
2803   * Raise the limit; IPS indicates we have thermal headroom.
2804   */
2805  bool i915_gpu_raise(void)
2806  {
2807  	struct drm_i915_private *i915;
2808  	struct intel_rps *rps;
2809  
2810  	i915 = mchdev_get();
2811  	if (!i915)
2812  		return false;
2813  
2814  	rps = &to_gt(i915)->rps;
2815  
2816  	spin_lock_irq(&mchdev_lock);
2817  	if (rps->max_freq_softlimit < rps->max_freq)
2818  		rps->max_freq_softlimit++;
2819  	spin_unlock_irq(&mchdev_lock);
2820  
2821  	drm_dev_put(&i915->drm);
2822  	return true;
2823  }
2824  EXPORT_SYMBOL_GPL(i915_gpu_raise);
2825  
2826  /**
2827   * i915_gpu_lower - lower GPU frequency limit
2828   *
2829   * IPS indicates we're close to a thermal limit, so throttle back the GPU
2830   * frequency maximum.
2831   */
2832  bool i915_gpu_lower(void)
2833  {
2834  	struct drm_i915_private *i915;
2835  	struct intel_rps *rps;
2836  
2837  	i915 = mchdev_get();
2838  	if (!i915)
2839  		return false;
2840  
2841  	rps = &to_gt(i915)->rps;
2842  
2843  	spin_lock_irq(&mchdev_lock);
2844  	if (rps->max_freq_softlimit > rps->min_freq)
2845  		rps->max_freq_softlimit--;
2846  	spin_unlock_irq(&mchdev_lock);
2847  
2848  	drm_dev_put(&i915->drm);
2849  	return true;
2850  }
2851  EXPORT_SYMBOL_GPL(i915_gpu_lower);
2852  
2853  /**
2854   * i915_gpu_busy - indicate GPU busyness to IPS
2855   *
2856   * Tell the IPS driver whether or not the GPU is busy.
2857   */
2858  bool i915_gpu_busy(void)
2859  {
2860  	struct drm_i915_private *i915;
2861  	bool ret;
2862  
2863  	i915 = mchdev_get();
2864  	if (!i915)
2865  		return false;
2866  
2867  	ret = to_gt(i915)->awake;
2868  
2869  	drm_dev_put(&i915->drm);
2870  	return ret;
2871  }
2872  EXPORT_SYMBOL_GPL(i915_gpu_busy);
2873  
2874  /**
2875   * i915_gpu_turbo_disable - disable graphics turbo
2876   *
2877   * Disable graphics turbo by resetting the max frequency and setting the
2878   * current frequency to the default.
2879   */
2880  bool i915_gpu_turbo_disable(void)
2881  {
2882  	struct drm_i915_private *i915;
2883  	struct intel_rps *rps;
2884  	bool ret;
2885  
2886  	i915 = mchdev_get();
2887  	if (!i915)
2888  		return false;
2889  
2890  	rps = &to_gt(i915)->rps;
2891  
2892  	spin_lock_irq(&mchdev_lock);
2893  	rps->max_freq_softlimit = rps->min_freq;
2894  	ret = !__gen5_rps_set(rps, rps->min_freq);
2895  	spin_unlock_irq(&mchdev_lock);
2896  
2897  	drm_dev_put(&i915->drm);
2898  	return ret;
2899  }
2900  EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
2901  
2902  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2903  #include "selftest_rps.c"
2904  #include "selftest_slpc.c"
2905  #endif
2906