xref: /linux/drivers/gpu/drm/i915/display/intel_bw.c (revision 2dcb8e8782d8e4c38903bf37b1a24d3ffd193da7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #include <drm/drm_atomic_state_helper.h>
7 
8 #include "i915_reg.h"
9 #include "intel_atomic.h"
10 #include "intel_bw.h"
11 #include "intel_cdclk.h"
12 #include "intel_display_types.h"
13 #include "intel_mchbar_regs.h"
14 #include "intel_pcode.h"
15 #include "intel_pm.h"
16 
17 /* Parameters for Qclk Geyserville (QGV) */
18 struct intel_qgv_point {
19 	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
20 };
21 
22 struct intel_psf_gv_point {
23 	u8 clk; /* clock in multiples of 16.6666 MHz */
24 };
25 
26 struct intel_qgv_info {
27 	struct intel_qgv_point points[I915_NUM_QGV_POINTS];
28 	struct intel_psf_gv_point psf_points[I915_NUM_PSF_GV_POINTS];
29 	u8 num_points;
30 	u8 num_psf_points;
31 	u8 t_bl;
32 	u8 max_numchannels;
33 	u8 channel_width;
34 	u8 deinterleave;
35 };
36 
37 static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
38 					  struct intel_qgv_point *sp,
39 					  int point)
40 {
41 	u32 dclk_ratio, dclk_reference;
42 	u32 val;
43 
44 	val = intel_uncore_read(&dev_priv->uncore, SA_PERF_STATUS_0_0_0_MCHBAR_PC);
45 	dclk_ratio = REG_FIELD_GET(DG1_QCLK_RATIO_MASK, val);
46 	if (val & DG1_QCLK_REFERENCE)
47 		dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */
48 	else
49 		dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */
50 	sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + 500, 1000);
51 
52 	val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
53 	if (val & DG1_GEAR_TYPE)
54 		sp->dclk *= 2;
55 
56 	if (sp->dclk == 0)
57 		return -EINVAL;
58 
59 	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR);
60 	sp->t_rp = REG_FIELD_GET(DG1_DRAM_T_RP_MASK, val);
61 	sp->t_rdpre = REG_FIELD_GET(DG1_DRAM_T_RDPRE_MASK, val);
62 
63 	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH);
64 	sp->t_rcd = REG_FIELD_GET(DG1_DRAM_T_RCD_MASK, val);
65 	sp->t_ras = REG_FIELD_GET(DG1_DRAM_T_RAS_MASK, val);
66 
67 	sp->t_rc = sp->t_rp + sp->t_ras;
68 
69 	return 0;
70 }
71 
72 static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
73 					 struct intel_qgv_point *sp,
74 					 int point)
75 {
76 	u32 val = 0, val2 = 0;
77 	u16 dclk;
78 	int ret;
79 
80 	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
81 			     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
82 			     &val, &val2);
83 	if (ret)
84 		return ret;
85 
86 	dclk = val & 0xffff;
87 	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000);
88 	sp->t_rp = (val & 0xff0000) >> 16;
89 	sp->t_rcd = (val & 0xff000000) >> 24;
90 
91 	sp->t_rdpre = val2 & 0xff;
92 	sp->t_ras = (val2 & 0xff00) >> 8;
93 
94 	sp->t_rc = sp->t_rp + sp->t_ras;
95 
96 	return 0;
97 }
98 
99 static int adls_pcode_read_psf_gv_point_info(struct drm_i915_private *dev_priv,
100 					    struct intel_psf_gv_point *points)
101 {
102 	u32 val = 0;
103 	int ret;
104 	int i;
105 
106 	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
107 			     ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, &val, NULL);
108 	if (ret)
109 		return ret;
110 
111 	for (i = 0; i < I915_NUM_PSF_GV_POINTS; i++) {
112 		points[i].clk = val & 0xff;
113 		val >>= 8;
114 	}
115 
116 	return 0;
117 }
118 
119 int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
120 				  u32 points_mask)
121 {
122 	int ret;
123 
124 	/* bspec says to keep retrying for at least 1 ms */
125 	ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG,
126 				points_mask,
127 				ICL_PCODE_POINTS_RESTRICTED_MASK,
128 				ICL_PCODE_POINTS_RESTRICTED,
129 				1);
130 
131 	if (ret < 0) {
132 		drm_err(&dev_priv->drm, "Failed to disable qgv points (%d) points: 0x%x\n", ret, points_mask);
133 		return ret;
134 	}
135 
136 	return 0;
137 }
138 
139 static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
140 			      struct intel_qgv_info *qi,
141 			      bool is_y_tile)
142 {
143 	const struct dram_info *dram_info = &dev_priv->dram_info;
144 	int i, ret;
145 
146 	qi->num_points = dram_info->num_qgv_points;
147 	qi->num_psf_points = dram_info->num_psf_gv_points;
148 
149 	if (DISPLAY_VER(dev_priv) >= 12)
150 		switch (dram_info->type) {
151 		case INTEL_DRAM_DDR4:
152 			qi->t_bl = is_y_tile ? 8 : 4;
153 			qi->max_numchannels = 2;
154 			qi->channel_width = 64;
155 			qi->deinterleave = is_y_tile ? 1 : 2;
156 			break;
157 		case INTEL_DRAM_DDR5:
158 			qi->t_bl = is_y_tile ? 16 : 8;
159 			qi->max_numchannels = 4;
160 			qi->channel_width = 32;
161 			qi->deinterleave = is_y_tile ? 1 : 2;
162 			break;
163 		case INTEL_DRAM_LPDDR4:
164 			if (IS_ROCKETLAKE(dev_priv)) {
165 				qi->t_bl = 8;
166 				qi->max_numchannels = 4;
167 				qi->channel_width = 32;
168 				qi->deinterleave = 2;
169 				break;
170 			}
171 			fallthrough;
172 		case INTEL_DRAM_LPDDR5:
173 			qi->t_bl = 16;
174 			qi->max_numchannels = 8;
175 			qi->channel_width = 16;
176 			qi->deinterleave = is_y_tile ? 2 : 4;
177 			break;
178 		default:
179 			qi->t_bl = 16;
180 			qi->max_numchannels = 1;
181 			break;
182 		}
183 	else if (DISPLAY_VER(dev_priv) == 11) {
184 		qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8;
185 		qi->max_numchannels = 1;
186 	}
187 
188 	if (drm_WARN_ON(&dev_priv->drm,
189 			qi->num_points > ARRAY_SIZE(qi->points)))
190 		qi->num_points = ARRAY_SIZE(qi->points);
191 
192 	for (i = 0; i < qi->num_points; i++) {
193 		struct intel_qgv_point *sp = &qi->points[i];
194 
195 		if (IS_DG1(dev_priv))
196 			ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i);
197 		else
198 			ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
199 
200 		if (ret)
201 			return ret;
202 
203 		drm_dbg_kms(&dev_priv->drm,
204 			    "QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
205 			    i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
206 			    sp->t_rcd, sp->t_rc);
207 	}
208 
209 	if (qi->num_psf_points > 0) {
210 		ret = adls_pcode_read_psf_gv_point_info(dev_priv, qi->psf_points);
211 		if (ret) {
212 			drm_err(&dev_priv->drm, "Failed to read PSF point data; PSF points will not be considered in bandwidth calculations.\n");
213 			qi->num_psf_points = 0;
214 		}
215 
216 		for (i = 0; i < qi->num_psf_points; i++)
217 			drm_dbg_kms(&dev_priv->drm,
218 				    "PSF GV %d: CLK=%d \n",
219 				    i, qi->psf_points[i].clk);
220 	}
221 
222 	return 0;
223 }
224 
/* Convert a PSF GV clock value into a bandwidth figure. */
static int adl_calc_psf_bw(int clk)
{
	/*
	 * clk is multiples of 16.666MHz (100/6)
	 * According to BSpec PSF GV bandwidth is
	 * calculated as BW = 64 * clk * 16.666Mhz
	 */
	int scaled = 64 * clk * 100;

	return DIV_ROUND_CLOSEST(scaled, 6);
}
234 
235 static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
236 {
237 	u16 dclk = 0;
238 	int i;
239 
240 	for (i = 0; i < qi->num_points; i++)
241 		dclk = max(dclk, qi->points[i].dclk);
242 
243 	return dclk;
244 }
245 
246 struct intel_sa_info {
247 	u16 displayrtids;
248 	u8 deburst, deprogbwlimit, derating;
249 };
250 
251 static const struct intel_sa_info icl_sa_info = {
252 	.deburst = 8,
253 	.deprogbwlimit = 25, /* GB/s */
254 	.displayrtids = 128,
255 	.derating = 10,
256 };
257 
258 static const struct intel_sa_info tgl_sa_info = {
259 	.deburst = 16,
260 	.deprogbwlimit = 34, /* GB/s */
261 	.displayrtids = 256,
262 	.derating = 10,
263 };
264 
265 static const struct intel_sa_info rkl_sa_info = {
266 	.deburst = 8,
267 	.deprogbwlimit = 20, /* GB/s */
268 	.displayrtids = 128,
269 	.derating = 10,
270 };
271 
272 static const struct intel_sa_info adls_sa_info = {
273 	.deburst = 16,
274 	.deprogbwlimit = 38, /* GB/s */
275 	.displayrtids = 256,
276 	.derating = 10,
277 };
278 
279 static const struct intel_sa_info adlp_sa_info = {
280 	.deburst = 16,
281 	.deprogbwlimit = 38, /* GB/s */
282 	.displayrtids = 256,
283 	.derating = 20,
284 };
285 
286 static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
287 {
288 	struct intel_qgv_info qi = {};
289 	bool is_y_tile = true; /* assume y tile may be used */
290 	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
291 	int ipqdepth, ipqdepthpch = 16;
292 	int dclk_max;
293 	int maxdebw;
294 	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
295 	int i, ret;
296 
297 	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
298 	if (ret) {
299 		drm_dbg_kms(&dev_priv->drm,
300 			    "Failed to get memory subsystem information, ignoring bandwidth limits");
301 		return ret;
302 	}
303 
304 	dclk_max = icl_sagv_max_dclk(&qi);
305 	maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10);
306 	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
307 	qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
308 
309 	for (i = 0; i < num_groups; i++) {
310 		struct intel_bw_info *bi = &dev_priv->max_bw[i];
311 		int clpchgroup;
312 		int j;
313 
314 		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
315 		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
316 
317 		bi->num_qgv_points = qi.num_points;
318 		bi->num_psf_gv_points = qi.num_psf_points;
319 
320 		for (j = 0; j < qi.num_points; j++) {
321 			const struct intel_qgv_point *sp = &qi.points[j];
322 			int ct, bw;
323 
324 			/*
325 			 * Max row cycle time
326 			 *
327 			 * FIXME what is the logic behind the
328 			 * assumed burst length?
329 			 */
330 			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
331 				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
332 			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);
333 
334 			bi->deratedbw[j] = min(maxdebw,
335 					       bw * (100 - sa->derating) / 100);
336 
337 			drm_dbg_kms(&dev_priv->drm,
338 				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
339 				    i, j, bi->num_planes, bi->deratedbw[j]);
340 		}
341 	}
342 	/*
343 	 * In case if SAGV is disabled in BIOS, we always get 1
344 	 * SAGV point, but we can't send PCode commands to restrict it
345 	 * as it will fail and pointless anyway.
346 	 */
347 	if (qi.num_points == 1)
348 		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
349 	else
350 		dev_priv->sagv_status = I915_SAGV_ENABLED;
351 
352 	return 0;
353 }
354 
355 static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
356 {
357 	struct intel_qgv_info qi = {};
358 	const struct dram_info *dram_info = &dev_priv->dram_info;
359 	bool is_y_tile = true; /* assume y tile may be used */
360 	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
361 	int ipqdepth, ipqdepthpch = 16;
362 	int dclk_max;
363 	int maxdebw, peakbw;
364 	int clperchgroup;
365 	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
366 	int i, ret;
367 
368 	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
369 	if (ret) {
370 		drm_dbg_kms(&dev_priv->drm,
371 			    "Failed to get memory subsystem information, ignoring bandwidth limits");
372 		return ret;
373 	}
374 
375 	if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5)
376 		num_channels *= 2;
377 
378 	qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
379 
380 	if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
381 		qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);
382 
383 	if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
384 		drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.");
385 	if (qi.max_numchannels != 0)
386 		num_channels = min_t(u8, num_channels, qi.max_numchannels);
387 
388 	dclk_max = icl_sagv_max_dclk(&qi);
389 
390 	peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * dclk_max;
391 	maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */
392 
393 	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
394 	/*
395 	 * clperchgroup = 4kpagespermempage * clperchperblock,
396 	 * clperchperblock = 8 / num_channels * interleave
397 	 */
398 	clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave;
399 
400 	for (i = 0; i < num_groups; i++) {
401 		struct intel_bw_info *bi = &dev_priv->max_bw[i];
402 		struct intel_bw_info *bi_next;
403 		int clpchgroup;
404 		int j;
405 
406 		if (i < num_groups - 1)
407 			bi_next = &dev_priv->max_bw[i + 1];
408 
409 		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
410 
411 		if (i < num_groups - 1 && clpchgroup < clperchgroup)
412 			bi_next->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
413 		else
414 			bi_next->num_planes = 0;
415 
416 		bi->num_qgv_points = qi.num_points;
417 		bi->num_psf_gv_points = qi.num_psf_points;
418 
419 		for (j = 0; j < qi.num_points; j++) {
420 			const struct intel_qgv_point *sp = &qi.points[j];
421 			int ct, bw;
422 
423 			/*
424 			 * Max row cycle time
425 			 *
426 			 * FIXME what is the logic behind the
427 			 * assumed burst length?
428 			 */
429 			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
430 				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
431 			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);
432 
433 			bi->deratedbw[j] = min(maxdebw,
434 					       bw * (100 - sa->derating) / 100);
435 
436 			drm_dbg_kms(&dev_priv->drm,
437 				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
438 				    i, j, bi->num_planes, bi->deratedbw[j]);
439 		}
440 
441 		for (j = 0; j < qi.num_psf_points; j++) {
442 			const struct intel_psf_gv_point *sp = &qi.psf_points[j];
443 
444 			bi->psf_bw[j] = adl_calc_psf_bw(sp->clk);
445 
446 			drm_dbg_kms(&dev_priv->drm,
447 				    "BW%d / PSF GV %d: num_planes=%d bw=%u\n",
448 				    i, j, bi->num_planes, bi->psf_bw[j]);
449 		}
450 	}
451 
452 	/*
453 	 * In case if SAGV is disabled in BIOS, we always get 1
454 	 * SAGV point, but we can't send PCode commands to restrict it
455 	 * as it will fail and pointless anyway.
456 	 */
457 	if (qi.num_points == 1)
458 		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
459 	else
460 		dev_priv->sagv_status = I915_SAGV_ENABLED;
461 
462 	return 0;
463 }
464 
465 static void dg2_get_bw_info(struct drm_i915_private *i915)
466 {
467 	struct intel_bw_info *bi = &i915->max_bw[0];
468 
469 	/*
470 	 * DG2 doesn't have SAGV or QGV points, just a constant max bandwidth
471 	 * that doesn't depend on the number of planes enabled.  Create a
472 	 * single dummy QGV point to reflect that.  DG2-G10 platforms have a
473 	 * constant 50 GB/s bandwidth, whereas DG2-G11 platforms have 38 GB/s.
474 	 */
475 	bi->num_planes = 1;
476 	bi->num_qgv_points = 1;
477 	if (IS_DG2_G11(i915))
478 		bi->deratedbw[0] = 38000;
479 	else
480 		bi->deratedbw[0] = 50000;
481 
482 	i915->sagv_status = I915_SAGV_NOT_CONTROLLED;
483 }
484 
485 static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
486 			       int num_planes, int qgv_point)
487 {
488 	int i;
489 
490 	/*
491 	 * Let's return max bw for 0 planes
492 	 */
493 	num_planes = max(1, num_planes);
494 
495 	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
496 		const struct intel_bw_info *bi =
497 			&dev_priv->max_bw[i];
498 
499 		/*
500 		 * Pcode will not expose all QGV points when
501 		 * SAGV is forced to off/min/med/max.
502 		 */
503 		if (qgv_point >= bi->num_qgv_points)
504 			return UINT_MAX;
505 
506 		if (num_planes >= bi->num_planes)
507 			return bi->deratedbw[qgv_point];
508 	}
509 
510 	return 0;
511 }
512 
513 static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
514 			       int num_planes, int qgv_point)
515 {
516 	int i;
517 
518 	/*
519 	 * Let's return max bw for 0 planes
520 	 */
521 	num_planes = max(1, num_planes);
522 
523 	for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) {
524 		const struct intel_bw_info *bi =
525 			&dev_priv->max_bw[i];
526 
527 		/*
528 		 * Pcode will not expose all QGV points when
529 		 * SAGV is forced to off/min/med/max.
530 		 */
531 		if (qgv_point >= bi->num_qgv_points)
532 			return UINT_MAX;
533 
534 		if (num_planes <= bi->num_planes)
535 			return bi->deratedbw[qgv_point];
536 	}
537 
538 	return dev_priv->max_bw[0].deratedbw[qgv_point];
539 }
540 
541 static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv,
542 			       int psf_gv_point)
543 {
544 	const struct intel_bw_info *bi =
545 			&dev_priv->max_bw[0];
546 
547 	return bi->psf_bw[psf_gv_point];
548 }
549 
550 void intel_bw_init_hw(struct drm_i915_private *dev_priv)
551 {
552 	if (!HAS_DISPLAY(dev_priv))
553 		return;
554 
555 	if (IS_DG2(dev_priv))
556 		dg2_get_bw_info(dev_priv);
557 	else if (IS_ALDERLAKE_P(dev_priv))
558 		tgl_get_bw_info(dev_priv, &adlp_sa_info);
559 	else if (IS_ALDERLAKE_S(dev_priv))
560 		tgl_get_bw_info(dev_priv, &adls_sa_info);
561 	else if (IS_ROCKETLAKE(dev_priv))
562 		tgl_get_bw_info(dev_priv, &rkl_sa_info);
563 	else if (DISPLAY_VER(dev_priv) == 12)
564 		tgl_get_bw_info(dev_priv, &tgl_sa_info);
565 	else if (DISPLAY_VER(dev_priv) == 11)
566 		icl_get_bw_info(dev_priv, &icl_sa_info);
567 }
568 
569 static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
570 {
571 	/*
572 	 * We assume cursors are small enough
573 	 * to not not cause bandwidth problems.
574 	 */
575 	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
576 }
577 
578 static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
579 {
580 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
581 	unsigned int data_rate = 0;
582 	enum plane_id plane_id;
583 
584 	for_each_plane_id_on_crtc(crtc, plane_id) {
585 		/*
586 		 * We assume cursors are small enough
587 		 * to not not cause bandwidth problems.
588 		 */
589 		if (plane_id == PLANE_CURSOR)
590 			continue;
591 
592 		data_rate += crtc_state->data_rate[plane_id];
593 	}
594 
595 	return data_rate;
596 }
597 
598 void intel_bw_crtc_update(struct intel_bw_state *bw_state,
599 			  const struct intel_crtc_state *crtc_state)
600 {
601 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
602 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
603 
604 	bw_state->data_rate[crtc->pipe] =
605 		intel_bw_crtc_data_rate(crtc_state);
606 	bw_state->num_active_planes[crtc->pipe] =
607 		intel_bw_crtc_num_active_planes(crtc_state);
608 
609 	drm_dbg_kms(&i915->drm, "pipe %c data rate %u num active planes %u\n",
610 		    pipe_name(crtc->pipe),
611 		    bw_state->data_rate[crtc->pipe],
612 		    bw_state->num_active_planes[crtc->pipe]);
613 }
614 
615 static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
616 					       const struct intel_bw_state *bw_state)
617 {
618 	unsigned int num_active_planes = 0;
619 	enum pipe pipe;
620 
621 	for_each_pipe(dev_priv, pipe)
622 		num_active_planes += bw_state->num_active_planes[pipe];
623 
624 	return num_active_planes;
625 }
626 
627 static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
628 				       const struct intel_bw_state *bw_state)
629 {
630 	unsigned int data_rate = 0;
631 	enum pipe pipe;
632 
633 	for_each_pipe(dev_priv, pipe)
634 		data_rate += bw_state->data_rate[pipe];
635 
636 	if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active(dev_priv))
637 		data_rate = data_rate * 105 / 100;
638 
639 	return data_rate;
640 }
641 
642 struct intel_bw_state *
643 intel_atomic_get_old_bw_state(struct intel_atomic_state *state)
644 {
645 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
646 	struct intel_global_state *bw_state;
647 
648 	bw_state = intel_atomic_get_old_global_obj_state(state, &dev_priv->bw_obj);
649 
650 	return to_intel_bw_state(bw_state);
651 }
652 
653 struct intel_bw_state *
654 intel_atomic_get_new_bw_state(struct intel_atomic_state *state)
655 {
656 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
657 	struct intel_global_state *bw_state;
658 
659 	bw_state = intel_atomic_get_new_global_obj_state(state, &dev_priv->bw_obj);
660 
661 	return to_intel_bw_state(bw_state);
662 }
663 
664 struct intel_bw_state *
665 intel_atomic_get_bw_state(struct intel_atomic_state *state)
666 {
667 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
668 	struct intel_global_state *bw_state;
669 
670 	bw_state = intel_atomic_get_global_obj_state(state, &dev_priv->bw_obj);
671 	if (IS_ERR(bw_state))
672 		return ERR_CAST(bw_state);
673 
674 	return to_intel_bw_state(bw_state);
675 }
676 
677 static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
678 				  const struct intel_crtc_state *crtc_state)
679 {
680 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
681 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
682 	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
683 	enum plane_id plane_id;
684 
685 	memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));
686 
687 	if (!crtc_state->hw.active)
688 		return;
689 
690 	for_each_plane_id_on_crtc(crtc, plane_id) {
691 		const struct skl_ddb_entry *ddb_y =
692 			&crtc_state->wm.skl.plane_ddb_y[plane_id];
693 		const struct skl_ddb_entry *ddb_uv =
694 			&crtc_state->wm.skl.plane_ddb_uv[plane_id];
695 		unsigned int data_rate = crtc_state->data_rate[plane_id];
696 		unsigned int dbuf_mask = 0;
697 		enum dbuf_slice slice;
698 
699 		dbuf_mask |= skl_ddb_dbuf_slice_mask(i915, ddb_y);
700 		dbuf_mask |= skl_ddb_dbuf_slice_mask(i915, ddb_uv);
701 
702 		/*
703 		 * FIXME: To calculate that more properly we probably
704 		 * need to split per plane data_rate into data_rate_y
705 		 * and data_rate_uv for multiplanar formats in order not
706 		 * to get accounted those twice if they happen to reside
707 		 * on different slices.
708 		 * However for pre-icl this would work anyway because
709 		 * we have only single slice and for icl+ uv plane has
710 		 * non-zero data rate.
711 		 * So in worst case those calculation are a bit
712 		 * pessimistic, which shouldn't pose any significant
713 		 * problem anyway.
714 		 */
715 		for_each_dbuf_slice_in_mask(i915, slice, dbuf_mask)
716 			crtc_bw->used_bw[slice] += data_rate;
717 	}
718 }
719 
720 int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
721 {
722 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
723 	struct intel_bw_state *new_bw_state = NULL;
724 	struct intel_bw_state *old_bw_state = NULL;
725 	const struct intel_crtc_state *crtc_state;
726 	struct intel_crtc *crtc;
727 	int max_bw = 0;
728 	enum pipe pipe;
729 	int i;
730 
731 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
732 		new_bw_state = intel_atomic_get_bw_state(state);
733 		if (IS_ERR(new_bw_state))
734 			return PTR_ERR(new_bw_state);
735 
736 		old_bw_state = intel_atomic_get_old_bw_state(state);
737 
738 		skl_crtc_calc_dbuf_bw(new_bw_state, crtc_state);
739 	}
740 
741 	if (!old_bw_state)
742 		return 0;
743 
744 	for_each_pipe(dev_priv, pipe) {
745 		struct intel_dbuf_bw *crtc_bw;
746 		enum dbuf_slice slice;
747 
748 		crtc_bw = &new_bw_state->dbuf_bw[pipe];
749 
750 		for_each_dbuf_slice(dev_priv, slice) {
751 			/*
752 			 * Current experimental observations show that contrary
753 			 * to BSpec we get underruns once we exceed 64 * CDCLK
754 			 * for slices in total.
755 			 * As a temporary measure in order not to keep CDCLK
756 			 * bumped up all the time we calculate CDCLK according
757 			 * to this formula for  overall bw consumed by slices.
758 			 */
759 			max_bw += crtc_bw->used_bw[slice];
760 		}
761 	}
762 
763 	new_bw_state->min_cdclk = max_bw / 64;
764 
765 	if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) {
766 		int ret = intel_atomic_lock_global_state(&new_bw_state->base);
767 
768 		if (ret)
769 			return ret;
770 	}
771 
772 	return 0;
773 }
774 
775 int intel_bw_calc_min_cdclk(struct intel_atomic_state *state)
776 {
777 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
778 	struct intel_bw_state *new_bw_state = NULL;
779 	struct intel_bw_state *old_bw_state = NULL;
780 	const struct intel_crtc_state *crtc_state;
781 	struct intel_crtc *crtc;
782 	int min_cdclk = 0;
783 	enum pipe pipe;
784 	int i;
785 
786 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
787 		new_bw_state = intel_atomic_get_bw_state(state);
788 		if (IS_ERR(new_bw_state))
789 			return PTR_ERR(new_bw_state);
790 
791 		old_bw_state = intel_atomic_get_old_bw_state(state);
792 	}
793 
794 	if (!old_bw_state)
795 		return 0;
796 
797 	for_each_pipe(dev_priv, pipe) {
798 		struct intel_cdclk_state *cdclk_state;
799 
800 		cdclk_state = intel_atomic_get_new_cdclk_state(state);
801 		if (!cdclk_state)
802 			return 0;
803 
804 		min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk);
805 	}
806 
807 	new_bw_state->min_cdclk = min_cdclk;
808 
809 	if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) {
810 		int ret = intel_atomic_lock_global_state(&new_bw_state->base);
811 
812 		if (ret)
813 			return ret;
814 	}
815 
816 	return 0;
817 }
818 
819 static u16 icl_qgv_points_mask(struct drm_i915_private *i915)
820 {
821 	unsigned int num_psf_gv_points = i915->max_bw[0].num_psf_gv_points;
822 	unsigned int num_qgv_points = i915->max_bw[0].num_qgv_points;
823 	u16 mask = 0;
824 
825 	/*
826 	 * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects
827 	 * it with failure if we try masking any unadvertised points.
828 	 * So need to operate only with those returned from PCode.
829 	 */
830 	if (num_qgv_points > 0)
831 		mask |= REG_GENMASK(num_qgv_points - 1, 0);
832 
833 	if (num_psf_gv_points > 0)
834 		mask |= REG_GENMASK(num_psf_gv_points - 1, 0) << ADLS_PSF_PT_SHIFT;
835 
836 	return mask;
837 }
838 
839 static int intel_bw_check_data_rate(struct intel_atomic_state *state, bool *changed)
840 {
841 	struct drm_i915_private *i915 = to_i915(state->base.dev);
842 	const struct intel_crtc_state *new_crtc_state, *old_crtc_state;
843 	struct intel_crtc *crtc;
844 	int i;
845 
846 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
847 					    new_crtc_state, i) {
848 		unsigned int old_data_rate =
849 			intel_bw_crtc_data_rate(old_crtc_state);
850 		unsigned int new_data_rate =
851 			intel_bw_crtc_data_rate(new_crtc_state);
852 		unsigned int old_active_planes =
853 			intel_bw_crtc_num_active_planes(old_crtc_state);
854 		unsigned int new_active_planes =
855 			intel_bw_crtc_num_active_planes(new_crtc_state);
856 		struct intel_bw_state *new_bw_state;
857 
858 		/*
859 		 * Avoid locking the bw state when
860 		 * nothing significant has changed.
861 		 */
862 		if (old_data_rate == new_data_rate &&
863 		    old_active_planes == new_active_planes)
864 			continue;
865 
866 		new_bw_state = intel_atomic_get_bw_state(state);
867 		if (IS_ERR(new_bw_state))
868 			return PTR_ERR(new_bw_state);
869 
870 		new_bw_state->data_rate[crtc->pipe] = new_data_rate;
871 		new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;
872 
873 		*changed = true;
874 
875 		drm_dbg_kms(&i915->drm,
876 			    "[CRTC:%d:%s] data rate %u num active planes %u\n",
877 			    crtc->base.base.id, crtc->base.name,
878 			    new_bw_state->data_rate[crtc->pipe],
879 			    new_bw_state->num_active_planes[crtc->pipe]);
880 	}
881 
882 	return 0;
883 }
884 
885 int intel_bw_atomic_check(struct intel_atomic_state *state)
886 {
887 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
888 	const struct intel_bw_state *old_bw_state;
889 	struct intel_bw_state *new_bw_state;
890 	unsigned int data_rate;
891 	unsigned int num_active_planes;
892 	int i, ret;
893 	u32 allowed_points = 0;
894 	unsigned int max_bw_point = 0, max_bw = 0;
895 	unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
896 	unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
897 	bool changed = false;
898 
899 	/* FIXME earlier gens need some checks too */
900 	if (DISPLAY_VER(dev_priv) < 11)
901 		return 0;
902 
903 	ret = intel_bw_check_data_rate(state, &changed);
904 	if (ret)
905 		return ret;
906 
907 	old_bw_state = intel_atomic_get_old_bw_state(state);
908 	new_bw_state = intel_atomic_get_new_bw_state(state);
909 
910 	if (new_bw_state &&
911 	    intel_can_enable_sagv(dev_priv, old_bw_state) !=
912 	    intel_can_enable_sagv(dev_priv, new_bw_state))
913 		changed = true;
914 
915 	/*
916 	 * If none of our inputs (data rates, number of active
917 	 * planes, SAGV yes/no) changed then nothing to do here.
918 	 */
919 	if (!changed)
920 		return 0;
921 
922 	ret = intel_atomic_lock_global_state(&new_bw_state->base);
923 	if (ret)
924 		return ret;
925 
926 	data_rate = intel_bw_data_rate(dev_priv, new_bw_state);
927 	data_rate = DIV_ROUND_UP(data_rate, 1000);
928 
929 	num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state);
930 
931 	for (i = 0; i < num_qgv_points; i++) {
932 		unsigned int max_data_rate;
933 
934 		if (DISPLAY_VER(dev_priv) > 11)
935 			max_data_rate = tgl_max_bw(dev_priv, num_active_planes, i);
936 		else
937 			max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
938 		/*
939 		 * We need to know which qgv point gives us
940 		 * maximum bandwidth in order to disable SAGV
941 		 * if we find that we exceed SAGV block time
942 		 * with watermarks. By that moment we already
943 		 * have those, as it is calculated earlier in
944 		 * intel_atomic_check,
945 		 */
946 		if (max_data_rate > max_bw) {
947 			max_bw_point = i;
948 			max_bw = max_data_rate;
949 		}
950 		if (max_data_rate >= data_rate)
951 			allowed_points |= REG_FIELD_PREP(ADLS_QGV_PT_MASK, BIT(i));
952 
953 		drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n",
954 			    i, max_data_rate, data_rate);
955 	}
956 
957 	for (i = 0; i < num_psf_gv_points; i++) {
958 		unsigned int max_data_rate = adl_psf_bw(dev_priv, i);
959 
960 		if (max_data_rate >= data_rate)
961 			allowed_points |= REG_FIELD_PREP(ADLS_PSF_PT_MASK, BIT(i));
962 
963 		drm_dbg_kms(&dev_priv->drm, "PSF GV point %d: max bw %d"
964 			    " required %d\n",
965 			    i, max_data_rate, data_rate);
966 	}
967 
968 	/*
969 	 * BSpec states that we always should have at least one allowed point
970 	 * left, so if we couldn't - simply reject the configuration for obvious
971 	 * reasons.
972 	 */
973 	if ((allowed_points & ADLS_QGV_PT_MASK) == 0) {
974 		drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory"
975 			    " bandwidth %d for display configuration(%d active planes).\n",
976 			    data_rate, num_active_planes);
977 		return -EINVAL;
978 	}
979 
980 	if (num_psf_gv_points > 0) {
981 		if ((allowed_points & ADLS_PSF_PT_MASK) == 0) {
982 			drm_dbg_kms(&dev_priv->drm, "No PSF GV points provide sufficient memory"
983 				    " bandwidth %d for display configuration(%d active planes).\n",
984 				    data_rate, num_active_planes);
985 			return -EINVAL;
986 		}
987 	}
988 
989 	/*
990 	 * Leave only single point with highest bandwidth, if
991 	 * we can't enable SAGV due to the increased memory latency it may
992 	 * cause.
993 	 */
994 	if (!intel_can_enable_sagv(dev_priv, new_bw_state)) {
995 		allowed_points &= ADLS_PSF_PT_MASK;
996 		allowed_points |= BIT(max_bw_point);
997 		drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n",
998 			    max_bw_point);
999 	}
1000 	/*
1001 	 * We store the ones which need to be masked as that is what PCode
1002 	 * actually accepts as a parameter.
1003 	 */
1004 	new_bw_state->qgv_points_mask = ~allowed_points &
1005 		icl_qgv_points_mask(dev_priv);
1006 
1007 	/*
1008 	 * If the actual mask had changed we need to make sure that
1009 	 * the commits are serialized(in case this is a nomodeset, nonblocking)
1010 	 */
1011 	if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) {
1012 		ret = intel_atomic_serialize_global_state(&new_bw_state->base);
1013 		if (ret)
1014 			return ret;
1015 	}
1016 
1017 	return 0;
1018 }
1019 
1020 static struct intel_global_state *
1021 intel_bw_duplicate_state(struct intel_global_obj *obj)
1022 {
1023 	struct intel_bw_state *state;
1024 
1025 	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
1026 	if (!state)
1027 		return NULL;
1028 
1029 	return &state->base;
1030 }
1031 
/*
 * .atomic_destroy_state hook: free @state with kfree(). @obj is unused.
 */
static void intel_bw_destroy_state(struct intel_global_obj *obj,
				   struct intel_global_state *state)
{
	kfree(state);
}
1037 
1038 static const struct intel_global_state_funcs intel_bw_funcs = {
1039 	.atomic_duplicate_state = intel_bw_duplicate_state,
1040 	.atomic_destroy_state = intel_bw_destroy_state,
1041 };
1042 
1043 int intel_bw_init(struct drm_i915_private *dev_priv)
1044 {
1045 	struct intel_bw_state *state;
1046 
1047 	state = kzalloc(sizeof(*state), GFP_KERNEL);
1048 	if (!state)
1049 		return -ENOMEM;
1050 
1051 	intel_atomic_global_obj_init(dev_priv, &dev_priv->bw_obj,
1052 				     &state->base, &intel_bw_funcs);
1053 
1054 	return 0;
1055 }
1056