xref: /linux/drivers/gpu/drm/i915/gt/intel_sseu.c (revision b9d7eb6a31be296ca0af95641a23c4c758703c0a)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

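/*
 * Record the maximum SSEU topology (slice, subslice and EU counts) for the
 * device and derive the byte strides used to index the packed subslice and
 * EU bitmasks stored in struct sseu_dev_info.
 */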
void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

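/* Count the total number of enabled subslices across all slices. */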
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

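/*
 * Assemble the subslice mask for @slice from the ss_stride bytes stored for
 * that slice; e.g. with ss_stride == 2, subslice_mask[offset] provides bits
 * 0-7 and subslice_mask[offset + 1] bits 8-15 of the returned mask.
 */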
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)sseu->subslice_mask[offset + i] <<
			i * BITS_PER_BYTE;

	return mask;
}

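/* Store @ss_mask in the ss_stride bytes reserved for @slice in @subslice_mask. */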
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u8 *subslice_mask, u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

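/*
 * The EU mask is stored as eu_stride bytes per (slice, subslice) pair;
 * sseu_eu_idx() returns the byte offset of a given pair within
 * sseu->eu_mask, and the helpers below read/write the packed mask.
 */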
static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
		       int subslice)
{
	int slice_stride = sseu->max_subslices * sseu->eu_stride;

	return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);
	u16 eu_mask = 0;

	for (i = 0; i < sseu->eu_stride; i++)
		eu_mask |=
			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

	return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);

	for (i = 0; i < sseu->eu_stride; i++)
		sseu->eu_mask[offset + i] =
			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	u16 i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
		total += hweight8(sseu->eu_mask[i]);

	return total;
}

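/* Extract slice @s's portion of a device-wide subslice enable mask. */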
static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
{
	u32 ss_mask;

	ss_mask = ss_en >> (s * sseu->max_subslices);
	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);

	return ss_mask;
}

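/*
 * Common helper for gen11+ platforms: populate the sseu_dev_info bitmasks
 * and totals from the slice, subslice and EU enable masks read from the
 * hardware fuses.
 */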
static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
	int s, ss;

	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
		   sizeof(g_ss_en) * BITS_PER_BYTE);

	for (s = 0; s < sseu->max_slices; s++) {
		if ((s_en & BIT(s)) == 0)
			continue;

		sseu->slice_mask |= BIT(s);

		/*
		 * XeHP introduces the concept of compute vs geometry DSS. To
		 * reduce variation between GENs around subslice usage, store a
		 * mask for both the geometry and compute enabled masks since
		 * userspace will need to be able to query these masks
		 * independently.  Also compute a total enabled subslice count
		 * for the purposes of selecting subslices to use in a
		 * particular GEM context.
		 */
		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
					 get_ss_stride_mask(sseu, s, c_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
					 get_ss_stride_mask(sseu, s, g_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 get_ss_stride_mask(sseu, s,
							    g_ss_en | c_ss_en));

		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (intel_sseu_has_subslice(sseu, s, ss))
				sseu_set_eus(sseu, s, ss, eu_en);
	}
	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 g_dss_en, c_dss_en = 0;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array
	 * of DSS to more closely represent the hardware resource.
	 *
	 * In addition, the concept of slice has been removed in Xe_HP.
	 * To be compatible with prior generations, assume a single slice
	 * across the entire device. Then calculate out the DSS for each
	 * workload type within that software slice.
	 */
	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
		intel_sseu_set_info(sseu, 1, 32, 16);
	else
		intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * As mentioned above, Xe_HP does not have the concept of a slice.
	 * Enable one for software backwards compatibility.
	 */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		s_en = 0x1;
	else
		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		       GEN11_GT_S_ENA_MASK;

	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);

	/* one bit per pair of EUs */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
	else
		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
			       GEN11_EU_DIS_MASK);

	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JSL_EHL(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		GEN11_GT_S_ENA_MASK;
	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;
	u8 subslice_mask = 0;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		subslice_mask |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		subslice_mask |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
	}

	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EUs
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
		sseu->eu_total /
		intel_sseu_subslice_total(sseu) :
		0;
	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *info = mkwrite_device_info(i915);
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				hweight8(eu_disabled_mask);
			/*
			 * Record which subslice(s) have 7 EUs. We can
			 * tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (info->has_pooled_eu) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}

static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
		 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
		 (32 - GEN8_EU_DIS1_S2_SHIFT));

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices. We
	 * work off the PCI-ids here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
	default:
		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

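/* Determine the device's SSEU topology via the platform-appropriate probe routine. */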
void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 9)
		gen9_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
}

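/*
 * Build the GEN8_R_PWR_CLK_STATE (RPCS) value that requests the
 * slice/subslice/EU configuration described by @req_sseu, within the limits
 * advertised in gt->info.sseu.
 */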
u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (i915->perf.exclusive_stream)
		req_sseu = &i915->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits wide, the maximum
	 * documented value for it is four. From this some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If the enabled subslice count is greater than four, two whole slices
	 * must be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a subslice count between the SScount maximum of four and the maximum
	 * number available on a particular SKU. Either all subslices are
	 * enabled, or a count between one and four on the first slice.
	 */
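	/*
	 * For example, on an ICL part with all eight subslices present, a
	 * request for 1 slice x 8 subslices is converted below into 2 slices
	 * with subslice power gating disabled, which the hardware then treats
	 * as 1x8.
	 */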
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

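/* Print a summary of the SSEU topology and power-gating capabilities. */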
void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	drm_printf(p, "slice total: %u, mask=%04x\n",
		   hweight8(sseu->slice_mask), sseu->slice_mask);
	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));
	}
	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   yesno(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   yesno(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
}

void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	int s, ss;

	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
		return;
	}

	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}

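/*
 * Convert a mask of enabled DSS into a mask of the slices that contain them,
 * given @dss_per_slice DSS per slice; e.g. a dss_mask of 0x30 with
 * dss_per_slice == 4 yields a slice_mask of 0x2.
 */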
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

	for (i = 0; dss_mask; i++) {
		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}