xref: /linux/drivers/gpu/drm/i915/gt/intel_sseu.c (revision d27656d02d85078c63f060fca9c5d99794791a75)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #include "i915_drv.h"
7 #include "intel_lrc_reg.h"
8 #include "intel_sseu.h"
9 
10 void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
11 			 u8 max_subslices, u8 max_eus_per_subslice)
12 {
13 	sseu->max_slices = max_slices;
14 	sseu->max_subslices = max_subslices;
15 	sseu->max_eus_per_subslice = max_eus_per_subslice;
16 
17 	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
18 	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
19 	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
20 	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
21 }
22 
23 unsigned int
24 intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
25 {
26 	unsigned int i, total = 0;
27 
28 	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
29 		total += hweight8(sseu->subslice_mask[i]);
30 
31 	return total;
32 }
33 
34 u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
35 {
36 	int i, offset = slice * sseu->ss_stride;
37 	u32 mask = 0;
38 
39 	GEM_BUG_ON(slice >= sseu->max_slices);
40 
41 	for (i = 0; i < sseu->ss_stride; i++)
42 		mask |= (u32)sseu->subslice_mask[offset + i] <<
43 			i * BITS_PER_BYTE;
44 
45 	return mask;
46 }
47 
48 void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
49 			      u8 *subslice_mask, u32 ss_mask)
50 {
51 	int offset = slice * sseu->ss_stride;
52 
53 	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
54 }
55 
56 unsigned int
57 intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
58 {
59 	return hweight32(intel_sseu_get_subslices(sseu, slice));
60 }
61 
62 static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
63 		       int subslice)
64 {
65 	int slice_stride = sseu->max_subslices * sseu->eu_stride;
66 
67 	return slice * slice_stride + subslice * sseu->eu_stride;
68 }
69 
70 static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
71 			int subslice)
72 {
73 	int i, offset = sseu_eu_idx(sseu, slice, subslice);
74 	u16 eu_mask = 0;
75 
76 	for (i = 0; i < sseu->eu_stride; i++)
77 		eu_mask |=
78 			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
79 
80 	return eu_mask;
81 }
82 
83 static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
84 			 u16 eu_mask)
85 {
86 	int i, offset = sseu_eu_idx(sseu, slice, subslice);
87 
88 	for (i = 0; i < sseu->eu_stride; i++)
89 		sseu->eu_mask[offset + i] =
90 			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
91 }
92 
93 static u16 compute_eu_total(const struct sseu_dev_info *sseu)
94 {
95 	u16 i, total = 0;
96 
97 	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
98 		total += hweight8(sseu->eu_mask[i]);
99 
100 	return total;
101 }
102 
103 static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
104 {
105 	u32 ss_mask;
106 
107 	ss_mask = ss_en >> (s * sseu->max_subslices);
108 	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
109 
110 	return ss_mask;
111 }
112 
113 static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
114 				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
115 {
116 	int s, ss;
117 
118 	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
119 	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
120 		   sizeof(g_ss_en) * BITS_PER_BYTE);
121 
122 	for (s = 0; s < sseu->max_slices; s++) {
123 		if ((s_en & BIT(s)) == 0)
124 			continue;
125 
126 		sseu->slice_mask |= BIT(s);
127 
128 		/*
129 		 * XeHP introduces the concept of compute vs geometry DSS. To
130 		 * reduce variation between GENs around subslice usage, store a
131 		 * mask for both the geometry and compute enabled masks since
132 		 * userspace will need to be able to query these masks
133 		 * independently.  Also compute a total enabled subslice count
134 		 * for the purposes of selecting subslices to use in a
135 		 * particular GEM context.
136 		 */
137 		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
138 					 get_ss_stride_mask(sseu, s, c_ss_en));
139 		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
140 					 get_ss_stride_mask(sseu, s, g_ss_en));
141 		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
142 					 get_ss_stride_mask(sseu, s,
143 							    g_ss_en | c_ss_en));
144 
145 		for (ss = 0; ss < sseu->max_subslices; ss++)
146 			if (intel_sseu_has_subslice(sseu, s, ss))
147 				sseu_set_eus(sseu, s, ss, eu_en);
148 	}
149 	sseu->eu_per_subslice = hweight16(eu_en);
150 	sseu->eu_total = compute_eu_total(sseu);
151 }
152 
153 static void gen12_sseu_info_init(struct intel_gt *gt)
154 {
155 	struct sseu_dev_info *sseu = &gt->info.sseu;
156 	struct intel_uncore *uncore = gt->uncore;
157 	u32 g_dss_en, c_dss_en = 0;
158 	u16 eu_en = 0;
159 	u8 eu_en_fuse;
160 	u8 s_en;
161 	int eu;
162 
163 	/*
164 	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
165 	 * Instead of splitting these, provide userspace with an array
166 	 * of DSS to more closely represent the hardware resource.
167 	 *
168 	 * In addition, the concept of slice has been removed in Xe_HP.
169 	 * To be compatible with prior generations, assume a single slice
170 	 * across the entire device. Then calculate out the DSS for each
171 	 * workload type within that software slice.
172 	 */
173 	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
174 		intel_sseu_set_info(sseu, 1, 32, 16);
175 	else
176 		intel_sseu_set_info(sseu, 1, 6, 16);
177 
178 	/*
179 	 * As mentioned above, Xe_HP does not have the concept of a slice.
180 	 * Enable one for software backwards compatibility.
181 	 */
182 	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
183 		s_en = 0x1;
184 	else
185 		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
186 		       GEN11_GT_S_ENA_MASK;
187 
188 	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
189 	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
190 		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);
191 
192 	/* one bit per pair of EUs */
193 	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
194 		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
195 	else
196 		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
197 			       GEN11_EU_DIS_MASK);
198 
199 	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
200 		if (eu_en_fuse & BIT(eu))
201 			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
202 
203 	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);
204 
205 	/* TGL only supports slice-level power gating */
206 	sseu->has_slice_pg = 1;
207 }
208 
209 static void gen11_sseu_info_init(struct intel_gt *gt)
210 {
211 	struct sseu_dev_info *sseu = &gt->info.sseu;
212 	struct intel_uncore *uncore = gt->uncore;
213 	u32 ss_en;
214 	u8 eu_en;
215 	u8 s_en;
216 
217 	if (IS_JSL_EHL(gt->i915))
218 		intel_sseu_set_info(sseu, 1, 4, 8);
219 	else
220 		intel_sseu_set_info(sseu, 1, 8, 8);
221 
222 	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
223 		GEN11_GT_S_ENA_MASK;
224 	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);
225 
226 	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
227 		  GEN11_EU_DIS_MASK);
228 
229 	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);
230 
231 	/* ICL has no power gating restrictions. */
232 	sseu->has_slice_pg = 1;
233 	sseu->has_subslice_pg = 1;
234 	sseu->has_eu_pg = 1;
235 }
236 
237 static void cherryview_sseu_info_init(struct intel_gt *gt)
238 {
239 	struct sseu_dev_info *sseu = &gt->info.sseu;
240 	u32 fuse;
241 	u8 subslice_mask = 0;
242 
243 	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);
244 
245 	sseu->slice_mask = BIT(0);
246 	intel_sseu_set_info(sseu, 1, 2, 8);
247 
248 	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
249 		u8 disabled_mask =
250 			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
251 			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
252 			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
253 			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
254 
255 		subslice_mask |= BIT(0);
256 		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
257 	}
258 
259 	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
260 		u8 disabled_mask =
261 			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
262 			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
263 			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
264 			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
265 
266 		subslice_mask |= BIT(1);
267 		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
268 	}
269 
270 	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);
271 
272 	sseu->eu_total = compute_eu_total(sseu);
273 
274 	/*
275 	 * CHV expected to always have a uniform distribution of EU
276 	 * across subslices.
277 	 */
278 	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
279 		sseu->eu_total /
280 		intel_sseu_subslice_total(sseu) :
281 		0;
282 	/*
283 	 * CHV supports subslice power gating on devices with more than
284 	 * one subslice, and supports EU power gating on devices with
285 	 * more than one EU pair per subslice.
286 	 */
287 	sseu->has_slice_pg = 0;
288 	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
289 	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
290 }
291 
292 static void gen9_sseu_info_init(struct intel_gt *gt)
293 {
294 	struct drm_i915_private *i915 = gt->i915;
295 	struct intel_device_info *info = mkwrite_device_info(i915);
296 	struct sseu_dev_info *sseu = &gt->info.sseu;
297 	struct intel_uncore *uncore = gt->uncore;
298 	u32 fuse2, eu_disable, subslice_mask;
299 	const u8 eu_mask = 0xff;
300 	int s, ss;
301 
302 	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
303 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
304 
305 	/* BXT has a single slice and at most 3 subslices. */
306 	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
307 			    IS_GEN9_LP(i915) ? 3 : 4, 8);
308 
309 	/*
310 	 * The subslice disable field is global, i.e. it applies
311 	 * to each of the enabled slices.
312 	 */
313 	subslice_mask = (1 << sseu->max_subslices) - 1;
314 	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
315 			   GEN9_F2_SS_DIS_SHIFT);
316 
317 	/*
318 	 * Iterate through enabled slices and subslices to
319 	 * count the total enabled EU.
320 	 */
321 	for (s = 0; s < sseu->max_slices; s++) {
322 		if (!(sseu->slice_mask & BIT(s)))
323 			/* skip disabled slice */
324 			continue;
325 
326 		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
327 					 subslice_mask);
328 
329 		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
330 		for (ss = 0; ss < sseu->max_subslices; ss++) {
331 			int eu_per_ss;
332 			u8 eu_disabled_mask;
333 
334 			if (!intel_sseu_has_subslice(sseu, s, ss))
335 				/* skip disabled subslice */
336 				continue;
337 
338 			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;
339 
340 			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
341 
342 			eu_per_ss = sseu->max_eus_per_subslice -
343 				hweight8(eu_disabled_mask);
344 
345 			/*
346 			 * Record which subslice(s) has(have) 7 EUs. we
347 			 * can tune the hash used to spread work among
348 			 * subslices if they are unbalanced.
349 			 */
350 			if (eu_per_ss == 7)
351 				sseu->subslice_7eu[s] |= BIT(ss);
352 		}
353 	}
354 
355 	sseu->eu_total = compute_eu_total(sseu);
356 
357 	/*
358 	 * SKL is expected to always have a uniform distribution
359 	 * of EU across subslices with the exception that any one
360 	 * EU in any one subslice may be fused off for die
361 	 * recovery. BXT is expected to be perfectly uniform in EU
362 	 * distribution.
363 	 */
364 	sseu->eu_per_subslice =
365 		intel_sseu_subslice_total(sseu) ?
366 		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
367 		0;
368 
369 	/*
370 	 * SKL+ supports slice power gating on devices with more than
371 	 * one slice, and supports EU power gating on devices with
372 	 * more than one EU pair per subslice. BXT+ supports subslice
373 	 * power gating on devices with more than one subslice, and
374 	 * supports EU power gating on devices with more than one EU
375 	 * pair per subslice.
376 	 */
377 	sseu->has_slice_pg =
378 		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
379 	sseu->has_subslice_pg =
380 		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
381 	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
382 
383 	if (IS_GEN9_LP(i915)) {
384 #define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
385 		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
386 
387 		sseu->min_eu_in_pool = 0;
388 		if (info->has_pooled_eu) {
389 			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
390 				sseu->min_eu_in_pool = 3;
391 			else if (IS_SS_DISABLED(1))
392 				sseu->min_eu_in_pool = 6;
393 			else
394 				sseu->min_eu_in_pool = 9;
395 		}
396 #undef IS_SS_DISABLED
397 	}
398 }
399 
400 static void bdw_sseu_info_init(struct intel_gt *gt)
401 {
402 	struct sseu_dev_info *sseu = &gt->info.sseu;
403 	struct intel_uncore *uncore = gt->uncore;
404 	int s, ss;
405 	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
406 	u32 eu_disable0, eu_disable1, eu_disable2;
407 
408 	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
409 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
410 	intel_sseu_set_info(sseu, 3, 3, 8);
411 
412 	/*
413 	 * The subslice disable field is global, i.e. it applies
414 	 * to each of the enabled slices.
415 	 */
416 	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
417 	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
418 			   GEN8_F2_SS_DIS_SHIFT);
419 	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
420 	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
421 	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
422 	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
423 	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
424 		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
425 		 (32 - GEN8_EU_DIS0_S1_SHIFT));
426 	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
427 		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
428 		 (32 - GEN8_EU_DIS1_S2_SHIFT));
429 
430 	/*
431 	 * Iterate through enabled slices and subslices to
432 	 * count the total enabled EU.
433 	 */
434 	for (s = 0; s < sseu->max_slices; s++) {
435 		if (!(sseu->slice_mask & BIT(s)))
436 			/* skip disabled slice */
437 			continue;
438 
439 		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
440 					 subslice_mask);
441 
442 		for (ss = 0; ss < sseu->max_subslices; ss++) {
443 			u8 eu_disabled_mask;
444 			u32 n_disabled;
445 
446 			if (!intel_sseu_has_subslice(sseu, s, ss))
447 				/* skip disabled subslice */
448 				continue;
449 
450 			eu_disabled_mask =
451 				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
452 
453 			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
454 
455 			n_disabled = hweight8(eu_disabled_mask);
456 
457 			/*
458 			 * Record which subslices have 7 EUs.
459 			 */
460 			if (sseu->max_eus_per_subslice - n_disabled == 7)
461 				sseu->subslice_7eu[s] |= 1 << ss;
462 		}
463 	}
464 
465 	sseu->eu_total = compute_eu_total(sseu);
466 
467 	/*
468 	 * BDW is expected to always have a uniform distribution of EU across
469 	 * subslices with the exception that any one EU in any one subslice may
470 	 * be fused off for die recovery.
471 	 */
472 	sseu->eu_per_subslice =
473 		intel_sseu_subslice_total(sseu) ?
474 		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
475 		0;
476 
477 	/*
478 	 * BDW supports slice power gating on devices with more than
479 	 * one slice.
480 	 */
481 	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
482 	sseu->has_subslice_pg = 0;
483 	sseu->has_eu_pg = 0;
484 }
485 
486 static void hsw_sseu_info_init(struct intel_gt *gt)
487 {
488 	struct drm_i915_private *i915 = gt->i915;
489 	struct sseu_dev_info *sseu = &gt->info.sseu;
490 	u32 fuse1;
491 	u8 subslice_mask = 0;
492 	int s, ss;
493 
494 	/*
495 	 * There isn't a register to tell us how many slices/subslices. We
496 	 * work off the PCI-ids here.
497 	 */
498 	switch (INTEL_INFO(i915)->gt) {
499 	default:
500 		MISSING_CASE(INTEL_INFO(i915)->gt);
501 		fallthrough;
502 	case 1:
503 		sseu->slice_mask = BIT(0);
504 		subslice_mask = BIT(0);
505 		break;
506 	case 2:
507 		sseu->slice_mask = BIT(0);
508 		subslice_mask = BIT(0) | BIT(1);
509 		break;
510 	case 3:
511 		sseu->slice_mask = BIT(0) | BIT(1);
512 		subslice_mask = BIT(0) | BIT(1);
513 		break;
514 	}
515 
516 	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
517 	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
518 	default:
519 		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
520 		fallthrough;
521 	case HSW_F1_EU_DIS_10EUS:
522 		sseu->eu_per_subslice = 10;
523 		break;
524 	case HSW_F1_EU_DIS_8EUS:
525 		sseu->eu_per_subslice = 8;
526 		break;
527 	case HSW_F1_EU_DIS_6EUS:
528 		sseu->eu_per_subslice = 6;
529 		break;
530 	}
531 
532 	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
533 			    hweight8(subslice_mask),
534 			    sseu->eu_per_subslice);
535 
536 	for (s = 0; s < sseu->max_slices; s++) {
537 		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
538 					 subslice_mask);
539 
540 		for (ss = 0; ss < sseu->max_subslices; ss++) {
541 			sseu_set_eus(sseu, s, ss,
542 				     (1UL << sseu->eu_per_subslice) - 1);
543 		}
544 	}
545 
546 	sseu->eu_total = compute_eu_total(sseu);
547 
548 	/* No powergating for you. */
549 	sseu->has_slice_pg = 0;
550 	sseu->has_subslice_pg = 0;
551 	sseu->has_eu_pg = 0;
552 }
553 
554 void intel_sseu_info_init(struct intel_gt *gt)
555 {
556 	struct drm_i915_private *i915 = gt->i915;
557 
558 	if (IS_HASWELL(i915))
559 		hsw_sseu_info_init(gt);
560 	else if (IS_CHERRYVIEW(i915))
561 		cherryview_sseu_info_init(gt);
562 	else if (IS_BROADWELL(i915))
563 		bdw_sseu_info_init(gt);
564 	else if (GRAPHICS_VER(i915) == 9)
565 		gen9_sseu_info_init(gt);
566 	else if (GRAPHICS_VER(i915) == 11)
567 		gen11_sseu_info_init(gt);
568 	else if (GRAPHICS_VER(i915) >= 12)
569 		gen12_sseu_info_init(gt);
570 }
571 
572 u32 intel_sseu_make_rpcs(struct intel_gt *gt,
573 			 const struct intel_sseu *req_sseu)
574 {
575 	struct drm_i915_private *i915 = gt->i915;
576 	const struct sseu_dev_info *sseu = &gt->info.sseu;
577 	bool subslice_pg = sseu->has_subslice_pg;
578 	u8 slices, subslices;
579 	u32 rpcs = 0;
580 
581 	/*
582 	 * No explicit RPCS request is needed to ensure full
583 	 * slice/subslice/EU enablement prior to Gen9.
584 	 */
585 	if (GRAPHICS_VER(i915) < 9)
586 		return 0;
587 
588 	/*
589 	 * If i915/perf is active, we want a stable powergating configuration
590 	 * on the system. Use the configuration pinned by i915/perf.
591 	 */
592 	if (i915->perf.exclusive_stream)
593 		req_sseu = &i915->perf.sseu;
594 
595 	slices = hweight8(req_sseu->slice_mask);
596 	subslices = hweight8(req_sseu->subslice_mask);
597 
598 	/*
599 	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
600 	 * wide and Icelake has up to eight subslices, specfial programming is
601 	 * needed in order to correctly enable all subslices.
602 	 *
603 	 * According to documentation software must consider the configuration
604 	 * as 2x4x8 and hardware will translate this to 1x8x8.
605 	 *
606 	 * Furthemore, even though SScount is three bits, maximum documented
607 	 * value for it is four. From this some rules/restrictions follow:
608 	 *
609 	 * 1.
610 	 * If enabled subslice count is greater than four, two whole slices must
611 	 * be enabled instead.
612 	 *
613 	 * 2.
614 	 * When more than one slice is enabled, hardware ignores the subslice
615 	 * count altogether.
616 	 *
617 	 * From these restrictions it follows that it is not possible to enable
618 	 * a count of subslices between the SScount maximum of four restriction,
619 	 * and the maximum available number on a particular SKU. Either all
620 	 * subslices are enabled, or a count between one and four on the first
621 	 * slice.
622 	 */
623 	if (GRAPHICS_VER(i915) == 11 &&
624 	    slices == 1 &&
625 	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
626 		GEM_BUG_ON(subslices & 1);
627 
628 		subslice_pg = false;
629 		slices *= 2;
630 	}
631 
632 	/*
633 	 * Starting in Gen9, render power gating can leave
634 	 * slice/subslice/EU in a partially enabled state. We
635 	 * must make an explicit request through RPCS for full
636 	 * enablement.
637 	 */
638 	if (sseu->has_slice_pg) {
639 		u32 mask, val = slices;
640 
641 		if (GRAPHICS_VER(i915) >= 11) {
642 			mask = GEN11_RPCS_S_CNT_MASK;
643 			val <<= GEN11_RPCS_S_CNT_SHIFT;
644 		} else {
645 			mask = GEN8_RPCS_S_CNT_MASK;
646 			val <<= GEN8_RPCS_S_CNT_SHIFT;
647 		}
648 
649 		GEM_BUG_ON(val & ~mask);
650 		val &= mask;
651 
652 		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
653 	}
654 
655 	if (subslice_pg) {
656 		u32 val = subslices;
657 
658 		val <<= GEN8_RPCS_SS_CNT_SHIFT;
659 
660 		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
661 		val &= GEN8_RPCS_SS_CNT_MASK;
662 
663 		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
664 	}
665 
666 	if (sseu->has_eu_pg) {
667 		u32 val;
668 
669 		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
670 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
671 		val &= GEN8_RPCS_EU_MIN_MASK;
672 
673 		rpcs |= val;
674 
675 		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
676 		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
677 		val &= GEN8_RPCS_EU_MAX_MASK;
678 
679 		rpcs |= val;
680 
681 		rpcs |= GEN8_RPCS_ENABLE;
682 	}
683 
684 	return rpcs;
685 }
686 
687 void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
688 {
689 	int s;
690 
691 	drm_printf(p, "slice total: %u, mask=%04x\n",
692 		   hweight8(sseu->slice_mask), sseu->slice_mask);
693 	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
694 	for (s = 0; s < sseu->max_slices; s++) {
695 		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
696 			   s, intel_sseu_subslices_per_slice(sseu, s),
697 			   intel_sseu_get_subslices(sseu, s));
698 	}
699 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
700 	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
701 	drm_printf(p, "has slice power gating: %s\n",
702 		   yesno(sseu->has_slice_pg));
703 	drm_printf(p, "has subslice power gating: %s\n",
704 		   yesno(sseu->has_subslice_pg));
705 	drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
706 }
707 
708 void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
709 			       struct drm_printer *p)
710 {
711 	int s, ss;
712 
713 	if (sseu->max_slices == 0) {
714 		drm_printf(p, "Unavailable\n");
715 		return;
716 	}
717 
718 	for (s = 0; s < sseu->max_slices; s++) {
719 		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
720 			   s, intel_sseu_subslices_per_slice(sseu, s),
721 			   intel_sseu_get_subslices(sseu, s));
722 
723 		for (ss = 0; ss < sseu->max_subslices; ss++) {
724 			u16 enabled_eus = sseu_get_eus(sseu, s, ss);
725 
726 			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
727 				   ss, hweight16(enabled_eus), enabled_eus);
728 		}
729 	}
730 }
731 
732 u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
733 {
734 	u16 slice_mask = 0;
735 	int i;
736 
737 	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));
738 
739 	for (i = 0; dss_mask; i++) {
740 		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
741 			slice_mask |= BIT(i);
742 
743 		dss_mask >>= dss_per_slice;
744 	}
745 
746 	return slice_mask;
747 }
748 
749