xref: /linux/drivers/gpu/drm/xe/xe_gt_topology.c (revision 79d2e1919a2728ef49d938eb20ebd5903c14dfb0)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_gt_topology.h"
7 
8 #include <generated/xe_wa_oob.h>
9 #include <linux/bitmap.h>
10 #include <linux/compiler.h>
11 
12 #include "regs/xe_gt_regs.h"
13 #include "xe_assert.h"
14 #include "xe_gt.h"
15 #include "xe_mmio.h"
16 #include "xe_wa.h"
17 
18 static void
19 load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
20 {
21 	va_list argp;
22 	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
23 	int i;
24 
25 	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
26 		numregs = XE_MAX_DSS_FUSE_REGS;
27 
28 	va_start(argp, numregs);
29 	for (i = 0; i < numregs; i++)
30 		fuse_val[i] = xe_mmio_read32(&gt->mmio, va_arg(argp, struct xe_reg));
31 	va_end(argp);
32 
33 	bitmap_from_arr32(mask, fuse_val, numregs * 32);
34 }
35 
36 static void
37 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
38 {
39 	struct xe_device *xe = gt_to_xe(gt);
40 	u32 reg_val = xe_mmio_read32(&gt->mmio, XELP_EU_ENABLE);
41 	u32 val = 0;
42 	int i;
43 
44 	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
45 
46 	/*
47 	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
48 	 * of enable).
49 	 */
50 	if (GRAPHICS_VERx100(xe) < 1250)
51 		reg_val = ~reg_val & XELP_EU_MASK;
52 
53 	if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
54 		/* SIMD16 EUs, one bit == one EU */
55 		*eu_type = XE_GT_EU_TYPE_SIMD16;
56 		val = reg_val;
57 	} else {
58 		/* SIMD8 EUs, one bit == 2 EU */
59 		*eu_type = XE_GT_EU_TYPE_SIMD8;
60 		for (i = 0; i < fls(reg_val); i++)
61 			if (reg_val & BIT(i))
62 				val |= 0x3 << 2 * i;
63 	}
64 
65 	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
66 }
67 
68 /**
69  * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
70  *
71  * It is used to compute the L3 bank masks in a generic format on
72  * various platforms where the internal representation of L3 node
73  * and masks from registers are different.
74  *
75  * @xe: device
76  * @dst: destination
77  * @pattern: pattern to replicate
78  * @patternbits: size of the pattern, in bits
79  * @mask: mask describing where to replicate the pattern
80  *
81  * Example 1:
82  * ----------
83  * @pattern =    0b1111
84  *                 └┬─┘
85  * @patternbits =   4 (bits)
86  * @mask = 0b0101
87  *           ││││
88  *           │││└────────────────── 0b1111 (=1×0b1111)
89  *           ││└──────────── 0b0000    │   (=0×0b1111)
90  *           │└────── 0b1111    │      │   (=1×0b1111)
91  *           └ 0b0000    │      │      │   (=0×0b1111)
92  *                │      │      │      │
93  * @dst =      0b0000 0b1111 0b0000 0b1111
94  *
95  * Example 2:
96  * ----------
97  * @pattern =    0b11111111
98  *                 └┬─────┘
99  * @patternbits =   8 (bits)
100  * @mask = 0b10
101  *           ││
102  *           ││
103  *           ││
104  *           │└────────── 0b00000000 (=0×0b11111111)
105  *           └ 0b11111111      │     (=1×0b11111111)
106  *                  │          │
107  * @dst =      0b11111111 0b00000000
108  */
109 static void
110 gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
111 			 xe_l3_bank_mask_t pattern, int patternbits,
112 			 unsigned long mask)
113 {
114 	unsigned long bit;
115 
116 	xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
117 		  bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
118 	xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);
119 	for_each_set_bit(bit, &mask, 32) {
120 		xe_l3_bank_mask_t shifted_pattern = {};
121 
122 		bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
123 				  XE_MAX_L3_BANK_MASK_BITS);
124 		bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
125 	}
126 }
127 
128 static void
129 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
130 {
131 	struct xe_device *xe = gt_to_xe(gt);
132 	u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
133 
134 	/*
135 	 * PTL platforms with media version 30.00 do not provide proper values
136 	 * for the media GT's L3 bank registers.  Skip the readout since we
137 	 * don't have any way to obtain real values.
138 	 *
139 	 * This may get re-described as an official workaround in the future,
140 	 * but there's no tracking number assigned yet so we use a custom
141 	 * OOB workaround descriptor.
142 	 */
143 	if (XE_WA(gt, no_media_l3))
144 		return;
145 
146 	if (GRAPHICS_VER(xe) >= 20) {
147 		xe_l3_bank_mask_t per_node = {};
148 		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
149 		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
150 
151 		bitmap_from_arr32(per_node, &bank_val, 32);
152 		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
153 					 meml3_en);
154 	} else if (GRAPHICS_VERx100(xe) >= 1270) {
155 		xe_l3_bank_mask_t per_node = {};
156 		xe_l3_bank_mask_t per_mask_bit = {};
157 		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
158 		u32 fuse4 = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
159 		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
160 
161 		bitmap_set_value8(per_mask_bit, 0x3, 0);
162 		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
163 		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
164 					 meml3_en);
165 	} else if (xe->info.platform == XE_PVC) {
166 		xe_l3_bank_mask_t per_node = {};
167 		xe_l3_bank_mask_t per_mask_bit = {};
168 		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
169 		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);
170 
171 		bitmap_set_value8(per_mask_bit, 0xf, 0);
172 		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
173 					 bank_val);
174 		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
175 					 meml3_en);
176 	} else if (xe->info.platform == XE_DG2) {
177 		xe_l3_bank_mask_t per_node = {};
178 		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
179 
180 		bitmap_set_value8(per_node, 0xff, 0);
181 		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
182 	} else {
183 		/* 1:1 register bit to mask bit (inverted register bits) */
184 		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);
185 
186 		bitmap_from_arr32(l3_bank_mask, &mask, 32);
187 	}
188 }
189 
/*
 * Select the number of geometry and compute DSS registers for the graphics
 * version of @xe and return them through @geometry_regs and @compute_regs.
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	int geometry, compute;

	if (GRAPHICS_VER(xe) > 20) {
		geometry = 3;
		compute = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		geometry = 0;
		compute = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		geometry = 1;
		compute = 1;
	} else {
		geometry = 1;
		compute = 0;
	}

	*geometry_regs = geometry;
	*compute_regs = compute;
}
207 
208 void
209 xe_gt_topology_init(struct xe_gt *gt)
210 {
211 	struct xe_device *xe = gt_to_xe(gt);
212 	struct drm_printer p;
213 	int num_geometry_regs, num_compute_regs;
214 
215 	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
216 
217 	/*
218 	 * Register counts returned shouldn't exceed the number of registers
219 	 * passed as parameters below.
220 	 */
221 	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
222 	drm_WARN_ON(&xe->drm, num_compute_regs > 3);
223 
224 	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
225 		      num_geometry_regs,
226 		      XELP_GT_GEOMETRY_DSS_ENABLE,
227 		      XE2_GT_GEOMETRY_DSS_1,
228 		      XE2_GT_GEOMETRY_DSS_2);
229 	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
230 		      XEHP_GT_COMPUTE_DSS_ENABLE,
231 		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
232 		      XE2_GT_COMPUTE_DSS_2);
233 	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
234 	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
235 
236 	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
237 
238 	xe_gt_topology_dump(gt, &p);
239 }
240 
241 static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
242 {
243 	switch (eu_type) {
244 	case XE_GT_EU_TYPE_SIMD16:
245 		return "simd16";
246 	case XE_GT_EU_TYPE_SIMD8:
247 		return "simd8";
248 	}
249 
250 	return NULL;
251 }
252 
253 void
254 xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
255 {
256 	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
257 		   gt->fuse_topo.g_dss_mask);
258 	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
259 		   gt->fuse_topo.c_dss_mask);
260 
261 	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
262 		   gt->fuse_topo.eu_mask_per_dss);
263 	drm_printf(p, "EU type:             %s\n",
264 		   eu_type_to_str(gt->fuse_topo.eu_type));
265 
266 	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
267 		   gt->fuse_topo.l3_bank_mask);
268 }
269 
270 /*
271  * Used to obtain the index of the first DSS.  Can start searching from the
272  * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
273  * groupsize and groupnum are non-zero.
274  */
275 unsigned int
276 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
277 {
278 	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
279 }
280 
281 bool xe_dss_mask_empty(const xe_dss_mask_t mask)
282 {
283 	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
284 }
285 
286 /**
287  * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
288  * @gt: GT to check
289  * @quad: Which quadrant of the DSS space to check
290  *
291  * Since Xe_HP platforms can have up to four CCS engines, those engines
292  * are each logically associated with a quarter of the possible DSS.  If there
293  * are no DSS present in one of the four quadrants of the DSS space, the
294  * corresponding CCS engine is also not available for use.
295  *
296  * Returns false if all DSS in a quadrant of the GT are fused off, else true.
297  */
298 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
299 {
300 	struct xe_device *xe = gt_to_xe(gt);
301 	xe_dss_mask_t all_dss;
302 	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
303 
304 	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
305 		  XE_MAX_DSS_FUSE_BITS);
306 
307 	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
308 	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
309 
310 	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
311 
312 	return quad_first < (quad + 1) * dss_per_quad;
313 }
314 
315 bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
316 {
317 	return test_bit(dss, gt->fuse_topo.g_dss_mask);
318 }
319 
320 bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
321 {
322 	return test_bit(dss, gt->fuse_topo.c_dss_mask);
323 }
324