xref: /linux/drivers/gpu/drm/xe/xe_gt_topology.c (revision f6e8dc9edf963dbc99085e54f6ced6da9daa6100)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_gt_topology.h"
7 
8 #include <generated/xe_wa_oob.h>
9 #include <linux/bitmap.h>
10 #include <linux/compiler.h>
11 
12 #include "regs/xe_gt_regs.h"
13 #include "xe_assert.h"
14 #include "xe_gt.h"
15 #include "xe_gt_mcr.h"
16 #include "xe_gt_printk.h"
17 #include "xe_mmio.h"
18 #include "xe_wa.h"
19 
20 static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs,
21 			  const struct xe_reg regs[])
22 {
23 	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
24 	int i;
25 
26 	xe_gt_assert(gt, numregs <= ARRAY_SIZE(fuse_val));
27 
28 	for (i = 0; i < numregs; i++)
29 		fuse_val[i] = xe_mmio_read32(&gt->mmio, regs[i]);
30 
31 	bitmap_from_arr32(mask, fuse_val, numregs * 32);
32 }
33 
34 static void
35 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
36 {
37 	struct xe_device *xe = gt_to_xe(gt);
38 	u32 reg_val = xe_mmio_read32(&gt->mmio, XELP_EU_ENABLE);
39 	u32 val = 0;
40 	int i;
41 
42 	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
43 
44 	/*
45 	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
46 	 * of enable).
47 	 */
48 	if (GRAPHICS_VERx100(xe) < 1250)
49 		reg_val = ~reg_val & XELP_EU_MASK;
50 
51 	if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
52 		/* SIMD16 EUs, one bit == one EU */
53 		*eu_type = XE_GT_EU_TYPE_SIMD16;
54 		val = reg_val;
55 	} else {
56 		/* SIMD8 EUs, one bit == 2 EU */
57 		*eu_type = XE_GT_EU_TYPE_SIMD8;
58 		for (i = 0; i < fls(reg_val); i++)
59 			if (reg_val & BIT(i))
60 				val |= 0x3 << 2 * i;
61 	}
62 
63 	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
64 }
65 
66 /**
67  * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
68  *
69  * It is used to compute the L3 bank masks in a generic format on
70  * various platforms where the internal representation of L3 node
71  * and masks from registers are different.
72  *
73  * @xe: device
74  * @dst: destination
75  * @pattern: pattern to replicate
76  * @patternbits: size of the pattern, in bits
77  * @mask: mask describing where to replicate the pattern
78  *
79  * Example 1:
80  * ----------
81  * @pattern =    0b1111
82  *                 └┬─┘
83  * @patternbits =   4 (bits)
84  * @mask = 0b0101
85  *           ││││
86  *           │││└────────────────── 0b1111 (=1×0b1111)
87  *           ││└──────────── 0b0000    │   (=0×0b1111)
88  *           │└────── 0b1111    │      │   (=1×0b1111)
89  *           └ 0b0000    │      │      │   (=0×0b1111)
90  *                │      │      │      │
91  * @dst =      0b0000 0b1111 0b0000 0b1111
92  *
93  * Example 2:
94  * ----------
95  * @pattern =    0b11111111
96  *                 └┬─────┘
97  * @patternbits =   8 (bits)
98  * @mask = 0b10
99  *           ││
100  *           ││
101  *           ││
102  *           │└────────── 0b00000000 (=0×0b11111111)
103  *           └ 0b11111111      │     (=1×0b11111111)
104  *                  │          │
105  * @dst =      0b11111111 0b00000000
106  */
107 static void
108 gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
109 			 xe_l3_bank_mask_t pattern, int patternbits,
110 			 unsigned long mask)
111 {
112 	unsigned long bit;
113 
114 	xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
115 		  bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
116 	xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);
117 	for_each_set_bit(bit, &mask, 32) {
118 		xe_l3_bank_mask_t shifted_pattern = {};
119 
120 		bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
121 				  XE_MAX_L3_BANK_MASK_BITS);
122 		bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
123 	}
124 }
125 
126 bool xe_gt_topology_report_l3(struct xe_gt *gt)
127 {
128 	/*
129 	 * No known userspace needs/uses the L3 bank mask reported by
130 	 * the media GT, and the hardware itself is known to report bogus
131 	 * values on several platforms.  Only report L3 bank mask as part
132 	 * of the media GT's topology on pre-Xe3 platforms since that's
133 	 * already part of our ABI.
134 	 */
135 	if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) >= 30)
136 		return false;
137 
138 	return true;
139 }
140 
141 static void
142 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
143 {
144 	struct xe_device *xe = gt_to_xe(gt);
145 	struct xe_mmio *mmio = &gt->mmio;
146 	u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3);
147 
148 	if (!xe_gt_topology_report_l3(gt))
149 		return;
150 
151 	if (GRAPHICS_VER(xe) >= 35) {
152 		u32 fuse_val = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE);
153 
154 		bitmap_from_arr32(l3_bank_mask, &fuse_val, 32);
155 	} else if (GRAPHICS_VER(xe) >= 30) {
156 		xe_l3_bank_mask_t per_node = {};
157 		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
158 		u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE);
159 		u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable);
160 
161 		bitmap_from_arr32(per_node, &bank_val, 32);
162 		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32,
163 					 meml3_en);
164 	} else if (GRAPHICS_VER(xe) >= 20) {
165 		xe_l3_bank_mask_t per_node = {};
166 		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
167 		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
168 
169 		bitmap_from_arr32(per_node, &bank_val, 32);
170 		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
171 					 meml3_en);
172 	} else if (GRAPHICS_VERx100(xe) >= 1270) {
173 		xe_l3_bank_mask_t per_node = {};
174 		xe_l3_bank_mask_t per_mask_bit = {};
175 		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
176 		u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4);
177 		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
178 
179 		bitmap_set_value8(per_mask_bit, 0x3, 0);
180 		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
181 		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
182 					 meml3_en);
183 	} else if (xe->info.platform == XE_PVC) {
184 		xe_l3_bank_mask_t per_node = {};
185 		xe_l3_bank_mask_t per_mask_bit = {};
186 		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
187 		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);
188 
189 		bitmap_set_value8(per_mask_bit, 0xf, 0);
190 		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
191 					 bank_val);
192 		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
193 					 meml3_en);
194 	} else if (xe->info.platform == XE_DG2) {
195 		xe_l3_bank_mask_t per_node = {};
196 		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
197 
198 		bitmap_set_value8(per_node, 0xff, 0);
199 		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
200 	} else {
201 		/* 1:1 register bit to mask bit (inverted register bits) */
202 		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);
203 
204 		bitmap_from_arr32(l3_bank_mask, &mask, 32);
205 	}
206 }
207 
/*
 * Report how many geometry and compute DSS fuse registers the platform
 * exposes; the counts index into the register tables in
 * xe_gt_topology_init().
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	if (GRAPHICS_VER(xe) > 20) {
		*geometry_regs = 3;
		*compute_regs = 3;
		return;
	}

	if (GRAPHICS_VERx100(xe) == 1260) {
		/* PVC is compute-only: no geometry DSS fusing. */
		*geometry_regs = 0;
		*compute_regs = 2;
		return;
	}

	if (GRAPHICS_VERx100(xe) >= 1250) {
		*geometry_regs = 1;
		*compute_regs = 1;
		return;
	}

	/* Pre-Xe_HP platforms have no compute DSS registers. */
	*geometry_regs = 1;
	*compute_regs = 0;
}
225 
226 void
227 xe_gt_topology_init(struct xe_gt *gt)
228 {
229 	static const struct xe_reg geometry_regs[] = {
230 		XELP_GT_GEOMETRY_DSS_ENABLE,
231 		XE2_GT_GEOMETRY_DSS_1,
232 		XE2_GT_GEOMETRY_DSS_2,
233 	};
234 	static const struct xe_reg compute_regs[] = {
235 		XEHP_GT_COMPUTE_DSS_ENABLE,
236 		XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
237 		XE2_GT_COMPUTE_DSS_2,
238 	};
239 	int num_geometry_regs, num_compute_regs;
240 	struct xe_device *xe = gt_to_xe(gt);
241 	struct drm_printer p;
242 
243 	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
244 
245 	/*
246 	 * Register counts returned shouldn't exceed the number of registers
247 	 * passed as parameters below.
248 	 */
249 	xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs));
250 	xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs));
251 
252 	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
253 		      num_geometry_regs, geometry_regs);
254 	load_dss_mask(gt, gt->fuse_topo.c_dss_mask,
255 		      num_compute_regs, compute_regs);
256 
257 	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
258 	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
259 
260 	p = xe_gt_dbg_printer(gt);
261 	xe_gt_topology_dump(gt, &p);
262 }
263 
/* Human-readable name for an EU type; NULL for unknown values. */
static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
{
	/*
	 * No default case on purpose: a switch covering all enumerators
	 * lets the compiler warn if a new EU type is added but not
	 * handled here.
	 */
	switch (eu_type) {
	case XE_GT_EU_TYPE_SIMD16:
		return "simd16";
	case XE_GT_EU_TYPE_SIMD8:
		return "simd8";
	}

	return NULL;
}
275 
276 /**
277  * xe_gt_topology_dump() - Dump GT topology into a drm printer.
278  * @gt: the &xe_gt
279  * @p: the &drm_printer
280  *
281  * Return: always 0.
282  */
int xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
{
	/* %*pb prints a bitmap with the given bit width. */
	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.g_dss_mask);
	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.c_dss_mask);

	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
		   gt->fuse_topo.eu_mask_per_dss);
	drm_printf(p, "EU type:             %s\n",
		   eu_type_to_str(gt->fuse_topo.eu_type));

	/* L3 info is suppressed where the hardware reports bogus values. */
	if (xe_gt_topology_report_l3(gt))
		drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
			   gt->fuse_topo.l3_bank_mask);
	return 0;
}
300 
301 /*
302  * Used to obtain the index of the first DSS.  Can start searching from the
303  * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
304  * groupsize and groupnum are non-zero.
305  */
306 unsigned int
307 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
308 {
309 	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
310 }
311 
312 /**
313  * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
314  * @gt: GT to check
315  * @quad: Which quadrant of the DSS space to check
316  *
317  * Since Xe_HP platforms can have up to four CCS engines, those engines
318  * are each logically associated with a quarter of the possible DSS.  If there
319  * are no DSS present in one of the four quadrants of the DSS space, the
320  * corresponding CCS engine is also not available for use.
321  *
322  * Returns false if all DSS in a quadrant of the GT are fused off, else true.
323  */
324 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
325 {
326 	struct xe_device *xe = gt_to_xe(gt);
327 	xe_dss_mask_t all_dss;
328 	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
329 
330 	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
331 		  XE_MAX_DSS_FUSE_BITS);
332 
333 	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
334 	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
335 
336 	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
337 
338 	return quad_first < (quad + 1) * dss_per_quad;
339 }
340 
/* Is the given DSS index enabled for geometry work on this GT? */
bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.g_dss_mask);
}
345 
/* Is the given DSS index enabled for compute work on this GT? */
bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.c_dss_mask);
}
350 
351 bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt)
352 {
353 	unsigned int xecore;
354 	int last_group = -1;
355 	u16 group, instance;
356 
357 	for_each_dss_steering(xecore, gt, group, instance) {
358 		if (last_group != group) {
359 			if (group - last_group > 1)
360 				return true;
361 			last_group = group;
362 		}
363 	}
364 	return false;
365 }
366