xref: /linux/drivers/gpu/drm/xe/xe_gt_topology.c (revision a9aaf1ff88a8cb99a1335c9eb76de637f0cf8c10)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_gt_topology.h"

#include <linux/bitmap.h>

#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_mmio.h"

static void
load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
{
	va_list argp;
	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
	int i;

	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
		numregs = XE_MAX_DSS_FUSE_REGS;

	va_start(argp, numregs);
	for (i = 0; i < numregs; i++)
		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
	va_end(argp);

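	/*
	 * bitmap_from_arr32() lays the registers out back to back: e.g. with
	 * numregs == 2, fuse register 0 supplies mask bits 0..31 and fuse
	 * register 1 supplies bits 32..63.
	 */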
	bitmap_from_arr32(mask, fuse_val, numregs * 32);
}

static void
load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
	u32 val = 0;
	int i;

	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);

	/*
	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
	 * of enable).
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		reg_val = ~reg_val & XELP_EU_MASK;

	/* On PVC, one bit = one EU */
	if (GRAPHICS_VERx100(xe) == 1260) {
		val = reg_val;
	} else {
		/* All other platforms, one bit = 2 EU */
		for (i = 0; i < fls(reg_val); i++)
			if (reg_val & BIT(i))
				val |= 0x3 << 2 * i;
	}
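	/*
	 * E.g. (illustrative value) reg_val = 0b0101 enables EU pairs 0 and 2,
	 * so the loop above yields val = 0b00110011, i.e. EUs 0, 1, 4 and 5
	 * of the DSS.
	 */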

	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
}

/**
 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
 *
 * Used to compute the L3 bank masks in a generic format on various
 * platforms where the internal representation of the L3 node and bank
 * masks differs from the register layout.
 *
 * @xe: device
 * @dst: destination
 * @pattern: pattern to replicate
 * @patternbits: size of the pattern, in bits
 * @mask: mask describing where to replicate the pattern
 *
 * Example 1:
 * ----------
 * @pattern =    0b1111
 *                 └┬─┘
 * @patternbits =   4 (bits)
 * @mask = 0b0101
 *           ││││
 *           │││└────────────────── 0b1111 (=1×0b1111)
 *           ││└──────────── 0b0000    │   (=0×0b1111)
 *           │└────── 0b1111    │      │   (=1×0b1111)
 *           └ 0b0000    │      │      │   (=0×0b1111)
 *                │      │      │      │
 * @dst =      0b0000 0b1111 0b0000 0b1111
 *
 * Example 2:
 * ----------
 * @pattern =    0b11111111
 *                 └┬─────┘
 * @patternbits =   8 (bits)
 * @mask = 0b10
 *           ││
 *           ││
 *           ││
 *           │└────────── 0b00000000 (=0×0b11111111)
 *           └ 0b11111111      │     (=1×0b11111111)
 *                  │          │
 * @dst =      0b11111111 0b00000000
 */
static void
gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
			 xe_l3_bank_mask_t pattern, int patternbits,
			 unsigned long mask)
{
	unsigned long bit;

	xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
		  bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
	xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);
	for_each_set_bit(bit, &mask, 32) {
		xe_l3_bank_mask_t shifted_pattern = {};

		bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
				  XE_MAX_L3_BANK_MASK_BITS);
		bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
	}
}

static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);

	if (GRAPHICS_VER(xe) >= 20) {
		xe_l3_bank_mask_t per_node = {};
		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);

		bitmap_from_arr32(per_node, &bank_val, 32);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (GRAPHICS_VERx100(xe) >= 1270) {
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);

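		/*
		 * Two-step expansion; e.g. (illustrative fuse values) with
		 * bank_val = 0b01 the first call below sets per_node = 0b0011,
		 * and with meml3_en = 0b0101 the second call replicates that
		 * into the 4-bank groups of nodes 0 and 2, i.e. banks 0, 1, 8
		 * and 9 of l3_bank_mask.
		 */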
		bitmap_set_value8(per_mask_bit, 0x3, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (xe->info.platform == XE_PVC) {
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);

		bitmap_set_value8(per_mask_bit, 0xf, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
					 bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
					 meml3_en);
	} else if (xe->info.platform == XE_DG2) {
		xe_l3_bank_mask_t per_node = {};
		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);

		bitmap_set_value8(per_node, 0xff, 0);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
	} else {
		/* 1:1 register bit to mask bit (inverted register bits) */
		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);

		bitmap_from_arr32(l3_bank_mask, &mask, 32);
	}
}

static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	if (GRAPHICS_VER(xe) > 20) {
		*geometry_regs = 3;
		*compute_regs = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		*geometry_regs = 0;
		*compute_regs = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		*geometry_regs = 1;
		*compute_regs = 1;
	} else {
		*geometry_regs = 1;
		*compute_regs = 0;
	}
}

void
xe_gt_topology_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct drm_printer p;
	int num_geometry_regs, num_compute_regs;

	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);

	/*
	 * Register counts returned shouldn't exceed the number of registers
	 * passed as parameters below.
	 */
	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
	drm_WARN_ON(&xe->drm, num_compute_regs > 3);

	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
		      num_geometry_regs,
		      XELP_GT_GEOMETRY_DSS_ENABLE,
		      XE2_GT_GEOMETRY_DSS_1,
		      XE2_GT_GEOMETRY_DSS_2);
	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
		      XEHP_GT_COMPUTE_DSS_ENABLE,
		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
		      XE2_GT_COMPUTE_DSS_2);
	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);

	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");

	xe_gt_topology_dump(gt, &p);
}

void
xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
{
	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.g_dss_mask);
	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.c_dss_mask);

	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
		   gt->fuse_topo.eu_mask_per_dss);

	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
		   gt->fuse_topo.l3_bank_mask);
}

/*
 * Used to obtain the index of the first DSS.  Can start searching from the
 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
 * groupsize and groupnum are non-zero.
 */
unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
{
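	/*
	 * Note: not clamped to the requested group; if that group has no DSS
	 * set, this returns the first set bit of a later group (or
	 * XE_MAX_DSS_FUSE_BITS if none), so callers such as
	 * xe_gt_topology_has_dss_in_quadrant() below compare the result
	 * against the end of the group.
	 */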
	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
}

bool xe_dss_mask_empty(const xe_dss_mask_t mask)
{
	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
}

/**
 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
 * @gt: GT to check
 * @quad: Which quadrant of the DSS space to check
 *
 * Since Xe_HP platforms can have up to four CCS engines, those engines
 * are each logically associated with a quarter of the possible DSS.  If there
 * are no DSS present in one of the four quadrants of the DSS space, the
 * corresponding CCS engine is also not available for use.
 *
 * Returns false if all DSS in a quadrant of the GT are fused off, else true.
 */
bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
{
	struct xe_device *xe = gt_to_xe(gt);
	xe_dss_mask_t all_dss;
	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;

	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
		  XE_MAX_DSS_FUSE_BITS);

	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
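	/*
	 * E.g. with a single 32-bit DSS fuse register per type
	 * (max(g_dss_regs, c_dss_regs) == 1), dss_per_quad is 8 and
	 * quadrant 1 covers DSS 8..15.
	 */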

	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);

	return quad_first < (quad + 1) * dss_per_quad;
}

bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.g_dss_mask);
}

bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.c_dss_mask);
}