xref: /linux/drivers/gpu/drm/xe/xe_gt_topology.c (revision 24168c5e6dfbdd5b414f048f47f75d64533296ca)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_gt_topology.h"
7 
8 #include <linux/bitmap.h>
9 
10 #include "regs/xe_gt_regs.h"
11 #include "xe_assert.h"
12 #include "xe_gt.h"
13 #include "xe_mmio.h"
14 
15 static void
16 load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
17 {
18 	va_list argp;
19 	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
20 	int i;
21 
22 	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
23 		numregs = XE_MAX_DSS_FUSE_REGS;
24 
25 	va_start(argp, numregs);
26 	for (i = 0; i < numregs; i++)
27 		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
28 	va_end(argp);
29 
30 	bitmap_from_arr32(mask, fuse_val, numregs * 32);
31 }
32 
33 static void
34 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
35 {
36 	struct xe_device *xe = gt_to_xe(gt);
37 	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
38 	u32 val = 0;
39 	int i;
40 
41 	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
42 
43 	/*
44 	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
45 	 * of enable).
46 	 */
47 	if (GRAPHICS_VERx100(xe) < 1250)
48 		reg_val = ~reg_val & XELP_EU_MASK;
49 
50 	/* On PVC, one bit = one EU */
51 	if (GRAPHICS_VERx100(xe) == 1260) {
52 		val = reg_val;
53 	} else {
54 		/* All other platforms, one bit = 2 EU */
55 		for (i = 0; i < fls(reg_val); i++)
56 			if (reg_val & BIT(i))
57 				val |= 0x3 << 2 * i;
58 	}
59 
60 	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
61 }
62 
63 /**
64  * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
65  *
66  * It is used to compute the L3 bank masks in a generic format on
67  * various platforms where the internal representation of L3 node
68  * and masks from registers are different.
69  *
70  * @xe: device
71  * @dst: destination
72  * @pattern: pattern to replicate
73  * @patternbits: size of the pattern, in bits
74  * @mask: mask describing where to replicate the pattern
75  *
76  * Example 1:
77  * ----------
78  * @pattern =    0b1111
79  *                 └┬─┘
80  * @patternbits =   4 (bits)
81  * @mask = 0b0101
82  *           ││││
83  *           │││└────────────────── 0b1111 (=1×0b1111)
84  *           ││└──────────── 0b0000    │   (=0×0b1111)
85  *           │└────── 0b1111    │      │   (=1×0b1111)
86  *           └ 0b0000    │      │      │   (=0×0b1111)
87  *                │      │      │      │
88  * @dst =      0b0000 0b1111 0b0000 0b1111
89  *
90  * Example 2:
91  * ----------
92  * @pattern =    0b11111111
93  *                 └┬─────┘
94  * @patternbits =   8 (bits)
95  * @mask = 0b10
96  *           ││
97  *           ││
98  *           ││
99  *           │└────────── 0b00000000 (=0×0b11111111)
100  *           └ 0b11111111      │     (=1×0b11111111)
101  *                  │          │
102  * @dst =      0b11111111 0b00000000
103  */
104 static void
105 gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
106 			 xe_l3_bank_mask_t pattern, int patternbits,
107 			 unsigned long mask)
108 {
109 	unsigned long bit;
110 
111 	xe_assert(xe, fls(mask) <= patternbits);
112 	for_each_set_bit(bit, &mask, 32) {
113 		xe_l3_bank_mask_t shifted_pattern = {};
114 
115 		bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
116 				  XE_MAX_L3_BANK_MASK_BITS);
117 		bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
118 	}
119 }
120 
/*
 * Read the platform-specific L3 fuse register(s) and decode them into the
 * generic @l3_bank_mask bitmap (one bit per L3 bank).  Each platform family
 * encodes the banks differently, so each branch builds the mask via
 * gen_l3_mask_from_pattern() with its own pattern width and replication mask.
 */
static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);

	if (GRAPHICS_VER(xe) >= 20) {
		/*
		 * Version 20+: a 4-bit bank pattern from fuse3, replicated
		 * once per enabled node bit.
		 */
		xe_l3_bank_mask_t per_node = {};
		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);

		bitmap_from_arr32(per_node, &bank_val, 32);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (GRAPHICS_VERx100(xe) >= 1270) {
		/*
		 * Version 12.70+: each GT_L3_EXC_MASK bit (from FUSE4)
		 * expands to two adjacent bank bits; the resulting 4-bit
		 * node pattern is replicated per MEML3_EN bit.
		 */
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);

		bitmap_set_value8(per_mask_bit, 0x3, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (xe->info.platform == XE_PVC) {
		/*
		 * PVC: each XEHPC_GT_L3_MODE_MASK bit expands to four bank
		 * bits; the 16-bit node pattern is replicated per MEML3_EN
		 * bit.
		 */
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);

		bitmap_set_value8(per_mask_bit, 0xf, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
					 bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
					 meml3_en);
	} else if (xe->info.platform == XE_DG2) {
		/* DG2: each MEML3_EN bit enables a group of 8 banks */
		xe_l3_bank_mask_t per_node = {};
		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);

		bitmap_set_value8(per_node, 0xff, 0);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
	} else {
		/* 1:1 register bit to mask bit (inverted register bits) */
		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);

		bitmap_from_arr32(l3_bank_mask, &mask, 32);
	}
}
170 
/*
 * Determine how many 32-bit fuse registers hold the geometry and compute
 * DSS masks on this platform (0 = the GT has no DSS of that type).
 *
 * Fix: the first branch used "GRAPHICS_VER(xe) > 20", which excluded
 * version 20 itself even though xe_gt_topology_init() passes the
 * XE2_GT_GEOMETRY_DSS_1/2 and XE2_GT_COMPUTE_DSS_2 registers expecting
 * three registers on those platforms, and load_l3_bank_mask() in this file
 * already treats version 20 with ">= 20".  Use ">=" for consistency and
 * correct register counts.
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	if (GRAPHICS_VER(xe) >= 20) {
		/* Version 20+: three fuse registers for each DSS type */
		*geometry_regs = 3;
		*compute_regs = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		/* PVC: compute-only, two compute DSS registers */
		*geometry_regs = 0;
		*compute_regs = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		*geometry_regs = 1;
		*compute_regs = 1;
	} else {
		/* Pre-Xe_HP: geometry only */
		*geometry_regs = 1;
		*compute_regs = 0;
	}
}
188 
/**
 * xe_gt_topology_init - Read GT fuse registers and populate topology masks
 * @gt: GT to initialize
 *
 * Reads the DSS, EU and L3 bank fuse registers for @gt, stores the decoded
 * bitmaps in gt->fuse_topo, and dumps the result to the driver debug log.
 */
void
xe_gt_topology_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct drm_printer p;
	int num_geometry_regs, num_compute_regs;

	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);

	/*
	 * Register counts returned shouldn't exceed the number of registers
	 * passed as parameters below.
	 */
	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
	drm_WARN_ON(&xe->drm, num_compute_regs > 3);

	/*
	 * load_dss_mask() only reads the first 'numregs' registers from the
	 * variadic list, so extra register arguments are harmless on
	 * platforms with fewer fuse registers.
	 */
	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
		      num_geometry_regs,
		      XELP_GT_GEOMETRY_DSS_ENABLE,
		      XE2_GT_GEOMETRY_DSS_1,
		      XE2_GT_GEOMETRY_DSS_2);
	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
		      XEHP_GT_COMPUTE_DSS_ENABLE,
		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
		      XE2_GT_COMPUTE_DSS_2);
	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);

	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");

	xe_gt_topology_dump(gt, &p);
}
221 
222 void
223 xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
224 {
225 	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
226 		   gt->fuse_topo.g_dss_mask);
227 	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
228 		   gt->fuse_topo.c_dss_mask);
229 
230 	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
231 		   gt->fuse_topo.eu_mask_per_dss);
232 
233 	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
234 		   gt->fuse_topo.l3_bank_mask);
235 }
236 
/**
 * xe_dss_mask_group_ffs - Obtain the index of the first DSS in a group
 * @mask: DSS bitmap to search
 * @groupsize: number of DSS per group (e.g., gslice, cslice, etc.); pass 0
 *	(with @groupnum 0) to search from the beginning of the mask
 * @groupnum: index of the group to start searching from
 *
 * Returns the bit index of the first set DSS at or beyond the start of
 * group @groupnum, or XE_MAX_DSS_FUSE_BITS if none is found (standard
 * find_next_bit() semantics).
 */
unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
{
	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
}
247 
/**
 * xe_dss_mask_empty - Check whether a DSS mask has no bits set
 * @mask: DSS bitmap to test
 *
 * Returns true if no DSS is present in @mask.
 */
bool xe_dss_mask_empty(const xe_dss_mask_t mask)
{
	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
}
252 
253 /**
254  * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
255  * @gt: GT to check
256  * @quad: Which quadrant of the DSS space to check
257  *
258  * Since Xe_HP platforms can have up to four CCS engines, those engines
259  * are each logically associated with a quarter of the possible DSS.  If there
260  * are no DSS present in one of the four quadrants of the DSS space, the
261  * corresponding CCS engine is also not available for use.
262  *
263  * Returns false if all DSS in a quadrant of the GT are fused off, else true.
264  */
265 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
266 {
267 	struct xe_device *xe = gt_to_xe(gt);
268 	xe_dss_mask_t all_dss;
269 	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
270 
271 	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
272 		  XE_MAX_DSS_FUSE_BITS);
273 
274 	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
275 	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
276 
277 	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
278 
279 	return quad_first < (quad + 1) * dss_per_quad;
280 }
281