1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2022 Intel Corporation
4 */
5
6 #include "xe_gt_topology.h"
7
8 #include <linux/bitmap.h>
9 #include <linux/compiler.h>
10
11 #include "regs/xe_gt_regs.h"
12 #include "xe_assert.h"
13 #include "xe_gt.h"
14 #include "xe_mmio.h"
15
16 static void
load_dss_mask(struct xe_gt * gt,xe_dss_mask_t mask,int numregs,...)17 load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
18 {
19 va_list argp;
20 u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
21 int i;
22
23 if (drm_WARN_ON(>_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
24 numregs = XE_MAX_DSS_FUSE_REGS;
25
26 va_start(argp, numregs);
27 for (i = 0; i < numregs; i++)
28 fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
29 va_end(argp);
30
31 bitmap_from_arr32(mask, fuse_val, numregs * 32);
32 }
33
34 static void
load_eu_mask(struct xe_gt * gt,xe_eu_mask_t mask,enum xe_gt_eu_type * eu_type)35 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
36 {
37 struct xe_device *xe = gt_to_xe(gt);
38 u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
39 u32 val = 0;
40 int i;
41
42 BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
43
44 /*
45 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
46 * of enable).
47 */
48 if (GRAPHICS_VERx100(xe) < 1250)
49 reg_val = ~reg_val & XELP_EU_MASK;
50
51 if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
52 /* SIMD16 EUs, one bit == one EU */
53 *eu_type = XE_GT_EU_TYPE_SIMD16;
54 val = reg_val;
55 } else {
56 /* SIMD8 EUs, one bit == 2 EU */
57 *eu_type = XE_GT_EU_TYPE_SIMD8;
58 for (i = 0; i < fls(reg_val); i++)
59 if (reg_val & BIT(i))
60 val |= 0x3 << 2 * i;
61 }
62
63 bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
64 }
65
66 /**
67 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
68 *
69 * It is used to compute the L3 bank masks in a generic format on
70 * various platforms where the internal representation of L3 node
71 * and masks from registers are different.
72 *
73 * @xe: device
74 * @dst: destination
75 * @pattern: pattern to replicate
76 * @patternbits: size of the pattern, in bits
77 * @mask: mask describing where to replicate the pattern
78 *
79 * Example 1:
80 * ----------
81 * @pattern = 0b1111
82 * └┬─┘
83 * @patternbits = 4 (bits)
84 * @mask = 0b0101
85 * ││││
86 * │││└────────────────── 0b1111 (=1×0b1111)
87 * ││└──────────── 0b0000 │ (=0×0b1111)
88 * │└────── 0b1111 │ │ (=1×0b1111)
89 * └ 0b0000 │ │ │ (=0×0b1111)
90 * │ │ │ │
91 * @dst = 0b0000 0b1111 0b0000 0b1111
92 *
93 * Example 2:
94 * ----------
95 * @pattern = 0b11111111
96 * └┬─────┘
97 * @patternbits = 8 (bits)
98 * @mask = 0b10
99 * ││
100 * ││
101 * ││
102 * │└────────── 0b00000000 (=0×0b11111111)
103 * └ 0b11111111 │ (=1×0b11111111)
104 * │ │
105 * @dst = 0b11111111 0b00000000
106 */
107 static void
gen_l3_mask_from_pattern(struct xe_device * xe,xe_l3_bank_mask_t dst,xe_l3_bank_mask_t pattern,int patternbits,unsigned long mask)108 gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
109 xe_l3_bank_mask_t pattern, int patternbits,
110 unsigned long mask)
111 {
112 unsigned long bit;
113
114 xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
115 bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
116 xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);
117 for_each_set_bit(bit, &mask, 32) {
118 xe_l3_bank_mask_t shifted_pattern = {};
119
120 bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
121 XE_MAX_L3_BANK_MASK_BITS);
122 bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
123 }
124 }
125
126 static void
load_l3_bank_mask(struct xe_gt * gt,xe_l3_bank_mask_t l3_bank_mask)127 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
128 {
129 struct xe_device *xe = gt_to_xe(gt);
130 u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
131
132 if (GRAPHICS_VER(xe) >= 20) {
133 xe_l3_bank_mask_t per_node = {};
134 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
135 u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
136
137 bitmap_from_arr32(per_node, &bank_val, 32);
138 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
139 meml3_en);
140 } else if (GRAPHICS_VERx100(xe) >= 1270) {
141 xe_l3_bank_mask_t per_node = {};
142 xe_l3_bank_mask_t per_mask_bit = {};
143 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
144 u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
145 u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
146
147 bitmap_set_value8(per_mask_bit, 0x3, 0);
148 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
149 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
150 meml3_en);
151 } else if (xe->info.platform == XE_PVC) {
152 xe_l3_bank_mask_t per_node = {};
153 xe_l3_bank_mask_t per_mask_bit = {};
154 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
155 u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);
156
157 bitmap_set_value8(per_mask_bit, 0xf, 0);
158 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
159 bank_val);
160 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
161 meml3_en);
162 } else if (xe->info.platform == XE_DG2) {
163 xe_l3_bank_mask_t per_node = {};
164 u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
165
166 bitmap_set_value8(per_node, 0xff, 0);
167 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
168 } else {
169 /* 1:1 register bit to mask bit (inverted register bits) */
170 u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);
171
172 bitmap_from_arr32(l3_bank_mask, &mask, 32);
173 }
174 }
175
/*
 * Report how many geometry and compute DSS fuse registers the device has.
 *
 * @xe: device
 * @geometry_regs: set to the number of geometry DSS fuse registers
 * @compute_regs: set to the number of compute DSS fuse registers
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	int geometry;
	int compute;

	if (GRAPHICS_VER(xe) > 20) {
		geometry = 3;
		compute = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		geometry = 0;
		compute = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		geometry = 1;
		compute = 1;
	} else {
		geometry = 1;
		compute = 0;
	}

	*geometry_regs = geometry;
	*compute_regs = compute;
}
193
194 void
xe_gt_topology_init(struct xe_gt * gt)195 xe_gt_topology_init(struct xe_gt *gt)
196 {
197 struct xe_device *xe = gt_to_xe(gt);
198 struct drm_printer p;
199 int num_geometry_regs, num_compute_regs;
200
201 get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
202
203 /*
204 * Register counts returned shouldn't exceed the number of registers
205 * passed as parameters below.
206 */
207 drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
208 drm_WARN_ON(&xe->drm, num_compute_regs > 3);
209
210 load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
211 num_geometry_regs,
212 XELP_GT_GEOMETRY_DSS_ENABLE,
213 XE2_GT_GEOMETRY_DSS_1,
214 XE2_GT_GEOMETRY_DSS_2);
215 load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
216 XEHP_GT_COMPUTE_DSS_ENABLE,
217 XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
218 XE2_GT_COMPUTE_DSS_2);
219 load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type);
220 load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
221
222 p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
223
224 xe_gt_topology_dump(gt, &p);
225 }
226
eu_type_to_str(enum xe_gt_eu_type eu_type)227 static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
228 {
229 switch (eu_type) {
230 case XE_GT_EU_TYPE_SIMD16:
231 return "simd16";
232 case XE_GT_EU_TYPE_SIMD8:
233 return "simd8";
234 }
235
236 return NULL;
237 }
238
239 void
xe_gt_topology_dump(struct xe_gt * gt,struct drm_printer * p)240 xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
241 {
242 drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
243 gt->fuse_topo.g_dss_mask);
244 drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS,
245 gt->fuse_topo.c_dss_mask);
246
247 drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
248 gt->fuse_topo.eu_mask_per_dss);
249 drm_printf(p, "EU type: %s\n",
250 eu_type_to_str(gt->fuse_topo.eu_type));
251
252 drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
253 gt->fuse_topo.l3_bank_mask);
254 }
255
256 /*
257 * Used to obtain the index of the first DSS. Can start searching from the
258 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
259 * groupsize and groupnum are non-zero.
260 */
261 unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask,int groupsize,int groupnum)262 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
263 {
264 return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
265 }
266
xe_dss_mask_empty(const xe_dss_mask_t mask)267 bool xe_dss_mask_empty(const xe_dss_mask_t mask)
268 {
269 return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
270 }
271
272 /**
273 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
274 * @gt: GT to check
275 * @quad: Which quadrant of the DSS space to check
276 *
277 * Since Xe_HP platforms can have up to four CCS engines, those engines
278 * are each logically associated with a quarter of the possible DSS. If there
279 * are no DSS present in one of the four quadrants of the DSS space, the
280 * corresponding CCS engine is also not available for use.
281 *
282 * Returns false if all DSS in a quadrant of the GT are fused off, else true.
283 */
xe_gt_topology_has_dss_in_quadrant(struct xe_gt * gt,int quad)284 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
285 {
286 struct xe_device *xe = gt_to_xe(gt);
287 xe_dss_mask_t all_dss;
288 int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
289
290 bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
291 XE_MAX_DSS_FUSE_BITS);
292
293 get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
294 dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
295
296 quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
297
298 return quad_first < (quad + 1) * dss_per_quad;
299 }
300
xe_gt_has_geometry_dss(struct xe_gt * gt,unsigned int dss)301 bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
302 {
303 return test_bit(dss, gt->fuse_topo.g_dss_mask);
304 }
305
xe_gt_has_compute_dss(struct xe_gt * gt,unsigned int dss)306 bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
307 {
308 return test_bit(dss, gt->fuse_topo.c_dss_mask);
309 }
310