1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_gt_topology.h" 7 8 #include <generated/xe_wa_oob.h> 9 #include <linux/bitmap.h> 10 #include <linux/compiler.h> 11 12 #include "regs/xe_gt_regs.h" 13 #include "xe_assert.h" 14 #include "xe_gt.h" 15 #include "xe_gt_mcr.h" 16 #include "xe_gt_printk.h" 17 #include "xe_mmio.h" 18 #include "xe_wa.h" 19 20 static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, 21 const struct xe_reg regs[]) 22 { 23 u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {}; 24 int i; 25 26 xe_gt_assert(gt, numregs <= ARRAY_SIZE(fuse_val)); 27 28 for (i = 0; i < numregs; i++) 29 fuse_val[i] = xe_mmio_read32(>->mmio, regs[i]); 30 31 bitmap_from_arr32(mask, fuse_val, numregs * 32); 32 } 33 34 static void 35 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type) 36 { 37 struct xe_device *xe = gt_to_xe(gt); 38 u32 reg_val = xe_mmio_read32(>->mmio, XELP_EU_ENABLE); 39 u32 val = 0; 40 int i; 41 42 BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1); 43 44 /* 45 * Pre-Xe_HP platforms inverted the bit meaning (disable instead 46 * of enable). 47 */ 48 if (GRAPHICS_VERx100(xe) < 1250) 49 reg_val = ~reg_val & XELP_EU_MASK; 50 51 if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) { 52 /* SIMD16 EUs, one bit == one EU */ 53 *eu_type = XE_GT_EU_TYPE_SIMD16; 54 val = reg_val; 55 } else { 56 /* SIMD8 EUs, one bit == 2 EU */ 57 *eu_type = XE_GT_EU_TYPE_SIMD8; 58 for (i = 0; i < fls(reg_val); i++) 59 if (reg_val & BIT(i)) 60 val |= 0x3 << 2 * i; 61 } 62 63 bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS); 64 } 65 66 /** 67 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask 68 * 69 * It is used to compute the L3 bank masks in a generic format on 70 * various platforms where the internal representation of L3 node 71 * and masks from registers are different. 72 * 73 * @xe: device 74 * @dst: destination 75 * @pattern: pattern to replicate 76 * @patternbits: size of the pattern, in bits 77 * @mask: mask describing where to replicate the pattern 78 * 79 * Example 1: 80 * ---------- 81 * @pattern = 0b1111 82 * └┬─┘ 83 * @patternbits = 4 (bits) 84 * @mask = 0b0101 85 * ││││ 86 * │││└────────────────── 0b1111 (=1×0b1111) 87 * ││└──────────── 0b0000 │ (=0×0b1111) 88 * │└────── 0b1111 │ │ (=1×0b1111) 89 * └ 0b0000 │ │ │ (=0×0b1111) 90 * │ │ │ │ 91 * @dst = 0b0000 0b1111 0b0000 0b1111 92 * 93 * Example 2: 94 * ---------- 95 * @pattern = 0b11111111 96 * └┬─────┘ 97 * @patternbits = 8 (bits) 98 * @mask = 0b10 99 * ││ 100 * ││ 101 * ││ 102 * │└────────── 0b00000000 (=0×0b11111111) 103 * └ 0b11111111 │ (=1×0b11111111) 104 * │ │ 105 * @dst = 0b11111111 0b00000000 106 */ 107 static void 108 gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, 109 xe_l3_bank_mask_t pattern, int patternbits, 110 unsigned long mask) 111 { 112 unsigned long bit; 113 114 xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits || 115 bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS)); 116 xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS); 117 for_each_set_bit(bit, &mask, 32) { 118 xe_l3_bank_mask_t shifted_pattern = {}; 119 120 bitmap_shift_left(shifted_pattern, pattern, bit * patternbits, 121 XE_MAX_L3_BANK_MASK_BITS); 122 bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS); 123 } 124 } 125 126 bool xe_gt_topology_report_l3(struct xe_gt *gt) 127 { 128 /* 129 * No known userspace needs/uses the L3 bank mask reported by 130 * the media GT, and the hardware itself is known to report bogus 131 * values on several platforms. Only report L3 bank mask as part 132 * of the media GT's topology on pre-Xe3 platforms since that's 133 * already part of our ABI. 134 */ 135 if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) >= 30) 136 return false; 137 138 return true; 139 } 140 141 static void 142 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) 143 { 144 struct xe_device *xe = gt_to_xe(gt); 145 struct xe_mmio *mmio = >->mmio; 146 u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3); 147 148 if (!xe_gt_topology_report_l3(gt)) 149 return; 150 151 if (GRAPHICS_VER(xe) >= 35) { 152 u32 fuse_val = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE); 153 154 bitmap_from_arr32(l3_bank_mask, &fuse_val, 32); 155 } else if (GRAPHICS_VER(xe) >= 30) { 156 xe_l3_bank_mask_t per_node = {}; 157 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); 158 u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE); 159 u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable); 160 161 bitmap_from_arr32(per_node, &bank_val, 32); 162 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32, 163 meml3_en); 164 } else if (GRAPHICS_VER(xe) >= 20) { 165 xe_l3_bank_mask_t per_node = {}; 166 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); 167 u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3); 168 169 bitmap_from_arr32(per_node, &bank_val, 32); 170 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4, 171 meml3_en); 172 } else if (GRAPHICS_VERx100(xe) >= 1270) { 173 xe_l3_bank_mask_t per_node = {}; 174 xe_l3_bank_mask_t per_mask_bit = {}; 175 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); 176 u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4); 177 u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4); 178 179 bitmap_set_value8(per_mask_bit, 0x3, 0); 180 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val); 181 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4, 182 meml3_en); 183 } else if (xe->info.platform == XE_PVC) { 184 xe_l3_bank_mask_t per_node = {}; 185 xe_l3_bank_mask_t per_mask_bit = {}; 186 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); 187 u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3); 188 189 bitmap_set_value8(per_mask_bit, 0xf, 0); 190 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4, 191 bank_val); 192 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16, 193 meml3_en); 194 } else if (xe->info.platform == XE_DG2) { 195 xe_l3_bank_mask_t per_node = {}; 196 u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3); 197 198 bitmap_set_value8(per_node, 0xff, 0); 199 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask); 200 } else { 201 /* 1:1 register bit to mask bit (inverted register bits) */ 202 u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3); 203 204 bitmap_from_arr32(l3_bank_mask, &mask, 32); 205 } 206 } 207 208 static void 209 get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) 210 { 211 if (GRAPHICS_VER(xe) > 20) { 212 *geometry_regs = 3; 213 *compute_regs = 3; 214 } else if (GRAPHICS_VERx100(xe) == 1260) { 215 *geometry_regs = 0; 216 *compute_regs = 2; 217 } else if (GRAPHICS_VERx100(xe) >= 1250) { 218 *geometry_regs = 1; 219 *compute_regs = 1; 220 } else { 221 *geometry_regs = 1; 222 *compute_regs = 0; 223 } 224 } 225 226 void 227 xe_gt_topology_init(struct xe_gt *gt) 228 { 229 static const struct xe_reg geometry_regs[] = { 230 XELP_GT_GEOMETRY_DSS_ENABLE, 231 XE2_GT_GEOMETRY_DSS_1, 232 XE2_GT_GEOMETRY_DSS_2, 233 }; 234 static const struct xe_reg compute_regs[] = { 235 XEHP_GT_COMPUTE_DSS_ENABLE, 236 XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, 237 XE2_GT_COMPUTE_DSS_2, 238 }; 239 int num_geometry_regs, num_compute_regs; 240 struct xe_device *xe = gt_to_xe(gt); 241 struct drm_printer p; 242 243 get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); 244 245 /* 246 * Register counts returned shouldn't exceed the number of registers 247 * passed as parameters below. 248 */ 249 xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs)); 250 xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs)); 251 252 load_dss_mask(gt, gt->fuse_topo.g_dss_mask, 253 num_geometry_regs, geometry_regs); 254 load_dss_mask(gt, gt->fuse_topo.c_dss_mask, 255 num_compute_regs, compute_regs); 256 257 load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type); 258 load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); 259 260 p = xe_gt_dbg_printer(gt); 261 xe_gt_topology_dump(gt, &p); 262 } 263 264 static const char *eu_type_to_str(enum xe_gt_eu_type eu_type) 265 { 266 switch (eu_type) { 267 case XE_GT_EU_TYPE_SIMD16: 268 return "simd16"; 269 case XE_GT_EU_TYPE_SIMD8: 270 return "simd8"; 271 } 272 273 return NULL; 274 } 275 276 /** 277 * xe_gt_topology_dump() - Dump GT topology into a drm printer. 278 * @gt: the &xe_gt 279 * @p: the &drm_printer 280 * 281 * Return: always 0. 282 */ 283 int xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) 284 { 285 drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS, 286 gt->fuse_topo.g_dss_mask); 287 drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS, 288 gt->fuse_topo.c_dss_mask); 289 290 drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS, 291 gt->fuse_topo.eu_mask_per_dss); 292 drm_printf(p, "EU type: %s\n", 293 eu_type_to_str(gt->fuse_topo.eu_type)); 294 295 if (xe_gt_topology_report_l3(gt)) 296 drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, 297 gt->fuse_topo.l3_bank_mask); 298 return 0; 299 } 300 301 /* 302 * Used to obtain the index of the first DSS. Can start searching from the 303 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if 304 * groupsize and groupnum are non-zero. 305 */ 306 unsigned int 307 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) 308 { 309 return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); 310 } 311 312 /* Used to obtain the index of the first L3 bank. */ 313 unsigned int 314 xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask) 315 { 316 return find_first_bit(mask, XE_MAX_L3_BANK_MASK_BITS); 317 } 318 319 /** 320 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant 321 * @gt: GT to check 322 * @quad: Which quadrant of the DSS space to check 323 * 324 * Since Xe_HP platforms can have up to four CCS engines, those engines 325 * are each logically associated with a quarter of the possible DSS. If there 326 * are no DSS present in one of the four quadrants of the DSS space, the 327 * corresponding CCS engine is also not available for use. 328 * 329 * Returns false if all DSS in a quadrant of the GT are fused off, else true. 330 */ 331 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad) 332 { 333 struct xe_device *xe = gt_to_xe(gt); 334 xe_dss_mask_t all_dss; 335 int g_dss_regs, c_dss_regs, dss_per_quad, quad_first; 336 337 bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, 338 XE_MAX_DSS_FUSE_BITS); 339 340 get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs); 341 dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4; 342 343 quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad); 344 345 return quad_first < (quad + 1) * dss_per_quad; 346 } 347 348 bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss) 349 { 350 return test_bit(dss, gt->fuse_topo.g_dss_mask); 351 } 352 353 bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss) 354 { 355 return test_bit(dss, gt->fuse_topo.c_dss_mask); 356 } 357 358 bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt) 359 { 360 unsigned int xecore; 361 int last_group = -1; 362 u16 group, instance; 363 364 for_each_dss_steering(xecore, gt, group, instance) { 365 if (last_group != group) { 366 if (group - last_group > 1) 367 return true; 368 last_group = group; 369 } 370 } 371 return false; 372 } 373