1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_gt_topology.h" 7 8 #include <generated/xe_wa_oob.h> 9 #include <linux/bitmap.h> 10 #include <linux/compiler.h> 11 12 #include "regs/xe_gt_regs.h" 13 #include "xe_assert.h" 14 #include "xe_gt.h" 15 #include "xe_gt_printk.h" 16 #include "xe_mmio.h" 17 #include "xe_wa.h" 18 19 static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, 20 const struct xe_reg regs[]) 21 { 22 u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {}; 23 int i; 24 25 xe_gt_assert(gt, numregs <= ARRAY_SIZE(fuse_val)); 26 27 for (i = 0; i < numregs; i++) 28 fuse_val[i] = xe_mmio_read32(>->mmio, regs[i]); 29 30 bitmap_from_arr32(mask, fuse_val, numregs * 32); 31 } 32 33 static void 34 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type) 35 { 36 struct xe_device *xe = gt_to_xe(gt); 37 u32 reg_val = xe_mmio_read32(>->mmio, XELP_EU_ENABLE); 38 u32 val = 0; 39 int i; 40 41 BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1); 42 43 /* 44 * Pre-Xe_HP platforms inverted the bit meaning (disable instead 45 * of enable). 46 */ 47 if (GRAPHICS_VERx100(xe) < 1250) 48 reg_val = ~reg_val & XELP_EU_MASK; 49 50 if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) { 51 /* SIMD16 EUs, one bit == one EU */ 52 *eu_type = XE_GT_EU_TYPE_SIMD16; 53 val = reg_val; 54 } else { 55 /* SIMD8 EUs, one bit == 2 EU */ 56 *eu_type = XE_GT_EU_TYPE_SIMD8; 57 for (i = 0; i < fls(reg_val); i++) 58 if (reg_val & BIT(i)) 59 val |= 0x3 << 2 * i; 60 } 61 62 bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS); 63 } 64 65 /** 66 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask 67 * 68 * It is used to compute the L3 bank masks in a generic format on 69 * various platforms where the internal representation of L3 node 70 * and masks from registers are different. 71 * 72 * @xe: device 73 * @dst: destination 74 * @pattern: pattern to replicate 75 * @patternbits: size of the pattern, in bits 76 * @mask: mask describing where to replicate the pattern 77 * 78 * Example 1: 79 * ---------- 80 * @pattern = 0b1111 81 * └┬─┘ 82 * @patternbits = 4 (bits) 83 * @mask = 0b0101 84 * ││││ 85 * │││└────────────────── 0b1111 (=1×0b1111) 86 * ││└──────────── 0b0000 │ (=0×0b1111) 87 * │└────── 0b1111 │ │ (=1×0b1111) 88 * └ 0b0000 │ │ │ (=0×0b1111) 89 * │ │ │ │ 90 * @dst = 0b0000 0b1111 0b0000 0b1111 91 * 92 * Example 2: 93 * ---------- 94 * @pattern = 0b11111111 95 * └┬─────┘ 96 * @patternbits = 8 (bits) 97 * @mask = 0b10 98 * ││ 99 * ││ 100 * ││ 101 * │└────────── 0b00000000 (=0×0b11111111) 102 * └ 0b11111111 │ (=1×0b11111111) 103 * │ │ 104 * @dst = 0b11111111 0b00000000 105 */ 106 static void 107 gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, 108 xe_l3_bank_mask_t pattern, int patternbits, 109 unsigned long mask) 110 { 111 unsigned long bit; 112 113 xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits || 114 bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS)); 115 xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS); 116 for_each_set_bit(bit, &mask, 32) { 117 xe_l3_bank_mask_t shifted_pattern = {}; 118 119 bitmap_shift_left(shifted_pattern, pattern, bit * patternbits, 120 XE_MAX_L3_BANK_MASK_BITS); 121 bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS); 122 } 123 } 124 125 static void 126 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) 127 { 128 struct xe_device *xe = gt_to_xe(gt); 129 struct xe_mmio *mmio = >->mmio; 130 u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3); 131 132 /* 133 * PTL platforms with media version 30.00 do not provide proper values 134 * for the media GT's L3 bank registers. Skip the readout since we 135 * don't have any way to obtain real values. 136 * 137 * This may get re-described as an official workaround in the future, 138 * but there's no tracking number assigned yet so we use a custom 139 * OOB workaround descriptor. 140 */ 141 if (XE_WA(gt, no_media_l3)) 142 return; 143 144 if (GRAPHICS_VER(xe) >= 30) { 145 xe_l3_bank_mask_t per_node = {}; 146 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); 147 u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE); 148 u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable); 149 150 bitmap_from_arr32(per_node, &bank_val, 32); 151 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32, 152 meml3_en); 153 } else if (GRAPHICS_VER(xe) >= 20) { 154 xe_l3_bank_mask_t per_node = {}; 155 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); 156 u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3); 157 158 bitmap_from_arr32(per_node, &bank_val, 32); 159 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4, 160 meml3_en); 161 } else if (GRAPHICS_VERx100(xe) >= 1270) { 162 xe_l3_bank_mask_t per_node = {}; 163 xe_l3_bank_mask_t per_mask_bit = {}; 164 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); 165 u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4); 166 u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4); 167 168 bitmap_set_value8(per_mask_bit, 0x3, 0); 169 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val); 170 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4, 171 meml3_en); 172 } else if (xe->info.platform == XE_PVC) { 173 xe_l3_bank_mask_t per_node = {}; 174 xe_l3_bank_mask_t per_mask_bit = {}; 175 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); 176 u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3); 177 178 bitmap_set_value8(per_mask_bit, 0xf, 0); 179 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4, 180 bank_val); 181 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16, 182 meml3_en); 183 } else if (xe->info.platform == XE_DG2) { 184 xe_l3_bank_mask_t per_node = {}; 185 u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3); 186 187 bitmap_set_value8(per_node, 0xff, 0); 188 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask); 189 } else { 190 /* 1:1 register bit to mask bit (inverted register bits) */ 191 u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3); 192 193 bitmap_from_arr32(l3_bank_mask, &mask, 32); 194 } 195 } 196 197 static void 198 get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) 199 { 200 if (GRAPHICS_VER(xe) > 20) { 201 *geometry_regs = 3; 202 *compute_regs = 3; 203 } else if (GRAPHICS_VERx100(xe) == 1260) { 204 *geometry_regs = 0; 205 *compute_regs = 2; 206 } else if (GRAPHICS_VERx100(xe) >= 1250) { 207 *geometry_regs = 1; 208 *compute_regs = 1; 209 } else { 210 *geometry_regs = 1; 211 *compute_regs = 0; 212 } 213 } 214 215 void 216 xe_gt_topology_init(struct xe_gt *gt) 217 { 218 static const struct xe_reg geometry_regs[] = { 219 XELP_GT_GEOMETRY_DSS_ENABLE, 220 XE2_GT_GEOMETRY_DSS_1, 221 XE2_GT_GEOMETRY_DSS_2, 222 }; 223 static const struct xe_reg compute_regs[] = { 224 XEHP_GT_COMPUTE_DSS_ENABLE, 225 XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, 226 XE2_GT_COMPUTE_DSS_2, 227 }; 228 int num_geometry_regs, num_compute_regs; 229 struct xe_device *xe = gt_to_xe(gt); 230 struct drm_printer p; 231 232 get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); 233 234 /* 235 * Register counts returned shouldn't exceed the number of registers 236 * passed as parameters below. 237 */ 238 xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs)); 239 xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs)); 240 241 load_dss_mask(gt, gt->fuse_topo.g_dss_mask, 242 num_geometry_regs, geometry_regs); 243 load_dss_mask(gt, gt->fuse_topo.c_dss_mask, 244 num_compute_regs, compute_regs); 245 246 load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type); 247 load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); 248 249 p = xe_gt_dbg_printer(gt); 250 xe_gt_topology_dump(gt, &p); 251 } 252 253 static const char *eu_type_to_str(enum xe_gt_eu_type eu_type) 254 { 255 switch (eu_type) { 256 case XE_GT_EU_TYPE_SIMD16: 257 return "simd16"; 258 case XE_GT_EU_TYPE_SIMD8: 259 return "simd8"; 260 } 261 262 return NULL; 263 } 264 265 void 266 xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) 267 { 268 drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS, 269 gt->fuse_topo.g_dss_mask); 270 drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS, 271 gt->fuse_topo.c_dss_mask); 272 273 drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS, 274 gt->fuse_topo.eu_mask_per_dss); 275 drm_printf(p, "EU type: %s\n", 276 eu_type_to_str(gt->fuse_topo.eu_type)); 277 278 drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, 279 gt->fuse_topo.l3_bank_mask); 280 } 281 282 /* 283 * Used to obtain the index of the first DSS. Can start searching from the 284 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if 285 * groupsize and groupnum are non-zero. 286 */ 287 unsigned int 288 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) 289 { 290 return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); 291 } 292 293 /** 294 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant 295 * @gt: GT to check 296 * @quad: Which quadrant of the DSS space to check 297 * 298 * Since Xe_HP platforms can have up to four CCS engines, those engines 299 * are each logically associated with a quarter of the possible DSS. If there 300 * are no DSS present in one of the four quadrants of the DSS space, the 301 * corresponding CCS engine is also not available for use. 302 * 303 * Returns false if all DSS in a quadrant of the GT are fused off, else true. 304 */ 305 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad) 306 { 307 struct xe_device *xe = gt_to_xe(gt); 308 xe_dss_mask_t all_dss; 309 int g_dss_regs, c_dss_regs, dss_per_quad, quad_first; 310 311 bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, 312 XE_MAX_DSS_FUSE_BITS); 313 314 get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs); 315 dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4; 316 317 quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad); 318 319 return quad_first < (quad + 1) * dss_per_quad; 320 } 321 322 bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss) 323 { 324 return test_bit(dss, gt->fuse_topo.g_dss_mask); 325 } 326 327 bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss) 328 { 329 return test_bit(dss, gt->fuse_topo.c_dss_mask); 330 } 331