// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include <drm/drm_print.h>

#include "i915_drv.h"
#include "i915_perf_types.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	if (sseu->has_xehp_dss)
		return bitmap_weight(sseu->subslice_mask.xehp,
				     XEHP_BITMAP_BITS(sseu->subslice_mask));

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask.hsw); i++)
		total += hweight8(sseu->subslice_mask.hsw[i]);

	return total;
}

unsigned int
intel_sseu_get_hsw_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	WARN_ON(sseu->has_xehp_dss);
	if (WARN_ON(slice >= sseu->max_slices))
		return 0;

	return sseu->subslice_mask.hsw[slice];
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	if (sseu->has_xehp_dss) {
		WARN_ON(slice > 0);
		return sseu->eu_mask.xehp[subslice];
	} else {
		return sseu->eu_mask.hsw[slice][subslice];
	}
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice);
	if (sseu->has_xehp_dss) {
		GEM_WARN_ON(slice > 0);
		sseu->eu_mask.xehp[subslice] = eu_mask;
	} else {
		sseu->eu_mask.hsw[slice][subslice] = eu_mask;
	}
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	int s, ss, total = 0;

	for (s = 0; s < sseu->max_slices; s++)
		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (sseu->has_xehp_dss)
				total += hweight16(sseu->eu_mask.xehp[ss]);
			else
				total += hweight16(sseu->eu_mask.hsw[s][ss]);

	return total;
}

/**
 * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
 * @to: Pointer to userspace buffer to copy to
 * @sseu: SSEU structure containing EU mask to copy
 *
 * Copies the EU mask to a userspace buffer in the format expected by
 * the query ioctl's topology queries.
 *
 * Returns the result of the copy_to_user() operation.
 */
int intel_sseu_copy_eumask_to_user(void __user *to,
				   const struct sseu_dev_info *sseu)
{
	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
	int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	int len = sseu->max_slices * sseu->max_subslices * eu_stride;
	int s, ss, i;

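	/*
	 * Layout example, using the gen12 values programmed later in this
	 * file (1 slice, 6 DSS, 16 EUs per DSS): eu_stride works out to
	 * 2 bytes (one byte per 8 EUs) and len to 12 bytes, so the EU
	 * mask of subslice ss lands in bytes 2*ss and 2*ss + 1 of the
	 * buffer copied to userspace.
	 */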
	for (s = 0; s < sseu->max_slices; s++) {
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int uapi_offset =
				s * sseu->max_subslices * eu_stride +
				ss * eu_stride;
			u16 mask = sseu_get_eus(sseu, s, ss);

			for (i = 0; i < eu_stride; i++)
				eu_mask[uapi_offset + i] =
					(mask >> (BITS_PER_BYTE * i)) & 0xff;
		}
	}

	return copy_to_user(to, eu_mask, len);
}

/**
 * intel_sseu_copy_ssmask_to_user - Copy subslice mask into a userspace buffer
 * @to: Pointer to userspace buffer to copy to
 * @sseu: SSEU structure containing subslice mask to copy
 *
 * Copies the subslice mask to a userspace buffer in the format expected by
 * the query ioctl's topology queries.
 *
 * Returns the result of the copy_to_user() operation.
 */
int intel_sseu_copy_ssmask_to_user(void __user *to,
				   const struct sseu_dev_info *sseu)
{
	u8 ss_mask[GEN_SS_MASK_SIZE] = {};
	int ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	int len = sseu->max_slices * ss_stride;
	int s, ss, i;

	for (s = 0; s < sseu->max_slices; s++) {
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			i = s * ss_stride * BITS_PER_BYTE + ss;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				continue;

			ss_mask[i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE);
		}
	}

	return copy_to_user(to, ss_mask, len);
}

static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
				    u32 ss_en, u16 eu_en)
{
	u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0);
	int ss;

	sseu->slice_mask |= BIT(0);
	sseu->subslice_mask.hsw[0] = ss_en & valid_ss_mask;

	for (ss = 0; ss < sseu->max_subslices; ss++)
		if (intel_sseu_has_subslice(sseu, 0, ss))
			sseu_set_eus(sseu, 0, ss, eu_en);

	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void xehp_compute_sseu_info(struct sseu_dev_info *sseu,
				   u16 eu_en)
{
	int ss;

	sseu->slice_mask |= BIT(0);

	bitmap_or(sseu->subslice_mask.xehp,
		  sseu->compute_subslice_mask.xehp,
		  sseu->geometry_subslice_mask.xehp,
		  XEHP_BITMAP_BITS(sseu->subslice_mask));

	for (ss = 0; ss < sseu->max_subslices; ss++)
		if (intel_sseu_has_subslice(sseu, 0, ss))
			sseu_set_eus(sseu, 0, ss, eu_en);

	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void
xehp_load_dss_mask(struct intel_uncore *uncore,
		   intel_sseu_ss_mask_t *ssmask,
		   int numregs,
		   ...)
{
	va_list argp;
	u32 fuse_val[I915_MAX_SS_FUSE_REGS] = {};
	int i;

	if (WARN_ON(numregs > I915_MAX_SS_FUSE_REGS))
		numregs = I915_MAX_SS_FUSE_REGS;

	va_start(argp, numregs);
	for (i = 0; i < numregs; i++)
		fuse_val[i] = intel_uncore_read(uncore, va_arg(argp, i915_reg_t));
	va_end(argp);

	bitmap_from_arr32(ssmask->xehp, fuse_val, numregs * 32);
}

static void xehp_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	int num_compute_regs, num_geometry_regs;
	int eu;

	num_geometry_regs = 1;
	num_compute_regs = 1;

	/*
	 * The concept of slice has been removed in Xe_HP. To be compatible
	 * with prior generations, assume a single slice across the entire
	 * device. Then calculate out the DSS for each workload type within
	 * that software slice.
	 */
	intel_sseu_set_info(sseu, 1,
			    32 * max(num_geometry_regs, num_compute_regs),
			    HAS_ONE_EU_PER_FUSE_BIT(gt->i915) ? 8 : 16);
	sseu->has_xehp_dss = 1;

	xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask,
			   num_geometry_regs,
			   GEN12_GT_GEOMETRY_DSS_ENABLE);
	xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask,
			   num_compute_regs,
			   GEN12_GT_COMPUTE_DSS_ENABLE,
			   XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);

	eu_en_fuse = REG_FIELD_GET(XEHP_EU_ENA_MASK,
				   intel_uncore_read(uncore, XEHP_EU_ENABLE));

	if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915))
		eu_en = eu_en_fuse;
	else
		for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
			if (eu_en_fuse & BIT(eu))
				eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	xehp_compute_sseu_info(sseu, eu_en);
}

static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 g_dss_en;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array
	 * of DSS to more closely represent the hardware resource.
	 */
	intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * Although gen12 architecture supported multiple slices, TGL, RKL,
	 * DG1, and ADL only had a single slice.
	 */
	s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
			     intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);

	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);

	/* one bit per pair of EUs */
	eu_en_fuse = ~REG_FIELD_GET(GEN11_EU_DIS_MASK,
				    intel_uncore_read(uncore, GEN11_EU_DISABLE));

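	/*
	 * Expand each fuse bit into an EU pair; e.g. an eu_en_fuse of 0x0f
	 * (pairs 0-3 present) becomes an eu_en of 0x00ff (EUs 0-7 enabled).
	 */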
	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, g_dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JASPERLAKE(gt->i915) || IS_ELKHARTLAKE(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	/*
	 * Although gen11 architecture supported multiple slices, ICL and
	 * EHL/JSL only had a single slice in practice.
	 */
	s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
			     intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);

	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~REG_FIELD_GET(GEN11_EU_DIS_MASK,
			       intel_uncore_read(uncore, GEN11_EU_DISABLE));

	gen11_compute_sseu_info(sseu, ss_en, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R0_MASK, fuse) |
			REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R1_MASK, fuse) <<
				hweight32(CHV_FGT_EU_DIS_SS0_R0_MASK);

		sseu->subslice_mask.hsw[0] |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R0_MASK, fuse) |
			REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R1_MASK, fuse) <<
				hweight32(CHV_FGT_EU_DIS_SS1_R0_MASK);

		sseu->subslice_mask.hsw[0] |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF);
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV expected to always have a uniform distribution of EU
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
		sseu->eu_total /
			intel_sseu_subslice_total(sseu) :
		0;

	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2);

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~REG_FIELD_GET(GEN9_F2_SS_DIS_MASK, fuse2);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		sseu->subslice_mask.hsw[s] = subslice_mask;

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & eu_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) has(have) 7 EUs. We
			 * can tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask.hsw[0] & BIT(ss)))
		RUNTIME_INFO(i915)->has_pooled_eu = hweight8(sseu->subslice_mask.hsw[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (HAS_POOLED_EU(i915)) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}

static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2);
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~REG_FIELD_GET(GEN8_F2_SS_DIS_MASK, fuse2);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] =
		REG_FIELD_GET(GEN8_EU_DIS0_S0_MASK, eu_disable0);
	eu_disable[1] =
		REG_FIELD_GET(GEN8_EU_DIS0_S1_MASK, eu_disable0) |
		REG_FIELD_GET(GEN8_EU_DIS1_S1_MASK, eu_disable1) <<
			hweight32(GEN8_EU_DIS0_S1_MASK);
	eu_disable[2] =
		REG_FIELD_GET(GEN8_EU_DIS1_S2_MASK, eu_disable1) |
		REG_FIELD_GET(GEN8_EU_DIS2_S2_MASK, eu_disable2) <<
			hweight32(GEN8_EU_DIS1_S2_MASK);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		sseu->subslice_mask.hsw[s] = subslice_mask;

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & 0xFF);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices. We
	 * work off the PCI-ids here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
	default:
		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		sseu->subslice_mask.hsw[s] = subslice_mask;

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
		xehp_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 9)
		gen9_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
}

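/**
 * intel_sseu_make_rpcs - Build a GEN8_R_PWR_CLK_STATE (RPCS) value
 * @gt: GT the value will be programmed on
 * @req_sseu: slice/subslice/EU configuration requested for the context
 *
 * Encode the requested powergating configuration into the RPCS register
 * format, honouring only the powergating features the platform actually
 * supports. Returns 0 for platforms prior to Gen9, where no explicit
 * request is needed.
 */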
u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
		req_sseu = &gt->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, maximum documented
	 * value for it is four. From this some rules/restrictions follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a count of subslices between the SScount maximum of four restriction,
	 * and the maximum available number on a particular SKU. Either all
	 * subslices are enabled, or a count between one and four on the first
	 * slice.
	 */
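	/*
	 * Example: on an ICL part with 8 subslices in slice 0, a request
	 * for 1 slice with all 8 subslices exceeds min(4, 8 / 2) = 4, so
	 * it is programmed below as 2 slices with subslice powergating
	 * disabled, which, per the note above, the hardware translates
	 * back to a single slice of 8 subslices.
	 */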
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask.hsw[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	if (sseu->has_xehp_dss) {
		drm_printf(p, "subslice total: %u\n",
			   intel_sseu_subslice_total(sseu));
		drm_printf(p, "geometry dss mask=%*pb\n",
			   XEHP_BITMAP_BITS(sseu->geometry_subslice_mask),
			   sseu->geometry_subslice_mask.xehp);
		drm_printf(p, "compute dss mask=%*pb\n",
			   XEHP_BITMAP_BITS(sseu->compute_subslice_mask),
			   sseu->compute_subslice_mask.xehp);
	} else {
		drm_printf(p, "slice total: %u, mask=%04x\n",
			   hweight8(sseu->slice_mask), sseu->slice_mask);
		drm_printf(p, "subslice total: %u\n",
			   intel_sseu_subslice_total(sseu));

		for (s = 0; s < sseu->max_slices; s++) {
			u8 ss_mask = sseu->subslice_mask.hsw[s];

			drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
				   s, hweight8(ss_mask), ss_mask);
		}
	}

	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   str_yes_no(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   str_yes_no(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n",
		   str_yes_no(sseu->has_eu_pg));
}

static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
				    struct drm_printer *p)
{
	int s, ss;

	for (s = 0; s < sseu->max_slices; s++) {
		u8 ss_mask = sseu->subslice_mask.hsw[s];

		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, hweight8(ss_mask), ss_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}

static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
				     struct drm_printer *p)
{
	int dss;

	for (dss = 0; dss < sseu->max_subslices; dss++) {
		u16 enabled_eus = sseu_get_eus(sseu, 0, dss);

		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
			   str_yes_no(test_bit(dss, sseu->geometry_subslice_mask.xehp)),
			   str_yes_no(test_bit(dss, sseu->compute_subslice_mask.xehp)),
			   hweight16(enabled_eus), enabled_eus);
	}
}

void intel_sseu_print_topology(struct drm_i915_private *i915,
			       const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	if (sseu->max_slices == 0)
		drm_printf(p, "Unavailable\n");
	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
		sseu_print_xehp_topology(sseu, p);
	else
		sseu_print_hsw_topology(sseu, p);
}

void intel_sseu_print_ss_info(const char *type,
			      const struct sseu_dev_info *sseu,
			      struct seq_file *m)
{
	int s;

	if (sseu->has_xehp_dss) {
		seq_printf(m, "  %s Geometry DSS: %u\n", type,
			   bitmap_weight(sseu->geometry_subslice_mask.xehp,
					 XEHP_BITMAP_BITS(sseu->geometry_subslice_mask)));
		seq_printf(m, "  %s Compute DSS: %u\n", type,
			   bitmap_weight(sseu->compute_subslice_mask.xehp,
					 XEHP_BITMAP_BITS(sseu->compute_subslice_mask)));
	} else {
		for (s = 0; s < fls(sseu->slice_mask); s++)
			seq_printf(m, "  %s Slice%i subslices: %u\n", type,
				   s, hweight8(sseu->subslice_mask.hsw[s]));
	}
}

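/**
 * intel_slicemask_from_xehp_dssmask - Convert a DSS mask into a slice mask
 * @dss_mask: bitmap of enabled DSS
 * @dss_per_slice: number of DSS per slice on this platform
 *
 * Treat each consecutive group of @dss_per_slice DSS as one slice and set
 * the corresponding slice bit if any DSS in that group is enabled. For
 * example, a DSS mask of 0x0f0f00 with 8 DSS per slice yields a slice
 * mask of 0x6 (slices 1 and 2 populated, slice 0 empty).
 */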
u16 intel_slicemask_from_xehp_dssmask(intel_sseu_ss_mask_t dss_mask,
				      int dss_per_slice)
{
	intel_sseu_ss_mask_t per_slice_mask = {};
	unsigned long slice_mask = 0;
	int i;

	WARN_ON(DIV_ROUND_UP(XEHP_BITMAP_BITS(dss_mask), dss_per_slice) >
		8 * sizeof(slice_mask));

	bitmap_fill(per_slice_mask.xehp, dss_per_slice);
	for (i = 0; !bitmap_empty(dss_mask.xehp, XEHP_BITMAP_BITS(dss_mask)); i++) {
		if (bitmap_intersects(dss_mask.xehp, per_slice_mask.xehp, dss_per_slice))
			slice_mask |= BIT(i);

		bitmap_shift_right(dss_mask.xehp, dss_mask.xehp, dss_per_slice,
				   XEHP_BITMAP_BITS(dss_mask));
	}

	return slice_mask;
}