// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)sseu->subslice_mask[offset + i] <<
			i * BITS_PER_BYTE;

	return mask;
}

void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u8 *subslice_mask, u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
		       int subslice)
{
	int slice_stride = sseu->max_subslices * sseu->eu_stride;

	return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);
	u16 eu_mask = 0;

	for (i = 0; i < sseu->eu_stride; i++)
		eu_mask |=
			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

	return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);

	for (i = 0; i < sseu->eu_stride; i++)
		sseu->eu_mask[offset + i] =
			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	u16 i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
		total += hweight8(sseu->eu_mask[i]);

	return total;
}

static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
{
	u32 ss_mask;

	ss_mask = ss_en >> (s * sseu->max_subslices);
	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);

	return ss_mask;
}

static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
	int s, ss;

	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
		   sizeof(g_ss_en) * BITS_PER_BYTE);

	for (s = 0; s < sseu->max_slices; s++) {
		if ((s_en & BIT(s)) == 0)
			continue;

		sseu->slice_mask |= BIT(s);

		/*
		 * XeHP introduces the concept of compute vs geometry DSS. To
		 * reduce variation between GENs around subslice usage, store a
		 * mask for both the geometry and compute enabled masks since
		 * userspace will need to be able to query these masks
		 * independently. Also compute a total enabled subslice count
		 * for the purposes of selecting subslices to use in a
		 * particular GEM context.
		 */
		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
					 get_ss_stride_mask(sseu, s, c_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
					 get_ss_stride_mask(sseu, s, g_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 get_ss_stride_mask(sseu, s,
							    g_ss_en | c_ss_en));

		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (intel_sseu_has_subslice(sseu, s, ss))
				sseu_set_eus(sseu, s, ss, eu_en);
	}
	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}
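
/*
 * Worked example of the byte-packed subslice masks (hypothetical fuse
 * values, not from any real SKU): with max_slices = 1 and max_subslices = 6,
 * and assuming GEN_SSEU_STRIDE() rounds the entry count up to whole bytes,
 * ss_stride is 1.  A combined enable mask of g_ss_en | c_ss_en = 0x2b is
 * narrowed by get_ss_stride_mask() to GENMASK(5, 0), still 0x2b, and
 * intel_sseu_set_subslices() memcpy()s that single byte into
 * subslice_mask[0].  intel_sseu_get_subslices(sseu, 0) later reassembles
 * the same 0x2b by shifting each stored byte back into place.
 */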

static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 g_dss_en, c_dss_en = 0;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array
	 * of DSS to more closely represent the hardware resource.
	 *
	 * In addition, the concept of slice has been removed in Xe_HP.
	 * To be compatible with prior generations, assume a single slice
	 * across the entire device. Then calculate out the DSS for each
	 * workload type within that software slice.
	 */
	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
		intel_sseu_set_info(sseu, 1, 32, 16);
	else
		intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * As mentioned above, Xe_HP does not have the concept of a slice.
	 * Enable one for software backwards compatibility.
	 */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		s_en = 0x1;
	else
		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		       GEN11_GT_S_ENA_MASK;

	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);

	/* one bit per pair of EUs */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
	else
		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
			       GEN11_EU_DIS_MASK);

	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}
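
/*
 * Illustration of the EU-pair expansion above (hypothetical fuse value):
 * with max_eus_per_subslice = 16 the fuse holds one bit per pair of EUs,
 * so eu_en_fuse = 0x0f expands to eu_en = 0x00ff, i.e. EUs 0-7 enabled and
 * EUs 8-15 fused off.
 */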

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JSL_EHL(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
	       GEN11_GT_S_ENA_MASK;
	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;
	u8 subslice_mask = 0;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		subslice_mask |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		subslice_mask |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
	}

	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EUs
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
				sseu->eu_total /
					intel_sseu_subslice_total(sseu) :
				0;
	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}
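
/*
 * Illustration of the CHV EU-disable decode above (hypothetical fuse
 * value): each subslice's disable mask is split across two fuse fields,
 * R0 supplying bits 0-3 and R1 supplying bits 4-7.  If R0 decodes to 0x1
 * and R1 to 0x8, disabled_mask becomes 0x81, so EU 0 and EU 7 are treated
 * as fused off and sseu_set_eus() stores the enable mask 0x7e (EUs 1-6).
 */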

static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *info = mkwrite_device_info(i915);
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) have 7 EUs. We
			 * can tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EUs across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (info->has_pooled_eu) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}
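
/*
 * Illustration of the 7-EU bookkeeping above (hypothetical fuse value): an
 * eu_disabled_mask of 0x10 for a subslice means one of its eight EUs is
 * fused off, leaving eu_per_ss = 7, so the corresponding bit is recorded
 * in subslice_7eu[] for later work-distribution tuning.
 */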

static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
		 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
		 (32 - GEN8_EU_DIS1_S2_SHIFT));

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EUs across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}
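
/*
 * Note on the register stitching above: as the shifts imply, each BDW slice
 * needs 24 EU-disable bits (3 subslices x 8 EUs), so the 72 bits for three
 * slices span all three 32-bit GEN8_EU_DISABLE registers.  Slices 1 and 2
 * therefore have to be reassembled from the tail of one register and the
 * head of the next before being cut into 8-bit per-subslice masks.
 */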

static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices. We
	 * work off the PCI-ids here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
	default:
		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 9)
		gen9_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
}

u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (i915->perf.exclusive_stream)
		req_sseu = &i915->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, the maximum
	 * documented value for it is four. From this some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a subslice count between the SScount maximum of four and the maximum
	 * number available on a particular SKU. Either all subslices are
	 * enabled, or a count between one and four on the first slice.
	 */
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}
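
/*
 * Worked example of the Icelake quirk handled above (hypothetical request):
 * on a part with 8 subslices in slice 0, a request for 1 slice x 8
 * subslices exceeds min(4, 8 / 2) = 4, so the function doubles the slice
 * count to two and stops emitting a subslice count; per the documented
 * behaviour, hardware translates the resulting 2x4x8 programming back to
 * the intended 1x8x8 configuration.
 */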

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	drm_printf(p, "slice total: %u, mask=%04x\n",
		   hweight8(sseu->slice_mask), sseu->slice_mask);
	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));
	}
	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   yesno(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   yesno(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
}

void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	int s, ss;

	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
		return;
	}

	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}

u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

	for (i = 0; dss_mask; i++) {
		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}
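
/*
 * Worked example for intel_slicemask_from_dssmask() (hypothetical mask):
 * with dss_per_slice = 4, a dss_mask of 0xf0 has no DSS enabled in slice 0
 * and all four enabled in slice 1, so the function returns a slice_mask of
 * 0x2.
 */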