// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <drm/drm_managed.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_topology.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
#include "xe_tuning.h"
#include "xe_wa.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};

static const struct engine_info engine_infos[] = {
	[XE_HW_ENGINE_RCS0] = {
		.name = "rcs0",
		.class = XE_ENGINE_CLASS_RENDER,
		.instance = 0,
		.domain = XE_FW_RENDER,
		.mmio_base = RENDER_RING_BASE,
	},
	[XE_HW_ENGINE_BCS0] = {
		.name = "bcs0",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 0,
		.domain = XE_FW_RENDER,
		.mmio_base = BLT_RING_BASE,
	},
	[XE_HW_ENGINE_BCS1] = {
		.name = "bcs1",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 1,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS1_RING_BASE,
	},
	[XE_HW_ENGINE_BCS2] = {
		.name = "bcs2",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 2,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS2_RING_BASE,
	},
	[XE_HW_ENGINE_BCS3] = {
		.name = "bcs3",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 3,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS3_RING_BASE,
	},
	[XE_HW_ENGINE_BCS4] = {
		.name = "bcs4",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 4,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS4_RING_BASE,
	},
	[XE_HW_ENGINE_BCS5] = {
		.name = "bcs5",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 5,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS5_RING_BASE,
	},
	[XE_HW_ENGINE_BCS6] = {
		.name = "bcs6",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 6,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS6_RING_BASE,
	},
	[XE_HW_ENGINE_BCS7] = {
		.name = "bcs7",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 7,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS7_RING_BASE,
	},
	[XE_HW_ENGINE_BCS8] = {
		.name = "bcs8",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 8,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS8_RING_BASE,
	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_base = BSD_RING_BASE,
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_base = BSD2_RING_BASE,
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_base = BSD3_RING_BASE,
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_base = BSD4_RING_BASE,
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_base = XEHP_BSD5_RING_BASE,
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_base = XEHP_BSD6_RING_BASE,
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_base = XEHP_BSD7_RING_BASE,
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_base = XEHP_BSD8_RING_BASE,
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_base = VEBOX_RING_BASE,
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_base = VEBOX2_RING_BASE,
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_base = XEHP_VEBOX3_RING_BASE,
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_base = XEHP_VEBOX4_RING_BASE,
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE0_RING_BASE,
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE1_RING_BASE,
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE2_RING_BASE,
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE3_RING_BASE,
	},
	[XE_HW_ENGINE_GSCCS0] = {
		.name = "gsccs0",
		.class = XE_ENGINE_CLASS_OTHER,
		.instance = OTHER_GSC_INSTANCE,
		.domain = XE_FW_GSC,
		.mmio_base = GSCCS_RING_BASE,
	},
};

static void hw_engine_fini(struct drm_device *drm, void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);
	xe_lrc_finish(&hwe->kernel_lrc);

	xe_bo_unpin_map_no_vm(hwe->hwsp);

	hwe->gt = NULL;
}

static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
				   u32 val)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	xe_mmio_write32(hwe->gt, reg, val);
}

static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	return xe_mmio_read32(hwe->gt, reg);
}

void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
		xe_mmio_write32(hwe->gt, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
			       xe_bo_ggtt_addr(hwe->hwsp));
	hw_engine_mmio_write32(hwe, RING_MODE(0),
			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
			       _MASKED_BIT_DISABLE(STOP_RING));
	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}
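/**
 * xe_hw_engine_setup_default_lrc_state - Build default LRC register state
 * @hwe: Xe HW Engine
 *
 * Collects the register values that every new LRC (context image) created
 * for this engine should start from. Currently this only programs BLIT_CCTL
 * on copy engines so that blitter commands lacking an explicit MOCS field
 * use the uncached MOCS index.
 */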
void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = gt->mocs.uc_index;
	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr lrc_was[] = {
		/*
		 * Some blitter commands do not have a field for MOCS; those
		 * commands use the MOCS index pointed to by BLIT_CCTL, so
		 * BLIT_CCTL needs to be programmed to the uncached index.
		 */
		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
			       ENGINE_CLASS(COPY)),
		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
					   BLIT_CCTL_DST_MOCS_MASK |
					   BLIT_CCTL_SRC_MOCS_MASK,
					   blit_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
}

static void
hw_engine_setup_default_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be used
	 * by the command streamer when executing commands that don't have
	 * a way to explicitly specify a MOCS setting. The default should
	 * usually reference whichever MOCS entry corresponds to uncached
	 * behavior, although use of a WB cached entry is recommended by the
	 * spec in certain circumstances on specific platforms.
	 * Bspec: 72161
	 */
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
				 gt->mocs.wb_index : gt->mocs.uc_index;
	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr engine_entries[] = {
		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
					   CMD_CCTL_WRITE_OVERRIDE_MASK |
					   CMD_CCTL_READ_OVERRIDE_MASK,
					   ring_cmd_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/*
		 * To allow the GSC engine to go idle on MTL we need to enable
		 * idle messaging and set the hysteresis value (we use 0xA =
		 * 5us as recommended in the spec). On platforms after MTL
		 * this is enabled by default.
		 */
		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
				     IDLE_MSG_DISABLE,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
					   IDLE_WAIT_TIME,
					   0xA,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}
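/*
 * Early per-engine initialization: copy the static engine_info into the
 * xe_hw_engine, seed the per-class scheduling defaults on first use, and
 * build the save/restore and whitelist register lists that hw_engine_init()
 * applies to the hardware later.
 */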
static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	xe_gt_assert(gt, !hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = info->mmio_base;
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	hwe->eclass = &gt->eclass[hwe->class];
	if (!hwe->eclass->sched_props.job_timeout_ms) {
		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
		/* Record default props */
		hwe->eclass->defaults = hwe->eclass->sched_props;
	}

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_tuning_process_engine(hwe);
	xe_wa_process_engine(hwe);
	hw_engine_setup_default_state(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}

static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	int err;

	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
	xe_reg_sr_apply_whitelist(hwe);

	hwe->hwsp = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
					 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
					 XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
	if (err)
		goto err_hwsp;

	if (!xe_device_uc_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_kernel_lrc;
		}
	}

	if (xe_device_uc_enabled(xe))
		xe_hw_engine_enable_ring(hwe);

	/* We reserve the highest BCS instance for USM */
	if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
	if (err)
		return err;

	return 0;

err_kernel_lrc:
	xe_lrc_finish(&hwe->kernel_lrc);
err_hwsp:
	xe_bo_unpin_map_no_vm(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}
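/*
 * Assign logical instance numbers by densely renumbering, per class, the
 * engines that survived fusing, in increasing order of hardware instance.
 */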
static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}

static void read_media_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);

	/*
	 * Pre-Xe_HP platforms had register bits representing absent engines,
	 * whereas Xe_HP and beyond have bits representing present engines.
	 * Invert the polarity on old platforms so that we can use common
	 * handling below.
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vecs%u fused off\n", j);
		}
	}
}

static void read_copy_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 bcs_mask;

	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
		return;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);

	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j / 2) & bcs_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "bcs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * CCS fusing based on DSS masks only applies to platforms that can
	 * have more than one CCS.
	 */
	if (hweight64(gt->info.engine_mask &
		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
		return;

	/*
	 * CCS availability on Xe_HP is inferred from the presence of DSS in
	 * each quadrant.
	 */
	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}
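/*
 * On graphics version 20 and beyond, CCS fusing is reported directly in the
 * CCS_EN field of the XEHP_FUSE4 register rather than being inferred from
 * the DSS topology.
 */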
static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 ccs_mask;

	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);

	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if ((ccs_mask & BIT(j)) == 0) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
		read_compute_fuses_from_reg(gt);
	else
		read_compute_fuses_from_dss(gt);
}

int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_media_fuses(gt);
	read_copy_fuses(gt);
	read_compute_fuses(gt);

	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);

	return 0;
}

void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_RENDER_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

/**
 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 * @hwe: Xe HW Engine.
 *
 * This can be printed out at a later stage, e.g. during dev_coredump
 * analysis.
 *
 * Returns: a Xe HW Engine snapshot object that must be freed by the
 * caller, using `xe_hw_engine_snapshot_free`.
 */
struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
{
	struct xe_hw_engine_snapshot *snapshot;
	int len;

	if (!xe_hw_engine_is_valid(hwe))
		return NULL;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
	if (!snapshot)
		return NULL;

	len = strlen(hwe->name) + 1;
	snapshot->name = kzalloc(len, GFP_ATOMIC);
	if (snapshot->name)
		strscpy(snapshot->name, hwe->name, len);

	snapshot->class = hwe->class;
	snapshot->logical_instance = hwe->logical_instance;
	snapshot->forcewake.domain = hwe->domain;
	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
						    hwe->domain);
	snapshot->mmio_base = hwe->mmio_base;

	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe,
							   RING_HWS_PGA(0));
	snapshot->reg.ring_execlist_status_lo =
		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
	snapshot->reg.ring_execlist_status_hi =
		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
	snapshot->reg.ring_execlist_sq_contents_lo =
		hw_engine_mmio_read32(hwe,
				      RING_EXECLIST_SQ_CONTENTS_LO(0));
	snapshot->reg.ring_execlist_sq_contents_hi =
		hw_engine_mmio_read32(hwe,
				      RING_EXECLIST_SQ_CONTENTS_HI(0));
	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
	snapshot->reg.ring_head =
		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
	snapshot->reg.ring_tail =
		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
	snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
	snapshot->reg.ring_mi_mode =
		hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
	snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
	snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
	snapshot->reg.ring_acthd_udw =
		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
	snapshot->reg.ring_bbaddr_udw =
		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
	snapshot->reg.ring_dma_fadd_udw =
		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
	snapshot->reg.ring_dma_fadd =
		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));

	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);

	return snapshot;
}

/**
 * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given Xe HW Engine snapshot object.
 */
void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
				 struct drm_printer *p)
{
	if (!snapshot)
		return;

	drm_printf(p, "%s (physical), logical instance=%d\n",
		   snapshot->name ? snapshot->name : "",
		   snapshot->logical_instance);
	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
		   snapshot->forcewake.domain, snapshot->forcewake.ref);
	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
		   snapshot->reg.ring_execlist_status_lo);
	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
		   snapshot->reg.ring_execlist_status_hi);
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
		   snapshot->reg.ring_execlist_sq_contents_lo);
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
		   snapshot->reg.ring_execlist_sq_contents_hi);
	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
	drm_printf(p, "\tRING_MODE: 0x%08x\n",
		   snapshot->reg.ring_mode);
	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
	drm_printf(p, "\tACTHD: 0x%08x_%08x\n", snapshot->reg.ring_acthd_udw,
		   snapshot->reg.ring_acthd);
	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw,
		   snapshot->reg.ring_bbaddr);
	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
		   snapshot->reg.ring_dma_fadd_udw,
		   snapshot->reg.ring_dma_fadd);
	drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
			   snapshot->reg.rcu_mode);
}

/**
 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->name);
	kfree(snapshot);
}

/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function quickly captures a snapshot and immediately prints it out.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	snapshot = xe_hw_engine_snapshot_capture(hwe);
	xe_hw_engine_snapshot_print(snapshot, p);
	xe_hw_engine_snapshot_free(snapshot);
}

u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	if (hwe->class == XE_ENGINE_CLASS_OTHER)
		return true;

	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}