// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <linux/nospec.h>

#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <uapi/drm/xe_drm.h>
#include <generated/xe_wa_oob.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_irq_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_clock.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_guc_capture.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_tuning.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	unsigned int irq_offset : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};

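/*
 * Static descriptors for every engine the driver knows about, indexed by
 * enum xe_hw_engine_id. Engines that are absent or fused off on a given
 * platform are filtered out at init time via gt->info.engine_mask.
 */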
static const struct engine_info engine_infos[] = {
	[XE_HW_ENGINE_RCS0] = {
		.name = "rcs0",
		.class = XE_ENGINE_CLASS_RENDER,
		.instance = 0,
		.irq_offset = ilog2(INTR_RCS0),
		.domain = XE_FW_RENDER,
		.mmio_base = RENDER_RING_BASE,
	},
	[XE_HW_ENGINE_BCS0] = {
		.name = "bcs0",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 0,
		.irq_offset = ilog2(INTR_BCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = BLT_RING_BASE,
	},
	[XE_HW_ENGINE_BCS1] = {
		.name = "bcs1",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 1,
		.irq_offset = ilog2(INTR_BCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS1_RING_BASE,
	},
	[XE_HW_ENGINE_BCS2] = {
		.name = "bcs2",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 2,
		.irq_offset = ilog2(INTR_BCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS2_RING_BASE,
	},
	[XE_HW_ENGINE_BCS3] = {
		.name = "bcs3",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 3,
		.irq_offset = ilog2(INTR_BCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS3_RING_BASE,
	},
	[XE_HW_ENGINE_BCS4] = {
		.name = "bcs4",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 4,
		.irq_offset = ilog2(INTR_BCS(4)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS4_RING_BASE,
	},
	[XE_HW_ENGINE_BCS5] = {
		.name = "bcs5",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 5,
		.irq_offset = ilog2(INTR_BCS(5)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS5_RING_BASE,
	},
	[XE_HW_ENGINE_BCS6] = {
		.name = "bcs6",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 6,
		.irq_offset = ilog2(INTR_BCS(6)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS6_RING_BASE,
	},
	[XE_HW_ENGINE_BCS7] = {
		.name = "bcs7",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 7,
		.irq_offset = ilog2(INTR_BCS(7)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS7_RING_BASE,
	},
	[XE_HW_ENGINE_BCS8] = {
		.name = "bcs8",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 8,
		.irq_offset = ilog2(INTR_BCS8),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS8_RING_BASE,
	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VCS(0)),
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_base = BSD_RING_BASE,
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VCS(1)),
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_base = BSD2_RING_BASE,
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VCS(2)),
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_base = BSD3_RING_BASE,
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VCS(3)),
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_base = BSD4_RING_BASE,
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.irq_offset = 32 + ilog2(INTR_VCS(4)),
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_base = XEHP_BSD5_RING_BASE,
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.irq_offset = 32 + ilog2(INTR_VCS(5)),
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_base = XEHP_BSD6_RING_BASE,
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.irq_offset = 32 + ilog2(INTR_VCS(6)),
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_base = XEHP_BSD7_RING_BASE,
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.irq_offset = 32 + ilog2(INTR_VCS(7)),
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_base = XEHP_BSD8_RING_BASE,
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VECS(0)),
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_base = VEBOX_RING_BASE,
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VECS(1)),
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_base = VEBOX2_RING_BASE,
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VECS(2)),
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_base = XEHP_VEBOX3_RING_BASE,
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VECS(3)),
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_base = XEHP_VEBOX4_RING_BASE,
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.irq_offset = ilog2(INTR_CCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE0_RING_BASE,
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.irq_offset = ilog2(INTR_CCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE1_RING_BASE,
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.irq_offset = ilog2(INTR_CCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE2_RING_BASE,
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.irq_offset = ilog2(INTR_CCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE3_RING_BASE,
	},
	[XE_HW_ENGINE_GSCCS0] = {
		.name = "gsccs0",
		.class = XE_ENGINE_CLASS_OTHER,
		.instance = OTHER_GSC_INSTANCE,
		.domain = XE_FW_GSC,
		.mmio_base = GSCCS_RING_BASE,
	},
};

static void hw_engine_fini(void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);

	hwe->gt = NULL;
}

/**
 * xe_hw_engine_mmio_write32() - Write engine register
 * @hwe: engine
 * @reg: register to write into
 * @val: desired 32-bit value to write
 *
 * This function will write @val into an engine-specific register.
 * Forcewake must be held by the caller.
 */
void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
			       struct xe_reg reg, u32 val)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	xe_mmio_write32(&hwe->gt->mmio, reg, val);
}

/**
 * xe_hw_engine_mmio_read32() - Read engine register
 * @hwe: engine
 * @reg: register to read from
 *
 * This function will read from an engine-specific register.
 * Forcewake must be held by the caller.
 *
 * Return: value of the 32-bit register.
 */
u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	return xe_mmio_read32(&hwe->gt->mmio, reg);
}

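/**
 * xe_hw_engine_enable_ring() - Prepare an engine's ring for execution
 * @hwe: engine
 *
 * Enables the compute command streamers via RCU_MODE when @hwe is a compute
 * engine, programs RING_HWSTAM, points RING_HWS_PGA at the engine's hardware
 * status page in GGTT, selects non-legacy ring mode (and MSI-X interrupt
 * delivery when the device uses MSI-X) via RING_MODE and clears STOP_RING.
 * The final register read posts the writes. Forcewake must be held by the
 * caller.
 */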
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
	u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
		xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
	xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
				  xe_bo_ggtt_addr(hwe->hwsp));

	if (xe_device_has_msix(gt_to_xe(hwe->gt)))
		ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
	xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode);
	xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
				  _MASKED_BIT_DISABLE(STOP_RING));
	xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}

static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_device *xe,
						 const struct xe_gt *gt,
						 const struct xe_hw_engine *hwe)
{
	/*
	 * Xe3p no longer supports load balance mode, so "fixed cslice" mode
	 * is automatic and no RCU_MODE programming is required.
	 */
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 35)
		return false;

	return xe_gt_ccs_mode_enabled(gt) &&
	       xe_rtp_match_first_render_or_compute(xe, gt, hwe);
}

static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_device *xe,
				      const struct xe_gt *gt,
				      const struct xe_hw_engine *hwe)
{
	if (GRAPHICS_VER(xe) < 20)
		return false;

	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
	    hwe->class != XE_ENGINE_CLASS_RENDER)
		return false;

	return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}

void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = gt->mocs.uc_index;
	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr lrc_setup[] = {
		/*
		 * Some blitter commands do not have a field for MOCS; those
		 * commands use the MOCS index pointed to by BLIT_CCTL, so the
		 * BLIT_CCTL registers need to be programmed to un-cached.
		 */
		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
			       ENGINE_CLASS(COPY)),
		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
					   BLIT_CCTL_DST_MOCS_MASK |
					   BLIT_CCTL_SRC_MOCS_MASK,
					   blit_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Disable WMTP if HW doesn't support it */
		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
					   PREEMPT_GPGPU_LEVEL_MASK,
					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
		},
	};

	xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc);
}

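/*
 * Build the default per-engine register state (hwe->reg_sr). The entries
 * below are processed through the RTP framework alongside the engine
 * workarounds and tunings, and are applied to the engine's MMIO registers
 * when the engine is initialized.
 */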
static void
hw_engine_setup_default_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be
	 * used by the command streamer when executing commands that
	 * don't have a way to explicitly specify a MOCS setting.
	 * The default should usually reference whichever MOCS entry
	 * corresponds to uncached behavior, although use of a WB cached
	 * entry is recommended by the spec in certain circumstances on
	 * specific platforms.
	 * Bspec: 72161
	 */
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) &&
				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
				 gt->mocs.wb_index : gt->mocs.uc_index;
	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr engine_entries[] = {
		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
					   CMD_CCTL_WRITE_OVERRIDE_MASK |
					   CMD_CCTL_READ_OVERRIDE_MASK,
					   ring_cmd_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/*
		 * To allow the GSC engine to go idle on MTL we need to enable
		 * idle messaging and set the hysteresis value (we use 0xA=5us
		 * as recommended in spec). On platforms after MTL this is
		 * enabled by default.
		 */
		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
				     IDLE_MSG_DISABLE,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
					   IDLE_WAIT_TIME,
					   0xA,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Enable Priority Mem Read */
		{ XE_RTP_NAME("Priority_Mem_Read"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Use Fixed slice CCS mode */
		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
					   RCU_MODE_FIXED_SLICE_CCS_MODE))
		},
	};

	xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr);
}

static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)
{
	const struct engine_info *info;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		info = &engine_infos[id];
		if (info->class == class && info->instance == instance)
			return info;
	}

	return NULL;
}

static u16 get_msix_irq_offset(struct xe_gt *gt, enum xe_engine_class class)
{
	/* For MSI-X, hw engines report to the offset of engine instance zero */
	const struct engine_info *info = find_engine_info(class, 0);

	xe_gt_assert(gt, info);

	return info ? info->irq_offset : 0;
}

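/*
 * Early, submission-independent setup of a hardware engine: copy the static
 * data from engine_infos, pick the interrupt offset (per-class when MSI-X is
 * in use), seed the per-class scheduling defaults the first time an engine of
 * that class is seen, and build the save/restore and whitelist register lists
 * from tunings, workarounds and the default state above.
 */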
static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	xe_gt_assert(gt, !hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = info->mmio_base;
	hwe->irq_offset = xe_device_has_msix(gt_to_xe(gt)) ?
		get_msix_irq_offset(gt, info->class) :
		info->irq_offset;
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	hwe->eclass = &gt->eclass[hwe->class];
	if (!hwe->eclass->sched_props.job_timeout_ms) {
		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;

		/*
		 * The GSC engine can accept submissions while the GSC shim is
		 * being reset, during which time the submission is stalled. In
		 * the worst case, the shim reset can take up to the maximum GSC
		 * command execution time (250ms), so the request start can be
		 * delayed by that much; the request itself can take that long
		 * without being preemptible, which means worst case it can
		 * theoretically take up to 500ms for a preemption to go through
		 * on the GSC engine. Adding to that an extra 100ms as a safety
		 * margin, we get a minimum recommended timeout of 600ms.
		 * The preempt_timeout value can't be tuned for OTHER_CLASS
		 * because the class is reserved for kernel usage, so we just
		 * need to make sure that the starting value is above that
		 * threshold; since our default value (640ms) is greater than
		 * 600ms, the only way we can go below is via a kconfig setting.
		 * If that happens, log it in dmesg and update the value.
		 */
		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
			const u32 min_preempt_timeout = 600 * 1000;

			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
			}
		}

		/* Record default props */
		hwe->eclass->defaults = hwe->eclass->sched_props;
	}

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_tuning_process_engine(hwe);
	xe_wa_process_engine(hwe);
	hw_engine_setup_default_state(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}

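/*
 * WA 16023105232 requires the context-switch idle delay (RING_IDLEDLY) to
 * stay below the idle wait time (RING_PWRCTX_MAXCNT). IDLEDLY is expressed
 * in CS timestamp ticks (8 * gt->info.timestamp_base picoseconds per tick)
 * while MAXCNT is in 640ns units, so both are normalized before comparing.
 * If IDLEDLY is not below MAXCNT, or the inhibit-switch-until-preempted bit
 * is set, IDLEDLY is rewritten to fall just below MAXCNT. Skipped on SR-IOV
 * VFs.
 */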
static void adjust_idledly(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 idledly, maxcnt;
	u32 idledly_units_ps = 8 * gt->info.timestamp_base;
	u32 maxcnt_units_ns = 640;
	bool inhibit_switch = 0;

	if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_GT_WA(gt, 16023105232)) {
		idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
		maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));

		inhibit_switch = idledly & INHIBIT_SWITCH_UNTIL_PREEMPTED;
		idledly = REG_FIELD_GET(IDLE_DELAY, idledly);
		idledly = DIV_ROUND_CLOSEST(idledly * idledly_units_ps, 1000);
		maxcnt = REG_FIELD_GET(IDLE_WAIT_TIME, maxcnt);
		maxcnt *= maxcnt_units_ns;

		if (xe_gt_WARN_ON(gt, idledly >= maxcnt || inhibit_switch)) {
			idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * maxcnt_units_ns),
						    idledly_units_ps);
			idledly = DIV_ROUND_CLOSEST(idledly, 1000);
			xe_mmio_write32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base), idledly);
		}
	}
}

static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	int err;

	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);

	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
						 XE_BO_FLAG_GGTT |
						 XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	if (!xe_device_uc_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_hwsp;
		}
	} else {
		/* GSCCS has a special interrupt for reset */
		if (hwe->class == XE_ENGINE_CLASS_OTHER)
			hwe->irq_handler = xe_gsc_hwe_irq_handler;

		if (!IS_SRIOV_VF(xe))
			xe_hw_engine_enable_ring(hwe);
	}

	/* We reserve the highest BCS instance for USM */
	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	/* Ensure IDLEDLY is lower than MAXCNT */
	adjust_idledly(hwe);

	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);

err_hwsp:
	xe_bo_unpin_map_no_vm(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}

static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}

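/*
 * Fuse handling: the functions below prune gt->info.engine_mask so it only
 * contains engines that are actually present. Media (VCS/VECS), copy (BCS)
 * and compute (CCS) engines can each be fused off; the GSCCS is dropped when
 * no GSC firmware is available, and configfs can disable engines on top of
 * that (see xe_hw_engines_init_early()).
 */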
static void read_media_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	media_fuse = xe_mmio_read32(&gt->mmio, GT_VEBOX_VDBOX_DISABLE);

	/*
	 * Pre-Xe_HP platforms had register bits representing absent engines,
	 * whereas Xe_HP and beyond have bits representing present engines.
	 * Invert the polarity on old platforms so that we can use common
	 * handling below.
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "vecs%u fused off\n", j);
		}
	}
}

static u32 infer_svccopy_from_meml3(struct xe_gt *gt)
{
	u32 meml3 = REG_FIELD_GET(MEML3_EN_MASK,
				  xe_mmio_read32(&gt->mmio, MIRROR_FUSE3));
	u32 svccopy_mask = 0;

	/*
	 * Each of the four meml3 bits determines the fusing of two service
	 * copy engines.
	 */
	for (int i = 0; i < 4; i++)
		svccopy_mask |= (meml3 & BIT(i)) ? 0b11 << 2 * i : 0;

	return svccopy_mask;
}

static u32 read_svccopy_fuses(struct xe_gt *gt)
{
	return REG_FIELD_GET(FUSE_SERVICE_COPY_ENABLE_MASK,
			     xe_mmio_read32(&gt->mmio, SERVICE_COPY_ENABLE));
}

static void read_copy_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 bcs_mask;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	if (GRAPHICS_VER(xe) >= 35)
		bcs_mask = read_svccopy_fuses(gt);
	else if (GRAPHICS_VERx100(xe) == 1260)
		bcs_mask = infer_svccopy_from_meml3(gt);
	else
		return;

	/* Only BCS1-BCS8 may be fused off */
	bcs_mask <<= XE_HW_ENGINE_BCS1;
	for (int i = XE_HW_ENGINE_BCS1; i <= XE_HW_ENGINE_BCS8; ++i) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(bcs_mask & BIT(i))) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "bcs%u fused off\n",
				   i - XE_HW_ENGINE_BCS0);
		}
	}
}

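/*
 * CCS fusing is reported in two different ways depending on the platform:
 * pre-Xe2 parts infer CCS presence from the DSS topology of each quadrant,
 * while Xe2 and later expose an explicit CCS enable mask in FUSE4; see
 * read_compute_fuses() for the selection.
 */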
static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
	/*
	 * CCS fusing based on DSS masks only applies to platforms that can
	 * have more than one CCS.
	 */
	if (hweight64(gt->info.engine_mask &
		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
		return;

	/*
	 * CCS availability on Xe_HP is inferred from the presence of DSS in
	 * each quadrant.
	 */
	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
	u32 ccs_mask;

	ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);

	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if ((ccs_mask & BIT(j)) == 0) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
		read_compute_fuses_from_reg(gt);
	else
		read_compute_fuses_from_dss(gt);
}

static void check_gsc_availability(struct xe_gt *gt)
{
	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
		return;

	/*
	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
	 * have the FW there is nothing we need the engine for and can therefore
	 * skip its initialization.
	 */
	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);

		/* interrupts were previously enabled, so turn them off */
		xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
		xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);

		xe_gt_dbg(gt, "GSC FW not used, disabling gsccs\n");
	}
}

static void check_sw_disable(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u64 sw_allowed = xe_configfs_get_engines_allowed(to_pci_dev(xe->drm.dev));
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (!(gt->info.engine_mask & BIT(id)))
			continue;

		if (!(sw_allowed & BIT(id))) {
			gt->info.engine_mask &= ~BIT(id);
			xe_gt_info(gt, "%s disabled via configfs\n",
				   engine_infos[id].name);
		}
	}
}

int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_media_fuses(gt);
	read_copy_fuses(gt);
	read_compute_fuses(gt);
	check_gsc_availability(gt);
	check_sw_disable(gt);

	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);
	err = xe_hw_engine_setup_groups(gt);
	if (err)
		return err;

	return 0;
}

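/**
 * xe_hw_engine_handle_irq() - Handle an interrupt for a hardware engine
 * @hwe: engine that received the interrupt
 * @intr_vec: interrupt vector bits reported for the engine
 *
 * Wakes up user-fence waiters, forwards the interrupt to the engine-specific
 * handler if one is installed (the GSCCS uses this for its reset interrupt),
 * and runs fence signalling when an MI user interrupt is pending.
 */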
void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_MI_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

/**
 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 * @hwe: Xe HW Engine.
 * @q: The exec queue object.
 *
 * The snapshot can be printed out at a later stage, for example during
 * dev_coredump analysis.
 *
 * Returns: a Xe HW Engine snapshot object that must be freed by the
 * caller, using `xe_hw_engine_snapshot_free`, or NULL if the engine is
 * invalid or the allocation fails.
 */
struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q)
{
	struct xe_hw_engine_snapshot *snapshot;
	struct __guc_capture_parsed_output *node;

	if (!xe_hw_engine_is_valid(hwe))
		return NULL;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);

	if (!snapshot)
		return NULL;

	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
	snapshot->hwe = hwe;
	snapshot->logical_instance = hwe->logical_instance;
	snapshot->forcewake.domain = hwe->domain;
	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
						    hwe->domain);
	snapshot->mmio_base = hwe->mmio_base;
	snapshot->kernel_reserved = xe_hw_engine_is_reserved(hwe);

	/* no more VF accessible data below this point */
	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
		return snapshot;

	if (q) {
		/* If we got a GuC capture, set the source to GuC */
		node = xe_guc_capture_get_matching_and_lock(q);
		if (node) {
			struct xe_device *xe = gt_to_xe(hwe->gt);
			struct xe_devcoredump *coredump = &xe->devcoredump;

			coredump->snapshot.matched_node = node;
			xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node");
			return snapshot;
		}
	}

	/* otherwise, do manual capture */
	xe_engine_manual_capture(hwe, snapshot);
	xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot");

	return snapshot;
}

/**
 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
	struct xe_gt *gt;

	if (!snapshot)
		return;

	gt = snapshot->hwe->gt;
	/*
	 * xe_guc_capture_put_matched_nodes is called here and from
	 * xe_devcoredump_snapshot_free, to cover the 2 calling paths
	 * of hw_engines - debugfs and devcoredump free.
	 */
	xe_guc_capture_put_matched_nodes(&gt->uc.guc);

	kfree(snapshot->name);
	kfree(snapshot);
}

/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function quickly captures a snapshot and immediately prints it out.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	snapshot = xe_hw_engine_snapshot_capture(hwe, NULL);
	xe_engine_snapshot_print(snapshot, p);
	xe_hw_engine_snapshot_free(snapshot);
}

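/**
 * xe_hw_engine_mask_per_class() - Build a mask of present engine instances
 * @gt: GT structure
 * @engine_class: engine class to consider
 *
 * Return: bitmask with one bit set per engine instance of @engine_class that
 * is present in @gt's engine mask.
 */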
u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	if (hwe->class == XE_ENGINE_CLASS_OTHER)
		return true;

	/* Check for engines disabled by ccs_mode setting */
	if (xe_gt_ccs_mode_enabled(gt) &&
	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
	    hwe->logical_instance >= gt->ccs_mode)
		return true;

	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}

const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		return "rcs";
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		return "vcs";
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return "vecs";
	case XE_ENGINE_CLASS_COPY:
		return "bcs";
	case XE_ENGINE_CLASS_OTHER:
		return "other";
	case XE_ENGINE_CLASS_COMPUTE:
		return "ccs";
	case XE_ENGINE_CLASS_MAX:
		break;
	}

	return NULL;
}

u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
	return xe_mmio_read64_2x32(&hwe->gt->mmio, RING_TIMESTAMP(hwe->mmio_base));
}

enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
{
	return engine_infos[hwe->engine_id].domain;
}

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};

/**
 * xe_hw_engine_lookup() - Lookup hardware engine for class:instance
 * @xe: xe device
 * @eci: engine class and instance
 *
 * This function will find a hardware engine for the given engine
 * class and instance.
 *
 * Return: the xe_hw_engine pointer if found, NULL otherwise.
 */
struct xe_hw_engine *
xe_hw_engine_lookup(struct xe_device *xe,
		    struct drm_xe_engine_class_instance eci)
{
	struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id);
	unsigned int idx;

	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return NULL;

	if (!gt)
		return NULL;

	idx = array_index_nospec(eci.engine_class,
				 ARRAY_SIZE(user_to_xe_engine_class));

	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
			       user_to_xe_engine_class[idx],
			       eci.engine_instance, true);
}