// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <linux/nospec.h>

#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <uapi/drm/xe_drm.h>
#include <generated/xe_wa_oob.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_irq_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
#include "xe_guc_capture.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_tuning.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	unsigned int irq_offset : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};

static const struct engine_info engine_infos[] = {
	[XE_HW_ENGINE_RCS0] = {
		.name = "rcs0",
		.class = XE_ENGINE_CLASS_RENDER,
		.instance = 0,
		.irq_offset = ilog2(INTR_RCS0),
		.domain = XE_FW_RENDER,
		.mmio_base = RENDER_RING_BASE,
	},
	[XE_HW_ENGINE_BCS0] = {
		.name = "bcs0",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 0,
		.irq_offset = ilog2(INTR_BCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = BLT_RING_BASE,
	},
	[XE_HW_ENGINE_BCS1] = {
		.name = "bcs1",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 1,
		.irq_offset = ilog2(INTR_BCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS1_RING_BASE,
	},
	[XE_HW_ENGINE_BCS2] = {
		.name = "bcs2",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 2,
		.irq_offset = ilog2(INTR_BCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS2_RING_BASE,
	},
	[XE_HW_ENGINE_BCS3] = {
		.name = "bcs3",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 3,
		.irq_offset = ilog2(INTR_BCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS3_RING_BASE,
	},
	[XE_HW_ENGINE_BCS4] = {
		.name = "bcs4",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 4,
		.irq_offset = ilog2(INTR_BCS(4)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS4_RING_BASE,
	},
	[XE_HW_ENGINE_BCS5] = {
		.name = "bcs5",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 5,
		.irq_offset = ilog2(INTR_BCS(5)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS5_RING_BASE,
	},
	[XE_HW_ENGINE_BCS6] = {
		.name = "bcs6",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 6,
		.irq_offset = ilog2(INTR_BCS(6)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS6_RING_BASE,
	},
	[XE_HW_ENGINE_BCS7] = {
		.name = "bcs7",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 7,
		.irq_offset = ilog2(INTR_BCS(7)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS7_RING_BASE,
	},
	[XE_HW_ENGINE_BCS8] = {
		.name = "bcs8",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 8,
		.irq_offset = ilog2(INTR_BCS8),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS8_RING_BASE,
	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VCS(0)),
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_base = BSD_RING_BASE,
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VCS(1)),
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_base = BSD2_RING_BASE,
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VCS(2)),
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_base = BSD3_RING_BASE,
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VCS(3)),
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_base = BSD4_RING_BASE,
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.irq_offset = 32 + ilog2(INTR_VCS(4)),
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_base = XEHP_BSD5_RING_BASE,
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.irq_offset = 32 + ilog2(INTR_VCS(5)),
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_base = XEHP_BSD6_RING_BASE,
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.irq_offset = 32 + ilog2(INTR_VCS(6)),
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_base = XEHP_BSD7_RING_BASE,
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.irq_offset = 32 + ilog2(INTR_VCS(7)),
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_base = XEHP_BSD8_RING_BASE,
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VECS(0)),
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_base = VEBOX_RING_BASE,
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VECS(1)),
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_base = VEBOX2_RING_BASE,
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VECS(2)),
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_base = XEHP_VEBOX3_RING_BASE,
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VECS(3)),
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_base = XEHP_VEBOX4_RING_BASE,
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.irq_offset = ilog2(INTR_CCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE0_RING_BASE,
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.irq_offset = ilog2(INTR_CCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE1_RING_BASE,
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.irq_offset = ilog2(INTR_CCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE2_RING_BASE,
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.irq_offset = ilog2(INTR_CCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE3_RING_BASE,
	},
	[XE_HW_ENGINE_GSCCS0] = {
		.name = "gsccs0",
		.class = XE_ENGINE_CLASS_OTHER,
		.instance = OTHER_GSC_INSTANCE,
		.domain = XE_FW_GSC,
		.mmio_base = GSCCS_RING_BASE,
	},
};

static void hw_engine_fini(void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);

	hwe->gt = NULL;
}

/**
 * xe_hw_engine_mmio_write32() - Write engine register
 * @hwe: engine
 * @reg: register to write into
 * @val: desired 32-bit value to write
 *
 * This function will write val into an engine-specific register.
 * Forcewake must be held by the caller.
 */
void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
			       struct xe_reg reg, u32 val)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	xe_mmio_write32(&hwe->gt->mmio, reg, val);
}

/**
 * xe_hw_engine_mmio_read32() - Read engine register
 * @hwe: engine
 * @reg: register to read from
 *
 * This function will read from an engine-specific register.
 * Forcewake must be held by the caller.
 *
 * Return: value of the 32-bit register.
 */
u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	return xe_mmio_read32(&hwe->gt->mmio, reg);
}
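
/*
 * Illustrative sketch (not used by the driver itself): callers are expected
 * to hold a forcewake reference for the engine's domain around the helpers
 * above, roughly along these lines.  The exact xe_force_wake_get() /
 * xe_force_wake_put() calling convention is assumed here; see
 * xe_force_wake.h for the authoritative API.
 *
 *	unsigned int fw_ref;
 *
 *	fw_ref = xe_force_wake_get(gt_to_fw(hwe->gt), hwe->domain);
 *	if (fw_ref) {
 *		u32 mode = xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
 *
 *		xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref);
 *	}
 */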

void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
	u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
		xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
	xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
				  xe_bo_ggtt_addr(hwe->hwsp));

	if (xe_device_has_msix(gt_to_xe(hwe->gt)))
		ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
	xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode);
	xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
				  _MASKED_BIT_DISABLE(STOP_RING));
	xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}

static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
						 const struct xe_hw_engine *hwe)
{
	return xe_gt_ccs_mode_enabled(gt) &&
	       xe_rtp_match_first_render_or_compute(gt, hwe);
}

static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
				      const struct xe_hw_engine *hwe)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
		return false;

	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
	    hwe->class != XE_ENGINE_CLASS_RENDER)
		return false;

	return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}

void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = gt->mocs.uc_index;
	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr lrc_setup[] = {
		/*
		 * Some blitter commands do not have a field for MOCS; those
		 * commands use the MOCS index pointed to by BLIT_CCTL.  The
		 * BLIT_CCTL register therefore needs to be programmed to an
		 * un-cached index.
		 */
		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
			       ENGINE_CLASS(COPY)),
		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
					   BLIT_CCTL_DST_MOCS_MASK |
					   BLIT_CCTL_SRC_MOCS_MASK,
					   blit_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Disable WMTP if HW doesn't support it */
		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
					   PREEMPT_GPGPU_LEVEL_MASK,
					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
		},
	};

	xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc);
}

static void
hw_engine_setup_default_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be used
	 * by the command streamer when executing commands that don't have
	 * a way to explicitly specify a MOCS setting.  The default should
	 * usually reference whichever MOCS entry corresponds to uncached
	 * behavior, although use of a WB cached entry is recommended by the
	 * spec in certain circumstances on specific platforms.
	 * Bspec: 72161
	 */
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) &&
				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
				 gt->mocs.wb_index : gt->mocs.uc_index;
	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr engine_entries[] = {
		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
					   CMD_CCTL_WRITE_OVERRIDE_MASK |
					   CMD_CCTL_READ_OVERRIDE_MASK,
					   ring_cmd_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/*
		 * To allow the GSC engine to go idle on MTL we need to enable
		 * idle messaging and set the hysteresis value (we use 0xA=5us
		 * as recommended in spec). On platforms after MTL this is
		 * enabled by default.
		 */
		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
				     IDLE_MSG_DISABLE,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
					   IDLE_WAIT_TIME,
					   0xA,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Enable Priority Mem Read */
		{ XE_RTP_NAME("Priority_Mem_Read"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Use Fixed slice CCS mode */
		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
					   RCU_MODE_FIXED_SLICE_CCS_MODE))
		},
	};

	xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr);
}

static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)
{
	const struct engine_info *info;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		info = &engine_infos[id];
		if (info->class == class && info->instance == instance)
			return info;
	}

	return NULL;
}

static u16 get_msix_irq_offset(struct xe_gt *gt, enum xe_engine_class class)
{
	/* For MSI-X, hw engines report to the offset of engine instance zero */
	const struct engine_info *info = find_engine_info(class, 0);

	xe_gt_assert(gt, info);

	return info ? info->irq_offset : 0;
}

static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	xe_gt_assert(gt, !hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = info->mmio_base;
	hwe->irq_offset = xe_device_has_msix(gt_to_xe(gt)) ?
		get_msix_irq_offset(gt, info->class) :
		info->irq_offset;
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	hwe->eclass = &gt->eclass[hwe->class];
	if (!hwe->eclass->sched_props.job_timeout_ms) {
		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;

		/*
		 * The GSC engine can accept submissions while the GSC shim is
		 * being reset, during which time the submission is stalled. In
		 * the worst case, the shim reset can take up to the maximum GSC
		 * command execution time (250ms), so the request start can be
		 * delayed by that much; the request itself can take that long
		 * without being preemptible, which means worst case it can
		 * theoretically take up to 500ms for a preemption to go through
		 * on the GSC engine. Adding to that an extra 100ms as a safety
		 * margin, we get a minimum recommended timeout of 600ms.
		 * The preempt_timeout value can't be tuned for OTHER_CLASS
		 * because the class is reserved for kernel usage, so we just
		 * need to make sure that the starting value is above that
		 * threshold; since our default value (640ms) is greater than
		 * 600ms, the only way we can go below is via a kconfig setting.
		 * If that happens, log it in dmesg and update the value.
		 */
		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
			const u32 min_preempt_timeout = 600 * 1000;

			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
			}
		}

		/* Record default props */
		hwe->eclass->defaults = hwe->eclass->sched_props;
	}

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_tuning_process_engine(hwe);
	xe_wa_process_engine(hwe);
	hw_engine_setup_default_state(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}
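
/*
 * Note on the adjustment below (units taken from the conversions in the
 * code): RING_IDLEDLY is converted to nanoseconds using idledly_units_ps
 * (8 * the GT timestamp base, in picoseconds) and compared against
 * RING_PWRCTX_MAXCNT converted with maxcnt_units_ns (640 ns per unit).
 * Per Wa_16023105232 the idle delay must stay below the context switch
 * wait time (and INHIBIT_SWITCH_UNTIL_PREEMPTED must be clear); if not,
 * IDLEDLY is reprogrammed to just below MAXCNT.
 */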

static void adjust_idledly(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 idledly, maxcnt;
	u32 idledly_units_ps = 8 * gt->info.timestamp_base;
	u32 maxcnt_units_ns = 640;
	bool inhibit_switch = false;

	if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_WA(gt, 16023105232)) {
		idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
		maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));

		inhibit_switch = idledly & INHIBIT_SWITCH_UNTIL_PREEMPTED;
		idledly = REG_FIELD_GET(IDLE_DELAY, idledly);
		idledly = DIV_ROUND_CLOSEST(idledly * idledly_units_ps, 1000);
		maxcnt = REG_FIELD_GET(IDLE_WAIT_TIME, maxcnt);
		maxcnt *= maxcnt_units_ns;

		if (xe_gt_WARN_ON(gt, idledly >= maxcnt || inhibit_switch)) {
			idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * maxcnt_units_ns),
						    idledly_units_ps);
			idledly = DIV_ROUND_CLOSEST(idledly, 1000);
			xe_mmio_write32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base), idledly);
		}
	}
}

static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	int err;

	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);

	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
						 XE_BO_FLAG_GGTT |
						 XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	if (!xe_device_uc_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_hwsp;
		}
	} else {
		/* GSCCS has a special interrupt for reset */
		if (hwe->class == XE_ENGINE_CLASS_OTHER)
			hwe->irq_handler = xe_gsc_hwe_irq_handler;

		if (!IS_SRIOV_VF(xe))
			xe_hw_engine_enable_ring(hwe);
	}

	/* We reserve the highest BCS instance for USM */
	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	/* Ensure IDLEDLY is lower than MAXCNT */
	adjust_idledly(hwe);

	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);

err_hwsp:
	xe_bo_unpin_map_no_vm(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}

static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}

static void read_media_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	media_fuse = xe_mmio_read32(&gt->mmio, GT_VEBOX_VDBOX_DISABLE);

	/*
	 * Pre-Xe_HP platforms had register bits representing absent engines,
	 * whereas Xe_HP and beyond have bits representing present engines.
	 * Invert the polarity on old platforms so that we can use common
	 * handling below.
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "vecs%u fused off\n", j);
		}
	}
}

static void read_copy_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 bcs_mask;

	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
		return;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	bcs_mask = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);

	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j / 2) & bcs_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "bcs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
	/*
	 * CCS fusing based on DSS masks only applies to platforms that can
	 * have more than one CCS.
	 */
	if (hweight64(gt->info.engine_mask &
		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
		return;

	/*
	 * CCS availability on Xe_HP is inferred from the presence of DSS in
	 * each quadrant.
	 */
	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
	u32 ccs_mask;

	ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);

	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if ((ccs_mask & BIT(j)) == 0) {
			gt->info.engine_mask &= ~BIT(i);
			xe_gt_info(gt, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
		read_compute_fuses_from_reg(gt);
	else
		read_compute_fuses_from_dss(gt);
}

static void check_gsc_availability(struct xe_gt *gt)
{
	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
		return;

	/*
	 * The GSCCS is only used to communicate with the GSC FW, so if we
	 * don't have the FW there is nothing we need the engine for and we
	 * can skip its initialization.
	 */
	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);

		/* interrupts were previously enabled, so turn them off */
		xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
		xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);

		xe_gt_dbg(gt, "GSC FW not used, disabling gsccs\n");
	}
}

static void check_sw_disable(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u64 sw_allowed = xe_configfs_get_engines_allowed(to_pci_dev(xe->drm.dev));
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (!(gt->info.engine_mask & BIT(id)))
			continue;

		if (!(sw_allowed & BIT(id))) {
			gt->info.engine_mask &= ~BIT(id);
			xe_gt_info(gt, "%s disabled via configfs\n",
				   engine_infos[id].name);
		}
	}
}

int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_media_fuses(gt);
	read_copy_fuses(gt);
	read_compute_fuses(gt);
	check_gsc_availability(gt);
	check_sw_disable(gt);

	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);
	err = xe_hw_engine_setup_groups(gt);
	if (err)
		return err;

	return 0;
}

void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_RENDER_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

/**
 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 * @hwe: Xe HW Engine.
 * @q: The exec queue object.
 *
 * This can be printed out in a later stage like during dev_coredump
 * analysis.
 *
 * Returns: a Xe HW Engine snapshot object that must be freed by the
 * caller, using `xe_hw_engine_snapshot_free`.
 */
struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q)
{
	struct xe_hw_engine_snapshot *snapshot;
	struct __guc_capture_parsed_output *node;

	if (!xe_hw_engine_is_valid(hwe))
		return NULL;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
	if (!snapshot)
		return NULL;

	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
	snapshot->hwe = hwe;
	snapshot->logical_instance = hwe->logical_instance;
	snapshot->forcewake.domain = hwe->domain;
	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
						    hwe->domain);
	snapshot->mmio_base = hwe->mmio_base;
	snapshot->kernel_reserved = xe_hw_engine_is_reserved(hwe);

	/* no more VF accessible data below this point */
	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
		return snapshot;

	if (q) {
		/* If we got a GuC capture, set the source to GuC */
		node = xe_guc_capture_get_matching_and_lock(q);
		if (node) {
			struct xe_device *xe = gt_to_xe(hwe->gt);
			struct xe_devcoredump *coredump = &xe->devcoredump;

			coredump->snapshot.matched_node = node;
			xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node");
			return snapshot;
		}
	}

	/* otherwise, do manual capture */
	xe_engine_manual_capture(hwe, snapshot);
	xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot");

	return snapshot;
}

/**
 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
	struct xe_gt *gt;

	if (!snapshot)
		return;

	gt = snapshot->hwe->gt;
	/*
	 * xe_guc_capture_put_matched_nodes is called here and from
	 * xe_devcoredump_snapshot_free, to cover the 2 calling paths
	 * of hw_engines - debugfs and devcoredump free.
	 */
	xe_guc_capture_put_matched_nodes(&gt->uc.guc);

	kfree(snapshot->name);
	kfree(snapshot);
}

/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function quickly captures a snapshot and immediately prints it out.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	snapshot = xe_hw_engine_snapshot_capture(hwe, NULL);
	xe_engine_snapshot_print(snapshot, p);
	xe_hw_engine_snapshot_free(snapshot);
}

u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	if (hwe->class == XE_ENGINE_CLASS_OTHER)
		return true;

	/* Check for engines disabled by ccs_mode setting */
	if (xe_gt_ccs_mode_enabled(gt) &&
	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
	    hwe->logical_instance >= gt->ccs_mode)
		return true;

	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}

const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		return "rcs";
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		return "vcs";
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return "vecs";
	case XE_ENGINE_CLASS_COPY:
		return "bcs";
	case XE_ENGINE_CLASS_OTHER:
		return "other";
	case XE_ENGINE_CLASS_COMPUTE:
		return "ccs";
	case XE_ENGINE_CLASS_MAX:
		break;
	}

	return NULL;
}

u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
	return xe_mmio_read64_2x32(&hwe->gt->mmio, RING_TIMESTAMP(hwe->mmio_base));
}

enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
{
	return engine_infos[hwe->engine_id].domain;
}

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};

/**
 * xe_hw_engine_lookup() - Lookup hardware engine for class:instance
 * @xe: xe device
 * @eci: engine class and instance
 *
 * This function will find a hardware engine for given engine
 * class and instance.
 *
 * Return: Pointer to the xe_hw_engine if found, NULL otherwise.
 */
struct xe_hw_engine *
xe_hw_engine_lookup(struct xe_device *xe,
		    struct drm_xe_engine_class_instance eci)
{
	struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id);
	unsigned int idx;

	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return NULL;

	if (!gt)
		return NULL;

	idx = array_index_nospec(eci.engine_class,
				 ARRAY_SIZE(user_to_xe_engine_class));

	return xe_gt_hw_engine(gt, user_to_xe_engine_class[idx],
			       eci.engine_instance, true);
}
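
/*
 * Usage sketch (illustrative only, showing how the lookup and reservation
 * helpers above fit together): a uAPI path validating a userspace-supplied
 * engine would typically do something like
 *
 *	struct xe_hw_engine *hwe = xe_hw_engine_lookup(xe, eci);
 *
 *	if (!hwe || xe_hw_engine_is_reserved(hwe))
 *		return -EINVAL;
 *
 * so that engines reserved for kernel use (the GSCCS, the USM copy engine,
 * or compute engines hidden by the ccs_mode setting) are not handed out to
 * userspace.
 */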