1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_hw_engine.h" 7 8 #include <linux/nospec.h> 9 10 #include <drm/drm_managed.h> 11 #include <drm/xe_drm.h> 12 13 #include "regs/xe_engine_regs.h" 14 #include "regs/xe_gt_regs.h" 15 #include "xe_assert.h" 16 #include "xe_bo.h" 17 #include "xe_device.h" 18 #include "xe_execlist.h" 19 #include "xe_force_wake.h" 20 #include "xe_gsc.h" 21 #include "xe_gt.h" 22 #include "xe_gt_ccs_mode.h" 23 #include "xe_gt_printk.h" 24 #include "xe_gt_mcr.h" 25 #include "xe_gt_topology.h" 26 #include "xe_hw_engine_group.h" 27 #include "xe_hw_fence.h" 28 #include "xe_irq.h" 29 #include "xe_lrc.h" 30 #include "xe_macros.h" 31 #include "xe_mmio.h" 32 #include "xe_reg_sr.h" 33 #include "xe_reg_whitelist.h" 34 #include "xe_rtp.h" 35 #include "xe_sched_job.h" 36 #include "xe_sriov.h" 37 #include "xe_tuning.h" 38 #include "xe_uc_fw.h" 39 #include "xe_wa.h" 40 41 #define MAX_MMIO_BASES 3 42 struct engine_info { 43 const char *name; 44 unsigned int class : 8; 45 unsigned int instance : 8; 46 unsigned int irq_offset : 8; 47 enum xe_force_wake_domains domain; 48 u32 mmio_base; 49 }; 50 51 static const struct engine_info engine_infos[] = { 52 [XE_HW_ENGINE_RCS0] = { 53 .name = "rcs0", 54 .class = XE_ENGINE_CLASS_RENDER, 55 .instance = 0, 56 .irq_offset = ilog2(INTR_RCS0), 57 .domain = XE_FW_RENDER, 58 .mmio_base = RENDER_RING_BASE, 59 }, 60 [XE_HW_ENGINE_BCS0] = { 61 .name = "bcs0", 62 .class = XE_ENGINE_CLASS_COPY, 63 .instance = 0, 64 .irq_offset = ilog2(INTR_BCS(0)), 65 .domain = XE_FW_RENDER, 66 .mmio_base = BLT_RING_BASE, 67 }, 68 [XE_HW_ENGINE_BCS1] = { 69 .name = "bcs1", 70 .class = XE_ENGINE_CLASS_COPY, 71 .instance = 1, 72 .irq_offset = ilog2(INTR_BCS(1)), 73 .domain = XE_FW_RENDER, 74 .mmio_base = XEHPC_BCS1_RING_BASE, 75 }, 76 [XE_HW_ENGINE_BCS2] = { 77 .name = "bcs2", 78 .class = XE_ENGINE_CLASS_COPY, 79 .instance = 2, 80 .irq_offset = ilog2(INTR_BCS(2)), 81 .domain = XE_FW_RENDER, 82 .mmio_base = XEHPC_BCS2_RING_BASE, 83 }, 84 [XE_HW_ENGINE_BCS3] = { 85 .name = "bcs3", 86 .class = XE_ENGINE_CLASS_COPY, 87 .instance = 3, 88 .irq_offset = ilog2(INTR_BCS(3)), 89 .domain = XE_FW_RENDER, 90 .mmio_base = XEHPC_BCS3_RING_BASE, 91 }, 92 [XE_HW_ENGINE_BCS4] = { 93 .name = "bcs4", 94 .class = XE_ENGINE_CLASS_COPY, 95 .instance = 4, 96 .irq_offset = ilog2(INTR_BCS(4)), 97 .domain = XE_FW_RENDER, 98 .mmio_base = XEHPC_BCS4_RING_BASE, 99 }, 100 [XE_HW_ENGINE_BCS5] = { 101 .name = "bcs5", 102 .class = XE_ENGINE_CLASS_COPY, 103 .instance = 5, 104 .irq_offset = ilog2(INTR_BCS(5)), 105 .domain = XE_FW_RENDER, 106 .mmio_base = XEHPC_BCS5_RING_BASE, 107 }, 108 [XE_HW_ENGINE_BCS6] = { 109 .name = "bcs6", 110 .class = XE_ENGINE_CLASS_COPY, 111 .instance = 6, 112 .irq_offset = ilog2(INTR_BCS(6)), 113 .domain = XE_FW_RENDER, 114 .mmio_base = XEHPC_BCS6_RING_BASE, 115 }, 116 [XE_HW_ENGINE_BCS7] = { 117 .name = "bcs7", 118 .class = XE_ENGINE_CLASS_COPY, 119 .irq_offset = ilog2(INTR_BCS(7)), 120 .instance = 7, 121 .domain = XE_FW_RENDER, 122 .mmio_base = XEHPC_BCS7_RING_BASE, 123 }, 124 [XE_HW_ENGINE_BCS8] = { 125 .name = "bcs8", 126 .class = XE_ENGINE_CLASS_COPY, 127 .instance = 8, 128 .irq_offset = ilog2(INTR_BCS8), 129 .domain = XE_FW_RENDER, 130 .mmio_base = XEHPC_BCS8_RING_BASE, 131 }, 132 133 [XE_HW_ENGINE_VCS0] = { 134 .name = "vcs0", 135 .class = XE_ENGINE_CLASS_VIDEO_DECODE, 136 .instance = 0, 137 .irq_offset = 32 + ilog2(INTR_VCS(0)), 138 .domain = XE_FW_MEDIA_VDBOX0, 139 .mmio_base = BSD_RING_BASE, 140 }, 141 [XE_HW_ENGINE_VCS1] = { 142 .name = "vcs1", 143 .class = XE_ENGINE_CLASS_VIDEO_DECODE, 144 .instance = 1, 145 .irq_offset = 32 + ilog2(INTR_VCS(1)), 146 .domain = XE_FW_MEDIA_VDBOX1, 147 .mmio_base = BSD2_RING_BASE, 148 }, 149 [XE_HW_ENGINE_VCS2] = { 150 .name = "vcs2", 151 .class = XE_ENGINE_CLASS_VIDEO_DECODE, 152 .instance = 2, 153 .irq_offset = 32 + ilog2(INTR_VCS(2)), 154 .domain = XE_FW_MEDIA_VDBOX2, 155 .mmio_base = BSD3_RING_BASE, 156 }, 157 [XE_HW_ENGINE_VCS3] = { 158 .name = "vcs3", 159 .class = XE_ENGINE_CLASS_VIDEO_DECODE, 160 .instance = 3, 161 .irq_offset = 32 + ilog2(INTR_VCS(3)), 162 .domain = XE_FW_MEDIA_VDBOX3, 163 .mmio_base = BSD4_RING_BASE, 164 }, 165 [XE_HW_ENGINE_VCS4] = { 166 .name = "vcs4", 167 .class = XE_ENGINE_CLASS_VIDEO_DECODE, 168 .instance = 4, 169 .irq_offset = 32 + ilog2(INTR_VCS(4)), 170 .domain = XE_FW_MEDIA_VDBOX4, 171 .mmio_base = XEHP_BSD5_RING_BASE, 172 }, 173 [XE_HW_ENGINE_VCS5] = { 174 .name = "vcs5", 175 .class = XE_ENGINE_CLASS_VIDEO_DECODE, 176 .instance = 5, 177 .irq_offset = 32 + ilog2(INTR_VCS(5)), 178 .domain = XE_FW_MEDIA_VDBOX5, 179 .mmio_base = XEHP_BSD6_RING_BASE, 180 }, 181 [XE_HW_ENGINE_VCS6] = { 182 .name = "vcs6", 183 .class = XE_ENGINE_CLASS_VIDEO_DECODE, 184 .instance = 6, 185 .irq_offset = 32 + ilog2(INTR_VCS(6)), 186 .domain = XE_FW_MEDIA_VDBOX6, 187 .mmio_base = XEHP_BSD7_RING_BASE, 188 }, 189 [XE_HW_ENGINE_VCS7] = { 190 .name = "vcs7", 191 .class = XE_ENGINE_CLASS_VIDEO_DECODE, 192 .instance = 7, 193 .irq_offset = 32 + ilog2(INTR_VCS(7)), 194 .domain = XE_FW_MEDIA_VDBOX7, 195 .mmio_base = XEHP_BSD8_RING_BASE, 196 }, 197 [XE_HW_ENGINE_VECS0] = { 198 .name = "vecs0", 199 .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, 200 .instance = 0, 201 .irq_offset = 32 + ilog2(INTR_VECS(0)), 202 .domain = XE_FW_MEDIA_VEBOX0, 203 .mmio_base = VEBOX_RING_BASE, 204 }, 205 [XE_HW_ENGINE_VECS1] = { 206 .name = "vecs1", 207 .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, 208 .instance = 1, 209 .irq_offset = 32 + ilog2(INTR_VECS(1)), 210 .domain = XE_FW_MEDIA_VEBOX1, 211 .mmio_base = VEBOX2_RING_BASE, 212 }, 213 [XE_HW_ENGINE_VECS2] = { 214 .name = "vecs2", 215 .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, 216 .instance = 2, 217 .irq_offset = 32 + ilog2(INTR_VECS(2)), 218 .domain = XE_FW_MEDIA_VEBOX2, 219 .mmio_base = XEHP_VEBOX3_RING_BASE, 220 }, 221 [XE_HW_ENGINE_VECS3] = { 222 .name = "vecs3", 223 .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, 224 .instance = 3, 225 .irq_offset = 32 + ilog2(INTR_VECS(3)), 226 .domain = XE_FW_MEDIA_VEBOX3, 227 .mmio_base = XEHP_VEBOX4_RING_BASE, 228 }, 229 [XE_HW_ENGINE_CCS0] = { 230 .name = "ccs0", 231 .class = XE_ENGINE_CLASS_COMPUTE, 232 .instance = 0, 233 .irq_offset = ilog2(INTR_CCS(0)), 234 .domain = XE_FW_RENDER, 235 .mmio_base = COMPUTE0_RING_BASE, 236 }, 237 [XE_HW_ENGINE_CCS1] = { 238 .name = "ccs1", 239 .class = XE_ENGINE_CLASS_COMPUTE, 240 .instance = 1, 241 .irq_offset = ilog2(INTR_CCS(1)), 242 .domain = XE_FW_RENDER, 243 .mmio_base = COMPUTE1_RING_BASE, 244 }, 245 [XE_HW_ENGINE_CCS2] = { 246 .name = "ccs2", 247 .class = XE_ENGINE_CLASS_COMPUTE, 248 .instance = 2, 249 .irq_offset = ilog2(INTR_CCS(2)), 250 .domain = XE_FW_RENDER, 251 .mmio_base = COMPUTE2_RING_BASE, 252 }, 253 [XE_HW_ENGINE_CCS3] = { 254 .name = "ccs3", 255 .class = XE_ENGINE_CLASS_COMPUTE, 256 .instance = 3, 257 .irq_offset = ilog2(INTR_CCS(3)), 258 .domain = XE_FW_RENDER, 259 .mmio_base = COMPUTE3_RING_BASE, 260 }, 261 [XE_HW_ENGINE_GSCCS0] = { 262 .name = "gsccs0", 263 .class = XE_ENGINE_CLASS_OTHER, 264 .instance = OTHER_GSC_INSTANCE, 265 .domain = XE_FW_GSC, 266 .mmio_base = GSCCS_RING_BASE, 267 }, 268 }; 269 270 static void hw_engine_fini(void *arg) 271 { 272 struct xe_hw_engine *hwe = arg; 273 274 if (hwe->exl_port) 275 xe_execlist_port_destroy(hwe->exl_port); 276 xe_lrc_put(hwe->kernel_lrc); 277 278 hwe->gt = NULL; 279 } 280 281 /** 282 * xe_hw_engine_mmio_write32() - Write engine register 283 * @hwe: engine 284 * @reg: register to write into 285 * @val: desired 32-bit value to write 286 * 287 * This function will write val into an engine specific register. 288 * Forcewake must be held by the caller. 289 * 290 */ 291 void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, 292 struct xe_reg reg, u32 val) 293 { 294 xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); 295 xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); 296 297 reg.addr += hwe->mmio_base; 298 299 xe_mmio_write32(hwe->gt, reg, val); 300 } 301 302 /** 303 * xe_hw_engine_mmio_read32() - Read engine register 304 * @hwe: engine 305 * @reg: register to read from 306 * 307 * This function will read from an engine specific register. 308 * Forcewake must be held by the caller. 309 * 310 * Return: value of the 32-bit register. 311 */ 312 u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) 313 { 314 xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); 315 xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); 316 317 reg.addr += hwe->mmio_base; 318 319 return xe_mmio_read32(hwe->gt, reg); 320 } 321 322 void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) 323 { 324 u32 ccs_mask = 325 xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); 326 327 if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) 328 xe_mmio_write32(hwe->gt, RCU_MODE, 329 _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); 330 331 xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); 332 xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), 333 xe_bo_ggtt_addr(hwe->hwsp)); 334 xe_hw_engine_mmio_write32(hwe, RING_MODE(0), 335 _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); 336 xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), 337 _MASKED_BIT_DISABLE(STOP_RING)); 338 xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); 339 } 340 341 static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, 342 const struct xe_hw_engine *hwe) 343 { 344 return xe_gt_ccs_mode_enabled(gt) && 345 xe_rtp_match_first_render_or_compute(gt, hwe); 346 } 347 348 static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt, 349 const struct xe_hw_engine *hwe) 350 { 351 if (GRAPHICS_VER(gt_to_xe(gt)) < 20) 352 return false; 353 354 if (hwe->class != XE_ENGINE_CLASS_COMPUTE && 355 hwe->class != XE_ENGINE_CLASS_RENDER) 356 return false; 357 358 return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE; 359 } 360 361 void 362 xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) 363 { 364 struct xe_gt *gt = hwe->gt; 365 const u8 mocs_write_idx = gt->mocs.uc_index; 366 const u8 mocs_read_idx = gt->mocs.uc_index; 367 u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) | 368 REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx); 369 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); 370 const struct xe_rtp_entry_sr lrc_setup[] = { 371 /* 372 * Some blitter commands do not have a field for MOCS, those 373 * commands will use MOCS index pointed by BLIT_CCTL. 374 * BLIT_CCTL registers are needed to be programmed to un-cached. 375 */ 376 { XE_RTP_NAME("BLIT_CCTL_default_MOCS"), 377 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), 378 ENGINE_CLASS(COPY)), 379 XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0), 380 BLIT_CCTL_DST_MOCS_MASK | 381 BLIT_CCTL_SRC_MOCS_MASK, 382 blit_cctl_val, 383 XE_RTP_ACTION_FLAG(ENGINE_BASE))) 384 }, 385 /* Use Fixed slice CCS mode */ 386 { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), 387 XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), 388 XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, 389 RCU_MODE_FIXED_SLICE_CCS_MODE)) 390 }, 391 /* Disable WMTP if HW doesn't support it */ 392 { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), 393 XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), 394 XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0), 395 PREEMPT_GPGPU_LEVEL_MASK, 396 PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), 397 XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) 398 }, 399 {} 400 }; 401 402 xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc); 403 } 404 405 static void 406 hw_engine_setup_default_state(struct xe_hw_engine *hwe) 407 { 408 struct xe_gt *gt = hwe->gt; 409 struct xe_device *xe = gt_to_xe(gt); 410 /* 411 * RING_CMD_CCTL specifies the default MOCS entry that will be 412 * used by the command streamer when executing commands that 413 * don't have a way to explicitly specify a MOCS setting. 414 * The default should usually reference whichever MOCS entry 415 * corresponds to uncached behavior, although use of a WB cached 416 * entry is recommended by the spec in certain circumstances on 417 * specific platforms. 418 * Bspec: 72161 419 */ 420 const u8 mocs_write_idx = gt->mocs.uc_index; 421 const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && 422 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? 423 gt->mocs.wb_index : gt->mocs.uc_index; 424 u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | 425 REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); 426 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); 427 const struct xe_rtp_entry_sr engine_entries[] = { 428 { XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"), 429 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)), 430 XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0), 431 CMD_CCTL_WRITE_OVERRIDE_MASK | 432 CMD_CCTL_READ_OVERRIDE_MASK, 433 ring_cmd_cctl_val, 434 XE_RTP_ACTION_FLAG(ENGINE_BASE))) 435 }, 436 /* 437 * To allow the GSC engine to go idle on MTL we need to enable 438 * idle messaging and set the hysteresis value (we use 0xA=5us 439 * as recommended in spec). On platforms after MTL this is 440 * enabled by default. 441 */ 442 { XE_RTP_NAME("MTL GSCCS IDLE MSG enable"), 443 XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)), 444 XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0), 445 IDLE_MSG_DISABLE, 446 XE_RTP_ACTION_FLAG(ENGINE_BASE)), 447 FIELD_SET(RING_PWRCTX_MAXCNT(0), 448 IDLE_WAIT_TIME, 449 0xA, 450 XE_RTP_ACTION_FLAG(ENGINE_BASE))) 451 }, 452 /* Enable Priority Mem Read */ 453 { XE_RTP_NAME("Priority_Mem_Read"), 454 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 455 XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, 456 XE_RTP_ACTION_FLAG(ENGINE_BASE))) 457 }, 458 {} 459 }; 460 461 xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr); 462 } 463 464 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, 465 enum xe_hw_engine_id id) 466 { 467 const struct engine_info *info; 468 469 if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name)) 470 return; 471 472 if (!(gt->info.engine_mask & BIT(id))) 473 return; 474 475 info = &engine_infos[id]; 476 477 xe_gt_assert(gt, !hwe->gt); 478 479 hwe->gt = gt; 480 hwe->class = info->class; 481 hwe->instance = info->instance; 482 hwe->mmio_base = info->mmio_base; 483 hwe->irq_offset = info->irq_offset; 484 hwe->domain = info->domain; 485 hwe->name = info->name; 486 hwe->fence_irq = >->fence_irq[info->class]; 487 hwe->engine_id = id; 488 489 hwe->eclass = >->eclass[hwe->class]; 490 if (!hwe->eclass->sched_props.job_timeout_ms) { 491 hwe->eclass->sched_props.job_timeout_ms = 5 * 1000; 492 hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; 493 hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; 494 hwe->eclass->sched_props.timeslice_us = 1 * 1000; 495 hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN; 496 hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX; 497 hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT; 498 hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; 499 hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; 500 501 /* 502 * The GSC engine can accept submissions while the GSC shim is 503 * being reset, during which time the submission is stalled. In 504 * the worst case, the shim reset can take up to the maximum GSC 505 * command execution time (250ms), so the request start can be 506 * delayed by that much; the request itself can take that long 507 * without being preemptible, which means worst case it can 508 * theoretically take up to 500ms for a preemption to go through 509 * on the GSC engine. Adding to that an extra 100ms as a safety 510 * margin, we get a minimum recommended timeout of 600ms. 511 * The preempt_timeout value can't be tuned for OTHER_CLASS 512 * because the class is reserved for kernel usage, so we just 513 * need to make sure that the starting value is above that 514 * threshold; since our default value (640ms) is greater than 515 * 600ms, the only way we can go below is via a kconfig setting. 516 * If that happens, log it in dmesg and update the value. 517 */ 518 if (hwe->class == XE_ENGINE_CLASS_OTHER) { 519 const u32 min_preempt_timeout = 600 * 1000; 520 if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) { 521 hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout; 522 xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n"); 523 } 524 } 525 526 /* Record default props */ 527 hwe->eclass->defaults = hwe->eclass->sched_props; 528 } 529 530 xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt)); 531 xe_tuning_process_engine(hwe); 532 xe_wa_process_engine(hwe); 533 hw_engine_setup_default_state(hwe); 534 535 xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt)); 536 xe_reg_whitelist_process_engine(hwe); 537 } 538 539 static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, 540 enum xe_hw_engine_id id) 541 { 542 struct xe_device *xe = gt_to_xe(gt); 543 struct xe_tile *tile = gt_to_tile(gt); 544 int err; 545 546 xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name); 547 xe_gt_assert(gt, gt->info.engine_mask & BIT(id)); 548 549 xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); 550 xe_reg_sr_apply_whitelist(hwe); 551 552 hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, 553 XE_BO_FLAG_VRAM_IF_DGFX(tile) | 554 XE_BO_FLAG_GGTT | 555 XE_BO_FLAG_GGTT_INVALIDATE); 556 if (IS_ERR(hwe->hwsp)) { 557 err = PTR_ERR(hwe->hwsp); 558 goto err_name; 559 } 560 561 hwe->kernel_lrc = xe_lrc_create(hwe, NULL, SZ_16K); 562 if (IS_ERR(hwe->kernel_lrc)) { 563 err = PTR_ERR(hwe->kernel_lrc); 564 goto err_hwsp; 565 } 566 567 if (!xe_device_uc_enabled(xe)) { 568 hwe->exl_port = xe_execlist_port_create(xe, hwe); 569 if (IS_ERR(hwe->exl_port)) { 570 err = PTR_ERR(hwe->exl_port); 571 goto err_kernel_lrc; 572 } 573 } 574 575 if (xe_device_uc_enabled(xe)) { 576 /* GSCCS has a special interrupt for reset */ 577 if (hwe->class == XE_ENGINE_CLASS_OTHER) 578 hwe->irq_handler = xe_gsc_hwe_irq_handler; 579 580 if (!IS_SRIOV_VF(xe)) 581 xe_hw_engine_enable_ring(hwe); 582 } 583 584 /* We reserve the highest BCS instance for USM */ 585 if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) 586 gt->usm.reserved_bcs_instance = hwe->instance; 587 588 return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); 589 590 err_kernel_lrc: 591 xe_lrc_put(hwe->kernel_lrc); 592 err_hwsp: 593 xe_bo_unpin_map_no_vm(hwe->hwsp); 594 err_name: 595 hwe->name = NULL; 596 597 return err; 598 } 599 600 static void hw_engine_setup_logical_mapping(struct xe_gt *gt) 601 { 602 int class; 603 604 /* FIXME: Doing a simple logical mapping that works for most hardware */ 605 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 606 struct xe_hw_engine *hwe; 607 enum xe_hw_engine_id id; 608 int logical_instance = 0; 609 610 for_each_hw_engine(hwe, gt, id) 611 if (hwe->class == class) 612 hwe->logical_instance = logical_instance++; 613 } 614 } 615 616 static void read_media_fuses(struct xe_gt *gt) 617 { 618 struct xe_device *xe = gt_to_xe(gt); 619 u32 media_fuse; 620 u16 vdbox_mask; 621 u16 vebox_mask; 622 int i, j; 623 624 xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 625 626 media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE); 627 628 /* 629 * Pre-Xe_HP platforms had register bits representing absent engines, 630 * whereas Xe_HP and beyond have bits representing present engines. 631 * Invert the polarity on old platforms so that we can use common 632 * handling below. 633 */ 634 if (GRAPHICS_VERx100(xe) < 1250) 635 media_fuse = ~media_fuse; 636 637 vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse); 638 vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse); 639 640 for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { 641 if (!(gt->info.engine_mask & BIT(i))) 642 continue; 643 644 if (!(BIT(j) & vdbox_mask)) { 645 gt->info.engine_mask &= ~BIT(i); 646 drm_info(&xe->drm, "vcs%u fused off\n", j); 647 } 648 } 649 650 for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { 651 if (!(gt->info.engine_mask & BIT(i))) 652 continue; 653 654 if (!(BIT(j) & vebox_mask)) { 655 gt->info.engine_mask &= ~BIT(i); 656 drm_info(&xe->drm, "vecs%u fused off\n", j); 657 } 658 } 659 } 660 661 static void read_copy_fuses(struct xe_gt *gt) 662 { 663 struct xe_device *xe = gt_to_xe(gt); 664 u32 bcs_mask; 665 666 if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270) 667 return; 668 669 xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 670 671 bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3); 672 bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask); 673 674 /* BCS0 is always present; only BCS1-BCS8 may be fused off */ 675 for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { 676 if (!(gt->info.engine_mask & BIT(i))) 677 continue; 678 679 if (!(BIT(j / 2) & bcs_mask)) { 680 gt->info.engine_mask &= ~BIT(i); 681 drm_info(&xe->drm, "bcs%u fused off\n", j); 682 } 683 } 684 } 685 686 static void read_compute_fuses_from_dss(struct xe_gt *gt) 687 { 688 struct xe_device *xe = gt_to_xe(gt); 689 690 /* 691 * CCS fusing based on DSS masks only applies to platforms that can 692 * have more than one CCS. 693 */ 694 if (hweight64(gt->info.engine_mask & 695 GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1) 696 return; 697 698 /* 699 * CCS availability on Xe_HP is inferred from the presence of DSS in 700 * each quadrant. 701 */ 702 for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) { 703 if (!(gt->info.engine_mask & BIT(i))) 704 continue; 705 706 if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) { 707 gt->info.engine_mask &= ~BIT(i); 708 drm_info(&xe->drm, "ccs%u fused off\n", j); 709 } 710 } 711 } 712 713 static void read_compute_fuses_from_reg(struct xe_gt *gt) 714 { 715 struct xe_device *xe = gt_to_xe(gt); 716 u32 ccs_mask; 717 718 ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4); 719 ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask); 720 721 for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) { 722 if (!(gt->info.engine_mask & BIT(i))) 723 continue; 724 725 if ((ccs_mask & BIT(j)) == 0) { 726 gt->info.engine_mask &= ~BIT(i); 727 drm_info(&xe->drm, "ccs%u fused off\n", j); 728 } 729 } 730 } 731 732 static void read_compute_fuses(struct xe_gt *gt) 733 { 734 if (GRAPHICS_VER(gt_to_xe(gt)) >= 20) 735 read_compute_fuses_from_reg(gt); 736 else 737 read_compute_fuses_from_dss(gt); 738 } 739 740 static void check_gsc_availability(struct xe_gt *gt) 741 { 742 struct xe_device *xe = gt_to_xe(gt); 743 744 if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) 745 return; 746 747 /* 748 * The GSCCS is only used to communicate with the GSC FW, so if we don't 749 * have the FW there is nothing we need the engine for and can therefore 750 * skip its initialization. 751 */ 752 if (!xe_uc_fw_is_available(>->uc.gsc.fw)) { 753 gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0); 754 755 /* interrupts where previously enabled, so turn them off */ 756 xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0); 757 xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0); 758 759 drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); 760 } 761 } 762 763 int xe_hw_engines_init_early(struct xe_gt *gt) 764 { 765 int i; 766 767 read_media_fuses(gt); 768 read_copy_fuses(gt); 769 read_compute_fuses(gt); 770 check_gsc_availability(gt); 771 772 BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN); 773 BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX); 774 775 for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++) 776 hw_engine_init_early(gt, >->hw_engines[i], i); 777 778 return 0; 779 } 780 781 int xe_hw_engines_init(struct xe_gt *gt) 782 { 783 int err; 784 struct xe_hw_engine *hwe; 785 enum xe_hw_engine_id id; 786 787 for_each_hw_engine(hwe, gt, id) { 788 err = hw_engine_init(gt, hwe, id); 789 if (err) 790 return err; 791 } 792 793 hw_engine_setup_logical_mapping(gt); 794 err = xe_hw_engine_setup_groups(gt); 795 if (err) 796 return err; 797 798 return 0; 799 } 800 801 void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) 802 { 803 wake_up_all(>_to_xe(hwe->gt)->ufence_wq); 804 805 if (hwe->irq_handler) 806 hwe->irq_handler(hwe, intr_vec); 807 808 if (intr_vec & GT_RENDER_USER_INTERRUPT) 809 xe_hw_fence_irq_run(hwe->fence_irq); 810 } 811 812 static bool 813 is_slice_common_per_gslice(struct xe_device *xe) 814 { 815 return GRAPHICS_VERx100(xe) >= 1255; 816 } 817 818 static void 819 xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, 820 struct xe_hw_engine_snapshot *snapshot) 821 { 822 struct xe_gt *gt = hwe->gt; 823 struct xe_device *xe = gt_to_xe(gt); 824 unsigned int dss; 825 u16 group, instance; 826 827 snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); 828 829 if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) 830 return; 831 832 if (is_slice_common_per_gslice(xe) == false) { 833 snapshot->reg.instdone.slice_common[0] = 834 xe_mmio_read32(gt, SC_INSTDONE); 835 snapshot->reg.instdone.slice_common_extra[0] = 836 xe_mmio_read32(gt, SC_INSTDONE_EXTRA); 837 snapshot->reg.instdone.slice_common_extra2[0] = 838 xe_mmio_read32(gt, SC_INSTDONE_EXTRA2); 839 } else { 840 for_each_geometry_dss(dss, gt, group, instance) { 841 snapshot->reg.instdone.slice_common[dss] = 842 xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance); 843 snapshot->reg.instdone.slice_common_extra[dss] = 844 xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance); 845 snapshot->reg.instdone.slice_common_extra2[dss] = 846 xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance); 847 } 848 } 849 850 for_each_geometry_dss(dss, gt, group, instance) { 851 snapshot->reg.instdone.sampler[dss] = 852 xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance); 853 snapshot->reg.instdone.row[dss] = 854 xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance); 855 856 if (GRAPHICS_VERx100(xe) >= 1255) 857 snapshot->reg.instdone.geom_svg[dss] = 858 xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT, 859 group, instance); 860 } 861 } 862 863 /** 864 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. 865 * @hwe: Xe HW Engine. 866 * 867 * This can be printed out in a later stage like during dev_coredump 868 * analysis. 869 * 870 * Returns: a Xe HW Engine snapshot object that must be freed by the 871 * caller, using `xe_hw_engine_snapshot_free`. 872 */ 873 struct xe_hw_engine_snapshot * 874 xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) 875 { 876 struct xe_hw_engine_snapshot *snapshot; 877 size_t len; 878 u64 val; 879 880 if (!xe_hw_engine_is_valid(hwe)) 881 return NULL; 882 883 snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); 884 885 if (!snapshot) 886 return NULL; 887 888 /* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it 889 * includes xe_hw_engine_types.h the length of this 3 registers can't be 890 * set in struct xe_hw_engine_snapshot, so here doing additional 891 * allocations. 892 */ 893 len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32)); 894 snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC); 895 snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC); 896 snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC); 897 snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC); 898 snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC); 899 snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC); 900 if (!snapshot->reg.instdone.slice_common || 901 !snapshot->reg.instdone.slice_common_extra || 902 !snapshot->reg.instdone.slice_common_extra2 || 903 !snapshot->reg.instdone.sampler || 904 !snapshot->reg.instdone.row || 905 !snapshot->reg.instdone.geom_svg) { 906 xe_hw_engine_snapshot_free(snapshot); 907 return NULL; 908 } 909 910 snapshot->name = kstrdup(hwe->name, GFP_ATOMIC); 911 snapshot->hwe = hwe; 912 snapshot->logical_instance = hwe->logical_instance; 913 snapshot->forcewake.domain = hwe->domain; 914 snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt), 915 hwe->domain); 916 snapshot->mmio_base = hwe->mmio_base; 917 918 /* no more VF accessible data below this point */ 919 if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) 920 return snapshot; 921 922 snapshot->reg.ring_execlist_status = 923 xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); 924 val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); 925 snapshot->reg.ring_execlist_status |= val << 32; 926 927 snapshot->reg.ring_execlist_sq_contents = 928 xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); 929 val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); 930 snapshot->reg.ring_execlist_sq_contents |= val << 32; 931 932 snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0)); 933 val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); 934 snapshot->reg.ring_acthd |= val << 32; 935 936 snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0)); 937 val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); 938 snapshot->reg.ring_bbaddr |= val << 32; 939 940 snapshot->reg.ring_dma_fadd = 941 xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); 942 val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); 943 snapshot->reg.ring_dma_fadd |= val << 32; 944 945 snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); 946 snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); 947 snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0)); 948 if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { 949 val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0)); 950 snapshot->reg.ring_start |= val << 32; 951 } 952 if (xe_gt_has_indirect_ring_state(hwe->gt)) { 953 snapshot->reg.indirect_ring_state = 954 xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); 955 } 956 957 snapshot->reg.ring_head = 958 xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; 959 snapshot->reg.ring_tail = 960 xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; 961 snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0)); 962 snapshot->reg.ring_mi_mode = 963 xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); 964 snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0)); 965 snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0)); 966 snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0)); 967 snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0)); 968 snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0)); 969 snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0)); 970 xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); 971 972 if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) 973 snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); 974 975 return snapshot; 976 } 977 978 static void 979 xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p) 980 { 981 struct xe_gt *gt = snapshot->hwe->gt; 982 struct xe_device *xe = gt_to_xe(gt); 983 u16 group, instance; 984 unsigned int dss; 985 986 drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring); 987 988 if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) 989 return; 990 991 if (is_slice_common_per_gslice(xe) == false) { 992 drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n", 993 snapshot->reg.instdone.slice_common[0]); 994 drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n", 995 snapshot->reg.instdone.slice_common_extra[0]); 996 drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n", 997 snapshot->reg.instdone.slice_common_extra2[0]); 998 } else { 999 for_each_geometry_dss(dss, gt, group, instance) { 1000 drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss, 1001 snapshot->reg.instdone.slice_common[dss]); 1002 drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss, 1003 snapshot->reg.instdone.slice_common_extra[dss]); 1004 drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss, 1005 snapshot->reg.instdone.slice_common_extra2[dss]); 1006 } 1007 } 1008 1009 for_each_geometry_dss(dss, gt, group, instance) { 1010 drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss, 1011 snapshot->reg.instdone.sampler[dss]); 1012 drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss, 1013 snapshot->reg.instdone.row[dss]); 1014 1015 if (GRAPHICS_VERx100(xe) >= 1255) 1016 drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n", 1017 dss, snapshot->reg.instdone.geom_svg[dss]); 1018 } 1019 } 1020 1021 /** 1022 * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot. 1023 * @snapshot: Xe HW Engine snapshot object. 1024 * @p: drm_printer where it will be printed out. 1025 * 1026 * This function prints out a given Xe HW Engine snapshot object. 1027 */ 1028 void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, 1029 struct drm_printer *p) 1030 { 1031 if (!snapshot) 1032 return; 1033 1034 drm_printf(p, "%s (physical), logical instance=%d\n", 1035 snapshot->name ? snapshot->name : "", 1036 snapshot->logical_instance); 1037 drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", 1038 snapshot->forcewake.domain, snapshot->forcewake.ref); 1039 drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam); 1040 drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga); 1041 drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n", 1042 snapshot->reg.ring_execlist_status); 1043 drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n", 1044 snapshot->reg.ring_execlist_sq_contents); 1045 drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start); 1046 drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); 1047 drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); 1048 drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); 1049 drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode); 1050 drm_printf(p, "\tRING_MODE: 0x%08x\n", 1051 snapshot->reg.ring_mode); 1052 drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr); 1053 drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr); 1054 drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr); 1055 drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir); 1056 drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd); 1057 drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr); 1058 drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd); 1059 drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n", 1060 snapshot->reg.indirect_ring_state); 1061 drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr); 1062 xe_hw_engine_snapshot_instdone_print(snapshot, p); 1063 1064 if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) 1065 drm_printf(p, "\tRCU_MODE: 0x%08x\n", 1066 snapshot->reg.rcu_mode); 1067 drm_puts(p, "\n"); 1068 } 1069 1070 /** 1071 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot. 1072 * @snapshot: Xe HW Engine snapshot object. 1073 * 1074 * This function free all the memory that needed to be allocated at capture 1075 * time. 1076 */ 1077 void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot) 1078 { 1079 if (!snapshot) 1080 return; 1081 1082 kfree(snapshot->reg.instdone.slice_common); 1083 kfree(snapshot->reg.instdone.slice_common_extra); 1084 kfree(snapshot->reg.instdone.slice_common_extra2); 1085 kfree(snapshot->reg.instdone.sampler); 1086 kfree(snapshot->reg.instdone.row); 1087 kfree(snapshot->reg.instdone.geom_svg); 1088 kfree(snapshot->name); 1089 kfree(snapshot); 1090 } 1091 1092 /** 1093 * xe_hw_engine_print - Xe HW Engine Print. 1094 * @hwe: Hardware Engine. 1095 * @p: drm_printer. 1096 * 1097 * This function quickly capture a snapshot and immediately print it out. 1098 */ 1099 void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p) 1100 { 1101 struct xe_hw_engine_snapshot *snapshot; 1102 1103 snapshot = xe_hw_engine_snapshot_capture(hwe); 1104 xe_hw_engine_snapshot_print(snapshot, p); 1105 xe_hw_engine_snapshot_free(snapshot); 1106 } 1107 1108 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, 1109 enum xe_engine_class engine_class) 1110 { 1111 u32 mask = 0; 1112 enum xe_hw_engine_id id; 1113 1114 for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { 1115 if (engine_infos[id].class == engine_class && 1116 gt->info.engine_mask & BIT(id)) 1117 mask |= BIT(engine_infos[id].instance); 1118 } 1119 return mask; 1120 } 1121 1122 bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe) 1123 { 1124 struct xe_gt *gt = hwe->gt; 1125 struct xe_device *xe = gt_to_xe(gt); 1126 1127 if (hwe->class == XE_ENGINE_CLASS_OTHER) 1128 return true; 1129 1130 /* Check for engines disabled by ccs_mode setting */ 1131 if (xe_gt_ccs_mode_enabled(gt) && 1132 hwe->class == XE_ENGINE_CLASS_COMPUTE && 1133 hwe->logical_instance >= gt->ccs_mode) 1134 return true; 1135 1136 return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY && 1137 hwe->instance == gt->usm.reserved_bcs_instance; 1138 } 1139 1140 const char *xe_hw_engine_class_to_str(enum xe_engine_class class) 1141 { 1142 switch (class) { 1143 case XE_ENGINE_CLASS_RENDER: 1144 return "rcs"; 1145 case XE_ENGINE_CLASS_VIDEO_DECODE: 1146 return "vcs"; 1147 case XE_ENGINE_CLASS_VIDEO_ENHANCE: 1148 return "vecs"; 1149 case XE_ENGINE_CLASS_COPY: 1150 return "bcs"; 1151 case XE_ENGINE_CLASS_OTHER: 1152 return "other"; 1153 case XE_ENGINE_CLASS_COMPUTE: 1154 return "ccs"; 1155 case XE_ENGINE_CLASS_MAX: 1156 break; 1157 } 1158 1159 return NULL; 1160 } 1161 1162 u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe) 1163 { 1164 return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base)); 1165 } 1166 1167 enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe) 1168 { 1169 return engine_infos[hwe->engine_id].domain; 1170 } 1171 1172 static const enum xe_engine_class user_to_xe_engine_class[] = { 1173 [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, 1174 [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, 1175 [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, 1176 [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, 1177 [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, 1178 }; 1179 1180 /** 1181 * xe_hw_engine_lookup() - Lookup hardware engine for class:instance 1182 * @xe: xe device 1183 * @eci: engine class and instance 1184 * 1185 * This function will find a hardware engine for given engine 1186 * class and instance. 1187 * 1188 * Return: If found xe_hw_engine pointer, NULL otherwise. 1189 */ 1190 struct xe_hw_engine * 1191 xe_hw_engine_lookup(struct xe_device *xe, 1192 struct drm_xe_engine_class_instance eci) 1193 { 1194 unsigned int idx; 1195 1196 if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) 1197 return NULL; 1198 1199 if (eci.gt_id >= xe->info.gt_count) 1200 return NULL; 1201 1202 idx = array_index_nospec(eci.engine_class, 1203 ARRAY_SIZE(user_to_xe_engine_class)); 1204 1205 return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), 1206 user_to_xe_engine_class[idx], 1207 eci.engine_instance, true); 1208 } 1209