// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <linux/nospec.h>

#include <drm/drm_managed.h>
#include <uapi/drm/xe_drm.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_tuning.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	unsigned int irq_offset : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};

static const struct engine_info engine_infos[] = {
	[XE_HW_ENGINE_RCS0] = {
		.name = "rcs0",
		.class = XE_ENGINE_CLASS_RENDER,
		.instance = 0,
		.irq_offset = ilog2(INTR_RCS0),
		.domain = XE_FW_RENDER,
		.mmio_base = RENDER_RING_BASE,
	},
	[XE_HW_ENGINE_BCS0] = {
		.name = "bcs0",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 0,
		.irq_offset = ilog2(INTR_BCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = BLT_RING_BASE,
	},
	[XE_HW_ENGINE_BCS1] = {
		.name = "bcs1",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 1,
		.irq_offset = ilog2(INTR_BCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS1_RING_BASE,
	},
	[XE_HW_ENGINE_BCS2] = {
		.name = "bcs2",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 2,
		.irq_offset = ilog2(INTR_BCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS2_RING_BASE,
	},
	[XE_HW_ENGINE_BCS3] = {
		.name = "bcs3",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 3,
		.irq_offset = ilog2(INTR_BCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS3_RING_BASE,
	},
	[XE_HW_ENGINE_BCS4] = {
		.name = "bcs4",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 4,
		.irq_offset = ilog2(INTR_BCS(4)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS4_RING_BASE,
	},
	[XE_HW_ENGINE_BCS5] = {
		.name = "bcs5",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 5,
		.irq_offset = ilog2(INTR_BCS(5)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS5_RING_BASE,
	},
	[XE_HW_ENGINE_BCS6] = {
		.name = "bcs6",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 6,
		.irq_offset = ilog2(INTR_BCS(6)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS6_RING_BASE,
	},
	[XE_HW_ENGINE_BCS7] = {
		.name = "bcs7",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 7,
		.irq_offset = ilog2(INTR_BCS(7)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS7_RING_BASE,
	},
	[XE_HW_ENGINE_BCS8] = {
		.name = "bcs8",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 8,
		.irq_offset = ilog2(INTR_BCS8),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS8_RING_BASE,
	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VCS(0)),
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_base = BSD_RING_BASE,
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VCS(1)),
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_base = BSD2_RING_BASE,
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VCS(2)),
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_base = BSD3_RING_BASE,
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VCS(3)),
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_base = BSD4_RING_BASE,
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.irq_offset = 32 + ilog2(INTR_VCS(4)),
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_base = XEHP_BSD5_RING_BASE,
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.irq_offset = 32 + ilog2(INTR_VCS(5)),
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_base = XEHP_BSD6_RING_BASE,
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.irq_offset = 32 + ilog2(INTR_VCS(6)),
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_base = XEHP_BSD7_RING_BASE,
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.irq_offset = 32 + ilog2(INTR_VCS(7)),
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_base = XEHP_BSD8_RING_BASE,
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VECS(0)),
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_base = VEBOX_RING_BASE,
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VECS(1)),
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_base = VEBOX2_RING_BASE,
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VECS(2)),
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_base = XEHP_VEBOX3_RING_BASE,
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VECS(3)),
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_base = XEHP_VEBOX4_RING_BASE,
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.irq_offset = ilog2(INTR_CCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE0_RING_BASE,
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.irq_offset = ilog2(INTR_CCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE1_RING_BASE,
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.irq_offset = ilog2(INTR_CCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE2_RING_BASE,
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.irq_offset = ilog2(INTR_CCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE3_RING_BASE,
	},
	[XE_HW_ENGINE_GSCCS0] = {
		.name = "gsccs0",
		.class = XE_ENGINE_CLASS_OTHER,
		.instance = OTHER_GSC_INSTANCE,
		.domain = XE_FW_GSC,
		.mmio_base = GSCCS_RING_BASE,
	},
};

static void hw_engine_fini(void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);

	hwe->gt = NULL;
}

/**
 * xe_hw_engine_mmio_write32() - Write engine register
 * @hwe: engine
 * @reg: register to write into
 * @val: desired 32-bit value to write
 *
 * This function will write val into an engine specific register.
 * Forcewake must be held by the caller.
 *
 */
void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
			       struct xe_reg reg, u32 val)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	xe_mmio_write32(hwe->gt, reg, val);
}

/**
 * xe_hw_engine_mmio_read32() - Read engine register
 * @hwe: engine
 * @reg: register to read from
 *
 * This function will read from an engine specific register.
 * Forcewake must be held by the caller.
 *
 * Return: value of the 32-bit register.
 */
u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	return xe_mmio_read32(hwe->gt, reg);
}

void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
		xe_mmio_write32(hwe->gt, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
	xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
				  xe_bo_ggtt_addr(hwe->hwsp));
	xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
				  _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
	xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
				  _MASKED_BIT_DISABLE(STOP_RING));
	xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}

static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
						 const struct xe_hw_engine *hwe)
{
	return xe_gt_ccs_mode_enabled(gt) &&
	       xe_rtp_match_first_render_or_compute(gt, hwe);
}

static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
				      const struct xe_hw_engine *hwe)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
		return false;

	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
	    hwe->class != XE_ENGINE_CLASS_RENDER)
		return false;

	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}

void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = gt->mocs.uc_index;
	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr lrc_setup[] = {
		/*
		 * Some blitter commands do not have a field for MOCS, those
		 * commands will use the MOCS index pointed to by BLIT_CCTL.
		 * BLIT_CCTL registers need to be programmed to un-cached.
		 */
		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
			       ENGINE_CLASS(COPY)),
		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
					   BLIT_CCTL_DST_MOCS_MASK |
					   BLIT_CCTL_SRC_MOCS_MASK,
					   blit_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Use Fixed slice CCS mode */
		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
					   RCU_MODE_FIXED_SLICE_CCS_MODE))
		},
		/* Disable WMTP if HW doesn't support it */
		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
					   PREEMPT_GPGPU_LEVEL_MASK,
					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
}

static void
hw_engine_setup_default_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be
	 * used by the command streamer when executing commands that
	 * don't have a way to explicitly specify a MOCS setting.
	 * The default should usually reference whichever MOCS entry
	 * corresponds to uncached behavior, although use of a WB cached
	 * entry is recommended by the spec in certain circumstances on
	 * specific platforms.
	 * Bspec: 72161
	 */
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
				 gt->mocs.wb_index : gt->mocs.uc_index;
	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr engine_entries[] = {
		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
					   CMD_CCTL_WRITE_OVERRIDE_MASK |
					   CMD_CCTL_READ_OVERRIDE_MASK,
					   ring_cmd_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/*
		 * To allow the GSC engine to go idle on MTL we need to enable
		 * idle messaging and set the hysteresis value (we use 0xA=5us
		 * as recommended in spec). On platforms after MTL this is
		 * enabled by default.
		 */
		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
				     IDLE_MSG_DISABLE,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
					   IDLE_WAIT_TIME,
					   0xA,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Enable Priority Mem Read */
		{ XE_RTP_NAME("Priority_Mem_Read"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}

static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	xe_gt_assert(gt, !hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = info->mmio_base;
	hwe->irq_offset = info->irq_offset;
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	hwe->eclass = &gt->eclass[hwe->class];
	if (!hwe->eclass->sched_props.job_timeout_ms) {
		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;

		/*
		 * The GSC engine can accept submissions while the GSC shim is
		 * being reset, during which time the submission is stalled. In
		 * the worst case, the shim reset can take up to the maximum GSC
		 * command execution time (250ms), so the request start can be
		 * delayed by that much; the request itself can take that long
		 * without being preemptible, which means worst case it can
		 * theoretically take up to 500ms for a preemption to go through
		 * on the GSC engine. Adding to that an extra 100ms as a safety
		 * margin, we get a minimum recommended timeout of 600ms.
		 * The preempt_timeout value can't be tuned for OTHER_CLASS
		 * because the class is reserved for kernel usage, so we just
		 * need to make sure that the starting value is above that
		 * threshold; since our default value (640ms) is greater than
		 * 600ms, the only way we can go below is via a kconfig setting.
		 * If that happens, log it in dmesg and update the value.
		 */
		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
			const u32 min_preempt_timeout = 600 * 1000;

			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
			}
		}

		/* Record default props */
		hwe->eclass->defaults = hwe->eclass->sched_props;
	}

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_tuning_process_engine(hwe);
	xe_wa_process_engine(hwe);
	hw_engine_setup_default_state(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}

static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	int err;

	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
	xe_reg_sr_apply_whitelist(hwe);

	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
						 XE_BO_FLAG_GGTT |
						 XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	if (!xe_device_uc_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_hwsp;
		}
	} else {
		/* GSCCS has a special interrupt for reset */
		if (hwe->class == XE_ENGINE_CLASS_OTHER)
			hwe->irq_handler = xe_gsc_hwe_irq_handler;

		if (!IS_SRIOV_VF(xe))
			xe_hw_engine_enable_ring(hwe);
	}

	/* We reserve the highest BCS instance for USM */
	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);

err_hwsp:
	xe_bo_unpin_map_no_vm(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}

static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}

static void read_media_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);

	/*
	 * Pre-Xe_HP platforms had register bits representing absent engines,
	 * whereas Xe_HP and beyond have bits representing present engines.
	 * Invert the polarity on old platforms so that we can use common
	 * handling below.
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vecs%u fused off\n", j);
		}
	}
}

static void read_copy_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 bcs_mask;

	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
		return;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);

	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j / 2) & bcs_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "bcs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * CCS fusing based on DSS masks only applies to platforms that can
	 * have more than one CCS.
	 */
	if (hweight64(gt->info.engine_mask &
		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
		return;

	/*
	 * CCS availability on Xe_HP is inferred from the presence of DSS in
	 * each quadrant.
	 */
	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 ccs_mask;

	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);

	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if ((ccs_mask & BIT(j)) == 0) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
		read_compute_fuses_from_reg(gt);
	else
		read_compute_fuses_from_dss(gt);
}

static void check_gsc_availability(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
		return;

	/*
	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
	 * have the FW there is nothing we need the engine for and can therefore
	 * skip its initialization.
	 */
	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);

		/* interrupts were previously enabled, so turn them off */
		xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
		xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);

		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
	}
}

int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_media_fuses(gt);
	read_copy_fuses(gt);
	read_compute_fuses(gt);
	check_gsc_availability(gt);

	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);
	err = xe_hw_engine_setup_groups(gt);
	if (err)
		return err;

	return 0;
}

void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_RENDER_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

static bool
is_slice_common_per_gslice(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) >= 1255;
}

static void
xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
				       struct xe_hw_engine_snapshot *snapshot)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int dss;
	u16 group, instance;

	snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0));

	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	if (is_slice_common_per_gslice(xe) == false) {
		snapshot->reg.instdone.slice_common[0] =
			xe_mmio_read32(gt, SC_INSTDONE);
		snapshot->reg.instdone.slice_common_extra[0] =
			xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
		snapshot->reg.instdone.slice_common_extra2[0] =
			xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
	} else {
		for_each_geometry_dss(dss, gt, group, instance) {
			snapshot->reg.instdone.slice_common[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
			snapshot->reg.instdone.slice_common_extra[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
			snapshot->reg.instdone.slice_common_extra2[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
		}
	}

	for_each_geometry_dss(dss, gt, group, instance) {
		snapshot->reg.instdone.sampler[dss] =
			xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
		snapshot->reg.instdone.row[dss] =
			xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);

		if (GRAPHICS_VERx100(xe) >= 1255)
			snapshot->reg.instdone.geom_svg[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
						       group, instance);
	}
}

/**
 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 * @hwe: Xe HW Engine.
 *
 * This can be printed out in a later stage like during dev_coredump
 * analysis.
 *
 * Returns: a Xe HW Engine snapshot object that must be freed by the
 * caller, using `xe_hw_engine_snapshot_free`.
 */
struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
{
	struct xe_hw_engine_snapshot *snapshot;
	size_t len;
	u64 val;

	if (!xe_hw_engine_is_valid(hwe))
		return NULL;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);

	if (!snapshot)
		return NULL;

	/* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
	 * includes xe_hw_engine_types.h, the length of these 3 registers can't
	 * be set in struct xe_hw_engine_snapshot, so we do additional
	 * allocations here.
	 */
	len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
	snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
	if (!snapshot->reg.instdone.slice_common ||
	    !snapshot->reg.instdone.slice_common_extra ||
	    !snapshot->reg.instdone.slice_common_extra2 ||
	    !snapshot->reg.instdone.sampler ||
	    !snapshot->reg.instdone.row ||
	    !snapshot->reg.instdone.geom_svg) {
		xe_hw_engine_snapshot_free(snapshot);
		return NULL;
	}

	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
	snapshot->hwe = hwe;
	snapshot->logical_instance = hwe->logical_instance;
	snapshot->forcewake.domain = hwe->domain;
	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
						    hwe->domain);
	snapshot->mmio_base = hwe->mmio_base;

	/* no more VF accessible data below this point */
	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
		return snapshot;

	snapshot->reg.ring_execlist_status =
		xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
	snapshot->reg.ring_execlist_status |= val << 32;

	snapshot->reg.ring_execlist_sq_contents =
		xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
	snapshot->reg.ring_execlist_sq_contents |= val << 32;

	snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
	snapshot->reg.ring_acthd |= val << 32;

	snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
	snapshot->reg.ring_bbaddr |= val << 32;

	snapshot->reg.ring_dma_fadd =
		xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
	snapshot->reg.ring_dma_fadd |= val << 32;

	snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
	snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
	snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0));
	if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
		val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0));
		snapshot->reg.ring_start |= val << 32;
	}
	if (xe_gt_has_indirect_ring_state(hwe->gt)) {
		snapshot->reg.indirect_ring_state =
			xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
	}

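	/*
	 * Descriptive note: RING_HEAD and RING_TAIL also carry status/wrap
	 * bits, so mask with HEAD_ADDR/TAIL_ADDR below to keep only the ring
	 * offsets in the snapshot.
	 */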
	snapshot->reg.ring_head =
		xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
	snapshot->reg.ring_tail =
		xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
	snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0));
	snapshot->reg.ring_mi_mode =
		xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
	snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0));
	snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0));
	snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0));
	snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0));
	snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0));
	snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0));
	xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);

	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);

	return snapshot;
}

static void
xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
{
	struct xe_gt *gt = snapshot->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	u16 group, instance;
	unsigned int dss;

	drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);

	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	if (is_slice_common_per_gslice(xe) == false) {
		drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common[0]);
		drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common_extra[0]);
		drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common_extra2[0]);
	} else {
		for_each_geometry_dss(dss, gt, group, instance) {
			drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common[dss]);
			drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common_extra[dss]);
			drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common_extra2[dss]);
		}
	}

	for_each_geometry_dss(dss, gt, group, instance) {
		drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
			   snapshot->reg.instdone.sampler[dss]);
		drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
			   snapshot->reg.instdone.row[dss]);

		if (GRAPHICS_VERx100(xe) >= 1255)
			drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
				   dss, snapshot->reg.instdone.geom_svg[dss]);
	}
}

/**
 * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given Xe HW Engine snapshot object.
 */
void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
				 struct drm_printer *p)
{
	if (!snapshot)
		return;

	drm_printf(p, "%s (physical), logical instance=%d\n",
		   snapshot->name ? snapshot->name : "",
		   snapshot->logical_instance);
	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
		   snapshot->forcewake.domain, snapshot->forcewake.ref);
	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
		   snapshot->reg.ring_execlist_status);
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
		   snapshot->reg.ring_execlist_sq_contents);
	drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
	drm_printf(p, "\tRING_MODE: 0x%08x\n",
		   snapshot->reg.ring_mode);
	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
	drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
		   snapshot->reg.indirect_ring_state);
	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
	xe_hw_engine_snapshot_instdone_print(snapshot, p);

	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
			   snapshot->reg.rcu_mode);
	drm_puts(p, "\n");
}

/**
 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->reg.instdone.slice_common);
	kfree(snapshot->reg.instdone.slice_common_extra);
	kfree(snapshot->reg.instdone.slice_common_extra2);
	kfree(snapshot->reg.instdone.sampler);
	kfree(snapshot->reg.instdone.row);
	kfree(snapshot->reg.instdone.geom_svg);
	kfree(snapshot->name);
	kfree(snapshot);
}

/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function quickly captures a snapshot and immediately prints it out.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	snapshot = xe_hw_engine_snapshot_capture(hwe);
	xe_hw_engine_snapshot_print(snapshot, p);
	xe_hw_engine_snapshot_free(snapshot);
}

u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	if (hwe->class == XE_ENGINE_CLASS_OTHER)
		return true;

	/* Check for engines disabled by ccs_mode setting */
	if (xe_gt_ccs_mode_enabled(gt) &&
	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
	    hwe->logical_instance >= gt->ccs_mode)
		return true;

	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}

const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		return "rcs";
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		return "vcs";
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return "vecs";
	case XE_ENGINE_CLASS_COPY:
		return "bcs";
	case XE_ENGINE_CLASS_OTHER:
		return "other";
	case XE_ENGINE_CLASS_COMPUTE:
		return "ccs";
	case XE_ENGINE_CLASS_MAX:
		break;
	}

	return NULL;
}

u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
	return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
}

enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
{
	return engine_infos[hwe->engine_id].domain;
}

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};

/**
 * xe_hw_engine_lookup() - Lookup hardware engine for class:instance
 * @xe: xe device
 * @eci: engine class and instance
 *
 * This function will find a hardware engine for given engine
 * class and instance.
 *
 * Return: the xe_hw_engine pointer if found, NULL otherwise.
 */
struct xe_hw_engine *
xe_hw_engine_lookup(struct xe_device *xe,
		    struct drm_xe_engine_class_instance eci)
{
	unsigned int idx;

	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return NULL;

	if (eci.gt_id >= xe->info.gt_count)
		return NULL;

	idx = array_index_nospec(eci.engine_class,
				 ARRAY_SIZE(user_to_xe_engine_class));

	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
			       user_to_xe_engine_class[idx],
			       eci.engine_instance, true);
}