// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <linux/math64.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule-engine-done notification is being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_CHECK_TIMEOUT		(1 << 10)
#define EXEC_QUEUE_STATE_EXTRA_REF		(1 << 11)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}
static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_check_timeout(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT;
}

static void set_exec_queue_check_timeout(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
}

static void clear_exec_queue_check_timeout(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
}

static bool exec_queue_extra_ref(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF;
}

static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
}

static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

static void guc_submit_wedged_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;
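/*
 * Prime lockdep at init time: take submission_state.lock inside a simulated
 * fs_reclaim context so the lock ordering is recorded up front and any later
 * allocation under this lock that could recurse into reclaim is flagged
 * immediately, not only when reclaim actually happens.
 */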
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide an explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_MASK(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}
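/**
 * xe_guc_submit_enable() - Enable GuC submission.
 * @guc: the &xe_guc to enable submission on
 *
 * Push the global scheduling policies to the GuC and mark submission as
 * enabled.
 *
 * Return: 0 on success or a negative error code on failure.
 */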
int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}
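/*
 * A queue reserves q->width consecutive GuC context ids (one per LRC) and
 * stores itself in the exec_queue_lookup xarray under each of them, so that
 * G2H handlers can map any of the ids back to the queue.
 */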
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}
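/*
 * Drop the preemption timeout to the minimum (1 us) so that a following
 * schedule-disable request is honoured by the GuC as quickly as possible.
 */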
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}
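/*
 * Build the registration info (including the parallel work queue descriptor
 * for multi-LRC queues), register the queue's LRC(s) with the GuC and push
 * the initial scheduling policies.
 */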
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/*
	 * We must keep a reference for LR engines if the engine is registered
	 * with the GuC, as jobs signal immediately and we can't destroy an
	 * engine while the GuC still has a reference to it.
	 */
	if (xe_exec_queue_is_lr(q))
		xe_exec_queue_get(q);

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else
		__register_exec_queue(guc, &info);
	init_policies(guc, q);
}

static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE) {
			if (sleep_period_ms == 1024) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}
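/*
 * A multi-LRC work queue item is laid out as: a header dword (type and
 * length), the LRC descriptor of the first LRC, a dword carrying the guc_id
 * and the first LRC's ring tail, a zero dword, and then one ring-tail dword
 * per additional LRC in the queue.
 */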
static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE 4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
static void submit_exec_queue(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	if (xe_exec_queue_is_parallel(q))
		wq_item_append(q);
	else
		xe_lrc_set_ring_tail(lrc, lrc->ring.tail);

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}
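/*
 * DRM scheduler run_job() backend: register the queue with the GuC on first
 * use, emit the ring commands (LR queues emit them in the exec IOCTL
 * instead) and submit. LR queues return no fence to the scheduler; their job
 * fence is marked with -EOPNOTSUPP and the reference from xe_sched_job_arm()
 * is dropped.
 */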
static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct dma_fence *fence = NULL;
	bool lr = xe_exec_queue_is_lr(q);

	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
			q->ring_ops->emit_job(job);
		submit_exec_queue(q);
	}

	if (lr) {
		xe_sched_job_set_error(job, -EOPNOTSUPP);
		dma_fence_put(job->fence);	/* Drop ref from xe_sched_job_arm */
	} else {
		fence = job->fence;
	}

	return fence;
}

/**
 * xe_guc_jobs_ring_rebase - Re-emit ring commands of requests pending
 * on all queues under a guc.
 * @guc: the &xe_guc struct instance
 */
void xe_guc_jobs_ring_rebase(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	/*
	 * This routine is used within VF migration recovery. This means
	 * using the lock here introduces a restriction: we cannot wait
	 * for any GFX HW response while the lock is taken.
	 */
	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_killed_or_banned_or_wedged(q))
			continue;
		xe_exec_queue_jobs_ring_restore(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		q->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}

static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	set_min_preemption_timeout(guc, q);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc),
				 HZ * 5);
	if (!ret) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* to wake up xe_wait_user_fence ioctl if exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}
/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save the state of exec queues registered with the GuC by taking a ref to
 * each queue. Register a devm handler to drop the refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
				       guc_submit_wedged_fini, guc);
	if (err) {
		xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
			  "Although device is wedged.\n");
		return;
	}

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		if (xe_exec_queue_get_unless_zero(q))
			set_exec_queue_wedged(q);
	mutex_unlock(&guc->submission_state.lock);
}

static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != 2)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}
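/*
 * Cleanup handler for long-running (LR) queues, which have no job timeout;
 * runs from the lr_tdr work item queued by xe_guc_exec_queue_trigger_cleanup()
 * and takes the place of the TDR for these queues.
 */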
static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_gpu_scheduler *sched = &ge->sched;
	bool wedged = false;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
	trace_xe_exec_queue_lr_cleanup(q);

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to mark
	 * the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken the GT reset code should
		 * signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
				   q->guc->id);
			xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n",
				       q->guc->id);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
		xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);

	xe_sched_submission_start(sched);
}

#define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)

static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid and catch
	 * possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}
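/*
 * Scheduling enable/disable are asynchronous H2G requests: the
 * pending_enable / pending_disable bits are set here and cleared once the
 * GuC acknowledges the state change via G2H.
 */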
static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc), HZ * 5);
	if (!ret || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate)
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
}
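/*
 * Job timeout (TDR) handler: capture state, disable scheduling and verify
 * the timeout against the context timestamps. A false positive re-enables
 * scheduling and re-arms the TDR; a genuine timeout bans the queue,
 * deregisters it from the GuC and cancels its outstanding jobs.
 */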
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	unsigned int fw_ref;
	int err = -ETIME;
	pid_t pid = -1;
	int i = 0;
	bool wedged = false, skip_timeout_check;

	/*
	 * TDR has fired before free job worker. Common if exec queue
	 * immediately closed after last fence signaled. Add back to pending
	 * list so job can be freed and kick scheduler ensuring free job is not
	 * lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q) ||
		exec_queue_destroyed(q);

	/*
	 * If devcoredump not captured and GuC capture for the job is not ready
	 * do manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_queue(q);

		xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
	}

	/*
	 * XXX: Sampling timeout doesn't work in wedged mode as we have to
	 * modify scheduling state to read timestamp. We could read the
	 * timestamp from a register to accumulate current running time but this
	 * doesn't work for SRIOV. For now assuming timeouts in wedged mode are
	 * genuine timeouts.
	 */
	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Engine state now stable, disable scheduling to check timestamp */
	if (!wedged && exec_queue_registered(q)) {
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;

		if (!exec_queue_destroyed(q)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(q) &&
						  !exec_queue_pending_disable(q)) ||
						 xe_guc_read_stopped(guc), HZ * 5);
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			/*
			 * Flag communicates to G2H handler that schedule
			 * disable originated from a timeout check. The G2H
			 * handler then avoids triggering cleanup or
			 * deregistering the exec queue.
			 */
			set_exec_queue_check_timeout(q);
			disable_scheduling(q, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc), HZ * 5);
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   q->guc->id);
			xe_devcoredump(q, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       q->guc->id, ret, xe_guc_read_stopped(guc));
			set_exec_queue_extra_ref(q);
			xe_exec_queue_get(q);	/* GT reset owns this */
			set_exec_queue_banned(q);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	/*
	 * Check if job is actually timed out, if so restart job execution and TDR
	 */
	if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
	    !exec_queue_reset(q) && exec_queue_registered(q)) {
		clear_exec_queue_check_timeout(q);
		goto sched_enable;
	}

	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			clear_exec_queue_check_timeout(q);
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Finish cleaning up exec queue via deregister */
	set_exec_queue_banned(q);
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		set_exec_queue_extra_ref(q);
		xe_exec_queue_get(q);
		__deregister_exec_queue(guc, q);
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);

	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

	return DRM_GPU_SCHED_STAT_RESET;

sched_enable:
	enable_scheduling(q);
rearm:
	/*
	 * XXX: Ideally want to adjust timeout based on current execution time
	 * but there is not currently an easy way to do that in the DRM
	 * scheduler. With some thought, do this in a follow up.
	 */
	xe_sched_submission_start(sched);
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due to sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_pm_runtime_get(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);

	xe_pm_runtime_put(guc_to_xe(guc));
}

static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that it
	 * is.
	 */
	guc_exec_queue_destroy_async(q);
}
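/*
 * The CLEANUP / SET_SCHED_PROPS / SUSPEND / RESUME handlers below run as
 * messages processed by the GPU scheduler, which serializes them against
 * run_job() and against each other.
 */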
static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	if (exec_queue_registered(q))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_destroy(guc, q);
}

static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
}

static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

static void __suspend_fence_signal(struct xe_exec_queue *q)
{
	if (!q->guc->suspend_pending)
		return;

	WRITE_ONCE(q->guc->suspend_pending, false);
	wake_up(&q->guc->suspend_wait);
}

static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
		     xe_guc_read_stopped(guc));
	xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);

	__suspend_fence_signal(q);
}

static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
	    exec_queue_enabled(q)) {
		wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING ||
			   xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q));

		if (!xe_guc_read_stopped(guc)) {
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       q->guc->resume_time);
			s64 wait_ms = q->vm->preempt.min_run_period_ms -
				since_resume_ms;

			if (wait_ms > 0 && q->guc->resume_time)
				msleep(wait_ms);

			set_exec_queue_suspended(q);
			disable_scheduling(q, false);
		}
	} else if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
}

static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q)) {
		clear_exec_queue_suspended(q);
		if (!exec_queue_enabled(q)) {
			q->guc->resume_time = RESUME_PENDING;
			enable_scheduling(q);
		}
	} else {
		clear_exec_queue_suspended(q);
	}
}

#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS	2
#define SUSPEND		3
#define RESUME		4
#define OPCODE_MASK	0xf
#define MSG_LOCKED	BIT(8)
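/*
 * Dispatch a message added via guc_exec_queue_add_msg(). Each message holds
 * a runtime PM reference taken in add_msg and released here once the handler
 * has run.
 */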
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));

	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}

	xe_pm_runtime_put(xe);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err, i;

	xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
	if (!ge)
		return -ENOMEM;

	q->guc = ge;
	ge->q = q;
	init_rcu_head(&ge->rcu);
	init_waitqueue_head(&ge->suspend_wait);

	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
		INIT_LIST_HEAD(&ge->static_msgs[i].link);

	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		  msecs_to_jiffies(q->sched_props.job_timeout_ms);
	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;

	if (xe_exec_queue_is_lr(q))
		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_entity;

	q->entity = &ge->entity;

	if (xe_guc_read_stopped(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	xe_exec_queue_assign_name(q, q->guc->id);

	trace_xe_exec_queue_create(q);

	return 0;

err_entity:
	mutex_unlock(&guc->submission_state.lock);
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	kfree(ge);

	return err;
}

static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
	trace_xe_exec_queue_kill(q);
	set_exec_queue_killed(q);
	__suspend_fence_signal(q);
	xe_guc_exec_queue_trigger_cleanup(q);
}

static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
				   u32 opcode)
{
	xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));

	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode & OPCODE_MASK;
	msg->private_data = q;

	trace_xe_sched_msg_add(msg);
	if (opcode & MSG_LOCKED)
		xe_sched_add_msg_locked(&q->guc->sched, msg);
	else
		xe_sched_add_msg(&q->guc->sched, msg);
}

static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
				       struct xe_sched_msg *msg,
				       u32 opcode)
{
	if (!list_empty(&msg->link))
		return false;

	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);

	return true;
}

#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2
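/*
 * Destruction is funnelled through a CLEANUP message so that the
 * schedule-disable and deregister handshake with the GuC happens from the
 * scheduler's context; permanent or wedged queues skip the handshake and are
 * torn down directly.
 */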
static void guc_exec_queue_destroy(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
		guc_exec_queue_add_msg(q, msg, CLEANUP);
	else
		__guc_exec_queue_destroy(exec_queue_to_guc(q), q);
}

static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
				       enum xe_exec_queue_priority priority)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.priority == priority ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.priority = priority;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.timeslice_us == timeslice_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.timeslice_us = timeslice_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
					      u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (exec_queue_killed_or_banned_or_wedged(q))
		return -EINVAL;

	xe_sched_msg_lock(sched);
	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
		q->guc->suspend_pending = true;
	xe_sched_msg_unlock(sched);

	return 0;
}

static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	/*
	 * Likely don't need to check exec_queue_killed() as we clear
	 * suspend_pending upon kill, but to be paranoid about races in which
	 * suspend_pending is set after kill, also check kill here.
	 */
	ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
					       !READ_ONCE(q->guc->suspend_pending) ||
					       exec_queue_killed(q) ||
					       xe_guc_read_stopped(guc),
					       HZ * 5);

	if (!ret) {
		xe_gt_warn(guc_to_gt(guc),
			   "Suspend fence, guc_id=%d, failed to respond",
			   q->guc->id);
		/* XXX: Trigger GT reset? */
		return -ETIME;
	}

	return ret < 0 ? ret : 0;
}
static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);

	xe_sched_msg_lock(sched);
	guc_exec_queue_try_add_msg(q, msg, RESUME);
	xe_sched_msg_unlock(sched);
}

static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
{
	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
}

/*
 * All of these functions are an abstraction layer which other parts of XE can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
};
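/*
 * Per-queue teardown used while GuC submission is stopped for a reset: drop
 * references tied to lost G2H messages, clear transient state bits and
 * signal any pending suspend waiters. Queues (other than kernel and VM
 * queues) with a started but unfinished job, or LR queues with a non-idle
 * ring, get banned.
 */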
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
        struct xe_gpu_scheduler *sched = &q->guc->sched;

        /* Stop scheduling + flush any DRM scheduler operations */
        xe_sched_submission_stop(sched);

        /* Clean up lost G2H + reset engine state */
        if (exec_queue_registered(q)) {
                if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
                        xe_exec_queue_put(q);
                else if (exec_queue_destroyed(q))
                        __guc_exec_queue_destroy(guc, q);
        }
        if (q->guc->suspend_pending) {
                set_exec_queue_suspended(q);
                suspend_fence_signal(q);
        }
        atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
                   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
                   EXEC_QUEUE_STATE_SUSPENDED,
                   &q->guc->state);
        q->guc->resume_time = 0;
        trace_xe_exec_queue_stop(q);

        /*
         * Ban any engine (aside from kernel engines and engines used for VM
         * ops) that has a started but not completed job, or whose job has
         * gone through a GT reset more than twice.
         */
        if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
                struct xe_sched_job *job = xe_sched_first_pending_job(sched);
                bool ban = false;

                if (job) {
                        if ((xe_sched_job_started(job) &&
                            !xe_sched_job_completed(job)) ||
                            xe_sched_invalidate_job(job, 2)) {
                                trace_xe_sched_job_ban(job);
                                ban = true;
                        }
                } else if (xe_exec_queue_is_lr(q) &&
                           !xe_lrc_ring_is_idle(q->lrc[0])) {
                        ban = true;
                }

                if (ban) {
                        set_exec_queue_banned(q);
                        xe_guc_exec_queue_trigger_cleanup(q);
                }
        }
}

/**
 * xe_guc_submit_reset_block - Disallow reset calls on given GuC.
 * @guc: the &xe_guc struct instance
 *
 * Returns: previous value of the reset_blocked flag, i.e. non-zero if reset
 * calls were already blocked.
 */
int xe_guc_submit_reset_block(struct xe_guc *guc)
{
        return atomic_fetch_or(1, &guc->submission_state.reset_blocked);
}

/**
 * xe_guc_submit_reset_unblock - Allow reset calls on given GuC again.
 * @guc: the &xe_guc struct instance
 */
void xe_guc_submit_reset_unblock(struct xe_guc *guc)
{
        atomic_set_release(&guc->submission_state.reset_blocked, 0);
        wake_up_all(&guc->ct.wq);
}

static int guc_submit_reset_is_blocked(struct xe_guc *guc)
{
        return atomic_read_acquire(&guc->submission_state.reset_blocked);
}

/* Maximum time of blocking reset */
#define RESET_BLOCK_PERIOD_MAX (HZ * 5)

/**
 * xe_guc_wait_reset_unblock - Wait until reset blocking flag is lifted, or timeout.
 * @guc: the &xe_guc struct instance
 *
 * Returns: non-zero (remaining jiffies, at least 1) if the reset block was
 * lifted, 0 if the wait timed out.
 */
int xe_guc_wait_reset_unblock(struct xe_guc *guc)
{
        return wait_event_timeout(guc->ct.wq,
                                  !guc_submit_reset_is_blocked(guc), RESET_BLOCK_PERIOD_MAX);
}

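/*
 * Illustrative only: xe_guc_submit_reset_block() and
 * xe_guc_submit_reset_unblock() form an advisory gate, and
 * xe_guc_wait_reset_unblock() is what a reset path can use to honor it,
 * bounded by RESET_BLOCK_PERIOD_MAX. A hypothetical caller that must keep the
 * GuC state stable for a while might do:
 *
 *      if (!xe_guc_submit_reset_block(guc)) {
 *              (work that must not race with a GT reset)
 *              xe_guc_submit_reset_unblock(guc);
 *      }
 *
 * while the reset side calls xe_guc_wait_reset_unblock(guc) before proceeding
 * and gives up after the timeout.
 */
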
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
        int ret;

        if (!guc->submission_state.initialized)
                return 0;

        /*
         * Using an atomic here rather than submission_state.lock as this
         * function can be called while holding the CT lock (engine reset
         * failure). submission_state.lock needs the CT lock to resubmit jobs.
         * An atomic is not ideal, but it works to protect against concurrent
         * resets and to release any TDRs waiting on
         * guc->submission_state.stopped.
         */
        ret = atomic_fetch_or(1, &guc->submission_state.stopped);
        smp_wmb();
        wake_up_all(&guc->ct.wq);

        return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
        wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
                   !xe_guc_read_stopped(guc));
}

void xe_guc_submit_stop(struct xe_guc *guc)
{
        struct xe_exec_queue *q;
        unsigned long index;

        xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

        mutex_lock(&guc->submission_state.lock);

        xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
                /* Prevent redundant attempts to stop parallel queues */
                if (q->guc->id != index)
                        continue;

                guc_exec_queue_stop(guc, q);
        }

        mutex_unlock(&guc->submission_state.lock);

        /*
         * No one can enter the backend at this point, aside from new engine
         * creation which is protected by guc->submission_state.lock.
         */
}

/**
 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
 */
void xe_guc_submit_pause(struct xe_guc *guc)
{
        struct xe_exec_queue *q;
        unsigned long index;

        xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
                xe_sched_submission_stop_async(&q->guc->sched);
}

static void guc_exec_queue_start(struct xe_exec_queue *q)
{
        struct xe_gpu_scheduler *sched = &q->guc->sched;

        if (!exec_queue_killed_or_banned_or_wedged(q)) {
                int i;

                trace_xe_exec_queue_resubmit(q);
                for (i = 0; i < q->width; ++i)
                        xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
                xe_sched_resubmit_jobs(sched);
        }

        xe_sched_submission_start(sched);
        xe_sched_submission_resume_tdr(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
        struct xe_exec_queue *q;
        unsigned long index;

        xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

        mutex_lock(&guc->submission_state.lock);
        atomic_dec(&guc->submission_state.stopped);
        xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
                /* Prevent redundant attempts to start parallel queues */
                if (q->guc->id != index)
                        continue;

                guc_exec_queue_start(q);
        }
        mutex_unlock(&guc->submission_state.lock);

        wake_up_all(&guc->ct.wq);

        return 0;
}

static void guc_exec_queue_unpause(struct xe_exec_queue *q)
{
        struct xe_gpu_scheduler *sched = &q->guc->sched;

        xe_sched_submission_start(sched);
}

/**
 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause(struct xe_guc *guc)
{
        struct xe_exec_queue *q;
        unsigned long index;

        xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
                guc_exec_queue_unpause(q);

        wake_up_all(&guc->ct.wq);
}

static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
        struct xe_gt *gt = guc_to_gt(guc);
        struct xe_exec_queue *q;

        if (unlikely(guc_id >= GUC_ID_MAX)) {
                xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
                return NULL;
        }

        q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
        if (unlikely(!q)) {
                xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
                return NULL;
        }

        xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
        xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));

        return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
        u32 action[] = {
                XE_GUC_ACTION_DEREGISTER_CONTEXT,
                q->guc->id,
        };

        xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
        xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
        xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
        xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

        trace_xe_exec_queue_deregister(q);

        xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}

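/*
 * Process a SCHED_DONE G2H: runnable_state == 1 acknowledges a scheduling
 * enable (record the resume time, clear the pending-enable bit and wake
 * anyone waiting on the CT wq), while runnable_state == 0 acknowledges a
 * disable and has to work out whether a suspend, a ban/timeout check or a
 * destroy triggered it before clearing the pending-disable bit.
 */
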
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
                              u32 runnable_state)
{
        trace_xe_exec_queue_scheduling_done(q);

        if (runnable_state == 1) {
                xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));

                q->guc->resume_time = ktime_get();
                clear_exec_queue_pending_enable(q);
                smp_wmb();
                wake_up_all(&guc->ct.wq);
        } else {
                bool check_timeout = exec_queue_check_timeout(q);

                xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
                xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));

                if (q->guc->suspend_pending) {
                        suspend_fence_signal(q);
                        clear_exec_queue_pending_disable(q);
                } else {
                        if (exec_queue_banned(q) || check_timeout) {
                                smp_wmb();
                                wake_up_all(&guc->ct.wq);
                        }
                        if (!check_timeout && exec_queue_destroyed(q)) {
                                /*
                                 * Make sure to clear the pending_disable only
                                 * after sampling the destroyed state. We want
                                 * to ensure we don't trigger the unregister too
                                 * early with something intending to only
                                 * disable scheduling. The caller doing the
                                 * destroy must wait for an ongoing
                                 * pending_disable before marking as destroyed.
                                 */
                                clear_exec_queue_pending_disable(q);
                                deregister_exec_queue(guc, q);
                        } else {
                                clear_exec_queue_pending_disable(q);
                        }
                }
        }
}

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
        struct xe_exec_queue *q;
        u32 guc_id, runnable_state;

        if (unlikely(len < 2))
                return -EPROTO;

        guc_id = msg[0];
        runnable_state = msg[1];

        q = g2h_exec_queue_lookup(guc, guc_id);
        if (unlikely(!q))
                return -EPROTO;

        if (unlikely(!exec_queue_pending_enable(q) &&
                     !exec_queue_pending_disable(q))) {
                xe_gt_err(guc_to_gt(guc),
                          "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
                          atomic_read(&q->guc->state), q->guc->id,
                          runnable_state);
                return -EPROTO;
        }

        handle_sched_done(guc, q, runnable_state);

        return 0;
}

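/*
 * Teardown handshake with the GuC, as implemented by the handlers below: once
 * scheduling has been disabled (SCHED_DONE with runnable_state == 0) on a
 * destroyed queue, deregister_exec_queue() sends DEREGISTER_CONTEXT, and the
 * DEREGISTER_DONE reply lands here, where the final reference is dropped or
 * the queue is freed outright.
 */
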
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
        trace_xe_exec_queue_deregister_done(q);

        clear_exec_queue_registered(q);

        if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
                xe_exec_queue_put(q);
        else
                __guc_exec_queue_destroy(guc, q);
}

int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
        struct xe_exec_queue *q;
        u32 guc_id;

        if (unlikely(len < 1))
                return -EPROTO;

        guc_id = msg[0];

        q = g2h_exec_queue_lookup(guc, guc_id);
        if (unlikely(!q))
                return -EPROTO;

        if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
            exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
                xe_gt_err(guc_to_gt(guc),
                          "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
                          atomic_read(&q->guc->state), q->guc->id);
                return -EPROTO;
        }

        handle_deregister_done(guc, q);

        return 0;
}

int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
        struct xe_gt *gt = guc_to_gt(guc);
        struct xe_exec_queue *q;
        u32 guc_id;

        if (unlikely(len < 1))
                return -EPROTO;

        guc_id = msg[0];

        q = g2h_exec_queue_lookup(guc, guc_id);
        if (unlikely(!q))
                return -EPROTO;

        xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
                   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);

        trace_xe_exec_queue_reset(q);

        /*
         * A banned engine is a NOP at this point (it came from
         * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to
         * cancel the jobs by setting the job timeout to the minimum value,
         * which kicks guc_exec_queue_timedout_job.
         */
        set_exec_queue_reset(q);
        if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
                xe_guc_exec_queue_trigger_cleanup(q);

        return 0;
}

/*
 * xe_guc_error_capture_handler - Handler of GuC captured message
 * @guc: The GuC object
 * @msg: Pointer to the message
 * @len: The message length
 *
 * When GuC capture data is ready, GuC sends message
 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to the host. This function is
 * called first to check the status before processing the data that comes
 * with the message.
 *
 * Returns: 0 on success, negative error code on failure.
 */
int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
        u32 status;

        if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
                return -EPROTO;

        status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
        if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
                xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");

        xe_guc_capture_process(guc);

        return 0;
}

int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
                                               u32 len)
{
        struct xe_gt *gt = guc_to_gt(guc);
        struct xe_exec_queue *q;
        u32 guc_id;
        u32 type = XE_GUC_CAT_ERR_TYPE_INVALID;

        if (unlikely(!len || len > 2))
                return -EPROTO;

        guc_id = msg[0];

        if (len == 2)
                type = msg[1];

        if (guc_id == GUC_ID_UNKNOWN) {
                /*
                 * GuC uses GUC_ID_UNKNOWN if it cannot map the CAT fault to
                 * any PF/VF context. In such a case only the PF will be
                 * notified about that fault.
                 */
                xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
                return 0;
        }

        q = g2h_exec_queue_lookup(guc, guc_id);
        if (unlikely(!q))
                return -EPROTO;

        /*
         * The type is HW-defined and varies by platform, so we don't decode
         * it in the kernel and only check whether it is valid.
         * See bspec 54047 and 72187 for details.
         */
        if (type != XE_GUC_CAT_ERR_TYPE_INVALID)
                xe_gt_dbg(gt,
                          "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
                          type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
        else
                xe_gt_dbg(gt,
                          "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
                          xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);

        trace_xe_exec_queue_memory_cat_error(q);

        /* Treat the same as engine reset */
        set_exec_queue_reset(q);
        if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
                xe_guc_exec_queue_trigger_cleanup(q);

        return 0;
}

int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
        struct xe_gt *gt = guc_to_gt(guc);
        u8 guc_class, instance;
        u32 reason;

        if (unlikely(len != 3))
                return -EPROTO;

        guc_class = msg[0];
        instance = msg[1];
        reason = msg[2];

        /* Unexpected failure of a hardware feature, log an actual error */
        xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
                  guc_class, instance, reason);

        xe_gt_reset_async(gt);

        return 0;
}

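/*
 * For parallel queues the GuC work queue lives in the parallel LRC mapping.
 * The snapshot helpers below record both the driver's cached wqi_head/wqi_tail
 * and the head/tail/status read back from memory, then walk the ring from
 * head to tail (the indices are byte offsets, hence the modulo-WQ_SIZE
 * stepping in sizeof(u32) increments) so the raw WQ items can be dumped later.
 */
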
static void
guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
                                   struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_device *xe = guc_to_xe(guc);
        struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
        int i;

        snapshot->guc.wqi_head = q->guc->wqi_head;
        snapshot->guc.wqi_tail = q->guc->wqi_tail;
        snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
        snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
        snapshot->parallel.wq_desc.status = parallel_read(xe, map,
                                                          wq_desc.wq_status);

        if (snapshot->parallel.wq_desc.head !=
            snapshot->parallel.wq_desc.tail) {
                for (i = snapshot->parallel.wq_desc.head;
                     i != snapshot->parallel.wq_desc.tail;
                     i = (i + sizeof(u32)) % WQ_SIZE)
                        snapshot->parallel.wq[i / sizeof(u32)] =
                                parallel_read(xe, map, wq[i / sizeof(u32)]);
        }
}

static void
guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
                                 struct drm_printer *p)
{
        int i;

        drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
                   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
        drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
                   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
        drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);

        if (snapshot->parallel.wq_desc.head !=
            snapshot->parallel.wq_desc.tail) {
                for (i = snapshot->parallel.wq_desc.head;
                     i != snapshot->parallel.wq_desc.tail;
                     i = (i + sizeof(u32)) % WQ_SIZE)
                        drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
                                   snapshot->parallel.wq[i / sizeof(u32)]);
        }
}

/**
 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
 * @q: faulty exec queue
 *
 * This can be printed out at a later stage, like during dev_coredump
 * analysis.
 *
 * Returns: a GuC Submit Engine snapshot object that must be freed by the
 * caller, using xe_guc_exec_queue_snapshot_free().
 */
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
{
        struct xe_gpu_scheduler *sched = &q->guc->sched;
        struct xe_guc_submit_exec_queue_snapshot *snapshot;
        int i;

        snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
        if (!snapshot)
                return NULL;

        snapshot->guc.id = q->guc->id;
        memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
        snapshot->class = q->class;
        snapshot->logical_mask = q->logical_mask;
        snapshot->width = q->width;
        snapshot->refcount = kref_read(&q->refcount);
        snapshot->sched_timeout = sched->base.timeout;
        snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
        snapshot->sched_props.preempt_timeout_us =
                q->sched_props.preempt_timeout_us;

        snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
                                      GFP_ATOMIC);

        if (snapshot->lrc) {
                for (i = 0; i < q->width; ++i) {
                        struct xe_lrc *lrc = q->lrc[i];

                        snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
                }
        }

        snapshot->schedule_state = atomic_read(&q->guc->state);
        snapshot->exec_queue_flags = q->flags;

        snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
        if (snapshot->parallel_execution)
                guc_exec_queue_wq_snapshot_capture(q, snapshot);

        spin_lock(&sched->base.job_list_lock);
        snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
        snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
                                               sizeof(struct pending_list_snapshot),
                                               GFP_ATOMIC);

        if (snapshot->pending_list) {
                struct xe_sched_job *job_iter;

                i = 0;
                list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
                        snapshot->pending_list[i].seqno =
                                xe_sched_job_seqno(job_iter);
                        snapshot->pending_list[i].fence =
                                dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
                        snapshot->pending_list[i].finished =
                                dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
                                ? 1 : 0;
                        i++;
                }
        }

        spin_unlock(&sched->base.job_list_lock);

        return snapshot;
}

/**
 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
 * @snapshot: Previously captured snapshot of job.
 *
 * This captures some data that requires taking some locks, so it cannot be
 * done in the signaling path.
 */
void
xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
        int i;

        if (!snapshot || !snapshot->lrc)
                return;

        for (i = 0; i < snapshot->width; ++i)
                xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
}

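/*
 * Illustrative only: the snapshot lifecycle suggested by the helpers in this
 * file is capture in the (possibly atomic) error path, capture the
 * lock-taking parts later from process context, then print and free:
 *
 *      snapshot = xe_guc_exec_queue_snapshot_capture(q);
 *      ...
 *      xe_guc_exec_queue_snapshot_capture_delayed(snapshot);
 *      ...
 *      xe_guc_exec_queue_snapshot_print(snapshot, p);
 *      xe_guc_exec_queue_snapshot_free(snapshot);
 *
 * guc_exec_queue_print() further below is the simple variant that does
 * capture, print and free back to back.
 */
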
/**
 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given GuC Submit Engine snapshot object.
 */
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
                                 struct drm_printer *p)
{
        int i;

        if (!snapshot)
                return;

        drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
        drm_printf(p, "\tName: %s\n", snapshot->name);
        drm_printf(p, "\tClass: %d\n", snapshot->class);
        drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
        drm_printf(p, "\tWidth: %d\n", snapshot->width);
        drm_printf(p, "\tRef: %d\n", snapshot->refcount);
        drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
        drm_printf(p, "\tTimeslice: %u (us)\n",
                   snapshot->sched_props.timeslice_us);
        drm_printf(p, "\tPreempt timeout: %u (us)\n",
                   snapshot->sched_props.preempt_timeout_us);

        for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
                xe_lrc_snapshot_print(snapshot->lrc[i], p);

        drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
        drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);

        if (snapshot->parallel_execution)
                guc_exec_queue_wq_snapshot_print(snapshot, p);

        for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
             i++)
                drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
                           snapshot->pending_list[i].seqno,
                           snapshot->pending_list[i].fence,
                           snapshot->pending_list[i].finished);
}

/**
 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
        int i;

        if (!snapshot)
                return;

        if (snapshot->lrc) {
                for (i = 0; i < snapshot->width; i++)
                        xe_lrc_snapshot_free(snapshot->lrc[i]);
                kfree(snapshot->lrc);
        }
        kfree(snapshot->pending_list);
        kfree(snapshot);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
        struct xe_guc_submit_exec_queue_snapshot *snapshot;

        snapshot = xe_guc_exec_queue_snapshot_capture(q);
        xe_guc_exec_queue_snapshot_print(snapshot, p);
        xe_guc_exec_queue_snapshot_free(snapshot);
}

/**
 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
 * @q: Execution queue
 * @ctx_type: Type of the context
 *
 * This function registers the execution queue with the GuC. Special context
 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
 * are only applicable for iGPU and only when running as a VF. Scheduling is
 * enabled on the execution queue right after it is registered.
 */
void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_device *xe = guc_to_xe(guc);
        struct xe_gt *gt = guc_to_gt(guc);

        xe_gt_assert(gt, IS_SRIOV_VF(xe));
        xe_gt_assert(gt, !IS_DGFX(xe));
        xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
                     ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
        xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));

        register_exec_queue(q, ctx_type);
        enable_scheduling(q);
}

/**
 * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
 *
 * This function captures and prints snapshots of **all** GuC Engines.
 */
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
        struct xe_exec_queue *q;
        unsigned long index;

        if (!xe_device_uc_enabled(guc_to_xe(guc)))
                return;

        mutex_lock(&guc->submission_state.lock);
        xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
                guc_exec_queue_print(q, p);
        mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
 * exec queues registered to given GuC.
 * @guc: the &xe_guc struct instance
 * @scratch: scratch buffer to be used as temporary storage
 *
 * Returns: zero on success, negative error code on failure.
 */
int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
{
        struct xe_exec_queue *q;
        unsigned long index;
        int err = 0;

        mutex_lock(&guc->submission_state.lock);
        xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
                err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
                if (err)
                        break;
        }
        mutex_unlock(&guc->submission_state.lock);

        return err;
}

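/*
 * Note on usage (assumption, the caller lives outside this file):
 * xe_guc_contexts_hwsp_rebase() walks every registered exec queue and
 * delegates to xe_exec_queue_contexts_hwsp_rebase(), presumably so that
 * GGTT-based references held by the contexts can be fixed up when the GGTT
 * layout changes underneath them, e.g. after a VF migration.
 */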