// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <linux/math64.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule-engine-done message being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_CHECK_TIMEOUT		(1 << 10)
#define EXEC_QUEUE_STATE_EXTRA_REF		(1 << 11)
#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 12)
#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT	(1 << 13)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE,
&q->guc->state); 119 } 120 121 static bool exec_queue_pending_disable(struct xe_exec_queue *q) 122 { 123 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; 124 } 125 126 static void set_exec_queue_pending_disable(struct xe_exec_queue *q) 127 { 128 atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 129 } 130 131 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) 132 { 133 atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 134 } 135 136 static bool exec_queue_destroyed(struct xe_exec_queue *q) 137 { 138 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; 139 } 140 141 static void set_exec_queue_destroyed(struct xe_exec_queue *q) 142 { 143 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 144 } 145 146 static void clear_exec_queue_destroyed(struct xe_exec_queue *q) 147 { 148 atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 149 } 150 151 static bool exec_queue_banned(struct xe_exec_queue *q) 152 { 153 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; 154 } 155 156 static void set_exec_queue_banned(struct xe_exec_queue *q) 157 { 158 atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 159 } 160 161 static bool exec_queue_suspended(struct xe_exec_queue *q) 162 { 163 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; 164 } 165 166 static void set_exec_queue_suspended(struct xe_exec_queue *q) 167 { 168 atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 169 } 170 171 static void clear_exec_queue_suspended(struct xe_exec_queue *q) 172 { 173 atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 174 } 175 176 static bool exec_queue_reset(struct xe_exec_queue *q) 177 { 178 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; 179 } 180 181 static void set_exec_queue_reset(struct xe_exec_queue *q) 182 { 183 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); 184 } 185 186 static bool exec_queue_killed(struct xe_exec_queue *q) 187 { 188 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; 189 } 190 191 static void set_exec_queue_killed(struct xe_exec_queue *q) 192 { 193 atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); 194 } 195 196 static bool exec_queue_wedged(struct xe_exec_queue *q) 197 { 198 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; 199 } 200 201 static void set_exec_queue_wedged(struct xe_exec_queue *q) 202 { 203 atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); 204 } 205 206 static bool exec_queue_check_timeout(struct xe_exec_queue *q) 207 { 208 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT; 209 } 210 211 static void set_exec_queue_check_timeout(struct xe_exec_queue *q) 212 { 213 atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); 214 } 215 216 static void clear_exec_queue_check_timeout(struct xe_exec_queue *q) 217 { 218 atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); 219 } 220 221 static bool exec_queue_extra_ref(struct xe_exec_queue *q) 222 { 223 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF; 224 } 225 226 static void set_exec_queue_extra_ref(struct xe_exec_queue *q) 227 { 228 atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); 229 } 230 231 static void clear_exec_queue_extra_ref(struct xe_exec_queue *q) 232 { 233 atomic_and(~EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); 234 } 235 236 static bool exec_queue_pending_resume(struct xe_exec_queue *q) 237 { 238 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME; 239 } 240 241 static void 
set_exec_queue_pending_resume(struct xe_exec_queue *q) 242 { 243 atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 244 } 245 246 static void clear_exec_queue_pending_resume(struct xe_exec_queue *q) 247 { 248 atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 249 } 250 251 static bool exec_queue_pending_tdr_exit(struct xe_exec_queue *q) 252 { 253 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT; 254 } 255 256 static void set_exec_queue_pending_tdr_exit(struct xe_exec_queue *q) 257 { 258 atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state); 259 } 260 261 static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q) 262 { 263 atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state); 264 } 265 266 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) 267 { 268 return (atomic_read(&q->guc->state) & 269 (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | 270 EXEC_QUEUE_STATE_BANNED)); 271 } 272 273 static void guc_submit_fini(struct drm_device *drm, void *arg) 274 { 275 struct xe_guc *guc = arg; 276 struct xe_device *xe = guc_to_xe(guc); 277 struct xe_gt *gt = guc_to_gt(guc); 278 int ret; 279 280 ret = wait_event_timeout(guc->submission_state.fini_wq, 281 xa_empty(&guc->submission_state.exec_queue_lookup), 282 HZ * 5); 283 284 drain_workqueue(xe->destroy_wq); 285 286 xe_gt_assert(gt, ret); 287 288 xa_destroy(&guc->submission_state.exec_queue_lookup); 289 } 290 291 static void guc_submit_wedged_fini(void *arg) 292 { 293 struct xe_guc *guc = arg; 294 struct xe_exec_queue *q; 295 unsigned long index; 296 297 mutex_lock(&guc->submission_state.lock); 298 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 299 if (exec_queue_wedged(q)) { 300 mutex_unlock(&guc->submission_state.lock); 301 xe_exec_queue_put(q); 302 mutex_lock(&guc->submission_state.lock); 303 } 304 } 305 mutex_unlock(&guc->submission_state.lock); 306 } 307 308 static const struct xe_exec_queue_ops guc_exec_queue_ops; 309 310 static void primelockdep(struct xe_guc *guc) 311 { 312 if (!IS_ENABLED(CONFIG_LOCKDEP)) 313 return; 314 315 fs_reclaim_acquire(GFP_KERNEL); 316 317 mutex_lock(&guc->submission_state.lock); 318 mutex_unlock(&guc->submission_state.lock); 319 320 fs_reclaim_release(GFP_KERNEL); 321 } 322 323 /** 324 * xe_guc_submit_init() - Initialize GuC submission. 325 * @guc: the &xe_guc to initialize 326 * @num_ids: number of GuC context IDs to use 327 * 328 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all 329 * GuC context IDs supported by the GuC firmware should be used for submission. 330 * 331 * Only VF drivers will have to provide explicit number of GuC context IDs 332 * that they can use for submission. 333 * 334 * Return: 0 on success or a negative error code on failure. 
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_MASK(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path;
	 * worst case the user gets -ENOMEM on engine create and has to try
	 * again.
	 *
	 * FIXME: Have the caller pre-alloc or post-alloc with GFP_KERNEL to
	 * prevent failure.
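	 *
	 * One possible shape for that fix (illustrative only, not from the
	 * original source): reserve the GuC id range with GFP_KERNEL before
	 * entering the dma-fence signalling critical section, and only publish
	 * the lookup entries under submission_state.lock here.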
455 */ 456 lockdep_assert_held(&guc->submission_state.lock); 457 458 ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, 459 q->width); 460 if (ret < 0) 461 return ret; 462 463 q->guc->id = ret; 464 465 for (i = 0; i < q->width; ++i) { 466 ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup, 467 q->guc->id + i, q, GFP_NOWAIT)); 468 if (ret) 469 goto err_release; 470 } 471 472 return 0; 473 474 err_release: 475 __release_guc_id(guc, q, i); 476 477 return ret; 478 } 479 480 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 481 { 482 mutex_lock(&guc->submission_state.lock); 483 __release_guc_id(guc, q, q->width); 484 mutex_unlock(&guc->submission_state.lock); 485 } 486 487 struct exec_queue_policy { 488 u32 count; 489 struct guc_update_exec_queue_policy h2g; 490 }; 491 492 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) 493 { 494 size_t bytes = sizeof(policy->h2g.header) + 495 (sizeof(policy->h2g.klv[0]) * policy->count); 496 497 return bytes / sizeof(u32); 498 } 499 500 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, 501 u16 guc_id) 502 { 503 policy->h2g.header.action = 504 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 505 policy->h2g.header.guc_id = guc_id; 506 policy->count = 0; 507 } 508 509 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ 510 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ 511 u32 data) \ 512 { \ 513 XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 514 \ 515 policy->h2g.klv[policy->count].kl = \ 516 FIELD_PREP(GUC_KLV_0_KEY, \ 517 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 518 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 519 policy->h2g.klv[policy->count].value = data; \ 520 policy->count++; \ 521 } 522 523 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 524 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 525 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) 526 MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY) 527 #undef MAKE_EXEC_QUEUE_POLICY_ADD 528 529 static const int xe_exec_queue_prio_to_guc[] = { 530 [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, 531 [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, 532 [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, 533 [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, 534 }; 535 536 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) 537 { 538 struct exec_queue_policy policy; 539 enum xe_exec_queue_priority prio = q->sched_props.priority; 540 u32 timeslice_us = q->sched_props.timeslice_us; 541 u32 slpc_exec_queue_freq_req = 0; 542 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; 543 544 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 545 546 if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY) 547 slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; 548 549 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 550 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); 551 __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); 552 __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); 553 __guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy, 554 slpc_exec_queue_freq_req); 555 556 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 557 __guc_exec_queue_policy_action_size(&policy), 0, 0); 558 } 559 560 static void set_min_preemption_timeout(struct xe_guc *guc, struct 
xe_exec_queue *q) 561 { 562 struct exec_queue_policy policy; 563 564 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 565 __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); 566 567 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 568 __guc_exec_queue_policy_action_size(&policy), 0, 0); 569 } 570 571 #define parallel_read(xe_, map_, field_) \ 572 xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 573 field_) 574 #define parallel_write(xe_, map_, field_, val_) \ 575 xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 576 field_, val_) 577 578 static void __register_mlrc_exec_queue(struct xe_guc *guc, 579 struct xe_exec_queue *q, 580 struct guc_ctxt_registration_info *info) 581 { 582 #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) 583 u32 action[MAX_MLRC_REG_SIZE]; 584 int len = 0; 585 int i; 586 587 xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q)); 588 589 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 590 action[len++] = info->flags; 591 action[len++] = info->context_idx; 592 action[len++] = info->engine_class; 593 action[len++] = info->engine_submit_mask; 594 action[len++] = info->wq_desc_lo; 595 action[len++] = info->wq_desc_hi; 596 action[len++] = info->wq_base_lo; 597 action[len++] = info->wq_base_hi; 598 action[len++] = info->wq_size; 599 action[len++] = q->width; 600 action[len++] = info->hwlrca_lo; 601 action[len++] = info->hwlrca_hi; 602 603 for (i = 1; i < q->width; ++i) { 604 struct xe_lrc *lrc = q->lrc[i]; 605 606 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); 607 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); 608 } 609 610 /* explicitly checks some fields that we might fixup later */ 611 xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == 612 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]); 613 xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == 614 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]); 615 xe_gt_assert(guc_to_gt(guc), q->width == 616 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]); 617 xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == 618 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]); 619 xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); 620 #undef MAX_MLRC_REG_SIZE 621 622 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 623 } 624 625 static void __register_exec_queue(struct xe_guc *guc, 626 struct guc_ctxt_registration_info *info) 627 { 628 u32 action[] = { 629 XE_GUC_ACTION_REGISTER_CONTEXT, 630 info->flags, 631 info->context_idx, 632 info->engine_class, 633 info->engine_submit_mask, 634 info->wq_desc_lo, 635 info->wq_desc_hi, 636 info->wq_base_lo, 637 info->wq_base_hi, 638 info->wq_size, 639 info->hwlrca_lo, 640 info->hwlrca_hi, 641 }; 642 643 /* explicitly checks some fields that we might fixup later */ 644 xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == 645 action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]); 646 xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == 647 action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]); 648 xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == 649 action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]); 650 651 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 652 } 653 654 static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) 655 { 656 struct xe_guc *guc = exec_queue_to_guc(q); 657 struct xe_device *xe = guc_to_xe(guc); 658 struct xe_lrc *lrc = q->lrc[0]; 659 struct guc_ctxt_registration_info info; 660 661 xe_gt_assert(guc_to_gt(guc), 
!exec_queue_registered(q)); 662 xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT); 663 664 memset(&info, 0, sizeof(info)); 665 info.context_idx = q->guc->id; 666 info.engine_class = xe_engine_class_to_guc_class(q->class); 667 info.engine_submit_mask = q->logical_mask; 668 info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); 669 info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); 670 info.flags = CONTEXT_REGISTRATION_FLAG_KMD | 671 FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type); 672 673 if (xe_exec_queue_is_parallel(q)) { 674 u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); 675 struct iosys_map map = xe_lrc_parallel_map(lrc); 676 677 info.wq_desc_lo = lower_32_bits(ggtt_addr + 678 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 679 info.wq_desc_hi = upper_32_bits(ggtt_addr + 680 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 681 info.wq_base_lo = lower_32_bits(ggtt_addr + 682 offsetof(struct guc_submit_parallel_scratch, wq[0])); 683 info.wq_base_hi = upper_32_bits(ggtt_addr + 684 offsetof(struct guc_submit_parallel_scratch, wq[0])); 685 info.wq_size = WQ_SIZE; 686 687 q->guc->wqi_head = 0; 688 q->guc->wqi_tail = 0; 689 xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); 690 parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); 691 } 692 693 /* 694 * We must keep a reference for LR engines if engine is registered with 695 * the GuC as jobs signal immediately and can't destroy an engine if the 696 * GuC has a reference to it. 697 */ 698 if (xe_exec_queue_is_lr(q)) 699 xe_exec_queue_get(q); 700 701 set_exec_queue_registered(q); 702 trace_xe_exec_queue_register(q); 703 if (xe_exec_queue_is_parallel(q)) 704 __register_mlrc_exec_queue(guc, q, &info); 705 else 706 __register_exec_queue(guc, &info); 707 init_policies(guc, q); 708 } 709 710 static u32 wq_space_until_wrap(struct xe_exec_queue *q) 711 { 712 return (WQ_SIZE - q->guc->wqi_tail); 713 } 714 715 static bool vf_recovery(struct xe_guc *guc) 716 { 717 return xe_gt_recovery_pending(guc_to_gt(guc)); 718 } 719 720 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) 721 { 722 struct xe_guc *guc = exec_queue_to_guc(q); 723 struct xe_device *xe = guc_to_xe(guc); 724 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 725 unsigned int sleep_period_ms = 1; 726 727 #define AVAILABLE_SPACE \ 728 CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) 729 if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) { 730 try_again: 731 q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); 732 if (wqi_size > AVAILABLE_SPACE) { 733 if (sleep_period_ms == 1024) { 734 xe_gt_reset_async(q->gt); 735 return -ENODEV; 736 } 737 738 msleep(sleep_period_ms); 739 sleep_period_ms <<= 1; 740 goto try_again; 741 } 742 } 743 #undef AVAILABLE_SPACE 744 745 return 0; 746 } 747 748 static int wq_noop_append(struct xe_exec_queue *q) 749 { 750 struct xe_guc *guc = exec_queue_to_guc(q); 751 struct xe_device *xe = guc_to_xe(guc); 752 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 753 u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; 754 755 if (wq_wait_for_space(q, wq_space_until_wrap(q))) 756 return -ENODEV; 757 758 xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw)); 759 760 parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], 761 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 762 FIELD_PREP(WQ_LEN_MASK, len_dw)); 763 q->guc->wqi_tail = 0; 764 765 return 0; 766 } 767 768 static void wq_item_append(struct xe_exec_queue *q) 769 { 770 struct xe_guc *guc = exec_queue_to_guc(q); 771 
struct xe_device *xe = guc_to_xe(guc); 772 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 773 #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ 774 u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; 775 u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); 776 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 777 int i = 0, j; 778 779 if (wqi_size > wq_space_until_wrap(q)) { 780 if (wq_noop_append(q)) 781 return; 782 } 783 if (wq_wait_for_space(q, wqi_size)) 784 return; 785 786 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 787 FIELD_PREP(WQ_LEN_MASK, len_dw); 788 wqi[i++] = xe_lrc_descriptor(q->lrc[0]); 789 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | 790 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); 791 wqi[i++] = 0; 792 for (j = 1; j < q->width; ++j) { 793 struct xe_lrc *lrc = q->lrc[j]; 794 795 wqi[i++] = lrc->ring.tail / sizeof(u64); 796 } 797 798 xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); 799 800 iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, 801 wq[q->guc->wqi_tail / sizeof(u32)])); 802 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); 803 q->guc->wqi_tail += wqi_size; 804 xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); 805 806 xe_device_wmb(xe); 807 808 map = xe_lrc_parallel_map(q->lrc[0]); 809 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); 810 } 811 812 #define RESUME_PENDING ~0x0ull 813 static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) 814 { 815 struct xe_guc *guc = exec_queue_to_guc(q); 816 struct xe_lrc *lrc = q->lrc[0]; 817 u32 action[3]; 818 u32 g2h_len = 0; 819 u32 num_g2h = 0; 820 int len = 0; 821 bool extra_submit = false; 822 823 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 824 825 if (!job->skip_emit || job->last_replay) { 826 if (xe_exec_queue_is_parallel(q)) 827 wq_item_append(q); 828 else 829 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 830 job->last_replay = false; 831 } 832 833 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) 834 return; 835 836 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { 837 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 838 action[len++] = q->guc->id; 839 action[len++] = GUC_CONTEXT_ENABLE; 840 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 841 num_g2h = 1; 842 if (xe_exec_queue_is_parallel(q)) 843 extra_submit = true; 844 845 q->guc->resume_time = RESUME_PENDING; 846 set_exec_queue_pending_enable(q); 847 set_exec_queue_enabled(q); 848 trace_xe_exec_queue_scheduling_enable(q); 849 } else { 850 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 851 action[len++] = q->guc->id; 852 trace_xe_exec_queue_submit(q); 853 } 854 855 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); 856 857 if (extra_submit) { 858 len = 0; 859 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 860 action[len++] = q->guc->id; 861 trace_xe_exec_queue_submit(q); 862 863 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 864 } 865 } 866 867 static struct dma_fence * 868 guc_exec_queue_run_job(struct drm_sched_job *drm_job) 869 { 870 struct xe_sched_job *job = to_xe_sched_job(drm_job); 871 struct xe_exec_queue *q = job->q; 872 struct xe_guc *guc = exec_queue_to_guc(q); 873 bool lr = xe_exec_queue_is_lr(q), killed_or_banned_or_wedged = 874 exec_queue_killed_or_banned_or_wedged(q); 875 876 xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 877 exec_queue_banned(q) || exec_queue_suspended(q)); 878 879 trace_xe_sched_job_run(job); 880 881 if 
(!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		if (!job->skip_emit)
			q->ring_ops->emit_job(job);
		submit_exec_queue(q, job);
		job->skip_emit = false;
	}

	/*
	 * We don't care about job-fence ordering in LR VMs because these fences
	 * are never exported; they are used solely to keep jobs on the pending
	 * list. Once a queue enters an error state, there's no need to track
	 * them.
	 */
	if (killed_or_banned_or_wedged && lr)
		xe_sched_job_set_error(job, -ECANCELED);

	return job->fence;
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \
	u32 action[] = { \
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
		q->guc->id, \
		GUC_CONTEXT_##enable_disable, \
	}
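/*
 * Disable scheduling and deregister the context in one flow: drop the
 * preemption timeout to its minimum so the context switches out quickly, wait
 * for any in-flight enable/disable G2H to settle, then send the
 * schedule-disable H2G with the context marked as destroyed. The matching
 * deregister is issued later from the G2H handler, which is why G2H credits
 * for both messages are reserved up front.
 */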
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	set_min_preemption_timeout(guc, q);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc),
				 HZ * 5);
	if (!ret && !vf_recovery(guc)) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		if (!xe_exec_queue_is_lr(q))
			xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save the state of exec queues registered with the GuC by taking a ref to
 * each queue. Register a devm handler to drop the refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
				       guc_submit_wedged_fini, guc);
	if (err) {
		xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2, "
			  "although device is wedged.\n");
		return;
	}

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		if (xe_exec_queue_get_unless_zero(q))
			set_exec_queue_wedged(q);
	mutex_unlock(&guc->submission_state.lock);
}

static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != 2)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}
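/*
 * Long-running (LR) queues have no job timeout, so this worker takes the place
 * of the TDR for them: stop submission, disable scheduling / deregister the
 * context if it is still registered, then cancel whatever is left on the
 * pending list.
 */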
static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_gpu_scheduler *sched = &ge->sched;
	struct xe_sched_job *job;
	bool wedged = false;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));

	if (vf_recovery(guc))
		return;

	trace_xe_exec_queue_lr_cleanup(q);

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to mark
	 * the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			return;

		if (!ret) {
			xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
				   q->guc->id);
			xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n",
				       q->guc->id);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
		xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);

	xe_hw_fence_irq_stop(q->fence_irq);

	xe_sched_submission_start(sched);

	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(job, -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	xe_hw_fence_irq_start(q->fence_irq);
}
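/*
 * check_timeout() compares the context timestamp sampled when the job started
 * against the current context timestamp and converts the delta from GT clock
 * ticks to milliseconds. As a rough illustrative example at the 19.2 MHz
 * reference clock mentioned below: a delta of 192,000,000 ticks is ~10,000 ms
 * of run time, which ADJUST_FIVE_PERCENT() bumps to 10,500 ms before comparing
 * it against the job timeout.
 */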
#define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)

static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz; be paranoid and catch
	 * possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}
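/*
 * enable_scheduling() blocks until the GuC acknowledges the schedule-enable
 * (or the GuC stops / VF recovery kicks in), banning the queue and kicking a
 * GT reset on timeout. disable_scheduling() only sends the H2G and leaves the
 * wait to the caller; with @immediate set it first drops the preemption
 * timeout so the disable takes effect right away.
 */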
static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		if (!xe_exec_queue_is_lr(q))
			xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate)
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
}
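/*
 * TDR entry point for non-LR queues. Rough flow: stop submission, capture
 * state for a potential devcoredump, disable scheduling so the context
 * timestamps can be read safely, then decide whether the job really exceeded
 * its timeout. False positives re-enable scheduling and rearm the timer;
 * genuine hangs ban the queue, deregister it and cancel the remaining jobs.
 */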
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	unsigned int fw_ref;
	int err = -ETIME;
	pid_t pid = -1;
	int i = 0;
	bool wedged = false, skip_timeout_check;

	xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_lr(q));

	/*
	 * TDR has fired before the free job worker. Common if the exec queue is
	 * closed immediately after the last fence signals. Add back to the
	 * pending list so the job can be freed and kick the scheduler, ensuring
	 * the free job is not lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
	    vf_recovery(guc))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q) ||
		exec_queue_destroyed(q);

	/*
	 * If devcoredump not captured and GuC capture for the job is not ready,
	 * do a manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_queue(q);

		xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
	}

	/*
	 * XXX: Sampling timeout doesn't work in wedged mode as we have to
	 * modify scheduling state to read timestamp. We could read the
	 * timestamp from a register to accumulate current running time but this
	 * doesn't work for SRIOV. For now assuming timeouts in wedged mode are
	 * genuine timeouts.
	 */
	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Engine state now stable, disable scheduling to check timestamp */
	if (!wedged && exec_queue_registered(q)) {
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;

		if (!exec_queue_destroyed(q)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(q) &&
						  !exec_queue_pending_disable(q)) ||
						 xe_guc_read_stopped(guc) ||
						 vf_recovery(guc), HZ * 5);
			if (vf_recovery(guc))
				goto handle_vf_resume;
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			/*
			 * This flag communicates to the G2H handler that the
			 * schedule disable originated from a timeout check. The
			 * G2H handler then avoids triggering cleanup or
			 * deregistering the exec queue.
			 */
			set_exec_queue_check_timeout(q);
			disable_scheduling(q, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			goto handle_vf_resume;
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   q->guc->id);
			xe_devcoredump(q, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       q->guc->id, ret, xe_guc_read_stopped(guc));
			set_exec_queue_extra_ref(q);
			xe_exec_queue_get(q);	/* GT reset owns this */
			set_exec_queue_banned(q);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	/*
	 * Check if the job actually timed out; if not, restart job execution
	 * and the TDR.
	 */
	if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
	    !exec_queue_reset(q) && exec_queue_registered(q)) {
		clear_exec_queue_check_timeout(q);
		goto sched_enable;
	}

	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			clear_exec_queue_check_timeout(q);
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Finish cleaning up exec queue via deregister */
	set_exec_queue_banned(q);
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		set_exec_queue_extra_ref(q);
		xe_exec_queue_get(q);
		__deregister_exec_queue(guc, q);
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);

	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

	return DRM_GPU_SCHED_STAT_RESET;

sched_enable:
	set_exec_queue_pending_tdr_exit(q);
	enable_scheduling(q);
rearm:
	/*
	 * XXX: Ideally we would want to adjust the timeout based on the current
	 * execution time, but there is currently no easy way to do so in the
	 * DRM scheduler. With some thought, do this in a follow up.
	 */
	xe_sched_submission_start(sched);
handle_vf_resume:
	return DRM_GPU_SCHED_STAT_NO_HANG;
}
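/*
 * Teardown helpers: guc_exec_queue_fini() releases the GuC id and the
 * scheduler objects, while the destroy path below defers the final
 * xe_exec_queue_fini() to a worker because the scheduler cannot be torn down
 * from within its own callbacks (see the comment in __guc_exec_queue_destroy()).
 */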
static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due to the sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_pm_runtime_get(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);

	xe_pm_runtime_put(guc_to_xe(guc));
}

static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, so this needs to be
	 * async as we fini the scheduler when the engine is fini'd and the
	 * scheduler can't complete fini within itself (circular dependency).
	 * Async resolves this and we don't really care when everything is
	 * fini'd, just that it is.
	 */
	guc_exec_queue_destroy_async(q);
}

static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	/*
	 * Expected state transitions for cleanup:
	 * - If the exec queue is registered and GuC firmware is running, we must first
	 *   disable scheduling and deregister the queue to ensure proper teardown and
	 *   resource release in the GuC, then destroy the exec queue on driver side.
	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
	 *   we cannot expect a response for the deregister request.
In this case, 1512 * it is safe to directly destroy the exec queue on driver side, as the GuC 1513 * will not process further requests and all resources must be cleaned up locally. 1514 */ 1515 if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) 1516 disable_scheduling_deregister(guc, q); 1517 else 1518 __guc_exec_queue_destroy(guc, q); 1519 } 1520 1521 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) 1522 { 1523 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); 1524 } 1525 1526 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) 1527 { 1528 struct xe_exec_queue *q = msg->private_data; 1529 struct xe_guc *guc = exec_queue_to_guc(q); 1530 1531 if (guc_exec_queue_allowed_to_change_state(q)) 1532 init_policies(guc, q); 1533 kfree(msg); 1534 } 1535 1536 static void __suspend_fence_signal(struct xe_exec_queue *q) 1537 { 1538 struct xe_guc *guc = exec_queue_to_guc(q); 1539 struct xe_device *xe = guc_to_xe(guc); 1540 1541 if (!q->guc->suspend_pending) 1542 return; 1543 1544 WRITE_ONCE(q->guc->suspend_pending, false); 1545 1546 /* 1547 * We use a GuC shared wait queue for VFs because the VF resfix start 1548 * interrupt must be able to wake all instances of suspend_wait. This 1549 * prevents the VF migration worker from being starved during 1550 * scheduling. 1551 */ 1552 if (IS_SRIOV_VF(xe)) 1553 wake_up_all(&guc->ct.wq); 1554 else 1555 wake_up(&q->guc->suspend_wait); 1556 } 1557 1558 static void suspend_fence_signal(struct xe_exec_queue *q) 1559 { 1560 struct xe_guc *guc = exec_queue_to_guc(q); 1561 1562 xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || 1563 xe_guc_read_stopped(guc)); 1564 xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); 1565 1566 __suspend_fence_signal(q); 1567 } 1568 1569 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) 1570 { 1571 struct xe_exec_queue *q = msg->private_data; 1572 struct xe_guc *guc = exec_queue_to_guc(q); 1573 1574 if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && 1575 exec_queue_enabled(q)) { 1576 wait_event(guc->ct.wq, vf_recovery(guc) || 1577 ((q->guc->resume_time != RESUME_PENDING || 1578 xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); 1579 1580 if (!xe_guc_read_stopped(guc)) { 1581 s64 since_resume_ms = 1582 ktime_ms_delta(ktime_get(), 1583 q->guc->resume_time); 1584 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1585 since_resume_ms; 1586 1587 if (wait_ms > 0 && q->guc->resume_time) 1588 msleep(wait_ms); 1589 1590 set_exec_queue_suspended(q); 1591 disable_scheduling(q, false); 1592 } 1593 } else if (q->guc->suspend_pending) { 1594 set_exec_queue_suspended(q); 1595 suspend_fence_signal(q); 1596 } 1597 } 1598 1599 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1600 { 1601 struct xe_exec_queue *q = msg->private_data; 1602 1603 if (guc_exec_queue_allowed_to_change_state(q)) { 1604 clear_exec_queue_suspended(q); 1605 if (!exec_queue_enabled(q)) { 1606 q->guc->resume_time = RESUME_PENDING; 1607 set_exec_queue_pending_resume(q); 1608 enable_scheduling(q); 1609 } 1610 } else { 1611 clear_exec_queue_suspended(q); 1612 } 1613 } 1614 1615 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ 1616 #define SET_SCHED_PROPS 2 1617 #define SUSPEND 3 1618 #define RESUME 4 1619 #define OPCODE_MASK 0xf 1620 #define MSG_LOCKED BIT(8) 1621 #define MSG_HEAD BIT(9) 1622 1623 static void guc_exec_queue_process_msg(struct xe_sched_msg 
*msg) 1624 { 1625 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); 1626 1627 trace_xe_sched_msg_recv(msg); 1628 1629 switch (msg->opcode) { 1630 case CLEANUP: 1631 __guc_exec_queue_process_msg_cleanup(msg); 1632 break; 1633 case SET_SCHED_PROPS: 1634 __guc_exec_queue_process_msg_set_sched_props(msg); 1635 break; 1636 case SUSPEND: 1637 __guc_exec_queue_process_msg_suspend(msg); 1638 break; 1639 case RESUME: 1640 __guc_exec_queue_process_msg_resume(msg); 1641 break; 1642 default: 1643 XE_WARN_ON("Unknown message type"); 1644 } 1645 1646 xe_pm_runtime_put(xe); 1647 } 1648 1649 static const struct drm_sched_backend_ops drm_sched_ops = { 1650 .run_job = guc_exec_queue_run_job, 1651 .free_job = guc_exec_queue_free_job, 1652 .timedout_job = guc_exec_queue_timedout_job, 1653 }; 1654 1655 static const struct xe_sched_backend_ops xe_sched_ops = { 1656 .process_msg = guc_exec_queue_process_msg, 1657 }; 1658 1659 static int guc_exec_queue_init(struct xe_exec_queue *q) 1660 { 1661 struct xe_gpu_scheduler *sched; 1662 struct xe_guc *guc = exec_queue_to_guc(q); 1663 struct xe_guc_exec_queue *ge; 1664 long timeout; 1665 int err, i; 1666 1667 xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); 1668 1669 ge = kzalloc(sizeof(*ge), GFP_KERNEL); 1670 if (!ge) 1671 return -ENOMEM; 1672 1673 q->guc = ge; 1674 ge->q = q; 1675 init_rcu_head(&ge->rcu); 1676 init_waitqueue_head(&ge->suspend_wait); 1677 1678 for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) 1679 INIT_LIST_HEAD(&ge->static_msgs[i].link); 1680 1681 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : 1682 msecs_to_jiffies(q->sched_props.job_timeout_ms); 1683 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, 1684 NULL, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, 1685 timeout, guc_to_gt(guc)->ordered_wq, NULL, 1686 q->name, gt_to_xe(q->gt)->drm.dev); 1687 if (err) 1688 goto err_free; 1689 1690 sched = &ge->sched; 1691 err = xe_sched_entity_init(&ge->entity, sched); 1692 if (err) 1693 goto err_sched; 1694 1695 if (xe_exec_queue_is_lr(q)) 1696 INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); 1697 1698 mutex_lock(&guc->submission_state.lock); 1699 1700 err = alloc_guc_id(guc, q); 1701 if (err) 1702 goto err_entity; 1703 1704 q->entity = &ge->entity; 1705 1706 if (xe_guc_read_stopped(guc) || vf_recovery(guc)) 1707 xe_sched_stop(sched); 1708 1709 mutex_unlock(&guc->submission_state.lock); 1710 1711 xe_exec_queue_assign_name(q, q->guc->id); 1712 1713 trace_xe_exec_queue_create(q); 1714 1715 return 0; 1716 1717 err_entity: 1718 mutex_unlock(&guc->submission_state.lock); 1719 xe_sched_entity_fini(&ge->entity); 1720 err_sched: 1721 xe_sched_fini(&ge->sched); 1722 err_free: 1723 kfree(ge); 1724 1725 return err; 1726 } 1727 1728 static void guc_exec_queue_kill(struct xe_exec_queue *q) 1729 { 1730 trace_xe_exec_queue_kill(q); 1731 set_exec_queue_killed(q); 1732 __suspend_fence_signal(q); 1733 xe_guc_exec_queue_trigger_cleanup(q); 1734 } 1735 1736 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 1737 u32 opcode) 1738 { 1739 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); 1740 1741 INIT_LIST_HEAD(&msg->link); 1742 msg->opcode = opcode & OPCODE_MASK; 1743 msg->private_data = q; 1744 1745 trace_xe_sched_msg_add(msg); 1746 if (opcode & MSG_HEAD) 1747 xe_sched_add_msg_head(&q->guc->sched, msg); 1748 else if (opcode & MSG_LOCKED) 1749 xe_sched_add_msg_locked(&q->guc->sched, msg); 1750 else 1751 xe_sched_add_msg(&q->guc->sched, msg); 1752 } 1753 1754 
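/*
 * The *_try_add_msg*() helpers below only queue a message if it is not already
 * on a list, which makes them safe to use with the statically allocated
 * suspend/resume messages. MSG_LOCKED indicates the caller already holds the
 * scheduler message lock and MSG_HEAD queues the message at the head of the
 * list.
 */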
static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q,
					    struct xe_sched_msg *msg,
					    u32 opcode)
{
	if (!list_empty(&msg->link))
		return;

	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD);
}

static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
				       struct xe_sched_msg *msg,
				       u32 opcode)
{
	if (!list_empty(&msg->link))
		return false;

	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);

	return true;
}

#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2
static void guc_exec_queue_destroy(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
		guc_exec_queue_add_msg(q, msg, CLEANUP);
	else
		__guc_exec_queue_destroy(exec_queue_to_guc(q), q);
}

static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
				       enum xe_exec_queue_priority priority)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.priority == priority ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.priority = priority;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.timeslice_us == timeslice_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.timeslice_us = timeslice_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
					      u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (exec_queue_killed_or_banned_or_wedged(q))
		return -EINVAL;

	xe_sched_msg_lock(sched);
	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
		q->guc->suspend_pending = true;
	xe_sched_msg_unlock(sched);

	return 0;
}

static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	/*
	 * Likely don't need to check exec_queue_killed() as we clear
	 * suspend_pending upon kill, but to be paranoid about races in which
	 * suspend_pending is set after kill, also check kill here.
static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	/*
	 * Likely don't need to check exec_queue_killed() as we clear
	 * suspend_pending upon kill, but to be paranoid about races in which
	 * suspend_pending is set after kill, also check kill here.
	 */
#define WAIT_COND \
	(!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \
	 xe_guc_read_stopped(guc))

retry:
	if (IS_SRIOV_VF(xe))
		ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND ||
						       vf_recovery(guc),
						       HZ * 5);
	else
		ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
						       WAIT_COND, HZ * 5);

	if (vf_recovery(guc) && !xe_device_wedged(guc_to_xe(guc)))
		return -EAGAIN;

	if (!ret) {
		xe_gt_warn(guc_to_gt(guc),
			   "Suspend fence, guc_id=%d, failed to respond",
			   q->guc->id);
		/* XXX: Trigger GT reset? */
		return -ETIME;
	} else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
		/* Corner case on RESFIX DONE where vf_recovery() changes */
		goto retry;
	}

#undef WAIT_COND

	return ret < 0 ? ret : 0;
}

static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);

	xe_sched_msg_lock(sched);
	guc_exec_queue_try_add_msg(q, msg, RESUME);
	xe_sched_msg_unlock(sched);
}

static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
{
	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
}

/*
 * All of these functions are an abstraction layer which other parts of Xe can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
};

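/*
 * Per-queue teardown used while GuC submission is stopped (GT reset): halt the
 * DRM scheduler, drop references/state for G2H messages that will never
 * arrive, and ban queues left with started-but-unfinished jobs.
 */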
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
			xe_exec_queue_put(q);
		else if (exec_queue_destroyed(q))
			__guc_exec_queue_destroy(guc, q);
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
		   EXEC_QUEUE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		bool ban = false;

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				ban = true;
			}
		} else if (xe_exec_queue_is_lr(q) &&
			   !xe_lrc_ring_is_idle(q->lrc[0])) {
			ban = true;
		}

		if (ban) {
			set_exec_queue_banned(q);
			xe_guc_exec_queue_trigger_cleanup(q);
		}
	}
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * The atomic is not ideal, but it works to protect against a
	 * concurrent reset and to release any TDRs waiting on
	 * guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}

void xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_stop(guc, q);
	}

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */
}

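/*
 * Roll back any state transitions (register/enable/disable/resume/cleanup)
 * that were in flight when the queue was paused, recording what needs to be
 * replayed once the queue is unpaused.
 */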
static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
						       struct xe_exec_queue *q)
{
	bool pending_enable, pending_disable, pending_resume;

	pending_enable = exec_queue_pending_enable(q);
	pending_resume = exec_queue_pending_resume(q);

	if (pending_enable && pending_resume) {
		q->guc->needs_resume = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable && !pending_resume &&
	    !exec_queue_pending_tdr_exit(q)) {
		clear_exec_queue_registered(q);
		if (xe_exec_queue_is_lr(q))
			xe_exec_queue_put(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable) {
		clear_exec_queue_enabled(q);
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_tdr_exit(q);
		clear_exec_queue_pending_enable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
			  q->guc->id);
	}

	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
		clear_exec_queue_destroyed(q);
		if (exec_queue_extra_ref(q))
			xe_exec_queue_put(q);
		else
			q->guc->needs_cleanup = true;
		clear_exec_queue_extra_ref(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
			  q->guc->id);
	}

	pending_disable = exec_queue_pending_disable(q);

	if (pending_disable && exec_queue_suspended(q)) {
		clear_exec_queue_suspended(q);
		q->guc->needs_suspend = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
			  q->guc->id);
	}

	if (pending_disable) {
		if (!pending_enable)
			set_exec_queue_enabled(q);
		clear_exec_queue_pending_disable(q);
		clear_exec_queue_check_timeout(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
			  q->guc->id);
	}

	q->guc->resume_time = 0;
}

/*
 * This function is quite complex but is the only real way to ensure no state
 * is lost during VF resume flows. It scans the queue state, makes adjustments
 * as needed, and queues jobs / messages which are replayed upon unpause.
 */
static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job;
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);
	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&q->guc->lr_tdr);
	else
		cancel_delayed_work_sync(&sched->base.work_tdr);

	guc_exec_queue_revert_pending_state_change(guc, q);

	if (xe_exec_queue_is_parallel(q)) {
		struct xe_device *xe = guc_to_xe(guc);
		struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);

		/*
		 * NOP existing WQ commands that may contain stale GGTT
		 * addresses. These will be replayed upon unpause. The hardware
		 * seems to get confused if the WQ head/tail pointers are
		 * adjusted.
		 */
		for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
			parallel_write(xe, map, wq[i],
				       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
				       FIELD_PREP(WQ_LEN_MASK, 0));
	}

	job = xe_sched_first_pending_job(sched);
	if (job) {
		/*
		 * Adjust software tail so jobs submitted overwrite previous
		 * position in ring buffer with new GGTT addresses.
		 */
		for (i = 0; i < q->width; ++i)
			q->lrc[i]->ring.tail = job->ptrs[i].head;
	}
}

/**
 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
 */
void xe_guc_submit_pause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_pause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned_or_wedged(q)) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		int i;

		trace_xe_exec_queue_resubmit(q);
		if (job) {
			for (i = 0; i < q->width; ++i) {
				/*
				 * The GuC context is unregistered at this
				 * point in time. Adjusting the software ring
				 * tail ensures jobs are rewritten in their
				 * original placement, and adjusting the LRC
				 * tail ensures the newly loaded GuC / contexts
				 * only ever see the LRC tail increasing as
				 * jobs are written out.
				 */
				q->lrc[i]->ring.tail = job->ptrs[i].head;
				xe_lrc_set_ring_tail(q->lrc[i],
						     xe_lrc_ring_head(q->lrc[i]));
			}
		}
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
	xe_sched_submission_resume_tdr(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to start parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_start(q);
	}
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

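/*
 * Re-emit the ring commands of every pending job so they pick up the
 * post-recovery GGTT addresses; the emission is then skipped when the jobs are
 * actually resubmitted (see job->skip_emit).
 */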
static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
					   struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct drm_sched_job *s_job;
	struct xe_sched_job *job = NULL;

	list_for_each_entry(s_job, &sched->base.pending_list, list) {
		job = to_xe_sched_job(s_job);

		xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
			  q->guc->id, xe_sched_job_seqno(job));

		q->ring_ops->emit_job(job);
		job->skip_emit = true;
	}

	if (job)
		job->last_replay = true;
}

/**
 * xe_guc_submit_unpause_prepare - Prepare unpause submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
 */
void xe_guc_submit_unpause_prepare(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_unpause_prepare(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg;

	if (q->guc->needs_cleanup) {
		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

		guc_exec_queue_add_msg(q, msg, CLEANUP);
		q->guc->needs_cleanup = false;
	}

	if (q->guc->needs_suspend) {
		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
		xe_sched_msg_unlock(sched);

		q->guc->needs_suspend = false;
	}

	/*
	 * The resume must be in the message queue before the suspend, as it is
	 * not possible for a resume to be issued while a suspend is pending,
	 * but the inverse is possible.
	 */
	if (q->guc->needs_resume) {
		msg = q->guc->static_msgs + STATIC_MSG_RESUME;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
		xe_sched_msg_unlock(sched);

		q->guc->needs_resume = false;
	}
}

static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);

	lockdep_assert_held(&guc->submission_state.lock);

	xe_sched_resubmit_jobs(sched);
	guc_exec_queue_replay_pending_state_change(q);
	xe_sched_submission_start(sched);
	if (needs_tdr)
		xe_guc_exec_queue_trigger_cleanup(q);
	xe_sched_submission_resume_tdr(sched);
}

/**
 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/*
		 * Prevent redundant attempts to stop parallel queues, or queues
		 * created after resfix done.
		 */
		if (q->guc->id != index ||
		    !READ_ONCE(q->guc->sched.base.pause_submit))
			continue;

		guc_exec_queue_unpause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_abort - Abort all paused submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
 */
void xe_guc_submit_pause_abort(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		xe_sched_submission_start(sched);
		if (exec_queue_killed_or_banned_or_wedged(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
		return NULL;
	}

	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));

	return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}

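/*
 * Process a SCHED_DONE response: runnable_state == 1 completes a pending
 * enable, runnable_state == 0 completes a pending disable and, if the queue is
 * marked destroyed, kicks off the deregister.
 */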
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
			      u32 runnable_state)
{
	trace_xe_exec_queue_scheduling_done(q);

	if (runnable_state == 1) {
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));

		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_tdr_exit(q);
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		bool check_timeout = exec_queue_check_timeout(q);

		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));

		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
			clear_exec_queue_pending_disable(q);
		} else {
			if (exec_queue_banned(q) || check_timeout) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			if (!check_timeout && exec_queue_destroyed(q)) {
				/*
				 * Make sure to clear the pending_disable only
				 * after sampling the destroyed state. We want
				 * to ensure we don't trigger the unregister too
				 * early with something intending to only
				 * disable scheduling. The caller doing the
				 * destroy must wait for an ongoing
				 * pending_disable before marking as destroyed.
				 */
				clear_exec_queue_pending_disable(q);
				deregister_exec_queue(guc, q);
			} else {
				clear_exec_queue_pending_disable(q);
			}
		}
	}
}

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id, runnable_state;

	if (unlikely(len < 2))
		return -EPROTO;

	guc_id = msg[0];
	runnable_state = msg[1];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		xe_gt_err(guc_to_gt(guc),
			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
			  atomic_read(&q->guc->state), q->guc->id,
			  runnable_state);
		return -EPROTO;
	}

	handle_sched_done(guc, q, runnable_state);

	return 0;
}

static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_deregister_done(q);

	clear_exec_queue_registered(q);

	if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
		xe_exec_queue_put(q);
	else
		__guc_exec_queue_destroy(guc, q);
}

int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
		xe_gt_err(guc_to_gt(guc),
			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
			  atomic_read(&q->guc->state), q->guc->id);
		return -EPROTO;
	}

	handle_deregister_done(guc, q);

	return 0;
}

int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);

	trace_xe_exec_queue_reset(q);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_exec_queue_timedout_job). Otherwise, kick the drm scheduler to
	 * cancel jobs by setting the timeout of the job to the minimum value,
	 * kicking guc_exec_queue_timedout_job.
	 */
	set_exec_queue_reset(q);
	if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
		xe_guc_exec_queue_trigger_cleanup(q);

	return 0;
}

/*
 * xe_guc_error_capture_handler - Handler of GuC captured message
 * @guc: The GuC object
 * @msg: Pointer to the message
 * @len: The message length
 *
 * When GuC captured data is ready, GuC will send the message
 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to the host. This function is
 * called first to check the status before processing the data that comes with
 * the message.
 *
 * Returns: 0 on success, negative error code on failure.
 */
int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	u32 status;

	if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
		return -EPROTO;

	status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
	if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
		xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");

	xe_guc_capture_process(guc);

	return 0;
}

int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
					       u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;
	u32 type = XE_GUC_CAT_ERR_TYPE_INVALID;

	if (unlikely(!len || len > 2))
		return -EPROTO;

	guc_id = msg[0];

	if (len == 2)
		type = msg[1];

	if (guc_id == GUC_ID_UNKNOWN) {
		/*
		 * GuC uses GUC_ID_UNKNOWN if it cannot map the CAT fault to any
		 * PF/VF context. In such a case only the PF will be notified
		 * about that fault.
		 */
		xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
		return 0;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	/*
	 * The type is HW-defined and changes based on platform, so we don't
	 * decode it in the kernel and only check if it is valid.
	 * See bspec 54047 and 72187 for details.
	 */
	if (type != XE_GUC_CAT_ERR_TYPE_INVALID)
		xe_gt_dbg(gt,
			  "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
			  type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
	else
		xe_gt_dbg(gt,
			  "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
			  xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);

	trace_xe_exec_queue_memory_cat_error(q);

	/* Treat the same as engine reset */
	set_exec_queue_reset(q);
	if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
		xe_guc_exec_queue_trigger_cleanup(q);

	return 0;
}

int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	u8 guc_class, instance;
	u32 reason;

	if (unlikely(len != 3))
		return -EPROTO;

	guc_class = msg[0];
	instance = msg[1];
	reason = msg[2];

	/* Unexpected failure of a hardware feature, log an actual error */
	xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
		  guc_class, instance, reason);

	xe_gt_reset_async(gt);

	return 0;
}

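/*
 * Snapshot helpers for parallel submission: copy the software and memory-based
 * work queue (WQ) state so it can be printed later, e.g. in a devcoredump.
 */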
static void
guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	int i;

	snapshot->guc.wqi_head = q->guc->wqi_head;
	snapshot->guc.wqi_tail = q->guc->wqi_tail;
	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
							  wq_desc.wq_status);

	if (snapshot->parallel.wq_desc.head !=
	    snapshot->parallel.wq_desc.tail) {
		for (i = snapshot->parallel.wq_desc.head;
		     i != snapshot->parallel.wq_desc.tail;
		     i = (i + sizeof(u32)) % WQ_SIZE)
			snapshot->parallel.wq[i / sizeof(u32)] =
				parallel_read(xe, map, wq[i / sizeof(u32)]);
	}
}

static void
guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);

	if (snapshot->parallel.wq_desc.head !=
	    snapshot->parallel.wq_desc.tail) {
		for (i = snapshot->parallel.wq_desc.head;
		     i != snapshot->parallel.wq_desc.tail;
		     i = (i + sizeof(u32)) % WQ_SIZE)
			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
				   snapshot->parallel.wq[i / sizeof(u32)]);
	}
}

/**
 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
 * @q: faulty exec queue
 *
 * This can be printed out in a later stage like during dev_coredump
 * analysis.
 *
 * Returns: a GuC Submit Engine snapshot object that must be freed by the
 * caller, using `xe_guc_exec_queue_snapshot_free`.
 */
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc_submit_exec_queue_snapshot *snapshot;
	int i;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);

	if (!snapshot)
		return NULL;

	snapshot->guc.id = q->guc->id;
	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
	snapshot->class = q->class;
	snapshot->logical_mask = q->logical_mask;
	snapshot->width = q->width;
	snapshot->refcount = kref_read(&q->refcount);
	snapshot->sched_timeout = sched->base.timeout;
	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
	snapshot->sched_props.preempt_timeout_us =
		q->sched_props.preempt_timeout_us;

	snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
				      GFP_ATOMIC);

	if (snapshot->lrc) {
		for (i = 0; i < q->width; ++i) {
			struct xe_lrc *lrc = q->lrc[i];

			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
		}
	}

	snapshot->schedule_state = atomic_read(&q->guc->state);
	snapshot->exec_queue_flags = q->flags;

	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_capture(q, snapshot);

	spin_lock(&sched->base.job_list_lock);
	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
					       sizeof(struct pending_list_snapshot),
					       GFP_ATOMIC);

	if (snapshot->pending_list) {
		struct xe_sched_job *job_iter;

		i = 0;
		list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
			snapshot->pending_list[i].seqno =
				xe_sched_job_seqno(job_iter);
			snapshot->pending_list[i].fence =
				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
			snapshot->pending_list[i].finished =
				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
				? 1 : 0;
			i++;
		}
	}

	spin_unlock(&sched->base.job_list_lock);

	return snapshot;
}

/**
 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
 * @snapshot: Previously captured snapshot of job.
 *
 * This captures some data that requires taking some locks, so it cannot be
 * done in the signaling path.
 */
void
xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	int i;

	if (!snapshot || !snapshot->lrc)
		return;

	for (i = 0; i < snapshot->width; ++i)
		xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
}

/**
 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given GuC Submit Engine snapshot object.
 */
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	if (!snapshot)
		return;

	drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
	drm_printf(p, "\tName: %s\n", snapshot->name);
	drm_printf(p, "\tClass: %d\n", snapshot->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
	drm_printf(p, "\tWidth: %d\n", snapshot->width);
	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n",
		   snapshot->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   snapshot->sched_props.preempt_timeout_us);

	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
		xe_lrc_snapshot_print(snapshot->lrc[i], p);

	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);

	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_print(snapshot, p);

	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
	     i++)
		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
			   snapshot->pending_list[i].seqno,
			   snapshot->pending_list[i].fence,
			   snapshot->pending_list[i].finished);
}

/**
 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	int i;

	if (!snapshot)
		return;

	if (snapshot->lrc) {
		for (i = 0; i < snapshot->width; i++)
			xe_lrc_snapshot_free(snapshot->lrc[i]);
		kfree(snapshot->lrc);
	}
	kfree(snapshot->pending_list);
	kfree(snapshot);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot;

	snapshot = xe_guc_exec_queue_snapshot_capture(q);
	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);
}

/**
 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
 * @q: Execution queue
 * @ctx_type: Type of the context
 *
 * This function registers the execution queue with the GuC. Special context
 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
 * are only applicable for iGPU and in the VF.
 * Submits the execution queue to the GuC after registering it.
 *
 * Returns: None.
 */
void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);

	xe_gt_assert(gt, IS_SRIOV_VF(xe));
	xe_gt_assert(gt, !IS_DGFX(xe));
	xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
		     ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
	xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));

	register_exec_queue(q, ctx_type);
	enable_scheduling(q);
}

/**
 * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
 *
 * This function captures and prints snapshots of **all** GuC Engines.
 */
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_exec_queue *q;
	unsigned long index;

	if (!xe_device_uc_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_print(q, p);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
 * exec queues registered to given GuC.
 * @guc: the &xe_guc struct instance
 * @scratch: scratch buffer to be used as temporary storage
 *
 * Returns: zero on success, negative error code on failure.
 */
int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
{
	struct xe_exec_queue *q;
	unsigned long index;
	int err = 0;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
		if (err)
			break;
	}
	mutex_unlock(&guc->submission_state.lock);

	return err;
}