// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sleep.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6

static int guc_submit_reset_prepare(struct xe_guc *guc);

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 10)
#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND	(1 << 11)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool
exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_pending_resume(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
}

static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND;
}

static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

static void guc_submit_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	/* Drop any wedged queue refs */
	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);

	/* Forcefully kill any remaining exec queues */
	xe_guc_ct_stop(&guc->ct);
	guc_submit_reset_prepare(guc);
	xe_guc_softreset(guc);
	xe_guc_submit_stop(guc);
	xe_uc_fw_sanitize(&guc->fw);
	xe_guc_submit_pause_abort(guc);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide an explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc);
	if (err)
		return err;

	return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc);
}
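
/*
 * Illustrative usage sketch (not from this file): a PF/bare-metal init path
 * would pass ~0 to use every context ID the firmware supports, while a VF
 * passes only its provisioned quota. "vf_ctx_id_quota" below is a made-up
 * placeholder for whatever value the VF configuration actually provides.
 *
 *	err = xe_guc_submit_init(guc, IS_SRIOV_VF(xe) ? vf_ctx_id_quota : ~0);
 *	if (err)
 *		return err;
 */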

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_INSTANCES(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case the user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
		     !xe_exec_queue_is_multi_queue_secondary(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}
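
/*
 * Rough shape of the H2G payload built by init_policies() above (a sketch
 * derived from the KLV builders, not a separate wire-format definition): a
 * HOST2GUC_UPDATE_CONTEXT_POLICIES header carrying the guc_id, followed by
 * one KLV per builder call, each with GUC_KLV_0_LEN == 1:
 *
 *	SCHEDULING_PRIORITY = xe_exec_queue_prio_to_guc[prio]
 *	EXECUTION_QUANTUM   = timeslice_us
 *	PREEMPTION_TIMEOUT  = preempt_timeout_us
 *	SLPM_GT_FREQUENCY   = slpc_exec_queue_freq_req
 */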

static void
set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

static bool vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* to wake up xe_wait_user_fence ioctl if exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	xe_sched_tdr_queue_imm(&q->guc->sched);
}

static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq, *next;
	LIST_HEAD(tmp);

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	mutex_lock(&group->list_lock);

	/*
	 * Stop all future queues from executing while the group is stopped.
	 */
	group->stopped = true;

	list_for_each_entry_safe(eq, next, &group->list, multi_queue.link)
		/*
		 * Refcount prevents an attempted removal from &group->list,
		 * temporary list allows safe iteration after dropping
		 * &group->list_lock.
		 */
		if (xe_exec_queue_get_unless_zero(eq))
			list_move_tail(&eq->multi_queue.link, &tmp);

	mutex_unlock(&group->list_lock);

	/* We cannot stop under list lock without getting inversions */
	xe_sched_submission_stop(&primary->guc->sched);
	list_for_each_entry(eq, &tmp, multi_queue.link)
		xe_sched_submission_stop(&eq->guc->sched);

	mutex_lock(&group->list_lock);
	list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) {
		/*
		 * Corner case where we got banned while stopping and not on
		 * &group->list
		 */
		if (READ_ONCE(group->banned))
			xe_guc_exec_queue_trigger_cleanup(eq);

		list_move_tail(&eq->multi_queue.link, &group->list);
		xe_exec_queue_put(eq);
	}
	mutex_unlock(&group->list_lock);
}

static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	xe_sched_submission_start(&primary->guc->sched);

	mutex_lock(&group->list_lock);
	group->stopped = false;
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_sched_submission_start(&eq->guc->sched);
	mutex_unlock(&group->list_lock);
}

static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	/* Group banned, skip timeout check in TDR */
	WRITE_ONCE(group->banned, true);
	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		/* Group banned, skip timeout check in TDR */
		WRITE_ONCE(group->banned, true);

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 cgp_lo;
	u32 cgp_hi;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

/**
 * DOC: Multi Queue Group GuC interface
 *
 * The multi queue group coordination between KMD and GuC is through a software
 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
 * allocated in the global GTT.
 *
 * CGP format:
 *
 * +-----------+---------------------------+---------------------------------------------+
 * | DWORD     | Name                      | Description                                 |
 * +-----------+---------------------------+---------------------------------------------+
 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 1..15     | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
 * +-----------+---------------------------+---------------------------------------------+
 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
 * +-----------+---------------------------+---------------------------------------------+
 * | 18..31    | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | ...       | ...                       | ...                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
 * +-----------+---------------------------+---------------------------------------------+
 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
 * +-----------+---------------------------+---------------------------------------------+
 * | 160..1023 | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 *
 * While registering Q0 with GuC, the CGP is updated with the Q0 entry and GuC
 * is notified through the XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G
 * message, which specifies the CGP address. When secondary queues are added to
 * the group, the CGP is updated with an entry for that queue and GuC is
 * notified through the XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC H2G
 * interface. GuC responds to these H2G messages with a
 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for
 * any error in the CGP. Only one of these CGP update messages can be
 * outstanding (waiting for a GuC response) at any time. The bits in the
 * KMD_QUEUE_UPDATE_MASK_DW* fields indicate which queue entry is being updated
 * in the CGP.
 *
 * The primary queue (Q0) represents the multi queue group context in GuC and
 * submission on any queue of the group must be through the Q0 GuC interface
 * only.
 *
 * As it is not required to register secondary queues with GuC, the secondary
 * queue context ids in the CGP are populated with the Q0 context id.
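 *
 * As an illustration (derived from xe_guc_exec_queue_group_cgp_update()
 * below), a queue at group position p has its LRC descriptor lower DWORD at
 * CGP DWORD (32 + 2 * p) and its context ID at DWORD (33 + 2 * p), and bit
 * (p % 32) of KMD_QUEUE_UPDATE_MASK_DW(p / 32) is set while that entry is
 * being synchronized with GuC.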
 */

#define CGP_VERSION_MAJOR_SHIFT 8

static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	if (q->multi_queue.pos / 32) {
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_multi_queue_priority priority;
	long ret;

	/*
	 * As all queues of a multi queue group use a single drm scheduler
	 * submit workqueue, CGP synchronizations with GuC are serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending the CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset gt, cleanup the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	scoped_guard(spinlock, &q->multi_queue.lock)
		priority = q->multi_queue.priority;

	xe_lrc_set_multi_queue_priority(q->lrc[0], priority);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}

static void __register_exec_queue_group(struct xe_guc *guc,
					struct xe_exec_queue *q,
					struct guc_ctxt_registration_info *info)
{
#define MAX_MULTI_QUEUE_REG_SIZE (8)
	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
	int len = 0;

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = 0; /* Reserved */
	action[len++] = info->cgp_lo;
	action[len++] = info->cgp_hi;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
#undef MAX_MULTI_QUEUE_REG_SIZE

	/*
	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from the GuC.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
	int len = 0;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));

	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
	action[len++] = q->multi_queue.group->primary->guc->id;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

	/*
	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from the GuC.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

static void
register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64);
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}
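
/*
 * Layout sketch of a parallel (multi-LRC) work queue item as emitted by
 * wq_item_append() below; derived from that function rather than from a
 * separate spec:
 *
 *	dw0:   WQ_TYPE_MULTI_LRC | (len_dw in WQ_LEN_MASK)
 *	dw1:   lrc[0] descriptor
 *	dw2:   guc_id | lrc[0] ring tail (in qwords)
 *	dw3:   0
 *	dw4..: ring tail (in qwords) of lrc[1]..lrc[width - 1]
 *
 * A WQ_TYPE_NOOP item padding the remaining space is written first when the
 * item would not fit before the end of the ring buffer.
 */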

static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	if (!job->restore_replay || job->last_replay) {
		if (xe_exec_queue_is_parallel(q))
			wq_item_append(q);
		else if (!exec_queue_idle_skip_suspend(q))
			xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
		job->last_replay = false;
	}

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	/*
	 * All queues in a multi-queue group will use the primary queue
	 * of the group to interface with GuC.
	 */
	q = xe_exec_queue_multi_queue_primary(q);

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	bool killed_or_banned_or_wedged =
		exec_queue_killed_or_banned_or_wedged(q);

	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

			if (exec_queue_killed_or_banned_or_wedged(primary)) {
				killed_or_banned_or_wedged = true;
				goto run_job_out;
			}

			if (!exec_queue_registered(primary))
				register_exec_queue(primary, GUC_CONTEXT_NORMAL);
		}

		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		if (!job->restore_replay)
			q->ring_ops->emit_job(job);
		submit_exec_queue(q, job);
		job->restore_replay = false;
	}

run_job_out:

	return job->fence;
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state);
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		q->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}

static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);

	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc),
				 HZ * 5);
	if (!ret && !vf_recovery(guc)) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
			       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save the state of exec queues registered with GuC by taking a ref to each
 * queue. Register a DRMM handler to drop refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
		mutex_lock(&guc->submission_state.lock);
		xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
			if (xe_exec_queue_get_unless_zero(q))
				set_exec_queue_wedged(q);
		mutex_unlock(&guc->submission_state.lock);
	} else {
		/* Forcefully kill any remaining exec queues, signal fences */
		guc_submit_reset_prepare(guc);
		xe_guc_submit_stop(guc);
		xe_guc_softreset(guc);
		xe_uc_fw_sanitize(&guc->fw);
		xe_guc_submit_pause_abort(guc);
	}
}

static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}

#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100)

static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
	if (ctx_timestamp == job->sample_timestamp) {
		if (IS_SRIOV_VF(gt_to_xe(gt)))
			xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				     xe_sched_job_seqno(job),
				     xe_sched_job_lrc_seqno(job), q->guc->id);
		else
			xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				   xe_sched_job_seqno(job),
				   xe_sched_job_lrc_seqno(job), q->guc->id);

		return xe_sched_invalidate_job(job, 0);
	}

	job->sample_timestamp = ctx_timestamp;
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid and catch
	 * possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}
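
/*
 * Worked example (illustrative numbers only): with job_timeout_ms = 5000,
 * the 5% upward adjustment in check_timeout() means a raw GT-clock interval
 * of roughly 4762 ms (5000 / 1.05) already yields running_time_ms >=
 * timeout_ms, so a job is declared timed out slightly before its nominal
 * budget to absorb GuC scheduling latency.
 */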

static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 1);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct drm_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q, *primary;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	int err = -ETIME;
	pid_t pid = -1;
	bool wedged = false, skip_timeout_check;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));

	primary = xe_exec_queue_multi_queue_primary(q);

	/*
	 * TDR has fired before free job worker. Common if exec queue
	 * immediately closed after last fence signaled. Add back to pending
	 * list so job can be freed and kick scheduler ensuring free job is not
	 * lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
	    vf_recovery(guc))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_stop(q);
	else
		xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
			     exec_queue_killed_or_banned_or_wedged(q);

	/* Skip timeout check if multi-queue group is banned */
	if (xe_exec_queue_is_multi_queue(q) &&
	    READ_ONCE(q->multi_queue.group->banned))
		skip_timeout_check = true;

	/* LR jobs can only get here if queue has been killed or hit an error */
	if (xe_exec_queue_is_lr(q))
		xe_gt_assert(guc_to_gt(guc), skip_timeout_check);

	/*
	 * If devcoredump not captured and GuC capture for the job is not ready,
	 * do manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_queue(q);
	}

	/*
	 * Check if job is actually timed out, if so restart job execution and TDR
	 */
	if (!skip_timeout_check && !check_timeout(q, job))
		goto rearm;

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	set_exec_queue_banned(q);

	/* Kick job / queue off hardware */
	if (!wedged && (exec_queue_enabled(primary) ||
			exec_queue_pending_disable(primary))) {
		int ret;

		if (exec_queue_reset(primary))
			err = -EIO;

		if (xe_uc_fw_is_running(&guc->fw)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(primary) &&
						  !exec_queue_pending_disable(primary)) ||
						 xe_guc_read_stopped(guc) ||
						 vf_recovery(guc), HZ * 5);
			if (vf_recovery(guc))
				goto handle_vf_resume;
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			disable_scheduling(primary, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !xe_uc_fw_is_running(&guc->fw) ||
					 !exec_queue_pending_disable(primary) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			goto handle_vf_resume;
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   primary->guc->id);
			xe_devcoredump(primary, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       primary->guc->id, ret, xe_guc_read_stopped(guc));
			xe_gt_reset_async(primary->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Mark all outstanding jobs as bad, thus completing them */
	xe_sched_job_set_error(job, err);
	drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
		xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);

	if (xe_exec_queue_is_multi_queue(q)) {
		xe_guc_exec_queue_group_start(q);
		xe_guc_exec_queue_group_trigger_cleanup(q);
	} else {
		xe_sched_submission_start(sched);
		xe_guc_exec_queue_trigger_cleanup(q);
	}

	/*
	 * We want the job added back to the pending list so it gets freed; this
	 * is what DRM_GPU_SCHED_STAT_NO_HANG does.
	 */
	return DRM_GPU_SCHED_STAT_NO_HANG;

rearm:
	/*
	 * XXX: Ideally want to adjust timeout based on current execution time
	 * but there is not currently an easy way to do so in the DRM scheduler.
	 * With some thought, do this in a follow up.
	 */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_start(q);
	else
		xe_sched_submission_start(sched);
handle_vf_resume:
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due to sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	guard(xe_pm_runtime)(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		mutex_lock(&group->list_lock);
		list_del(&q->multi_queue.link);
		mutex_unlock(&group->list_lock);
	}

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);
}

static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that it
	 * is.
	 */
	guc_exec_queue_destroy_async(q);
}

static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	/*
	 * Expected state transitions for cleanup:
	 * - If the exec queue is registered and GuC firmware is running, we must first
	 *   disable scheduling and deregister the queue to ensure proper teardown and
	 *   resource release in the GuC, then destroy the exec queue on driver side.
	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
	 *   we cannot expect a response for the deregister request. In this case,
	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
	 *   will not process further requests and all resources must be cleaned up locally.
1754 */ 1755 if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) 1756 disable_scheduling_deregister(guc, q); 1757 else 1758 __guc_exec_queue_destroy(guc, q); 1759 } 1760 1761 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) 1762 { 1763 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); 1764 } 1765 1766 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) 1767 { 1768 struct xe_exec_queue *q = msg->private_data; 1769 struct xe_guc *guc = exec_queue_to_guc(q); 1770 1771 if (guc_exec_queue_allowed_to_change_state(q)) 1772 init_policies(guc, q); 1773 kfree(msg); 1774 } 1775 1776 static void __suspend_fence_signal(struct xe_exec_queue *q) 1777 { 1778 struct xe_guc *guc = exec_queue_to_guc(q); 1779 struct xe_device *xe = guc_to_xe(guc); 1780 1781 if (!q->guc->suspend_pending) 1782 return; 1783 1784 WRITE_ONCE(q->guc->suspend_pending, false); 1785 1786 /* 1787 * We use a GuC shared wait queue for VFs because the VF resfix start 1788 * interrupt must be able to wake all instances of suspend_wait. This 1789 * prevents the VF migration worker from being starved during 1790 * scheduling. 1791 */ 1792 if (IS_SRIOV_VF(xe)) 1793 wake_up_all(&guc->ct.wq); 1794 else 1795 wake_up(&q->guc->suspend_wait); 1796 } 1797 1798 static void suspend_fence_signal(struct xe_exec_queue *q) 1799 { 1800 struct xe_guc *guc = exec_queue_to_guc(q); 1801 1802 xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || 1803 xe_guc_read_stopped(guc)); 1804 xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); 1805 1806 __suspend_fence_signal(q); 1807 } 1808 1809 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) 1810 { 1811 struct xe_exec_queue *q = msg->private_data; 1812 struct xe_guc *guc = exec_queue_to_guc(q); 1813 bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q); 1814 1815 if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) && 1816 !exec_queue_suspended(q) && exec_queue_enabled(q)) { 1817 wait_event(guc->ct.wq, vf_recovery(guc) || 1818 ((q->guc->resume_time != RESUME_PENDING || 1819 xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); 1820 1821 if (!xe_guc_read_stopped(guc)) { 1822 s64 since_resume_ms = 1823 ktime_ms_delta(ktime_get(), 1824 q->guc->resume_time); 1825 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1826 since_resume_ms; 1827 1828 if (wait_ms > 0 && q->guc->resume_time) 1829 xe_sleep_relaxed_ms(wait_ms); 1830 1831 set_exec_queue_suspended(q); 1832 disable_scheduling(q, false); 1833 } 1834 } else if (q->guc->suspend_pending) { 1835 if (idle_skip_suspend) 1836 set_exec_queue_idle_skip_suspend(q); 1837 set_exec_queue_suspended(q); 1838 suspend_fence_signal(q); 1839 } 1840 } 1841 1842 static void sched_context(struct xe_exec_queue *q) 1843 { 1844 struct xe_guc *guc = exec_queue_to_guc(q); 1845 struct xe_lrc *lrc = q->lrc[0]; 1846 u32 action[] = { 1847 XE_GUC_ACTION_SCHED_CONTEXT, 1848 q->guc->id, 1849 }; 1850 1851 xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q)); 1852 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1853 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1854 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1855 1856 trace_xe_exec_queue_submit(q); 1857 1858 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1859 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 1860 } 1861 1862 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1863 { 1864 struct xe_exec_queue 
*q = msg->private_data; 1865 1866 if (guc_exec_queue_allowed_to_change_state(q)) { 1867 clear_exec_queue_suspended(q); 1868 if (!exec_queue_enabled(q)) { 1869 if (exec_queue_idle_skip_suspend(q)) { 1870 struct xe_lrc *lrc = q->lrc[0]; 1871 1872 clear_exec_queue_idle_skip_suspend(q); 1873 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1874 } 1875 q->guc->resume_time = RESUME_PENDING; 1876 set_exec_queue_pending_resume(q); 1877 enable_scheduling(q); 1878 } else if (exec_queue_idle_skip_suspend(q)) { 1879 clear_exec_queue_idle_skip_suspend(q); 1880 sched_context(q); 1881 } 1882 } else { 1883 clear_exec_queue_suspended(q); 1884 clear_exec_queue_idle_skip_suspend(q); 1885 } 1886 } 1887 1888 static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg) 1889 { 1890 struct xe_exec_queue *q = msg->private_data; 1891 1892 if (guc_exec_queue_allowed_to_change_state(q)) { 1893 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) 1894 struct xe_guc *guc = exec_queue_to_guc(q); 1895 struct xe_exec_queue_group *group = q->multi_queue.group; 1896 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; 1897 int len = 0; 1898 1899 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; 1900 action[len++] = group->primary->guc->id; 1901 1902 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); 1903 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE 1904 1905 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 1906 } 1907 1908 kfree(msg); 1909 } 1910 1911 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ 1912 #define SET_SCHED_PROPS 2 1913 #define SUSPEND 3 1914 #define RESUME 4 1915 #define SET_MULTI_QUEUE_PRIORITY 5 1916 #define OPCODE_MASK 0xf 1917 #define MSG_LOCKED BIT(8) 1918 #define MSG_HEAD BIT(9) 1919 1920 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) 1921 { 1922 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); 1923 1924 trace_xe_sched_msg_recv(msg); 1925 1926 switch (msg->opcode) { 1927 case CLEANUP: 1928 __guc_exec_queue_process_msg_cleanup(msg); 1929 break; 1930 case SET_SCHED_PROPS: 1931 __guc_exec_queue_process_msg_set_sched_props(msg); 1932 break; 1933 case SUSPEND: 1934 __guc_exec_queue_process_msg_suspend(msg); 1935 break; 1936 case RESUME: 1937 __guc_exec_queue_process_msg_resume(msg); 1938 break; 1939 case SET_MULTI_QUEUE_PRIORITY: 1940 __guc_exec_queue_process_msg_set_multi_queue_priority(msg); 1941 break; 1942 default: 1943 XE_WARN_ON("Unknown message type"); 1944 } 1945 1946 xe_pm_runtime_put(xe); 1947 } 1948 1949 static const struct drm_sched_backend_ops drm_sched_ops = { 1950 .run_job = guc_exec_queue_run_job, 1951 .free_job = guc_exec_queue_free_job, 1952 .timedout_job = guc_exec_queue_timedout_job, 1953 }; 1954 1955 static const struct xe_sched_backend_ops xe_sched_ops = { 1956 .process_msg = guc_exec_queue_process_msg, 1957 }; 1958 1959 static int guc_exec_queue_init(struct xe_exec_queue *q) 1960 { 1961 struct xe_gpu_scheduler *sched; 1962 struct xe_guc *guc = exec_queue_to_guc(q); 1963 struct workqueue_struct *submit_wq = NULL; 1964 struct xe_guc_exec_queue *ge; 1965 long timeout; 1966 int err, i; 1967 1968 xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); 1969 1970 ge = kzalloc_obj(*ge); 1971 if (!ge) 1972 return -ENOMEM; 1973 1974 q->guc = ge; 1975 ge->q = q; 1976 init_rcu_head(&ge->rcu); 1977 init_waitqueue_head(&ge->suspend_wait); 1978 1979 for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) 1980 INIT_LIST_HEAD(&ge->static_msgs[i].link); 1981 1982 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : 1983 msecs_to_jiffies(q->sched_props.job_timeout_ms); 1984 1985 /* 1986 * Use primary queue's submit_wq for all secondary queues of a 1987 * multi queue group. This serialization avoids any locking around 1988 * CGP synchronization with GuC. 1989 */ 1990 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1991 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 1992 1993 submit_wq = primary->guc->sched.base.submit_wq; 1994 } 1995 1996 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, 1997 submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, 1998 timeout, guc_to_gt(guc)->ordered_wq, NULL, 1999 q->name, gt_to_xe(q->gt)->drm.dev); 2000 if (err) 2001 goto err_free; 2002 2003 sched = &ge->sched; 2004 err = xe_sched_entity_init(&ge->entity, sched); 2005 if (err) 2006 goto err_sched; 2007 2008 mutex_lock(&guc->submission_state.lock); 2009 2010 err = alloc_guc_id(guc, q); 2011 if (err) 2012 goto err_entity; 2013 2014 q->entity = &ge->entity; 2015 2016 if (xe_guc_read_stopped(guc) || vf_recovery(guc)) 2017 xe_sched_stop(sched); 2018 2019 mutex_unlock(&guc->submission_state.lock); 2020 2021 xe_exec_queue_assign_name(q, q->guc->id); 2022 2023 /* 2024 * Maintain secondary queues of the multi queue group in a list 2025 * for handling dependencies across the queues in the group. 2026 */ 2027 if (xe_exec_queue_is_multi_queue_secondary(q)) { 2028 struct xe_exec_queue_group *group = q->multi_queue.group; 2029 2030 INIT_LIST_HEAD(&q->multi_queue.link); 2031 mutex_lock(&group->list_lock); 2032 if (group->stopped) 2033 WRITE_ONCE(q->guc->sched.base.pause_submit, true); 2034 list_add_tail(&q->multi_queue.link, &group->list); 2035 mutex_unlock(&group->list_lock); 2036 } 2037 2038 if (xe_exec_queue_is_multi_queue(q)) 2039 trace_xe_exec_queue_create_multi_queue(q); 2040 else 2041 trace_xe_exec_queue_create(q); 2042 2043 return 0; 2044 2045 err_entity: 2046 mutex_unlock(&guc->submission_state.lock); 2047 xe_sched_entity_fini(&ge->entity); 2048 err_sched: 2049 xe_sched_fini(&ge->sched); 2050 err_free: 2051 kfree(ge); 2052 2053 return err; 2054 } 2055 2056 static void guc_exec_queue_kill(struct xe_exec_queue *q) 2057 { 2058 trace_xe_exec_queue_kill(q); 2059 set_exec_queue_killed(q); 2060 __suspend_fence_signal(q); 2061 xe_guc_exec_queue_trigger_cleanup(q); 2062 } 2063 2064 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 2065 u32 opcode) 2066 { 2067 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); 2068 2069 INIT_LIST_HEAD(&msg->link); 2070 msg->opcode = opcode & OPCODE_MASK; 2071 msg->private_data = q; 2072 2073 trace_xe_sched_msg_add(msg); 2074 if (opcode & MSG_HEAD) 2075 xe_sched_add_msg_head(&q->guc->sched, msg); 2076 else if (opcode & MSG_LOCKED) 2077 xe_sched_add_msg_locked(&q->guc->sched, msg); 2078 else 2079 xe_sched_add_msg(&q->guc->sched, msg); 2080 } 2081 2082 static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, 2083 struct xe_sched_msg *msg, 2084 u32 opcode) 2085 { 2086 if (!list_empty(&msg->link)) 2087 return; 2088 2089 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD); 2090 } 2091 2092 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, 2093 struct xe_sched_msg *msg, 2094 u32 opcode) 2095 { 2096 if (!list_empty(&msg->link)) 2097 return false; 2098 2099 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 2100 2101 return true; 2102 } 2103 2104 #define STATIC_MSG_CLEANUP 0 2105 #define STATIC_MSG_SUSPEND 1 2106 #define STATIC_MSG_RESUME 2 2107 static void 
guc_exec_queue_destroy(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
		guc_exec_queue_add_msg(q, msg, CLEANUP);
	else
		__guc_exec_queue_destroy(exec_queue_to_guc(q), q);
}

static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
				       enum xe_exec_queue_priority priority)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.priority == priority ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc_obj(*msg);
	if (!msg)
		return -ENOMEM;

	q->sched_props.priority = priority;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.timeslice_us == timeslice_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc_obj(*msg);
	if (!msg)
		return -ENOMEM;

	q->sched_props.timeslice_us = timeslice_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
					      u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc_obj(*msg);
	if (!msg)
		return -ENOMEM;

	q->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q,
						    enum xe_multi_queue_priority priority)
{
	struct xe_sched_msg *msg;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q));

	if (exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc_obj(*msg);
	if (!msg)
		return -ENOMEM;

	scoped_guard(spinlock, &q->multi_queue.lock) {
		if (q->multi_queue.priority == priority) {
			kfree(msg);
			return 0;
		}

		q->multi_queue.priority = priority;
	}

	guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY);

	return 0;
}

static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (exec_queue_killed_or_banned_or_wedged(q))
		return -EINVAL;

	xe_sched_msg_lock(sched);
	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
		q->guc->suspend_pending = true;
	xe_sched_msg_unlock(sched);

	return 0;
}

static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	/*
	 * Likely don't need to check exec_queue_killed() as we clear
	 * suspend_pending upon kill, but to be paranoid about races in which
	 * suspend_pending is set after kill, also check kill here.
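	 * The WAIT_COND macro below encodes exactly this: stop waiting once
	 * suspend_pending clears, the queue is killed, or the GuC has been
	 * stopped.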
2227 */ 2228 #define WAIT_COND \ 2229 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ 2230 xe_guc_read_stopped(guc)) 2231 2232 retry: 2233 if (IS_SRIOV_VF(xe)) 2234 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || 2235 vf_recovery(guc), 2236 HZ * 5); 2237 else 2238 ret = wait_event_interruptible_timeout(q->guc->suspend_wait, 2239 WAIT_COND, HZ * 5); 2240 2241 if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc)))) 2242 return -EAGAIN; 2243 2244 if (!ret) { 2245 xe_gt_warn(guc_to_gt(guc), 2246 "Suspend fence, guc_id=%d, failed to respond", 2247 q->guc->id); 2248 /* XXX: Trigger GT reset? */ 2249 return -ETIME; 2250 } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { 2251 /* Corner case on RESFIX DONE where vf_recovery() changes */ 2252 goto retry; 2253 } 2254 2255 #undef WAIT_COND 2256 2257 return ret < 0 ? ret : 0; 2258 } 2259 2260 static void guc_exec_queue_resume(struct xe_exec_queue *q) 2261 { 2262 struct xe_gpu_scheduler *sched = &q->guc->sched; 2263 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2264 struct xe_guc *guc = exec_queue_to_guc(q); 2265 2266 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); 2267 2268 xe_sched_msg_lock(sched); 2269 guc_exec_queue_try_add_msg(q, msg, RESUME); 2270 xe_sched_msg_unlock(sched); 2271 } 2272 2273 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 2274 { 2275 if (xe_exec_queue_is_multi_queue_secondary(q) && 2276 guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q))) 2277 return true; 2278 2279 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); 2280 } 2281 2282 static bool guc_exec_queue_active(struct xe_exec_queue *q) 2283 { 2284 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 2285 2286 return exec_queue_enabled(primary) && 2287 !exec_queue_pending_disable(primary); 2288 } 2289 2290 /* 2291 * All of these functions are an abstraction layer which other parts of Xe can 2292 * use to trap into the GuC backend. All of these functions, aside from init, 2293 * really shouldn't do much other than trap into the DRM scheduler which 2294 * synchronizes these operations. 
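 * Callers are expected to go through the guc_exec_queue_ops table defined
 * below; the scheduling details in the rest of this file are private to the
 * GuC backend.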
2295 */ 2296 static const struct xe_exec_queue_ops guc_exec_queue_ops = { 2297 .init = guc_exec_queue_init, 2298 .kill = guc_exec_queue_kill, 2299 .fini = guc_exec_queue_fini, 2300 .destroy = guc_exec_queue_destroy, 2301 .set_priority = guc_exec_queue_set_priority, 2302 .set_timeslice = guc_exec_queue_set_timeslice, 2303 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, 2304 .set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority, 2305 .suspend = guc_exec_queue_suspend, 2306 .suspend_wait = guc_exec_queue_suspend_wait, 2307 .resume = guc_exec_queue_resume, 2308 .reset_status = guc_exec_queue_reset_status, 2309 .active = guc_exec_queue_active, 2310 }; 2311 2312 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) 2313 { 2314 struct xe_gpu_scheduler *sched = &q->guc->sched; 2315 bool do_destroy = false; 2316 2317 /* Stop scheduling + flush any DRM scheduler operations */ 2318 xe_sched_submission_stop(sched); 2319 2320 /* Clean up lost G2H + reset engine state */ 2321 if (exec_queue_registered(q)) { 2322 if (exec_queue_destroyed(q)) 2323 do_destroy = true; 2324 } 2325 if (q->guc->suspend_pending) { 2326 set_exec_queue_suspended(q); 2327 suspend_fence_signal(q); 2328 } 2329 atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | 2330 EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | 2331 EXEC_QUEUE_STATE_SUSPENDED, 2332 &q->guc->state); 2333 q->guc->resume_time = 0; 2334 trace_xe_exec_queue_stop(q); 2335 2336 /* 2337 * Ban any engine (aside from kernel and engines used for VM ops) with a 2338 * started but not complete job or if a job has gone through a GT reset 2339 * more than twice. 2340 */ 2341 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { 2342 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2343 bool ban = false; 2344 2345 if (job) { 2346 if ((xe_sched_job_started(job) && 2347 !xe_sched_job_completed(job)) || 2348 xe_sched_invalidate_job(job, 2)) { 2349 trace_xe_sched_job_ban(job); 2350 ban = true; 2351 } 2352 } 2353 2354 if (ban) { 2355 set_exec_queue_banned(q); 2356 xe_guc_exec_queue_trigger_cleanup(q); 2357 } 2358 } 2359 2360 if (do_destroy) 2361 __guc_exec_queue_destroy(guc, q); 2362 } 2363 2364 static int guc_submit_reset_prepare(struct xe_guc *guc) 2365 { 2366 int ret; 2367 2368 /* 2369 * Using an atomic here rather than submission_state.lock as this 2370 * function can be called while holding the CT lock (engine reset 2371 * failure). submission_state.lock needs the CT lock to resubmit jobs. 2372 * Atomic is not ideal, but it works to prevent against concurrent reset 2373 * and releasing any TDRs waiting on guc->submission_state.stopped. 
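	 * The atomic_fetch_or() below both marks submission as stopped and
	 * tells the caller whether a stop was already in flight, while the
	 * barrier plus wake_up_all() releases any waiters on guc->ct.wq.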
 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

	return guc_submit_reset_prepare(guc);
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}

void xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_stop(guc, q);
	}

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */
}

static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
							struct xe_exec_queue *q)
{
	bool pending_enable, pending_disable, pending_resume;

	pending_enable = exec_queue_pending_enable(q);
	pending_resume = exec_queue_pending_resume(q);

	if (pending_enable && pending_resume) {
		q->guc->needs_resume = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable && !pending_resume) {
		clear_exec_queue_registered(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable) {
		clear_exec_queue_enabled(q);
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
			  q->guc->id);
	}

	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
		clear_exec_queue_destroyed(q);
		q->guc->needs_cleanup = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
			  q->guc->id);
	}

	pending_disable = exec_queue_pending_disable(q);

	if (pending_disable && exec_queue_suspended(q)) {
		clear_exec_queue_suspended(q);
		q->guc->needs_suspend = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
			  q->guc->id);
	}

	if (pending_disable) {
		if (!pending_enable)
			set_exec_queue_enabled(q);
		clear_exec_queue_pending_disable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
			  q->guc->id);
	}

	q->guc->resume_time = 0;
}

static void lrc_parallel_clear(struct xe_lrc *lrc)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);
	struct iosys_map map = xe_lrc_parallel_map(lrc);
	int i;

	for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
		parallel_write(xe, map, wq[i],
			       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
			       FIELD_PREP(WQ_LEN_MASK, 0));
}

/*
 * This function is quite complex, but it is the only real way to ensure no
 * state is lost during VF resume flows. The function scans the queue state,
 * makes adjustments as needed, and queues jobs / messages which are replayed
 * upon unpause.
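 * Roughly: revert any in-flight GuC state transitions so they can be replayed
 * later, NOP out parallel work queue entries holding stale GGTT addresses, and
 * rewind the software ring tail to the first pending job.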
2496 */ 2497 static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) 2498 { 2499 struct xe_gpu_scheduler *sched = &q->guc->sched; 2500 struct xe_sched_job *job; 2501 int i; 2502 2503 lockdep_assert_held(&guc->submission_state.lock); 2504 2505 /* Stop scheduling + flush any DRM scheduler operations */ 2506 xe_sched_submission_stop(sched); 2507 cancel_delayed_work_sync(&sched->base.work_tdr); 2508 2509 guc_exec_queue_revert_pending_state_change(guc, q); 2510 2511 if (xe_exec_queue_is_parallel(q)) { 2512 /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ 2513 struct xe_lrc *lrc = READ_ONCE(q->lrc[0]); 2514 2515 /* 2516 * NOP existing WQ commands that may contain stale GGTT 2517 * addresses. These will be replayed upon unpause. The hardware 2518 * seems to get confused if the WQ head/tail pointers are 2519 * adjusted. 2520 */ 2521 if (lrc) 2522 lrc_parallel_clear(lrc); 2523 } 2524 2525 job = xe_sched_first_pending_job(sched); 2526 if (job) { 2527 job->restore_replay = true; 2528 2529 /* 2530 * Adjust software tail so jobs submitted overwrite previous 2531 * position in ring buffer with new GGTT addresses. 2532 */ 2533 for (i = 0; i < q->width; ++i) 2534 q->lrc[i]->ring.tail = job->ptrs[i].head; 2535 } 2536 } 2537 2538 /** 2539 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 2540 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2541 */ 2542 void xe_guc_submit_pause(struct xe_guc *guc) 2543 { 2544 struct xe_exec_queue *q; 2545 unsigned long index; 2546 2547 mutex_lock(&guc->submission_state.lock); 2548 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2549 xe_sched_submission_stop(&q->guc->sched); 2550 mutex_unlock(&guc->submission_state.lock); 2551 } 2552 2553 /** 2554 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF. 2555 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2556 */ 2557 void xe_guc_submit_pause_vf(struct xe_guc *guc) 2558 { 2559 struct xe_exec_queue *q; 2560 unsigned long index; 2561 2562 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2563 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2564 2565 mutex_lock(&guc->submission_state.lock); 2566 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2567 /* Prevent redundant attempts to stop parallel queues */ 2568 if (q->guc->id != index) 2569 continue; 2570 2571 guc_exec_queue_pause(guc, q); 2572 } 2573 mutex_unlock(&guc->submission_state.lock); 2574 } 2575 2576 static void guc_exec_queue_start(struct xe_exec_queue *q) 2577 { 2578 struct xe_gpu_scheduler *sched = &q->guc->sched; 2579 2580 if (!exec_queue_killed_or_banned_or_wedged(q)) { 2581 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2582 int i; 2583 2584 trace_xe_exec_queue_resubmit(q); 2585 if (job) { 2586 for (i = 0; i < q->width; ++i) { 2587 /* 2588 * The GuC context is unregistered at this point 2589 * time, adjusting software ring tail ensures 2590 * jobs are rewritten in original placement, 2591 * adjusting LRC tail ensures the newly loaded 2592 * GuC / contexts only view the LRC tail 2593 * increasing as jobs are written out. 
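				 * (The LRC tail is reset to the current ring
				 * head below and only moves forward again as
				 * each job is re-emitted.)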
 */
				q->lrc[i]->ring.tail = job->ptrs[i].head;
				xe_lrc_set_ring_tail(q->lrc[i],
						     xe_lrc_ring_head(q->lrc[i]));
			}
		}
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
	xe_sched_submission_resume_tdr(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to start parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_start(q);
	}
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
					   struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job = NULL;
	struct drm_sched_job *s_job;
	bool restore_replay = false;

	drm_sched_for_each_pending_job(s_job, &sched->base, NULL) {
		job = to_xe_sched_job(s_job);
		restore_replay |= job->restore_replay;
		if (restore_replay) {
			xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
				  q->guc->id, xe_sched_job_seqno(job));

			q->ring_ops->emit_job(job);
			job->restore_replay = true;
		}
	}

	if (job)
		job->last_replay = true;
}

/**
 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
 */
void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_unpause_prepare(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg;

	if (q->guc->needs_cleanup) {
		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

		guc_exec_queue_add_msg(q, msg, CLEANUP);
		q->guc->needs_cleanup = false;
	}

	if (q->guc->needs_suspend) {
		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
		xe_sched_msg_unlock(sched);

		q->guc->needs_suspend = false;
	}

	/*
	 * The resume must be placed in the message queue ahead of the suspend:
	 * a resume can never be issued while a suspend is still pending, but
	 * the inverse is possible.
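	 * Both replays are added at the head of the message list, so pushing
	 * RESUME last (below) places it ahead of any replayed SUSPEND.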
2703 */ 2704 if (q->guc->needs_resume) { 2705 msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2706 2707 xe_sched_msg_lock(sched); 2708 guc_exec_queue_try_add_msg_head(q, msg, RESUME); 2709 xe_sched_msg_unlock(sched); 2710 2711 q->guc->needs_resume = false; 2712 } 2713 } 2714 2715 static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q) 2716 { 2717 struct xe_gpu_scheduler *sched = &q->guc->sched; 2718 bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q); 2719 2720 lockdep_assert_held(&guc->submission_state.lock); 2721 2722 xe_sched_resubmit_jobs(sched); 2723 guc_exec_queue_replay_pending_state_change(q); 2724 xe_sched_submission_start(sched); 2725 if (needs_tdr) 2726 xe_guc_exec_queue_trigger_cleanup(q); 2727 xe_sched_submission_resume_tdr(sched); 2728 } 2729 2730 /** 2731 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC. 2732 * @guc: the &xe_guc struct instance whose scheduler is to be enabled 2733 */ 2734 void xe_guc_submit_unpause(struct xe_guc *guc) 2735 { 2736 struct xe_exec_queue *q; 2737 unsigned long index; 2738 2739 mutex_lock(&guc->submission_state.lock); 2740 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2741 xe_sched_submission_start(&q->guc->sched); 2742 mutex_unlock(&guc->submission_state.lock); 2743 } 2744 2745 /** 2746 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF. 2747 * @guc: the &xe_guc struct instance whose scheduler is to be enabled 2748 */ 2749 void xe_guc_submit_unpause_vf(struct xe_guc *guc) 2750 { 2751 struct xe_exec_queue *q; 2752 unsigned long index; 2753 2754 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2755 2756 mutex_lock(&guc->submission_state.lock); 2757 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2758 /* 2759 * Prevent redundant attempts to stop parallel queues, or queues 2760 * created after resfix done. 2761 */ 2762 if (q->guc->id != index || 2763 !drm_sched_is_stopped(&q->guc->sched.base)) 2764 continue; 2765 2766 guc_exec_queue_unpause(guc, q); 2767 } 2768 mutex_unlock(&guc->submission_state.lock); 2769 } 2770 2771 /** 2772 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC. 
2773 * @guc: the &xe_guc struct instance whose scheduler is to be aborted 2774 */ 2775 void xe_guc_submit_pause_abort(struct xe_guc *guc) 2776 { 2777 struct xe_exec_queue *q; 2778 unsigned long index; 2779 2780 mutex_lock(&guc->submission_state.lock); 2781 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2782 struct xe_gpu_scheduler *sched = &q->guc->sched; 2783 2784 /* Prevent redundant attempts to stop parallel queues */ 2785 if (q->guc->id != index) 2786 continue; 2787 2788 xe_sched_submission_start(sched); 2789 guc_exec_queue_kill(q); 2790 } 2791 mutex_unlock(&guc->submission_state.lock); 2792 } 2793 2794 static struct xe_exec_queue * 2795 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) 2796 { 2797 struct xe_gt *gt = guc_to_gt(guc); 2798 struct xe_exec_queue *q; 2799 2800 if (unlikely(guc_id >= GUC_ID_MAX)) { 2801 xe_gt_err(gt, "Invalid guc_id %u\n", guc_id); 2802 return NULL; 2803 } 2804 2805 q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); 2806 if (unlikely(!q)) { 2807 xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id); 2808 return NULL; 2809 } 2810 2811 xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id); 2812 xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width)); 2813 2814 return q; 2815 } 2816 2817 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) 2818 { 2819 u32 action[] = { 2820 XE_GUC_ACTION_DEREGISTER_CONTEXT, 2821 q->guc->id, 2822 }; 2823 2824 xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); 2825 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 2826 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 2827 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); 2828 2829 trace_xe_exec_queue_deregister(q); 2830 2831 if (xe_exec_queue_is_multi_queue_secondary(q)) 2832 handle_deregister_done(guc, q); 2833 else 2834 xe_guc_ct_send_g2h_handler(&guc->ct, action, 2835 ARRAY_SIZE(action)); 2836 } 2837 2838 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, 2839 u32 runnable_state) 2840 { 2841 trace_xe_exec_queue_scheduling_done(q); 2842 2843 if (runnable_state == 1) { 2844 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); 2845 2846 q->guc->resume_time = ktime_get(); 2847 clear_exec_queue_pending_resume(q); 2848 clear_exec_queue_pending_enable(q); 2849 smp_wmb(); 2850 wake_up_all(&guc->ct.wq); 2851 } else { 2852 xe_gt_assert(guc_to_gt(guc), runnable_state == 0); 2853 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); 2854 2855 if (q->guc->suspend_pending) { 2856 suspend_fence_signal(q); 2857 clear_exec_queue_pending_disable(q); 2858 } else { 2859 if (exec_queue_banned(q)) { 2860 smp_wmb(); 2861 wake_up_all(&guc->ct.wq); 2862 } 2863 if (exec_queue_destroyed(q)) { 2864 /* 2865 * Make sure to clear the pending_disable only 2866 * after sampling the destroyed state. We want 2867 * to ensure we don't trigger the unregister too 2868 * early with something intending to only 2869 * disable scheduling. The caller doing the 2870 * destroy must wait for an ongoing 2871 * pending_disable before marking as destroyed. 
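				 * This is why pending_disable is cleared on
				 * both branches below, only after the
				 * exec_queue_destroyed() check has been made.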
				 */
				clear_exec_queue_pending_disable(q);
				deregister_exec_queue(guc, q);
			} else {
				clear_exec_queue_pending_disable(q);
			}
		}
	}
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state)
{
	/* Take the CT lock here as handle_sched_done() can send an H2G message */
	mutex_lock(&guc->ct.lock);
	handle_sched_done(guc, q, runnable_state);
	mutex_unlock(&guc->ct.lock);
}

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id, runnable_state;

	if (unlikely(len < 2))
		return -EPROTO;

	guc_id = msg[0];
	runnable_state = msg[1];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		xe_gt_err(guc_to_gt(guc),
			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
			  atomic_read(&q->guc->state), q->guc->id,
			  runnable_state);
		return -EPROTO;
	}

	handle_sched_done(guc, q, runnable_state);

	return 0;
}

static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_deregister_done(q);

	clear_exec_queue_registered(q);
	__guc_exec_queue_destroy(guc, q);
}

int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
		xe_gt_err(guc_to_gt(guc),
			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
			  atomic_read(&q->guc->state), q->guc->id);
		return -EPROTO;
	}

	handle_deregister_done(guc, q);

	return 0;
}

int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x",
		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id,
		   atomic_read(&q->guc->state));

	trace_xe_exec_queue_reset(q);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to
	 * cancel the jobs by setting the job timeout to the minimum value,
	 * which kicks guc_exec_queue_timedout_job.
	 */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}

/*
 * xe_guc_error_capture_handler - Handler of GuC captured message
 * @guc: The GuC object
 * @msg: Pointer to the message
 * @len: The message length
 *
 * When GuC captured data is ready, GuC will send message
 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to the host; this function is
 * called first to check the status before processing the data that comes with
 * the message.
 *
 * Returns: error code.
0 if success 2999 */ 3000 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 3001 { 3002 u32 status; 3003 3004 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 3005 return -EPROTO; 3006 3007 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 3008 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 3009 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 3010 3011 xe_guc_capture_process(guc); 3012 3013 return 0; 3014 } 3015 3016 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 3017 u32 len) 3018 { 3019 struct xe_gt *gt = guc_to_gt(guc); 3020 struct xe_exec_queue *q; 3021 u32 guc_id; 3022 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 3023 3024 if (unlikely(!len || len > 2)) 3025 return -EPROTO; 3026 3027 guc_id = msg[0]; 3028 3029 if (len == 2) 3030 type = msg[1]; 3031 3032 if (guc_id == GUC_ID_UNKNOWN) { 3033 /* 3034 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 3035 * context. In such case only PF will be notified about that fault. 3036 */ 3037 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 3038 return 0; 3039 } 3040 3041 q = g2h_exec_queue_lookup(guc, guc_id); 3042 if (unlikely(!q)) 3043 return -EPROTO; 3044 3045 /* 3046 * The type is HW-defined and changes based on platform, so we don't 3047 * decode it in the kernel and only check if it is valid. 3048 * See bspec 54047 and 72187 for details. 3049 */ 3050 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 3051 xe_gt_info(gt, 3052 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 3053 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3054 else 3055 xe_gt_info(gt, 3056 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 3057 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3058 3059 trace_xe_exec_queue_memory_cat_error(q); 3060 3061 /* Treat the same as engine reset */ 3062 xe_guc_exec_queue_reset_trigger_cleanup(q); 3063 3064 return 0; 3065 } 3066 3067 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 3068 { 3069 struct xe_gt *gt = guc_to_gt(guc); 3070 u8 guc_class, instance; 3071 u32 reason; 3072 3073 if (unlikely(len != 3)) 3074 return -EPROTO; 3075 3076 guc_class = msg[0]; 3077 instance = msg[1]; 3078 reason = msg[2]; 3079 3080 /* Unexpected failure of a hardware feature, log an actual error */ 3081 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3082 guc_class, instance, reason); 3083 3084 xe_gt_reset_async(gt); 3085 3086 return 0; 3087 } 3088 3089 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 3090 u32 len) 3091 { 3092 struct xe_gt *gt = guc_to_gt(guc); 3093 struct xe_device *xe = guc_to_xe(guc); 3094 struct xe_exec_queue *q; 3095 u32 guc_id = msg[2]; 3096 3097 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3098 drm_err(&xe->drm, "Invalid length %u", len); 3099 return -EPROTO; 3100 } 3101 3102 q = g2h_exec_queue_lookup(guc, guc_id); 3103 if (unlikely(!q)) 3104 return -EPROTO; 3105 3106 xe_gt_dbg(gt, 3107 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3108 msg[0] & 1 ? 
"uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3109 3110 trace_xe_exec_queue_cgp_context_error(q); 3111 3112 /* Treat the same as engine reset */ 3113 xe_guc_exec_queue_reset_trigger_cleanup(q); 3114 3115 return 0; 3116 } 3117 3118 /** 3119 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3120 * @guc: guc 3121 * @msg: message indicating CGP sync done 3122 * @len: length of message 3123 * 3124 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3125 * for CGP synchronization to complete. 3126 * 3127 * Return: 0 on success, -EPROTO for malformed messages. 3128 */ 3129 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3130 { 3131 struct xe_device *xe = guc_to_xe(guc); 3132 struct xe_exec_queue *q; 3133 u32 guc_id = msg[0]; 3134 3135 if (unlikely(len < 1)) { 3136 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3137 return -EPROTO; 3138 } 3139 3140 q = g2h_exec_queue_lookup(guc, guc_id); 3141 if (unlikely(!q)) 3142 return -EPROTO; 3143 3144 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3145 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3146 return -EPROTO; 3147 } 3148 3149 /* Wakeup the serialized cgp update wait */ 3150 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3151 xe_guc_ct_wake_waiters(&guc->ct); 3152 3153 return 0; 3154 } 3155 3156 static void 3157 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3158 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3159 { 3160 struct xe_guc *guc = exec_queue_to_guc(q); 3161 struct xe_device *xe = guc_to_xe(guc); 3162 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3163 int i; 3164 3165 snapshot->guc.wqi_head = q->guc->wqi_head; 3166 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3167 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3168 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3169 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 3170 wq_desc.wq_status); 3171 3172 if (snapshot->parallel.wq_desc.head != 3173 snapshot->parallel.wq_desc.tail) { 3174 for (i = snapshot->parallel.wq_desc.head; 3175 i != snapshot->parallel.wq_desc.tail; 3176 i = (i + sizeof(u32)) % WQ_SIZE) 3177 snapshot->parallel.wq[i / sizeof(u32)] = 3178 parallel_read(xe, map, wq[i / sizeof(u32)]); 3179 } 3180 } 3181 3182 static void 3183 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3184 struct drm_printer *p) 3185 { 3186 int i; 3187 3188 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3189 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3190 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3191 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3192 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3193 3194 if (snapshot->parallel.wq_desc.head != 3195 snapshot->parallel.wq_desc.tail) { 3196 for (i = snapshot->parallel.wq_desc.head; 3197 i != snapshot->parallel.wq_desc.tail; 3198 i = (i + sizeof(u32)) % WQ_SIZE) 3199 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3200 snapshot->parallel.wq[i / sizeof(u32)]); 3201 } 3202 } 3203 3204 /** 3205 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3206 * @q: faulty exec queue 3207 * 3208 * This can be printed out in a later stage like during dev_coredump 3209 * analysis. 3210 * 3211 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3212 * caller, using `xe_guc_exec_queue_snapshot_free`. 
3213 */ 3214 struct xe_guc_submit_exec_queue_snapshot * 3215 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 3216 { 3217 struct xe_gpu_scheduler *sched = &q->guc->sched; 3218 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3219 int i; 3220 3221 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC); 3222 3223 if (!snapshot) 3224 return NULL; 3225 3226 snapshot->guc.id = q->guc->id; 3227 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 3228 snapshot->class = q->class; 3229 snapshot->logical_mask = q->logical_mask; 3230 snapshot->width = q->width; 3231 snapshot->refcount = kref_read(&q->refcount); 3232 snapshot->sched_timeout = sched->base.timeout; 3233 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 3234 snapshot->sched_props.preempt_timeout_us = 3235 q->sched_props.preempt_timeout_us; 3236 3237 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width, 3238 GFP_ATOMIC); 3239 3240 if (snapshot->lrc) { 3241 for (i = 0; i < q->width; ++i) { 3242 struct xe_lrc *lrc = q->lrc[i]; 3243 3244 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 3245 } 3246 } 3247 3248 snapshot->schedule_state = atomic_read(&q->guc->state); 3249 snapshot->exec_queue_flags = q->flags; 3250 3251 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 3252 if (snapshot->parallel_execution) 3253 guc_exec_queue_wq_snapshot_capture(q, snapshot); 3254 3255 if (xe_exec_queue_is_multi_queue(q)) { 3256 snapshot->multi_queue.valid = true; 3257 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; 3258 snapshot->multi_queue.pos = q->multi_queue.pos; 3259 } 3260 3261 return snapshot; 3262 } 3263 3264 /** 3265 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. 3266 * @snapshot: Previously captured snapshot of job. 3267 * 3268 * This captures some data that requires taking some locks, so it cannot be done in signaling path. 3269 */ 3270 void 3271 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3272 { 3273 int i; 3274 3275 if (!snapshot || !snapshot->lrc) 3276 return; 3277 3278 for (i = 0; i < snapshot->width; ++i) 3279 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3280 } 3281 3282 /** 3283 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3284 * @snapshot: GuC Submit Engine snapshot object. 3285 * @p: drm_printer where it will be printed out. 3286 * 3287 * This function prints out a given GuC Submit Engine snapshot object. 
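 * A NULL @snapshot (e.g. from a failed capture) is tolerated and simply
 * produces no output.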
 */
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	if (!snapshot)
		return;

	drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
	drm_printf(p, "\tName: %s\n", snapshot->name);
	drm_printf(p, "\tClass: %d\n", snapshot->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
	drm_printf(p, "\tWidth: %d\n", snapshot->width);
	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n",
		   snapshot->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   snapshot->sched_props.preempt_timeout_us);

	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
		xe_lrc_snapshot_print(snapshot->lrc[i], p);

	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);

	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_print(snapshot, p);

	if (snapshot->multi_queue.valid) {
		drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary);
		drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos);
	}
}

/**
 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	int i;

	if (!snapshot)
		return;

	if (snapshot->lrc) {
		for (i = 0; i < snapshot->width; i++)
			xe_lrc_snapshot_free(snapshot->lrc[i]);
		kfree(snapshot->lrc);
	}
	kfree(snapshot);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot;

	snapshot = xe_guc_exec_queue_snapshot_capture(q);
	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);
}

/**
 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
 * @q: Execution queue
 * @ctx_type: Type of the context
 *
 * This function registers the execution queue with the GuC. Special context
 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
 * are only applicable for iGPU and in the VF. Submits the execution queue to
 * the GuC after registering it.
 *
 * Returns: None.
 */
void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);

	xe_gt_assert(gt, IS_SRIOV_VF(xe));
	xe_gt_assert(gt, !IS_DGFX(xe));
	xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
		     ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
	xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));

	register_exec_queue(q, ctx_type);
	enable_scheduling(q);
}

/**
 * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
 *
 * This function captures and prints snapshots of **all** GuC Engines.
 */
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_exec_queue *q;
	unsigned long index;

	if (!xe_device_uc_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_print(q, p);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues
 * registered with the GuC
 * @guc: GuC.
 *
 * Return: true if any MLRC queue is registered with the GuC, false otherwise.
 */
bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	guard(mutex)(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		if (q->width > 1)
			return true;

	return false;
}

/**
 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
 * exec queues registered to given GuC.
 * @guc: the &xe_guc struct instance
 * @scratch: scratch buffer to be used as temporary storage
 *
 * Returns: zero on success, negative error code on failure.
 */
int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
{
	struct xe_exec_queue *q;
	unsigned long index;
	int err = 0;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant rebase attempts for parallel queues */
		if (q->guc->id != index)
			continue;

		err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
		if (err)
			break;
	}
	mutex_unlock(&guc->submission_state.lock);

	return err;
}