1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_guc_submit.h" 7 8 #include <linux/bitfield.h> 9 #include <linux/bitmap.h> 10 #include <linux/circ_buf.h> 11 #include <linux/dma-fence-array.h> 12 13 #include <drm/drm_managed.h> 14 15 #include "abi/guc_actions_abi.h" 16 #include "abi/guc_actions_slpc_abi.h" 17 #include "abi/guc_klvs_abi.h" 18 #include "xe_assert.h" 19 #include "xe_bo.h" 20 #include "xe_devcoredump.h" 21 #include "xe_device.h" 22 #include "xe_exec_queue.h" 23 #include "xe_force_wake.h" 24 #include "xe_gpu_scheduler.h" 25 #include "xe_gt.h" 26 #include "xe_gt_clock.h" 27 #include "xe_gt_printk.h" 28 #include "xe_guc.h" 29 #include "xe_guc_capture.h" 30 #include "xe_guc_ct.h" 31 #include "xe_guc_exec_queue_types.h" 32 #include "xe_guc_id_mgr.h" 33 #include "xe_guc_klv_helpers.h" 34 #include "xe_guc_submit_types.h" 35 #include "xe_hw_engine.h" 36 #include "xe_lrc.h" 37 #include "xe_macros.h" 38 #include "xe_map.h" 39 #include "xe_mocs.h" 40 #include "xe_pm.h" 41 #include "xe_ring_ops_types.h" 42 #include "xe_sched_job.h" 43 #include "xe_sleep.h" 44 #include "xe_trace.h" 45 #include "xe_uc_fw.h" 46 #include "xe_vm.h" 47 48 #define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6 49 50 static int guc_submit_reset_prepare(struct xe_guc *guc); 51 52 static struct xe_guc * 53 exec_queue_to_guc(struct xe_exec_queue *q) 54 { 55 return &q->gt->uc.guc; 56 } 57 58 /* 59 * Helpers for engine state, using an atomic as some of the bits can transition 60 * as the same time (e.g. a suspend can be happning at the same time as schedule 61 * engine done being processed). 
62 */ 63 #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) 64 #define EXEC_QUEUE_STATE_ENABLED (1 << 1) 65 #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) 66 #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) 67 #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) 68 #define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) 69 #define EXEC_QUEUE_STATE_RESET (1 << 6) 70 #define EXEC_QUEUE_STATE_KILLED (1 << 7) 71 #define EXEC_QUEUE_STATE_WEDGED (1 << 8) 72 #define EXEC_QUEUE_STATE_BANNED (1 << 9) 73 #define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 10) 74 #define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND (1 << 11) 75 76 static bool exec_queue_registered(struct xe_exec_queue *q) 77 { 78 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; 79 } 80 81 static void set_exec_queue_registered(struct xe_exec_queue *q) 82 { 83 atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 84 } 85 86 static void clear_exec_queue_registered(struct xe_exec_queue *q) 87 { 88 atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 89 } 90 91 static bool exec_queue_enabled(struct xe_exec_queue *q) 92 { 93 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; 94 } 95 96 static void set_exec_queue_enabled(struct xe_exec_queue *q) 97 { 98 atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 99 } 100 101 static void clear_exec_queue_enabled(struct xe_exec_queue *q) 102 { 103 atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 104 } 105 106 static bool exec_queue_pending_enable(struct xe_exec_queue *q) 107 { 108 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; 109 } 110 111 static void set_exec_queue_pending_enable(struct xe_exec_queue *q) 112 { 113 atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 114 } 115 116 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) 117 { 118 atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 119 } 120 121 static bool exec_queue_pending_disable(struct xe_exec_queue *q) 122 { 123 return atomic_read(&q->guc->state) & 
EXEC_QUEUE_STATE_PENDING_DISABLE; 124 } 125 126 static void set_exec_queue_pending_disable(struct xe_exec_queue *q) 127 { 128 atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 129 } 130 131 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) 132 { 133 atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 134 } 135 136 static bool exec_queue_destroyed(struct xe_exec_queue *q) 137 { 138 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; 139 } 140 141 static void set_exec_queue_destroyed(struct xe_exec_queue *q) 142 { 143 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 144 } 145 146 static void clear_exec_queue_destroyed(struct xe_exec_queue *q) 147 { 148 atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 149 } 150 151 static bool exec_queue_banned(struct xe_exec_queue *q) 152 { 153 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; 154 } 155 156 static void set_exec_queue_banned(struct xe_exec_queue *q) 157 { 158 atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 159 } 160 161 static bool exec_queue_suspended(struct xe_exec_queue *q) 162 { 163 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; 164 } 165 166 static void set_exec_queue_suspended(struct xe_exec_queue *q) 167 { 168 atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 169 } 170 171 static void clear_exec_queue_suspended(struct xe_exec_queue *q) 172 { 173 atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 174 } 175 176 static bool exec_queue_reset(struct xe_exec_queue *q) 177 { 178 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; 179 } 180 181 static void set_exec_queue_reset(struct xe_exec_queue *q) 182 { 183 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); 184 } 185 186 static bool exec_queue_killed(struct xe_exec_queue *q) 187 { 188 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; 189 } 190 191 static void set_exec_queue_killed(struct xe_exec_queue *q) 192 { 193 
atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); 194 } 195 196 static bool exec_queue_wedged(struct xe_exec_queue *q) 197 { 198 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; 199 } 200 201 static void set_exec_queue_wedged(struct xe_exec_queue *q) 202 { 203 atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); 204 } 205 206 static bool exec_queue_pending_resume(struct xe_exec_queue *q) 207 { 208 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME; 209 } 210 211 static void set_exec_queue_pending_resume(struct xe_exec_queue *q) 212 { 213 atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 214 } 215 216 static void clear_exec_queue_pending_resume(struct xe_exec_queue *q) 217 { 218 atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 219 } 220 221 static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q) 222 { 223 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND; 224 } 225 226 static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q) 227 { 228 atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state); 229 } 230 231 static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q) 232 { 233 atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state); 234 } 235 236 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) 237 { 238 return (atomic_read(&q->guc->state) & 239 (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | 240 EXEC_QUEUE_STATE_BANNED)); 241 } 242 243 static void guc_submit_sw_fini(struct drm_device *drm, void *arg) 244 { 245 struct xe_guc *guc = arg; 246 struct xe_device *xe = guc_to_xe(guc); 247 struct xe_gt *gt = guc_to_gt(guc); 248 int ret; 249 250 ret = wait_event_timeout(guc->submission_state.fini_wq, 251 xa_empty(&guc->submission_state.exec_queue_lookup), 252 HZ * 5); 253 254 drain_workqueue(xe->destroy_wq); 255 256 xe_gt_assert(gt, ret); 257 258 xa_destroy(&guc->submission_state.exec_queue_lookup); 259 } 260 261 static 
void guc_submit_fini(void *arg) 262 { 263 struct xe_guc *guc = arg; 264 struct xe_exec_queue *q; 265 unsigned long index; 266 267 /* Drop any wedged queue refs */ 268 mutex_lock(&guc->submission_state.lock); 269 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 270 if (exec_queue_wedged(q)) { 271 mutex_unlock(&guc->submission_state.lock); 272 xe_exec_queue_put(q); 273 mutex_lock(&guc->submission_state.lock); 274 } 275 } 276 mutex_unlock(&guc->submission_state.lock); 277 278 /* Forcefully kill any remaining exec queues */ 279 xe_guc_ct_stop(&guc->ct); 280 guc_submit_reset_prepare(guc); 281 xe_guc_softreset(guc); 282 xe_guc_submit_stop(guc); 283 xe_uc_fw_sanitize(&guc->fw); 284 xe_guc_submit_pause_abort(guc); 285 } 286 287 static const struct xe_exec_queue_ops guc_exec_queue_ops; 288 289 static void primelockdep(struct xe_guc *guc) 290 { 291 if (!IS_ENABLED(CONFIG_LOCKDEP)) 292 return; 293 294 fs_reclaim_acquire(GFP_KERNEL); 295 296 mutex_lock(&guc->submission_state.lock); 297 mutex_unlock(&guc->submission_state.lock); 298 299 fs_reclaim_release(GFP_KERNEL); 300 } 301 302 /** 303 * xe_guc_submit_init() - Initialize GuC submission. 304 * @guc: the &xe_guc to initialize 305 * @num_ids: number of GuC context IDs to use 306 * 307 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all 308 * GuC context IDs supported by the GuC firmware should be used for submission. 309 * 310 * Only VF drivers will have to provide explicit number of GuC context IDs 311 * that they can use for submission. 312 * 313 * Return: 0 on success or a negative error code on failure. 
314 */ 315 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) 316 { 317 struct xe_device *xe = guc_to_xe(guc); 318 struct xe_gt *gt = guc_to_gt(guc); 319 int err; 320 321 err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); 322 if (err) 323 return err; 324 325 err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids); 326 if (err) 327 return err; 328 329 gt->exec_queue_ops = &guc_exec_queue_ops; 330 331 xa_init(&guc->submission_state.exec_queue_lookup); 332 333 init_waitqueue_head(&guc->submission_state.fini_wq); 334 335 primelockdep(guc); 336 337 guc->submission_state.initialized = true; 338 339 err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc); 340 if (err) 341 return err; 342 343 return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc); 344 } 345 346 /* 347 * Given that we want to guarantee enough RCS throughput to avoid missing 348 * frames, we set the yield policy to 20% of each 80ms interval. 349 */ 350 #define RC_YIELD_DURATION 80 /* in ms */ 351 #define RC_YIELD_RATIO 20 /* in percent */ 352 static u32 *emit_render_compute_yield_klv(u32 *emit) 353 { 354 *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); 355 *emit++ = RC_YIELD_DURATION; 356 *emit++ = RC_YIELD_RATIO; 357 358 return emit; 359 } 360 361 #define SCHEDULING_POLICY_MAX_DWORDS 16 362 static int guc_init_global_schedule_policy(struct xe_guc *guc) 363 { 364 u32 data[SCHEDULING_POLICY_MAX_DWORDS]; 365 u32 *emit = data; 366 u32 count = 0; 367 int ret; 368 369 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 370 return 0; 371 372 *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 373 374 if (CCS_INSTANCES(guc_to_gt(guc))) 375 emit = emit_render_compute_yield_klv(emit); 376 377 count = emit - data; 378 if (count > 1) { 379 xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); 380 381 ret = xe_guc_ct_send_block(&guc->ct, data, count); 382 if (ret < 0) { 383 xe_gt_err(guc_to_gt(guc), 384 "failed to enable GuC 
scheduling policies: %pe\n", 385 ERR_PTR(ret)); 386 return ret; 387 } 388 } 389 390 return 0; 391 } 392 393 int xe_guc_submit_enable(struct xe_guc *guc) 394 { 395 int ret; 396 397 ret = guc_init_global_schedule_policy(guc); 398 if (ret) 399 return ret; 400 401 guc->submission_state.enabled = true; 402 403 return 0; 404 } 405 406 void xe_guc_submit_disable(struct xe_guc *guc) 407 { 408 guc->submission_state.enabled = false; 409 } 410 411 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) 412 { 413 int i; 414 415 lockdep_assert_held(&guc->submission_state.lock); 416 417 for (i = 0; i < xa_count; ++i) 418 xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); 419 420 xe_guc_id_mgr_release_locked(&guc->submission_state.idm, 421 q->guc->id, q->width); 422 423 if (xa_empty(&guc->submission_state.exec_queue_lookup)) 424 wake_up(&guc->submission_state.fini_wq); 425 } 426 427 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 428 { 429 int ret; 430 int i; 431 432 /* 433 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, 434 * worse case user gets -ENOMEM on engine create and has to try again. 435 * 436 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent 437 * failure. 
438 */ 439 lockdep_assert_held(&guc->submission_state.lock); 440 441 ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, 442 q->width); 443 if (ret < 0) 444 return ret; 445 446 q->guc->id = ret; 447 448 for (i = 0; i < q->width; ++i) { 449 ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup, 450 q->guc->id + i, q, GFP_NOWAIT)); 451 if (ret) 452 goto err_release; 453 } 454 455 return 0; 456 457 err_release: 458 __release_guc_id(guc, q, i); 459 460 return ret; 461 } 462 463 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 464 { 465 mutex_lock(&guc->submission_state.lock); 466 __release_guc_id(guc, q, q->width); 467 mutex_unlock(&guc->submission_state.lock); 468 } 469 470 struct exec_queue_policy { 471 u32 count; 472 struct guc_update_exec_queue_policy h2g; 473 }; 474 475 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) 476 { 477 size_t bytes = sizeof(policy->h2g.header) + 478 (sizeof(policy->h2g.klv[0]) * policy->count); 479 480 return bytes / sizeof(u32); 481 } 482 483 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, 484 u16 guc_id) 485 { 486 policy->h2g.header.action = 487 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 488 policy->h2g.header.guc_id = guc_id; 489 policy->count = 0; 490 } 491 492 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ 493 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ 494 u32 data) \ 495 { \ 496 XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 497 \ 498 policy->h2g.klv[policy->count].kl = \ 499 FIELD_PREP(GUC_KLV_0_KEY, \ 500 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 501 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 502 policy->h2g.klv[policy->count].value = data; \ 503 policy->count++; \ 504 } 505 506 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 507 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 508 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) 509 
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY) 510 #undef MAKE_EXEC_QUEUE_POLICY_ADD 511 512 static const int xe_exec_queue_prio_to_guc[] = { 513 [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, 514 [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, 515 [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, 516 [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, 517 }; 518 519 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) 520 { 521 struct exec_queue_policy policy; 522 enum xe_exec_queue_priority prio = q->sched_props.priority; 523 u32 timeslice_us = q->sched_props.timeslice_us; 524 u32 slpc_exec_queue_freq_req = 0; 525 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; 526 527 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) && 528 !xe_exec_queue_is_multi_queue_secondary(q)); 529 530 if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY) 531 slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; 532 533 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 534 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); 535 __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); 536 __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); 537 __guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy, 538 slpc_exec_queue_freq_req); 539 540 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 541 __guc_exec_queue_policy_action_size(&policy), 0, 0); 542 } 543 544 static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) 545 { 546 struct exec_queue_policy policy; 547 548 xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q)); 549 550 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 551 __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); 552 553 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 554 __guc_exec_queue_policy_action_size(&policy), 0, 0); 555 } 556 557 static bool 
vf_recovery(struct xe_guc *guc) 558 { 559 return xe_gt_recovery_pending(guc_to_gt(guc)); 560 } 561 562 static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) 563 { 564 struct xe_guc *guc = exec_queue_to_guc(q); 565 struct xe_device *xe = guc_to_xe(guc); 566 567 /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */ 568 wake_up_all(&xe->ufence_wq); 569 570 xe_sched_tdr_queue_imm(&q->guc->sched); 571 } 572 573 static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q) 574 { 575 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 576 struct xe_exec_queue_group *group = q->multi_queue.group; 577 struct xe_exec_queue *eq, *next; 578 LIST_HEAD(tmp); 579 580 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), 581 xe_exec_queue_is_multi_queue(q)); 582 583 mutex_lock(&group->list_lock); 584 585 /* 586 * Stop all future queues being from executing while group is stopped. 587 */ 588 group->stopped = true; 589 590 list_for_each_entry_safe(eq, next, &group->list, multi_queue.link) 591 /* 592 * Refcount prevents an attempted removal from &group->list, 593 * temporary list allows safe iteration after dropping 594 * &group->list_lock. 
595 */ 596 if (xe_exec_queue_get_unless_zero(eq)) 597 list_move_tail(&eq->multi_queue.link, &tmp); 598 599 mutex_unlock(&group->list_lock); 600 601 /* We cannot stop under list lock without getting inversions */ 602 xe_sched_submission_stop(&primary->guc->sched); 603 list_for_each_entry(eq, &tmp, multi_queue.link) 604 xe_sched_submission_stop(&eq->guc->sched); 605 606 mutex_lock(&group->list_lock); 607 list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) { 608 /* 609 * Corner where we got banned while stopping and not on 610 * &group->list 611 */ 612 if (READ_ONCE(group->banned)) 613 xe_guc_exec_queue_trigger_cleanup(eq); 614 615 list_move_tail(&eq->multi_queue.link, &group->list); 616 xe_exec_queue_put(eq); 617 } 618 mutex_unlock(&group->list_lock); 619 } 620 621 static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q) 622 { 623 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 624 struct xe_exec_queue_group *group = q->multi_queue.group; 625 struct xe_exec_queue *eq; 626 627 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), 628 xe_exec_queue_is_multi_queue(q)); 629 630 xe_sched_submission_start(&primary->guc->sched); 631 632 mutex_lock(&group->list_lock); 633 group->stopped = false; 634 list_for_each_entry(eq, &group->list, multi_queue.link) 635 xe_sched_submission_start(&eq->guc->sched); 636 mutex_unlock(&group->list_lock); 637 } 638 639 static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q) 640 { 641 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 642 struct xe_exec_queue_group *group = q->multi_queue.group; 643 struct xe_exec_queue *eq; 644 645 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), 646 xe_exec_queue_is_multi_queue(q)); 647 648 /* Group banned, skip timeout check in TDR */ 649 WRITE_ONCE(group->banned, true); 650 xe_guc_exec_queue_trigger_cleanup(primary); 651 652 mutex_lock(&group->list_lock); 653 list_for_each_entry(eq, &group->list, multi_queue.link) 654 
xe_guc_exec_queue_trigger_cleanup(eq); 655 mutex_unlock(&group->list_lock); 656 } 657 658 static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q) 659 { 660 if (xe_exec_queue_is_multi_queue(q)) { 661 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 662 struct xe_exec_queue_group *group = q->multi_queue.group; 663 struct xe_exec_queue *eq; 664 665 /* Group banned, skip timeout check in TDR */ 666 WRITE_ONCE(group->banned, true); 667 668 set_exec_queue_reset(primary); 669 if (!exec_queue_banned(primary)) 670 xe_guc_exec_queue_trigger_cleanup(primary); 671 672 mutex_lock(&group->list_lock); 673 list_for_each_entry(eq, &group->list, multi_queue.link) { 674 set_exec_queue_reset(eq); 675 if (!exec_queue_banned(eq)) 676 xe_guc_exec_queue_trigger_cleanup(eq); 677 } 678 mutex_unlock(&group->list_lock); 679 } else { 680 set_exec_queue_reset(q); 681 if (!exec_queue_banned(q)) 682 xe_guc_exec_queue_trigger_cleanup(q); 683 } 684 } 685 686 static void set_exec_queue_group_banned(struct xe_exec_queue *q) 687 { 688 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 689 struct xe_exec_queue_group *group = q->multi_queue.group; 690 struct xe_exec_queue *eq; 691 692 /* Ban all queues of the multi-queue group */ 693 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), 694 xe_exec_queue_is_multi_queue(q)); 695 set_exec_queue_banned(primary); 696 697 mutex_lock(&group->list_lock); 698 list_for_each_entry(eq, &group->list, multi_queue.link) 699 set_exec_queue_banned(eq); 700 mutex_unlock(&group->list_lock); 701 } 702 703 /* Helper for context registration H2G */ 704 struct guc_ctxt_registration_info { 705 u32 flags; 706 u32 context_idx; 707 u32 engine_class; 708 u32 engine_submit_mask; 709 u32 wq_desc_lo; 710 u32 wq_desc_hi; 711 u32 wq_base_lo; 712 u32 wq_base_hi; 713 u32 wq_size; 714 u32 cgp_lo; 715 u32 cgp_hi; 716 u32 hwlrca_lo; 717 u32 hwlrca_hi; 718 }; 719 720 #define parallel_read(xe_, map_, field_) \ 721 xe_map_rd_field(xe_, 
&map_, 0, struct guc_submit_parallel_scratch, \ 722 field_) 723 #define parallel_write(xe_, map_, field_, val_) \ 724 xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 725 field_, val_) 726 727 /** 728 * DOC: Multi Queue Group GuC interface 729 * 730 * The multi queue group coordination between KMD and GuC is through a software 731 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page 732 * allocated in the global GTT. 733 * 734 * CGP format: 735 * 736 * +-----------+---------------------------+---------------------------------------------+ 737 * | DWORD | Name | Description | 738 * +-----------+---------------------------+---------------------------------------------+ 739 * | 0 | Version | Bits [15:8]=Major ver, [7:0]=Minor ver | 740 * +-----------+---------------------------+---------------------------------------------+ 741 * | 1..15 | RESERVED | MBZ | 742 * +-----------+---------------------------+---------------------------------------------+ 743 * | 16 | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0 | 744 * +-----------+---------------------------+---------------------------------------------+ 745 * | 17 | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32 | 746 * +-----------+---------------------------+---------------------------------------------+ 747 * | 18..31 | RESERVED | MBZ | 748 * +-----------+---------------------------+---------------------------------------------+ 749 * | 32 | Q0CD_DW0 | Queue 0 context LRC descriptor lower DWORD | 750 * +-----------+---------------------------+---------------------------------------------+ 751 * | 33 | Q0ContextIndex | Context ID for Queue 0 | 752 * +-----------+---------------------------+---------------------------------------------+ 753 * | 34 | Q1CD_DW0 | Queue 1 context LRC descriptor lower DWORD | 754 * +-----------+---------------------------+---------------------------------------------+ 755 * | 35 | Q1ContextIndex | Context ID for Queue 
1 | 756 * +-----------+---------------------------+---------------------------------------------+ 757 * | ... |... | ... | 758 * +-----------+---------------------------+---------------------------------------------+ 759 * | 158 | Q63CD_DW0 | Queue 63 context LRC descriptor lower DWORD | 760 * +-----------+---------------------------+---------------------------------------------+ 761 * | 159 | Q63ContextIndex | Context ID for Queue 63 | 762 * +-----------+---------------------------+---------------------------------------------+ 763 * | 160..1024 | RESERVED | MBZ | 764 * +-----------+---------------------------+---------------------------------------------+ 765 * 766 * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified 767 * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies 768 * the CGP address. When the secondary queues are added to the group, the CGP is 769 * updated with entry for that queue and GuC is notified through the H2G interface 770 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages 771 * with a XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also 772 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any 773 * error in the CGP. Only one of these CGP update messages can be outstanding 774 * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW* 775 * fields indicate which queue entry is being updated in the CGP. 776 * 777 * The primary queue (Q0) represents the multi queue group context in GuC and 778 * submission on any queue of the group must be through Q0 GuC interface only. 779 * 780 * As it is not required to register secondary queues with GuC, the secondary queue 781 * context ids in the CGP are populated with Q0 context id. 
782 */ 783 784 #define CGP_VERSION_MAJOR_SHIFT 8 785 786 static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe, 787 struct xe_exec_queue *q) 788 { 789 struct xe_exec_queue_group *group = q->multi_queue.group; 790 u32 guc_id = group->primary->guc->id; 791 792 /* Currently implementing CGP version 1.0 */ 793 xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32, 794 1 << CGP_VERSION_MAJOR_SHIFT); 795 796 xe_map_wr(xe, &group->cgp_bo->vmap, 797 (32 + q->multi_queue.pos * 2) * sizeof(u32), 798 u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0]))); 799 800 xe_map_wr(xe, &group->cgp_bo->vmap, 801 (33 + q->multi_queue.pos * 2) * sizeof(u32), 802 u32, guc_id); 803 804 if (q->multi_queue.pos / 32) { 805 xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), 806 u32, BIT(q->multi_queue.pos % 32)); 807 xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0); 808 } else { 809 xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), 810 u32, BIT(q->multi_queue.pos)); 811 xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0); 812 } 813 } 814 815 static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, 816 struct xe_exec_queue *q, 817 const u32 *action, u32 len) 818 { 819 struct xe_exec_queue_group *group = q->multi_queue.group; 820 struct xe_device *xe = guc_to_xe(guc); 821 enum xe_multi_queue_priority priority; 822 long ret; 823 824 /* 825 * As all queues of a multi queue group use single drm scheduler 826 * submit workqueue, CGP synchronization with GuC are serialized. 827 * Hence, no locking is required here. 828 * Wait for any pending CGP_SYNC_DONE response before updating the 829 * CGP page and sending CGP_SYNC message. 830 * 831 * FIXME: Support VF migration 832 */ 833 ret = wait_event_timeout(guc->ct.wq, 834 !READ_ONCE(group->sync_pending) || 835 xe_guc_read_stopped(guc), HZ); 836 if (!ret || xe_guc_read_stopped(guc)) { 837 /* CGP_SYNC failed. 
Reset gt, cleanup the group */ 838 xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n"); 839 set_exec_queue_group_banned(q); 840 xe_gt_reset_async(q->gt); 841 xe_guc_exec_queue_group_trigger_cleanup(q); 842 return; 843 } 844 845 scoped_guard(spinlock, &q->multi_queue.lock) 846 priority = q->multi_queue.priority; 847 848 xe_lrc_set_multi_queue_priority(q->lrc[0], priority); 849 xe_guc_exec_queue_group_cgp_update(xe, q); 850 851 WRITE_ONCE(group->sync_pending, true); 852 xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1); 853 } 854 855 static void guc_exec_queue_send_cgp_sync(struct xe_exec_queue *q) 856 { 857 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) 858 struct xe_guc *guc = exec_queue_to_guc(q); 859 struct xe_exec_queue_group *group = q->multi_queue.group; 860 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; 861 int len = 0; 862 863 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; 864 action[len++] = group->primary->guc->id; 865 866 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); 867 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE 868 869 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 870 } 871 872 static void __register_exec_queue_group(struct xe_exec_queue *q, 873 struct guc_ctxt_registration_info *info) 874 { 875 struct xe_guc *guc = exec_queue_to_guc(q); 876 #define MAX_MULTI_QUEUE_REG_SIZE (8) 877 u32 action[MAX_MULTI_QUEUE_REG_SIZE]; 878 int len = 0; 879 880 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE; 881 action[len++] = info->flags; 882 action[len++] = info->context_idx; 883 action[len++] = info->engine_class; 884 action[len++] = info->engine_submit_mask; 885 action[len++] = 0; /* Reserved */ 886 action[len++] = info->cgp_lo; 887 action[len++] = info->cgp_hi; 888 889 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE); 890 #undef MAX_MULTI_QUEUE_REG_SIZE 891 892 /* 893 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE do expect a 894 * 
XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response 895 * from guc. 896 */ 897 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 898 } 899 900 static void __register_mlrc_exec_queue(struct xe_guc *guc, 901 struct xe_exec_queue *q, 902 struct guc_ctxt_registration_info *info) 903 { 904 #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) 905 u32 action[MAX_MLRC_REG_SIZE]; 906 int len = 0; 907 int i; 908 909 xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q)); 910 911 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 912 action[len++] = info->flags; 913 action[len++] = info->context_idx; 914 action[len++] = info->engine_class; 915 action[len++] = info->engine_submit_mask; 916 action[len++] = info->wq_desc_lo; 917 action[len++] = info->wq_desc_hi; 918 action[len++] = info->wq_base_lo; 919 action[len++] = info->wq_base_hi; 920 action[len++] = info->wq_size; 921 action[len++] = q->width; 922 action[len++] = info->hwlrca_lo; 923 action[len++] = info->hwlrca_hi; 924 925 for (i = 1; i < q->width; ++i) { 926 struct xe_lrc *lrc = q->lrc[i]; 927 928 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); 929 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); 930 } 931 932 /* explicitly checks some fields that we might fixup later */ 933 xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == 934 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]); 935 xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == 936 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]); 937 xe_gt_assert(guc_to_gt(guc), q->width == 938 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]); 939 xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == 940 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]); 941 xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); 942 #undef MAX_MLRC_REG_SIZE 943 944 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 945 } 946 947 static void __register_exec_queue(struct xe_guc *guc, 948 struct 
guc_ctxt_registration_info *info) 949 { 950 u32 action[] = { 951 XE_GUC_ACTION_REGISTER_CONTEXT, 952 info->flags, 953 info->context_idx, 954 info->engine_class, 955 info->engine_submit_mask, 956 info->wq_desc_lo, 957 info->wq_desc_hi, 958 info->wq_base_lo, 959 info->wq_base_hi, 960 info->wq_size, 961 info->hwlrca_lo, 962 info->hwlrca_hi, 963 }; 964 965 /* explicitly checks some fields that we might fixup later */ 966 xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == 967 action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]); 968 xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == 969 action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]); 970 xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == 971 action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]); 972 973 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 974 } 975 976 static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) 977 { 978 struct xe_guc *guc = exec_queue_to_guc(q); 979 struct xe_device *xe = guc_to_xe(guc); 980 struct xe_lrc *lrc = q->lrc[0]; 981 struct guc_ctxt_registration_info info; 982 983 xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); 984 xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT); 985 986 memset(&info, 0, sizeof(info)); 987 info.context_idx = q->guc->id; 988 info.engine_class = xe_engine_class_to_guc_class(q->class); 989 info.engine_submit_mask = q->logical_mask; 990 info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); 991 info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); 992 info.flags = CONTEXT_REGISTRATION_FLAG_KMD | 993 FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type); 994 995 if (xe_exec_queue_is_multi_queue(q)) { 996 struct xe_exec_queue_group *group = q->multi_queue.group; 997 998 info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo); 999 info.cgp_hi = 0; 1000 } 1001 1002 if (xe_exec_queue_is_parallel(q)) { 1003 u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); 1004 struct iosys_map map = xe_lrc_parallel_map(lrc); 1005 1006 
info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		/*
		 * Start from an empty work queue: reset the software head and
		 * tail, clear PARALLEL_SCRATCH_SIZE - WQ_SIZE bytes of the
		 * scratch mapping and set the status to WQ_STATUS_ACTIVE.
		 */
		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/* The registered bit is set before any message is sent */
	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	/* Secondaries send no registration message, only a CGP sync */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		guc_exec_queue_send_cgp_sync(q);
}

/*
 * Return the number of bytes between the software work queue tail of @q and
 * the end of the work queue (WQ_SIZE).
 */
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

/*
 * Wait until the work queue of @q has room for @wqi_size bytes.
 *
 * Free space is first computed from the cached head. If that is not enough
 * the head is re-read from the shared descriptor and, while space is still
 * short, the function sleeps via xe_sleep_exponential_ms() and retries. Once
 * the accumulated sleep time exceeds 2000 ms a GT reset is queued and
 * -ENODEV is returned. No waiting is done while vf_recovery() is true.
 *
 * Return: 0 when there is room (or VF recovery is in progress), -ENODEV on
 * timeout.
 */
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

/* Free bytes in the ring given the cached tail and head */
#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		/* Refresh the cached head from the shared descriptor */
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
return -ENODEV;
			}

			sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64);
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

/*
 * Fill the remainder of the work queue of @q with a single NOOP item so that
 * the next item starts at offset 0.
 *
 * Waits for the space up to the end of the queue, writes a WQ_TYPE_NOOP
 * header at the current tail whose length field covers that space minus the
 * header dword, and resets the software tail to 0.
 *
 * Return: 0 on success, -ENODEV if waiting for space failed.
 */
static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	/* Length in dwords, not counting the header dword itself */
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	/* wqi_tail is a byte offset, wq[] is indexed in dwords */
	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

/*
 * Append a WQ_TYPE_MULTI_LRC item for @q to its work queue and publish the
 * new tail in the shared work queue descriptor.
 *
 * Item layout in dwords: header, descriptor of lrc[0], guc_id combined with
 * the ring tail of lrc[0], a zero dword, then the ring tail of every further
 * LRC. Ring tails are written in units of sizeof(u64).
 *
 * If the item does not fit before the end of the queue, a NOOP item is
 * appended first so that the item starts at offset 0. If waiting for space
 * fails the function returns without writing anything.
 */
static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	/* Length in dwords, not counting the header dword itself */
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	/*
	 * NOTE(review): the registration helpers in this file split
	 * xe_lrc_descriptor() with lower/upper_32_bits(); here the value is
	 * stored into a single u32 - confirm that keeping only the low half
	 * is intended.
	 */
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	/* Every dword accounted for in wqi_size must have been filled */
	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	/* Point the mapping at the current tail slot of the work queue */
	iosys_map_incr(&map,
offsetof(struct guc_submit_parallel_scratch,
					wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	/* Presumably orders the item write before the tail update below */
	xe_device_wmb(xe);

	/* map was advanced above; take a fresh one to reach the descriptor */
	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

/*
 * Sentinel written to q->guc->resume_time when a scheduling enable is
 * requested; the suspend path waits until the field holds something else.
 */
#define RESUME_PENDING	~0x0ull
/*
 * Hand the work of @job on @q over to the GuC.
 *
 * First publishes the new ring tail: a work queue item for parallel queues,
 * a ring tail write otherwise (skipped while the idle-skip-suspend state is
 * set). Then, using the queue returned by
 * xe_exec_queue_multi_queue_primary(), sends either a scheduling enable
 * request (queue neither enabled nor suspended) or a plain
 * XE_GUC_ACTION_SCHED_CONTEXT. A parallel queue that had to be enabled gets
 * an additional XE_GUC_ACTION_SCHED_CONTEXT afterwards.
 *
 * Nothing is sent for a suspended non-parallel queue.
 */
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	/* For restore-replay jobs the tail is only published on the last one */
	if (!job->restore_replay || job->last_replay) {
		if (xe_exec_queue_is_parallel(q))
			wq_item_append(q);
		else if (!exec_queue_idle_skip_suspend(q))
			xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
		job->last_replay = false;
	}

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	/*
	 * All queues in a multi-queue group will use the primary queue
	 * of the group to interface with GuC.
*/
	q = xe_exec_queue_multi_queue_primary(q);

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		/* Request scheduling enable; space for one G2H is reserved */
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		/* State is updated before the message goes out */
		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	/* Parallel queue that was just enabled: follow up with a submit */
	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

/*
 * run_job callback installed in drm_sched_ops.
 *
 * Registers the queue (and, for a multi-queue secondary, its primary) with
 * the GuC if that has not happened yet, emits the job through
 * q->ring_ops->emit_job() unless it is flagged restore_replay, and submits
 * it.
 *
 * All of that is skipped when the queue is killed, banned or wedged, when
 * the job already carries an error, or - for a multi-queue secondary - when
 * the primary queue is killed, banned or wedged.
 *
 * Return: job->fence in every case.
 */
static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	/* Sampled once on entry */
	bool killed_or_banned_or_wedged =
		exec_queue_killed_or_banned_or_wedged(q);

	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

			if (exec_queue_killed_or_banned_or_wedged(primary))
				goto run_job_out;

			/* The primary is registered before the secondary */
			if (!exec_queue_registered(primary))
				register_exec_queue(primary, GUC_CONTEXT_NORMAL);
		}

		if (!exec_queue_registered(q))
			register_exec_queue(q,
GUC_CONTEXT_NORMAL);
		/* emit_job() is skipped for jobs flagged restore_replay */
		if (!job->restore_replay)
			q->ring_ops->emit_job(job);
		submit_exec_queue(q, job);
		job->restore_replay = false;
	}

run_job_out:

	return job->fence;
}

/*
 * free_job callback installed in drm_sched_ops: traces the job and calls
 * xe_sched_job_put() on it.
 */
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

/**
 * xe_guc_read_stopped() - Read the GuC submission stopped state
 * @guc: the GuC object
 *
 * Return: current value of the guc->submission_state.stopped atomic.
 */
int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

/* Forward declarations of helpers that are used before their definition */
static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state);
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);

/*
 * Declare a local "u32 action[]" holding an
 * XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET message for @q. @enable_disable is
 * pasted onto GUC_CONTEXT_ (ENABLE or DISABLE in this file).
 */
#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		q->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}

/*
 * Disable scheduling of @q and flag it as destroyed.
 *
 * Waits up to 5 seconds for a pending enable/disable to finish (the wait also
 * ends when GuC submission is stopped or VF recovery is in progress). On a
 * timeout outside VF recovery a warning is printed, submission on the
 * queue's scheduler is restarted, a GT reset and an immediate TDR run are
 * queued, and the function returns without sending anything.
 *
 * Otherwise the enabled bit is cleared, pending-disable and destroyed are
 * set, and the disable is either handed to
 * handle_multi_queue_secondary_sched_done() (multi-queue secondary) or sent
 * to the GuC with space reserved for two G2H responses.
 */
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);

	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc),
				 HZ * 5);
	if (!ret && !vf_recovery(guc)) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
* handler and we are not allowed to reserve G2H space in handlers.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
			       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Does nothing if the submission state has not been initialized yet.
 *
 * In XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET mode, save the exec queues
 * registered with the GuC by taking a reference on each queue found in the
 * lookup table and marking it wedged. In any other mode, forcefully stop
 * submission: prepare for reset, stop submission, soft-reset the GuC,
 * sanitize its firmware state and abort paused submissions.
 *
 * NOTE(review): the previous text said a DRMM handler is registered to drop
 * the references on driver unload; no handler is registered in this
 * function - confirm where the references are released.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
		mutex_lock(&guc->submission_state.lock);
		/* Only queues whose refcount is still non-zero get wedged */
		xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
			if (xe_exec_queue_get_unless_zero(q))
				set_exec_queue_wedged(q);
		mutex_unlock(&guc->submission_state.lock);
	} else {
		/* Forcefully kill any remaining exec queues, signal fences */
		guc_submit_reset_prepare(guc);
		xe_guc_submit_stop(guc);
		xe_guc_softreset(guc);
		xe_uc_fw_sanitize(&guc->fw);
		xe_guc_submit_pause_abort(guc);
	}
}

/*
 * Declare the device wedged if the configured wedged mode is
 * XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET.
 *
 * Return: true in that mode (the device is declared wedged unless it already
 * is), false for every other mode.
 */
static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}

/* Scale __t by 105/100, i.e. add five percent */
#define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)
/*
 * Decide whether @job on @q has really exceeded the queue's job timeout.
 *
 * The running time is the difference between the lower 32 bits of the
 * timestamp of q->lrc[0] and the value returned by
 * xe_lrc_ctx_job_timestamp(), converted to milliseconds and increased by
 * five percent. That is compared against q->sched_props.job_timeout_ms.
 *
 * Return: true if the running time reached the timeout. If the job has not
 * started, or the context timestamp equals the one sampled on the previous
 * check, the result of xe_sched_invalidate_job() is returned instead.
 */
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
	/* Same value as on the previous check: the timestamp did not move */
	if (ctx_timestamp == job->sample_timestamp) {
		/* Same message, lower severity when running as an SR-IOV VF */
		if (IS_SRIOV_VF(gt_to_xe(gt)))
			xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				     xe_sched_job_seqno(job),
				     xe_sched_job_lrc_seqno(job), q->guc->id);
		else
			xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				   xe_sched_job_seqno(job),
				   xe_sched_job_lrc_seqno(job), q->guc->id);

		return xe_sched_invalidate_job(job, 0);
	}

	/* Remember the sample for the comparison on the next check */
	job->sample_timestamp = ctx_timestamp;
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid and catch
	 * possible overflows with a high timeout.
*/
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	/* Unsigned 32-bit subtraction, so a single counter wrap is tolerated */
	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}

/*
 * Request scheduling enable for @q and wait for it to be acknowledged.
 *
 * The queue must be registered, not destroyed and have no enable or disable
 * pending (asserted). After the request is issued the function waits up to
 * 5 seconds for the pending-enable bit to clear. On a timeout outside VF
 * recovery, or if GuC submission is stopped, the queue is banned and a GT
 * reset plus an immediate TDR run are queued.
 */
static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	/* State is updated before the request goes out */
	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	/* Secondaries do not send the mode-set message themselves */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 1);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

/*
 * Request scheduling disable for @q; this function does not wait for the
 * response.
 *
 * @immediate: when true and @q is not a multi-queue secondary,
 * set_min_preemption_timeout() is called before the request is issued.
 *
 * The queue must be registered, not destroyed and have no disable pending
 * (asserted).
 */
static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);
	/* State is updated before the request goes out */
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/* Secondaries do not send the mode-set message themselves */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

/*
 * timedout_job callback installed in drm_sched_ops.
 *
 * Stops submission, decides whether @drm_job really timed out and, if so,
 * bans the queue, disables scheduling on the queue that talks to the GuC,
 * captures a devcoredump, marks the job and all pending jobs as failed and
 * triggers queue cleanup. If the job has not timed out, or a GT reset was
 * queued instead, submission is restarted at the "rearm" label. When VF
 * recovery is detected while waiting, the function returns through
 * "handle_vf_resume" without restarting submission here.
 *
 * Return: DRM_GPU_SCHED_STAT_NO_HANG on every path.
 */
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct drm_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q, *primary;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	/* Error set on the timed out job; becomes -EIO if the primary was reset */
	int err = -ETIME;
	pid_t pid = -1;
	bool wedged = false, skip_timeout_check;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));

	primary = xe_exec_queue_multi_queue_primary(q);

	/*
	 * TDR has fired before free job worker. Common if exec queue
	 * immediately closed after last fence signaled. Add back to pending
	 * list so job can be freed and kick scheduler ensuring free job is not
	 * lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
	    vf_recovery(guc))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_stop(q);
	else
		xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q);

	/* Skip timeout check if multi-queue group is banned */
	if (xe_exec_queue_is_multi_queue(q) &&
	    READ_ONCE(q->multi_queue.group->banned))
		skip_timeout_check = true;

	/* LR jobs can only get here if queue has been killed or hit an error */
	if (xe_exec_queue_is_lr(q))
		xe_gt_assert(guc_to_gt(guc), skip_timeout_check);

	/*
	 * If devcoredump not captured and GuC capture for the job is not ready
	 * do manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		/* The capture is attempted even if forcewake was not obtained */
		xe_engine_snapshot_capture_for_queue(q);
	}

	/*
	 * Check if job is actually timed out, if so restart job execution and TDR
	 */
	if (!skip_timeout_check && !check_timeout(q, job))
		goto rearm;

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	set_exec_queue_banned(q);

	/* Kick job / queue off hardware */
	if (!wedged && (exec_queue_enabled(primary) ||
			exec_queue_pending_disable(primary))) {
		int ret;

		if (exec_queue_reset(primary))
			err = -EIO;

		if (xe_uc_fw_is_running(&guc->fw)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(primary) &&
						  !exec_queue_pending_disable(primary)) ||
						 xe_guc_read_stopped(guc) ||
						 vf_recovery(guc), HZ * 5);
			if (vf_recovery(guc))
				goto handle_vf_resume;
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			disable_scheduling(primary, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences, if GT broken the GT reset code should signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !xe_uc_fw_is_running(&guc->fw) ||
					 !exec_queue_pending_disable(primary) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			goto handle_vf_resume;
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			/* Also reached by goto from the first wait above */
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   primary->guc->id);
			xe_devcoredump(primary, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       primary->guc->id, ret, xe_guc_read_stopped(guc));
			xe_gt_reset_async(primary->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	/* Name and pid of the owning process, if the queue has a VM with a file */
	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do
	 * something has gone wrong and the GT needs a reset
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Mark all outstanding jobs as bad, thus completing them */
	xe_sched_job_set_error(job, err);
	drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
		xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);

	if (xe_exec_queue_is_multi_queue(q)) {
		xe_guc_exec_queue_group_start(q);
		xe_guc_exec_queue_group_trigger_cleanup(q);
	} else {
		xe_sched_submission_start(sched);
		xe_guc_exec_queue_trigger_cleanup(q);
	}

	/*
	 * We want the job added back to the pending list so it gets freed; this
	 * is what DRM_GPU_SCHED_STAT_NO_HANG does.
	 */
	return DRM_GPU_SCHED_STAT_NO_HANG;

rearm:
	/*
	 * XXX: Ideally want to adjust timeout based on current execution time
	 * but there is not currently an easy way to do in DRM scheduler. With
	 * some thought, do this in a follow up.
	 */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_start(q);
	else
		xe_sched_submission_start(sched);
handle_vf_resume:
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

/*
 * Release the GuC id of @q, tear down its scheduler entity and scheduler,
 * and free the xe_guc_exec_queue through kfree_rcu().
 */
static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due to sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

/*
 * Work handler that destroys the exec queue owning @w (its destroy_async
 * member).
 *
 * Runs under the xe_pm_runtime guard. A multi-queue secondary is first
 * removed from its group's list under list_lock. The scheduler's TDR work is
 * then cancelled synchronously before xe_exec_queue_fini() is called.
 */
static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	guard(xe_pm_runtime)(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		mutex_lock(&group->list_lock);
		list_del(&q->multi_queue.link);
		mutex_unlock(&group->list_lock);
	}

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);
}

/*
 * Destroy @q: run the destroy handler directly for permanent or wedged
 * queues, otherwise queue it on xe->destroy_wq.
 */
static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

/* Destroy @q on the driver side; @guc is not used by the visible body */
static void __guc_exec_queue_destroy(struct xe_guc *guc,
struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that it
	 * is.
	 */
	guc_exec_queue_destroy_async(q);
}

/*
 * Handler for the CLEANUP message: tear down the exec queue stored in
 * @msg->private_data. Must not be used on permanent queues (asserted).
 */
static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	/*
	 * Expected state transitions for cleanup:
	 * - If the exec queue is registered and GuC firmware is running, we must first
	 *   disable scheduling and deregister the queue to ensure proper teardown and
	 *   resource release in the GuC, then destroy the exec queue on driver side.
	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
	 *   we cannot expect a response for the deregister request. In this case,
	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
	 *   will not process further requests and all resources must be cleaned up locally.
*/
	if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_destroy(guc, q);
}

/*
 * Return: true if @q is registered and is neither killed, banned nor wedged.
 */
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
}

/*
 * Handler for the SET_SCHED_PROPS message: re-run init_policies() for the
 * queue if it is allowed to change state, then free @msg with kfree().
 */
static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

/*
 * Clear q->guc->suspend_pending and wake up whoever waits for the suspend to
 * complete. Does nothing if no suspend is pending.
 */
static void __suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	if (!q->guc->suspend_pending)
		return;

	WRITE_ONCE(q->guc->suspend_pending, false);

	/*
	 * We use a GuC shared wait queue for VFs because the VF resfix start
	 * interrupt must be able to wake all instances of suspend_wait. This
	 * prevents the VF migration worker from being starved during
	 * scheduling.
*/
	if (IS_SRIOV_VF(xe))
		wake_up_all(&guc->ct.wq);
	else
		wake_up(&q->guc->suspend_wait);
}

/*
 * Same as __suspend_fence_signal(), but asserts first that a suspend is
 * pending and that @q is suspended or killed, or GuC submission is stopped.
 */
static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
		     xe_guc_read_stopped(guc));
	xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);

	__suspend_fence_signal(q);
}

/*
 * Handler for the SUSPEND message.
 *
 * If the queue is enabled, not yet suspended, allowed to change state and
 * xe_exec_queue_idle_skip_suspend() is false: wait until no resume and no
 * disable is pending (or VF recovery is in progress), then - unless GuC
 * submission is stopped - sleep out what is left of the VM's
 * preempt.min_run_period_ms since the last resume, mark the queue suspended
 * and request scheduling disable.
 *
 * Otherwise, if a suspend is pending, mark the queue suspended (and
 * idle-skip-suspend where applicable) and signal the suspend fence right
 * away.
 */
static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);
	bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q);

	if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) &&
	    !exec_queue_suspended(q) && exec_queue_enabled(q)) {
		wait_event(guc->ct.wq, vf_recovery(guc) ||
			   ((q->guc->resume_time != RESUME_PENDING ||
			     xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));

		if (!xe_guc_read_stopped(guc)) {
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       q->guc->resume_time);
			s64 wait_ms = q->vm->preempt.min_run_period_ms -
				since_resume_ms;

			/* A resume_time of 0 skips the minimum run period wait */
			if (wait_ms > 0 && q->guc->resume_time)
				xe_sleep_relaxed_ms(wait_ms);

			set_exec_queue_suspended(q);
			disable_scheduling(q, false);
		}
	} else if (q->guc->suspend_pending) {
		if (idle_skip_suspend)
			set_exec_queue_idle_skip_suspend(q);
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
}

/*
 * Write the ring tail of the first LRC of @q and send an
 * XE_GUC_ACTION_SCHED_CONTEXT message for it.
 *
 * @q must be a non-parallel, registered queue that is not destroyed and has
 * no disable pending (asserted).
 */
static void sched_context(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[] = {
		XE_GUC_ACTION_SCHED_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc),
exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	trace_xe_exec_queue_submit(q);

	xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
	/* Last two arguments are 0: no G2H space is reserved for this send */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * Handler for the RESUME message.
 *
 * If the queue is allowed to change state, the suspended bit is cleared and
 *  - a queue that is not enabled gets scheduling enabled again (after
 *    writing the ring tail if it was in the idle-skip-suspend state);
 *  - a queue that is still enabled but in the idle-skip-suspend state only
 *    gets a sched_context().
 * If the queue is not allowed to change state, only the suspended and
 * idle-skip-suspend bits are cleared.
 */
static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q)) {
		clear_exec_queue_suspended(q);
		if (!exec_queue_enabled(q)) {
			if (exec_queue_idle_skip_suspend(q)) {
				struct xe_lrc *lrc = q->lrc[0];

				clear_exec_queue_idle_skip_suspend(q);
				xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
			}
			/* Set before the enable request is issued */
			q->guc->resume_time = RESUME_PENDING;
			set_exec_queue_pending_resume(q);
			enable_scheduling(q);
		} else if (exec_queue_idle_skip_suspend(q)) {
			clear_exec_queue_idle_skip_suspend(q);
			sched_context(q);
		}
	} else {
		clear_exec_queue_suspended(q);
		clear_exec_queue_idle_skip_suspend(q);
	}
}

/*
 * Handler for the SET_MULTI_QUEUE_PRIORITY message: send a CGP sync for the
 * queue if it is allowed to change state, then free @msg with kfree().
 */
static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q))
		guc_exec_queue_send_cgp_sync(q);

	kfree(msg);
}

/* Message opcodes dispatched by guc_exec_queue_process_msg() */
#define CLEANUP				1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS			2
#define SUSPEND				3
#define RESUME				4
#define SET_MULTI_QUEUE_PRIORITY	5
/* Bits of the opcode argument that hold the opcode itself */
#define OPCODE_MASK	0xf
/* Flags OR'ed into the opcode argument of guc_exec_queue_add_msg() */
#define MSG_LOCKED	BIT(8)
#define MSG_HEAD	BIT(9)

/*
 * process_msg callback installed in xe_sched_ops: dispatch @msg to the
 * handler matching msg->opcode, then call xe_pm_runtime_put().
 */
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	/* Looked up before dispatch; some handlers free @msg */
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));

	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	case SET_MULTI_QUEUE_PRIORITY:
		__guc_exec_queue_process_msg_set_multi_queue_priority(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}

	/* Presumably pairs with the get in guc_exec_queue_add_msg() */
	xe_pm_runtime_put(xe);
}

/* DRM scheduler callbacks implemented by the GuC submission backend */
static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

/* Xe scheduler message callback implemented by the GuC submission backend */
static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

/*
 * Allocate and set up the GuC submission state (q->guc) of exec queue @q:
 * scheduler, scheduler entity and GuC id.
 *
 * The job timeout is MAX_SCHEDULE_TIMEOUT for queues whose VM is in LR mode
 * and q->sched_props.job_timeout_ms otherwise. If GuC submission is stopped
 * or VF recovery is in progress, the new scheduler is stopped right away.
 * A multi-queue secondary is added to its group's list, with submission
 * paused if the group is stopped.
 *
 * Return: 0 on success, -ENOMEM or the error of the failing setup step
 * otherwise. On failure everything set up so far is released again.
 */
static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct workqueue_struct *submit_wq = NULL;
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err, i;

	xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc_obj(*ge);
	if (!ge)
		return -ENOMEM;

	q->guc = ge;
	ge->q = q;
	init_rcu_head(&ge->rcu);
	init_waitqueue_head(&ge->suspend_wait);

	/* An empty link marks a static message as not queued */
	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
		INIT_LIST_HEAD(&ge->static_msgs[i].link);

	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		  msecs_to_jiffies(q->sched_props.job_timeout_ms);

	/*
	 * Use primary queue's submit_wq for all secondary queues of a
	 * multi queue group. This serialization avoids any locking around
	 * CGP synchronization with GuC.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

		submit_wq = primary->guc->sched.base.submit_wq;
	}

	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;

	/* GuC id allocation and the stopped check share the state lock */
	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_entity;

	q->entity = &ge->entity;

	if (xe_guc_read_stopped(guc) || vf_recovery(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	xe_exec_queue_assign_name(q, q->guc->id);

	/*
	 * Maintain secondary queues of the multi queue group in a list
	 * for handling dependencies across the queues in the group.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		INIT_LIST_HEAD(&q->multi_queue.link);
		mutex_lock(&group->list_lock);
		if (group->stopped)
			WRITE_ONCE(q->guc->sched.base.pause_submit, true);
		list_add_tail(&q->multi_queue.link, &group->list);
		mutex_unlock(&group->list_lock);
	}

	if (xe_exec_queue_is_multi_queue(q))
		trace_xe_exec_queue_create_multi_queue(q);
	else
		trace_xe_exec_queue_create(q);

	return 0;

err_entity:
	/* Reached with submission_state.lock still held */
	mutex_unlock(&guc->submission_state.lock);
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	/* NOTE(review): q->guc still points at ge after this kfree() */
	kfree(ge);

	return err;
}

/*
 * Kill @q: set the killed state, signal a pending suspend (if any) and
 * trigger cleanup of the queue.
 */
static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
	trace_xe_exec_queue_kill(q);
	set_exec_queue_killed(q);
	__suspend_fence_signal(q);
	xe_guc_exec_queue_trigger_cleanup(q);
}

/*
 * Queue @msg for @q on the queue's scheduler.
 *
 * @opcode: message opcode (masked with OPCODE_MASK before it is stored),
 * optionally OR'ed with MSG_HEAD and/or MSG_LOCKED. MSG_HEAD selects
 * xe_sched_add_msg_head(), otherwise MSG_LOCKED selects
 * xe_sched_add_msg_locked(), otherwise xe_sched_add_msg() is used.
 *
 * xe_pm_runtime_get_noresume() is called first; guc_exec_queue_process_msg()
 * calls xe_pm_runtime_put() after handling a message.
 */
static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
				   u32 opcode)
{
	xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));

	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode & OPCODE_MASK;
	msg->private_data = q;

	trace_xe_sched_msg_add(msg);
	if (opcode & MSG_HEAD)
		xe_sched_add_msg_head(&q->guc->sched, msg);
	else if (opcode & MSG_LOCKED)
		xe_sched_add_msg_locked(&q->guc->sched, msg);
	else
		xe_sched_add_msg(&q->guc->sched, msg);
}

/*
 * Queue @msg with MSG_LOCKED | MSG_HEAD set, unless its link is not empty,
 * in which case nothing is done.
 */
static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q,
					    struct xe_sched_msg *msg,
					    u32 opcode)
{
	if (!list_empty(&msg->link))
		return;

	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD);
}

static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
				       struct xe_sched_msg *msg,
				       u32 opcode)
{
	if (!list_empty(&msg->link))
		return false;

2078 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 2079 2080 return true; 2081 } 2082 2083 #define STATIC_MSG_CLEANUP 0 2084 #define STATIC_MSG_SUSPEND 1 2085 #define STATIC_MSG_RESUME 2 2086 static void guc_exec_queue_destroy(struct xe_exec_queue *q) 2087 { 2088 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2089 2090 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) 2091 guc_exec_queue_add_msg(q, msg, CLEANUP); 2092 else 2093 __guc_exec_queue_destroy(exec_queue_to_guc(q), q); 2094 } 2095 2096 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 2097 enum xe_exec_queue_priority priority) 2098 { 2099 struct xe_sched_msg *msg; 2100 2101 if (q->sched_props.priority == priority || 2102 exec_queue_killed_or_banned_or_wedged(q)) 2103 return 0; 2104 2105 msg = kmalloc_obj(*msg); 2106 if (!msg) 2107 return -ENOMEM; 2108 2109 q->sched_props.priority = priority; 2110 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2111 2112 return 0; 2113 } 2114 2115 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 2116 { 2117 struct xe_sched_msg *msg; 2118 2119 if (q->sched_props.timeslice_us == timeslice_us || 2120 exec_queue_killed_or_banned_or_wedged(q)) 2121 return 0; 2122 2123 msg = kmalloc_obj(*msg); 2124 if (!msg) 2125 return -ENOMEM; 2126 2127 q->sched_props.timeslice_us = timeslice_us; 2128 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2129 2130 return 0; 2131 } 2132 2133 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 2134 u32 preempt_timeout_us) 2135 { 2136 struct xe_sched_msg *msg; 2137 2138 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 2139 exec_queue_killed_or_banned_or_wedged(q)) 2140 return 0; 2141 2142 msg = kmalloc_obj(*msg); 2143 if (!msg) 2144 return -ENOMEM; 2145 2146 q->sched_props.preempt_timeout_us = preempt_timeout_us; 2147 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2148 2149 return 0; 2150 } 2151 2152 static int 
guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, 2153 enum xe_multi_queue_priority priority) 2154 { 2155 struct xe_sched_msg *msg; 2156 2157 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); 2158 2159 if (exec_queue_killed_or_banned_or_wedged(q)) 2160 return 0; 2161 2162 msg = kmalloc_obj(*msg); 2163 if (!msg) 2164 return -ENOMEM; 2165 2166 scoped_guard(spinlock, &q->multi_queue.lock) { 2167 if (q->multi_queue.priority == priority) { 2168 kfree(msg); 2169 return 0; 2170 } 2171 2172 q->multi_queue.priority = priority; 2173 } 2174 2175 guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); 2176 2177 return 0; 2178 } 2179 2180 static int guc_exec_queue_suspend(struct xe_exec_queue *q) 2181 { 2182 struct xe_gpu_scheduler *sched = &q->guc->sched; 2183 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2184 2185 if (exec_queue_killed_or_banned_or_wedged(q)) 2186 return -EINVAL; 2187 2188 xe_sched_msg_lock(sched); 2189 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) 2190 q->guc->suspend_pending = true; 2191 xe_sched_msg_unlock(sched); 2192 2193 return 0; 2194 } 2195 2196 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 2197 { 2198 struct xe_guc *guc = exec_queue_to_guc(q); 2199 struct xe_device *xe = guc_to_xe(guc); 2200 int ret; 2201 2202 /* 2203 * Likely don't need to check exec_queue_killed() as we clear 2204 * suspend_pending upon kill but to be paranoid but races in which 2205 * suspend_pending is set after kill also check kill here. 
2206 */ 2207 #define WAIT_COND \ 2208 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ 2209 xe_guc_read_stopped(guc)) 2210 2211 retry: 2212 if (IS_SRIOV_VF(xe)) 2213 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || 2214 vf_recovery(guc), 2215 HZ * 5); 2216 else 2217 ret = wait_event_interruptible_timeout(q->guc->suspend_wait, 2218 WAIT_COND, HZ * 5); 2219 2220 if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc)))) 2221 return -EAGAIN; 2222 2223 if (!ret) { 2224 xe_gt_warn(guc_to_gt(guc), 2225 "Suspend fence, guc_id=%d, failed to respond", 2226 q->guc->id); 2227 /* XXX: Trigger GT reset? */ 2228 return -ETIME; 2229 } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { 2230 /* Corner case on RESFIX DONE where vf_recovery() changes */ 2231 goto retry; 2232 } 2233 2234 #undef WAIT_COND 2235 2236 return ret < 0 ? ret : 0; 2237 } 2238 2239 static void guc_exec_queue_resume(struct xe_exec_queue *q) 2240 { 2241 struct xe_gpu_scheduler *sched = &q->guc->sched; 2242 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2243 struct xe_guc *guc = exec_queue_to_guc(q); 2244 2245 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); 2246 2247 xe_sched_msg_lock(sched); 2248 guc_exec_queue_try_add_msg(q, msg, RESUME); 2249 xe_sched_msg_unlock(sched); 2250 } 2251 2252 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 2253 { 2254 if (xe_exec_queue_is_multi_queue_secondary(q) && 2255 guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q))) 2256 return true; 2257 2258 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); 2259 } 2260 2261 static bool guc_exec_queue_active(struct xe_exec_queue *q) 2262 { 2263 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 2264 2265 return exec_queue_enabled(primary) && 2266 !exec_queue_pending_disable(primary); 2267 } 2268 2269 /* 2270 * All of these functions are an abstraction layer which other parts of Xe can 2271 * use to trap into the GuC 
backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
	.active = guc_exec_queue_active,
};

/*
 * Stop submission on @q and reset its software state.
 *
 * Only the WEDGED, BANNED, KILLED, DESTROYED and SUSPENDED state bits survive;
 * every other bit is cleared. A queue that was both registered and destroyed
 * when sampled is destroyed at the end, and a non-kernel, non-VM queue may be
 * banned depending on its first pending job.
 */
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool do_destroy = false;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	/* Must be sampled before the REGISTERED bit is cleared below */
	if (exec_queue_registered(q)) {
		if (exec_queue_destroyed(q))
			do_destroy = true;
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
		   EXEC_QUEUE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		bool ban = false;

		if (job) {
			if ((xe_sched_job_started(job) &&
			    !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				ban = true;
			}
		}

		if (ban) {
			set_exec_queue_banned(q);
			xe_guc_exec_queue_trigger_cleanup(q);
		}
	}

	if (do_destroy)
		__guc_exec_queue_destroy(guc, q);
}

/*
 * Mark GuC submission as stopped and wake all waiters on the CT waitqueue.
 *
 * Returns the previous value of submission_state.stopped, i.e. non-zero if
 * submission had already been stopped.
 */
static int guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to protect against concurrent
	 * resets and to release any TDRs waiting on
	 * guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

/*
 * Prepare GuC submission for a reset. Does nothing and returns 0 (with a
 * warning) during VF recovery, and also returns 0 when submission state has
 * not been initialized; otherwise returns guc_submit_reset_prepare().
 */
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

	return guc_submit_reset_prepare(guc);
}

/*
 * Block (uninterruptibly) until GuC submission is no longer stopped or the
 * device is wedged.
 */
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}

/*
 * Stop every exec queue known to @guc. Submission must already have been
 * marked stopped, see guc_submit_reset_prepare().
 */
void xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_stop(guc, q);
	}

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

}

/*
 * Undo state transitions of @q which were still pending when submission was
 * paused, and record in the needs_resume / needs_cleanup / needs_suspend
 * flags which operations have to be replayed by
 * guc_exec_queue_replay_pending_state_change().
 *
 * The pending bits are sampled and cleared in a fixed order; later checks
 * depend on the state left behind by earlier ones.
 */
static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
						       struct xe_exec_queue *q)
{
	bool pending_enable, pending_disable, pending_resume;

	pending_enable = exec_queue_pending_enable(q);
	pending_resume = exec_queue_pending_resume(q);

	if (pending_enable && pending_resume) {
		q->guc->needs_resume = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
			  q->guc->id);
	}

	/* A pending enable that is not part of a resume also drops the registration */
	if (pending_enable && !pending_resume) {
		clear_exec_queue_registered(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable) {
		clear_exec_queue_enabled(q);
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
			  q->guc->id);
	}

	/* Evaluated after the REGISTERED bit may have been cleared above */
	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
		clear_exec_queue_destroyed(q);
		q->guc->needs_cleanup = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
			  q->guc->id);
	}

	pending_disable = exec_queue_pending_disable(q);

	if (pending_disable && exec_queue_suspended(q)) {
		clear_exec_queue_suspended(q);
		q->guc->needs_suspend = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
			  q->guc->id);
	}

	if (pending_disable) {
		/* The disable never completed, so the queue counts as enabled again */
		if (!pending_enable)
			set_exec_queue_enabled(q);
		clear_exec_queue_pending_disable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
			  q->guc->id);
	}

	q->guc->resume_time = 0;
}

/* Overwrite every u32 of the parallel work queue of @lrc with a zero-length NOOP */
static void lrc_parallel_clear(struct xe_lrc *lrc)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);
	struct iosys_map map = xe_lrc_parallel_map(lrc);
	int i;

	for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
		parallel_write(xe, map, wq[i],
			       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
			       FIELD_PREP(WQ_LEN_MASK, 0));
}

/*
 * This function is quite complex, but it is the only real way to ensure no
 * state is lost during VF resume flows. The function scans the queue state,
 * makes adjustments as needed, and queues jobs / messages which are replayed
 * upon unpause.
 */
static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job;
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);
	cancel_delayed_work_sync(&sched->base.work_tdr);

	guc_exec_queue_revert_pending_state_change(guc, q);

	if (xe_exec_queue_is_parallel(q)) {
		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
		struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);

		/*
		 * NOP existing WQ commands that may contain stale GGTT
		 * addresses. These will be replayed upon unpause. The hardware
		 * seems to get confused if the WQ head/tail pointers are
		 * adjusted.
		 */
		if (lrc)
			lrc_parallel_clear(lrc);
	}

	job = xe_sched_first_pending_job(sched);
	if (job) {
		/* Consumed by guc_exec_queue_unpause_prepare() */
		job->restore_replay = true;

		/*
		 * Adjust software tail so jobs submitted overwrite previous
		 * position in ring buffer with new GGTT addresses.
		 */
		for (i = 0; i < q->width; ++i)
			q->lrc[i]->ring.tail = job->ptrs[i].head;
	}
}

/**
 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
* @guc: the &xe_guc struct instance whose scheduler is to be disabled
 *
 * Stops scheduler submission for every entry in the exec queue lookup table,
 * without touching any other queue state.
 */
void xe_guc_submit_pause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_stop(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
 *
 * Must only be called on a VF while VF recovery is in progress. Each exec
 * queue is paused once via guc_exec_queue_pause().
 */
void xe_guc_submit_pause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_pause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Restart submission on @q. Unless the queue is killed, banned or wedged, the
 * ring tails are rewound to the first pending job and all pending jobs are
 * resubmitted. The scheduler and its TDR are restarted in every case.
 */
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned_or_wedged(q)) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		int i;

		trace_xe_exec_queue_resubmit(q);
		if (job) {
			for (i = 0; i < q->width; ++i) {
				/*
				 * The GuC context is unregistered at this point
				 * in time, adjusting software ring tail ensures
				 * jobs are rewritten in original placement,
				 * adjusting LRC tail ensures the newly loaded
				 * GuC / contexts only view the LRC tail
				 * increasing as jobs are written out.
				 */
				q->lrc[i]->ring.tail = job->ptrs[i].head;
				xe_lrc_set_ring_tail(q->lrc[i],
						     xe_lrc_ring_head(q->lrc[i]));
			}
		}
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
	xe_sched_submission_resume_tdr(sched);
}

/*
 * Restart GuC submission after a stop: decrement the stopped counter, restart
 * every exec queue once and wake all waiters on the CT waitqueue.
 *
 * Always returns 0.
 */
int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to start parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_start(q);
	}
	mutex_unlock(&guc->submission_state.lock);

	/* Releases e.g. xe_guc_submit_reset_wait() */
	wake_up_all(&guc->ct.wq);

	return 0;
}

/*
 * Re-emit the ring commands of pending jobs of @q ahead of an unpause.
 *
 * Walking the pending list in order, emission starts at the first job flagged
 * restore_replay and continues for every job after it; each re-emitted job is
 * flagged restore_replay itself. The last pending job visited, if any, is
 * flagged last_replay whether or not it was re-emitted.
 */
static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
					   struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job = NULL;
	struct drm_sched_job *s_job;
	bool restore_replay = false;

	drm_sched_for_each_pending_job(s_job, &sched->base, NULL) {
		job = to_xe_sched_job(s_job);
		/* Sticky: once one job needs replay, all following jobs do too */
		restore_replay |= job->restore_replay;
		if (restore_replay) {
			xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
				  q->guc->id, xe_sched_job_seqno(job));

			q->ring_ops->emit_job(job);
			job->restore_replay = true;
		}
	}

	if (job)
		job->last_replay = true;
}

/**
 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
* @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
 *
 * Must only be called on a VF while VF recovery is in progress. Each exec
 * queue is prepared once via guc_exec_queue_unpause_prepare().
 */
void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to prepare parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_unpause_prepare(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Queue the scheduler messages recorded as needed by
 * guc_exec_queue_revert_pending_state_change() and clear the corresponding
 * needs_* flags.
 */
static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg;

	if (q->guc->needs_cleanup) {
		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

		guc_exec_queue_add_msg(q, msg, CLEANUP);
		q->guc->needs_cleanup = false;
	}

	if (q->guc->needs_suspend) {
		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
		xe_sched_msg_unlock(sched);

		q->guc->needs_suspend = false;
	}

	/*
	 * The resume must be in the message queue before the suspend as it is
	 * not possible for a resume to be issued if a suspend is pending, but
	 * the inverse is possible.
	 *
	 * NOTE(review): this relies on the resume being added at the head
	 * after the suspend was added at the head - confirm against
	 * xe_sched_add_msg_head().
	 */
	if (q->guc->needs_resume) {
		msg = q->guc->static_msgs + STATIC_MSG_RESUME;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
		xe_sched_msg_unlock(sched);

		q->guc->needs_resume = false;
	}
}

/*
 * Unpause @q: resubmit its jobs, replay pending state changes and restart the
 * scheduler and its TDR. A queue which was killed, banned or wedged on entry
 * additionally has cleanup triggered once submission has been restarted.
 */
static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	/* Sampled up front, before any jobs or messages are replayed */
	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);

	lockdep_assert_held(&guc->submission_state.lock);

	xe_sched_resubmit_jobs(sched);
	guc_exec_queue_replay_pending_state_change(q);
	xe_sched_submission_start(sched);
	if (needs_tdr)
		xe_guc_exec_queue_trigger_cleanup(q);
	xe_sched_submission_resume_tdr(sched);
}

/**
 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 *
 * Restarts scheduler submission for every entry in the exec queue lookup
 * table, without touching any other queue state.
 */
void xe_guc_submit_unpause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_start(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 *
 * Must only be called on a VF. Only queues whose scheduler is currently
 * stopped are unpaused.
 */
void xe_guc_submit_unpause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/*
		 * Prevent redundant attempts to unpause parallel queues, or
		 * queues created after resfix done.
		 */
		if (q->guc->id != index ||
		    !drm_sched_is_stopped(&q->guc->sched.base))
			continue;

		guc_exec_queue_unpause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
 *
 * Restarts scheduler submission on every exec queue and then kills the queue.
 */
void xe_guc_submit_pause_abort(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		/* Prevent redundant attempts to abort parallel queues */
		if (q->guc->id != index)
			continue;

		xe_sched_submission_start(sched);
		guc_exec_queue_kill(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Look up the exec queue a G2H message refers to.
 *
 * Returns the queue registered under @guc_id, or NULL (after logging an error)
 * if @guc_id is out of range or no queue is registered under it. The id is
 * expected to fall within [q->guc->id, q->guc->id + q->width).
 */
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
		return NULL;
	}

	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));

	return q;
}

/*
 * Deregister the GuC context of @q. The queue must be destroyed, registered
 * and have neither an enable nor a disable pending.
 *
 * For a secondary queue of a multi queue group no H2G is sent and the
 * deregistration is completed immediately; otherwise a DEREGISTER_CONTEXT
 * action is sent via xe_guc_ct_send_g2h_handler().
 */
static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	trace_xe_exec_queue_deregister(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_deregister_done(guc, q);
	else
		xe_guc_ct_send_g2h_handler(&guc->ct, action,
					   ARRAY_SIZE(action));
}

/*
 * Complete a scheduling enable (@runnable_state == 1) or disable
 * (@runnable_state == 0) of @q.
 *
 * An enable records the resume time, clears the pending resume / enable bits
 * and wakes waiters on the CT waitqueue. A disable either signals the suspend
 * fence (if a suspend is pending) or, for a destroyed queue, goes on to
 * deregister the context; the pending disable bit is cleared in every case.
 */
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
			      u32 runnable_state)
{
	trace_xe_exec_queue_scheduling_done(q);

	if (runnable_state == 1) {
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));

		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));

		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
			clear_exec_queue_pending_disable(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			if (exec_queue_destroyed(q)) {
				/*
				 * Make sure to clear the pending_disable only
				 * after sampling the destroyed state. We want
				 * to ensure we don't trigger the unregister too
				 * early with something intending to only
				 * disable scheduling. The caller doing the
				 * destroy must wait for an ongoing
				 * pending_disable before marking as destroyed.
				 */
				clear_exec_queue_pending_disable(q);
				deregister_exec_queue(guc, q);
			} else {
				clear_exec_queue_pending_disable(q);
			}
		}
	}
}

/* Run handle_sched_done() for a multi queue secondary queue under the CT lock */
static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state)
{
	/* Take CT lock here as handle_sched_done() does send a h2g message */
	mutex_lock(&guc->ct.lock);
	handle_sched_done(guc, q, runnable_state);
	mutex_unlock(&guc->ct.lock);
}

/*
 * G2H handler for a scheduling done notification. msg[0] is the guc_id and
 * msg[1] the runnable state.
 *
 * Returns 0 on success or -EPROTO if the message is too short, the guc_id does
 * not resolve to an exec queue, or the queue has neither an enable nor a
 * disable pending.
 */
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id, runnable_state;

	if (unlikely(len < 2))
		return -EPROTO;

	guc_id = msg[0];
	runnable_state = msg[1];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		xe_gt_err(guc_to_gt(guc),
			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
			  atomic_read(&q->guc->state), q->guc->id,
			  runnable_state);
		return -EPROTO;
	}

	handle_sched_done(guc, q, runnable_state);

	return 0;
}

/* Complete deregistration of @q: clear the registered bit and destroy the queue */
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_deregister_done(q);

	clear_exec_queue_registered(q);
	__guc_exec_queue_destroy(guc, q);
}

/*
 * G2H handler for a deregister done notification. msg[0] is the guc_id.
 *
 * Returns 0 on success or -EPROTO if the message is too short, the guc_id does
 * not resolve to an exec queue, or the queue is not destroyed or is still
 * enabled or has an enable / disable pending.
 */
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
		xe_gt_err(guc_to_gt(guc),
			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
			  atomic_read(&q->guc->state), q->guc->id);
		return -EPROTO;
	}

	handle_deregister_done(guc, q);

	return 0;
}

/*
 * G2H handler for an engine reset notification. msg[0] is the guc_id.
 *
 * Logs the reset unless the queue has been killed and triggers reset cleanup
 * of the queue.
 *
 * Returns 0 on success or -EPROTO if the message is too short or the guc_id
 * does not resolve to an exec queue.
 */
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_killed(q))
		xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x",
			   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id,
			   atomic_read(&q->guc->state));

	trace_xe_exec_queue_reset(q);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel
	 * jobs by setting timeout of the job to the minimum value kicking
	 * guc_exec_queue_timedout_job.
	 */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}

/*
 * xe_guc_error_capture_handler - Handler of GuC captured message
 * @guc: The GuC object
 * @msg: Point to the message
 * @len: The message length
 *
 * When GuC captured data is ready, GuC will send message
 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
 * called 1st to check status before process the data comes with the message.
 *
 * Returns: error code.
0 if success 2979 */ 2980 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 2981 { 2982 u32 status; 2983 2984 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 2985 return -EPROTO; 2986 2987 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 2988 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 2989 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 2990 2991 xe_guc_capture_process(guc); 2992 2993 return 0; 2994 } 2995 2996 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 2997 u32 len) 2998 { 2999 struct xe_gt *gt = guc_to_gt(guc); 3000 struct xe_exec_queue *q; 3001 u32 guc_id; 3002 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 3003 3004 if (unlikely(!len || len > 2)) 3005 return -EPROTO; 3006 3007 guc_id = msg[0]; 3008 3009 if (len == 2) 3010 type = msg[1]; 3011 3012 if (guc_id == GUC_ID_UNKNOWN) { 3013 /* 3014 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 3015 * context. In such case only PF will be notified about that fault. 3016 */ 3017 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 3018 return 0; 3019 } 3020 3021 q = g2h_exec_queue_lookup(guc, guc_id); 3022 if (unlikely(!q)) 3023 return -EPROTO; 3024 3025 /* 3026 * The type is HW-defined and changes based on platform, so we don't 3027 * decode it in the kernel and only check if it is valid. 3028 * See bspec 54047 and 72187 for details. 
3029 */ 3030 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 3031 xe_gt_info(gt, 3032 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 3033 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3034 else 3035 xe_gt_info(gt, 3036 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 3037 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3038 3039 trace_xe_exec_queue_memory_cat_error(q); 3040 3041 /* Treat the same as engine reset */ 3042 xe_guc_exec_queue_reset_trigger_cleanup(q); 3043 3044 return 0; 3045 } 3046 3047 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 3048 { 3049 struct xe_gt *gt = guc_to_gt(guc); 3050 u8 guc_class, instance; 3051 u32 reason; 3052 3053 if (unlikely(len != 3)) 3054 return -EPROTO; 3055 3056 guc_class = msg[0]; 3057 instance = msg[1]; 3058 reason = msg[2]; 3059 3060 /* Unexpected failure of a hardware feature, log an actual error */ 3061 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3062 guc_class, instance, reason); 3063 3064 xe_gt_reset_async(gt); 3065 3066 return 0; 3067 } 3068 3069 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 3070 u32 len) 3071 { 3072 struct xe_gt *gt = guc_to_gt(guc); 3073 struct xe_device *xe = guc_to_xe(guc); 3074 struct xe_exec_queue *q; 3075 u32 guc_id = msg[2]; 3076 3077 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3078 drm_err(&xe->drm, "Invalid length %u", len); 3079 return -EPROTO; 3080 } 3081 3082 q = g2h_exec_queue_lookup(guc, guc_id); 3083 if (unlikely(!q)) 3084 return -EPROTO; 3085 3086 xe_gt_dbg(gt, 3087 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3088 msg[0] & 1 ? 
"uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3089 3090 trace_xe_exec_queue_cgp_context_error(q); 3091 3092 /* Treat the same as engine reset */ 3093 xe_guc_exec_queue_reset_trigger_cleanup(q); 3094 3095 return 0; 3096 } 3097 3098 /** 3099 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3100 * @guc: guc 3101 * @msg: message indicating CGP sync done 3102 * @len: length of message 3103 * 3104 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3105 * for CGP synchronization to complete. 3106 * 3107 * Return: 0 on success, -EPROTO for malformed messages. 3108 */ 3109 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3110 { 3111 struct xe_device *xe = guc_to_xe(guc); 3112 struct xe_exec_queue *q; 3113 u32 guc_id = msg[0]; 3114 3115 if (unlikely(len < 1)) { 3116 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3117 return -EPROTO; 3118 } 3119 3120 q = g2h_exec_queue_lookup(guc, guc_id); 3121 if (unlikely(!q)) 3122 return -EPROTO; 3123 3124 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3125 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3126 return -EPROTO; 3127 } 3128 3129 /* Wakeup the serialized cgp update wait */ 3130 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3131 xe_guc_ct_wake_waiters(&guc->ct); 3132 3133 return 0; 3134 } 3135 3136 static void 3137 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3138 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3139 { 3140 struct xe_guc *guc = exec_queue_to_guc(q); 3141 struct xe_device *xe = guc_to_xe(guc); 3142 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3143 int i; 3144 3145 snapshot->guc.wqi_head = q->guc->wqi_head; 3146 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3147 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3148 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3149 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 
3150 wq_desc.wq_status); 3151 3152 if (snapshot->parallel.wq_desc.head != 3153 snapshot->parallel.wq_desc.tail) { 3154 for (i = snapshot->parallel.wq_desc.head; 3155 i != snapshot->parallel.wq_desc.tail; 3156 i = (i + sizeof(u32)) % WQ_SIZE) 3157 snapshot->parallel.wq[i / sizeof(u32)] = 3158 parallel_read(xe, map, wq[i / sizeof(u32)]); 3159 } 3160 } 3161 3162 static void 3163 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3164 struct drm_printer *p) 3165 { 3166 int i; 3167 3168 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3169 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3170 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3171 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3172 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3173 3174 if (snapshot->parallel.wq_desc.head != 3175 snapshot->parallel.wq_desc.tail) { 3176 for (i = snapshot->parallel.wq_desc.head; 3177 i != snapshot->parallel.wq_desc.tail; 3178 i = (i + sizeof(u32)) % WQ_SIZE) 3179 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3180 snapshot->parallel.wq[i / sizeof(u32)]); 3181 } 3182 } 3183 3184 /** 3185 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3186 * @q: faulty exec queue 3187 * 3188 * This can be printed out in a later stage like during dev_coredump 3189 * analysis. 3190 * 3191 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3192 * caller, using `xe_guc_exec_queue_snapshot_free`. 
3193 */ 3194 struct xe_guc_submit_exec_queue_snapshot * 3195 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 3196 { 3197 struct xe_gpu_scheduler *sched = &q->guc->sched; 3198 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3199 int i; 3200 3201 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC); 3202 3203 if (!snapshot) 3204 return NULL; 3205 3206 snapshot->guc.id = q->guc->id; 3207 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 3208 snapshot->class = q->class; 3209 snapshot->logical_mask = q->logical_mask; 3210 snapshot->width = q->width; 3211 snapshot->refcount = kref_read(&q->refcount); 3212 snapshot->sched_timeout = sched->base.timeout; 3213 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 3214 snapshot->sched_props.preempt_timeout_us = 3215 q->sched_props.preempt_timeout_us; 3216 3217 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width, 3218 GFP_ATOMIC); 3219 3220 if (snapshot->lrc) { 3221 for (i = 0; i < q->width; ++i) { 3222 struct xe_lrc *lrc = q->lrc[i]; 3223 3224 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 3225 } 3226 } 3227 3228 snapshot->schedule_state = atomic_read(&q->guc->state); 3229 snapshot->exec_queue_flags = q->flags; 3230 3231 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 3232 if (snapshot->parallel_execution) 3233 guc_exec_queue_wq_snapshot_capture(q, snapshot); 3234 3235 if (xe_exec_queue_is_multi_queue(q)) { 3236 snapshot->multi_queue.valid = true; 3237 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; 3238 snapshot->multi_queue.pos = q->multi_queue.pos; 3239 } 3240 3241 return snapshot; 3242 } 3243 3244 /** 3245 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. 3246 * @snapshot: Previously captured snapshot of job. 3247 * 3248 * This captures some data that requires taking some locks, so it cannot be done in signaling path. 
3249 */ 3250 void 3251 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3252 { 3253 int i; 3254 3255 if (!snapshot || !snapshot->lrc) 3256 return; 3257 3258 for (i = 0; i < snapshot->width; ++i) 3259 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3260 } 3261 3262 /** 3263 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3264 * @snapshot: GuC Submit Engine snapshot object. 3265 * @p: drm_printer where it will be printed out. 3266 * 3267 * This function prints out a given GuC Submit Engine snapshot object. 3268 */ 3269 void 3270 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3271 struct drm_printer *p) 3272 { 3273 int i; 3274 3275 if (!snapshot) 3276 return; 3277 3278 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id); 3279 drm_printf(p, "\tName: %s\n", snapshot->name); 3280 drm_printf(p, "\tClass: %d\n", snapshot->class); 3281 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); 3282 drm_printf(p, "\tWidth: %d\n", snapshot->width); 3283 drm_printf(p, "\tRef: %d\n", snapshot->refcount); 3284 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); 3285 drm_printf(p, "\tTimeslice: %u (us)\n", 3286 snapshot->sched_props.timeslice_us); 3287 drm_printf(p, "\tPreempt timeout: %u (us)\n", 3288 snapshot->sched_props.preempt_timeout_us); 3289 3290 for (i = 0; snapshot->lrc && i < snapshot->width; ++i) 3291 xe_lrc_snapshot_print(snapshot->lrc[i], p); 3292 3293 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); 3294 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); 3295 3296 if (snapshot->parallel_execution) 3297 guc_exec_queue_wq_snapshot_print(snapshot, p); 3298 3299 if (snapshot->multi_queue.valid) { 3300 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); 3301 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); 3302 } 3303 } 3304 3305 /** 3306 * 
xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object, may be NULL.
 *
 * This function frees all the memory that was allocated at capture time:
 * each LRC snapshot, the LRC snapshot array and the snapshot itself.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	int idx;

	if (!snapshot)
		return;

	/* Per-LRC snapshots only exist when the pointer array was allocated */
	for (idx = 0; snapshot->lrc && idx < snapshot->width; idx++)
		xe_lrc_snapshot_free(snapshot->lrc[idx]);

	/* kfree() ignores a NULL pointer, so no separate check is needed */
	kfree(snapshot->lrc);
	kfree(snapshot);
}

/* Capture a snapshot of @q, print it to @p, then release it again. */
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snap =
		xe_guc_exec_queue_snapshot_capture(q);

	/* Print and free both accept a NULL snapshot */
	xe_guc_exec_queue_snapshot_print(snap, p);
	xe_guc_exec_queue_snapshot_free(snap);
}

/**
 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
 * @q: Execution queue
 * @ctx_type: Type of the context
 *
 * This function registers the execution queue with the guc. Special context
 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
 * are only applicable for IGPU and in the VF.
 * Submits the execution queue to GUC after registering it.
 *
 * Returns - None.
3348 */ 3349 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3350 { 3351 struct xe_guc *guc = exec_queue_to_guc(q); 3352 struct xe_device *xe = guc_to_xe(guc); 3353 struct xe_gt *gt = guc_to_gt(guc); 3354 3355 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3356 xe_gt_assert(gt, !IS_DGFX(xe)); 3357 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3358 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3359 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3360 3361 register_exec_queue(q, ctx_type); 3362 enable_scheduling(q); 3363 } 3364 3365 /** 3366 * xe_guc_submit_print - GuC Submit Print. 3367 * @guc: GuC. 3368 * @p: drm_printer where it will be printed out. 3369 * 3370 * This function capture and prints snapshots of **all** GuC Engines. 3371 */ 3372 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3373 { 3374 struct xe_exec_queue *q; 3375 unsigned long index; 3376 3377 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3378 return; 3379 3380 mutex_lock(&guc->submission_state.lock); 3381 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3382 guc_exec_queue_print(q, p); 3383 mutex_unlock(&guc->submission_state.lock); 3384 } 3385 3386 /** 3387 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues 3388 * registered with the GuC 3389 * @guc: GuC. 3390 * 3391 * Return: true if any MLRC queue is registered with the GuC, false otherwise. 3392 */ 3393 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) 3394 { 3395 struct xe_exec_queue *q; 3396 unsigned long index; 3397 3398 guard(mutex)(&guc->submission_state.lock); 3399 3400 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3401 if (q->width > 1) 3402 return true; 3403 3404 return false; 3405 } 3406 3407 /** 3408 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3409 * exec queues registered to given GuC. 
3410 * @guc: the &xe_guc struct instance 3411 * @scratch: scratch buffer to be used as temporary storage 3412 * 3413 * Returns: zero on success, negative error code on failure. 3414 */ 3415 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3416 { 3417 struct xe_exec_queue *q; 3418 unsigned long index; 3419 int err = 0; 3420 3421 mutex_lock(&guc->submission_state.lock); 3422 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3423 /* Prevent redundant attempts to stop parallel queues */ 3424 if (q->guc->id != index) 3425 continue; 3426 3427 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3428 if (err) 3429 break; 3430 } 3431 mutex_unlock(&guc->submission_state.lock); 3432 3433 return err; 3434 } 3435