// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sleep.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN	6

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done event being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 10)
#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND	(1 << 11)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_pending_resume(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
}

static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND;
}

static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

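/*
 * Driver-managed teardown: wait (up to 5 s) for all exec queues to be released
 * and drain the destroy workqueue before destroying the lookup table.
 */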
static void
guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

static void guc_submit_wedged_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide an explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
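 * (20% of an 80 ms interval is 16 ms of yield time per interval.)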
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_INSTANCES(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case the user gets -ENOMEM on engine create and has to try
	 * again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
		     !xe_exec_queue_is_multi_queue_secondary(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

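/*
 * Drop the context's preemption timeout to the minimum (1 us) so a pending
 * schedule disable takes effect as quickly as possible.
 */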
static void
set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

static bool vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	xe_sched_tdr_queue_imm(&q->guc->sched);
}

static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq, *next;
	LIST_HEAD(tmp);

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	mutex_lock(&group->list_lock);

	/*
	 * Stop any future queues from executing while the group is stopped.
	 */
	group->stopped = true;

	list_for_each_entry_safe(eq, next, &group->list, multi_queue.link)
		/*
		 * Refcount prevents an attempted removal from &group->list,
		 * temporary list allows safe iteration after dropping
		 * &group->list_lock.
		 */
		if (xe_exec_queue_get_unless_zero(eq))
			list_move_tail(&eq->multi_queue.link, &tmp);

	mutex_unlock(&group->list_lock);

	/* We cannot stop under the list lock without creating lock inversions */
	xe_sched_submission_stop(&primary->guc->sched);
	list_for_each_entry(eq, &tmp, multi_queue.link)
		xe_sched_submission_stop(&eq->guc->sched);

	mutex_lock(&group->list_lock);
	list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) {
		/*
		 * Corner case where we got banned while stopping and are not
		 * on &group->list
		 */
		if (READ_ONCE(group->banned))
			xe_guc_exec_queue_trigger_cleanup(eq);

		list_move_tail(&eq->multi_queue.link, &group->list);
		xe_exec_queue_put(eq);
	}
	mutex_unlock(&group->list_lock);
}

static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	xe_sched_submission_start(&primary->guc->sched);

	mutex_lock(&group->list_lock);
	group->stopped = false;
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_sched_submission_start(&eq->guc->sched);
	mutex_unlock(&group->list_lock);
}

static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	/* Group banned, skip timeout check in TDR */
	WRITE_ONCE(group->banned, true);
	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
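	/* Kick the TDR of every queue in the group so each one cleans up */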
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		/* Group banned, skip timeout check in TDR */
		WRITE_ONCE(group->banned, true);

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 cgp_lo;
	u32 cgp_hi;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

/**
 * DOC: Multi Queue Group GuC interface
 *
 * Coordination of a multi queue group between the KMD and GuC is handled
 * through a software construct called the Context Group Page (CGP). The CGP is
 * a KMD-managed 4KB page allocated in the global GTT.
 *
 * CGP format:
 *
 * +-----------+---------------------------+---------------------------------------------+
 * | DWORD     | Name                      | Description                                 |
 * +-----------+---------------------------+---------------------------------------------+
 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 1..15     | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
 * +-----------+---------------------------+---------------------------------------------+
 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
 * +-----------+---------------------------+---------------------------------------------+
 * | 18..31    | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | ...       | ...                       | ...                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
 * +-----------+---------------------------+---------------------------------------------+
 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
 * +-----------+---------------------------+---------------------------------------------+
 * | 160..1023 | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 *
 * While registering Q0 with GuC, the CGP is updated with the Q0 entry and GuC
 * is notified through the XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G
 * message, which specifies the CGP address. When a secondary queue is added to
 * the group, the CGP is updated with the entry for that queue and GuC is
 * notified through the XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC H2G
 * interface. GuC responds to these H2G messages with a
 * XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for
 * any error in the CGP. Only one of these CGP update messages can be
 * outstanding (waiting for a GuC response) at any time. The bits in the
 * KMD_QUEUE_UPDATE_MASK_DW* fields indicate which queue entry is being updated
 * in the CGP.
 *
 * The primary queue (Q0) represents the multi queue group context in GuC and
 * submission on any queue of the group must be through the Q0 GuC interface
 * only.
 *
 * As it is not required to register secondary queues with GuC, the secondary
 * queue context IDs in the CGP are populated with the Q0 context ID.
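 *
 * For example, the CGP entry for the queue at position n in the group lives at
 * DWORDs 32 + 2 * n (LRC descriptor lower DWORD) and 33 + 2 * n (context ID),
 * with the matching update mask bit (n % 32) set in KMD_QUEUE_UPDATE_MASK_DW0,
 * or in KMD_QUEUE_UPDATE_MASK_DW1 for n >= 32.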
 */

#define CGP_VERSION_MAJOR_SHIFT 8

static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	if (q->multi_queue.pos / 32) {
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_multi_queue_priority priority;
	long ret;

	/*
	 * As all queues of a multi queue group use a single drm scheduler
	 * submit workqueue, CGP synchronizations with GuC are serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending the CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset the GT, clean up the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	scoped_guard(spinlock, &q->multi_queue.lock)
		priority = q->multi_queue.priority;

	xe_lrc_set_multi_queue_priority(q->lrc[0], priority);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}

static void __register_exec_queue_group(struct xe_guc *guc,
					struct xe_exec_queue *q,
					struct guc_ctxt_registration_info *info)
{
#define MAX_MULTI_QUEUE_REG_SIZE (8)
	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
	int len = 0;

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = 0; /* Reserved */
	action[len++] = info->cgp_lo;
	action[len++] = info->cgp_hi;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
#undef MAX_MULTI_QUEUE_REG_SIZE

	/*
	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from GuC.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
	int len = 0;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));

	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
	action[len++] = q->multi_queue.group->primary->guc->id;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

	/*
	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from GuC.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fix up later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fix up later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

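/*
 * Build the registration info and register the context(s) with GuC, picking
 * the registration flavor: multi-queue group (primary), multi-LRC (parallel),
 * or single LRC. Secondary queues of a group are not registered individually;
 * they are only added to the group's CGP.
 */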
static void
register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64);
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

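/*
 * Not enough contiguous space before the end of the work queue ring: pad the
 * remainder with a single NOOP item so the next item starts at offset 0.
 */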
static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	if (!job->restore_replay || job->last_replay) {
		if (xe_exec_queue_is_parallel(q))
			wq_item_append(q);
		else if (!exec_queue_idle_skip_suspend(q))
			xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
		job->last_replay = false;
	}

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	/*
	 * All queues in a multi-queue group will use the primary queue
	 * of the group to interface with GuC.
	 */
	q = xe_exec_queue_multi_queue_primary(q);

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	bool killed_or_banned_or_wedged =
		exec_queue_killed_or_banned_or_wedged(q);

	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

			if (exec_queue_killed_or_banned_or_wedged(primary)) {
				killed_or_banned_or_wedged = true;
				goto run_job_out;
			}

			if (!exec_queue_registered(primary))
				register_exec_queue(primary, GUC_CONTEXT_NORMAL);
		}

		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		if (!job->restore_replay)
			q->ring_ops->emit_job(job);
		submit_exec_queue(q, job);
		job->restore_replay = false;
	}

run_job_out:

	return job->fence;
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state);
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \
	u32 action[] = { \
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
		q->guc->id, \
		GUC_CONTEXT_##enable_disable, \
	}

static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);

	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc),
				 HZ * 5);
	if (!ret && !vf_recovery(guc)) {
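		/* GuC never acknowledged the pending enable/disable, assume it is hung */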
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
			       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save the state of exec queues registered with GuC by taking a ref to each
 * queue. Register a DRMM handler to drop the refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If the device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
				       guc_submit_wedged_fini, guc);
	if (err) {
		xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; "
			  "Although device is wedged.\n",
			  xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
		return;
	}

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		if (xe_exec_queue_get_unless_zero(q))
			set_exec_queue_wedged(q);
	mutex_unlock(&guc->submission_state.lock);
}

static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}

#define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)

static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
	if (ctx_timestamp == job->sample_timestamp) {
		if (IS_SRIOV_VF(gt_to_xe(gt)))
			xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				     xe_sched_job_seqno(job),
				     xe_sched_job_lrc_seqno(job), q->guc->id);
		else
			xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				   xe_sched_job_seqno(job),
				   xe_sched_job_lrc_seqno(job), q->guc->id);
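
		/* No forward progress observed since the last sample */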
		return xe_sched_invalidate_job(job, 0);
	}

	job->sample_timestamp = ctx_timestamp;
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid and catch
	 * possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}

static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 1);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct drm_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q, *primary;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	int err = -ETIME;
	pid_t pid = -1;
	bool wedged = false, skip_timeout_check;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));

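	/* All GuC interactions for a multi-queue group go through the primary */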
	primary = xe_exec_queue_multi_queue_primary(q);

	/*
	 * TDR has fired before the free job worker. Common if the exec queue is
	 * immediately closed after the last fence signaled. Add back to the
	 * pending list so the job can be freed and kick the scheduler, ensuring
	 * the free job is not lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
	    vf_recovery(guc))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_stop(q);
	else
		xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q);

	/* Skip timeout check if multi-queue group is banned */
	if (xe_exec_queue_is_multi_queue(q) &&
	    READ_ONCE(q->multi_queue.group->banned))
		skip_timeout_check = true;

	/* LR jobs can only get here if the queue has been killed or hit an error */
	if (xe_exec_queue_is_lr(q))
		xe_gt_assert(guc_to_gt(guc), skip_timeout_check);

	/*
	 * If a devcoredump has not been captured and the GuC capture for the
	 * job is not ready, do a manual capture first and decide later if we
	 * need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_queue(q);
	}

	/*
	 * Check if the job actually timed out; if so, restart job execution
	 * and the TDR
	 */
	if (!skip_timeout_check && !check_timeout(q, job))
		goto rearm;

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	set_exec_queue_banned(q);

	/* Kick job / queue off hardware */
	if (!wedged && (exec_queue_enabled(primary) ||
			exec_queue_pending_disable(primary))) {
		int ret;

		if (exec_queue_reset(primary))
			err = -EIO;

		if (xe_uc_fw_is_running(&guc->fw)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(primary) &&
						  !exec_queue_pending_disable(primary)) ||
						 xe_guc_read_stopped(guc) ||
						 vf_recovery(guc), HZ * 5);
			if (vf_recovery(guc))
				goto handle_vf_resume;
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			disable_scheduling(primary, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !xe_uc_fw_is_running(&guc->fw) ||
					 !exec_queue_pending_disable(primary) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			goto handle_vf_resume;
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   primary->guc->id);
			xe_devcoredump(primary, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       primary->guc->id, ret, xe_guc_read_stopped(guc));
			xe_gt_reset_async(primary->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Mark all outstanding jobs as bad, thus completing them */
	xe_sched_job_set_error(job, err);
	drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
		xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);

	if (xe_exec_queue_is_multi_queue(q)) {
		xe_guc_exec_queue_group_start(q);
		xe_guc_exec_queue_group_trigger_cleanup(q);
	} else {
		xe_sched_submission_start(sched);
		xe_guc_exec_queue_trigger_cleanup(q);
	}

	/*
	 * We want the job added back to the pending list so it gets freed; this
	 * is what DRM_GPU_SCHED_STAT_NO_HANG does.
	 */
	return DRM_GPU_SCHED_STAT_NO_HANG;

rearm:
	/*
	 * XXX: Ideally we want to adjust the timeout based on the current
	 * execution time, but there is not currently an easy way to do that in
	 * the DRM scheduler. With some thought, do this in a follow up.
	 */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_start(q);
	else
		xe_sched_submission_start(sched);
handle_vf_resume:
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due to the sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	guard(xe_pm_runtime)(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		mutex_lock(&group->list_lock);
		list_del(&q->multi_queue.link);
		mutex_unlock(&group->list_lock);
	}

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);
}

static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd; the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that it
	 * is.
	 */
	guc_exec_queue_destroy_async(q);
}

static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	/*
	 * Expected state transitions for cleanup:
	 * - If the exec queue is registered and GuC firmware is running, we must first
	 *   disable scheduling and deregister the queue to ensure proper teardown and
	 *   resource release in the GuC, then destroy the exec queue on driver side.
	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
	 *   we cannot expect a response for the deregister request. In this case,
	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
	 *   will not process further requests and all resources must be cleaned up locally.
1740 */ 1741 if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) 1742 disable_scheduling_deregister(guc, q); 1743 else 1744 __guc_exec_queue_destroy(guc, q); 1745 } 1746 1747 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) 1748 { 1749 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); 1750 } 1751 1752 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) 1753 { 1754 struct xe_exec_queue *q = msg->private_data; 1755 struct xe_guc *guc = exec_queue_to_guc(q); 1756 1757 if (guc_exec_queue_allowed_to_change_state(q)) 1758 init_policies(guc, q); 1759 kfree(msg); 1760 } 1761 1762 static void __suspend_fence_signal(struct xe_exec_queue *q) 1763 { 1764 struct xe_guc *guc = exec_queue_to_guc(q); 1765 struct xe_device *xe = guc_to_xe(guc); 1766 1767 if (!q->guc->suspend_pending) 1768 return; 1769 1770 WRITE_ONCE(q->guc->suspend_pending, false); 1771 1772 /* 1773 * We use a GuC shared wait queue for VFs because the VF resfix start 1774 * interrupt must be able to wake all instances of suspend_wait. This 1775 * prevents the VF migration worker from being starved during 1776 * scheduling. 1777 */ 1778 if (IS_SRIOV_VF(xe)) 1779 wake_up_all(&guc->ct.wq); 1780 else 1781 wake_up(&q->guc->suspend_wait); 1782 } 1783 1784 static void suspend_fence_signal(struct xe_exec_queue *q) 1785 { 1786 struct xe_guc *guc = exec_queue_to_guc(q); 1787 1788 xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || 1789 xe_guc_read_stopped(guc)); 1790 xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); 1791 1792 __suspend_fence_signal(q); 1793 } 1794 1795 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) 1796 { 1797 struct xe_exec_queue *q = msg->private_data; 1798 struct xe_guc *guc = exec_queue_to_guc(q); 1799 bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q); 1800 1801 if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) && 1802 !exec_queue_suspended(q) && exec_queue_enabled(q)) { 1803 wait_event(guc->ct.wq, vf_recovery(guc) || 1804 ((q->guc->resume_time != RESUME_PENDING || 1805 xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); 1806 1807 if (!xe_guc_read_stopped(guc)) { 1808 s64 since_resume_ms = 1809 ktime_ms_delta(ktime_get(), 1810 q->guc->resume_time); 1811 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1812 since_resume_ms; 1813 1814 if (wait_ms > 0 && q->guc->resume_time) 1815 xe_sleep_relaxed_ms(wait_ms); 1816 1817 set_exec_queue_suspended(q); 1818 disable_scheduling(q, false); 1819 } 1820 } else if (q->guc->suspend_pending) { 1821 if (idle_skip_suspend) 1822 set_exec_queue_idle_skip_suspend(q); 1823 set_exec_queue_suspended(q); 1824 suspend_fence_signal(q); 1825 } 1826 } 1827 1828 static void sched_context(struct xe_exec_queue *q) 1829 { 1830 struct xe_guc *guc = exec_queue_to_guc(q); 1831 struct xe_lrc *lrc = q->lrc[0]; 1832 u32 action[] = { 1833 XE_GUC_ACTION_SCHED_CONTEXT, 1834 q->guc->id, 1835 }; 1836 1837 xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q)); 1838 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1839 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1840 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1841 1842 trace_xe_exec_queue_submit(q); 1843 1844 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1845 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 1846 } 1847 1848 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1849 { 1850 struct xe_exec_queue 
*q = msg->private_data; 1851 1852 if (guc_exec_queue_allowed_to_change_state(q)) { 1853 clear_exec_queue_suspended(q); 1854 if (!exec_queue_enabled(q)) { 1855 if (exec_queue_idle_skip_suspend(q)) { 1856 struct xe_lrc *lrc = q->lrc[0]; 1857 1858 clear_exec_queue_idle_skip_suspend(q); 1859 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1860 } 1861 q->guc->resume_time = RESUME_PENDING; 1862 set_exec_queue_pending_resume(q); 1863 enable_scheduling(q); 1864 } else if (exec_queue_idle_skip_suspend(q)) { 1865 clear_exec_queue_idle_skip_suspend(q); 1866 sched_context(q); 1867 } 1868 } else { 1869 clear_exec_queue_suspended(q); 1870 clear_exec_queue_idle_skip_suspend(q); 1871 } 1872 } 1873 1874 static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg) 1875 { 1876 struct xe_exec_queue *q = msg->private_data; 1877 1878 if (guc_exec_queue_allowed_to_change_state(q)) { 1879 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) 1880 struct xe_guc *guc = exec_queue_to_guc(q); 1881 struct xe_exec_queue_group *group = q->multi_queue.group; 1882 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; 1883 int len = 0; 1884 1885 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; 1886 action[len++] = group->primary->guc->id; 1887 1888 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); 1889 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE 1890 1891 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 1892 } 1893 1894 kfree(msg); 1895 } 1896 1897 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ 1898 #define SET_SCHED_PROPS 2 1899 #define SUSPEND 3 1900 #define RESUME 4 1901 #define SET_MULTI_QUEUE_PRIORITY 5 1902 #define OPCODE_MASK 0xf 1903 #define MSG_LOCKED BIT(8) 1904 #define MSG_HEAD BIT(9) 1905 1906 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) 1907 { 1908 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); 1909 1910 trace_xe_sched_msg_recv(msg); 1911 1912 switch (msg->opcode) { 1913 case CLEANUP: 1914 __guc_exec_queue_process_msg_cleanup(msg); 1915 break; 1916 case SET_SCHED_PROPS: 1917 __guc_exec_queue_process_msg_set_sched_props(msg); 1918 break; 1919 case SUSPEND: 1920 __guc_exec_queue_process_msg_suspend(msg); 1921 break; 1922 case RESUME: 1923 __guc_exec_queue_process_msg_resume(msg); 1924 break; 1925 case SET_MULTI_QUEUE_PRIORITY: 1926 __guc_exec_queue_process_msg_set_multi_queue_priority(msg); 1927 break; 1928 default: 1929 XE_WARN_ON("Unknown message type"); 1930 } 1931 1932 xe_pm_runtime_put(xe); 1933 } 1934 1935 static const struct drm_sched_backend_ops drm_sched_ops = { 1936 .run_job = guc_exec_queue_run_job, 1937 .free_job = guc_exec_queue_free_job, 1938 .timedout_job = guc_exec_queue_timedout_job, 1939 }; 1940 1941 static const struct xe_sched_backend_ops xe_sched_ops = { 1942 .process_msg = guc_exec_queue_process_msg, 1943 }; 1944 1945 static int guc_exec_queue_init(struct xe_exec_queue *q) 1946 { 1947 struct xe_gpu_scheduler *sched; 1948 struct xe_guc *guc = exec_queue_to_guc(q); 1949 struct workqueue_struct *submit_wq = NULL; 1950 struct xe_guc_exec_queue *ge; 1951 long timeout; 1952 int err, i; 1953 1954 xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); 1955 1956 ge = kzalloc_obj(*ge); 1957 if (!ge) 1958 return -ENOMEM; 1959 1960 q->guc = ge; 1961 ge->q = q; 1962 init_rcu_head(&ge->rcu); 1963 init_waitqueue_head(&ge->suspend_wait); 1964 1965 for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) 1966 INIT_LIST_HEAD(&ge->static_msgs[i].link); 1967 1968 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : 1969 msecs_to_jiffies(q->sched_props.job_timeout_ms); 1970 1971 /* 1972 * Use primary queue's submit_wq for all secondary queues of a 1973 * multi queue group. This serialization avoids any locking around 1974 * CGP synchronization with GuC. 1975 */ 1976 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1977 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 1978 1979 submit_wq = primary->guc->sched.base.submit_wq; 1980 } 1981 1982 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, 1983 submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, 1984 timeout, guc_to_gt(guc)->ordered_wq, NULL, 1985 q->name, gt_to_xe(q->gt)->drm.dev); 1986 if (err) 1987 goto err_free; 1988 1989 sched = &ge->sched; 1990 err = xe_sched_entity_init(&ge->entity, sched); 1991 if (err) 1992 goto err_sched; 1993 1994 mutex_lock(&guc->submission_state.lock); 1995 1996 err = alloc_guc_id(guc, q); 1997 if (err) 1998 goto err_entity; 1999 2000 q->entity = &ge->entity; 2001 2002 if (xe_guc_read_stopped(guc) || vf_recovery(guc)) 2003 xe_sched_stop(sched); 2004 2005 mutex_unlock(&guc->submission_state.lock); 2006 2007 xe_exec_queue_assign_name(q, q->guc->id); 2008 2009 /* 2010 * Maintain secondary queues of the multi queue group in a list 2011 * for handling dependencies across the queues in the group. 2012 */ 2013 if (xe_exec_queue_is_multi_queue_secondary(q)) { 2014 struct xe_exec_queue_group *group = q->multi_queue.group; 2015 2016 INIT_LIST_HEAD(&q->multi_queue.link); 2017 mutex_lock(&group->list_lock); 2018 if (group->stopped) 2019 WRITE_ONCE(q->guc->sched.base.pause_submit, true); 2020 list_add_tail(&q->multi_queue.link, &group->list); 2021 mutex_unlock(&group->list_lock); 2022 } 2023 2024 if (xe_exec_queue_is_multi_queue(q)) 2025 trace_xe_exec_queue_create_multi_queue(q); 2026 else 2027 trace_xe_exec_queue_create(q); 2028 2029 return 0; 2030 2031 err_entity: 2032 mutex_unlock(&guc->submission_state.lock); 2033 xe_sched_entity_fini(&ge->entity); 2034 err_sched: 2035 xe_sched_fini(&ge->sched); 2036 err_free: 2037 kfree(ge); 2038 2039 return err; 2040 } 2041 2042 static void guc_exec_queue_kill(struct xe_exec_queue *q) 2043 { 2044 trace_xe_exec_queue_kill(q); 2045 set_exec_queue_killed(q); 2046 __suspend_fence_signal(q); 2047 xe_guc_exec_queue_trigger_cleanup(q); 2048 } 2049 2050 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 2051 u32 opcode) 2052 { 2053 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); 2054 2055 INIT_LIST_HEAD(&msg->link); 2056 msg->opcode = opcode & OPCODE_MASK; 2057 msg->private_data = q; 2058 2059 trace_xe_sched_msg_add(msg); 2060 if (opcode & MSG_HEAD) 2061 xe_sched_add_msg_head(&q->guc->sched, msg); 2062 else if (opcode & MSG_LOCKED) 2063 xe_sched_add_msg_locked(&q->guc->sched, msg); 2064 else 2065 xe_sched_add_msg(&q->guc->sched, msg); 2066 } 2067 2068 static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, 2069 struct xe_sched_msg *msg, 2070 u32 opcode) 2071 { 2072 if (!list_empty(&msg->link)) 2073 return; 2074 2075 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD); 2076 } 2077 2078 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, 2079 struct xe_sched_msg *msg, 2080 u32 opcode) 2081 { 2082 if (!list_empty(&msg->link)) 2083 return false; 2084 2085 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 2086 2087 return true; 2088 } 2089 2090 #define STATIC_MSG_CLEANUP 0 2091 #define STATIC_MSG_SUSPEND 1 2092 #define STATIC_MSG_RESUME 2 2093 static void 
guc_exec_queue_destroy(struct xe_exec_queue *q) 2094 { 2095 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2096 2097 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) 2098 guc_exec_queue_add_msg(q, msg, CLEANUP); 2099 else 2100 __guc_exec_queue_destroy(exec_queue_to_guc(q), q); 2101 } 2102 2103 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 2104 enum xe_exec_queue_priority priority) 2105 { 2106 struct xe_sched_msg *msg; 2107 2108 if (q->sched_props.priority == priority || 2109 exec_queue_killed_or_banned_or_wedged(q)) 2110 return 0; 2111 2112 msg = kmalloc_obj(*msg); 2113 if (!msg) 2114 return -ENOMEM; 2115 2116 q->sched_props.priority = priority; 2117 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2118 2119 return 0; 2120 } 2121 2122 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 2123 { 2124 struct xe_sched_msg *msg; 2125 2126 if (q->sched_props.timeslice_us == timeslice_us || 2127 exec_queue_killed_or_banned_or_wedged(q)) 2128 return 0; 2129 2130 msg = kmalloc_obj(*msg); 2131 if (!msg) 2132 return -ENOMEM; 2133 2134 q->sched_props.timeslice_us = timeslice_us; 2135 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2136 2137 return 0; 2138 } 2139 2140 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 2141 u32 preempt_timeout_us) 2142 { 2143 struct xe_sched_msg *msg; 2144 2145 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 2146 exec_queue_killed_or_banned_or_wedged(q)) 2147 return 0; 2148 2149 msg = kmalloc_obj(*msg); 2150 if (!msg) 2151 return -ENOMEM; 2152 2153 q->sched_props.preempt_timeout_us = preempt_timeout_us; 2154 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2155 2156 return 0; 2157 } 2158 2159 static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, 2160 enum xe_multi_queue_priority priority) 2161 { 2162 struct xe_sched_msg *msg; 2163 2164 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); 2165 2166 if (exec_queue_killed_or_banned_or_wedged(q)) 2167 return 0; 2168 2169 msg = kmalloc_obj(*msg); 2170 if (!msg) 2171 return -ENOMEM; 2172 2173 scoped_guard(spinlock, &q->multi_queue.lock) { 2174 if (q->multi_queue.priority == priority) { 2175 kfree(msg); 2176 return 0; 2177 } 2178 2179 q->multi_queue.priority = priority; 2180 } 2181 2182 guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); 2183 2184 return 0; 2185 } 2186 2187 static int guc_exec_queue_suspend(struct xe_exec_queue *q) 2188 { 2189 struct xe_gpu_scheduler *sched = &q->guc->sched; 2190 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2191 2192 if (exec_queue_killed_or_banned_or_wedged(q)) 2193 return -EINVAL; 2194 2195 xe_sched_msg_lock(sched); 2196 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) 2197 q->guc->suspend_pending = true; 2198 xe_sched_msg_unlock(sched); 2199 2200 return 0; 2201 } 2202 2203 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 2204 { 2205 struct xe_guc *guc = exec_queue_to_guc(q); 2206 struct xe_device *xe = guc_to_xe(guc); 2207 int ret; 2208 2209 /* 2210 * Likely don't need to check exec_queue_killed() as we clear 2211 * suspend_pending upon kill, but to be paranoid about races in which 2212 * suspend_pending is set after kill, also check kill here.
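* Checking the killed state directly ensures the wait below still wakes in that race rather than timing out.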
2213 */ 2214 #define WAIT_COND \ 2215 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ 2216 xe_guc_read_stopped(guc)) 2217 2218 retry: 2219 if (IS_SRIOV_VF(xe)) 2220 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || 2221 vf_recovery(guc), 2222 HZ * 5); 2223 else 2224 ret = wait_event_interruptible_timeout(q->guc->suspend_wait, 2225 WAIT_COND, HZ * 5); 2226 2227 if (vf_recovery(guc) && !xe_device_wedged(xe)) 2228 return -EAGAIN; 2229 2230 if (!ret) { 2231 xe_gt_warn(guc_to_gt(guc), 2232 "Suspend fence, guc_id=%d, failed to respond", 2233 q->guc->id); 2234 /* XXX: Trigger GT reset? */ 2235 return -ETIME; 2236 } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { 2237 /* Corner case on RESFIX DONE where vf_recovery() changes */ 2238 goto retry; 2239 } 2240 2241 #undef WAIT_COND 2242 2243 return ret < 0 ? ret : 0; 2244 } 2245 2246 static void guc_exec_queue_resume(struct xe_exec_queue *q) 2247 { 2248 struct xe_gpu_scheduler *sched = &q->guc->sched; 2249 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2250 struct xe_guc *guc = exec_queue_to_guc(q); 2251 2252 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); 2253 2254 xe_sched_msg_lock(sched); 2255 guc_exec_queue_try_add_msg(q, msg, RESUME); 2256 xe_sched_msg_unlock(sched); 2257 } 2258 2259 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 2260 { 2261 if (xe_exec_queue_is_multi_queue_secondary(q) && 2262 guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q))) 2263 return true; 2264 2265 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); 2266 } 2267 2268 static bool guc_exec_queue_active(struct xe_exec_queue *q) 2269 { 2270 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 2271 2272 return exec_queue_enabled(primary) && 2273 !exec_queue_pending_disable(primary); 2274 } 2275 2276 /* 2277 * These functions are an abstraction layer which other parts of Xe can use 2278 * to trap into the GuC backend. Aside from init, they really shouldn't do 2279 * much other than trap into the DRM scheduler, which 2280 * synchronizes these operations.
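* Funneling these operations through scheduler messages keeps them ordered with job submission on the same queue.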
2281 */ 2282 static const struct xe_exec_queue_ops guc_exec_queue_ops = { 2283 .init = guc_exec_queue_init, 2284 .kill = guc_exec_queue_kill, 2285 .fini = guc_exec_queue_fini, 2286 .destroy = guc_exec_queue_destroy, 2287 .set_priority = guc_exec_queue_set_priority, 2288 .set_timeslice = guc_exec_queue_set_timeslice, 2289 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, 2290 .set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority, 2291 .suspend = guc_exec_queue_suspend, 2292 .suspend_wait = guc_exec_queue_suspend_wait, 2293 .resume = guc_exec_queue_resume, 2294 .reset_status = guc_exec_queue_reset_status, 2295 .active = guc_exec_queue_active, 2296 }; 2297 2298 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) 2299 { 2300 struct xe_gpu_scheduler *sched = &q->guc->sched; 2301 2302 /* Stop scheduling + flush any DRM scheduler operations */ 2303 xe_sched_submission_stop(sched); 2304 2305 /* Clean up lost G2H + reset engine state */ 2306 if (exec_queue_registered(q)) { 2307 if (exec_queue_destroyed(q)) 2308 __guc_exec_queue_destroy(guc, q); 2309 } 2310 if (q->guc->suspend_pending) { 2311 set_exec_queue_suspended(q); 2312 suspend_fence_signal(q); 2313 } 2314 atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | 2315 EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | 2316 EXEC_QUEUE_STATE_SUSPENDED, 2317 &q->guc->state); 2318 q->guc->resume_time = 0; 2319 trace_xe_exec_queue_stop(q); 2320 2321 /* 2322 * Ban any engine (aside from kernel and engines used for VM ops) with a 2323 * started but not completed job, or if a job has gone through a GT reset 2324 * more than twice. 2325 */ 2326 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { 2327 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2328 bool ban = false; 2329 2330 if (job) { 2331 if ((xe_sched_job_started(job) && 2332 !xe_sched_job_completed(job)) || 2333 xe_sched_invalidate_job(job, 2)) { 2334 trace_xe_sched_job_ban(job); 2335 ban = true; 2336 } 2337 } 2338 2339 if (ban) { 2340 set_exec_queue_banned(q); 2341 xe_guc_exec_queue_trigger_cleanup(q); 2342 } 2343 } 2344 } 2345 2346 int xe_guc_submit_reset_prepare(struct xe_guc *guc) 2347 { 2348 int ret; 2349 2350 if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) 2351 return 0; 2352 2353 if (!guc->submission_state.initialized) 2354 return 0; 2355 2356 /* 2357 * Using an atomic here rather than submission_state.lock as this 2358 * function can be called while holding the CT lock (engine reset 2359 * failure). submission_state.lock needs the CT lock to resubmit jobs. 2360 * An atomic is not ideal, but it works to protect against concurrent 2361 * resets and to release any TDRs waiting on guc->submission_state.stopped.
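* The smp_wmb() below ensures the stopped flag is visible before waiters on guc->ct.wq are woken.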
2362 */ 2363 ret = atomic_fetch_or(1, &guc->submission_state.stopped); 2364 smp_wmb(); 2365 wake_up_all(&guc->ct.wq); 2366 2367 return ret; 2368 } 2369 2370 void xe_guc_submit_reset_wait(struct xe_guc *guc) 2371 { 2372 wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || 2373 !xe_guc_read_stopped(guc)); 2374 } 2375 2376 void xe_guc_submit_stop(struct xe_guc *guc) 2377 { 2378 struct xe_exec_queue *q; 2379 unsigned long index; 2380 2381 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 2382 2383 mutex_lock(&guc->submission_state.lock); 2384 2385 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2386 /* Prevent redundant attempts to stop parallel queues */ 2387 if (q->guc->id != index) 2388 continue; 2389 2390 guc_exec_queue_stop(guc, q); 2391 } 2392 2393 mutex_unlock(&guc->submission_state.lock); 2394 2395 /* 2396 * No one can enter the backend at this point, aside from new engine 2397 * creation which is protected by guc->submission_state.lock. 2398 */ 2399 2400 } 2401 2402 static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc, 2403 struct xe_exec_queue *q) 2404 { 2405 bool pending_enable, pending_disable, pending_resume; 2406 2407 pending_enable = exec_queue_pending_enable(q); 2408 pending_resume = exec_queue_pending_resume(q); 2409 2410 if (pending_enable && pending_resume) { 2411 q->guc->needs_resume = true; 2412 xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d", 2413 q->guc->id); 2414 } 2415 2416 if (pending_enable && !pending_resume) { 2417 clear_exec_queue_registered(q); 2418 xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d", 2419 q->guc->id); 2420 } 2421 2422 if (pending_enable) { 2423 clear_exec_queue_enabled(q); 2424 clear_exec_queue_pending_resume(q); 2425 clear_exec_queue_pending_enable(q); 2426 xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d", 2427 q->guc->id); 2428 } 2429 2430 if (exec_queue_destroyed(q) && exec_queue_registered(q)) { 2431 clear_exec_queue_destroyed(q); 2432 q->guc->needs_cleanup = true; 2433 xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d", 2434 q->guc->id); 2435 } 2436 2437 pending_disable = exec_queue_pending_disable(q); 2438 2439 if (pending_disable && exec_queue_suspended(q)) { 2440 clear_exec_queue_suspended(q); 2441 q->guc->needs_suspend = true; 2442 xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d", 2443 q->guc->id); 2444 } 2445 2446 if (pending_disable) { 2447 if (!pending_enable) 2448 set_exec_queue_enabled(q); 2449 clear_exec_queue_pending_disable(q); 2450 xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d", 2451 q->guc->id); 2452 } 2453 2454 q->guc->resume_time = 0; 2455 } 2456 2457 static void lrc_parallel_clear(struct xe_lrc *lrc) 2458 { 2459 struct xe_device *xe = gt_to_xe(lrc->gt); 2460 struct iosys_map map = xe_lrc_parallel_map(lrc); 2461 int i; 2462 2463 for (i = 0; i < WQ_SIZE / sizeof(u32); ++i) 2464 parallel_write(xe, map, wq[i], 2465 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 2466 FIELD_PREP(WQ_LEN_MASK, 0)); 2467 } 2468 2469 /* 2470 * This function is quite complex but is the only real way to ensure no state 2471 * is lost during VF resume flows. It scans the queue state, makes adjustments 2472 * as needed, and queues jobs / messages which are replayed upon unpause.
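* The needs_* flags and restore_replay markers set here are consumed on unpause by guc_exec_queue_replay_pending_state_change() and guc_exec_queue_unpause_prepare().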
2473 */ 2474 static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) 2475 { 2476 struct xe_gpu_scheduler *sched = &q->guc->sched; 2477 struct xe_sched_job *job; 2478 int i; 2479 2480 lockdep_assert_held(&guc->submission_state.lock); 2481 2482 /* Stop scheduling + flush any DRM scheduler operations */ 2483 xe_sched_submission_stop(sched); 2484 cancel_delayed_work_sync(&sched->base.work_tdr); 2485 2486 guc_exec_queue_revert_pending_state_change(guc, q); 2487 2488 if (xe_exec_queue_is_parallel(q)) { 2489 /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ 2490 struct xe_lrc *lrc = READ_ONCE(q->lrc[0]); 2491 2492 /* 2493 * NOP existing WQ commands that may contain stale GGTT 2494 * addresses. These will be replayed upon unpause. The hardware 2495 * seems to get confused if the WQ head/tail pointers are 2496 * adjusted. 2497 */ 2498 if (lrc) 2499 lrc_parallel_clear(lrc); 2500 } 2501 2502 job = xe_sched_first_pending_job(sched); 2503 if (job) { 2504 job->restore_replay = true; 2505 2506 /* 2507 * Adjust software tail so jobs submitted overwrite previous 2508 * position in ring buffer with new GGTT addresses. 2509 */ 2510 for (i = 0; i < q->width; ++i) 2511 q->lrc[i]->ring.tail = job->ptrs[i].head; 2512 } 2513 } 2514 2515 /** 2516 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 2517 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2518 */ 2519 void xe_guc_submit_pause(struct xe_guc *guc) 2520 { 2521 struct xe_exec_queue *q; 2522 unsigned long index; 2523 2524 mutex_lock(&guc->submission_state.lock); 2525 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2526 xe_sched_submission_stop(&q->guc->sched); 2527 mutex_unlock(&guc->submission_state.lock); 2528 } 2529 2530 /** 2531 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF. 2532 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2533 */ 2534 void xe_guc_submit_pause_vf(struct xe_guc *guc) 2535 { 2536 struct xe_exec_queue *q; 2537 unsigned long index; 2538 2539 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2540 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2541 2542 mutex_lock(&guc->submission_state.lock); 2543 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2544 /* Prevent redundant attempts to stop parallel queues */ 2545 if (q->guc->id != index) 2546 continue; 2547 2548 guc_exec_queue_pause(guc, q); 2549 } 2550 mutex_unlock(&guc->submission_state.lock); 2551 } 2552 2553 static void guc_exec_queue_start(struct xe_exec_queue *q) 2554 { 2555 struct xe_gpu_scheduler *sched = &q->guc->sched; 2556 2557 if (!exec_queue_killed_or_banned_or_wedged(q)) { 2558 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2559 int i; 2560 2561 trace_xe_exec_queue_resubmit(q); 2562 if (job) { 2563 for (i = 0; i < q->width; ++i) { 2564 /* 2565 * The GuC context is unregistered at this point 2566 * in time; adjusting the software ring tail 2567 * ensures jobs are rewritten in their original 2568 * placement, and adjusting the LRC tail ensures 2569 * the newly loaded GuC / contexts only view the 2570 * LRC tail increasing as jobs are written out.
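* In other words, resubmission replays the ring from the first pending job's head, so the reloaded GuC observes the tail only moving forward.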
2571 */ 2572 q->lrc[i]->ring.tail = job->ptrs[i].head; 2573 xe_lrc_set_ring_tail(q->lrc[i], 2574 xe_lrc_ring_head(q->lrc[i])); 2575 } 2576 } 2577 xe_sched_resubmit_jobs(sched); 2578 } 2579 2580 xe_sched_submission_start(sched); 2581 xe_sched_submission_resume_tdr(sched); 2582 } 2583 2584 int xe_guc_submit_start(struct xe_guc *guc) 2585 { 2586 struct xe_exec_queue *q; 2587 unsigned long index; 2588 2589 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 2590 2591 mutex_lock(&guc->submission_state.lock); 2592 atomic_dec(&guc->submission_state.stopped); 2593 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2594 /* Prevent redundant attempts to start parallel queues */ 2595 if (q->guc->id != index) 2596 continue; 2597 2598 guc_exec_queue_start(q); 2599 } 2600 mutex_unlock(&guc->submission_state.lock); 2601 2602 wake_up_all(&guc->ct.wq); 2603 2604 return 0; 2605 } 2606 2607 static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, 2608 struct xe_exec_queue *q) 2609 { 2610 struct xe_gpu_scheduler *sched = &q->guc->sched; 2611 struct xe_sched_job *job = NULL; 2612 struct drm_sched_job *s_job; 2613 bool restore_replay = false; 2614 2615 drm_sched_for_each_pending_job(s_job, &sched->base, NULL) { 2616 job = to_xe_sched_job(s_job); 2617 restore_replay |= job->restore_replay; 2618 if (restore_replay) { 2619 xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d", 2620 q->guc->id, xe_sched_job_seqno(job)); 2621 2622 q->ring_ops->emit_job(job); 2623 job->restore_replay = true; 2624 } 2625 } 2626 2627 if (job) 2628 job->last_replay = true; 2629 } 2630 2631 /** 2632 * xe_guc_submit_unpause_prepare_vf - Prepare to unpause submission tasks for VF. 2633 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause 2634 */ 2635 void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc) 2636 { 2637 struct xe_exec_queue *q; 2638 unsigned long index; 2639 2640 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2641 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2642 2643 mutex_lock(&guc->submission_state.lock); 2644 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2645 /* Prevent redundant attempts to prepare parallel queues */ 2646 if (q->guc->id != index) 2647 continue; 2648 2649 guc_exec_queue_unpause_prepare(guc, q); 2650 } 2651 mutex_unlock(&guc->submission_state.lock); 2652 } 2653 2654 static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q) 2655 { 2656 struct xe_gpu_scheduler *sched = &q->guc->sched; 2657 struct xe_sched_msg *msg; 2658 2659 if (q->guc->needs_cleanup) { 2660 msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2661 2662 guc_exec_queue_add_msg(q, msg, CLEANUP); 2663 q->guc->needs_cleanup = false; 2664 } 2665 2666 if (q->guc->needs_suspend) { 2667 msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2668 2669 xe_sched_msg_lock(sched); 2670 guc_exec_queue_try_add_msg_head(q, msg, SUSPEND); 2671 xe_sched_msg_unlock(sched); 2672 2673 q->guc->needs_suspend = false; 2674 } 2675 2676 /* 2677 * The resume must be in the message queue before the suspend, as it is 2678 * not possible for a resume to be issued if a suspend is pending, but 2679 * the inverse is possible.
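* Both replays use guc_exec_queue_try_add_msg_head(), so adding the resume second places it ahead of the suspend in the message queue.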
2680 */ 2681 if (q->guc->needs_resume) { 2682 msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2683 2684 xe_sched_msg_lock(sched); 2685 guc_exec_queue_try_add_msg_head(q, msg, RESUME); 2686 xe_sched_msg_unlock(sched); 2687 2688 q->guc->needs_resume = false; 2689 } 2690 } 2691 2692 static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q) 2693 { 2694 struct xe_gpu_scheduler *sched = &q->guc->sched; 2695 bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q); 2696 2697 lockdep_assert_held(&guc->submission_state.lock); 2698 2699 xe_sched_resubmit_jobs(sched); 2700 guc_exec_queue_replay_pending_state_change(q); 2701 xe_sched_submission_start(sched); 2702 if (needs_tdr) 2703 xe_guc_exec_queue_trigger_cleanup(q); 2704 xe_sched_submission_resume_tdr(sched); 2705 } 2706 2707 /** 2708 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC. 2709 * @guc: the &xe_guc struct instance whose scheduler is to be enabled 2710 */ 2711 void xe_guc_submit_unpause(struct xe_guc *guc) 2712 { 2713 struct xe_exec_queue *q; 2714 unsigned long index; 2715 2716 mutex_lock(&guc->submission_state.lock); 2717 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2718 xe_sched_submission_start(&q->guc->sched); 2719 mutex_unlock(&guc->submission_state.lock); 2720 } 2721 2722 /** 2723 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF. 2724 * @guc: the &xe_guc struct instance whose scheduler is to be enabled 2725 */ 2726 void xe_guc_submit_unpause_vf(struct xe_guc *guc) 2727 { 2728 struct xe_exec_queue *q; 2729 unsigned long index; 2730 2731 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2732 2733 mutex_lock(&guc->submission_state.lock); 2734 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2735 /* 2736 * Prevent redundant attempts to unpause parallel queues, or 2737 * queues created after resfix done. 2738 */ 2739 if (q->guc->id != index || 2740 !drm_sched_is_stopped(&q->guc->sched.base)) 2741 continue; 2742 2743 guc_exec_queue_unpause(guc, q); 2744 } 2745 mutex_unlock(&guc->submission_state.lock); 2746 } 2747 2748 /** 2749 * xe_guc_submit_pause_abort - Abort all paused submission tasks on given GuC.
2750 * @guc: the &xe_guc struct instance whose scheduler is to be aborted 2751 */ 2752 void xe_guc_submit_pause_abort(struct xe_guc *guc) 2753 { 2754 struct xe_exec_queue *q; 2755 unsigned long index; 2756 2757 mutex_lock(&guc->submission_state.lock); 2758 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2759 struct xe_gpu_scheduler *sched = &q->guc->sched; 2760 2761 /* Prevent redundant attempts to start parallel queues */ 2762 if (q->guc->id != index) 2763 continue; 2764 2765 xe_sched_submission_start(sched); 2766 if (exec_queue_killed_or_banned_or_wedged(q)) 2767 xe_guc_exec_queue_trigger_cleanup(q); 2768 } 2769 mutex_unlock(&guc->submission_state.lock); 2770 } 2771 2772 static struct xe_exec_queue * 2773 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) 2774 { 2775 struct xe_gt *gt = guc_to_gt(guc); 2776 struct xe_exec_queue *q; 2777 2778 if (unlikely(guc_id >= GUC_ID_MAX)) { 2779 xe_gt_err(gt, "Invalid guc_id %u\n", guc_id); 2780 return NULL; 2781 } 2782 2783 q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); 2784 if (unlikely(!q)) { 2785 xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id); 2786 return NULL; 2787 } 2788 2789 xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id); 2790 xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width)); 2791 2792 return q; 2793 } 2794 2795 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) 2796 { 2797 u32 action[] = { 2798 XE_GUC_ACTION_DEREGISTER_CONTEXT, 2799 q->guc->id, 2800 }; 2801 2802 xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); 2803 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 2804 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 2805 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); 2806 2807 trace_xe_exec_queue_deregister(q); 2808 2809 if (xe_exec_queue_is_multi_queue_secondary(q)) 2810 handle_deregister_done(guc, q); 2811 else 2812 xe_guc_ct_send_g2h_handler(&guc->ct, action, 2813 ARRAY_SIZE(action)); 2814 } 2815 2816 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, 2817 u32 runnable_state) 2818 { 2819 trace_xe_exec_queue_scheduling_done(q); 2820 2821 if (runnable_state == 1) { 2822 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); 2823 2824 q->guc->resume_time = ktime_get(); 2825 clear_exec_queue_pending_resume(q); 2826 clear_exec_queue_pending_enable(q); 2827 smp_wmb(); 2828 wake_up_all(&guc->ct.wq); 2829 } else { 2830 xe_gt_assert(guc_to_gt(guc), runnable_state == 0); 2831 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); 2832 2833 if (q->guc->suspend_pending) { 2834 suspend_fence_signal(q); 2835 clear_exec_queue_pending_disable(q); 2836 } else { 2837 if (exec_queue_banned(q)) { 2838 smp_wmb(); 2839 wake_up_all(&guc->ct.wq); 2840 } 2841 if (exec_queue_destroyed(q)) { 2842 /* 2843 * Make sure to clear the pending_disable only 2844 * after sampling the destroyed state. We want 2845 * to ensure we don't trigger the unregister too 2846 * early with something intending to only 2847 * disable scheduling. The caller doing the 2848 * destroy must wait for an ongoing 2849 * pending_disable before marking as destroyed.
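* I.e., sample destroyed first, then clear pending_disable; in the reverse order a concurrent destroy could land between the two and the deregister below would fire for what was only a schedule disable.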
2850 */ 2851 clear_exec_queue_pending_disable(q); 2852 deregister_exec_queue(guc, q); 2853 } else { 2854 clear_exec_queue_pending_disable(q); 2855 } 2856 } 2857 } 2858 } 2859 2860 static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, 2861 struct xe_exec_queue *q, 2862 u32 runnable_state) 2863 { 2864 /* Take the CT lock here as handle_sched_done() may send an H2G message */ 2865 mutex_lock(&guc->ct.lock); 2866 handle_sched_done(guc, q, runnable_state); 2867 mutex_unlock(&guc->ct.lock); 2868 } 2869 2870 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2871 { 2872 struct xe_exec_queue *q; 2873 u32 guc_id, runnable_state; 2874 2875 if (unlikely(len < 2)) 2876 return -EPROTO; 2877 2878 guc_id = msg[0]; 2879 runnable_state = msg[1]; 2880 2881 q = g2h_exec_queue_lookup(guc, guc_id); 2882 if (unlikely(!q)) 2883 return -EPROTO; 2884 2885 if (unlikely(!exec_queue_pending_enable(q) && 2886 !exec_queue_pending_disable(q))) { 2887 xe_gt_err(guc_to_gt(guc), 2888 "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", 2889 atomic_read(&q->guc->state), q->guc->id, 2890 runnable_state); 2891 return -EPROTO; 2892 } 2893 2894 handle_sched_done(guc, q, runnable_state); 2895 2896 return 0; 2897 } 2898 2899 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) 2900 { 2901 trace_xe_exec_queue_deregister_done(q); 2902 2903 clear_exec_queue_registered(q); 2904 __guc_exec_queue_destroy(guc, q); 2905 } 2906 2907 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2908 { 2909 struct xe_exec_queue *q; 2910 u32 guc_id; 2911 2912 if (unlikely(len < 1)) 2913 return -EPROTO; 2914 2915 guc_id = msg[0]; 2916 2917 q = g2h_exec_queue_lookup(guc, guc_id); 2918 if (unlikely(!q)) 2919 return -EPROTO; 2920 2921 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || 2922 exec_queue_pending_enable(q) || exec_queue_enabled(q)) { 2923 xe_gt_err(guc_to_gt(guc), 2924 "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d", 2925 atomic_read(&q->guc->state), q->guc->id); 2926 return -EPROTO; 2927 } 2928 2929 handle_deregister_done(guc, q); 2930 2931 return 0; 2932 } 2933 2934 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) 2935 { 2936 struct xe_gt *gt = guc_to_gt(guc); 2937 struct xe_exec_queue *q; 2938 u32 guc_id; 2939 2940 if (unlikely(len < 1)) 2941 return -EPROTO; 2942 2943 guc_id = msg[0]; 2944 2945 q = g2h_exec_queue_lookup(guc, guc_id); 2946 if (unlikely(!q)) 2947 return -EPROTO; 2948 2949 xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%x", 2950 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id, 2951 atomic_read(&q->guc->state)); 2952 2953 trace_xe_exec_queue_reset(q); 2954 2955 /* 2956 * A banned engine is a NOP at this point (it came from 2957 * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to 2958 * cancel jobs by setting the job's timeout to the minimum value, which 2959 * kicks off guc_exec_queue_timedout_job. 2960 */ 2961 xe_guc_exec_queue_reset_trigger_cleanup(q); 2962 2963 return 0; 2964 } 2965 2966 /* 2967 * xe_guc_error_capture_handler - Handler of GuC captured message 2968 * @guc: The GuC object 2969 * @msg: Pointer to the message 2970 * @len: The message length 2971 * 2972 * When GuC captured data is ready, GuC will send message 2973 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host; this function will be 2974 * called first to check the status before processing the data that comes 2975 * with the message. 2976 * Returns: 0 on success, negative error code on failure.
2977 */ 2978 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 2979 { 2980 u32 status; 2981 2982 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 2983 return -EPROTO; 2984 2985 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 2986 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 2987 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 2988 2989 xe_guc_capture_process(guc); 2990 2991 return 0; 2992 } 2993 2994 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 2995 u32 len) 2996 { 2997 struct xe_gt *gt = guc_to_gt(guc); 2998 struct xe_exec_queue *q; 2999 u32 guc_id; 3000 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 3001 3002 if (unlikely(!len || len > 2)) 3003 return -EPROTO; 3004 3005 guc_id = msg[0]; 3006 3007 if (len == 2) 3008 type = msg[1]; 3009 3010 if (guc_id == GUC_ID_UNKNOWN) { 3011 /* 3012 * GuC uses GUC_ID_UNKNOWN if it cannot map the CAT fault to any PF/VF 3013 * context. In such a case, only the PF will be notified about that fault. 3014 */ 3015 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 3016 return 0; 3017 } 3018 3019 q = g2h_exec_queue_lookup(guc, guc_id); 3020 if (unlikely(!q)) 3021 return -EPROTO; 3022 3023 /* 3024 * The type is HW-defined and changes based on platform, so we don't 3025 * decode it in the kernel and only check if it is valid. 3026 * See bspec 54047 and 72187 for details. 3027 */ 3028 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 3029 xe_gt_info(gt, 3030 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 3031 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3032 else 3033 xe_gt_info(gt, 3034 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 3035 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3036 3037 trace_xe_exec_queue_memory_cat_error(q); 3038 3039 /* Treat the same as engine reset */ 3040 xe_guc_exec_queue_reset_trigger_cleanup(q); 3041 3042 return 0; 3043 } 3044 3045 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 3046 { 3047 struct xe_gt *gt = guc_to_gt(guc); 3048 u8 guc_class, instance; 3049 u32 reason; 3050 3051 if (unlikely(len != 3)) 3052 return -EPROTO; 3053 3054 guc_class = msg[0]; 3055 instance = msg[1]; 3056 reason = msg[2]; 3057 3058 /* Unexpected failure of a hardware feature, log an actual error */ 3059 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3060 guc_class, instance, reason); 3061 3062 xe_gt_reset_async(gt); 3063 3064 return 0; 3065 } 3066 3067 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 3068 u32 len) 3069 { 3070 struct xe_gt *gt = guc_to_gt(guc); 3071 struct xe_device *xe = guc_to_xe(guc); 3072 struct xe_exec_queue *q; 3073 u32 guc_id; 3074 3075 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3076 drm_err(&xe->drm, "Invalid length %u", len); 3077 return -EPROTO; 3078 } 3079 3080 guc_id = msg[2]; 3081 q = g2h_exec_queue_lookup(guc, guc_id); 3082 if (unlikely(!q)) 3083 return -EPROTO; 3084 3085 xe_gt_dbg(gt, "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3086 msg[0] & 1 ? "uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3087 3088 trace_xe_exec_queue_cgp_context_error(q); 3089 3090 /* Treat the same as engine reset */ 3091 xe_guc_exec_queue_reset_trigger_cleanup(q); 3092 3093 return 0; 3094 } 3095 3096 /** 3097 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3098 * @guc: guc 3099 * @msg: message indicating CGP sync done 3100 * @len: length of message 3101 * 3102 * Set the multi queue group's sync_pending flag to false and wake up anyone 3103 * waiting for CGP synchronization to complete. 3104 * 3105 * Return: 0 on success, -EPROTO for malformed messages. 3106 */ 3107 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3108 { 3109 struct xe_device *xe = guc_to_xe(guc); 3110 struct xe_exec_queue *q; 3111 u32 guc_id; 3112 3113 if (unlikely(len < 1)) { 3114 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3115 return -EPROTO; 3116 } 3117 guc_id = msg[0]; 3118 q = g2h_exec_queue_lookup(guc, guc_id); 3119 if (unlikely(!q)) 3120 return -EPROTO; 3121 3122 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3123 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3124 return -EPROTO; 3125 } 3126 3127 /* Wake up the serialized CGP update wait */ 3128 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3129 xe_guc_ct_wake_waiters(&guc->ct); 3130 3131 return 0; 3132 } 3133 3134 static void 3135 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3136 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3137 { 3138 struct xe_guc *guc = exec_queue_to_guc(q); 3139 struct xe_device *xe = guc_to_xe(guc); 3140 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3141 int i; 3142 3143 snapshot->guc.wqi_head = q->guc->wqi_head; 3144 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3145 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3146 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3147 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 3148 wq_desc.wq_status); 3149 3150 if (snapshot->parallel.wq_desc.head != 3151 snapshot->parallel.wq_desc.tail) { 3152 for (i = snapshot->parallel.wq_desc.head; 3153 i != snapshot->parallel.wq_desc.tail; 3154 i = (i + sizeof(u32)) % WQ_SIZE) 3155 snapshot->parallel.wq[i / sizeof(u32)] = 3156 parallel_read(xe, map, wq[i / sizeof(u32)]); 3157 } 3158 } 3159 3160 static void 3161 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3162 struct drm_printer *p) 3163 { 3164 int i; 3165 3166 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3167 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3168 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3169 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3170 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3171 3172 if (snapshot->parallel.wq_desc.head != 3173 snapshot->parallel.wq_desc.tail) { 3174 for (i = snapshot->parallel.wq_desc.head; 3175 i != snapshot->parallel.wq_desc.tail; 3176 i = (i + sizeof(u32)) % WQ_SIZE) 3177 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3178 snapshot->parallel.wq[i / sizeof(u32)]); 3179 } 3180 } 3181 3182 /** 3183 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3184 * @q: faulty exec queue 3185 * 3186 * This can be printed out in a later stage like during dev_coredump 3187 * analysis. 3188 * 3189 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3190 * caller, using `xe_guc_exec_queue_snapshot_free`.
3191 */ 3192 struct xe_guc_submit_exec_queue_snapshot * 3193 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 3194 { 3195 struct xe_gpu_scheduler *sched = &q->guc->sched; 3196 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3197 int i; 3198 3199 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC); 3200 3201 if (!snapshot) 3202 return NULL; 3203 3204 snapshot->guc.id = q->guc->id; 3205 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 3206 snapshot->class = q->class; 3207 snapshot->logical_mask = q->logical_mask; 3208 snapshot->width = q->width; 3209 snapshot->refcount = kref_read(&q->refcount); 3210 snapshot->sched_timeout = sched->base.timeout; 3211 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 3212 snapshot->sched_props.preempt_timeout_us = 3213 q->sched_props.preempt_timeout_us; 3214 3215 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width, 3216 GFP_ATOMIC); 3217 3218 if (snapshot->lrc) { 3219 for (i = 0; i < q->width; ++i) { 3220 struct xe_lrc *lrc = q->lrc[i]; 3221 3222 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 3223 } 3224 } 3225 3226 snapshot->schedule_state = atomic_read(&q->guc->state); 3227 snapshot->exec_queue_flags = q->flags; 3228 3229 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 3230 if (snapshot->parallel_execution) 3231 guc_exec_queue_wq_snapshot_capture(q, snapshot); 3232 3233 if (xe_exec_queue_is_multi_queue(q)) { 3234 snapshot->multi_queue.valid = true; 3235 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; 3236 snapshot->multi_queue.pos = q->multi_queue.pos; 3237 } 3238 3239 return snapshot; 3240 } 3241 3242 /** 3243 * xe_guc_exec_queue_snapshot_capture_delayed - Take the delayed part of a snapshot of the GuC Engine. 3244 * @snapshot: Previously captured snapshot of job. 3245 * 3246 * This captures some data that requires taking some locks, so it cannot be done in the signaling path. 3247 */ 3248 void 3249 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3250 { 3251 int i; 3252 3253 if (!snapshot || !snapshot->lrc) 3254 return; 3255 3256 for (i = 0; i < snapshot->width; ++i) 3257 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3258 } 3259 3260 /** 3261 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3262 * @snapshot: GuC Submit Engine snapshot object. 3263 * @p: drm_printer where it will be printed out. 3264 * 3265 * This function prints out a given GuC Submit Engine snapshot object.
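* Nothing is printed for a NULL snapshot, so it is safe to pass the result of a failed capture directly.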
3266 */ 3267 void 3268 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3269 struct drm_printer *p) 3270 { 3271 int i; 3272 3273 if (!snapshot) 3274 return; 3275 3276 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id); 3277 drm_printf(p, "\tName: %s\n", snapshot->name); 3278 drm_printf(p, "\tClass: %d\n", snapshot->class); 3279 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); 3280 drm_printf(p, "\tWidth: %d\n", snapshot->width); 3281 drm_printf(p, "\tRef: %d\n", snapshot->refcount); 3282 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); 3283 drm_printf(p, "\tTimeslice: %u (us)\n", 3284 snapshot->sched_props.timeslice_us); 3285 drm_printf(p, "\tPreempt timeout: %u (us)\n", 3286 snapshot->sched_props.preempt_timeout_us); 3287 3288 for (i = 0; snapshot->lrc && i < snapshot->width; ++i) 3289 xe_lrc_snapshot_print(snapshot->lrc[i], p); 3290 3291 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); 3292 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); 3293 3294 if (snapshot->parallel_execution) 3295 guc_exec_queue_wq_snapshot_print(snapshot, p); 3296 3297 if (snapshot->multi_queue.valid) { 3298 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); 3299 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); 3300 } 3301 } 3302 3303 /** 3304 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given 3305 * snapshot. 3306 * @snapshot: GuC Submit Engine snapshot object. 3307 * 3308 * This function frees all the memory that was allocated at capture 3309 * time. 3310 */ 3311 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3312 { 3313 int i; 3314 3315 if (!snapshot) 3316 return; 3317 3318 if (snapshot->lrc) { 3319 for (i = 0; i < snapshot->width; i++) 3320 xe_lrc_snapshot_free(snapshot->lrc[i]); 3321 kfree(snapshot->lrc); 3322 } 3323 kfree(snapshot); 3324 } 3325 3326 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) 3327 { 3328 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3329 3330 snapshot = xe_guc_exec_queue_snapshot_capture(q); 3331 xe_guc_exec_queue_snapshot_print(snapshot, p); 3332 xe_guc_exec_queue_snapshot_free(snapshot); 3333 } 3334 3335 /** 3336 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. 3337 * @q: Execution queue 3338 * @ctx_type: Type of the context 3339 * 3340 * This function registers the execution queue with the GuC. Special context 3341 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE 3342 * are only applicable for iGPU and in the VF. 3343 * Submits the execution queue to the GuC after registering it. 3344 * 3345 * Return: none. 3346 */ 3347 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3348 { 3349 struct xe_guc *guc = exec_queue_to_guc(q); 3350 struct xe_device *xe = guc_to_xe(guc); 3351 struct xe_gt *gt = guc_to_gt(guc); 3352 3353 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3354 xe_gt_assert(gt, !IS_DGFX(xe)); 3355 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3356 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3357 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3358 3359 register_exec_queue(q, ctx_type); 3360 enable_scheduling(q); 3361 } 3362 3363 /** 3364 * xe_guc_submit_print - GuC Submit Print. 3365 * @guc: GuC. 3366 * @p: drm_printer where it will be printed out.
3367 * 3368 * This function captures and prints snapshots of **all** GuC Engines. 3369 */ 3370 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3371 { 3372 struct xe_exec_queue *q; 3373 unsigned long index; 3374 3375 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3376 return; 3377 3378 mutex_lock(&guc->submission_state.lock); 3379 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3380 guc_exec_queue_print(q, p); 3381 mutex_unlock(&guc->submission_state.lock); 3382 } 3383 3384 /** 3385 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues 3386 * registered with the GuC 3387 * @guc: GuC. 3388 * 3389 * Return: true if any MLRC queue is registered with the GuC, false otherwise. 3390 */ 3391 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) 3392 { 3393 struct xe_exec_queue *q; 3394 unsigned long index; 3395 3396 guard(mutex)(&guc->submission_state.lock); 3397 3398 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3399 if (q->width > 1) 3400 return true; 3401 3402 return false; 3403 } 3404 3405 /** 3406 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3407 * exec queues registered to a given GuC. 3408 * @guc: the &xe_guc struct instance 3409 * @scratch: scratch buffer to be used as temporary storage 3410 * 3411 * Returns: zero on success, negative error code on failure. 3412 */ 3413 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3414 { 3415 struct xe_exec_queue *q; 3416 unsigned long index; 3417 int err = 0; 3418 3419 mutex_lock(&guc->submission_state.lock); 3420 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3421 /* Prevent redundant attempts to rebase parallel queues */ 3422 if (q->guc->id != index) 3423 continue; 3424 3425 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3426 if (err) 3427 break; 3428 } 3429 mutex_unlock(&guc->submission_state.lock); 3430 3431 return err; 3432 } 3433