// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sleep.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

/* Length of the G2H CGP context-error notification payload, in dwords */
#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6

static int guc_submit_reset_prepare(struct xe_guc *guc);

/* Resolve the GuC instance that owns an exec queue (via its GT). */
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule-engine-done being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 10)
#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND	(1 << 11)

/*
 * Test/set/clear accessors for the per-queue atomic state word. Note that
 * some bits (RESET, KILLED, WEDGED, BANNED) are only ever set, never cleared,
 * in this view of the file.
 */
static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_pending_resume(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
}

static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND;
}

static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

/* True if the queue can no longer accept useful work (terminal states). */
static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

/*
 * drmm release action: tear down the submission software state. Waits (up to
 * 5 seconds) for all exec queues to be removed from the lookup xarray, then
 * drains any outstanding destroy work before destroying the xarray.
 */
static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	/* ret == 0 means the 5s timeout expired with queues still present */
	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

/*
 * devm release action: force the GuC submission backend down on device
 * removal, regardless of what state the firmware/queues are in.
 */
static void guc_submit_fini(void *arg)
{
	struct xe_guc *guc = arg;

	/* Forcefully kill any remaining exec queues */
	xe_guc_ct_stop(&guc->ct);
	guc_submit_reset_prepare(guc);
	xe_guc_softreset(guc);
	xe_guc_submit_stop(guc);
	xe_uc_fw_sanitize(&guc->fw);
	xe_guc_submit_pause_abort(guc);
}

/*
 * Drop the extra reference held on every wedged exec queue. The lock is
 * released around xe_exec_queue_put() since the final put may take paths
 * that must not run under submission_state.lock; xa_for_each() restarts
 * safely from the saved index after the lock is reacquired.
 */
static void guc_submit_wedged_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

/*
 * Teach lockdep up front that submission_state.lock may be taken in reclaim
 * context, so inversions are reported at init time rather than under memory
 * pressure in the field.
 */
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	/* Software state is torn down via drmm, hardware-facing via devm */
	err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc);
	if (err)
		return err;

	return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
/* Append the render/compute yield KLV; returns the advanced write pointer. */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
/*
 * Send the global scheduling policy KLVs to the GuC. A no-op on firmware
 * older than submission version 1.1.0 or when no policy KLV was emitted
 * (count == 1 means only the action header is present).
 */
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_INSTANCES(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

/*
 * Remove @xa_count lookup entries for @q and return its GuC context IDs to
 * the ID manager. @xa_count may be less than q->width when unwinding a
 * partially-completed alloc_guc_id(). Wakes the fini waitqueue when the last
 * queue disappears so guc_submit_sw_fini() can proceed.
 */
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

/*
 * Reserve q->width contiguous GuC context IDs for @q and insert it into the
 * lookup xarray under each ID. Returns 0 on success or a negative errno.
 */
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worse case user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	/* Only i entries were stored; release those plus the reserved IDs */
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

/* Builder for the HOST2GUC_UPDATE_CONTEXT_POLICIES KLV message */
struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

/* Size of the H2G message in dwords: header plus the KLVs emitted so far */
static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

/* Generate one append-KLV helper per supported context policy */
#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

/* Map Xe exec queue priorities onto GuC client priorities */
static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

/*
 * Push @q's scheduling properties (priority, timeslice, preemption timeout,
 * SLPC frequency request) to the GuC. Only valid for registered queues that
 * are not multi-queue secondaries (secondaries share the primary's context).
 */
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
		     !xe_exec_queue_is_multi_queue_secondary(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/* Drop @q's preemption timeout to 1us so the GuC preempts it promptly. */
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

static bool
vf_recovery(struct xe_guc *guc)
{
	/* True while a VF migration/recovery is pending on this GT */
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

/* Kick the TDR immediately to clean up @q after a reset/kill/ban. */
static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	xe_sched_tdr_queue_imm(&q->guc->sched);
}

/*
 * Stop drm-scheduler submission for every queue in @q's multi-queue group.
 * Members are moved to a private list under a reference so submission can be
 * stopped outside &group->list_lock (stopping under the lock would invert
 * lock ordering).
 */
static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq, *next;
	LIST_HEAD(tmp);

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	mutex_lock(&group->list_lock);

	/*
	 * Stop all future queues from executing while the group is stopped.
	 */
	group->stopped = true;

	list_for_each_entry_safe(eq, next, &group->list, multi_queue.link)
		/*
		 * Refcount prevents an attempted removal from &group->list,
		 * temporary list allows safe iteration after dropping
		 * &group->list_lock.
		 */
		if (xe_exec_queue_get_unless_zero(eq))
			list_move_tail(&eq->multi_queue.link, &tmp);

	mutex_unlock(&group->list_lock);

	/* We cannot stop under list lock without getting inversions */
	xe_sched_submission_stop(&primary->guc->sched);
	list_for_each_entry(eq, &tmp, multi_queue.link)
		xe_sched_submission_stop(&eq->guc->sched);

	mutex_lock(&group->list_lock);
	list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) {
		/*
		 * Corner where we got banned while stopping and not on
		 * &group->list
		 */
		if (READ_ONCE(group->banned))
			xe_guc_exec_queue_trigger_cleanup(eq);

		list_move_tail(&eq->multi_queue.link, &group->list);
		xe_exec_queue_put(eq);
	}
	mutex_unlock(&group->list_lock);
}

/* Restart drm-scheduler submission for every queue in @q's group. */
static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	xe_sched_submission_start(&primary->guc->sched);

	mutex_lock(&group->list_lock);
	group->stopped = false;
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_sched_submission_start(&eq->guc->sched);
	mutex_unlock(&group->list_lock);
}

/* Mark the whole group banned and trigger TDR cleanup on every member. */
static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	/* Group banned, skip timeout check in TDR */
	WRITE_ONCE(group->banned, true);
	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

/*
 * Engine-reset cleanup entry point: set the RESET state bit and schedule
 * cleanup on @q — or, for a multi-queue group, on every member of the group.
 * Already-banned queues are skipped since their cleanup is underway.
 */
static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		/* Group banned, skip timeout check in TDR */
		WRITE_ONCE(group->banned, true);

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 cgp_lo;
	u32 cgp_hi;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};

/* Read/write a field of the parallel-submit scratch page via an iosys_map */
#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

/**
 * DOC: Multi Queue Group GuC interface
 *
 * The multi queue group coordination between KMD and GuC is through a software
 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
 * allocated in the global GTT.
 *
 * CGP format:
 *
 * +-----------+---------------------------+---------------------------------------------+
 * | DWORD     | Name                      | Description                                 |
 * +-----------+---------------------------+---------------------------------------------+
 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 1..15     | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
 * +-----------+---------------------------+---------------------------------------------+
 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
 * +-----------+---------------------------+---------------------------------------------+
 * | 18..31    | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | ...       | ...                       | ...                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
 * +-----------+---------------------------+---------------------------------------------+
 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
 * +-----------+---------------------------+---------------------------------------------+
 * | 160..1024 | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 *
 * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified
 * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies
 * the CGP address. When the secondary queues are added to the group, the CGP is
 * updated with entry for that queue and GuC is notified through the H2G interface
 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages
 * with a XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any
 * error in the CGP. Only one of these CGP update messages can be outstanding
 * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW*
 * fields indicate which queue entry is being updated in the CGP.
 *
 * The primary queue (Q0) represents the multi queue group context in GuC and
 * submission on any queue of the group must be through Q0 GuC interface only.
 *
 * As it is not required to register secondary queues with GuC, the secondary queue
 * context ids in the CGP are populated with Q0 context id.
 */

/* DWORD 0 of the CGP: major version in bits [15:8], minor in [7:0] */
#define CGP_VERSION_MAJOR_SHIFT 8

/*
 * Write @q's entry into its group's Context Group Page: version dword,
 * the queue's LRC descriptor + context ID pair at its slot, and the
 * KMD_QUEUE_UPDATE_MASK dwords with only @q's bit set (the other mask
 * dword is zeroed, as only one queue entry is updated per sync).
 */
static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	/* Per-queue pair starts at dword 32; two dwords per queue slot */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	/* Secondaries are registered under the primary's context ID */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	if (q->multi_queue.pos / 32) {
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

/*
 * Update the CGP for @q and send the given CGP H2G @action. Waits first for
 * any previous CGP_SYNC_DONE response, since only one CGP update may be
 * outstanding at a time. On timeout or stopped GuC the group is banned and
 * a GT reset is triggered.
 */
static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_multi_queue_priority priority;
	long ret;

	/*
	 * As all queues of a multi queue group use single drm scheduler
	 * submit workqueue, CGP synchronization with GuC are serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset gt, cleanup the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	scoped_guard(spinlock, &q->multi_queue.lock)
		priority = q->multi_queue.priority;

	xe_lrc_set_multi_queue_priority(q->lrc[0], priority);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}

/* Register the primary queue (and its CGP) of a multi-queue group with GuC. */
static void __register_exec_queue_group(struct xe_guc *guc,
					struct xe_exec_queue *q,
					struct guc_ctxt_registration_info *info)
{
#define MAX_MULTI_QUEUE_REG_SIZE	(8)
	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
	int len = 0;

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = 0;	/* Reserved */
	action[len++] = info->cgp_lo;
	action[len++] = info->cgp_hi;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
#undef MAX_MULTI_QUEUE_REG_SIZE

	/*
	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE does expect a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/* Add a secondary queue to an already-registered multi-queue group. */
static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE	(2)
	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
	int len = 0;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));

	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
	action[len++] = q->multi_queue.group->primary->guc->id;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

	/*
	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC does expect a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/* Register a parallel (multi-LRC) context: header info plus one LRC
 * descriptor pair per additional width. */
static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE	(13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

/* Register a plain single-LRC context with the GuC. */
static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * Build registration info for @q and dispatch the appropriate registration
 * flavor: multi-queue primary (with CGP), parallel multi-LRC, or plain
 * single-LRC. Multi-queue secondaries are not registered with GuC directly;
 * they are added to the group via a CGP sync instead. Also initializes the
 * parallel scratch/workqueue page and pushes scheduling policies.
 */
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		/* CGP lives in GGTT; the address fits in 32 bits (hi = 0) */
		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		/* Fresh software work queue state and zeroed scratch page */
		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

/* Bytes left in the work queue before the write pointer must wrap to 0. */
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

/*
 * Wait (with exponential backoff, up to ~2s) until @wqi_size bytes are free
 * in the parallel submission work queue, re-reading the GuC-updated head
 * from the scratch page each iteration. Triggers an async GT reset and
 * returns -ENODEV on timeout. Skipped entirely during VF recovery.
 */
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64);
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

/*
 * Pad the remainder of the work queue with a NOOP item so the next item
 * can start at offset 0 (items may not straddle the wrap point).
 */
static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	/* NOOP consumed everything up to the wrap; restart at offset 0 */
	q->guc->wqi_tail = 0;

	return 0;
}

/*
 * Append a MULTI_LRC work item for a parallel queue: header + LRC 0
 * descriptor + guc_id/ring-tail word + one ring tail per extra LRC, then
 * publish the new tail to the GuC-visible descriptor (after a write
 * barrier so the item is globally visible first).
 */
static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	/* Items must not wrap; pad to the end of the buffer if needed */
	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	/* Ensure the item is visible before the tail update */
	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
/*
 * Push @job to the hardware: update the ring tail (or WQ item for parallel
 * queues) and, unless the context is already enabled, send a scheduling
 * enable to the GuC; otherwise just poke it with a SCHED_CONTEXT action.
 */
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	/* Skip ring/WQ updates for already-replayed restore jobs */
	if (!job->restore_replay || job->last_replay) {
		if (xe_exec_queue_is_parallel(q))
			wq_item_append(q);
		else if (!exec_queue_idle_skip_suspend(q))
			xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
		job->last_replay = false;
	}

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	/*
	 * All queues in a multi-queue group will use the primary queue
	 * of the group to interface with GuC.
	 */
	q = xe_exec_queue_multi_queue_primary(q);

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		/* First submission on a disabled context: enable scheduling */
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		/* Parallel queues still need an explicit submit afterwards */
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

/*
 * DRM scheduler run_job backend: lazily registers the queue (and, for a
 * multi-queue secondary, its group primary) with the GuC, emits the ring
 * commands unless this is a restore replay, and submits. Skipped entirely
 * for killed/banned/wedged queues or errored jobs; the job fence is
 * returned either way.
 */
static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	bool killed_or_banned_or_wedged =
		exec_queue_killed_or_banned_or_wedged(q);

	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

			/* A dead primary dooms the whole group */
			if (exec_queue_killed_or_banned_or_wedged(primary)) {
				killed_or_banned_or_wedged = true;
				goto run_job_out;
			}

			if (!exec_queue_registered(primary))
				register_exec_queue(primary, GUC_CONTEXT_NORMAL);
		}

		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		if (!job->restore_replay)
			q->ring_ops->emit_job(job);
		submit_exec_queue(q, job);
		job->restore_replay = false;
	}

run_job_out:

	return job->fence;
}

/* DRM scheduler free_job backend: drop the scheduler's job reference */
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

/* Non-zero while GuC submission is stopped (e.g. across a GT reset) */
int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state);
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)		\
	u32 action[] = {					\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,		\
		q->guc->id,					\
		GUC_CONTEXT_##enable_disable,			\
	}

/*
 * Disable scheduling on @q and mark it destroyed so the scheduling-disable
 * G2H handler follows up with a context deregister. Waits out any in-flight
 * pending enable/disable first; on timeout (outside VF recovery) restarts
 * submission and escalates to a GT reset instead.
 */
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);

	/* Order the pending-state reads against prior G2H processing */
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc),
				 HZ * 5);
	if (!ret && !vf_recovery(guc)) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve
	 * space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
			       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save exec queue's registered with GuC state by taking a ref to each queue.
 * Register a DRMM handler to drop refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
		/* Keep queues alive for debug; clean up at driver unload */
		err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
					       guc_submit_wedged_fini, guc);
		if (err) {
			xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; "
				  "Although device is wedged.\n",
				  xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
			return;
		}

		mutex_lock(&guc->submission_state.lock);
		xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
			if (xe_exec_queue_get_unless_zero(q))
				set_exec_queue_wedged(q);
		mutex_unlock(&guc->submission_state.lock);
	} else {
		/* Forcefully kill any remaining exec queues, signal fences */
		guc_submit_reset_prepare(guc);
		xe_guc_submit_stop(guc);
		xe_guc_softreset(guc);
		xe_uc_fw_sanitize(&guc->fw);
		xe_guc_submit_pause_abort(guc);
	}
}

/*
 * In "no reset on hang" wedged mode, declare the device wedged (if not
 * already) and report true so callers skip recovery; otherwise false.
 */
static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}

#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100)

/*
 * Decide whether @job has genuinely exceeded its timeout by comparing the
 * LRC context timestamp against the job-start timestamp. A job that never
 * started, or whose timestamp has stopped advancing, is invalidated via
 * xe_sched_invalidate_job() instead.
 */
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
	if
	    (ctx_timestamp == job->sample_timestamp) {
		/* Timestamp not advancing: context is stuck, not just slow */
		if (IS_SRIOV_VF(gt_to_xe(gt)))
			xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				     xe_sched_job_seqno(job),
				     xe_sched_job_lrc_seqno(job), q->guc->id);
		else
			xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				   xe_sched_job_seqno(job),
				   xe_sched_job_lrc_seqno(job), q->guc->id);

		return xe_sched_invalidate_job(job, 0);
	}

	job->sample_timestamp = ctx_timestamp;
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
	 * possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for a GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}

/*
 * Send a scheduling enable for @q and synchronously wait for the GuC to
 * acknowledge it. On timeout (outside VF recovery) or a stopped GuC the
 * queue is banned and a GT reset plus immediate TDR is queued.
 */
static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	/* Secondaries have no GuC context of their own; fake the done path */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 1);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

/*
 * Send a scheduling disable for @q (fire-and-forget; the G2H handler
 * clears the pending-disable state). @immediate additionally drops the
 * preemption timeout to its minimum so the context is kicked off hardware
 * as fast as possible.
 */
static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

/*
 * DRM scheduler timedout_job backend: validates the timeout, captures
 * debug state, kicks the (group-primary) context off hardware, and either
 * cancels all outstanding jobs or re-arms the TDR if the job was not
 * actually stuck.
 */
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct drm_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q, *primary;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	int err = -ETIME;
	pid_t pid = -1;
	bool wedged = false, skip_timeout_check;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));

	primary =
	xe_exec_queue_multi_queue_primary(q);

	/*
	 * TDR has fired before free job worker. Common if exec queue
	 * immediately closed after last fence signaled. Add back to pending
	 * list so job can be freed and kick scheduler ensuring free job is not
	 * lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
	    vf_recovery(guc))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_stop(q);
	else
		xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q);

	/* Skip timeout check if multi-queue group is banned */
	if (xe_exec_queue_is_multi_queue(q) &&
	    READ_ONCE(q->multi_queue.group->banned))
		skip_timeout_check = true;

	/* LR jobs can only get here if queue has been killed or hit an error */
	if (xe_exec_queue_is_lr(q))
		xe_gt_assert(guc_to_gt(guc), skip_timeout_check);

	/*
	 * If devcoredump not captured and GuC capture for the job is not ready
	 * do manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_queue(q);
	}

	/*
	 * Check if job is actually timed out, if so restart job execution and TDR
	 */
	if (!skip_timeout_check && !check_timeout(q, job))
		goto rearm;

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	set_exec_queue_banned(q);

	/* Kick job / queue off hardware */
	if (!wedged && (exec_queue_enabled(primary) ||
			exec_queue_pending_disable(primary))) {
		int ret;

		if (exec_queue_reset(primary))
			err = -EIO;

		if (xe_uc_fw_is_running(&guc->fw)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(primary) &&
						  !exec_queue_pending_disable(primary)) ||
						 xe_guc_read_stopped(guc) ||
						 vf_recovery(guc), HZ * 5);
			if (vf_recovery(guc))
				goto handle_vf_resume;
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			disable_scheduling(primary, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences, if GT broken the GT reset code should signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !xe_uc_fw_is_running(&guc->fw) ||
					 !exec_queue_pending_disable(primary) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			goto handle_vf_resume;
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   primary->guc->id);
			xe_devcoredump(primary, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       primary->guc->id, ret, xe_guc_read_stopped(guc));
			xe_gt_reset_async(primary->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs if they do
	 * somethings has gone wrong and the GT needs a reset
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Mark all outstanding jobs as bad, thus completing them */
	xe_sched_job_set_error(job, err);
	drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
		xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);

	/* Restart submission so the errored jobs drain, then clean up */
	if (xe_exec_queue_is_multi_queue(q)) {
		xe_guc_exec_queue_group_start(q);
		xe_guc_exec_queue_group_trigger_cleanup(q);
	} else {
		xe_sched_submission_start(sched);
		xe_guc_exec_queue_trigger_cleanup(q);
	}

	/*
	 * We want the job added back to the pending list so it gets freed; this
	 * is what DRM_GPU_SCHED_STAT_NO_HANG does.
	 */
	return DRM_GPU_SCHED_STAT_NO_HANG;

rearm:
	/*
	 * XXX: Ideally want to adjust timeout based on current execution time
	 * but there is not currently an easy way to do in DRM scheduler. With
	 * some thought, do this in a follow up.
	 */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_start(q);
	else
		xe_sched_submission_start(sched);
handle_vf_resume:
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

/*
 * Final teardown of the GuC-side exec queue state: release the guc_id,
 * tear down the scheduler entity and scheduler.
 */
static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

/*
 * Deferred exec queue destruction (worker body). Unlinks multi-queue
 * secondaries from their group list, flushes the TDR, and finishes the
 * queue, all under a PM runtime reference.
 */
static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	guard(xe_pm_runtime)(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		mutex_lock(&group->list_lock);
		list_del(&q->multi_queue.link);
		mutex_unlock(&group->list_lock);
	}

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);
}

/*
 * Queue (or, for permanent/wedged queues, run synchronously) the async
 * destruction of @q.
 */
static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that
	 * it is.
	 */
	guc_exec_queue_destroy_async(q);
}

/* CLEANUP message handler: tear the queue down on the GuC and/or driver side */
static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	/*
	 * Expected state transitions for cleanup:
	 * - If the exec queue is registered and GuC firmware is running, we must first
	 *   disable scheduling and deregister the queue to ensure proper teardown and
	 *   resource release in the GuC, then destroy the exec queue on driver side.
	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
	 *   we cannot expect a response for the deregister request. In this case,
	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
	 *   will not process further requests and all resources must be cleaned up locally.
	 */
	if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_destroy(guc, q);
}

/* State changes only make sense on a live, registered queue */
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
}

/* SET_SCHED_PROPS message handler: re-send scheduling policies to the GuC */
static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

/*
 * Clear the suspend_pending flag and wake any waiter in suspend_wait.
 * No-op if no suspend is pending.
 */
static void __suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	if (!q->guc->suspend_pending)
		return;

	WRITE_ONCE(q->guc->suspend_pending, false);

	/*
	 * We use a GuC shared wait queue for VFs
	 * because the VF resfix start
	 * interrupt must be able to wake all instances of suspend_wait. This
	 * prevents the VF migration worker from being starved during
	 * scheduling.
	 */
	if (IS_SRIOV_VF(xe))
		wake_up_all(&guc->ct.wq);
	else
		wake_up(&q->guc->suspend_wait);
}

/* Signal suspend completion; only legal once the queue is actually quiesced */
static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
		     xe_guc_read_stopped(guc));
	xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);

	__suspend_fence_signal(q);
}

/*
 * SUSPEND message handler. For an enabled queue: wait until any pending
 * enable/resume has completed, honor the VM's minimum run period since the
 * last resume, then mark the queue suspended and disable scheduling (the
 * sched-done G2H signals the suspend fence). For queues that are idle or
 * cannot change state, complete the suspend immediately.
 */
static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);
	bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q);

	if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) &&
	    !exec_queue_suspended(q) && exec_queue_enabled(q)) {
		wait_event(guc->ct.wq, vf_recovery(guc) ||
			   ((q->guc->resume_time != RESUME_PENDING ||
			     xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));

		if (!xe_guc_read_stopped(guc)) {
			/* Let the queue run its minimum period before preempting */
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       q->guc->resume_time);
			s64 wait_ms = q->vm->preempt.min_run_period_ms -
				since_resume_ms;

			if (wait_ms > 0 && q->guc->resume_time)
				xe_sleep_relaxed_ms(wait_ms);

			set_exec_queue_suspended(q);
			disable_scheduling(q, false);
		}
	} else if (q->guc->suspend_pending) {
		if (idle_skip_suspend)
			set_exec_queue_idle_skip_suspend(q);
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
}

/*
 * Poke an already-enabled, non-parallel context: publish the ring tail and
 * send a SCHED_CONTEXT action so the GuC re-examines it.
 */
static void sched_context(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[] = {
		XE_GUC_ACTION_SCHED_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	trace_xe_exec_queue_submit(q);

	xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * RESUME message handler: leave the suspended state and re-enable
 * scheduling, replaying the ring tail if the suspend was skipped while
 * idle. If the queue can no longer change state, just clear the flags.
 */
static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q)) {
		clear_exec_queue_suspended(q);
		if (!exec_queue_enabled(q)) {
			if (exec_queue_idle_skip_suspend(q)) {
				struct xe_lrc *lrc = q->lrc[0];

				clear_exec_queue_idle_skip_suspend(q);
				xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
			}
			q->guc->resume_time = RESUME_PENDING;
			set_exec_queue_pending_resume(q);
			enable_scheduling(q);
		} else if (exec_queue_idle_skip_suspend(q)) {
			clear_exec_queue_idle_skip_suspend(q);
			sched_context(q);
		}
	} else {
		clear_exec_queue_suspended(q);
		clear_exec_queue_idle_skip_suspend(q);
	}
}

/*
 * SET_MULTI_QUEUE_PRIORITY message handler: push updated CGP state to the
 * GuC via a CGP sync on the group's primary context.
 */
static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q)) {
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE	(2)
		struct xe_guc *guc = exec_queue_to_guc(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
		int len = 0;

		action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
		action[len++] = group->primary->guc->id;

		xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

		xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
	}
	/* Msg for this opcode is heap-allocated by the submitter; always free it */
	kfree(msg);
}

/*
 * Message opcodes processed by guc_exec_queue_process_msg(); OR'ed with the
 * MSG_* flags below when queued via guc_exec_queue_add_msg().
 */
#define CLEANUP				1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS			2
#define SUSPEND				3
#define RESUME				4
#define SET_MULTI_QUEUE_PRIORITY	5
#define OPCODE_MASK			0xf
#define MSG_LOCKED			BIT(8)	/* Caller holds the scheduler msg lock */
#define MSG_HEAD			BIT(9)	/* Queue at the head of the msg list */

/* Dispatch a queued scheduler message, then drop the PM ref taken on add */
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));

	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	case SET_MULTI_QUEUE_PRIORITY:
		__guc_exec_queue_process_msg_set_multi_queue_priority(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}

	/* Pairs with xe_pm_runtime_get_noresume() in guc_exec_queue_add_msg() */
	xe_pm_runtime_put(xe);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

/*
 * Backend init for an exec queue: allocate the GuC-side state, set up the
 * DRM scheduler and entity, and allocate a guc_id.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct workqueue_struct *submit_wq = NULL;
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err, i;

	xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc_obj(*ge);
	if (!ge)
		return -ENOMEM;

	q->guc = ge;
	ge->q = q;
	init_rcu_head(&ge->rcu);
	init_waitqueue_head(&ge->suspend_wait);

	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
		INIT_LIST_HEAD(&ge->static_msgs[i].link);

	/* Long-running VM queues never time out; others use the queue's value */
	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		  msecs_to_jiffies(q->sched_props.job_timeout_ms);

	/*
	 * Use primary queue's submit_wq for all secondary queues of a
	 * multi queue group. This serialization avoids any locking around
	 * CGP synchronization with GuC.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

		submit_wq = primary->guc->sched.base.submit_wq;
	}

	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_entity;

	q->entity = &ge->entity;

	/* Don't run the scheduler while a GT reset / VF recovery is in flight */
	if (xe_guc_read_stopped(guc) || vf_recovery(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	xe_exec_queue_assign_name(q, q->guc->id);

	/*
	 * Maintain secondary queues of the multi queue group in a list
	 * for handling dependencies across the queues in the group.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		INIT_LIST_HEAD(&q->multi_queue.link);
		mutex_lock(&group->list_lock);
		if (group->stopped)
			WRITE_ONCE(q->guc->sched.base.pause_submit, true);
		list_add_tail(&q->multi_queue.link, &group->list);
		mutex_unlock(&group->list_lock);
	}

	if (xe_exec_queue_is_multi_queue(q))
		trace_xe_exec_queue_create_multi_queue(q);
	else
		trace_xe_exec_queue_create(q);

	return 0;

err_entity:
	mutex_unlock(&guc->submission_state.lock);
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	kfree(ge);

	return err;
}

/* Mark the queue killed, unblock suspend waiters, and trigger cleanup */
static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
	trace_xe_exec_queue_kill(q);
	set_exec_queue_killed(q);
	__suspend_fence_signal(q);
	xe_guc_exec_queue_trigger_cleanup(q);
}

/*
 * Queue a message for the scheduler worker. Takes a runtime PM reference
 * that is released in guc_exec_queue_process_msg().
 */
static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
				   u32 opcode)
{
	xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));

	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode & OPCODE_MASK;
	msg->private_data = q;

	trace_xe_sched_msg_add(msg);
	if (opcode & MSG_HEAD)
		xe_sched_add_msg_head(&q->guc->sched, msg);
	else if (opcode & MSG_LOCKED)
		xe_sched_add_msg_locked(&q->guc->sched, msg);
	else
		xe_sched_add_msg(&q->guc->sched, msg);
}

/* Add msg at the list head unless already queued; msg lock held by caller */
static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q,
					    struct xe_sched_msg *msg,
					    u32 opcode)
{
	if (!list_empty(&msg->link))
		return;

	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD);
}

/* Add msg unless already queued; returns true if added. Msg lock held */
static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
				       struct xe_sched_msg *msg,
				       u32 opcode)
{
	if (!list_empty(&msg->link))
		return false;

2114 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 2115 2116 return true; 2117 } 2118 2119 #define STATIC_MSG_CLEANUP 0 2120 #define STATIC_MSG_SUSPEND 1 2121 #define STATIC_MSG_RESUME 2 2122 static void guc_exec_queue_destroy(struct xe_exec_queue *q) 2123 { 2124 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2125 2126 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) 2127 guc_exec_queue_add_msg(q, msg, CLEANUP); 2128 else 2129 __guc_exec_queue_destroy(exec_queue_to_guc(q), q); 2130 } 2131 2132 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 2133 enum xe_exec_queue_priority priority) 2134 { 2135 struct xe_sched_msg *msg; 2136 2137 if (q->sched_props.priority == priority || 2138 exec_queue_killed_or_banned_or_wedged(q)) 2139 return 0; 2140 2141 msg = kmalloc_obj(*msg); 2142 if (!msg) 2143 return -ENOMEM; 2144 2145 q->sched_props.priority = priority; 2146 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2147 2148 return 0; 2149 } 2150 2151 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 2152 { 2153 struct xe_sched_msg *msg; 2154 2155 if (q->sched_props.timeslice_us == timeslice_us || 2156 exec_queue_killed_or_banned_or_wedged(q)) 2157 return 0; 2158 2159 msg = kmalloc_obj(*msg); 2160 if (!msg) 2161 return -ENOMEM; 2162 2163 q->sched_props.timeslice_us = timeslice_us; 2164 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2165 2166 return 0; 2167 } 2168 2169 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 2170 u32 preempt_timeout_us) 2171 { 2172 struct xe_sched_msg *msg; 2173 2174 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 2175 exec_queue_killed_or_banned_or_wedged(q)) 2176 return 0; 2177 2178 msg = kmalloc_obj(*msg); 2179 if (!msg) 2180 return -ENOMEM; 2181 2182 q->sched_props.preempt_timeout_us = preempt_timeout_us; 2183 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2184 2185 return 0; 2186 } 2187 2188 static int 
guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, 2189 enum xe_multi_queue_priority priority) 2190 { 2191 struct xe_sched_msg *msg; 2192 2193 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); 2194 2195 if (exec_queue_killed_or_banned_or_wedged(q)) 2196 return 0; 2197 2198 msg = kmalloc_obj(*msg); 2199 if (!msg) 2200 return -ENOMEM; 2201 2202 scoped_guard(spinlock, &q->multi_queue.lock) { 2203 if (q->multi_queue.priority == priority) { 2204 kfree(msg); 2205 return 0; 2206 } 2207 2208 q->multi_queue.priority = priority; 2209 } 2210 2211 guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); 2212 2213 return 0; 2214 } 2215 2216 static int guc_exec_queue_suspend(struct xe_exec_queue *q) 2217 { 2218 struct xe_gpu_scheduler *sched = &q->guc->sched; 2219 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2220 2221 if (exec_queue_killed_or_banned_or_wedged(q)) 2222 return -EINVAL; 2223 2224 xe_sched_msg_lock(sched); 2225 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) 2226 q->guc->suspend_pending = true; 2227 xe_sched_msg_unlock(sched); 2228 2229 return 0; 2230 } 2231 2232 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 2233 { 2234 struct xe_guc *guc = exec_queue_to_guc(q); 2235 struct xe_device *xe = guc_to_xe(guc); 2236 int ret; 2237 2238 /* 2239 * Likely don't need to check exec_queue_killed() as we clear 2240 * suspend_pending upon kill but to be paranoid but races in which 2241 * suspend_pending is set after kill also check kill here. 
2242 */ 2243 #define WAIT_COND \ 2244 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ 2245 xe_guc_read_stopped(guc)) 2246 2247 retry: 2248 if (IS_SRIOV_VF(xe)) 2249 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || 2250 vf_recovery(guc), 2251 HZ * 5); 2252 else 2253 ret = wait_event_interruptible_timeout(q->guc->suspend_wait, 2254 WAIT_COND, HZ * 5); 2255 2256 if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc)))) 2257 return -EAGAIN; 2258 2259 if (!ret) { 2260 xe_gt_warn(guc_to_gt(guc), 2261 "Suspend fence, guc_id=%d, failed to respond", 2262 q->guc->id); 2263 /* XXX: Trigger GT reset? */ 2264 return -ETIME; 2265 } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { 2266 /* Corner case on RESFIX DONE where vf_recovery() changes */ 2267 goto retry; 2268 } 2269 2270 #undef WAIT_COND 2271 2272 return ret < 0 ? ret : 0; 2273 } 2274 2275 static void guc_exec_queue_resume(struct xe_exec_queue *q) 2276 { 2277 struct xe_gpu_scheduler *sched = &q->guc->sched; 2278 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2279 struct xe_guc *guc = exec_queue_to_guc(q); 2280 2281 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); 2282 2283 xe_sched_msg_lock(sched); 2284 guc_exec_queue_try_add_msg(q, msg, RESUME); 2285 xe_sched_msg_unlock(sched); 2286 } 2287 2288 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 2289 { 2290 if (xe_exec_queue_is_multi_queue_secondary(q) && 2291 guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q))) 2292 return true; 2293 2294 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); 2295 } 2296 2297 static bool guc_exec_queue_active(struct xe_exec_queue *q) 2298 { 2299 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 2300 2301 return exec_queue_enabled(primary) && 2302 !exec_queue_pending_disable(primary); 2303 } 2304 2305 /* 2306 * All of these functions are an abstraction layer which other parts of Xe can 2307 * use to trap into the GuC 
 * backend. All of these functions, aside from init, really shouldn't do much
 * other than trap into the DRM scheduler which synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
	.active = guc_exec_queue_active,
};

/*
 * Quiesce a single exec queue for GT reset: stop the scheduler, reset the
 * GuC-side software state, and ban queues whose in-flight jobs were lost.
 */
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool do_destroy = false;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		if (exec_queue_destroyed(q))
			do_destroy = true;
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	/* Keep only the sticky bits; pending/enabled state is now stale */
	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
		   EXEC_QUEUE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		bool ban = false;

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				ban = true;
			}
		}

		if (ban) {
			set_exec_queue_banned(q);
			xe_guc_exec_queue_trigger_cleanup(q);
		}
	}

	if (do_destroy)
		__guc_exec_queue_destroy(guc, q);
}

static int guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to prevent against concurrent reset
	 * and releasing any TDRs waiting on guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	/* Previous stopped value: non-zero if a reset was already in progress */
	return ret;
}

/* As guc_submit_reset_prepare(), but a NOP before init or during VF recovery */
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

	return guc_submit_reset_prepare(guc);
}

/* Wait until the reset completes (stopped cleared) or the device is wedged */
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}

/* Stop all exec queues for a GT reset; restarted via xe_guc_submit_start() */
void xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_stop(guc, q);
	}

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
2436 */ 2437 2438 } 2439 2440 static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc, 2441 struct xe_exec_queue *q) 2442 { 2443 bool pending_enable, pending_disable, pending_resume; 2444 2445 pending_enable = exec_queue_pending_enable(q); 2446 pending_resume = exec_queue_pending_resume(q); 2447 2448 if (pending_enable && pending_resume) { 2449 q->guc->needs_resume = true; 2450 xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d", 2451 q->guc->id); 2452 } 2453 2454 if (pending_enable && !pending_resume) { 2455 clear_exec_queue_registered(q); 2456 xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d", 2457 q->guc->id); 2458 } 2459 2460 if (pending_enable) { 2461 clear_exec_queue_enabled(q); 2462 clear_exec_queue_pending_resume(q); 2463 clear_exec_queue_pending_enable(q); 2464 xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d", 2465 q->guc->id); 2466 } 2467 2468 if (exec_queue_destroyed(q) && exec_queue_registered(q)) { 2469 clear_exec_queue_destroyed(q); 2470 q->guc->needs_cleanup = true; 2471 xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d", 2472 q->guc->id); 2473 } 2474 2475 pending_disable = exec_queue_pending_disable(q); 2476 2477 if (pending_disable && exec_queue_suspended(q)) { 2478 clear_exec_queue_suspended(q); 2479 q->guc->needs_suspend = true; 2480 xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d", 2481 q->guc->id); 2482 } 2483 2484 if (pending_disable) { 2485 if (!pending_enable) 2486 set_exec_queue_enabled(q); 2487 clear_exec_queue_pending_disable(q); 2488 xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d", 2489 q->guc->id); 2490 } 2491 2492 q->guc->resume_time = 0; 2493 } 2494 2495 static void lrc_parallel_clear(struct xe_lrc *lrc) 2496 { 2497 struct xe_device *xe = gt_to_xe(lrc->gt); 2498 struct iosys_map map = xe_lrc_parallel_map(lrc); 2499 int i; 2500 2501 for (i = 0; i < WQ_SIZE / sizeof(u32); ++i) 2502 parallel_write(xe, map, wq[i], 2503 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 2504 FIELD_PREP(WQ_LEN_MASK, 
0)); 2505 } 2506 2507 /* 2508 * This function is quite complex but only real way to ensure no state is lost 2509 * during VF resume flows. The function scans the queue state, make adjustments 2510 * as needed, and queues jobs / messages which replayed upon unpause. 2511 */ 2512 static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) 2513 { 2514 struct xe_gpu_scheduler *sched = &q->guc->sched; 2515 struct xe_sched_job *job; 2516 int i; 2517 2518 lockdep_assert_held(&guc->submission_state.lock); 2519 2520 /* Stop scheduling + flush any DRM scheduler operations */ 2521 xe_sched_submission_stop(sched); 2522 cancel_delayed_work_sync(&sched->base.work_tdr); 2523 2524 guc_exec_queue_revert_pending_state_change(guc, q); 2525 2526 if (xe_exec_queue_is_parallel(q)) { 2527 /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ 2528 struct xe_lrc *lrc = READ_ONCE(q->lrc[0]); 2529 2530 /* 2531 * NOP existing WQ commands that may contain stale GGTT 2532 * addresses. These will be replayed upon unpause. The hardware 2533 * seems to get confused if the WQ head/tail pointers are 2534 * adjusted. 2535 */ 2536 if (lrc) 2537 lrc_parallel_clear(lrc); 2538 } 2539 2540 job = xe_sched_first_pending_job(sched); 2541 if (job) { 2542 job->restore_replay = true; 2543 2544 /* 2545 * Adjust software tail so jobs submitted overwrite previous 2546 * position in ring buffer with new GGTT addresses. 2547 */ 2548 for (i = 0; i < q->width; ++i) 2549 q->lrc[i]->ring.tail = job->ptrs[i].head; 2550 } 2551 } 2552 2553 /** 2554 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 
2555 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2556 */ 2557 void xe_guc_submit_pause(struct xe_guc *guc) 2558 { 2559 struct xe_exec_queue *q; 2560 unsigned long index; 2561 2562 mutex_lock(&guc->submission_state.lock); 2563 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2564 xe_sched_submission_stop(&q->guc->sched); 2565 mutex_unlock(&guc->submission_state.lock); 2566 } 2567 2568 /** 2569 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF. 2570 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2571 */ 2572 void xe_guc_submit_pause_vf(struct xe_guc *guc) 2573 { 2574 struct xe_exec_queue *q; 2575 unsigned long index; 2576 2577 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2578 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2579 2580 mutex_lock(&guc->submission_state.lock); 2581 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2582 /* Prevent redundant attempts to stop parallel queues */ 2583 if (q->guc->id != index) 2584 continue; 2585 2586 guc_exec_queue_pause(guc, q); 2587 } 2588 mutex_unlock(&guc->submission_state.lock); 2589 } 2590 2591 static void guc_exec_queue_start(struct xe_exec_queue *q) 2592 { 2593 struct xe_gpu_scheduler *sched = &q->guc->sched; 2594 2595 if (!exec_queue_killed_or_banned_or_wedged(q)) { 2596 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2597 int i; 2598 2599 trace_xe_exec_queue_resubmit(q); 2600 if (job) { 2601 for (i = 0; i < q->width; ++i) { 2602 /* 2603 * The GuC context is unregistered at this point 2604 * time, adjusting software ring tail ensures 2605 * jobs are rewritten in original placement, 2606 * adjusting LRC tail ensures the newly loaded 2607 * GuC / contexts only view the LRC tail 2608 * increasing as jobs are written out. 
2609 */ 2610 q->lrc[i]->ring.tail = job->ptrs[i].head; 2611 xe_lrc_set_ring_tail(q->lrc[i], 2612 xe_lrc_ring_head(q->lrc[i])); 2613 } 2614 } 2615 xe_sched_resubmit_jobs(sched); 2616 } 2617 2618 xe_sched_submission_start(sched); 2619 xe_sched_submission_resume_tdr(sched); 2620 } 2621 2622 int xe_guc_submit_start(struct xe_guc *guc) 2623 { 2624 struct xe_exec_queue *q; 2625 unsigned long index; 2626 2627 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 2628 2629 mutex_lock(&guc->submission_state.lock); 2630 atomic_dec(&guc->submission_state.stopped); 2631 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2632 /* Prevent redundant attempts to start parallel queues */ 2633 if (q->guc->id != index) 2634 continue; 2635 2636 guc_exec_queue_start(q); 2637 } 2638 mutex_unlock(&guc->submission_state.lock); 2639 2640 wake_up_all(&guc->ct.wq); 2641 2642 return 0; 2643 } 2644 2645 static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, 2646 struct xe_exec_queue *q) 2647 { 2648 struct xe_gpu_scheduler *sched = &q->guc->sched; 2649 struct xe_sched_job *job = NULL; 2650 struct drm_sched_job *s_job; 2651 bool restore_replay = false; 2652 2653 drm_sched_for_each_pending_job(s_job, &sched->base, NULL) { 2654 job = to_xe_sched_job(s_job); 2655 restore_replay |= job->restore_replay; 2656 if (restore_replay) { 2657 xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d", 2658 q->guc->id, xe_sched_job_seqno(job)); 2659 2660 q->ring_ops->emit_job(job); 2661 job->restore_replay = true; 2662 } 2663 } 2664 2665 if (job) 2666 job->last_replay = true; 2667 } 2668 2669 /** 2670 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF. 
2671 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause 2672 */ 2673 void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc) 2674 { 2675 struct xe_exec_queue *q; 2676 unsigned long index; 2677 2678 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2679 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2680 2681 mutex_lock(&guc->submission_state.lock); 2682 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2683 /* Prevent redundant attempts to stop parallel queues */ 2684 if (q->guc->id != index) 2685 continue; 2686 2687 guc_exec_queue_unpause_prepare(guc, q); 2688 } 2689 mutex_unlock(&guc->submission_state.lock); 2690 } 2691 2692 static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q) 2693 { 2694 struct xe_gpu_scheduler *sched = &q->guc->sched; 2695 struct xe_sched_msg *msg; 2696 2697 if (q->guc->needs_cleanup) { 2698 msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2699 2700 guc_exec_queue_add_msg(q, msg, CLEANUP); 2701 q->guc->needs_cleanup = false; 2702 } 2703 2704 if (q->guc->needs_suspend) { 2705 msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2706 2707 xe_sched_msg_lock(sched); 2708 guc_exec_queue_try_add_msg_head(q, msg, SUSPEND); 2709 xe_sched_msg_unlock(sched); 2710 2711 q->guc->needs_suspend = false; 2712 } 2713 2714 /* 2715 * The resume must be in the message queue before the suspend as it is 2716 * not possible for a resume to be issued if a suspend pending is, but 2717 * the inverse is possible. 
	 */
	if (q->guc->needs_resume) {
		msg = q->guc->static_msgs + STATIC_MSG_RESUME;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
		xe_sched_msg_unlock(sched);

		q->guc->needs_resume = false;
	}
}

/* Resume one queue after VF recovery: replay jobs and pending state changes */
static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);

	lockdep_assert_held(&guc->submission_state.lock);

	xe_sched_resubmit_jobs(sched);
	guc_exec_queue_replay_pending_state_change(q);
	xe_sched_submission_start(sched);
	if (needs_tdr)
		xe_guc_exec_queue_trigger_cleanup(q);
	xe_sched_submission_resume_tdr(sched);
}

/**
 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_start(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/*
		 * Prevent redundant attempts to stop parallel queues, or queues
		 * created after resfix done.
		 */
		if (q->guc->id != index ||
		    !drm_sched_is_stopped(&q->guc->sched.base))
			continue;

		guc_exec_queue_unpause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
 */
void xe_guc_submit_pause_abort(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		xe_sched_submission_start(sched);
		guc_exec_queue_kill(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Map a guc_id from a G2H message to its exec queue. Returns NULL (after
 * logging) for out-of-range or unknown ids. For parallel queues the id may
 * fall anywhere within [q->guc->id, q->guc->id + q->width).
 */
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
		return NULL;
	}

	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));

	return q;
}

/* Ask GuC to deregister the context backing @q; only legal once destroyed */
static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc),
!exec_queue_pending_enable(q)); 2843 2844 trace_xe_exec_queue_deregister(q); 2845 2846 if (xe_exec_queue_is_multi_queue_secondary(q)) 2847 handle_deregister_done(guc, q); 2848 else 2849 xe_guc_ct_send_g2h_handler(&guc->ct, action, 2850 ARRAY_SIZE(action)); 2851 } 2852 2853 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, 2854 u32 runnable_state) 2855 { 2856 trace_xe_exec_queue_scheduling_done(q); 2857 2858 if (runnable_state == 1) { 2859 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); 2860 2861 q->guc->resume_time = ktime_get(); 2862 clear_exec_queue_pending_resume(q); 2863 clear_exec_queue_pending_enable(q); 2864 smp_wmb(); 2865 wake_up_all(&guc->ct.wq); 2866 } else { 2867 xe_gt_assert(guc_to_gt(guc), runnable_state == 0); 2868 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); 2869 2870 if (q->guc->suspend_pending) { 2871 suspend_fence_signal(q); 2872 clear_exec_queue_pending_disable(q); 2873 } else { 2874 if (exec_queue_banned(q)) { 2875 smp_wmb(); 2876 wake_up_all(&guc->ct.wq); 2877 } 2878 if (exec_queue_destroyed(q)) { 2879 /* 2880 * Make sure to clear the pending_disable only 2881 * after sampling the destroyed state. We want 2882 * to ensure we don't trigger the unregister too 2883 * early with something intending to only 2884 * disable scheduling. The caller doing the 2885 * destroy must wait for an ongoing 2886 * pending_disable before marking as destroyed. 
2887 */ 2888 clear_exec_queue_pending_disable(q); 2889 deregister_exec_queue(guc, q); 2890 } else { 2891 clear_exec_queue_pending_disable(q); 2892 } 2893 } 2894 } 2895 } 2896 2897 static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, 2898 struct xe_exec_queue *q, 2899 u32 runnable_state) 2900 { 2901 /* Take CT lock here as handle_sched_done() do send a h2g message */ 2902 mutex_lock(&guc->ct.lock); 2903 handle_sched_done(guc, q, runnable_state); 2904 mutex_unlock(&guc->ct.lock); 2905 } 2906 2907 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2908 { 2909 struct xe_exec_queue *q; 2910 u32 guc_id, runnable_state; 2911 2912 if (unlikely(len < 2)) 2913 return -EPROTO; 2914 2915 guc_id = msg[0]; 2916 runnable_state = msg[1]; 2917 2918 q = g2h_exec_queue_lookup(guc, guc_id); 2919 if (unlikely(!q)) 2920 return -EPROTO; 2921 2922 if (unlikely(!exec_queue_pending_enable(q) && 2923 !exec_queue_pending_disable(q))) { 2924 xe_gt_err(guc_to_gt(guc), 2925 "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", 2926 atomic_read(&q->guc->state), q->guc->id, 2927 runnable_state); 2928 return -EPROTO; 2929 } 2930 2931 handle_sched_done(guc, q, runnable_state); 2932 2933 return 0; 2934 } 2935 2936 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) 2937 { 2938 trace_xe_exec_queue_deregister_done(q); 2939 2940 clear_exec_queue_registered(q); 2941 __guc_exec_queue_destroy(guc, q); 2942 } 2943 2944 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2945 { 2946 struct xe_exec_queue *q; 2947 u32 guc_id; 2948 2949 if (unlikely(len < 1)) 2950 return -EPROTO; 2951 2952 guc_id = msg[0]; 2953 2954 q = g2h_exec_queue_lookup(guc, guc_id); 2955 if (unlikely(!q)) 2956 return -EPROTO; 2957 2958 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || 2959 exec_queue_pending_enable(q) || exec_queue_enabled(q)) { 2960 xe_gt_err(guc_to_gt(guc), 2961 "DEREGISTER_DONE: Unexpected engine 
state 0x%04x, guc_id=%d", 2962 atomic_read(&q->guc->state), q->guc->id); 2963 return -EPROTO; 2964 } 2965 2966 handle_deregister_done(guc, q); 2967 2968 return 0; 2969 } 2970 2971 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) 2972 { 2973 struct xe_gt *gt = guc_to_gt(guc); 2974 struct xe_exec_queue *q; 2975 u32 guc_id; 2976 2977 if (unlikely(len < 1)) 2978 return -EPROTO; 2979 2980 guc_id = msg[0]; 2981 2982 q = g2h_exec_queue_lookup(guc, guc_id); 2983 if (unlikely(!q)) 2984 return -EPROTO; 2985 2986 xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x", 2987 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id, 2988 atomic_read(&q->guc->state)); 2989 2990 trace_xe_exec_queue_reset(q); 2991 2992 /* 2993 * A banned engine is a NOP at this point (came from 2994 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel 2995 * jobs by setting timeout of the job to the minimum value kicking 2996 * guc_exec_queue_timedout_job. 2997 */ 2998 xe_guc_exec_queue_reset_trigger_cleanup(q); 2999 3000 return 0; 3001 } 3002 3003 /* 3004 * xe_guc_error_capture_handler - Handler of GuC captured message 3005 * @guc: The GuC object 3006 * @msg: Point to the message 3007 * @len: The message length 3008 * 3009 * When GuC captured data is ready, GuC will send message 3010 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be 3011 * called 1st to check status before process the data comes with the message. 3012 * 3013 * Returns: error code. 
0 if success 3014 */ 3015 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 3016 { 3017 u32 status; 3018 3019 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 3020 return -EPROTO; 3021 3022 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 3023 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 3024 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 3025 3026 xe_guc_capture_process(guc); 3027 3028 return 0; 3029 } 3030 3031 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 3032 u32 len) 3033 { 3034 struct xe_gt *gt = guc_to_gt(guc); 3035 struct xe_exec_queue *q; 3036 u32 guc_id; 3037 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 3038 3039 if (unlikely(!len || len > 2)) 3040 return -EPROTO; 3041 3042 guc_id = msg[0]; 3043 3044 if (len == 2) 3045 type = msg[1]; 3046 3047 if (guc_id == GUC_ID_UNKNOWN) { 3048 /* 3049 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 3050 * context. In such case only PF will be notified about that fault. 3051 */ 3052 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 3053 return 0; 3054 } 3055 3056 q = g2h_exec_queue_lookup(guc, guc_id); 3057 if (unlikely(!q)) 3058 return -EPROTO; 3059 3060 /* 3061 * The type is HW-defined and changes based on platform, so we don't 3062 * decode it in the kernel and only check if it is valid. 3063 * See bspec 54047 and 72187 for details. 
3064 */ 3065 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 3066 xe_gt_info(gt, 3067 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 3068 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3069 else 3070 xe_gt_info(gt, 3071 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 3072 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3073 3074 trace_xe_exec_queue_memory_cat_error(q); 3075 3076 /* Treat the same as engine reset */ 3077 xe_guc_exec_queue_reset_trigger_cleanup(q); 3078 3079 return 0; 3080 } 3081 3082 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 3083 { 3084 struct xe_gt *gt = guc_to_gt(guc); 3085 u8 guc_class, instance; 3086 u32 reason; 3087 3088 if (unlikely(len != 3)) 3089 return -EPROTO; 3090 3091 guc_class = msg[0]; 3092 instance = msg[1]; 3093 reason = msg[2]; 3094 3095 /* Unexpected failure of a hardware feature, log an actual error */ 3096 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3097 guc_class, instance, reason); 3098 3099 xe_gt_reset_async(gt); 3100 3101 return 0; 3102 } 3103 3104 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 3105 u32 len) 3106 { 3107 struct xe_gt *gt = guc_to_gt(guc); 3108 struct xe_device *xe = guc_to_xe(guc); 3109 struct xe_exec_queue *q; 3110 u32 guc_id = msg[2]; 3111 3112 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3113 drm_err(&xe->drm, "Invalid length %u", len); 3114 return -EPROTO; 3115 } 3116 3117 q = g2h_exec_queue_lookup(guc, guc_id); 3118 if (unlikely(!q)) 3119 return -EPROTO; 3120 3121 xe_gt_dbg(gt, 3122 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3123 msg[0] & 1 ? 
"uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3124 3125 trace_xe_exec_queue_cgp_context_error(q); 3126 3127 /* Treat the same as engine reset */ 3128 xe_guc_exec_queue_reset_trigger_cleanup(q); 3129 3130 return 0; 3131 } 3132 3133 /** 3134 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3135 * @guc: guc 3136 * @msg: message indicating CGP sync done 3137 * @len: length of message 3138 * 3139 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3140 * for CGP synchronization to complete. 3141 * 3142 * Return: 0 on success, -EPROTO for malformed messages. 3143 */ 3144 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3145 { 3146 struct xe_device *xe = guc_to_xe(guc); 3147 struct xe_exec_queue *q; 3148 u32 guc_id = msg[0]; 3149 3150 if (unlikely(len < 1)) { 3151 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3152 return -EPROTO; 3153 } 3154 3155 q = g2h_exec_queue_lookup(guc, guc_id); 3156 if (unlikely(!q)) 3157 return -EPROTO; 3158 3159 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3160 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3161 return -EPROTO; 3162 } 3163 3164 /* Wakeup the serialized cgp update wait */ 3165 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3166 xe_guc_ct_wake_waiters(&guc->ct); 3167 3168 return 0; 3169 } 3170 3171 static void 3172 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3173 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3174 { 3175 struct xe_guc *guc = exec_queue_to_guc(q); 3176 struct xe_device *xe = guc_to_xe(guc); 3177 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3178 int i; 3179 3180 snapshot->guc.wqi_head = q->guc->wqi_head; 3181 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3182 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3183 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3184 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 
3185 wq_desc.wq_status); 3186 3187 if (snapshot->parallel.wq_desc.head != 3188 snapshot->parallel.wq_desc.tail) { 3189 for (i = snapshot->parallel.wq_desc.head; 3190 i != snapshot->parallel.wq_desc.tail; 3191 i = (i + sizeof(u32)) % WQ_SIZE) 3192 snapshot->parallel.wq[i / sizeof(u32)] = 3193 parallel_read(xe, map, wq[i / sizeof(u32)]); 3194 } 3195 } 3196 3197 static void 3198 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3199 struct drm_printer *p) 3200 { 3201 int i; 3202 3203 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3204 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3205 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3206 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3207 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3208 3209 if (snapshot->parallel.wq_desc.head != 3210 snapshot->parallel.wq_desc.tail) { 3211 for (i = snapshot->parallel.wq_desc.head; 3212 i != snapshot->parallel.wq_desc.tail; 3213 i = (i + sizeof(u32)) % WQ_SIZE) 3214 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3215 snapshot->parallel.wq[i / sizeof(u32)]); 3216 } 3217 } 3218 3219 /** 3220 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3221 * @q: faulty exec queue 3222 * 3223 * This can be printed out in a later stage like during dev_coredump 3224 * analysis. 3225 * 3226 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3227 * caller, using `xe_guc_exec_queue_snapshot_free`. 
 */
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc_submit_exec_queue_snapshot *snapshot;
	int i;

	/* GFP_ATOMIC: may be captured from a context that cannot sleep */
	snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC);

	if (!snapshot)
		return NULL;

	snapshot->guc.id = q->guc->id;
	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
	snapshot->class = q->class;
	snapshot->logical_mask = q->logical_mask;
	snapshot->width = q->width;
	snapshot->refcount = kref_read(&q->refcount);
	snapshot->sched_timeout = sched->base.timeout;
	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
	snapshot->sched_props.preempt_timeout_us =
		q->sched_props.preempt_timeout_us;

	/* One LRC snapshot per engine of the parallel width */
	snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width,
				     GFP_ATOMIC);

	/* Allocation failure is tolerated: lrc stays NULL and is skipped later */
	if (snapshot->lrc) {
		for (i = 0; i < q->width; ++i) {
			struct xe_lrc *lrc = q->lrc[i];

			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
		}
	}

	snapshot->schedule_state = atomic_read(&q->guc->state);
	snapshot->exec_queue_flags = q->flags;

	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_capture(q, snapshot);

	if (xe_exec_queue_is_multi_queue(q)) {
		snapshot->multi_queue.valid = true;
		snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id;
		snapshot->multi_queue.pos = q->multi_queue.pos;
	}

	return snapshot;
}

/**
 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
 * @snapshot: Previously captured snapshot of job.
 *
 * This captures some data that requires taking some locks, so it cannot be done in signaling path.
3284 */ 3285 void 3286 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3287 { 3288 int i; 3289 3290 if (!snapshot || !snapshot->lrc) 3291 return; 3292 3293 for (i = 0; i < snapshot->width; ++i) 3294 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3295 } 3296 3297 /** 3298 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3299 * @snapshot: GuC Submit Engine snapshot object. 3300 * @p: drm_printer where it will be printed out. 3301 * 3302 * This function prints out a given GuC Submit Engine snapshot object. 3303 */ 3304 void 3305 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3306 struct drm_printer *p) 3307 { 3308 int i; 3309 3310 if (!snapshot) 3311 return; 3312 3313 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id); 3314 drm_printf(p, "\tName: %s\n", snapshot->name); 3315 drm_printf(p, "\tClass: %d\n", snapshot->class); 3316 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); 3317 drm_printf(p, "\tWidth: %d\n", snapshot->width); 3318 drm_printf(p, "\tRef: %d\n", snapshot->refcount); 3319 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); 3320 drm_printf(p, "\tTimeslice: %u (us)\n", 3321 snapshot->sched_props.timeslice_us); 3322 drm_printf(p, "\tPreempt timeout: %u (us)\n", 3323 snapshot->sched_props.preempt_timeout_us); 3324 3325 for (i = 0; snapshot->lrc && i < snapshot->width; ++i) 3326 xe_lrc_snapshot_print(snapshot->lrc[i], p); 3327 3328 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); 3329 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); 3330 3331 if (snapshot->parallel_execution) 3332 guc_exec_queue_wq_snapshot_print(snapshot, p); 3333 3334 if (snapshot->multi_queue.valid) { 3335 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); 3336 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); 3337 } 3338 } 3339 3340 /** 3341 * 
xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given 3342 * snapshot. 3343 * @snapshot: GuC Submit Engine snapshot object. 3344 * 3345 * This function free all the memory that needed to be allocated at capture 3346 * time. 3347 */ 3348 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3349 { 3350 int i; 3351 3352 if (!snapshot) 3353 return; 3354 3355 if (snapshot->lrc) { 3356 for (i = 0; i < snapshot->width; i++) 3357 xe_lrc_snapshot_free(snapshot->lrc[i]); 3358 kfree(snapshot->lrc); 3359 } 3360 kfree(snapshot); 3361 } 3362 3363 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) 3364 { 3365 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3366 3367 snapshot = xe_guc_exec_queue_snapshot_capture(q); 3368 xe_guc_exec_queue_snapshot_print(snapshot, p); 3369 xe_guc_exec_queue_snapshot_free(snapshot); 3370 } 3371 3372 /** 3373 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. 3374 * @q: Execution queue 3375 * @ctx_type: Type of the context 3376 * 3377 * This function registers the execution queue with the guc. Special context 3378 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE 3379 * are only applicable for IGPU and in the VF. 3380 * Submits the execution queue to GUC after registering it. 3381 * 3382 * Returns - None. 
3383 */ 3384 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3385 { 3386 struct xe_guc *guc = exec_queue_to_guc(q); 3387 struct xe_device *xe = guc_to_xe(guc); 3388 struct xe_gt *gt = guc_to_gt(guc); 3389 3390 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3391 xe_gt_assert(gt, !IS_DGFX(xe)); 3392 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3393 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3394 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3395 3396 register_exec_queue(q, ctx_type); 3397 enable_scheduling(q); 3398 } 3399 3400 /** 3401 * xe_guc_submit_print - GuC Submit Print. 3402 * @guc: GuC. 3403 * @p: drm_printer where it will be printed out. 3404 * 3405 * This function capture and prints snapshots of **all** GuC Engines. 3406 */ 3407 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3408 { 3409 struct xe_exec_queue *q; 3410 unsigned long index; 3411 3412 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3413 return; 3414 3415 mutex_lock(&guc->submission_state.lock); 3416 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3417 guc_exec_queue_print(q, p); 3418 mutex_unlock(&guc->submission_state.lock); 3419 } 3420 3421 /** 3422 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues 3423 * registered with the GuC 3424 * @guc: GuC. 3425 * 3426 * Return: true if any MLRC queue is registered with the GuC, false otherwise. 3427 */ 3428 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) 3429 { 3430 struct xe_exec_queue *q; 3431 unsigned long index; 3432 3433 guard(mutex)(&guc->submission_state.lock); 3434 3435 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3436 if (q->width > 1) 3437 return true; 3438 3439 return false; 3440 } 3441 3442 /** 3443 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3444 * exec queues registered to given GuC. 
3445 * @guc: the &xe_guc struct instance 3446 * @scratch: scratch buffer to be used as temporary storage 3447 * 3448 * Returns: zero on success, negative error code on failure. 3449 */ 3450 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3451 { 3452 struct xe_exec_queue *q; 3453 unsigned long index; 3454 int err = 0; 3455 3456 mutex_lock(&guc->submission_state.lock); 3457 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3458 /* Prevent redundant attempts to stop parallel queues */ 3459 if (q->guc->id != index) 3460 continue; 3461 3462 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3463 if (err) 3464 break; 3465 } 3466 mutex_unlock(&guc->submission_state.lock); 3467 3468 return err; 3469 } 3470