// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <linux/math64.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

/*
 * Length in dwords of the CGP context error G2H notification — presumably
 * consumed by the G2H handler (not visible in this chunk); see the
 * "Multi Queue Group GuC interface" DOC section below.
 */
#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN	6

/* Exec queues are submitted through the GuC owned by the queue's GT. */
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 10)
#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND	(1 << 11)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

/* Note: BANNED, RESET, KILLED and WEDGED are sticky — no clear helpers. */
static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_pending_resume(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
}

static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND;
}

static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

/* True if the queue can never run again — killed, banned or device wedged. */
static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

/* drmm release action: tear down submission state once all queues are gone. */
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	/* Wait (up to 5s) for the last exec queue to drop out of the lookup */
	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	/* ret == 0 means the lookup never drained, i.e. leaked exec queues */
	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

/* Drop the extra references held on queues that were marked wedged. */
static void
guc_submit_wedged_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			/*
			 * Drop the lock around the put: the final release
			 * presumably ends up in release_guc_id(), which takes
			 * submission_state.lock. xa_for_each() tolerates
			 * concurrent modification, so restarting the walk
			 * state after relocking is safe.
			 */
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

/*
 * Teach lockdep up-front that submission_state.lock may be taken in the
 * memory-reclaim path, so any later reclaim-vs-lock inversion is reported.
 */
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
/* Append the render/compute yield KLV; returns the advanced write pointer. */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
/* Send global scheduling policy KLVs to GuC (firmware >= 1.1.0 only). */
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_INSTANCES(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	/* count == 1 means only the action header — nothing to send */
	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

int
xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

/*
 * Release a queue's GuC IDs. @xa_count is the number of lookup entries to
 * erase — may be less than q->width when unwinding a partial alloc_guc_id().
 */
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	/* Last queue gone — let guc_submit_fini() proceed */
	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

/* Reserve q->width contiguous GuC IDs and insert lookup entries for them. */
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	/* Only the i entries stored so far exist in the xarray */
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

/* Builder for the UPDATE_CONTEXT_POLICIES H2G message (KLV list). */
struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

/* Size of the H2G payload in dwords: header plus the KLVs added so far. */
static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

/* Generate __guc_exec_queue_policy_add_<func>() appending one KLV of type <id>. */
#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

/* Map xe exec queue priority levels onto GuC client priorities. */
static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

/* Push the queue's scheduling properties (priority/quantum/timeout/freq) to GuC. */
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	/* Policies apply to the registered primary context only */
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
		     !xe_exec_queue_is_multi_queue_secondary(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/* Shrink the preemption timeout to 1us so GuC preempts the context ASAP. */
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/* True while a VF migration recovery is pending on this GT. */
static bool
vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up xe_wait_user_fence ioctl waiters if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	xe_sched_tdr_queue_imm(&q->guc->sched);
}

/* Ban a whole multi-queue group and kick cleanup on every member queue. */
static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	/* Group banned, skip timeout check in TDR */
	WRITE_ONCE(group->banned, true);
	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

/*
 * Mark the queue (or, for multi-queue, every queue of the group) as reset and
 * trigger cleanup for those not already banned.
 */
static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		/* Group banned, skip timeout check in TDR */
		WRITE_ONCE(group->banned, true);

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 cgp_lo;
	u32 cgp_hi;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

/**
 * DOC: Multi Queue Group GuC interface
 *
 * The multi queue group coordination between KMD and GuC is through a software
 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
 * allocated in the global GTT.
 *
 * CGP format:
 *
 * +-----------+---------------------------+---------------------------------------------+
 * | DWORD     | Name                      | Description                                 |
 * +-----------+---------------------------+---------------------------------------------+
 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 1..15     | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
 * +-----------+---------------------------+---------------------------------------------+
 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
 * +-----------+---------------------------+---------------------------------------------+
 * | 18..31    | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | ...       | ...                       | ...                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
 * +-----------+---------------------------+---------------------------------------------+
 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
 * +-----------+---------------------------+---------------------------------------------+
 * | 160..1023 | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 *
 * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified
 * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies
 * the CGP address. When the secondary queues are added to the group, the CGP is
 * updated with entry for that queue and GuC is notified through the H2G interface
 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages
 * with a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE G2H message. GuC
 * also sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification
 * for any error in the CGP. Only one of these CGP update messages can be
 * outstanding (waiting for GuC response) at any time. The bits in
 * KMD_QUEUE_UPDATE_MASK_DW* fields indicate which queue entry is being updated
 * in the CGP.
 *
 * The primary queue (Q0) represents the multi queue group context in GuC and
 * submission on any queue of the group must be through Q0 GuC interface only.
 *
 * As it is not required to register secondary queues with GuC, the secondary queue
 * context ids in the CGP are populated with Q0 context id.
 */

#define CGP_VERSION_MAJOR_SHIFT 8

/* Write this queue's entry and update mask into the group's CGP page. */
static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	/* Queue entry pair: DW (32 + pos * 2) = LRC descriptor low dword */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	/* DW (33 + pos * 2) = context ID (always the primary's GuC id) */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	/* Set the update bit for this queue in DW16/DW17, clear the other */
	if (q->multi_queue.pos / 32) {
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

/* Update the CGP for @q and send @action, serialized on CGP_SYNC_DONE G2Hs. */
static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	long ret;

	/*
	 * As all queues of a multi queue group use single drm scheduler
	 * submit workqueue, CGP synchronization with GuC are serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset gt, cleanup the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	/* Cleared by the CGP_SYNC_DONE G2H handler (not in this chunk) */
	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}

/* Register the primary queue of a multi-queue group along with its CGP. */
static void __register_exec_queue_group(struct xe_guc *guc,
					struct xe_exec_queue *q,
					struct guc_ctxt_registration_info *info)
{
#define MAX_MULTI_QUEUE_REG_SIZE (8)
	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
	int len = 0;

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = 0; /* Reserved */
	action[len++] = info->cgp_lo;
	action[len++] = info->cgp_hi;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
#undef MAX_MULTI_QUEUE_REG_SIZE

	/*
	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/* Publish a secondary queue to GuC by syncing its CGP entry. */
static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
	int len = 0;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));

	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
	action[len++] = q->multi_queue.group->primary->guc->id;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

	/*
	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/* Register a parallel (multi-LRC) context: header plus one LRC pair per width. */
static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	/* lrc[0] is already covered by hwlrca above, start at 1 */
	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

/* Register a plain single-LRC context with GuC. */
static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * Build registration info for @q and register it with GuC using the H2G
 * variant matching the queue flavor (multi-queue primary / parallel / plain).
 * Multi-queue secondaries are never registered individually — they are
 * published via a CGP sync against the primary instead (see the DOC above).
 */
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		/* Fresh registration — reset software WQ state and scratch page */
		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

/* Bytes left in the parallel work queue before the write pointer wraps. */
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}
/*
 * Sleep for @delay_ms with relaxed precision: short delays (<= 20ms) use
 * usleep_range() with 500us of slack, longer ones fall back to msleep().
 * A zero delay is a no-op.
 */
static inline void relaxed_ms_sleep(unsigned int delay_ms)
{
	unsigned long min_us, max_us;

	if (!delay_ms)
		return;

	if (delay_ms > 20) {
		msleep(delay_ms);
		return;
	}

	min_us = mul_u32_u32(delay_ms, 1000);
	max_us = min_us + 500;

	usleep_range(min_us, max_us);
}

/*
 * Wait until the parallel WQ ring has at least @wqi_size bytes free.
 *
 * Polls the GuC-owned head pointer with exponential backoff (1..64 ms per
 * iteration); gives up after ~2s of accumulated sleeping, triggering an
 * async GT reset and returning -ENODEV. The wait is skipped entirely
 * while VF recovery is in progress.
 */
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		/* Refresh the cached head from the shared WQ descriptor */
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_total_ms += sleep_period_ms;
			if (sleep_period_ms < 64)
				sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

/*
 * Append a NOOP WQ item padding out the rest of the ring so the next real
 * item can start at offset 0 — WQ items are not allowed to wrap.
 */
static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	/* WQ item length field excludes the header dword itself */
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

/*
 * Write a MULTI_LRC work item for @q into the parallel WQ and publish the
 * new tail to the GuC. The item carries the ring tail (in qwords) of each
 * child LRC. Failure to obtain space bails out silently; the space waiter
 * has already escalated to a GT reset in that case.
 */
static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE 4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	/* Items may not wrap; pad the remainder with a NOOP first */
	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	/* Ensure the WQ item is visible before publishing the new tail */
	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
/*
 * Submit @job to the GuC: push the new ring tail / WQ item, then either
 * enable scheduling (first submission after registration or suspend) or
 * kick the already-enabled context with a SCHED_CONTEXT. Multi-queue
 * members always talk to the GuC through their group's primary queue.
 */
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	/* Replayed jobs (except the last) already have their tail in place */
	if (!job->restore_replay || job->last_replay) {
		if (xe_exec_queue_is_parallel(q))
			wq_item_append(q);
		else if (!exec_queue_idle_skip_suspend(q))
			xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
		job->last_replay = false;
	}

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	/*
	 * All queues in a multi-queue group will use the primary queue
	 * of the group to interface with GuC.
	 */
	q = xe_exec_queue_multi_queue_primary(q);

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		/* Parallel queues still need an explicit submit after enable */
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

/*
 * DRM scheduler run_job hook: lazily register the queue (and, for a
 * multi-queue secondary, its group primary first) with the GuC, emit the
 * job's ring commands, then submit. Skipped entirely for killed / banned /
 * wedged queues and jobs already flagged as errors; the job fence is
 * returned unconditionally.
 */
static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	bool killed_or_banned_or_wedged =
		exec_queue_killed_or_banned_or_wedged(q);

	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			struct xe_exec_queue *primary =
				xe_exec_queue_multi_queue_primary(q);

			/* A dead primary dooms the whole group */
			if (exec_queue_killed_or_banned_or_wedged(primary)) {
				killed_or_banned_or_wedged = true;
				goto run_job_out;
			}

			/* Primary must be registered before any secondary */
			if (!exec_queue_registered(primary))
				register_exec_queue(primary, GUC_CONTEXT_NORMAL);
		}

		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		/* Replayed jobs already have their ring commands emitted */
		if (!job->restore_replay)
			q->ring_ops->emit_job(job);
		submit_exec_queue(q, job);
		job->restore_replay = false;
	}

run_job_out:

	return job->fence;
}

/* DRM scheduler free_job hook: drop the scheduler's reference on the job */
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

/* Non-zero while GuC submission is stopped (e.g. across a GT reset) */
int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state);
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		q->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}

/*
 * Disable scheduling on @q and mark it destroyed so the subsequent G2H
 * handler deregisters the context with the GuC. Any in-flight pending
 * enable/disable is waited out first; on timeout (outside VF recovery)
 * this escalates to an async GT reset plus an immediate TDR.
 */
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);

	/* Pairs with the state updates made before waking guc->ct.wq */
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc),
				 HZ * 5);
	if (!ret && !vf_recovery(guc)) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
			       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save exec queue's registered with GuC state by taking a ref to each queue.
 * Register a DRMM handler to drop refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
				       guc_submit_wedged_fini, guc);
	if (err) {
		xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; "
			  "Although device is wedged.\n",
			  xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
		return;
	}

	/* Pin every queue still known to the GuC so it survives until unload */
	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		if (xe_exec_queue_get_unless_zero(q))
			set_exec_queue_wedged(q);
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Returns true when the configured wedged mode dictates "no reset on any
 * hang" (declaring the device wedged if it was not already); false
 * otherwise, letting the normal reset path proceed.
 */
static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}

#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100)

/*
 * Decide whether @job has genuinely exceeded its timeout by comparing LRC
 * timestamps sampled across TDR invocations. Returns true if the job
 * should be treated as hung. A job that never started, or whose context
 * timestamp is stuck, is invalidated via xe_sched_invalidate_job() instead.
 */
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
	if (ctx_timestamp == job->sample_timestamp) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 0);
	}

	/* Remember this sample so a stuck counter is detected next time */
	job->sample_timestamp = ctx_timestamp;
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
	 * possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for a GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}

/*
 * Re-enable GuC scheduling on @q and wait for the G2H ack. On timeout
 * (outside VF recovery) or a stopped GuC, ban the queue and escalate to
 * an async GT reset plus an immediate TDR.
 */
static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 1);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}
/*
 * Ask the GuC to disable scheduling on @q. @immediate additionally
 * shrinks the preemption timeout first so the context is kicked off the
 * hardware as fast as possible. Completion is signalled asynchronously
 * via the sched-done G2H.
 */
static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

/*
 * DRM scheduler timedout_job hook (TDR). Verifies the hang is real via
 * LRC timestamps, captures engine state for devcoredump, kicks the queue
 * off the hardware (disable scheduling, waiting out pending G2H traffic),
 * signals outstanding fences with an error, and queues cleanup. Every
 * path returns DRM_GPU_SCHED_STAT_NO_HANG so the job goes back on the
 * pending list — either to be freed normally or because the TDR re-arms.
 */
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct drm_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	int err = -ETIME;
	pid_t pid = -1;
	bool wedged = false, skip_timeout_check;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));

	/*
	 * TDR has fired before free job worker. Common if exec queue
	 * immediately closed after last fence signaled. Add back to pending
	 * list so job can be freed and kick scheduler ensuring free job is not
	 * lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
	    vf_recovery(guc))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q);

	/* Skip timeout check if multi-queue group is banned */
	if (xe_exec_queue_is_multi_queue(q) &&
	    READ_ONCE(q->multi_queue.group->banned))
		skip_timeout_check = true;

	/* LR jobs can only get here if queue has been killed or hit an error */
	if (xe_exec_queue_is_lr(q))
		xe_gt_assert(guc_to_gt(guc), skip_timeout_check);

	/*
	 * FIXME: In multi-queue scenario, the TDR must ensure that the whole
	 * multi-queue group is off the HW before signaling the fences to avoid
	 * possible memory corruptions. This means disabling scheduling on the
	 * primary queue before or during the secondary queue's TDR. Need to
	 * implement this in least obtrusive way.
	 */

	/*
	 * If devcoredump not captured and GuC capture for the job is not ready
	 * do manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_queue(q);
	}

	/*
	 * Check if job is actually timed out, if so restart job execution and TDR
	 */
	if (!skip_timeout_check && !check_timeout(q, job))
		goto rearm;

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	set_exec_queue_banned(q);

	/* Kick job / queue off hardware */
	if (!wedged && (exec_queue_enabled(q) || exec_queue_pending_disable(q))) {
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;

		if (xe_uc_fw_is_running(&guc->fw)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(q) &&
						  !exec_queue_pending_disable(q)) ||
						 xe_guc_read_stopped(guc) ||
						 vf_recovery(guc), HZ * 5);
			if (vf_recovery(guc))
				goto handle_vf_resume;
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			disable_scheduling(q, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences, if GT broken the GT reset code should signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !xe_uc_fw_is_running(&guc->fw) ||
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			goto handle_vf_resume;
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   q->guc->id);
			xe_devcoredump(q, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       q->guc->id, ret, xe_guc_read_stopped(guc));
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	/* Attribute the hang to the owning process where known */
	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs if they do
	 * somethings has gone wrong and the GT needs a reset
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Mark all outstanding jobs as bad, thus completing them */
	xe_sched_job_set_error(job, err);
	drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
		xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);

	xe_sched_submission_start(sched);

	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_trigger_cleanup(q);
	else
		xe_guc_exec_queue_trigger_cleanup(q);

	/*
	 * We want the job added back to the pending list so it gets freed; this
	 * is what DRM_GPU_SCHED_STAT_NO_HANG does.
	 */
	return DRM_GPU_SCHED_STAT_NO_HANG;

rearm:
	/*
	 * XXX: Ideally want to adjust timeout based on current execution time
	 * but there is not currently an easy way to do in DRM scheduler. With
	 * some thought, do this in a follow up.
	 */
	xe_sched_submission_start(sched);
handle_vf_resume:
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

/*
 * Final teardown of the GuC-side exec queue state: release the GuC id,
 * tear down the scheduler entity and scheduler, then free @ge via RCU.
 */
static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

/*
 * Deferred exec queue destruction. Runs from xe->destroy_wq (or inline
 * for permanent/wedged queues): holds a runtime PM reference, unlinks a
 * secondary from its multi-queue group, flushes any outstanding TDR, then
 * finishes the exec queue.
 */
static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	guard(xe_pm_runtime)(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		mutex_lock(&group->list_lock);
		list_del(&q->multi_queue.link);
		mutex_unlock(&group->list_lock);
	}

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);
}

/*
 * Queue (or run inline) the asynchronous destruction of @q.
 */
static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that it
	 * is.
	 */
	guc_exec_queue_destroy_async(q);
}

/*
 * CLEANUP message handler: tear down a non-permanent queue, going through
 * GuC disable + deregister when the queue is registered and the firmware
 * is alive, otherwise destroying the driver-side state directly.
 */
static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	/*
	 * Expected state transitions for cleanup:
	 * - If the exec queue is registered and GuC firmware is running, we must first
	 *   disable scheduling and deregister the queue to ensure proper teardown and
	 *   resource release in the GuC, then destroy the exec queue on driver side.
	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
	 *   we cannot expect a response for the deregister request. In this case,
	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
	 *   will not process further requests and all resources must be cleaned up locally.
	 */
	if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_destroy(guc, q);
}

/* State-change messages are only honored for live, registered queues */
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
}

/* SET_SCHED_PROPS message handler: push updated scheduling policies to GuC */
static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

/* Clear a pending suspend, if any, and wake whoever is waiting on it */
static void __suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	if (!q->guc->suspend_pending)
		return;

	WRITE_ONCE(q->guc->suspend_pending, false);

	/*
	 * We use a GuC shared wait queue for VFs
	 * because the VF resfix start
	 * interrupt must be able to wake all instances of suspend_wait. This
	 * prevents the VF migration worker from being starved during
	 * scheduling.
	 */
	if (IS_SRIOV_VF(xe))
		wake_up_all(&guc->ct.wq);
	else
		wake_up(&q->guc->suspend_wait);
}

/* Signal the suspend fence; asserts the queue is in a signalable state */
static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
		     xe_guc_read_stopped(guc));
	xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);

	__suspend_fence_signal(q);
}

/*
 * SUSPEND message handler. For an enabled, non-suspended queue: wait for
 * any in-flight resume/disable to settle, honor the VM's minimum run
 * period, then disable scheduling. Otherwise, if a suspend is pending,
 * mark the queue suspended immediately and signal the waiter.
 */
static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);
	bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q);

	if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) &&
	    !exec_queue_suspended(q) && exec_queue_enabled(q)) {
		wait_event(guc->ct.wq, vf_recovery(guc) ||
			   ((q->guc->resume_time != RESUME_PENDING ||
			     xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));

		if (!xe_guc_read_stopped(guc)) {
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       q->guc->resume_time);
			s64 wait_ms = q->vm->preempt.min_run_period_ms -
				since_resume_ms;

			/* Give the context its minimum run period first */
			if (wait_ms > 0 && q->guc->resume_time)
				relaxed_ms_sleep(wait_ms);

			set_exec_queue_suspended(q);
			disable_scheduling(q, false);
		}
	} else if (q->guc->suspend_pending) {
		if (idle_skip_suspend)
			set_exec_queue_idle_skip_suspend(q);
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
}

/*
 * Kick an already-enabled, non-parallel context: publish the ring tail
 * and send a SCHED_CONTEXT to the GuC.
 */
static void sched_context(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[] = {
		XE_GUC_ACTION_SCHED_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	trace_xe_exec_queue_submit(q);

	xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * RESUME message handler: clear the suspended state and re-enable or kick
 * the context as needed. A ring-tail update deferred by an idle-skipped
 * suspend is flushed here before re-enabling.
 */
static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q)) {
		clear_exec_queue_suspended(q);
		if (!exec_queue_enabled(q)) {
			if (exec_queue_idle_skip_suspend(q)) {
				struct xe_lrc *lrc = q->lrc[0];

				clear_exec_queue_idle_skip_suspend(q);
				xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
			}
			q->guc->resume_time = RESUME_PENDING;
			set_exec_queue_pending_resume(q);
			enable_scheduling(q);
		} else if (exec_queue_idle_skip_suspend(q)) {
			clear_exec_queue_idle_skip_suspend(q);
			sched_context(q);
		}
	} else {
		clear_exec_queue_suspended(q);
		clear_exec_queue_idle_skip_suspend(q);
	}
}

/*
 * SET_MULTI_QUEUE_PRIORITY message handler: trigger a CGP sync on the
 * group's primary context so the GuC picks up the new queue priorities.
 */
static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q)) {
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
		struct xe_guc *guc = exec_queue_to_guc(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
		int len = 0;

		action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
		action[len++] = group->primary->guc->id;

		xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

		xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
	}

	kfree(msg);
}

#define CLEANUP				1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS			2
#define SUSPEND				3
#define RESUME				4
#define SET_MULTI_QUEUE_PRIORITY	5
#define OPCODE_MASK			0xf
#define MSG_LOCKED			BIT(8)
#define MSG_HEAD			BIT(9)

/*
 * Dispatch a scheduler message to its opcode handler, then drop the
 * runtime PM reference taken when the message was added.
 */
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));

	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	case SET_MULTI_QUEUE_PRIORITY:
		__guc_exec_queue_process_msg_set_multi_queue_priority(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}

	/* Pairs with xe_pm_runtime_get_noresume() in guc_exec_queue_add_msg() */
	xe_pm_runtime_put(xe);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

/*
 * Allocate and initialize the GuC-side backend state for @q: the GPU
 * scheduler and entity, a GuC context id, and (for multi-queue members)
 * the group bookkeeping. Returns 0 on success or a negative errno.
 */
static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct workqueue_struct *submit_wq = NULL;
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err, i;

	xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
	if (!ge)
		return -ENOMEM;

	q->guc = ge;
	ge->q = q;
	init_rcu_head(&ge->rcu);
	init_waitqueue_head(&ge->suspend_wait);

	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
		INIT_LIST_HEAD(&ge->static_msgs[i].link);

	/* LR queues never time out; others use the configured job timeout */
	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		  msecs_to_jiffies(q->sched_props.job_timeout_ms);

	/*
	 * Use primary queue's submit_wq for all secondary queues of a
	 * multi queue group. This serialization avoids any locking around
	 * CGP synchronization with GuC.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

		submit_wq = primary->guc->sched.base.submit_wq;
	}

	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_entity;

	q->entity = &ge->entity;

	/* Don't accept jobs while a reset or VF recovery is in flight */
	if (xe_guc_read_stopped(guc) || vf_recovery(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	xe_exec_queue_assign_name(q, q->guc->id);

	/*
	 * Maintain secondary queues of the multi queue group in a list
	 * for handling dependencies across the queues in the group.
1957 */ 1958 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1959 struct xe_exec_queue_group *group = q->multi_queue.group; 1960 1961 INIT_LIST_HEAD(&q->multi_queue.link); 1962 mutex_lock(&group->list_lock); 1963 list_add_tail(&q->multi_queue.link, &group->list); 1964 mutex_unlock(&group->list_lock); 1965 } 1966 1967 if (xe_exec_queue_is_multi_queue(q)) 1968 trace_xe_exec_queue_create_multi_queue(q); 1969 else 1970 trace_xe_exec_queue_create(q); 1971 1972 return 0; 1973 1974 err_entity: 1975 mutex_unlock(&guc->submission_state.lock); 1976 xe_sched_entity_fini(&ge->entity); 1977 err_sched: 1978 xe_sched_fini(&ge->sched); 1979 err_free: 1980 kfree(ge); 1981 1982 return err; 1983 } 1984 1985 static void guc_exec_queue_kill(struct xe_exec_queue *q) 1986 { 1987 trace_xe_exec_queue_kill(q); 1988 set_exec_queue_killed(q); 1989 __suspend_fence_signal(q); 1990 xe_guc_exec_queue_trigger_cleanup(q); 1991 } 1992 1993 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 1994 u32 opcode) 1995 { 1996 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); 1997 1998 INIT_LIST_HEAD(&msg->link); 1999 msg->opcode = opcode & OPCODE_MASK; 2000 msg->private_data = q; 2001 2002 trace_xe_sched_msg_add(msg); 2003 if (opcode & MSG_HEAD) 2004 xe_sched_add_msg_head(&q->guc->sched, msg); 2005 else if (opcode & MSG_LOCKED) 2006 xe_sched_add_msg_locked(&q->guc->sched, msg); 2007 else 2008 xe_sched_add_msg(&q->guc->sched, msg); 2009 } 2010 2011 static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, 2012 struct xe_sched_msg *msg, 2013 u32 opcode) 2014 { 2015 if (!list_empty(&msg->link)) 2016 return; 2017 2018 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD); 2019 } 2020 2021 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, 2022 struct xe_sched_msg *msg, 2023 u32 opcode) 2024 { 2025 if (!list_empty(&msg->link)) 2026 return false; 2027 2028 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 2029 2030 return true; 
}

/* Indices into xe_guc_exec_queue::static_msgs for the preallocated messages */
#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2
/*
 * Tear down an exec queue. Normal queues go through the scheduler message
 * path (CLEANUP) so destruction is serialized with submission; permanent
 * or wedged queues are destroyed directly.
 */
static void guc_exec_queue_destroy(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
		guc_exec_queue_add_msg(q, msg, CLEANUP);
	else
		__guc_exec_queue_destroy(exec_queue_to_guc(q), q);
}

/*
 * Update the queue's scheduling priority via a SET_SCHED_PROPS message.
 * No-op (returns 0) if unchanged or the queue is killed/banned/wedged.
 * Returns -ENOMEM if the message cannot be allocated.
 */
static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
				       enum xe_exec_queue_priority priority)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.priority == priority ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.priority = priority;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

/* Update the timeslice; same message-based flow as set_priority above. */
static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.timeslice_us == timeslice_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.timeslice_us = timeslice_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

/* Update the preemption timeout; same message-based flow as set_priority. */
static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
					      u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

/*
 * Update the priority of a queue within its multi queue group via a
 * SET_MULTI_QUEUE_PRIORITY message. Only valid on multi queue queues.
 */
static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q,
						   enum xe_multi_queue_priority priority)
{
	struct xe_sched_msg *msg;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q));

	if (q->multi_queue.priority == priority ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->multi_queue.priority = priority;
	guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY);

	return 0;
}

/*
 * Request queue suspension using the preallocated SUSPEND message.
 * suspend_pending is only set if the message was newly queued (the
 * static message may already be in flight). Returns -EINVAL if the
 * queue is killed/banned/wedged.
 */
static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (exec_queue_killed_or_banned_or_wedged(q))
		return -EINVAL;

	xe_sched_msg_lock(sched);
	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
		q->guc->suspend_pending = true;
	xe_sched_msg_unlock(sched);

	return 0;
}

/*
 * Wait (interruptibly, 5s timeout) for a previously requested suspend to
 * complete. Returns 0 on success, -ETIME on timeout, -EAGAIN when VF
 * recovery is in progress, or a negative error from the wait.
 */
static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	/*
	 * Likely don't need to check exec_queue_killed() as we clear
	 * suspend_pending upon kill, but to be paranoid about races in
	 * which suspend_pending is set after kill, also check kill here.
	 */
#define WAIT_COND \
	(!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \
	 xe_guc_read_stopped(guc))

retry:
	/* VFs also wake on recovery so they can bail out with -EAGAIN below */
	if (IS_SRIOV_VF(xe))
		ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND ||
						       vf_recovery(guc),
						       HZ * 5);
	else
		ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
						       WAIT_COND, HZ * 5);

	if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc))))
		return -EAGAIN;

	if (!ret) {
		xe_gt_warn(guc_to_gt(guc),
			   "Suspend fence, guc_id=%d, failed to respond",
			   q->guc->id);
		/* XXX: Trigger GT reset? */
		return -ETIME;
	} else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
		/* Corner case on RESFIX DONE where vf_recovery() changes */
		goto retry;
	}

#undef WAIT_COND

	return ret < 0 ? ret : 0;
}

/*
 * Queue the preallocated RESUME message. Must not be called while a
 * suspend is still pending (asserted).
 */
static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);

	xe_sched_msg_lock(sched);
	guc_exec_queue_try_add_msg(q, msg, RESUME);
	xe_sched_msg_unlock(sched);
}

/*
 * Report whether the queue (or, for a secondary queue, its group's
 * primary) has been reset, killed, banned, or wedged.
 */
static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* A secondary queue inherits reset status from its primary */
	if (xe_exec_queue_is_multi_queue_secondary(q) &&
	    guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q)))
		return true;

	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
}

/*
 * All of these functions are an abstraction layer which other parts of Xe can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
};

/*
 * Quiesce one exec queue for a GT reset: stop its scheduler, resolve any
 * lost G2H completions, and clear all state bits except the sticky ones
 * (wedged/banned/killed/destroyed/suspended).
 */
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		if (exec_queue_destroyed(q))
			__guc_exec_queue_destroy(guc, q);
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	/* Keep only the sticky state bits; everything else is reset */
	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
		   EXEC_QUEUE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		bool ban = false;

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				ban = true;
			}
		}

		if (ban) {
			set_exec_queue_banned(q);
			xe_guc_exec_queue_trigger_cleanup(q);
		}
	}
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/* VF recovery uses the pause/unpause flow, not GT reset */
	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to prevent against concurrent reset
	 * and releasing any TDRs waiting on guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	/* Wait until the reset completes (stopped cleared) or device wedges */
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}

void xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_stop(guc, q);
	}

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

}

/*
 * Undo any in-flight (pending) GuC state transitions on @q so that the
 * corresponding H2G actions can be replayed after VF recovery. Each
 * reverted transition is recorded either via a needs_* flag or by
 * restoring the pre-transition state bits.
 */
static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
						       struct xe_exec_queue *q)
{
	bool pending_enable, pending_disable, pending_resume;

	pending_enable = exec_queue_pending_enable(q);
	pending_resume = exec_queue_pending_resume(q);

	if (pending_enable && pending_resume) {
		q->guc->needs_resume = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
			  q->guc->id);
	}

	/* An enable without a resume implies the register H2G was in flight */
	if (pending_enable && !pending_resume) {
		clear_exec_queue_registered(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable) {
		clear_exec_queue_enabled(q);
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
			  q->guc->id);
	}

	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
		clear_exec_queue_destroyed(q);
		q->guc->needs_cleanup = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
			  q->guc->id);
	}

	pending_disable = exec_queue_pending_disable(q);

	if (pending_disable && exec_queue_suspended(q)) {
		clear_exec_queue_suspended(q);
		q->guc->needs_suspend = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
			  q->guc->id);
	}

	if (pending_disable) {
		/* Restore the enabled state the disable was transitioning from */
		if (!pending_enable)
			set_exec_queue_enabled(q);
		clear_exec_queue_pending_disable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
			  q->guc->id);
	}

	q->guc->resume_time = 0;
}

/* Overwrite the entire parallel work queue buffer with NOOP entries. */
static void lrc_parallel_clear(struct xe_lrc *lrc)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);
	struct iosys_map map = xe_lrc_parallel_map(lrc);
	int i;

	for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
		parallel_write(xe, map, wq[i],
			       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
			       FIELD_PREP(WQ_LEN_MASK, 0));
}

/*
 * This function is quite complex but is the only real way to ensure no
 * state is lost during VF resume flows. The function scans the queue
 * state, makes adjustments as needed, and queues jobs / messages which
 * are replayed upon unpause.
 */
static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job;
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);
	cancel_delayed_work_sync(&sched->base.work_tdr);

	guc_exec_queue_revert_pending_state_change(guc, q);

	if (xe_exec_queue_is_parallel(q)) {
		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
		struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);

		/*
		 * NOP existing WQ commands that may contain stale GGTT
		 * addresses. These will be replayed upon unpause. The hardware
		 * seems to get confused if the WQ head/tail pointers are
		 * adjusted.
		 */
		if (lrc)
			lrc_parallel_clear(lrc);
	}

	job = xe_sched_first_pending_job(sched);
	if (job) {
		job->restore_replay = true;

		/*
		 * Adjust software tail so jobs submitted overwrite previous
		 * position in ring buffer with new GGTT addresses.
		 */
		for (i = 0; i < q->width; ++i)
			q->lrc[i]->ring.tail = job->ptrs[i].head;
	}
}

/**
 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
 */
void xe_guc_submit_pause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_stop(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
 */
void xe_guc_submit_pause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to pause parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_pause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Restart one exec queue after a GT reset: rewind ring/LRC tails so
 * pending jobs are re-emitted in their original placement, resubmit them,
 * then restart the scheduler and its TDR.
 */
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned_or_wedged(q)) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		int i;

		trace_xe_exec_queue_resubmit(q);
		if (job) {
			for (i = 0; i < q->width; ++i) {
				/*
				 * The GuC context is unregistered at this point
				 * time, adjusting software ring tail ensures
				 * jobs are rewritten in original placement,
				 * adjusting LRC tail ensures the newly loaded
				 * GuC / contexts only view the LRC tail
				 * increasing as jobs are written out.
				 */
				q->lrc[i]->ring.tail = job->ptrs[i].head;
				xe_lrc_set_ring_tail(q->lrc[i],
						     xe_lrc_ring_head(q->lrc[i]));
			}
		}
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
	xe_sched_submission_resume_tdr(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to start parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_start(q);
	}
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

/*
 * Re-emit ring instructions for every pending job from the first job
 * marked restore_replay onwards (later jobs may sit above it in the
 * ring), so stale GGTT addresses are rewritten before unpause. The last
 * job visited is flagged last_replay.
 */
static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
					   struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job = NULL;
	struct drm_sched_job *s_job;
	bool restore_replay = false;

	drm_sched_for_each_pending_job(s_job, &sched->base, NULL) {
		job = to_xe_sched_job(s_job);
		restore_replay |= job->restore_replay;
		if (restore_replay) {
			xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
				  q->guc->id, xe_sched_job_seqno(job));

			q->ring_ops->emit_job(job);
			job->restore_replay = true;
		}
	}

	if (job)
		job->last_replay = true;
}

/**
 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
 */
void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to prepare parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_unpause_prepare(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Re-queue the scheduler messages recorded by
 * guc_exec_queue_revert_pending_state_change() (needs_cleanup /
 * needs_suspend / needs_resume) so the corresponding H2G actions are
 * replayed after VF recovery.
 */
static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg;

	if (q->guc->needs_cleanup) {
		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

		guc_exec_queue_add_msg(q, msg, CLEANUP);
		q->guc->needs_cleanup = false;
	}

	if (q->guc->needs_suspend) {
		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
		xe_sched_msg_unlock(sched);

		q->guc->needs_suspend = false;
	}

	/*
	 * The resume must be in the message queue before the suspend as it is
	 * not possible for a resume to be issued if a suspend pending is, but
	 * the inverse is possible.
	 */
	if (q->guc->needs_resume) {
		msg = q->guc->static_msgs + STATIC_MSG_RESUME;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
		xe_sched_msg_unlock(sched);

		q->guc->needs_resume = false;
	}
}

/*
 * Unpause one exec queue after VF recovery: resubmit pending jobs, replay
 * reverted state transitions, restart the scheduler, and trigger cleanup
 * if the queue was killed/banned/wedged while paused.
 */
static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);

	lockdep_assert_held(&guc->submission_state.lock);

	xe_sched_resubmit_jobs(sched);
	guc_exec_queue_replay_pending_state_change(q);
	xe_sched_submission_start(sched);
	if (needs_tdr)
		xe_guc_exec_queue_trigger_cleanup(q);
	xe_sched_submission_resume_tdr(sched);
}

/**
 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_start(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/*
		 * Prevent redundant attempts to unpause parallel queues, or
		 * queues created after resfix done (never stopped).
		 */
		if (q->guc->id != index ||
		    !drm_sched_is_stopped(&q->guc->sched.base))
			continue;

		guc_exec_queue_unpause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
 */
void xe_guc_submit_pause_abort(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		/* Prevent redundant attempts to abort parallel queues */
		if (q->guc->id != index)
			continue;

		xe_sched_submission_start(sched);
		if (exec_queue_killed_or_banned_or_wedged(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Resolve a guc_id from a G2H message to its exec queue. Returns NULL
 * (after logging) for out-of-range or unknown ids. For parallel queues
 * the id may fall anywhere within [q->guc->id, q->guc->id + q->width).
 */
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
		return NULL;
	}

	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));

	return q;
}

/*
 * Send the DEREGISTER_CONTEXT H2G for a destroyed, registered queue.
 * Secondary multi queue members are deregistered implicitly with their
 * primary, so they complete locally via handle_deregister_done().
 */
static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc),
		     !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	trace_xe_exec_queue_deregister(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_deregister_done(guc, q);
	else
		xe_guc_ct_send_g2h_handler(&guc->ct, action,
					   ARRAY_SIZE(action));
}

/*
 * Process a SCHED_DONE G2H: complete the pending enable (runnable_state
 * == 1) or pending disable (== 0) transition, waking any waiters and
 * chaining into deregistration when the disabled queue is destroyed.
 */
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
			      u32 runnable_state)
{
	trace_xe_exec_queue_scheduling_done(q);

	if (runnable_state == 1) {
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));

		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		/* Publish the state change before waking waiters on ct.wq */
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));

		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
			clear_exec_queue_pending_disable(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			if (exec_queue_destroyed(q)) {
				/*
				 * Make sure to clear the pending_disable only
				 * after sampling the destroyed state. We want
				 * to ensure we don't trigger the unregister too
				 * early with something intending to only
				 * disable scheduling. The caller doing the
				 * destroy must wait for an ongoing
				 * pending_disable before marking as destroyed.
				 */
				clear_exec_queue_pending_disable(q);
				deregister_exec_queue(guc, q);
			} else {
				clear_exec_queue_pending_disable(q);
			}
		}
	}
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state)
{
	/* Take CT lock here as handle_sched_done() do send a h2g message */
	mutex_lock(&guc->ct.lock);
	handle_sched_done(guc, q, runnable_state);
	mutex_unlock(&guc->ct.lock);
}

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id, runnable_state;

	if (unlikely(len < 2))
		return -EPROTO;

	guc_id = msg[0];
	runnable_state = msg[1];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	/* A SCHED_DONE without a pending transition is a protocol violation */
	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		xe_gt_err(guc_to_gt(guc),
			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
			  atomic_read(&q->guc->state), q->guc->id,
			  runnable_state);
		return -EPROTO;
	}

	handle_sched_done(guc, q, runnable_state);

	return 0;
}

/* Finish deregistration: drop the registered bit and free backend state. */
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_deregister_done(q);

	clear_exec_queue_registered(q);
	__guc_exec_queue_destroy(guc, q);
}

int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	/* Deregister is only valid on a destroyed, fully disabled queue */
	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
		xe_gt_err(guc_to_gt(guc),
			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
			  atomic_read(&q->guc->state), q->guc->id);
		return -EPROTO;
	}

	handle_deregister_done(guc, q);

	return 0;
}

int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x",
		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id,
		   atomic_read(&q->guc->state));

	trace_xe_exec_queue_reset(q);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel
	 * jobs by setting timeout of the job to the minimum value kicking
	 * guc_exec_queue_timedout_job.
	 */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}

/*
 * xe_guc_error_capture_handler - Handler of GuC captured message
 * @guc: The GuC object
 * @msg: Point to the message
 * @len: The message length
 *
 * When GuC captured data is ready, GuC will send message
 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
 * called 1st to check status before process the data comes with the message.
 *
 * Returns: error code. 0 if success
 */
int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	u32 status;

	if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
		return -EPROTO;

	status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
	if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
		xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");

	xe_guc_capture_process(guc);

	return 0;
}

int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
					       u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;
	u32 type = XE_GUC_CAT_ERR_TYPE_INVALID;

	if (unlikely(!len || len > 2))
		return -EPROTO;

	guc_id = msg[0];

	/* The error type DWord is optional */
	if (len == 2)
		type = msg[1];

	if (guc_id == GUC_ID_UNKNOWN) {
		/*
		 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF
		 * context. In such case only PF will be notified about that fault.
		 */
		xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
		return 0;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	/*
	 * The type is HW-defined and changes based on platform, so we don't
	 * decode it in the kernel and only check if it is valid.
	 * See bspec 54047 and 72187 for details.
2954 */ 2955 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 2956 xe_gt_info(gt, 2957 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 2958 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 2959 else 2960 xe_gt_info(gt, 2961 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 2962 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 2963 2964 trace_xe_exec_queue_memory_cat_error(q); 2965 2966 /* Treat the same as engine reset */ 2967 xe_guc_exec_queue_reset_trigger_cleanup(q); 2968 2969 return 0; 2970 } 2971 2972 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 2973 { 2974 struct xe_gt *gt = guc_to_gt(guc); 2975 u8 guc_class, instance; 2976 u32 reason; 2977 2978 if (unlikely(len != 3)) 2979 return -EPROTO; 2980 2981 guc_class = msg[0]; 2982 instance = msg[1]; 2983 reason = msg[2]; 2984 2985 /* Unexpected failure of a hardware feature, log an actual error */ 2986 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 2987 guc_class, instance, reason); 2988 2989 xe_gt_reset_async(gt); 2990 2991 return 0; 2992 } 2993 2994 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 2995 u32 len) 2996 { 2997 struct xe_gt *gt = guc_to_gt(guc); 2998 struct xe_device *xe = guc_to_xe(guc); 2999 struct xe_exec_queue *q; 3000 u32 guc_id = msg[2]; 3001 3002 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3003 drm_err(&xe->drm, "Invalid length %u", len); 3004 return -EPROTO; 3005 } 3006 3007 q = g2h_exec_queue_lookup(guc, guc_id); 3008 if (unlikely(!q)) 3009 return -EPROTO; 3010 3011 xe_gt_dbg(gt, 3012 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3013 msg[0] & 1 ? 
"uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3014 3015 trace_xe_exec_queue_cgp_context_error(q); 3016 3017 /* Treat the same as engine reset */ 3018 xe_guc_exec_queue_reset_trigger_cleanup(q); 3019 3020 return 0; 3021 } 3022 3023 /** 3024 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3025 * @guc: guc 3026 * @msg: message indicating CGP sync done 3027 * @len: length of message 3028 * 3029 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3030 * for CGP synchronization to complete. 3031 * 3032 * Return: 0 on success, -EPROTO for malformed messages. 3033 */ 3034 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3035 { 3036 struct xe_device *xe = guc_to_xe(guc); 3037 struct xe_exec_queue *q; 3038 u32 guc_id = msg[0]; 3039 3040 if (unlikely(len < 1)) { 3041 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3042 return -EPROTO; 3043 } 3044 3045 q = g2h_exec_queue_lookup(guc, guc_id); 3046 if (unlikely(!q)) 3047 return -EPROTO; 3048 3049 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3050 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3051 return -EPROTO; 3052 } 3053 3054 /* Wakeup the serialized cgp update wait */ 3055 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3056 xe_guc_ct_wake_waiters(&guc->ct); 3057 3058 return 0; 3059 } 3060 3061 static void 3062 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3063 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3064 { 3065 struct xe_guc *guc = exec_queue_to_guc(q); 3066 struct xe_device *xe = guc_to_xe(guc); 3067 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3068 int i; 3069 3070 snapshot->guc.wqi_head = q->guc->wqi_head; 3071 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3072 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3073 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3074 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 
3075 wq_desc.wq_status); 3076 3077 if (snapshot->parallel.wq_desc.head != 3078 snapshot->parallel.wq_desc.tail) { 3079 for (i = snapshot->parallel.wq_desc.head; 3080 i != snapshot->parallel.wq_desc.tail; 3081 i = (i + sizeof(u32)) % WQ_SIZE) 3082 snapshot->parallel.wq[i / sizeof(u32)] = 3083 parallel_read(xe, map, wq[i / sizeof(u32)]); 3084 } 3085 } 3086 3087 static void 3088 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3089 struct drm_printer *p) 3090 { 3091 int i; 3092 3093 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3094 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3095 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3096 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3097 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3098 3099 if (snapshot->parallel.wq_desc.head != 3100 snapshot->parallel.wq_desc.tail) { 3101 for (i = snapshot->parallel.wq_desc.head; 3102 i != snapshot->parallel.wq_desc.tail; 3103 i = (i + sizeof(u32)) % WQ_SIZE) 3104 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3105 snapshot->parallel.wq[i / sizeof(u32)]); 3106 } 3107 } 3108 3109 /** 3110 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3111 * @q: faulty exec queue 3112 * 3113 * This can be printed out in a later stage like during dev_coredump 3114 * analysis. 3115 * 3116 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3117 * caller, using `xe_guc_exec_queue_snapshot_free`. 
3118 */ 3119 struct xe_guc_submit_exec_queue_snapshot * 3120 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 3121 { 3122 struct xe_gpu_scheduler *sched = &q->guc->sched; 3123 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3124 int i; 3125 3126 snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); 3127 3128 if (!snapshot) 3129 return NULL; 3130 3131 snapshot->guc.id = q->guc->id; 3132 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 3133 snapshot->class = q->class; 3134 snapshot->logical_mask = q->logical_mask; 3135 snapshot->width = q->width; 3136 snapshot->refcount = kref_read(&q->refcount); 3137 snapshot->sched_timeout = sched->base.timeout; 3138 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 3139 snapshot->sched_props.preempt_timeout_us = 3140 q->sched_props.preempt_timeout_us; 3141 3142 snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *), 3143 GFP_ATOMIC); 3144 3145 if (snapshot->lrc) { 3146 for (i = 0; i < q->width; ++i) { 3147 struct xe_lrc *lrc = q->lrc[i]; 3148 3149 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 3150 } 3151 } 3152 3153 snapshot->schedule_state = atomic_read(&q->guc->state); 3154 snapshot->exec_queue_flags = q->flags; 3155 3156 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 3157 if (snapshot->parallel_execution) 3158 guc_exec_queue_wq_snapshot_capture(q, snapshot); 3159 3160 if (xe_exec_queue_is_multi_queue(q)) { 3161 snapshot->multi_queue.valid = true; 3162 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; 3163 snapshot->multi_queue.pos = q->multi_queue.pos; 3164 } 3165 3166 return snapshot; 3167 } 3168 3169 /** 3170 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. 3171 * @snapshot: Previously captured snapshot of job. 3172 * 3173 * This captures some data that requires taking some locks, so it cannot be done in signaling path. 
3174 */ 3175 void 3176 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3177 { 3178 int i; 3179 3180 if (!snapshot || !snapshot->lrc) 3181 return; 3182 3183 for (i = 0; i < snapshot->width; ++i) 3184 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3185 } 3186 3187 /** 3188 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3189 * @snapshot: GuC Submit Engine snapshot object. 3190 * @p: drm_printer where it will be printed out. 3191 * 3192 * This function prints out a given GuC Submit Engine snapshot object. 3193 */ 3194 void 3195 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3196 struct drm_printer *p) 3197 { 3198 int i; 3199 3200 if (!snapshot) 3201 return; 3202 3203 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id); 3204 drm_printf(p, "\tName: %s\n", snapshot->name); 3205 drm_printf(p, "\tClass: %d\n", snapshot->class); 3206 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); 3207 drm_printf(p, "\tWidth: %d\n", snapshot->width); 3208 drm_printf(p, "\tRef: %d\n", snapshot->refcount); 3209 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); 3210 drm_printf(p, "\tTimeslice: %u (us)\n", 3211 snapshot->sched_props.timeslice_us); 3212 drm_printf(p, "\tPreempt timeout: %u (us)\n", 3213 snapshot->sched_props.preempt_timeout_us); 3214 3215 for (i = 0; snapshot->lrc && i < snapshot->width; ++i) 3216 xe_lrc_snapshot_print(snapshot->lrc[i], p); 3217 3218 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); 3219 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); 3220 3221 if (snapshot->parallel_execution) 3222 guc_exec_queue_wq_snapshot_print(snapshot, p); 3223 3224 if (snapshot->multi_queue.valid) { 3225 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); 3226 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); 3227 } 3228 } 3229 3230 /** 3231 * 
xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that needed to be allocated at capture
 * time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	if (!snapshot)
		return;

	if (snapshot->lrc) {
		int n;

		for (n = 0; n < snapshot->width; n++)
			xe_lrc_snapshot_free(snapshot->lrc[n]);
		kfree(snapshot->lrc);
	}
	kfree(snapshot);
}

/* Capture, print, and free a one-shot snapshot of @q */
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot =
		xe_guc_exec_queue_snapshot_capture(q);

	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);
}

/**
 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
 * @q: Execution queue
 * @ctx_type: Type of the context
 *
 * This function registers the execution queue with the guc. Special context
 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
 * are only applicable for IGPU and in the VF.
 * Submits the execution queue to GUC after registering it.
 *
 * Returns - None.
3273 */ 3274 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3275 { 3276 struct xe_guc *guc = exec_queue_to_guc(q); 3277 struct xe_device *xe = guc_to_xe(guc); 3278 struct xe_gt *gt = guc_to_gt(guc); 3279 3280 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3281 xe_gt_assert(gt, !IS_DGFX(xe)); 3282 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3283 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3284 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3285 3286 register_exec_queue(q, ctx_type); 3287 enable_scheduling(q); 3288 } 3289 3290 /** 3291 * xe_guc_submit_print - GuC Submit Print. 3292 * @guc: GuC. 3293 * @p: drm_printer where it will be printed out. 3294 * 3295 * This function capture and prints snapshots of **all** GuC Engines. 3296 */ 3297 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3298 { 3299 struct xe_exec_queue *q; 3300 unsigned long index; 3301 3302 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3303 return; 3304 3305 mutex_lock(&guc->submission_state.lock); 3306 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3307 guc_exec_queue_print(q, p); 3308 mutex_unlock(&guc->submission_state.lock); 3309 } 3310 3311 /** 3312 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues 3313 * registered with the GuC 3314 * @guc: GuC. 3315 * 3316 * Return: true if any MLRC queue is registered with the GuC, false otherwise. 3317 */ 3318 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) 3319 { 3320 struct xe_exec_queue *q; 3321 unsigned long index; 3322 3323 guard(mutex)(&guc->submission_state.lock); 3324 3325 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3326 if (q->width > 1) 3327 return true; 3328 3329 return false; 3330 } 3331 3332 /** 3333 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3334 * exec queues registered to given GuC. 
3335 * @guc: the &xe_guc struct instance 3336 * @scratch: scratch buffer to be used as temporary storage 3337 * 3338 * Returns: zero on success, negative error code on failure. 3339 */ 3340 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3341 { 3342 struct xe_exec_queue *q; 3343 unsigned long index; 3344 int err = 0; 3345 3346 mutex_lock(&guc->submission_state.lock); 3347 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3348 /* Prevent redundant attempts to stop parallel queues */ 3349 if (q->guc->id != index) 3350 continue; 3351 3352 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3353 if (err) 3354 break; 3355 } 3356 mutex_unlock(&guc->submission_state.lock); 3357 3358 return err; 3359 } 3360