// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sleep.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6

static int guc_submit_reset_prepare(struct xe_guc *guc);

/* Return the GuC embedded in the GT that @q points at (q->gt->uc.guc). */
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as schedule
 * engine done being processed).
62 */ 63 #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) 64 #define EXEC_QUEUE_STATE_ENABLED (1 << 1) 65 #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) 66 #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) 67 #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) 68 #define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) 69 #define EXEC_QUEUE_STATE_RESET (1 << 6) 70 #define EXEC_QUEUE_STATE_KILLED (1 << 7) 71 #define EXEC_QUEUE_STATE_WEDGED (1 << 8) 72 #define EXEC_QUEUE_STATE_BANNED (1 << 9) 73 #define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 10) 74 75 static bool exec_queue_registered(struct xe_exec_queue *q) 76 { 77 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; 78 } 79 80 static void set_exec_queue_registered(struct xe_exec_queue *q) 81 { 82 atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 83 } 84 85 static void clear_exec_queue_registered(struct xe_exec_queue *q) 86 { 87 atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 88 } 89 90 static bool exec_queue_enabled(struct xe_exec_queue *q) 91 { 92 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; 93 } 94 95 static void set_exec_queue_enabled(struct xe_exec_queue *q) 96 { 97 atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 98 } 99 100 static void clear_exec_queue_enabled(struct xe_exec_queue *q) 101 { 102 atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 103 } 104 105 static bool exec_queue_pending_enable(struct xe_exec_queue *q) 106 { 107 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; 108 } 109 110 static void set_exec_queue_pending_enable(struct xe_exec_queue *q) 111 { 112 atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 113 } 114 115 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) 116 { 117 atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 118 } 119 120 static bool exec_queue_pending_disable(struct xe_exec_queue *q) 121 { 122 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; 123 } 124 125 static 
void set_exec_queue_pending_disable(struct xe_exec_queue *q) 126 { 127 atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 128 } 129 130 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) 131 { 132 atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 133 } 134 135 static bool exec_queue_destroyed(struct xe_exec_queue *q) 136 { 137 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; 138 } 139 140 static void set_exec_queue_destroyed(struct xe_exec_queue *q) 141 { 142 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 143 } 144 145 static void clear_exec_queue_destroyed(struct xe_exec_queue *q) 146 { 147 atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 148 } 149 150 static bool exec_queue_banned(struct xe_exec_queue *q) 151 { 152 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; 153 } 154 155 static void set_exec_queue_banned(struct xe_exec_queue *q) 156 { 157 atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 158 } 159 160 static bool exec_queue_suspended(struct xe_exec_queue *q) 161 { 162 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; 163 } 164 165 static void set_exec_queue_suspended(struct xe_exec_queue *q) 166 { 167 atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 168 } 169 170 static void clear_exec_queue_suspended(struct xe_exec_queue *q) 171 { 172 atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 173 } 174 175 static bool exec_queue_reset(struct xe_exec_queue *q) 176 { 177 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; 178 } 179 180 static void set_exec_queue_reset(struct xe_exec_queue *q) 181 { 182 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); 183 } 184 185 static bool exec_queue_killed(struct xe_exec_queue *q) 186 { 187 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; 188 } 189 190 static void set_exec_queue_killed(struct xe_exec_queue *q) 191 { 192 atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); 193 } 194 195 static 
bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

/* Set EXEC_QUEUE_STATE_WEDGED in q->guc->state; no clear helper is defined here. */
static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

/* Test / set / clear EXEC_QUEUE_STATE_PENDING_RESUME in q->guc->state. */
static bool exec_queue_pending_resume(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
}

static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

/* True if any of the WEDGED, KILLED or BANNED bits is set, from a single read. */
static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

/*
 * drmm release action (registered by xe_guc_submit_init()): wait up to five
 * seconds for the exec queue lookup xarray to drain, flush the device destroy
 * workqueue, then destroy the xarray.
 *
 * @drm is unused; @arg is the &xe_guc passed at registration time.
 */
static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	/* fini_wq is woken by __release_guc_id() once the lookup is empty */
	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	/* ret == 0 means the wait above timed out with queues still registered */
	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

/*
 * devm release action (registered by xe_guc_submit_init()): drop the
 * reference on every wedged exec queue, then stop the CT, reset the GuC and
 * stop/abort submission for whatever is left.
 *
 * @arg is the &xe_guc passed at registration time.
 */
static void guc_submit_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	/* Drop any wedged queue refs */
	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			/*
			 * The put is done with submission_state.lock dropped.
			 * NOTE(review): presumably because the final put can
			 * end up in release_guc_id(), which takes this lock -
			 * confirm.
			 */
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);

	/* Forcefully kill any remaining exec queues */
	xe_guc_ct_stop(&guc->ct);
	guc_submit_reset_prepare(guc);
	xe_guc_softreset(guc);
	xe_guc_submit_stop(guc);
	xe_uc_fw_sanitize(&guc->fw);
	xe_guc_submit_pause_abort(guc);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

/*
 * With CONFIG_LOCKDEP enabled, take and release submission_state.lock once
 * inside an fs_reclaim_acquire()/fs_reclaim_release() section.
 * NOTE(review): presumably this records the "reclaim -> submission_state.lock"
 * ordering with lockdep up front - confirm.
 */
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as @num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
298 */ 299 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) 300 { 301 struct xe_device *xe = guc_to_xe(guc); 302 struct xe_gt *gt = guc_to_gt(guc); 303 int err; 304 305 err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); 306 if (err) 307 return err; 308 309 err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids); 310 if (err) 311 return err; 312 313 gt->exec_queue_ops = &guc_exec_queue_ops; 314 315 xa_init(&guc->submission_state.exec_queue_lookup); 316 317 init_waitqueue_head(&guc->submission_state.fini_wq); 318 319 primelockdep(guc); 320 321 guc->submission_state.initialized = true; 322 323 err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc); 324 if (err) 325 return err; 326 327 return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc); 328 } 329 330 /* 331 * Given that we want to guarantee enough RCS throughput to avoid missing 332 * frames, we set the yield policy to 20% of each 80ms interval. 333 */ 334 #define RC_YIELD_DURATION 80 /* in ms */ 335 #define RC_YIELD_RATIO 20 /* in percent */ 336 static u32 *emit_render_compute_yield_klv(u32 *emit) 337 { 338 *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); 339 *emit++ = RC_YIELD_DURATION; 340 *emit++ = RC_YIELD_RATIO; 341 342 return emit; 343 } 344 345 #define SCHEDULING_POLICY_MAX_DWORDS 16 346 static int guc_init_global_schedule_policy(struct xe_guc *guc) 347 { 348 u32 data[SCHEDULING_POLICY_MAX_DWORDS]; 349 u32 *emit = data; 350 u32 count = 0; 351 int ret; 352 353 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 354 return 0; 355 356 *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 357 358 if (CCS_INSTANCES(guc_to_gt(guc))) 359 emit = emit_render_compute_yield_klv(emit); 360 361 count = emit - data; 362 if (count > 1) { 363 xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); 364 365 ret = xe_guc_ct_send_block(&guc->ct, data, count); 366 if (ret < 0) { 367 xe_gt_err(guc_to_gt(guc), 368 "failed to enable GuC 
scheduling policies: %pe\n", 369 ERR_PTR(ret)); 370 return ret; 371 } 372 } 373 374 return 0; 375 } 376 377 int xe_guc_submit_enable(struct xe_guc *guc) 378 { 379 int ret; 380 381 ret = guc_init_global_schedule_policy(guc); 382 if (ret) 383 return ret; 384 385 guc->submission_state.enabled = true; 386 387 return 0; 388 } 389 390 void xe_guc_submit_disable(struct xe_guc *guc) 391 { 392 guc->submission_state.enabled = false; 393 } 394 395 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) 396 { 397 int i; 398 399 lockdep_assert_held(&guc->submission_state.lock); 400 401 for (i = 0; i < xa_count; ++i) 402 xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); 403 404 xe_guc_id_mgr_release_locked(&guc->submission_state.idm, 405 q->guc->id, q->width); 406 407 if (xa_empty(&guc->submission_state.exec_queue_lookup)) 408 wake_up(&guc->submission_state.fini_wq); 409 } 410 411 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 412 { 413 int ret; 414 int i; 415 416 /* 417 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, 418 * worse case user gets -ENOMEM on engine create and has to try again. 419 * 420 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent 421 * failure. 
422 */ 423 lockdep_assert_held(&guc->submission_state.lock); 424 425 ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, 426 q->width); 427 if (ret < 0) 428 return ret; 429 430 q->guc->id = ret; 431 432 for (i = 0; i < q->width; ++i) { 433 ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup, 434 q->guc->id + i, q, GFP_NOWAIT)); 435 if (ret) 436 goto err_release; 437 } 438 439 return 0; 440 441 err_release: 442 __release_guc_id(guc, q, i); 443 444 return ret; 445 } 446 447 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 448 { 449 mutex_lock(&guc->submission_state.lock); 450 __release_guc_id(guc, q, q->width); 451 mutex_unlock(&guc->submission_state.lock); 452 } 453 454 struct exec_queue_policy { 455 u32 count; 456 struct guc_update_exec_queue_policy h2g; 457 }; 458 459 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) 460 { 461 size_t bytes = sizeof(policy->h2g.header) + 462 (sizeof(policy->h2g.klv[0]) * policy->count); 463 464 return bytes / sizeof(u32); 465 } 466 467 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, 468 u16 guc_id) 469 { 470 policy->h2g.header.action = 471 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 472 policy->h2g.header.guc_id = guc_id; 473 policy->count = 0; 474 } 475 476 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ 477 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ 478 u32 data) \ 479 { \ 480 XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 481 \ 482 policy->h2g.klv[policy->count].kl = \ 483 FIELD_PREP(GUC_KLV_0_KEY, \ 484 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 485 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 486 policy->h2g.klv[policy->count].value = data; \ 487 policy->count++; \ 488 } 489 490 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 491 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 492 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) 493 
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY) 494 #undef MAKE_EXEC_QUEUE_POLICY_ADD 495 496 static const int xe_exec_queue_prio_to_guc[] = { 497 [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, 498 [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, 499 [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, 500 [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, 501 }; 502 503 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) 504 { 505 struct exec_queue_policy policy; 506 enum xe_exec_queue_priority prio = q->sched_props.priority; 507 u32 timeslice_us = q->sched_props.timeslice_us; 508 u32 slpc_exec_queue_freq_req = 0; 509 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; 510 511 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) && 512 !xe_exec_queue_is_multi_queue_secondary(q)); 513 514 if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY) 515 slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; 516 517 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 518 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); 519 __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); 520 __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); 521 __guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy, 522 slpc_exec_queue_freq_req); 523 524 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 525 __guc_exec_queue_policy_action_size(&policy), 0, 0); 526 } 527 528 static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) 529 { 530 struct exec_queue_policy policy; 531 532 xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q)); 533 534 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 535 __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); 536 537 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 538 __guc_exec_queue_policy_action_size(&policy), 0, 0); 539 } 540 541 static bool 
vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

/*
 * Wake every waiter on the device user fence wait queue and queue the TDR of
 * @q's scheduler for immediate execution.
 */
static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	xe_sched_tdr_queue_imm(&q->guc->sched);
}

/*
 * Stop scheduler submission on the primary queue and on every queue currently
 * on the group list of @q's multi-queue group, and mark the group stopped.
 *
 * Queues are moved to a private list (each with a reference taken) so that the
 * schedulers can be stopped without holding group->list_lock, then moved back.
 */
static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq, *next;
	LIST_HEAD(tmp);

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	mutex_lock(&group->list_lock);

	/*
	 * Stop all future queues from being executed while the group is
	 * stopped.
	 */
	group->stopped = true;

	list_for_each_entry_safe(eq, next, &group->list, multi_queue.link)
		/*
		 * Refcount prevents an attempted removal from &group->list,
		 * temporary list allows safe iteration after dropping
		 * &group->list_lock.
		 */
		if (xe_exec_queue_get_unless_zero(eq))
			list_move_tail(&eq->multi_queue.link, &tmp);

	mutex_unlock(&group->list_lock);

	/* We cannot stop under list lock without getting inversions */
	xe_sched_submission_stop(&primary->guc->sched);
	list_for_each_entry(eq, &tmp, multi_queue.link)
		xe_sched_submission_stop(&eq->guc->sched);

	mutex_lock(&group->list_lock);
	list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) {
		/*
		 * Corner case where the group got banned while we were
		 * stopping and this queue was not on &group->list, so the
		 * group-wide cleanup trigger did not reach it.
		 */
		if (READ_ONCE(group->banned))
			xe_guc_exec_queue_trigger_cleanup(eq);

		list_move_tail(&eq->multi_queue.link, &group->list);
		xe_exec_queue_put(eq);
	}
	mutex_unlock(&group->list_lock);
}

/*
 * Restart scheduler submission on the primary queue and on every queue on the
 * group list, clearing group->stopped under group->list_lock.
 */
static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	xe_sched_submission_start(&primary->guc->sched);

	mutex_lock(&group->list_lock);
	group->stopped = false;
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_sched_submission_start(&eq->guc->sched);
	mutex_unlock(&group->list_lock);
}

/*
 * Mark the whole multi-queue group of @q as banned and trigger cleanup on the
 * primary queue and on every queue on the group list.
 */
static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	/* Group banned, skip timeout check in TDR */
	WRITE_ONCE(group->banned, true);
	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

/*
 * Set the RESET state bit on @q and trigger its cleanup unless it is already
 * banned. For a multi-queue @q the same is done for the primary queue and
 * every queue on the group list, and the group itself is marked banned.
 */
static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		/* Group banned, skip timeout check in TDR */
		WRITE_ONCE(group->banned, true);

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

/*
 * Set the BANNED state bit on the primary queue and on every queue on the
 * group list of @q's multi-queue group.
 */
static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 cgp_lo;
	u32 cgp_hi;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};

/*
 * Read / write one field of the struct guc_submit_parallel_scratch found at
 * offset 0 of the iosys_map @map_.
 */
#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

/**
 * DOC: Multi Queue Group GuC interface
 *
 * The multi queue group coordination between KMD and GuC is through a software
 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
 * allocated in the global GTT.
 *
 * CGP format:
 *
 * +-----------+---------------------------+---------------------------------------------+
 * | DWORD     | Name                      | Description                                 |
 * +-----------+---------------------------+---------------------------------------------+
 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 1..15     | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
 * +-----------+---------------------------+---------------------------------------------+
 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
 * +-----------+---------------------------+---------------------------------------------+
 * | 18..31    | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | ...       | ...                       | ...                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
 * +-----------+---------------------------+---------------------------------------------+
 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
 * +-----------+---------------------------+---------------------------------------------+
 * | 160..1023 | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 *
 * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified
 * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies
 * the CGP address. When the secondary queues are added to the group, the CGP is
 * updated with entry for that queue and GuC is notified through the H2G interface
 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages
 * with a XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any
 * error in the CGP. Only one of these CGP update messages can be outstanding
 * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW*
 * fields indicate which queue entry is being updated in the CGP.
 *
 * The primary queue (Q0) represents the multi queue group context in GuC and
 * submission on any queue of the group must be through Q0 GuC interface only.
 *
 * As it is not required to register secondary queues with GuC, the secondary queue
 * context ids in the CGP are populated with Q0 context id.
 */

/* The CGP version dword carries the major version in bits [15:8] */
#define CGP_VERSION_MAJOR_SHIFT	8

/*
 * Write the CGP entry for @q into the group's CGP buffer object: the version
 * dword, the queue's LRC descriptor (lower dword) and context index at the
 * slot selected by q->multi_queue.pos, and the KMD queue update mask with
 * only the bit for that slot set.
 */
static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	/* Per-queue entries start at dword 32, two dwords per queue */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	/* Every queue's context index is the primary queue's GuC id */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	/* Dword 16 covers positions 0..31, dword 17 the positions above */
	if (q->multi_queue.pos / 32) {
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

/*
 * Update the CGP for @q and send the H2G @action (@len dwords) that makes the
 * GuC re-read it.
 *
 * First waits up to one second for a previous sync to be acknowledged. On
 * timeout, or if the GuC is stopped, the group is banned, an async GT reset
 * is requested, cleanup is triggered for the group and nothing is sent.
 */
static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_multi_queue_priority priority;
	long ret;

	/*
	 * As all queues of a multi queue group use a single drm scheduler
	 * submit workqueue, CGP synchronization with GuC is serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset gt, cleanup the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	/* Sample the priority under the queue's multi_queue lock */
	scoped_guard(spinlock, &q->multi_queue.lock)
		priority = q->multi_queue.priority;

	xe_lrc_set_multi_queue_priority(q->lrc[0], priority);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	/* Flag the sync as outstanding before the message is sent */
	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}

/*
 * Build the REGISTER_CONTEXT_MULTI_QUEUE H2G from @info and send it through
 * the CGP sync path.
 */
static void __register_exec_queue_group(struct xe_guc *guc,
					struct xe_exec_queue *q,
					struct guc_ctxt_registration_info *info)
{
#define MAX_MULTI_QUEUE_REG_SIZE	(8)
	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
	int len = 0;

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = 0; /* Reserved */
	action[len++] = info->cgp_lo;
	action[len++] = info->cgp_hi;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
#undef MAX_MULTI_QUEUE_REG_SIZE

	/*
	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/*
 * Add the secondary queue @q to its multi-queue group: build the
 * MULTI_QUEUE_CONTEXT_CGP_SYNC H2G, addressed with the primary queue's GuC
 * id, and send it through the CGP sync path.
 */
static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE	(2)
	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
	int len = 0;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));

	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
	action[len++] = q->multi_queue.group->primary->guc->id;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

	/*
	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/*
 * Build and send the REGISTER_CONTEXT_MULTI_LRC H2G for the parallel queue
 * @q: the fixed fields from @info followed by the LRC descriptor (low, high)
 * of every LRC after the first.
 */
static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE	(13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	/* LRC 0 is already described by hwlrca_lo/hi above */
	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

/* Build and send the single-LRC REGISTER_CONTEXT H2G from @info. */
static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * Register @q with the GuC as context type @ctx_type.
 *
 * Fills the registration info, resets the parallel work queue state for
 * parallel queues, marks the queue registered and then sends the message that
 * matches the queue kind:
 *  - multi-queue primary: multi-queue registration (through CGP sync)
 *  - parallel: multi-LRC registration
 *  - multi-queue secondary: no registration, only a CGP sync adding the queue
 *  - anything else: plain context registration
 *
 * Policies are initialised for every queue except multi-queue secondaries.
 */
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		/* Start from an empty work queue and a zeroed descriptor area */
		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

/* Bytes left between the current work queue tail and the end of the queue. */
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

/*
 * Wait until the parallel work queue of @q has room for @wqi_size bytes.
 *
 * The cached head is refreshed from wq_desc.head and the check repeated with
 * growing sleeps (capped at 64 ms each). The wait is skipped or abandoned as
 * soon as vf_recovery() reports a pending recovery.
 *
 * Return: 0 when there is room (or recovery is pending), -ENODEV after more
 * than 2000 ms of accumulated sleep, in which case an async GT reset has been
 * requested.
 */
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64);
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

/*
 * Fill the space between the current tail and the end of the work queue with
 * a single NOOP item and wrap the tail back to 0.
 *
 * Return: 0 on success, -ENODEV if the space did not become available.
 */
static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	/* Item length in dwords, not counting the header dword itself */
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE	4 /* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0,
j; 1090 1091 if (wqi_size > wq_space_until_wrap(q)) { 1092 if (wq_noop_append(q)) 1093 return; 1094 } 1095 if (wq_wait_for_space(q, wqi_size)) 1096 return; 1097 1098 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 1099 FIELD_PREP(WQ_LEN_MASK, len_dw); 1100 wqi[i++] = xe_lrc_descriptor(q->lrc[0]); 1101 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | 1102 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); 1103 wqi[i++] = 0; 1104 for (j = 1; j < q->width; ++j) { 1105 struct xe_lrc *lrc = q->lrc[j]; 1106 1107 wqi[i++] = lrc->ring.tail / sizeof(u64); 1108 } 1109 1110 xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); 1111 1112 iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, 1113 wq[q->guc->wqi_tail / sizeof(u32)])); 1114 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); 1115 q->guc->wqi_tail += wqi_size; 1116 xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); 1117 1118 xe_device_wmb(xe); 1119 1120 map = xe_lrc_parallel_map(q->lrc[0]); 1121 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); 1122 } 1123 1124 #define RESUME_PENDING ~0x0ull 1125 static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) 1126 { 1127 struct xe_guc *guc = exec_queue_to_guc(q); 1128 struct xe_lrc *lrc = q->lrc[0]; 1129 u32 action[3]; 1130 u32 g2h_len = 0; 1131 u32 num_g2h = 0; 1132 int len = 0; 1133 bool extra_submit = false; 1134 1135 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1136 1137 if (!job->restore_replay || job->last_replay) { 1138 if (xe_exec_queue_is_parallel(q)) 1139 wq_item_append(q); 1140 else 1141 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1142 job->last_replay = false; 1143 } 1144 1145 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) 1146 return; 1147 1148 /* 1149 * All queues in a multi-queue group will use the primary queue 1150 * of the group to interface with GuC. If primay is suspended, 1151 * just return. Jobs will get scheduled once primary is resumed. 
1152 */ 1153 q = xe_exec_queue_multi_queue_primary(q); 1154 if (exec_queue_suspended(q)) 1155 return; 1156 1157 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { 1158 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 1159 action[len++] = q->guc->id; 1160 action[len++] = GUC_CONTEXT_ENABLE; 1161 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 1162 num_g2h = 1; 1163 if (xe_exec_queue_is_parallel(q)) 1164 extra_submit = true; 1165 1166 q->guc->resume_time = RESUME_PENDING; 1167 set_exec_queue_pending_enable(q); 1168 set_exec_queue_enabled(q); 1169 trace_xe_exec_queue_scheduling_enable(q); 1170 } else { 1171 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1172 action[len++] = q->guc->id; 1173 trace_xe_exec_queue_submit(q); 1174 } 1175 1176 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); 1177 1178 if (extra_submit) { 1179 len = 0; 1180 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1181 action[len++] = q->guc->id; 1182 trace_xe_exec_queue_submit(q); 1183 1184 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 1185 } 1186 } 1187 1188 static struct dma_fence * 1189 guc_exec_queue_run_job(struct drm_sched_job *drm_job) 1190 { 1191 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1192 struct xe_exec_queue *q = job->q; 1193 struct xe_guc *guc = exec_queue_to_guc(q); 1194 bool killed_or_banned_or_wedged = 1195 exec_queue_killed_or_banned_or_wedged(q); 1196 1197 xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 1198 exec_queue_banned(q) || exec_queue_suspended(q)); 1199 1200 trace_xe_sched_job_run(job); 1201 1202 if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { 1203 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1204 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 1205 1206 if (exec_queue_killed_or_banned_or_wedged(primary)) { 1207 killed_or_banned_or_wedged = true; 1208 goto run_job_out; 1209 } 1210 1211 if (!exec_queue_registered(primary)) 1212 register_exec_queue(primary, 
GUC_CONTEXT_NORMAL); 1213 } 1214 1215 if (!exec_queue_registered(q)) 1216 register_exec_queue(q, GUC_CONTEXT_NORMAL); 1217 if (!job->restore_replay) 1218 q->ring_ops->emit_job(job); 1219 submit_exec_queue(q, job); 1220 job->restore_replay = false; 1221 } 1222 1223 run_job_out: 1224 1225 return job->fence; 1226 } 1227 1228 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) 1229 { 1230 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1231 1232 trace_xe_sched_job_free(job); 1233 xe_sched_job_put(job); 1234 } 1235 1236 int xe_guc_read_stopped(struct xe_guc *guc) 1237 { 1238 return atomic_read(&guc->submission_state.stopped); 1239 } 1240 1241 static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, 1242 struct xe_exec_queue *q, 1243 u32 runnable_state); 1244 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q); 1245 1246 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ 1247 u32 action[] = { \ 1248 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ 1249 q->guc->id, \ 1250 GUC_CONTEXT_##enable_disable, \ 1251 } 1252 1253 static void disable_scheduling_deregister(struct xe_guc *guc, 1254 struct xe_exec_queue *q) 1255 { 1256 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1257 int ret; 1258 1259 if (!xe_exec_queue_is_multi_queue_secondary(q)) 1260 set_min_preemption_timeout(guc, q); 1261 1262 smp_rmb(); 1263 ret = wait_event_timeout(guc->ct.wq, 1264 (!exec_queue_pending_enable(q) && 1265 !exec_queue_pending_disable(q)) || 1266 xe_guc_read_stopped(guc) || 1267 vf_recovery(guc), 1268 HZ * 5); 1269 if (!ret && !vf_recovery(guc)) { 1270 struct xe_gpu_scheduler *sched = &q->guc->sched; 1271 1272 xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); 1273 xe_sched_submission_start(sched); 1274 xe_gt_reset_async(q->gt); 1275 xe_sched_tdr_queue_imm(sched); 1276 return; 1277 } 1278 1279 clear_exec_queue_enabled(q); 1280 set_exec_queue_pending_disable(q); 1281 set_exec_queue_destroyed(q); 1282 
trace_xe_exec_queue_scheduling_disable(q); 1283 1284 /* 1285 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H 1286 * handler and we are not allowed to reserved G2H space in handlers. 1287 */ 1288 if (xe_exec_queue_is_multi_queue_secondary(q)) 1289 handle_multi_queue_secondary_sched_done(guc, q, 0); 1290 else 1291 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1292 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + 1293 G2H_LEN_DW_DEREGISTER_CONTEXT, 2); 1294 } 1295 1296 /** 1297 * xe_guc_submit_wedge() - Wedge GuC submission 1298 * @guc: the GuC object 1299 * 1300 * Save exec queue's registered with GuC state by taking a ref to each queue. 1301 * Register a DRMM handler to drop refs upon driver unload. 1302 */ 1303 void xe_guc_submit_wedge(struct xe_guc *guc) 1304 { 1305 struct xe_device *xe = guc_to_xe(guc); 1306 struct xe_exec_queue *q; 1307 unsigned long index; 1308 1309 xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); 1310 1311 /* 1312 * If device is being wedged even before submission_state is 1313 * initialized, there's nothing to do here. 
1314 */ 1315 if (!guc->submission_state.initialized) 1316 return; 1317 1318 if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) { 1319 mutex_lock(&guc->submission_state.lock); 1320 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 1321 if (xe_exec_queue_get_unless_zero(q)) 1322 set_exec_queue_wedged(q); 1323 mutex_unlock(&guc->submission_state.lock); 1324 } else { 1325 /* Forcefully kill any remaining exec queues, signal fences */ 1326 guc_submit_reset_prepare(guc); 1327 xe_guc_submit_stop(guc); 1328 xe_guc_softreset(guc); 1329 xe_uc_fw_sanitize(&guc->fw); 1330 xe_guc_submit_pause_abort(guc); 1331 } 1332 } 1333 1334 static bool guc_submit_hint_wedged(struct xe_guc *guc) 1335 { 1336 struct xe_device *xe = guc_to_xe(guc); 1337 1338 if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) 1339 return false; 1340 1341 if (xe_device_wedged(xe)) 1342 return true; 1343 1344 xe_device_declare_wedged(xe); 1345 1346 return true; 1347 } 1348 1349 #define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) 1350 1351 static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) 1352 { 1353 struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); 1354 u32 ctx_timestamp, ctx_job_timestamp; 1355 u32 timeout_ms = q->sched_props.job_timeout_ms; 1356 u32 diff; 1357 u64 running_time_ms; 1358 1359 if (!xe_sched_job_started(job)) { 1360 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started", 1361 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1362 q->guc->id); 1363 1364 return xe_sched_invalidate_job(job, 2); 1365 } 1366 1367 ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0])); 1368 if (ctx_timestamp == job->sample_timestamp) { 1369 if (IS_SRIOV_VF(gt_to_xe(gt))) 1370 xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", 1371 xe_sched_job_seqno(job), 1372 xe_sched_job_lrc_seqno(job), q->guc->id); 1373 else 1374 xe_gt_warn(gt, "Check job timeout: seqno=%u, 
lrc_seqno=%u, guc_id=%d, timestamp stuck", 1375 xe_sched_job_seqno(job), 1376 xe_sched_job_lrc_seqno(job), q->guc->id); 1377 1378 return xe_sched_invalidate_job(job, 0); 1379 } 1380 1381 job->sample_timestamp = ctx_timestamp; 1382 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); 1383 1384 /* 1385 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch 1386 * possible overflows with a high timeout. 1387 */ 1388 xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); 1389 1390 diff = ctx_timestamp - ctx_job_timestamp; 1391 1392 /* 1393 * Ensure timeout is within 5% to account for an GuC scheduling latency 1394 */ 1395 running_time_ms = 1396 ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); 1397 1398 xe_gt_dbg(gt, 1399 "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", 1400 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1401 q->guc->id, running_time_ms, timeout_ms, diff); 1402 1403 return running_time_ms >= timeout_ms; 1404 } 1405 1406 static void enable_scheduling(struct xe_exec_queue *q) 1407 { 1408 MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); 1409 struct xe_guc *guc = exec_queue_to_guc(q); 1410 int ret; 1411 1412 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1413 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1414 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1415 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); 1416 1417 set_exec_queue_pending_enable(q); 1418 set_exec_queue_enabled(q); 1419 trace_xe_exec_queue_scheduling_enable(q); 1420 1421 if (xe_exec_queue_is_multi_queue_secondary(q)) 1422 handle_multi_queue_secondary_sched_done(guc, q, 1); 1423 else 1424 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1425 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1426 1427 ret = wait_event_timeout(guc->ct.wq, 1428 !exec_queue_pending_enable(q) || 1429 xe_guc_read_stopped(guc) || 1430 vf_recovery(guc), HZ * 5); 1431 if ((!ret && !vf_recovery(guc)) || 
xe_guc_read_stopped(guc)) { 1432 xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); 1433 set_exec_queue_banned(q); 1434 xe_gt_reset_async(q->gt); 1435 xe_sched_tdr_queue_imm(&q->guc->sched); 1436 } 1437 } 1438 1439 static void disable_scheduling(struct xe_exec_queue *q, bool immediate) 1440 { 1441 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1442 struct xe_guc *guc = exec_queue_to_guc(q); 1443 1444 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1445 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1446 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1447 1448 if (immediate && !xe_exec_queue_is_multi_queue_secondary(q)) 1449 set_min_preemption_timeout(guc, q); 1450 clear_exec_queue_enabled(q); 1451 set_exec_queue_pending_disable(q); 1452 trace_xe_exec_queue_scheduling_disable(q); 1453 1454 if (xe_exec_queue_is_multi_queue_secondary(q)) 1455 handle_multi_queue_secondary_sched_done(guc, q, 0); 1456 else 1457 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1458 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1459 } 1460 1461 static enum drm_gpu_sched_stat 1462 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) 1463 { 1464 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1465 struct drm_sched_job *tmp_job; 1466 struct xe_exec_queue *q = job->q, *primary; 1467 struct xe_gpu_scheduler *sched = &q->guc->sched; 1468 struct xe_guc *guc = exec_queue_to_guc(q); 1469 const char *process_name = "no process"; 1470 struct xe_device *xe = guc_to_xe(guc); 1471 int err = -ETIME; 1472 pid_t pid = -1; 1473 bool wedged = false, skip_timeout_check; 1474 1475 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1476 1477 primary = xe_exec_queue_multi_queue_primary(q); 1478 1479 /* 1480 * TDR has fired before free job worker. Common if exec queue 1481 * immediately closed after last fence signaled. Add back to pending 1482 * list so job can be freed and kick scheduler ensuring free job is not 1483 * lost. 
1484 */ 1485 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) || 1486 vf_recovery(guc)) 1487 return DRM_GPU_SCHED_STAT_NO_HANG; 1488 1489 /* Kill the run_job entry point */ 1490 if (xe_exec_queue_is_multi_queue(q)) 1491 xe_guc_exec_queue_group_stop(q); 1492 else 1493 xe_sched_submission_stop(sched); 1494 1495 /* Must check all state after stopping scheduler */ 1496 skip_timeout_check = exec_queue_reset(q) || 1497 exec_queue_killed_or_banned_or_wedged(q); 1498 1499 /* Skip timeout check if multi-queue group is banned */ 1500 if (xe_exec_queue_is_multi_queue(q) && 1501 READ_ONCE(q->multi_queue.group->banned)) 1502 skip_timeout_check = true; 1503 1504 /* LR jobs can only get here if queue has been killed or hit an error */ 1505 if (xe_exec_queue_is_lr(q)) 1506 xe_gt_assert(guc_to_gt(guc), skip_timeout_check); 1507 1508 /* 1509 * If devcoredump not captured and GuC capture for the job is not ready 1510 * do manual capture first and decide later if we need to use it 1511 */ 1512 if (!exec_queue_killed(q) && !xe->devcoredump.captured && 1513 !xe_guc_capture_get_matching_and_lock(q)) { 1514 /* take force wake before engine register manual capture */ 1515 CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); 1516 if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) 1517 xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); 1518 1519 xe_engine_snapshot_capture_for_queue(q); 1520 } 1521 1522 /* 1523 * Check if job is actually timed out, if so restart job execution and TDR 1524 */ 1525 if (!skip_timeout_check && !check_timeout(q, job)) 1526 goto rearm; 1527 1528 if (!exec_queue_killed(q)) 1529 wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 1530 1531 set_exec_queue_banned(q); 1532 1533 /* Kick job / queue off hardware */ 1534 if (!wedged && (exec_queue_enabled(primary) || 1535 exec_queue_pending_disable(primary))) { 1536 int ret; 1537 1538 if (exec_queue_reset(primary)) 1539 err = -EIO; 1540 1541 if 
(xe_uc_fw_is_running(&guc->fw)) { 1542 /* 1543 * Wait for any pending G2H to flush out before 1544 * modifying state 1545 */ 1546 ret = wait_event_timeout(guc->ct.wq, 1547 (!exec_queue_pending_enable(primary) && 1548 !exec_queue_pending_disable(primary)) || 1549 xe_guc_read_stopped(guc) || 1550 vf_recovery(guc), HZ * 5); 1551 if (vf_recovery(guc)) 1552 goto handle_vf_resume; 1553 if (!ret || xe_guc_read_stopped(guc)) 1554 goto trigger_reset; 1555 1556 disable_scheduling(primary, skip_timeout_check); 1557 } 1558 1559 /* 1560 * Must wait for scheduling to be disabled before signalling 1561 * any fences, if GT broken the GT reset code should signal us. 1562 * 1563 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault 1564 * error) messages which can cause the schedule disable to get 1565 * lost. If this occurs, trigger a GT reset to recover. 1566 */ 1567 smp_rmb(); 1568 ret = wait_event_timeout(guc->ct.wq, 1569 !xe_uc_fw_is_running(&guc->fw) || 1570 !exec_queue_pending_disable(primary) || 1571 xe_guc_read_stopped(guc) || 1572 vf_recovery(guc), HZ * 5); 1573 if (vf_recovery(guc)) 1574 goto handle_vf_resume; 1575 if (!ret || xe_guc_read_stopped(guc)) { 1576 trigger_reset: 1577 if (!ret) 1578 xe_gt_warn(guc_to_gt(guc), 1579 "Schedule disable failed to respond, guc_id=%d", 1580 primary->guc->id); 1581 xe_devcoredump(primary, job, 1582 "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", 1583 primary->guc->id, ret, xe_guc_read_stopped(guc)); 1584 xe_gt_reset_async(primary->gt); 1585 xe_sched_tdr_queue_imm(sched); 1586 goto rearm; 1587 } 1588 } 1589 1590 if (q->vm && q->vm->xef) { 1591 process_name = q->vm->xef->process_name; 1592 pid = q->vm->xef->pid; 1593 } 1594 1595 if (!exec_queue_killed(q)) 1596 xe_gt_notice(guc_to_gt(guc), 1597 "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", 1598 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1599 q->guc->id, q->flags, process_name, pid); 1600 1601 
trace_xe_sched_job_timedout(job); 1602 1603 if (!exec_queue_killed(q)) 1604 xe_devcoredump(q, job, 1605 "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", 1606 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1607 q->guc->id, q->flags); 1608 1609 /* 1610 * Kernel jobs should never fail, nor should VM jobs if they do 1611 * somethings has gone wrong and the GT needs a reset 1612 */ 1613 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, 1614 "Kernel-submitted job timed out\n"); 1615 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), 1616 "VM job timed out on non-killed execqueue\n"); 1617 if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || 1618 (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { 1619 if (!xe_sched_invalidate_job(job, 2)) { 1620 xe_gt_reset_async(q->gt); 1621 goto rearm; 1622 } 1623 } 1624 1625 /* Mark all outstanding jobs as bad, thus completing them */ 1626 xe_sched_job_set_error(job, err); 1627 drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL) 1628 xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED); 1629 1630 if (xe_exec_queue_is_multi_queue(q)) { 1631 xe_guc_exec_queue_group_start(q); 1632 xe_guc_exec_queue_group_trigger_cleanup(q); 1633 } else { 1634 xe_sched_submission_start(sched); 1635 xe_guc_exec_queue_trigger_cleanup(q); 1636 } 1637 1638 /* 1639 * We want the job added back to the pending list so it gets freed; this 1640 * is what DRM_GPU_SCHED_STAT_NO_HANG does. 1641 */ 1642 return DRM_GPU_SCHED_STAT_NO_HANG; 1643 1644 rearm: 1645 /* 1646 * XXX: Ideally want to adjust timeout based on current execution time 1647 * but there is not currently an easy way to do in DRM scheduler. With 1648 * some thought, do this in a follow up. 
1649 */ 1650 if (xe_exec_queue_is_multi_queue(q)) 1651 xe_guc_exec_queue_group_start(q); 1652 else 1653 xe_sched_submission_start(sched); 1654 handle_vf_resume: 1655 return DRM_GPU_SCHED_STAT_NO_HANG; 1656 } 1657 1658 static void guc_exec_queue_fini(struct xe_exec_queue *q) 1659 { 1660 struct xe_guc_exec_queue *ge = q->guc; 1661 struct xe_guc *guc = exec_queue_to_guc(q); 1662 1663 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1664 struct xe_exec_queue_group *group = q->multi_queue.group; 1665 1666 mutex_lock(&group->list_lock); 1667 list_del(&q->multi_queue.link); 1668 mutex_unlock(&group->list_lock); 1669 } 1670 1671 release_guc_id(guc, q); 1672 xe_sched_entity_fini(&ge->entity); 1673 xe_sched_fini(&ge->sched); 1674 1675 /* 1676 * RCU free due sched being exported via DRM scheduler fences 1677 * (timeline name). 1678 */ 1679 kfree_rcu(ge, rcu); 1680 } 1681 1682 static void __guc_exec_queue_destroy_async(struct work_struct *w) 1683 { 1684 struct xe_guc_exec_queue *ge = 1685 container_of(w, struct xe_guc_exec_queue, destroy_async); 1686 struct xe_exec_queue *q = ge->q; 1687 struct xe_guc *guc = exec_queue_to_guc(q); 1688 1689 guard(xe_pm_runtime)(guc_to_xe(guc)); 1690 trace_xe_exec_queue_destroy(q); 1691 1692 /* Confirm no work left behind accessing device structures */ 1693 cancel_delayed_work_sync(&ge->sched.base.work_tdr); 1694 1695 xe_exec_queue_fini(q); 1696 } 1697 1698 static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) 1699 { 1700 struct xe_guc *guc = exec_queue_to_guc(q); 1701 struct xe_device *xe = guc_to_xe(guc); 1702 1703 INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); 1704 1705 /* We must block on kernel engines so slabs are empty on driver unload */ 1706 if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) 1707 __guc_exec_queue_destroy_async(&q->guc->destroy_async); 1708 else 1709 queue_work(xe->destroy_wq, &q->guc->destroy_async); 1710 } 1711 1712 static void __guc_exec_queue_destroy(struct xe_guc *guc, 
struct xe_exec_queue *q) 1713 { 1714 /* 1715 * Might be done from within the GPU scheduler, need to do async as we 1716 * fini the scheduler when the engine is fini'd, the scheduler can't 1717 * complete fini within itself (circular dependency). Async resolves 1718 * this we and don't really care when everything is fini'd, just that it 1719 * is. 1720 */ 1721 guc_exec_queue_destroy_async(q); 1722 } 1723 1724 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) 1725 { 1726 struct xe_exec_queue *q = msg->private_data; 1727 struct xe_guc *guc = exec_queue_to_guc(q); 1728 1729 xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 1730 trace_xe_exec_queue_cleanup_entity(q); 1731 1732 /* 1733 * Expected state transitions for cleanup: 1734 * - If the exec queue is registered and GuC firmware is running, we must first 1735 * disable scheduling and deregister the queue to ensure proper teardown and 1736 * resource release in the GuC, then destroy the exec queue on driver side. 1737 * - If the GuC is already stopped (e.g., during driver unload or GPU reset), 1738 * we cannot expect a response for the deregister request. In this case, 1739 * it is safe to directly destroy the exec queue on driver side, as the GuC 1740 * will not process further requests and all resources must be cleaned up locally. 
1741 */ 1742 if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) 1743 disable_scheduling_deregister(guc, q); 1744 else 1745 __guc_exec_queue_destroy(guc, q); 1746 } 1747 1748 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) 1749 { 1750 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); 1751 } 1752 1753 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) 1754 { 1755 struct xe_exec_queue *q = msg->private_data; 1756 struct xe_guc *guc = exec_queue_to_guc(q); 1757 1758 if (guc_exec_queue_allowed_to_change_state(q)) 1759 init_policies(guc, q); 1760 kfree(msg); 1761 } 1762 1763 static void __suspend_fence_signal(struct xe_exec_queue *q) 1764 { 1765 struct xe_guc *guc = exec_queue_to_guc(q); 1766 struct xe_device *xe = guc_to_xe(guc); 1767 1768 if (!q->guc->suspend_pending) 1769 return; 1770 1771 WRITE_ONCE(q->guc->suspend_pending, false); 1772 1773 /* 1774 * We use a GuC shared wait queue for VFs because the VF resfix start 1775 * interrupt must be able to wake all instances of suspend_wait. This 1776 * prevents the VF migration worker from being starved during 1777 * scheduling. 
1778 */ 1779 if (IS_SRIOV_VF(xe)) 1780 wake_up_all(&guc->ct.wq); 1781 else 1782 wake_up(&q->guc->suspend_wait); 1783 } 1784 1785 static void suspend_fence_signal(struct xe_exec_queue *q) 1786 { 1787 struct xe_guc *guc = exec_queue_to_guc(q); 1788 1789 xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || 1790 xe_guc_read_stopped(guc)); 1791 xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); 1792 1793 __suspend_fence_signal(q); 1794 } 1795 1796 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) 1797 { 1798 struct xe_exec_queue *q = msg->private_data; 1799 struct xe_guc *guc = exec_queue_to_guc(q); 1800 1801 if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && 1802 exec_queue_enabled(q)) { 1803 wait_event(guc->ct.wq, vf_recovery(guc) || 1804 ((q->guc->resume_time != RESUME_PENDING || 1805 xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); 1806 1807 if (!xe_guc_read_stopped(guc)) { 1808 s64 since_resume_ms = 1809 ktime_ms_delta(ktime_get(), 1810 q->guc->resume_time); 1811 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1812 since_resume_ms; 1813 1814 if (wait_ms > 0 && q->guc->resume_time) 1815 xe_sleep_relaxed_ms(wait_ms); 1816 1817 set_exec_queue_suspended(q); 1818 disable_scheduling(q, false); 1819 } 1820 } else if (q->guc->suspend_pending) { 1821 set_exec_queue_suspended(q); 1822 suspend_fence_signal(q); 1823 } 1824 } 1825 1826 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1827 { 1828 struct xe_exec_queue *q = msg->private_data; 1829 1830 if (guc_exec_queue_allowed_to_change_state(q)) { 1831 clear_exec_queue_suspended(q); 1832 if (!exec_queue_enabled(q)) { 1833 q->guc->resume_time = RESUME_PENDING; 1834 set_exec_queue_pending_resume(q); 1835 enable_scheduling(q); 1836 } 1837 } else { 1838 clear_exec_queue_suspended(q); 1839 } 1840 } 1841 1842 static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg) 1843 { 1844 
struct xe_exec_queue *q = msg->private_data; 1845 1846 if (guc_exec_queue_allowed_to_change_state(q)) { 1847 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) 1848 struct xe_guc *guc = exec_queue_to_guc(q); 1849 struct xe_exec_queue_group *group = q->multi_queue.group; 1850 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; 1851 int len = 0; 1852 1853 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; 1854 action[len++] = group->primary->guc->id; 1855 1856 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); 1857 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE 1858 1859 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 1860 } 1861 1862 kfree(msg); 1863 } 1864 1865 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ 1866 #define SET_SCHED_PROPS 2 1867 #define SUSPEND 3 1868 #define RESUME 4 1869 #define SET_MULTI_QUEUE_PRIORITY 5 1870 #define OPCODE_MASK 0xf 1871 #define MSG_LOCKED BIT(8) 1872 #define MSG_HEAD BIT(9) 1873 1874 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) 1875 { 1876 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); 1877 1878 trace_xe_sched_msg_recv(msg); 1879 1880 switch (msg->opcode) { 1881 case CLEANUP: 1882 __guc_exec_queue_process_msg_cleanup(msg); 1883 break; 1884 case SET_SCHED_PROPS: 1885 __guc_exec_queue_process_msg_set_sched_props(msg); 1886 break; 1887 case SUSPEND: 1888 __guc_exec_queue_process_msg_suspend(msg); 1889 break; 1890 case RESUME: 1891 __guc_exec_queue_process_msg_resume(msg); 1892 break; 1893 case SET_MULTI_QUEUE_PRIORITY: 1894 __guc_exec_queue_process_msg_set_multi_queue_priority(msg); 1895 break; 1896 default: 1897 XE_WARN_ON("Unknown message type"); 1898 } 1899 1900 xe_pm_runtime_put(xe); 1901 } 1902 1903 static const struct drm_sched_backend_ops drm_sched_ops = { 1904 .run_job = guc_exec_queue_run_job, 1905 .free_job = guc_exec_queue_free_job, 1906 .timedout_job = guc_exec_queue_timedout_job, 1907 }; 1908 1909 static const struct xe_sched_backend_ops xe_sched_ops = 
{ 1910 .process_msg = guc_exec_queue_process_msg, 1911 }; 1912 1913 static int guc_exec_queue_init(struct xe_exec_queue *q) 1914 { 1915 struct xe_gpu_scheduler *sched; 1916 struct xe_guc *guc = exec_queue_to_guc(q); 1917 struct workqueue_struct *submit_wq = NULL; 1918 struct xe_guc_exec_queue *ge; 1919 long timeout; 1920 int err, i; 1921 1922 xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); 1923 1924 ge = kzalloc_obj(*ge); 1925 if (!ge) 1926 return -ENOMEM; 1927 1928 q->guc = ge; 1929 ge->q = q; 1930 init_rcu_head(&ge->rcu); 1931 init_waitqueue_head(&ge->suspend_wait); 1932 1933 for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) 1934 INIT_LIST_HEAD(&ge->static_msgs[i].link); 1935 1936 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : 1937 msecs_to_jiffies(q->sched_props.job_timeout_ms); 1938 1939 /* 1940 * Use primary queue's submit_wq for all secondary queues of a 1941 * multi queue group. This serialization avoids any locking around 1942 * CGP synchronization with GuC. 
1943 */ 1944 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1945 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 1946 1947 submit_wq = primary->guc->sched.base.submit_wq; 1948 } 1949 1950 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, 1951 submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, 1952 timeout, guc_to_gt(guc)->ordered_wq, NULL, 1953 q->name, gt_to_xe(q->gt)->drm.dev); 1954 if (err) 1955 goto err_free; 1956 1957 sched = &ge->sched; 1958 err = xe_sched_entity_init(&ge->entity, sched); 1959 if (err) 1960 goto err_sched; 1961 1962 mutex_lock(&guc->submission_state.lock); 1963 1964 err = alloc_guc_id(guc, q); 1965 if (err) 1966 goto err_entity; 1967 1968 q->entity = &ge->entity; 1969 1970 if (xe_guc_read_stopped(guc) || vf_recovery(guc)) 1971 xe_sched_stop(sched); 1972 1973 mutex_unlock(&guc->submission_state.lock); 1974 1975 xe_exec_queue_assign_name(q, q->guc->id); 1976 1977 /* 1978 * Maintain secondary queues of the multi queue group in a list 1979 * for handling dependencies across the queues in the group. 
1980 */ 1981 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1982 struct xe_exec_queue_group *group = q->multi_queue.group; 1983 1984 INIT_LIST_HEAD(&q->multi_queue.link); 1985 mutex_lock(&group->list_lock); 1986 if (group->stopped) 1987 WRITE_ONCE(q->guc->sched.base.pause_submit, true); 1988 list_add_tail(&q->multi_queue.link, &group->list); 1989 mutex_unlock(&group->list_lock); 1990 } 1991 1992 if (xe_exec_queue_is_multi_queue(q)) 1993 trace_xe_exec_queue_create_multi_queue(q); 1994 else 1995 trace_xe_exec_queue_create(q); 1996 1997 return 0; 1998 1999 err_entity: 2000 mutex_unlock(&guc->submission_state.lock); 2001 xe_sched_entity_fini(&ge->entity); 2002 err_sched: 2003 xe_sched_fini(&ge->sched); 2004 err_free: 2005 kfree(ge); 2006 2007 return err; 2008 } 2009 2010 static void guc_exec_queue_kill(struct xe_exec_queue *q) 2011 { 2012 trace_xe_exec_queue_kill(q); 2013 set_exec_queue_killed(q); 2014 __suspend_fence_signal(q); 2015 xe_guc_exec_queue_trigger_cleanup(q); 2016 } 2017 2018 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 2019 u32 opcode) 2020 { 2021 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); 2022 2023 INIT_LIST_HEAD(&msg->link); 2024 msg->opcode = opcode & OPCODE_MASK; 2025 msg->private_data = q; 2026 2027 trace_xe_sched_msg_add(msg); 2028 if (opcode & MSG_HEAD) 2029 xe_sched_add_msg_head(&q->guc->sched, msg); 2030 else if (opcode & MSG_LOCKED) 2031 xe_sched_add_msg_locked(&q->guc->sched, msg); 2032 else 2033 xe_sched_add_msg(&q->guc->sched, msg); 2034 } 2035 2036 static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, 2037 struct xe_sched_msg *msg, 2038 u32 opcode) 2039 { 2040 if (!list_empty(&msg->link)) 2041 return; 2042 2043 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD); 2044 } 2045 2046 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, 2047 struct xe_sched_msg *msg, 2048 u32 opcode) 2049 { 2050 if (!list_empty(&msg->link)) 2051 return false; 2052 
2053 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 2054 2055 return true; 2056 } 2057 2058 #define STATIC_MSG_CLEANUP 0 2059 #define STATIC_MSG_SUSPEND 1 2060 #define STATIC_MSG_RESUME 2 2061 static void guc_exec_queue_destroy(struct xe_exec_queue *q) 2062 { 2063 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2064 2065 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) 2066 guc_exec_queue_add_msg(q, msg, CLEANUP); 2067 else 2068 __guc_exec_queue_destroy(exec_queue_to_guc(q), q); 2069 } 2070 2071 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 2072 enum xe_exec_queue_priority priority) 2073 { 2074 struct xe_sched_msg *msg; 2075 2076 if (q->sched_props.priority == priority || 2077 exec_queue_killed_or_banned_or_wedged(q)) 2078 return 0; 2079 2080 msg = kmalloc_obj(*msg); 2081 if (!msg) 2082 return -ENOMEM; 2083 2084 q->sched_props.priority = priority; 2085 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2086 2087 return 0; 2088 } 2089 2090 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 2091 { 2092 struct xe_sched_msg *msg; 2093 2094 if (q->sched_props.timeslice_us == timeslice_us || 2095 exec_queue_killed_or_banned_or_wedged(q)) 2096 return 0; 2097 2098 msg = kmalloc_obj(*msg); 2099 if (!msg) 2100 return -ENOMEM; 2101 2102 q->sched_props.timeslice_us = timeslice_us; 2103 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2104 2105 return 0; 2106 } 2107 2108 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 2109 u32 preempt_timeout_us) 2110 { 2111 struct xe_sched_msg *msg; 2112 2113 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 2114 exec_queue_killed_or_banned_or_wedged(q)) 2115 return 0; 2116 2117 msg = kmalloc_obj(*msg); 2118 if (!msg) 2119 return -ENOMEM; 2120 2121 q->sched_props.preempt_timeout_us = preempt_timeout_us; 2122 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2123 2124 return 0; 2125 } 2126 2127 static int 
guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, 2128 enum xe_multi_queue_priority priority) 2129 { 2130 struct xe_sched_msg *msg; 2131 2132 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); 2133 2134 if (exec_queue_killed_or_banned_or_wedged(q)) 2135 return 0; 2136 2137 msg = kmalloc_obj(*msg); 2138 if (!msg) 2139 return -ENOMEM; 2140 2141 scoped_guard(spinlock, &q->multi_queue.lock) { 2142 if (q->multi_queue.priority == priority) { 2143 kfree(msg); 2144 return 0; 2145 } 2146 2147 q->multi_queue.priority = priority; 2148 } 2149 2150 guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); 2151 2152 return 0; 2153 } 2154 2155 static int guc_exec_queue_suspend(struct xe_exec_queue *q) 2156 { 2157 struct xe_gpu_scheduler *sched = &q->guc->sched; 2158 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2159 2160 if (exec_queue_killed_or_banned_or_wedged(q)) 2161 return -EINVAL; 2162 2163 xe_sched_msg_lock(sched); 2164 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) 2165 q->guc->suspend_pending = true; 2166 xe_sched_msg_unlock(sched); 2167 2168 return 0; 2169 } 2170 2171 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 2172 { 2173 struct xe_guc *guc = exec_queue_to_guc(q); 2174 struct xe_device *xe = guc_to_xe(guc); 2175 int ret; 2176 2177 /* 2178 * Likely don't need to check exec_queue_killed() as we clear 2179 * suspend_pending upon kill but to be paranoid but races in which 2180 * suspend_pending is set after kill also check kill here. 
2181 */ 2182 #define WAIT_COND \ 2183 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ 2184 xe_guc_read_stopped(guc)) 2185 2186 retry: 2187 if (IS_SRIOV_VF(xe)) 2188 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || 2189 vf_recovery(guc), 2190 HZ * 5); 2191 else 2192 ret = wait_event_interruptible_timeout(q->guc->suspend_wait, 2193 WAIT_COND, HZ * 5); 2194 2195 if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc)))) 2196 return -EAGAIN; 2197 2198 if (!ret) { 2199 xe_gt_warn(guc_to_gt(guc), 2200 "Suspend fence, guc_id=%d, failed to respond", 2201 q->guc->id); 2202 /* XXX: Trigger GT reset? */ 2203 return -ETIME; 2204 } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { 2205 /* Corner case on RESFIX DONE where vf_recovery() changes */ 2206 goto retry; 2207 } 2208 2209 #undef WAIT_COND 2210 2211 return ret < 0 ? ret : 0; 2212 } 2213 2214 static void guc_exec_queue_resume(struct xe_exec_queue *q) 2215 { 2216 struct xe_gpu_scheduler *sched = &q->guc->sched; 2217 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2218 struct xe_guc *guc = exec_queue_to_guc(q); 2219 2220 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); 2221 2222 xe_sched_msg_lock(sched); 2223 guc_exec_queue_try_add_msg(q, msg, RESUME); 2224 xe_sched_msg_unlock(sched); 2225 } 2226 2227 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 2228 { 2229 if (xe_exec_queue_is_multi_queue_secondary(q) && 2230 guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q))) 2231 return true; 2232 2233 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); 2234 } 2235 2236 static bool guc_exec_queue_active(struct xe_exec_queue *q) 2237 { 2238 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 2239 2240 return exec_queue_enabled(primary) && 2241 !exec_queue_pending_disable(primary); 2242 } 2243 2244 /* 2245 * All of these functions are an abstraction layer which other parts of Xe can 2246 * use to trap into the GuC 
backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
	.active = guc_exec_queue_active,
};

/*
 * Stop one exec queue as part of stopping GuC submission: halt its scheduler,
 * complete a pending suspend, reduce the state to the bits that survive a
 * stop, ban the queue if its first pending job qualifies, and finish the
 * destroy of a queue that was registered and already marked destroyed.
 */
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool do_destroy = false;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		/* Sampled now: the registered bit is masked out below */
		if (exec_queue_destroyed(q))
			do_destroy = true;
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	/* Keep only these bits; every other state bit is cleared */
	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
		   EXEC_QUEUE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		bool ban = false;

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				ban = true;
			}
		}

		if (ban) {
			set_exec_queue_banned(q);
			xe_guc_exec_queue_trigger_cleanup(q);
		}
	}

	/* Deferred until the rest of the stop handling is done */
	if (do_destroy)
		__guc_exec_queue_destroy(guc, q);
}

/*
 * Mark submission as stopped and wake everyone waiting on the CT wait queue.
 *
 * Returns the value submission_state.stopped held before this call, i.e.
 * non-zero if submission was already stopped.
 */
static int guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to prevent against concurrent reset
	 * and releasing any TDRs waiting on guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

/*
 * Public wrapper around guc_submit_reset_prepare(). Returns 0 without doing
 * anything if VF recovery is in progress (which also triggers a warning) or
 * if submission state has not been initialized.
 */
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

	return guc_submit_reset_prepare(guc);
}

/* Block until submission is no longer stopped or the device is wedged */
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}

/*
 * Stop every exec queue known to the GuC. Submission must already have been
 * marked stopped (asserted).
 */
void xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_stop(guc, q);
	}

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
*/

}

/*
 * Undo state transitions that were still awaiting a response when the queue
 * was paused. The pending bits are cleared and what has to be re-issued later
 * is recorded in needs_resume / needs_cleanup / needs_suspend (or by clearing
 * the registered / enabled bits).
 */
static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
						       struct xe_exec_queue *q)
{
	bool pending_enable, pending_disable, pending_resume;

	/* Sample both before any of the bits are modified below */
	pending_enable = exec_queue_pending_enable(q);
	pending_resume = exec_queue_pending_resume(q);

	/* The pending enable came from a resume: replay the resume */
	if (pending_enable && pending_resume) {
		q->guc->needs_resume = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
			  q->guc->id);
	}

	/* The pending enable did not come from a resume: drop registration */
	if (pending_enable && !pending_resume) {
		clear_exec_queue_registered(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable) {
		clear_exec_queue_enabled(q);
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
			  q->guc->id);
	}

	/* A destroy was in flight: replay it via a CLEANUP message */
	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
		clear_exec_queue_destroyed(q);
		q->guc->needs_cleanup = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
			  q->guc->id);
	}

	pending_disable = exec_queue_pending_disable(q);

	/* The pending disable came from a suspend: replay the suspend */
	if (pending_disable && exec_queue_suspended(q)) {
		clear_exec_queue_suspended(q);
		q->guc->needs_suspend = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
			  q->guc->id);
	}

	if (pending_disable) {
		/* Restore the enabled bit unless an enable was pending too */
		if (!pending_enable)
			set_exec_queue_enabled(q);
		clear_exec_queue_pending_disable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
			  q->guc->id);
	}

	q->guc->resume_time = 0;
}

/* Overwrite every dword of the LRC's parallel work queue with a zero-length NOOP */
static void lrc_parallel_clear(struct xe_lrc *lrc)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);
	struct iosys_map map = xe_lrc_parallel_map(lrc);
	int i;

	for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
		parallel_write(xe, map, wq[i],
			       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
			       FIELD_PREP(WQ_LEN_MASK, 0));
}

/*
 * This function is quite complex, but it is the only real way to ensure no
 * state is lost during VF resume flows. It scans the queue state, makes
 * adjustments as needed, and marks the jobs / messages which are to be
 * replayed upon unpause.
 *
 * The caller must hold guc->submission_state.lock.
 */
static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job;
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);
	cancel_delayed_work_sync(&sched->base.work_tdr);

	guc_exec_queue_revert_pending_state_change(guc, q);

	if (xe_exec_queue_is_parallel(q)) {
		/* Pairs with WRITE_ONCE in __xe_exec_queue_init  */
		struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);

		/*
		 * NOP existing WQ commands that may contain stale GGTT
		 * addresses. These will be replayed upon unpause. The hardware
		 * seems to get confused if the WQ head/tail pointers are
		 * adjusted.
		 */
		if (lrc)
			lrc_parallel_clear(lrc);
	}

	job = xe_sched_first_pending_job(sched);
	if (job) {
		/* Replay starts from the first pending job */
		job->restore_replay = true;

		/*
		 * Adjust software tail so jobs submitted overwrite previous
		 * position in ring buffer with new GGTT addresses.
		 */
		for (i = 0; i < q->width; ++i)
			q->lrc[i]->ring.tail = job->ptrs[i].head;
	}
}

/**
 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
2494 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2495 */ 2496 void xe_guc_submit_pause(struct xe_guc *guc) 2497 { 2498 struct xe_exec_queue *q; 2499 unsigned long index; 2500 2501 mutex_lock(&guc->submission_state.lock); 2502 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2503 xe_sched_submission_stop(&q->guc->sched); 2504 mutex_unlock(&guc->submission_state.lock); 2505 } 2506 2507 /** 2508 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF. 2509 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2510 */ 2511 void xe_guc_submit_pause_vf(struct xe_guc *guc) 2512 { 2513 struct xe_exec_queue *q; 2514 unsigned long index; 2515 2516 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2517 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2518 2519 mutex_lock(&guc->submission_state.lock); 2520 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2521 /* Prevent redundant attempts to stop parallel queues */ 2522 if (q->guc->id != index) 2523 continue; 2524 2525 guc_exec_queue_pause(guc, q); 2526 } 2527 mutex_unlock(&guc->submission_state.lock); 2528 } 2529 2530 static void guc_exec_queue_start(struct xe_exec_queue *q) 2531 { 2532 struct xe_gpu_scheduler *sched = &q->guc->sched; 2533 2534 if (!exec_queue_killed_or_banned_or_wedged(q)) { 2535 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2536 int i; 2537 2538 trace_xe_exec_queue_resubmit(q); 2539 if (job) { 2540 for (i = 0; i < q->width; ++i) { 2541 /* 2542 * The GuC context is unregistered at this point 2543 * time, adjusting software ring tail ensures 2544 * jobs are rewritten in original placement, 2545 * adjusting LRC tail ensures the newly loaded 2546 * GuC / contexts only view the LRC tail 2547 * increasing as jobs are written out. 
2548 */ 2549 q->lrc[i]->ring.tail = job->ptrs[i].head; 2550 xe_lrc_set_ring_tail(q->lrc[i], 2551 xe_lrc_ring_head(q->lrc[i])); 2552 } 2553 } 2554 xe_sched_resubmit_jobs(sched); 2555 } 2556 2557 xe_sched_submission_start(sched); 2558 xe_sched_submission_resume_tdr(sched); 2559 } 2560 2561 int xe_guc_submit_start(struct xe_guc *guc) 2562 { 2563 struct xe_exec_queue *q; 2564 unsigned long index; 2565 2566 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 2567 2568 mutex_lock(&guc->submission_state.lock); 2569 atomic_dec(&guc->submission_state.stopped); 2570 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2571 /* Prevent redundant attempts to start parallel queues */ 2572 if (q->guc->id != index) 2573 continue; 2574 2575 guc_exec_queue_start(q); 2576 } 2577 mutex_unlock(&guc->submission_state.lock); 2578 2579 wake_up_all(&guc->ct.wq); 2580 2581 return 0; 2582 } 2583 2584 static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, 2585 struct xe_exec_queue *q) 2586 { 2587 struct xe_gpu_scheduler *sched = &q->guc->sched; 2588 struct xe_sched_job *job = NULL; 2589 struct drm_sched_job *s_job; 2590 bool restore_replay = false; 2591 2592 drm_sched_for_each_pending_job(s_job, &sched->base, NULL) { 2593 job = to_xe_sched_job(s_job); 2594 restore_replay |= job->restore_replay; 2595 if (restore_replay) { 2596 xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d", 2597 q->guc->id, xe_sched_job_seqno(job)); 2598 2599 q->ring_ops->emit_job(job); 2600 job->restore_replay = true; 2601 } 2602 } 2603 2604 if (job) 2605 job->last_replay = true; 2606 } 2607 2608 /** 2609 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF. 
* @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
 */
void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_unpause_prepare(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Re-queue the CLEANUP / SUSPEND / RESUME messages recorded in
 * needs_cleanup / needs_suspend / needs_resume and clear those flags.
 */
static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg;

	if (q->guc->needs_cleanup) {
		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

		guc_exec_queue_add_msg(q, msg, CLEANUP);
		q->guc->needs_cleanup = false;
	}

	if (q->guc->needs_suspend) {
		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
		xe_sched_msg_unlock(sched);

		q->guc->needs_suspend = false;
	}

	/*
	 * The resume must be in the message queue before the suspend, as it is
	 * not possible for a resume to be issued while a suspend is pending,
	 * but the inverse is possible. Both are added at the head, so the
	 * resume is added after the suspend.
	 */
	if (q->guc->needs_resume) {
		msg = q->guc->static_msgs + STATIC_MSG_RESUME;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
		xe_sched_msg_unlock(sched);

		q->guc->needs_resume = false;
	}
}

/*
 * Unpause one exec queue: resubmit its jobs, replay the recorded state
 * changes and restart the scheduler and its TDR. Cleanup is triggered for
 * queues that were already killed, banned or wedged on entry.
 *
 * The caller must hold guc->submission_state.lock.
 */
static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	/* Sampled before anything is restarted */
	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);

	lockdep_assert_held(&guc->submission_state.lock);

	xe_sched_resubmit_jobs(sched);
	guc_exec_queue_replay_pending_state_change(q);
	xe_sched_submission_start(sched);
	if (needs_tdr)
		xe_guc_exec_queue_trigger_cleanup(q);
	xe_sched_submission_resume_tdr(sched);
}

/**
 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_start(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/*
		 * Prevent redundant attempts to stop parallel queues, or queues
		 * created after resfix done.
		 */
		if (q->guc->id != index ||
		    !drm_sched_is_stopped(&q->guc->sched.base))
			continue;

		guc_exec_queue_unpause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
 */
void xe_guc_submit_pause_abort(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		/* Restart the scheduler, then kill the queue */
		xe_sched_submission_start(sched);
		guc_exec_queue_kill(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Look up the exec queue a G2H message refers to. Logs an error and returns
 * NULL when @guc_id is out of range or no queue is stored under it. The
 * asserts check that @guc_id lies within [q->guc->id, q->guc->id + q->width).
 */
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
		return NULL;
	}

	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));

	return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc),
!exec_queue_pending_enable(q));

	trace_xe_exec_queue_deregister(q);

	/*
	 * For a multi queue secondary no deregister message is sent; the
	 * deregister is completed directly.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_deregister_done(guc, q);
	else
		xe_guc_ct_send_g2h_handler(&guc->ct, action,
					   ARRAY_SIZE(action));
}

/*
 * Process a scheduling-done notification for @q.
 *
 * @runnable_state 1 completes a pending enable: the resume time is recorded,
 * the pending bits are cleared and CT waiters are woken. @runnable_state 0
 * completes a pending disable: a pending suspend is signalled, otherwise the
 * queue is deregistered if it has been marked destroyed.
 */
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
			      u32 runnable_state)
{
	trace_xe_exec_queue_scheduling_done(q);

	if (runnable_state == 1) {
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));

		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));

		if (q->guc->suspend_pending) {
			clear_exec_queue_pending_disable(q);
			suspend_fence_signal(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			if (exec_queue_destroyed(q)) {
				/*
				 * Make sure to clear the pending_disable only
				 * after sampling the destroyed state. We want
				 * to ensure we don't trigger the unregister too
				 * early with something intending to only
				 * disable scheduling. The caller doing the
				 * destroy must wait for an ongoing
				 * pending_disable before marking as destroyed.
				 */
				clear_exec_queue_pending_disable(q);
				deregister_exec_queue(guc, q);
			} else {
				clear_exec_queue_pending_disable(q);
			}
		}
	}
}

/* handle_sched_done() for a multi queue secondary, run under the CT lock */
static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state)
{
	/* Take the CT lock here as handle_sched_done() may send an H2G message */
	mutex_lock(&guc->ct.lock);
	handle_sched_done(guc, q, runnable_state);
	mutex_unlock(&guc->ct.lock);
}

/*
 * G2H handler for a scheduling-done message. @msg[0] is the guc_id and
 * @msg[1] the runnable state.
 *
 * Returns 0 on success, or -EPROTO if the message is too short, the guc_id
 * does not resolve to a queue, or the queue has neither an enable nor a
 * disable pending.
 */
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id, runnable_state;

	if (unlikely(len < 2))
		return -EPROTO;

	guc_id = msg[0];
	runnable_state = msg[1];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		xe_gt_err(guc_to_gt(guc),
			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
			  atomic_read(&q->guc->state), q->guc->id,
			  runnable_state);
		return -EPROTO;
	}

	handle_sched_done(guc, q, runnable_state);

	return 0;
}

/* Finish a deregister: drop the registered bit and destroy the queue */
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_deregister_done(q);

	clear_exec_queue_registered(q);
	__guc_exec_queue_destroy(guc, q);
}

/*
 * G2H handler for a deregister-done message. @msg[0] is the guc_id.
 *
 * Returns 0 on success, or -EPROTO if the message is too short, the guc_id
 * does not resolve to a queue, or the queue is not in a state in which a
 * deregister is expected (destroyed, not enabled, nothing pending).
 */
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
		xe_gt_err(guc_to_gt(guc),
			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
			  atomic_read(&q->guc->state), q->guc->id);
		return -EPROTO;
	}

	handle_deregister_done(guc, q);

	return 0;
}

/*
 * G2H handler for an engine reset notification. @msg[0] is the guc_id.
 *
 * Returns 0 on success, or -EPROTO if the message is too short or the guc_id
 * does not resolve to a queue.
 */
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x",
		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id,
		   atomic_read(&q->guc->state));

	trace_xe_exec_queue_reset(q);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel
	 * jobs by setting timeout of the job to the minimum value kicking
	 * guc_exec_queue_timedout_job.
	 */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}

/*
 * xe_guc_error_capture_handler - Handler of GuC captured message
 * @guc: The GuC object
 * @msg: Point to the message
 * @len: The message length
 *
 * When GuC captured data is ready, GuC will send message
 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
 * called 1st to check status before process the data comes with the message.
 *
 * Returns: error code.
0 if success 2953 */ 2954 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 2955 { 2956 u32 status; 2957 2958 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 2959 return -EPROTO; 2960 2961 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 2962 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 2963 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 2964 2965 xe_guc_capture_process(guc); 2966 2967 return 0; 2968 } 2969 2970 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 2971 u32 len) 2972 { 2973 struct xe_gt *gt = guc_to_gt(guc); 2974 struct xe_exec_queue *q; 2975 u32 guc_id; 2976 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 2977 2978 if (unlikely(!len || len > 2)) 2979 return -EPROTO; 2980 2981 guc_id = msg[0]; 2982 2983 if (len == 2) 2984 type = msg[1]; 2985 2986 if (guc_id == GUC_ID_UNKNOWN) { 2987 /* 2988 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 2989 * context. In such case only PF will be notified about that fault. 2990 */ 2991 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 2992 return 0; 2993 } 2994 2995 q = g2h_exec_queue_lookup(guc, guc_id); 2996 if (unlikely(!q)) 2997 return -EPROTO; 2998 2999 /* 3000 * The type is HW-defined and changes based on platform, so we don't 3001 * decode it in the kernel and only check if it is valid. 3002 * See bspec 54047 and 72187 for details. 
3003 */ 3004 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 3005 xe_gt_info(gt, 3006 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 3007 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3008 else 3009 xe_gt_info(gt, 3010 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 3011 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3012 3013 trace_xe_exec_queue_memory_cat_error(q); 3014 3015 /* Treat the same as engine reset */ 3016 xe_guc_exec_queue_reset_trigger_cleanup(q); 3017 3018 return 0; 3019 } 3020 3021 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 3022 { 3023 struct xe_gt *gt = guc_to_gt(guc); 3024 u8 guc_class, instance; 3025 u32 reason; 3026 3027 if (unlikely(len != 3)) 3028 return -EPROTO; 3029 3030 guc_class = msg[0]; 3031 instance = msg[1]; 3032 reason = msg[2]; 3033 3034 /* Unexpected failure of a hardware feature, log an actual error */ 3035 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3036 guc_class, instance, reason); 3037 3038 xe_gt_reset_async(gt); 3039 3040 return 0; 3041 } 3042 3043 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 3044 u32 len) 3045 { 3046 struct xe_gt *gt = guc_to_gt(guc); 3047 struct xe_device *xe = guc_to_xe(guc); 3048 struct xe_exec_queue *q; 3049 u32 guc_id = msg[2]; 3050 3051 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3052 drm_err(&xe->drm, "Invalid length %u", len); 3053 return -EPROTO; 3054 } 3055 3056 q = g2h_exec_queue_lookup(guc, guc_id); 3057 if (unlikely(!q)) 3058 return -EPROTO; 3059 3060 xe_gt_dbg(gt, 3061 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3062 msg[0] & 1 ? 
"uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3063 3064 trace_xe_exec_queue_cgp_context_error(q); 3065 3066 /* Treat the same as engine reset */ 3067 xe_guc_exec_queue_reset_trigger_cleanup(q); 3068 3069 return 0; 3070 } 3071 3072 /** 3073 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3074 * @guc: guc 3075 * @msg: message indicating CGP sync done 3076 * @len: length of message 3077 * 3078 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3079 * for CGP synchronization to complete. 3080 * 3081 * Return: 0 on success, -EPROTO for malformed messages. 3082 */ 3083 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3084 { 3085 struct xe_device *xe = guc_to_xe(guc); 3086 struct xe_exec_queue *q; 3087 u32 guc_id = msg[0]; 3088 3089 if (unlikely(len < 1)) { 3090 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3091 return -EPROTO; 3092 } 3093 3094 q = g2h_exec_queue_lookup(guc, guc_id); 3095 if (unlikely(!q)) 3096 return -EPROTO; 3097 3098 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3099 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3100 return -EPROTO; 3101 } 3102 3103 /* Wakeup the serialized cgp update wait */ 3104 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3105 xe_guc_ct_wake_waiters(&guc->ct); 3106 3107 return 0; 3108 } 3109 3110 static void 3111 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3112 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3113 { 3114 struct xe_guc *guc = exec_queue_to_guc(q); 3115 struct xe_device *xe = guc_to_xe(guc); 3116 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3117 int i; 3118 3119 snapshot->guc.wqi_head = q->guc->wqi_head; 3120 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3121 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3122 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3123 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 
3124 wq_desc.wq_status); 3125 3126 if (snapshot->parallel.wq_desc.head != 3127 snapshot->parallel.wq_desc.tail) { 3128 for (i = snapshot->parallel.wq_desc.head; 3129 i != snapshot->parallel.wq_desc.tail; 3130 i = (i + sizeof(u32)) % WQ_SIZE) 3131 snapshot->parallel.wq[i / sizeof(u32)] = 3132 parallel_read(xe, map, wq[i / sizeof(u32)]); 3133 } 3134 } 3135 3136 static void 3137 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3138 struct drm_printer *p) 3139 { 3140 int i; 3141 3142 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3143 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3144 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3145 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3146 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3147 3148 if (snapshot->parallel.wq_desc.head != 3149 snapshot->parallel.wq_desc.tail) { 3150 for (i = snapshot->parallel.wq_desc.head; 3151 i != snapshot->parallel.wq_desc.tail; 3152 i = (i + sizeof(u32)) % WQ_SIZE) 3153 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3154 snapshot->parallel.wq[i / sizeof(u32)]); 3155 } 3156 } 3157 3158 /** 3159 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3160 * @q: faulty exec queue 3161 * 3162 * This can be printed out in a later stage like during dev_coredump 3163 * analysis. 3164 * 3165 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3166 * caller, using `xe_guc_exec_queue_snapshot_free`. 
3167 */ 3168 struct xe_guc_submit_exec_queue_snapshot * 3169 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 3170 { 3171 struct xe_gpu_scheduler *sched = &q->guc->sched; 3172 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3173 int i; 3174 3175 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC); 3176 3177 if (!snapshot) 3178 return NULL; 3179 3180 snapshot->guc.id = q->guc->id; 3181 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 3182 snapshot->class = q->class; 3183 snapshot->logical_mask = q->logical_mask; 3184 snapshot->width = q->width; 3185 snapshot->refcount = kref_read(&q->refcount); 3186 snapshot->sched_timeout = sched->base.timeout; 3187 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 3188 snapshot->sched_props.preempt_timeout_us = 3189 q->sched_props.preempt_timeout_us; 3190 3191 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width, 3192 GFP_ATOMIC); 3193 3194 if (snapshot->lrc) { 3195 for (i = 0; i < q->width; ++i) { 3196 struct xe_lrc *lrc = q->lrc[i]; 3197 3198 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 3199 } 3200 } 3201 3202 snapshot->schedule_state = atomic_read(&q->guc->state); 3203 snapshot->exec_queue_flags = q->flags; 3204 3205 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 3206 if (snapshot->parallel_execution) 3207 guc_exec_queue_wq_snapshot_capture(q, snapshot); 3208 3209 if (xe_exec_queue_is_multi_queue(q)) { 3210 snapshot->multi_queue.valid = true; 3211 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; 3212 snapshot->multi_queue.pos = q->multi_queue.pos; 3213 } 3214 3215 return snapshot; 3216 } 3217 3218 /** 3219 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. 3220 * @snapshot: Previously captured snapshot of job. 3221 * 3222 * This captures some data that requires taking some locks, so it cannot be done in signaling path. 
3223 */ 3224 void 3225 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3226 { 3227 int i; 3228 3229 if (!snapshot || !snapshot->lrc) 3230 return; 3231 3232 for (i = 0; i < snapshot->width; ++i) 3233 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3234 } 3235 3236 /** 3237 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3238 * @snapshot: GuC Submit Engine snapshot object. 3239 * @p: drm_printer where it will be printed out. 3240 * 3241 * This function prints out a given GuC Submit Engine snapshot object. 3242 */ 3243 void 3244 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3245 struct drm_printer *p) 3246 { 3247 int i; 3248 3249 if (!snapshot) 3250 return; 3251 3252 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id); 3253 drm_printf(p, "\tName: %s\n", snapshot->name); 3254 drm_printf(p, "\tClass: %d\n", snapshot->class); 3255 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); 3256 drm_printf(p, "\tWidth: %d\n", snapshot->width); 3257 drm_printf(p, "\tRef: %d\n", snapshot->refcount); 3258 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); 3259 drm_printf(p, "\tTimeslice: %u (us)\n", 3260 snapshot->sched_props.timeslice_us); 3261 drm_printf(p, "\tPreempt timeout: %u (us)\n", 3262 snapshot->sched_props.preempt_timeout_us); 3263 3264 for (i = 0; snapshot->lrc && i < snapshot->width; ++i) 3265 xe_lrc_snapshot_print(snapshot->lrc[i], p); 3266 3267 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); 3268 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); 3269 3270 if (snapshot->parallel_execution) 3271 guc_exec_queue_wq_snapshot_print(snapshot, p); 3272 3273 if (snapshot->multi_queue.valid) { 3274 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); 3275 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); 3276 } 3277 } 3278 3279 /** 3280 * 
xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given 3281 * snapshot. 3282 * @snapshot: GuC Submit Engine snapshot object. 3283 * 3284 * This function free all the memory that needed to be allocated at capture 3285 * time. 3286 */ 3287 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3288 { 3289 int i; 3290 3291 if (!snapshot) 3292 return; 3293 3294 if (snapshot->lrc) { 3295 for (i = 0; i < snapshot->width; i++) 3296 xe_lrc_snapshot_free(snapshot->lrc[i]); 3297 kfree(snapshot->lrc); 3298 } 3299 kfree(snapshot); 3300 } 3301 3302 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) 3303 { 3304 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3305 3306 snapshot = xe_guc_exec_queue_snapshot_capture(q); 3307 xe_guc_exec_queue_snapshot_print(snapshot, p); 3308 xe_guc_exec_queue_snapshot_free(snapshot); 3309 } 3310 3311 /** 3312 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. 3313 * @q: Execution queue 3314 * @ctx_type: Type of the context 3315 * 3316 * This function registers the execution queue with the guc. Special context 3317 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE 3318 * are only applicable for IGPU and in the VF. 3319 * Submits the execution queue to GUC after registering it. 3320 * 3321 * Returns - None. 
3322 */ 3323 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3324 { 3325 struct xe_guc *guc = exec_queue_to_guc(q); 3326 struct xe_device *xe = guc_to_xe(guc); 3327 struct xe_gt *gt = guc_to_gt(guc); 3328 3329 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3330 xe_gt_assert(gt, !IS_DGFX(xe)); 3331 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3332 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3333 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3334 3335 register_exec_queue(q, ctx_type); 3336 enable_scheduling(q); 3337 } 3338 3339 /** 3340 * xe_guc_submit_print - GuC Submit Print. 3341 * @guc: GuC. 3342 * @p: drm_printer where it will be printed out. 3343 * 3344 * This function capture and prints snapshots of **all** GuC Engines. 3345 */ 3346 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3347 { 3348 struct xe_exec_queue *q; 3349 unsigned long index; 3350 3351 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3352 return; 3353 3354 mutex_lock(&guc->submission_state.lock); 3355 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3356 guc_exec_queue_print(q, p); 3357 mutex_unlock(&guc->submission_state.lock); 3358 } 3359 3360 /** 3361 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues 3362 * registered with the GuC 3363 * @guc: GuC. 3364 * 3365 * Return: true if any MLRC queue is registered with the GuC, false otherwise. 3366 */ 3367 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) 3368 { 3369 struct xe_exec_queue *q; 3370 unsigned long index; 3371 3372 guard(mutex)(&guc->submission_state.lock); 3373 3374 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3375 if (q->width > 1) 3376 return true; 3377 3378 return false; 3379 } 3380 3381 /** 3382 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3383 * exec queues registered to given GuC. 
3384 * @guc: the &xe_guc struct instance 3385 * @scratch: scratch buffer to be used as temporary storage 3386 * 3387 * Returns: zero on success, negative error code on failure. 3388 */ 3389 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3390 { 3391 struct xe_exec_queue *q; 3392 unsigned long index; 3393 int err = 0; 3394 3395 mutex_lock(&guc->submission_state.lock); 3396 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3397 /* Prevent redundant attempts to stop parallel queues */ 3398 if (q->guc->id != index) 3399 continue; 3400 3401 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3402 if (err) 3403 break; 3404 } 3405 mutex_unlock(&guc->submission_state.lock); 3406 3407 return err; 3408 } 3409