1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_guc_submit.h" 7 8 #include <linux/bitfield.h> 9 #include <linux/bitmap.h> 10 #include <linux/circ_buf.h> 11 #include <linux/dma-fence-array.h> 12 13 #include <drm/drm_managed.h> 14 15 #include "abi/guc_actions_abi.h" 16 #include "abi/guc_actions_slpc_abi.h" 17 #include "abi/guc_klvs_abi.h" 18 #include "xe_assert.h" 19 #include "xe_bo.h" 20 #include "xe_devcoredump.h" 21 #include "xe_device.h" 22 #include "xe_exec_queue.h" 23 #include "xe_force_wake.h" 24 #include "xe_gpu_scheduler.h" 25 #include "xe_gt.h" 26 #include "xe_gt_clock.h" 27 #include "xe_gt_printk.h" 28 #include "xe_guc.h" 29 #include "xe_guc_capture.h" 30 #include "xe_guc_ct.h" 31 #include "xe_guc_exec_queue_types.h" 32 #include "xe_guc_id_mgr.h" 33 #include "xe_guc_klv_helpers.h" 34 #include "xe_guc_submit_types.h" 35 #include "xe_hw_engine.h" 36 #include "xe_lrc.h" 37 #include "xe_macros.h" 38 #include "xe_map.h" 39 #include "xe_mocs.h" 40 #include "xe_pm.h" 41 #include "xe_ring_ops_types.h" 42 #include "xe_sched_job.h" 43 #include "xe_sleep.h" 44 #include "xe_trace.h" 45 #include "xe_uc_fw.h" 46 #include "xe_vm.h" 47 48 #define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6 49 50 static int guc_submit_reset_prepare(struct xe_guc *guc); 51 52 static struct xe_guc * 53 exec_queue_to_guc(struct xe_exec_queue *q) 54 { 55 return &q->gt->uc.guc; 56 } 57 58 /* 59 * Helpers for engine state, using an atomic as some of the bits can transition 60 * as the same time (e.g. a suspend can be happning at the same time as schedule 61 * engine done being processed). 
62 */ 63 #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) 64 #define EXEC_QUEUE_STATE_ENABLED (1 << 1) 65 #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) 66 #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) 67 #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) 68 #define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) 69 #define EXEC_QUEUE_STATE_RESET (1 << 6) 70 #define EXEC_QUEUE_STATE_KILLED (1 << 7) 71 #define EXEC_QUEUE_STATE_WEDGED (1 << 8) 72 #define EXEC_QUEUE_STATE_BANNED (1 << 9) 73 #define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 10) 74 75 static bool exec_queue_registered(struct xe_exec_queue *q) 76 { 77 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; 78 } 79 80 static void set_exec_queue_registered(struct xe_exec_queue *q) 81 { 82 atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 83 } 84 85 static void clear_exec_queue_registered(struct xe_exec_queue *q) 86 { 87 atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 88 } 89 90 static bool exec_queue_enabled(struct xe_exec_queue *q) 91 { 92 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; 93 } 94 95 static void set_exec_queue_enabled(struct xe_exec_queue *q) 96 { 97 atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 98 } 99 100 static void clear_exec_queue_enabled(struct xe_exec_queue *q) 101 { 102 atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 103 } 104 105 static bool exec_queue_pending_enable(struct xe_exec_queue *q) 106 { 107 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; 108 } 109 110 static void set_exec_queue_pending_enable(struct xe_exec_queue *q) 111 { 112 atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 113 } 114 115 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) 116 { 117 atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 118 } 119 120 static bool exec_queue_pending_disable(struct xe_exec_queue *q) 121 { 122 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; 123 } 124 125 static 
void set_exec_queue_pending_disable(struct xe_exec_queue *q) 126 { 127 atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 128 } 129 130 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) 131 { 132 atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 133 } 134 135 static bool exec_queue_destroyed(struct xe_exec_queue *q) 136 { 137 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; 138 } 139 140 static void set_exec_queue_destroyed(struct xe_exec_queue *q) 141 { 142 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 143 } 144 145 static void clear_exec_queue_destroyed(struct xe_exec_queue *q) 146 { 147 atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 148 } 149 150 static bool exec_queue_banned(struct xe_exec_queue *q) 151 { 152 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; 153 } 154 155 static void set_exec_queue_banned(struct xe_exec_queue *q) 156 { 157 atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 158 } 159 160 static void clear_exec_queue_banned(struct xe_exec_queue *q) 161 { 162 atomic_andnot(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 163 } 164 165 static bool exec_queue_suspended(struct xe_exec_queue *q) 166 { 167 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; 168 } 169 170 static void set_exec_queue_suspended(struct xe_exec_queue *q) 171 { 172 atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 173 } 174 175 static void clear_exec_queue_suspended(struct xe_exec_queue *q) 176 { 177 atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 178 } 179 180 static bool exec_queue_reset(struct xe_exec_queue *q) 181 { 182 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; 183 } 184 185 static void set_exec_queue_reset(struct xe_exec_queue *q) 186 { 187 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); 188 } 189 190 static bool exec_queue_killed(struct xe_exec_queue *q) 191 { 192 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; 193 } 194 195 
/* Atomically set EXEC_QUEUE_STATE_KILLED in the queue's GuC state word. */
static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

/* EXEC_QUEUE_STATE_WEDGED: test / set */
static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

/* EXEC_QUEUE_STATE_PENDING_RESUME: test / set / clear */
static bool exec_queue_pending_resume(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
}

static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

/*
 * True if at least one of the WEDGED, KILLED or BANNED bits is set; all three
 * are sampled with a single atomic read.
 */
static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

/*
 * DRM-managed release action (registered in xe_guc_submit_init()).
 *
 * Waits up to 5 seconds for the exec queue lookup xarray to drain, flushes
 * xe->destroy_wq, asserts that the wait did not time out and finally destroys
 * the xarray. @drm is unused; @arg is the &xe_guc.
 */
static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	/* ret == 0 means the wait above timed out with queues still present */
	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

/*
 * Device-managed release action (registered in xe_guc_submit_init()).
 *
 * Drops the reference held on every wedged exec queue, then stops the CT
 * channel and runs the reset/stop/sanitize/abort sequence below to kill
 * whatever queues remain. @arg is the &xe_guc.
 */
static void guc_submit_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	/* Drop any wedged queue refs */
	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			/*
			 * The lock is released around the put.
			 * NOTE(review): presumably because the final put can
			 * end up taking submission_state.lock itself (see
			 * release_guc_id()) - confirm.
			 */
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);

	/* Forcefully kill any remaining exec queues */
	xe_guc_ct_stop(&guc->ct);
	guc_submit_reset_prepare(guc);
	xe_guc_softreset(guc);
	xe_guc_submit_stop(guc);
	xe_uc_fw_sanitize(&guc->fw);
	xe_guc_submit_pause_abort(guc);
}

/* Defined further down in this file; installed by xe_guc_submit_init(). */
static const struct xe_exec_queue_ops guc_exec_queue_ops;

/*
 * With lockdep enabled, take and release submission_state.lock once inside an
 * fs_reclaim_acquire(GFP_KERNEL) section so that lockdep records this
 * ordering at init time. No-op when CONFIG_LOCKDEP is disabled.
 */
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as @num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Besides setting up the lock, ID manager, lookup xarray and wait queue, this
 * installs the GuC exec queue ops on the GT and registers
 * guc_submit_sw_fini() as a DRM-managed action and guc_submit_fini() as a
 * device-managed action.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc);
	if (err)
		return err;

	return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
/*
 * Append the render/compute yield KLV (tag, duration, ratio: three dwords) at
 * @emit and return the pointer just past what was written.
 */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
/*
 * Build and send the global scheduling policy H2G.
 *
 * Nothing is sent when the GuC submit version is older than 1.1.0, or when no
 * KLV was appended after the action dword (count == 1). The yield KLV is only
 * added when the GT has CCS instances.
 *
 * Return: 0 on success (or nothing to do), negative error code if the
 * blocking CT send failed.
 */
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_INSTANCES(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	/* Number of dwords emitted so far, including the action dword */
	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

/*
 * Send the global scheduling policies and, if that succeeded, mark GuC
 * submission as enabled.
 *
 * Return: 0 on success, negative error code from the policy setup otherwise.
 */
int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

/* Mark GuC submission as disabled; only the flag is touched. */
void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

/*
 * Undo alloc_guc_id(): erase @xa_count lookup entries starting at q->guc->id,
 * return the whole q->width ID range to the ID manager and wake up fini_wq
 * waiters once the lookup xarray is empty.
 *
 * @xa_count can be smaller than q->width on the alloc_guc_id() error path,
 * where only part of the entries were stored.
 *
 * Caller must hold submission_state.lock.
 */
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

/*
 * Reserve q->width consecutive GuC IDs for @q, record the first one in
 * q->guc->id and store @q in the lookup xarray under each of them.
 *
 * Caller must hold submission_state.lock.
 *
 * Return: 0 on success, negative error code if the reservation or an xarray
 * store failed (in which case everything done so far is released again).
 */
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	/* Only the first i entries were stored; erase exactly those */
	__release_guc_id(guc, q, i);

	return ret;
}

/* Locked wrapper around __release_guc_id() for all q->width entries. */
static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Builder for a context policy update H2G: @h2g is the message itself, @count
 * is the number of KLV entries filled in so far (it is not part of what gets
 * sent).
 */
struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

/* Length in dwords of the H2G: header plus the @count KLVs added so far. */
static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

/*
 * Start a new policy update for @guc_id: fill in the header and reset the KLV
 * count. The KLV array itself is not cleared.
 */
static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

/*
 * Generate __guc_exec_queue_policy_add_<func>(policy, data), which appends a
 * one-dword KLV with key GUC_CONTEXT_POLICIES_KLV_ID_<id> and value @data to
 * the policy being built. Exceeding GUC_CONTEXT_POLICIES_KLV_NUM_IDS entries
 * only triggers a warning; the entry is still written.
 */
#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					   u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

/* XE exec queue priority -> GuC client priority, indexed by the XE priority */
static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

/*
 * Send the initial scheduling policies of @q to the GuC: priority, execution
 * quantum (timeslice), preemption timeout and the SLPC frequency request
 * flags, all taken from q->sched_props / q->flags.
 *
 * @q must already be marked registered and must not be a multi-queue
 * secondary queue.
 */
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
		     !xe_exec_queue_is_multi_queue_secondary(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/*
 * Send a policy update that sets the preemption timeout of @q to 1 (the same
 * KLV that init_policies() fills from preempt_timeout_us). @q must not be a
 * multi-queue secondary queue.
 */
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/* True while a recovery is pending on the GT that owns @guc. */
static bool vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

/*
 * Kick cleanup of @q: wake all user-fence waiters and queue the scheduler's
 * TDR immediately.
 */
static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	xe_sched_tdr_queue_imm(&q->guc->sched);
}

/*
 * Stop scheduler submission on the primary queue and on every queue on the
 * group list of @q's multi-queue group, and mark the group as stopped.
 *
 * Queues are moved to a temporary list (with a reference held) so submission
 * can be stopped without holding group->list_lock, then moved back.
 */
static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq, *next;
	LIST_HEAD(tmp);

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	mutex_lock(&group->list_lock);

	/*
	 * Prevent any queue added later from executing while the group is
	 * stopped.
	 */
	group->stopped = true;

	list_for_each_entry_safe(eq, next, &group->list, multi_queue.link)
		/*
		 * Refcount prevents an attempted removal from &group->list,
		 * temporary list allows safe iteration after dropping
		 * &group->list_lock.
		 */
		if (xe_exec_queue_get_unless_zero(eq))
			list_move_tail(&eq->multi_queue.link, &tmp);

	mutex_unlock(&group->list_lock);

	/* We cannot stop under list lock without getting inversions */
	xe_sched_submission_stop(&primary->guc->sched);
	list_for_each_entry(eq, &tmp, multi_queue.link)
		xe_sched_submission_stop(&eq->guc->sched);

	mutex_lock(&group->list_lock);
	list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) {
		/*
		 * Corner case: the group got banned while we were stopping
		 * and this queue was not on &group->list at that time.
		 */
		if (READ_ONCE(group->banned))
			xe_guc_exec_queue_trigger_cleanup(eq);

		list_move_tail(&eq->multi_queue.link, &group->list);
		xe_exec_queue_put(eq);
	}
	mutex_unlock(&group->list_lock);
}

/*
 * Counterpart of xe_guc_exec_queue_group_stop(): restart scheduler submission
 * on the primary queue and on every queue on the group list, clearing the
 * group's stopped flag under the list lock.
 */
static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	xe_sched_submission_start(&primary->guc->sched);

	mutex_lock(&group->list_lock);
	group->stopped = false;
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_sched_submission_start(&eq->guc->sched);
	mutex_unlock(&group->list_lock);
}

/*
 * Mark the whole multi-queue group of @q as banned and trigger cleanup on the
 * primary queue and on every queue on the group list.
 */
static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	/* Group banned, skip timeout check in TDR */
	WRITE_ONCE(group->banned, true);
	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

/*
 * Flag @q as reset and trigger its cleanup unless it is already banned. For a
 * multi-queue @q the same is done for the primary queue and every queue on
 * the group list, and the group itself is marked banned.
 */
static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		/* Group banned, skip timeout check in TDR */
		WRITE_ONCE(group->banned, true);

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

/*
 * Set the BANNED state bit on the primary queue and on every queue on the
 * group list of @q's multi-queue group.
 */
static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/*
 * Helper for context registration H2G.
 *
 * Filled in by register_exec_queue(); which fields end up in the H2G depends
 * on the registration flavour (plain, multi-LRC or multi-queue).
 */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 cgp_lo;
	u32 cgp_hi;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};

/*
 * Read/write one field of the struct guc_submit_parallel_scratch located at
 * offset 0 of the given iosys map.
 */
#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

/**
 * DOC: Multi Queue Group GuC interface
 *
 * The multi queue group coordination between KMD and GuC is through a software
 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
 * allocated in the global GTT.
 *
 * CGP format:
 *
 * +-----------+---------------------------+---------------------------------------------+
 * | DWORD     | Name                      | Description                                 |
 * +-----------+---------------------------+---------------------------------------------+
 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 1..15     | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
 * +-----------+---------------------------+---------------------------------------------+
 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
 * +-----------+---------------------------+---------------------------------------------+
 * | 18..31    | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | ...       | ...                       | ...                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
 * +-----------+---------------------------+---------------------------------------------+
 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
 * +-----------+---------------------------+---------------------------------------------+
 * | 160..1023 | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 *
 * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified
 * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies
 * the CGP address. When the secondary queues are added to the group, the CGP is
 * updated with entry for that queue and GuC is notified through the H2G interface
 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages
 * with a XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any
 * error in the CGP. Only one of these CGP update messages can be outstanding
 * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW*
 * fields indicate which queue entry is being updated in the CGP.
 *
 * The primary queue (Q0) represents the multi queue group context in GuC and
 * submission on any queue of the group must be through Q0 GuC interface only.
 *
 * As it is not required to register secondary queues with GuC, the secondary queue
 * context ids in the CGP are populated with Q0 context id.
 */

#define CGP_VERSION_MAJOR_SHIFT	8

/*
 * Write the CGP entry for @q (see the CGP format above): the version dword,
 * the queue's LRC descriptor (lower dword) and context index at the slot for
 * q->multi_queue.pos, and the two KMD_QUEUE_UPDATE_MASK dwords so that only
 * the bit for this queue is set.
 */
static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	/* Every entry carries the primary queue's GuC ID as context index */
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	/* DWORD 32 + 2 * pos: QnCD_DW0 */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	/* DWORD 33 + 2 * pos: QnContextIndex */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	if (q->multi_queue.pos / 32) {
		/* Queues 32..63: bit goes in MASK_DW1 (17), MASK_DW0 is cleared */
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		/* Queues 0..31: bit goes in MASK_DW0 (16), MASK_DW1 is cleared */
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

/*
 * Update the CGP for @q and send the @len dword H2G in @action that tells the
 * GuC about it.
 *
 * A previous, still pending sync is waited for first (up to HZ jiffies). If
 * that wait times out, or the GuC is stopped, the group is banned, a GT reset
 * is scheduled, group cleanup is triggered and nothing is sent.
 */
static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_multi_queue_priority priority;
	long ret;

	/*
	 * As all queues of a multi queue group use a single drm scheduler
	 * submit workqueue, CGP synchronization with GuC is serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset gt, cleanup the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	/* Sample the queue's multi-queue priority under its spinlock */
	scoped_guard(spinlock, &q->multi_queue.lock)
		priority = q->multi_queue.priority;

	xe_lrc_set_multi_queue_priority(q->lrc[0], priority);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	/* Set before sending; the wait above polls this flag */
	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}

/*
 * Register a multi-queue group with the GuC through its primary queue: build
 * the XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE message from @info and send
 * it via the CGP sync path.
 */
static void __register_exec_queue_group(struct xe_guc *guc,
					struct xe_exec_queue *q,
					struct guc_ctxt_registration_info *info)
{
#define MAX_MULTI_QUEUE_REG_SIZE	(8)
	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
	int len = 0;

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = 0; /* Reserved */
	action[len++] = info->cgp_lo;
	action[len++] = info->cgp_hi;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
#undef MAX_MULTI_QUEUE_REG_SIZE

	/*
	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/*
 * Add secondary queue @q to its multi-queue group: send a
 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC carrying the primary queue's GuC
 * ID via the CGP sync path.
 */
static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE  (2)
	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
	int len = 0;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));

	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
	action[len++] = q->multi_queue.group->primary->guc->id;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

	/*
	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/*
 * Register a parallel (multi-LRC) exec queue: the 13 fixed dwords from @info
 * are followed by the 64-bit LRC descriptor (low dword, then high dword) of
 * each LRC from index 1 up to q->width - 1.
 */
static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	/* LRC 0 is already covered by hwlrca_lo/hi above */
	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

/*
 * Register a plain (single LRC, not multi-queue) exec queue: send the
 * fixed-size XE_GUC_ACTION_REGISTER_CONTEXT message built from @info.
 */
static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * Register @q with the GuC as a context of type @ctx_type.
 *
 * Fills a guc_ctxt_registration_info from the queue (GuC ID, engine class,
 * logical mask, LRC 0 descriptor, plus the CGP address for multi-queue queues
 * and the work queue descriptor/base for parallel queues), marks the queue as
 * registered and then sends the matching registration:
 *
 *  - multi-queue primary:   __register_exec_queue_group()
 *  - parallel:              __register_mlrc_exec_queue()
 *  - multi-queue secondary: no registration, only added to its group
 *  - anything else:         __register_exec_queue()
 *
 * Scheduling policies are initialised for every queue except multi-queue
 * secondaries.
 */
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		/*
		 * Reset the software work queue pointers, zero the first
		 * PARALLEL_SCRATCH_SIZE - WQ_SIZE bytes of the scratch area
		 * and mark the work queue as active.
		 */
		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/* init_policies() below asserts that the queue is marked registered */
	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE -
q->guc->wqi_tail); 1036 } 1037 1038 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) 1039 { 1040 struct xe_guc *guc = exec_queue_to_guc(q); 1041 struct xe_device *xe = guc_to_xe(guc); 1042 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 1043 unsigned int sleep_period_ms = 1, sleep_total_ms = 0; 1044 1045 #define AVAILABLE_SPACE \ 1046 CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) 1047 if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) { 1048 try_again: 1049 q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); 1050 if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) { 1051 if (sleep_total_ms > 2000) { 1052 xe_gt_reset_async(q->gt); 1053 return -ENODEV; 1054 } 1055 1056 sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64); 1057 goto try_again; 1058 } 1059 } 1060 #undef AVAILABLE_SPACE 1061 1062 return 0; 1063 } 1064 1065 static int wq_noop_append(struct xe_exec_queue *q) 1066 { 1067 struct xe_guc *guc = exec_queue_to_guc(q); 1068 struct xe_device *xe = guc_to_xe(guc); 1069 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 1070 u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; 1071 1072 if (wq_wait_for_space(q, wq_space_until_wrap(q))) 1073 return -ENODEV; 1074 1075 xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw)); 1076 1077 parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], 1078 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 1079 FIELD_PREP(WQ_LEN_MASK, len_dw)); 1080 q->guc->wqi_tail = 0; 1081 1082 return 0; 1083 } 1084 1085 static void wq_item_append(struct xe_exec_queue *q) 1086 { 1087 struct xe_guc *guc = exec_queue_to_guc(q); 1088 struct xe_device *xe = guc_to_xe(guc); 1089 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 1090 #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ 1091 u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; 1092 u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); 1093 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 1094 int i = 0, 
j; 1095 1096 if (wqi_size > wq_space_until_wrap(q)) { 1097 if (wq_noop_append(q)) 1098 return; 1099 } 1100 if (wq_wait_for_space(q, wqi_size)) 1101 return; 1102 1103 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 1104 FIELD_PREP(WQ_LEN_MASK, len_dw); 1105 wqi[i++] = xe_lrc_descriptor(q->lrc[0]); 1106 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | 1107 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); 1108 wqi[i++] = 0; 1109 for (j = 1; j < q->width; ++j) { 1110 struct xe_lrc *lrc = q->lrc[j]; 1111 1112 wqi[i++] = lrc->ring.tail / sizeof(u64); 1113 } 1114 1115 xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); 1116 1117 iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, 1118 wq[q->guc->wqi_tail / sizeof(u32)])); 1119 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); 1120 q->guc->wqi_tail += wqi_size; 1121 xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); 1122 1123 xe_device_wmb(xe); 1124 1125 map = xe_lrc_parallel_map(q->lrc[0]); 1126 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); 1127 } 1128 1129 #define RESUME_PENDING ~0x0ull 1130 static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) 1131 { 1132 struct xe_guc *guc = exec_queue_to_guc(q); 1133 struct xe_lrc *lrc = q->lrc[0]; 1134 u32 action[3]; 1135 u32 g2h_len = 0; 1136 u32 num_g2h = 0; 1137 int len = 0; 1138 bool extra_submit = false; 1139 1140 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1141 1142 if (!job->restore_replay || job->last_replay) { 1143 if (xe_exec_queue_is_parallel(q)) 1144 wq_item_append(q); 1145 else 1146 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1147 job->last_replay = false; 1148 } 1149 1150 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) 1151 return; 1152 1153 /* 1154 * All queues in a multi-queue group will use the primary queue 1155 * of the group to interface with GuC. If primay is suspended, 1156 * just return. Jobs will get scheduled once primary is resumed. 
1157 */ 1158 q = xe_exec_queue_multi_queue_primary(q); 1159 if (exec_queue_suspended(q)) 1160 return; 1161 1162 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { 1163 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 1164 action[len++] = q->guc->id; 1165 action[len++] = GUC_CONTEXT_ENABLE; 1166 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 1167 num_g2h = 1; 1168 if (xe_exec_queue_is_parallel(q)) 1169 extra_submit = true; 1170 1171 q->guc->resume_time = RESUME_PENDING; 1172 set_exec_queue_pending_enable(q); 1173 set_exec_queue_enabled(q); 1174 trace_xe_exec_queue_scheduling_enable(q); 1175 } else { 1176 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1177 action[len++] = q->guc->id; 1178 trace_xe_exec_queue_submit(q); 1179 } 1180 1181 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); 1182 1183 if (extra_submit) { 1184 len = 0; 1185 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1186 action[len++] = q->guc->id; 1187 trace_xe_exec_queue_submit(q); 1188 1189 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 1190 } 1191 } 1192 1193 static struct dma_fence * 1194 guc_exec_queue_run_job(struct drm_sched_job *drm_job) 1195 { 1196 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1197 struct xe_exec_queue *q = job->q; 1198 struct xe_guc *guc = exec_queue_to_guc(q); 1199 bool killed_or_banned_or_wedged = 1200 exec_queue_killed_or_banned_or_wedged(q); 1201 1202 xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 1203 exec_queue_banned(q) || exec_queue_suspended(q)); 1204 1205 trace_xe_sched_job_run(job); 1206 1207 if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { 1208 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1209 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 1210 1211 if (exec_queue_killed_or_banned_or_wedged(primary)) { 1212 killed_or_banned_or_wedged = true; 1213 goto run_job_out; 1214 } 1215 1216 if (!exec_queue_registered(primary)) 1217 register_exec_queue(primary, 
GUC_CONTEXT_NORMAL); 1218 } 1219 1220 if (!exec_queue_registered(q)) 1221 register_exec_queue(q, GUC_CONTEXT_NORMAL); 1222 if (!job->restore_replay) 1223 q->ring_ops->emit_job(job); 1224 submit_exec_queue(q, job); 1225 job->restore_replay = false; 1226 } 1227 1228 run_job_out: 1229 1230 return job->fence; 1231 } 1232 1233 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) 1234 { 1235 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1236 1237 trace_xe_sched_job_free(job); 1238 xe_sched_job_put(job); 1239 } 1240 1241 int xe_guc_read_stopped(struct xe_guc *guc) 1242 { 1243 return atomic_read(&guc->submission_state.stopped); 1244 } 1245 1246 static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, 1247 struct xe_exec_queue *q, 1248 u32 runnable_state); 1249 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q); 1250 1251 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ 1252 u32 action[] = { \ 1253 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ 1254 q->guc->id, \ 1255 GUC_CONTEXT_##enable_disable, \ 1256 } 1257 1258 static void disable_scheduling_deregister(struct xe_guc *guc, 1259 struct xe_exec_queue *q) 1260 { 1261 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1262 int ret; 1263 1264 if (!xe_exec_queue_is_multi_queue_secondary(q)) 1265 set_min_preemption_timeout(guc, q); 1266 1267 smp_rmb(); 1268 ret = wait_event_timeout(guc->ct.wq, 1269 (!exec_queue_pending_enable(q) && 1270 !exec_queue_pending_disable(q)) || 1271 xe_guc_read_stopped(guc) || 1272 vf_recovery(guc), 1273 HZ * 5); 1274 if (!ret && !vf_recovery(guc)) { 1275 struct xe_gpu_scheduler *sched = &q->guc->sched; 1276 1277 xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); 1278 xe_sched_submission_start(sched); 1279 xe_gt_reset_async(q->gt); 1280 xe_sched_tdr_queue_imm(sched); 1281 return; 1282 } 1283 1284 clear_exec_queue_enabled(q); 1285 set_exec_queue_pending_disable(q); 1286 set_exec_queue_destroyed(q); 1287 
trace_xe_exec_queue_scheduling_disable(q); 1288 1289 /* 1290 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H 1291 * handler and we are not allowed to reserved G2H space in handlers. 1292 */ 1293 if (xe_exec_queue_is_multi_queue_secondary(q)) 1294 handle_multi_queue_secondary_sched_done(guc, q, 0); 1295 else 1296 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1297 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + 1298 G2H_LEN_DW_DEREGISTER_CONTEXT, 2); 1299 } 1300 1301 /** 1302 * xe_guc_submit_wedge() - Wedge GuC submission 1303 * @guc: the GuC object 1304 * 1305 * Save exec queue's registered with GuC state by taking a ref to each queue. 1306 * Register a DRMM handler to drop refs upon driver unload. 1307 */ 1308 void xe_guc_submit_wedge(struct xe_guc *guc) 1309 { 1310 struct xe_device *xe = guc_to_xe(guc); 1311 struct xe_exec_queue *q; 1312 unsigned long index; 1313 1314 xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); 1315 1316 /* 1317 * If device is being wedged even before submission_state is 1318 * initialized, there's nothing to do here. 
1319 */ 1320 if (!guc->submission_state.initialized) 1321 return; 1322 1323 if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) { 1324 mutex_lock(&guc->submission_state.lock); 1325 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 1326 if (xe_exec_queue_get_unless_zero(q)) 1327 set_exec_queue_wedged(q); 1328 mutex_unlock(&guc->submission_state.lock); 1329 } else { 1330 /* Forcefully kill any remaining exec queues, signal fences */ 1331 guc_submit_reset_prepare(guc); 1332 xe_guc_submit_stop(guc); 1333 xe_guc_softreset(guc); 1334 xe_uc_fw_sanitize(&guc->fw); 1335 xe_guc_submit_pause_abort(guc); 1336 } 1337 } 1338 1339 static bool guc_submit_hint_wedged(struct xe_guc *guc) 1340 { 1341 struct xe_device *xe = guc_to_xe(guc); 1342 1343 if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) 1344 return false; 1345 1346 if (xe_device_wedged(xe)) 1347 return true; 1348 1349 xe_device_declare_wedged(xe); 1350 1351 return true; 1352 } 1353 1354 #define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) 1355 1356 static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) 1357 { 1358 struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); 1359 u32 ctx_timestamp, ctx_job_timestamp; 1360 u32 timeout_ms = q->sched_props.job_timeout_ms; 1361 u32 diff; 1362 u64 running_time_ms; 1363 1364 if (!xe_sched_job_started(job)) { 1365 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started", 1366 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1367 q->guc->id); 1368 1369 /* GuC never scheduled this job - let the caller trigger a GT reset. 
*/ 1370 return true; 1371 } 1372 1373 ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0])); 1374 if (ctx_timestamp == job->sample_timestamp) { 1375 if (IS_SRIOV_VF(gt_to_xe(gt))) 1376 xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", 1377 xe_sched_job_seqno(job), 1378 xe_sched_job_lrc_seqno(job), q->guc->id); 1379 else 1380 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", 1381 xe_sched_job_seqno(job), 1382 xe_sched_job_lrc_seqno(job), q->guc->id); 1383 1384 return xe_sched_invalidate_job(job, 0); 1385 } 1386 1387 job->sample_timestamp = ctx_timestamp; 1388 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); 1389 1390 /* 1391 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch 1392 * possible overflows with a high timeout. 1393 */ 1394 xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); 1395 1396 diff = ctx_timestamp - ctx_job_timestamp; 1397 1398 /* 1399 * Ensure timeout is within 5% to account for an GuC scheduling latency 1400 */ 1401 running_time_ms = 1402 ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); 1403 1404 xe_gt_dbg(gt, 1405 "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", 1406 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1407 q->guc->id, running_time_ms, timeout_ms, diff); 1408 1409 return running_time_ms >= timeout_ms; 1410 } 1411 1412 static void enable_scheduling(struct xe_exec_queue *q) 1413 { 1414 MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); 1415 struct xe_guc *guc = exec_queue_to_guc(q); 1416 int ret; 1417 1418 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1419 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1420 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1421 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); 1422 1423 set_exec_queue_pending_enable(q); 1424 set_exec_queue_enabled(q); 1425 trace_xe_exec_queue_scheduling_enable(q); 
1426 1427 if (xe_exec_queue_is_multi_queue_secondary(q)) 1428 handle_multi_queue_secondary_sched_done(guc, q, 1); 1429 else 1430 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1431 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1432 1433 ret = wait_event_timeout(guc->ct.wq, 1434 !exec_queue_pending_enable(q) || 1435 xe_guc_read_stopped(guc) || 1436 vf_recovery(guc), HZ * 5); 1437 if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) { 1438 xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); 1439 set_exec_queue_banned(q); 1440 xe_gt_reset_async(q->gt); 1441 xe_sched_tdr_queue_imm(&q->guc->sched); 1442 } 1443 } 1444 1445 static void disable_scheduling(struct xe_exec_queue *q, bool immediate) 1446 { 1447 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1448 struct xe_guc *guc = exec_queue_to_guc(q); 1449 1450 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1451 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1452 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1453 1454 if (immediate && !xe_exec_queue_is_multi_queue_secondary(q)) 1455 set_min_preemption_timeout(guc, q); 1456 clear_exec_queue_enabled(q); 1457 set_exec_queue_pending_disable(q); 1458 trace_xe_exec_queue_scheduling_disable(q); 1459 1460 if (xe_exec_queue_is_multi_queue_secondary(q)) 1461 handle_multi_queue_secondary_sched_done(guc, q, 0); 1462 else 1463 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1464 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1465 } 1466 1467 /* 1468 * Recover via GT reset for a kernel queue, or for a GuC scheduling failure (job 1469 * never started) on a queue that was not already killed or banned. An already 1470 * banned queue must stay banned, so its unstarted jobs do not clear the ban or 1471 * trigger a reset. 
1472 */ 1473 static bool timeout_needs_gt_reset(struct xe_exec_queue *q, struct xe_sched_job *job, 1474 bool skip_timeout_check) 1475 { 1476 if (q->flags & EXEC_QUEUE_FLAG_KERNEL) 1477 return true; 1478 1479 return !skip_timeout_check && !xe_sched_job_started(job); 1480 } 1481 1482 static enum drm_gpu_sched_stat 1483 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) 1484 { 1485 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1486 struct drm_sched_job *tmp_job; 1487 struct xe_exec_queue *q = job->q, *primary; 1488 struct xe_gpu_scheduler *sched = &q->guc->sched; 1489 struct xe_guc *guc = exec_queue_to_guc(q); 1490 const char *process_name = "no process"; 1491 struct xe_device *xe = guc_to_xe(guc); 1492 int err = -ETIME; 1493 pid_t pid = -1; 1494 bool wedged = false, skip_timeout_check; 1495 1496 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1497 1498 primary = xe_exec_queue_multi_queue_primary(q); 1499 1500 /* 1501 * TDR has fired before free job worker. Common if exec queue 1502 * immediately closed after last fence signaled. Add back to pending 1503 * list so job can be freed and kick scheduler ensuring free job is not 1504 * lost. 
1505 */ 1506 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) || 1507 vf_recovery(guc)) 1508 return DRM_GPU_SCHED_STAT_NO_HANG; 1509 1510 /* Kill the run_job entry point */ 1511 if (xe_exec_queue_is_multi_queue(q)) 1512 xe_guc_exec_queue_group_stop(q); 1513 else 1514 xe_sched_submission_stop(sched); 1515 1516 /* Must check all state after stopping scheduler */ 1517 skip_timeout_check = exec_queue_reset(q) || 1518 exec_queue_killed_or_banned_or_wedged(q); 1519 1520 /* Skip timeout check if multi-queue group is banned */ 1521 if (xe_exec_queue_is_multi_queue(q) && 1522 READ_ONCE(q->multi_queue.group->banned)) 1523 skip_timeout_check = true; 1524 1525 /* LR jobs can only get here if queue has been killed or hit an error */ 1526 if (xe_exec_queue_is_lr(q)) 1527 xe_gt_assert(guc_to_gt(guc), skip_timeout_check); 1528 1529 /* 1530 * If devcoredump not captured and GuC capture for the job is not ready 1531 * do manual capture first and decide later if we need to use it 1532 */ 1533 if (!exec_queue_killed(q) && !xe->devcoredump.captured && 1534 !xe_guc_capture_get_matching_and_lock(q)) { 1535 /* take force wake before engine register manual capture */ 1536 CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); 1537 if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) 1538 xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); 1539 1540 xe_engine_snapshot_capture_for_queue(q); 1541 } 1542 1543 /* 1544 * Check if job is actually timed out, if so restart job execution and TDR 1545 */ 1546 if (!skip_timeout_check && !check_timeout(q, job)) 1547 goto rearm; 1548 1549 if (!exec_queue_killed(q)) 1550 wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 1551 1552 set_exec_queue_banned(q); 1553 1554 /* Kick job / queue off hardware */ 1555 if (!wedged && (exec_queue_enabled(primary) || 1556 exec_queue_pending_disable(primary))) { 1557 int ret; 1558 1559 if (exec_queue_reset(primary)) 1560 err = -EIO; 1561 1562 if 
(xe_uc_fw_is_running(&guc->fw)) { 1563 /* 1564 * Wait for any pending G2H to flush out before 1565 * modifying state 1566 */ 1567 ret = wait_event_timeout(guc->ct.wq, 1568 (!exec_queue_pending_enable(primary) && 1569 !exec_queue_pending_disable(primary)) || 1570 xe_guc_read_stopped(guc) || 1571 vf_recovery(guc), HZ * 5); 1572 if (vf_recovery(guc)) 1573 goto handle_vf_resume; 1574 if (!ret || xe_guc_read_stopped(guc)) 1575 goto trigger_reset; 1576 1577 disable_scheduling(primary, skip_timeout_check); 1578 } 1579 1580 /* 1581 * Must wait for scheduling to be disabled before signalling 1582 * any fences, if GT broken the GT reset code should signal us. 1583 * 1584 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault 1585 * error) messages which can cause the schedule disable to get 1586 * lost. If this occurs, trigger a GT reset to recover. 1587 */ 1588 smp_rmb(); 1589 ret = wait_event_timeout(guc->ct.wq, 1590 !xe_uc_fw_is_running(&guc->fw) || 1591 !exec_queue_pending_disable(primary) || 1592 xe_guc_read_stopped(guc) || 1593 vf_recovery(guc), HZ * 5); 1594 if (vf_recovery(guc)) 1595 goto handle_vf_resume; 1596 if (!ret || xe_guc_read_stopped(guc)) { 1597 trigger_reset: 1598 if (!ret) 1599 xe_gt_warn(guc_to_gt(guc), 1600 "Schedule disable failed to respond, guc_id=%d", 1601 primary->guc->id); 1602 xe_devcoredump(primary, job, 1603 "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", 1604 primary->guc->id, ret, xe_guc_read_stopped(guc)); 1605 xe_gt_reset_async(primary->gt); 1606 xe_sched_tdr_queue_imm(sched); 1607 goto rearm; 1608 } 1609 } 1610 1611 if (q->vm && q->vm->xef) { 1612 process_name = q->vm->xef->process_name; 1613 pid = q->vm->xef->pid; 1614 } 1615 1616 if (!exec_queue_killed(q)) 1617 xe_gt_notice(guc_to_gt(guc), 1618 "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", 1619 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1620 q->guc->id, q->flags, process_name, pid); 1621 1622 
trace_xe_sched_job_timedout(job); 1623 1624 if (!exec_queue_killed(q)) 1625 xe_devcoredump(q, job, 1626 "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", 1627 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1628 q->guc->id, q->flags); 1629 1630 if (!wedged) { 1631 if (timeout_needs_gt_reset(q, job, skip_timeout_check)) { 1632 if (!xe_sched_invalidate_job(job, 2)) { 1633 clear_exec_queue_banned(q); 1634 xe_gt_reset_async(q->gt); 1635 goto rearm; 1636 } 1637 if (q->flags & EXEC_QUEUE_FLAG_KERNEL) { 1638 xe_gt_WARN(q->gt, true, "Kernel-submitted job timed out\n"); 1639 xe_device_declare_wedged(gt_to_xe(q->gt)); 1640 } 1641 } else if (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)) { 1642 xe_gt_WARN(q->gt, true, "VM job timed out on non-killed execqueue\n"); 1643 } 1644 } 1645 1646 /* Mark all outstanding jobs as bad, thus completing them */ 1647 xe_sched_job_set_error(job, err); 1648 drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL) 1649 xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED); 1650 1651 if (xe_exec_queue_is_multi_queue(q)) { 1652 xe_guc_exec_queue_group_start(q); 1653 xe_guc_exec_queue_group_trigger_cleanup(q); 1654 } else { 1655 xe_sched_submission_start(sched); 1656 xe_guc_exec_queue_trigger_cleanup(q); 1657 } 1658 1659 /* 1660 * We want the job added back to the pending list so it gets freed; this 1661 * is what DRM_GPU_SCHED_STAT_NO_HANG does. 1662 */ 1663 return DRM_GPU_SCHED_STAT_NO_HANG; 1664 1665 rearm: 1666 /* 1667 * XXX: Ideally want to adjust timeout based on current execution time 1668 * but there is not currently an easy way to do in DRM scheduler. With 1669 * some thought, do this in a follow up. 
1670 */ 1671 if (xe_exec_queue_is_multi_queue(q)) 1672 xe_guc_exec_queue_group_start(q); 1673 else 1674 xe_sched_submission_start(sched); 1675 handle_vf_resume: 1676 return DRM_GPU_SCHED_STAT_NO_HANG; 1677 } 1678 1679 static void guc_exec_queue_fini(struct xe_exec_queue *q) 1680 { 1681 struct xe_guc_exec_queue *ge = q->guc; 1682 struct xe_guc *guc = exec_queue_to_guc(q); 1683 1684 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1685 struct xe_exec_queue_group *group = q->multi_queue.group; 1686 1687 mutex_lock(&group->list_lock); 1688 list_del(&q->multi_queue.link); 1689 mutex_unlock(&group->list_lock); 1690 } 1691 1692 release_guc_id(guc, q); 1693 xe_sched_entity_fini(&ge->entity); 1694 xe_sched_fini(&ge->sched); 1695 1696 /* 1697 * RCU free due sched being exported via DRM scheduler fences 1698 * (timeline name). 1699 */ 1700 kfree_rcu(ge, rcu); 1701 } 1702 1703 static void __guc_exec_queue_destroy_async(struct work_struct *w) 1704 { 1705 struct xe_guc_exec_queue *ge = 1706 container_of(w, struct xe_guc_exec_queue, destroy_async); 1707 struct xe_exec_queue *q = ge->q; 1708 struct xe_guc *guc = exec_queue_to_guc(q); 1709 1710 guard(xe_pm_runtime)(guc_to_xe(guc)); 1711 trace_xe_exec_queue_destroy(q); 1712 1713 /* Confirm no work left behind accessing device structures */ 1714 cancel_delayed_work_sync(&ge->sched.base.work_tdr); 1715 1716 xe_exec_queue_fini(q); 1717 } 1718 1719 static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) 1720 { 1721 struct xe_guc *guc = exec_queue_to_guc(q); 1722 struct xe_device *xe = guc_to_xe(guc); 1723 1724 INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); 1725 1726 /* We must block on kernel engines so slabs are empty on driver unload */ 1727 if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) 1728 __guc_exec_queue_destroy_async(&q->guc->destroy_async); 1729 else 1730 queue_work(xe->destroy_wq, &q->guc->destroy_async); 1731 } 1732 1733 static void __guc_exec_queue_destroy(struct xe_guc *guc, 
struct xe_exec_queue *q) 1734 { 1735 /* 1736 * Might be done from within the GPU scheduler, need to do async as we 1737 * fini the scheduler when the engine is fini'd, the scheduler can't 1738 * complete fini within itself (circular dependency). Async resolves 1739 * this we and don't really care when everything is fini'd, just that it 1740 * is. 1741 */ 1742 guc_exec_queue_destroy_async(q); 1743 } 1744 1745 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) 1746 { 1747 struct xe_exec_queue *q = msg->private_data; 1748 struct xe_guc *guc = exec_queue_to_guc(q); 1749 1750 xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 1751 trace_xe_exec_queue_cleanup_entity(q); 1752 1753 /* 1754 * Expected state transitions for cleanup: 1755 * - If the exec queue is registered and GuC firmware is running, we must first 1756 * disable scheduling and deregister the queue to ensure proper teardown and 1757 * resource release in the GuC, then destroy the exec queue on driver side. 1758 * - If the GuC is already stopped (e.g., during driver unload or GPU reset), 1759 * we cannot expect a response for the deregister request. In this case, 1760 * it is safe to directly destroy the exec queue on driver side, as the GuC 1761 * will not process further requests and all resources must be cleaned up locally. 
1762 */ 1763 if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) 1764 disable_scheduling_deregister(guc, q); 1765 else 1766 __guc_exec_queue_destroy(guc, q); 1767 } 1768 1769 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) 1770 { 1771 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); 1772 } 1773 1774 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) 1775 { 1776 struct xe_exec_queue *q = msg->private_data; 1777 struct xe_guc *guc = exec_queue_to_guc(q); 1778 1779 if (guc_exec_queue_allowed_to_change_state(q)) 1780 init_policies(guc, q); 1781 kfree(msg); 1782 } 1783 1784 static void __suspend_fence_signal(struct xe_exec_queue *q) 1785 { 1786 struct xe_guc *guc = exec_queue_to_guc(q); 1787 struct xe_device *xe = guc_to_xe(guc); 1788 1789 if (!q->guc->suspend_pending) 1790 return; 1791 1792 WRITE_ONCE(q->guc->suspend_pending, false); 1793 1794 /* 1795 * We use a GuC shared wait queue for VFs because the VF resfix start 1796 * interrupt must be able to wake all instances of suspend_wait. This 1797 * prevents the VF migration worker from being starved during 1798 * scheduling. 
1799 */ 1800 if (IS_SRIOV_VF(xe)) 1801 wake_up_all(&guc->ct.wq); 1802 else 1803 wake_up(&q->guc->suspend_wait); 1804 } 1805 1806 static void suspend_fence_signal(struct xe_exec_queue *q) 1807 { 1808 struct xe_guc *guc = exec_queue_to_guc(q); 1809 1810 xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || 1811 xe_guc_read_stopped(guc)); 1812 xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); 1813 1814 __suspend_fence_signal(q); 1815 } 1816 1817 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) 1818 { 1819 struct xe_exec_queue *q = msg->private_data; 1820 struct xe_guc *guc = exec_queue_to_guc(q); 1821 1822 if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && 1823 exec_queue_enabled(q)) { 1824 wait_event(guc->ct.wq, vf_recovery(guc) || 1825 ((q->guc->resume_time != RESUME_PENDING || 1826 xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); 1827 1828 if (!xe_guc_read_stopped(guc)) { 1829 s64 since_resume_ms = 1830 ktime_ms_delta(ktime_get(), 1831 q->guc->resume_time); 1832 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1833 since_resume_ms; 1834 1835 if (wait_ms > 0 && q->guc->resume_time) 1836 xe_sleep_relaxed_ms(wait_ms); 1837 1838 set_exec_queue_suspended(q); 1839 disable_scheduling(q, false); 1840 } 1841 } else if (q->guc->suspend_pending) { 1842 set_exec_queue_suspended(q); 1843 suspend_fence_signal(q); 1844 } 1845 } 1846 1847 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1848 { 1849 struct xe_exec_queue *q = msg->private_data; 1850 1851 if (guc_exec_queue_allowed_to_change_state(q)) { 1852 clear_exec_queue_suspended(q); 1853 if (!exec_queue_enabled(q)) { 1854 q->guc->resume_time = RESUME_PENDING; 1855 set_exec_queue_pending_resume(q); 1856 enable_scheduling(q); 1857 } 1858 } else { 1859 clear_exec_queue_suspended(q); 1860 } 1861 } 1862 1863 static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg) 1864 { 1865 
struct xe_exec_queue *q = msg->private_data; 1866 1867 if (guc_exec_queue_allowed_to_change_state(q)) { 1868 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) 1869 struct xe_guc *guc = exec_queue_to_guc(q); 1870 struct xe_exec_queue_group *group = q->multi_queue.group; 1871 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; 1872 int len = 0; 1873 1874 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; 1875 action[len++] = group->primary->guc->id; 1876 1877 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); 1878 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE 1879 1880 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 1881 } 1882 1883 kfree(msg); 1884 } 1885 1886 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ 1887 #define SET_SCHED_PROPS 2 1888 #define SUSPEND 3 1889 #define RESUME 4 1890 #define SET_MULTI_QUEUE_PRIORITY 5 1891 #define OPCODE_MASK 0xf 1892 #define MSG_LOCKED BIT(8) 1893 #define MSG_HEAD BIT(9) 1894 1895 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) 1896 { 1897 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); 1898 1899 trace_xe_sched_msg_recv(msg); 1900 1901 switch (msg->opcode) { 1902 case CLEANUP: 1903 __guc_exec_queue_process_msg_cleanup(msg); 1904 break; 1905 case SET_SCHED_PROPS: 1906 __guc_exec_queue_process_msg_set_sched_props(msg); 1907 break; 1908 case SUSPEND: 1909 __guc_exec_queue_process_msg_suspend(msg); 1910 break; 1911 case RESUME: 1912 __guc_exec_queue_process_msg_resume(msg); 1913 break; 1914 case SET_MULTI_QUEUE_PRIORITY: 1915 __guc_exec_queue_process_msg_set_multi_queue_priority(msg); 1916 break; 1917 default: 1918 XE_WARN_ON("Unknown message type"); 1919 } 1920 1921 xe_pm_runtime_put(xe); 1922 } 1923 1924 static const struct drm_sched_backend_ops drm_sched_ops = { 1925 .run_job = guc_exec_queue_run_job, 1926 .free_job = guc_exec_queue_free_job, 1927 .timedout_job = guc_exec_queue_timedout_job, 1928 }; 1929 1930 static const struct xe_sched_backend_ops xe_sched_ops = 
{ 1931 .process_msg = guc_exec_queue_process_msg, 1932 }; 1933 1934 static int guc_exec_queue_init(struct xe_exec_queue *q) 1935 { 1936 struct xe_gpu_scheduler *sched; 1937 struct xe_guc *guc = exec_queue_to_guc(q); 1938 struct workqueue_struct *submit_wq = NULL; 1939 struct xe_guc_exec_queue *ge; 1940 long timeout; 1941 int err, i; 1942 1943 xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); 1944 1945 ge = kzalloc_obj(*ge); 1946 if (!ge) 1947 return -ENOMEM; 1948 1949 q->guc = ge; 1950 ge->q = q; 1951 init_rcu_head(&ge->rcu); 1952 init_waitqueue_head(&ge->suspend_wait); 1953 1954 for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) 1955 INIT_LIST_HEAD(&ge->static_msgs[i].link); 1956 1957 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : 1958 msecs_to_jiffies(q->sched_props.job_timeout_ms); 1959 1960 /* 1961 * Use primary queue's submit_wq for all secondary queues of a 1962 * multi queue group. This serialization avoids any locking around 1963 * CGP synchronization with GuC. 
1964 */ 1965 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1966 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 1967 1968 submit_wq = primary->guc->sched.base.submit_wq; 1969 } 1970 1971 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, 1972 submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, 1973 timeout, guc_to_gt(guc)->ordered_wq, NULL, 1974 q->name, gt_to_xe(q->gt)->drm.dev); 1975 if (err) 1976 goto err_free; 1977 1978 sched = &ge->sched; 1979 err = xe_sched_entity_init(&ge->entity, sched); 1980 if (err) 1981 goto err_sched; 1982 1983 mutex_lock(&guc->submission_state.lock); 1984 1985 err = alloc_guc_id(guc, q); 1986 if (err) 1987 goto err_entity; 1988 1989 q->entity = &ge->entity; 1990 1991 if (xe_guc_read_stopped(guc) || vf_recovery(guc)) 1992 xe_sched_stop(sched); 1993 1994 mutex_unlock(&guc->submission_state.lock); 1995 1996 xe_exec_queue_assign_name(q, q->guc->id); 1997 1998 /* 1999 * Maintain secondary queues of the multi queue group in a list 2000 * for handling dependencies across the queues in the group. 
2001 */ 2002 if (xe_exec_queue_is_multi_queue_secondary(q)) { 2003 struct xe_exec_queue_group *group = q->multi_queue.group; 2004 2005 INIT_LIST_HEAD(&q->multi_queue.link); 2006 mutex_lock(&group->list_lock); 2007 if (group->stopped) 2008 WRITE_ONCE(q->guc->sched.base.pause_submit, true); 2009 list_add_tail(&q->multi_queue.link, &group->list); 2010 mutex_unlock(&group->list_lock); 2011 } 2012 2013 if (xe_exec_queue_is_multi_queue(q)) 2014 trace_xe_exec_queue_create_multi_queue(q); 2015 else 2016 trace_xe_exec_queue_create(q); 2017 2018 return 0; 2019 2020 err_entity: 2021 mutex_unlock(&guc->submission_state.lock); 2022 xe_sched_entity_fini(&ge->entity); 2023 err_sched: 2024 xe_sched_fini(&ge->sched); 2025 err_free: 2026 kfree(ge); 2027 2028 return err; 2029 } 2030 2031 static void guc_exec_queue_kill(struct xe_exec_queue *q) 2032 { 2033 trace_xe_exec_queue_kill(q); 2034 set_exec_queue_killed(q); 2035 __suspend_fence_signal(q); 2036 xe_guc_exec_queue_trigger_cleanup(q); 2037 } 2038 2039 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 2040 u32 opcode) 2041 { 2042 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); 2043 2044 INIT_LIST_HEAD(&msg->link); 2045 msg->opcode = opcode & OPCODE_MASK; 2046 msg->private_data = q; 2047 2048 trace_xe_sched_msg_add(msg); 2049 if (opcode & MSG_HEAD) 2050 xe_sched_add_msg_head(&q->guc->sched, msg); 2051 else if (opcode & MSG_LOCKED) 2052 xe_sched_add_msg_locked(&q->guc->sched, msg); 2053 else 2054 xe_sched_add_msg(&q->guc->sched, msg); 2055 } 2056 2057 static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, 2058 struct xe_sched_msg *msg, 2059 u32 opcode) 2060 { 2061 if (!list_empty(&msg->link)) 2062 return; 2063 2064 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD); 2065 } 2066 2067 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, 2068 struct xe_sched_msg *msg, 2069 u32 opcode) 2070 { 2071 if (!list_empty(&msg->link)) 2072 return false; 2073 
2074 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 2075 2076 return true; 2077 } 2078 2079 #define STATIC_MSG_CLEANUP 0 2080 #define STATIC_MSG_SUSPEND 1 2081 #define STATIC_MSG_RESUME 2 2082 static void guc_exec_queue_destroy(struct xe_exec_queue *q) 2083 { 2084 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2085 2086 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) 2087 guc_exec_queue_add_msg(q, msg, CLEANUP); 2088 else 2089 __guc_exec_queue_destroy(exec_queue_to_guc(q), q); 2090 } 2091 2092 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 2093 enum xe_exec_queue_priority priority) 2094 { 2095 struct xe_sched_msg *msg; 2096 2097 if (q->sched_props.priority == priority || 2098 exec_queue_killed_or_banned_or_wedged(q)) 2099 return 0; 2100 2101 msg = kmalloc_obj(*msg); 2102 if (!msg) 2103 return -ENOMEM; 2104 2105 q->sched_props.priority = priority; 2106 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2107 2108 return 0; 2109 } 2110 2111 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 2112 { 2113 struct xe_sched_msg *msg; 2114 2115 if (q->sched_props.timeslice_us == timeslice_us || 2116 exec_queue_killed_or_banned_or_wedged(q)) 2117 return 0; 2118 2119 msg = kmalloc_obj(*msg); 2120 if (!msg) 2121 return -ENOMEM; 2122 2123 q->sched_props.timeslice_us = timeslice_us; 2124 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2125 2126 return 0; 2127 } 2128 2129 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 2130 u32 preempt_timeout_us) 2131 { 2132 struct xe_sched_msg *msg; 2133 2134 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 2135 exec_queue_killed_or_banned_or_wedged(q)) 2136 return 0; 2137 2138 msg = kmalloc_obj(*msg); 2139 if (!msg) 2140 return -ENOMEM; 2141 2142 q->sched_props.preempt_timeout_us = preempt_timeout_us; 2143 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2144 2145 return 0; 2146 } 2147 2148 static int 
guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, 2149 enum xe_multi_queue_priority priority) 2150 { 2151 struct xe_sched_msg *msg; 2152 2153 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); 2154 2155 if (exec_queue_killed_or_banned_or_wedged(q)) 2156 return 0; 2157 2158 msg = kmalloc_obj(*msg); 2159 if (!msg) 2160 return -ENOMEM; 2161 2162 scoped_guard(spinlock, &q->multi_queue.lock) { 2163 if (q->multi_queue.priority == priority) { 2164 kfree(msg); 2165 return 0; 2166 } 2167 2168 q->multi_queue.priority = priority; 2169 } 2170 2171 guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); 2172 2173 return 0; 2174 } 2175 2176 static int guc_exec_queue_suspend(struct xe_exec_queue *q) 2177 { 2178 struct xe_gpu_scheduler *sched = &q->guc->sched; 2179 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2180 2181 if (exec_queue_killed_or_banned_or_wedged(q)) 2182 return -EINVAL; 2183 2184 xe_sched_msg_lock(sched); 2185 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) 2186 q->guc->suspend_pending = true; 2187 xe_sched_msg_unlock(sched); 2188 2189 return 0; 2190 } 2191 2192 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 2193 { 2194 struct xe_guc *guc = exec_queue_to_guc(q); 2195 struct xe_device *xe = guc_to_xe(guc); 2196 int ret; 2197 2198 /* 2199 * Likely don't need to check exec_queue_killed() as we clear 2200 * suspend_pending upon kill but to be paranoid but races in which 2201 * suspend_pending is set after kill also check kill here. 
2202 */ 2203 #define WAIT_COND \ 2204 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ 2205 xe_guc_read_stopped(guc)) 2206 2207 retry: 2208 if (IS_SRIOV_VF(xe)) 2209 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || 2210 vf_recovery(guc), 2211 HZ * 5); 2212 else 2213 ret = wait_event_interruptible_timeout(q->guc->suspend_wait, 2214 WAIT_COND, HZ * 5); 2215 2216 if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc)))) 2217 return -EAGAIN; 2218 2219 if (!ret) { 2220 xe_gt_warn(guc_to_gt(guc), 2221 "Suspend fence, guc_id=%d, failed to respond", 2222 q->guc->id); 2223 /* XXX: Trigger GT reset? */ 2224 return -ETIME; 2225 } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { 2226 /* Corner case on RESFIX DONE where vf_recovery() changes */ 2227 goto retry; 2228 } 2229 2230 #undef WAIT_COND 2231 2232 return ret < 0 ? ret : 0; 2233 } 2234 2235 static void guc_exec_queue_resume(struct xe_exec_queue *q) 2236 { 2237 struct xe_gpu_scheduler *sched = &q->guc->sched; 2238 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 2239 struct xe_guc *guc = exec_queue_to_guc(q); 2240 2241 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); 2242 2243 xe_sched_msg_lock(sched); 2244 guc_exec_queue_try_add_msg(q, msg, RESUME); 2245 xe_sched_msg_unlock(sched); 2246 } 2247 2248 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 2249 { 2250 if (xe_exec_queue_is_multi_queue_secondary(q) && 2251 guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q))) 2252 return true; 2253 2254 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); 2255 } 2256 2257 static bool guc_exec_queue_active(struct xe_exec_queue *q) 2258 { 2259 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 2260 2261 return exec_queue_enabled(primary) && 2262 !exec_queue_pending_disable(primary); 2263 } 2264 2265 /* 2266 * All of these functions are an abstraction layer which other parts of Xe can 2267 * use to trap into the GuC 
backend. All of these functions, aside from init, 2268 * really shouldn't do much other than trap into the DRM scheduler which 2269 * synchronizes these operations. 2270 */ 2271 static const struct xe_exec_queue_ops guc_exec_queue_ops = { 2272 .init = guc_exec_queue_init, 2273 .kill = guc_exec_queue_kill, 2274 .fini = guc_exec_queue_fini, 2275 .destroy = guc_exec_queue_destroy, 2276 .set_priority = guc_exec_queue_set_priority, 2277 .set_timeslice = guc_exec_queue_set_timeslice, 2278 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, 2279 .set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority, 2280 .suspend = guc_exec_queue_suspend, 2281 .suspend_wait = guc_exec_queue_suspend_wait, 2282 .resume = guc_exec_queue_resume, 2283 .reset_status = guc_exec_queue_reset_status, 2284 .active = guc_exec_queue_active, 2285 }; 2286 2287 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) 2288 { 2289 struct xe_gpu_scheduler *sched = &q->guc->sched; 2290 bool do_destroy = false; 2291 2292 /* Stop scheduling + flush any DRM scheduler operations */ 2293 xe_sched_submission_stop(sched); 2294 2295 /* Clean up lost G2H + reset engine state */ 2296 if (exec_queue_registered(q)) { 2297 if (exec_queue_destroyed(q)) 2298 do_destroy = true; 2299 } 2300 if (q->guc->suspend_pending) { 2301 set_exec_queue_suspended(q); 2302 suspend_fence_signal(q); 2303 } 2304 atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | 2305 EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | 2306 EXEC_QUEUE_STATE_SUSPENDED, 2307 &q->guc->state); 2308 q->guc->resume_time = 0; 2309 trace_xe_exec_queue_stop(q); 2310 2311 /* 2312 * Ban any engine (aside from kernel and engines used for VM ops) with a 2313 * started but not complete job or if a job has gone through a GT reset 2314 * more than twice. 
2315 */ 2316 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { 2317 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2318 bool ban = false; 2319 2320 if (job) { 2321 if ((xe_sched_job_started(job) && 2322 !xe_sched_job_completed(job)) || 2323 xe_sched_invalidate_job(job, 2)) { 2324 trace_xe_sched_job_ban(job); 2325 ban = true; 2326 } 2327 } 2328 2329 if (ban) { 2330 set_exec_queue_banned(q); 2331 xe_guc_exec_queue_trigger_cleanup(q); 2332 } 2333 } 2334 2335 if (do_destroy) 2336 __guc_exec_queue_destroy(guc, q); 2337 } 2338 2339 static int guc_submit_reset_prepare(struct xe_guc *guc) 2340 { 2341 int ret; 2342 2343 /* 2344 * Using an atomic here rather than submission_state.lock as this 2345 * function can be called while holding the CT lock (engine reset 2346 * failure). submission_state.lock needs the CT lock to resubmit jobs. 2347 * Atomic is not ideal, but it works to prevent against concurrent reset 2348 * and releasing any TDRs waiting on guc->submission_state.stopped. 
2349 */ 2350 ret = atomic_fetch_or(1, &guc->submission_state.stopped); 2351 smp_wmb(); 2352 wake_up_all(&guc->ct.wq); 2353 2354 return ret; 2355 } 2356 2357 int xe_guc_submit_reset_prepare(struct xe_guc *guc) 2358 { 2359 if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) 2360 return 0; 2361 2362 if (!guc->submission_state.initialized) 2363 return 0; 2364 2365 return guc_submit_reset_prepare(guc); 2366 } 2367 2368 void xe_guc_submit_reset_wait(struct xe_guc *guc) 2369 { 2370 wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || 2371 !xe_guc_read_stopped(guc)); 2372 } 2373 2374 void xe_guc_submit_stop(struct xe_guc *guc) 2375 { 2376 struct xe_exec_queue *q; 2377 unsigned long index; 2378 2379 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 2380 2381 mutex_lock(&guc->submission_state.lock); 2382 2383 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2384 /* Prevent redundant attempts to stop parallel queues */ 2385 if (q->guc->id != index) 2386 continue; 2387 2388 guc_exec_queue_stop(guc, q); 2389 } 2390 2391 mutex_unlock(&guc->submission_state.lock); 2392 2393 /* 2394 * No one can enter the backend at this point, aside from new engine 2395 * creation which is protected by guc->submission_state.lock. 
/*
 * Roll back queue state bits that describe requests still in flight, and
 * record in the needs_resume / needs_cleanup / needs_suspend flags which
 * operations guc_exec_queue_replay_pending_state_change() has to queue again.
 */
static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
						       struct xe_exec_queue *q)
{
	bool pending_enable, pending_disable, pending_resume;

	pending_enable = exec_queue_pending_enable(q);
	pending_resume = exec_queue_pending_resume(q);

	/* An enable issued on behalf of a resume: replay it as a RESUME */
	if (pending_enable && pending_resume) {
		q->guc->needs_resume = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
			  q->guc->id);
	}

	/* An enable not tied to a resume: drop REGISTERED as well */
	if (pending_enable && !pending_resume) {
		clear_exec_queue_registered(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
			  q->guc->id);
	}

	/* In both cases the enable itself is undone */
	if (pending_enable) {
		clear_exec_queue_enabled(q);
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
			  q->guc->id);
	}

	/* Destroy of a still-registered queue: clear it and replay a CLEANUP */
	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
		clear_exec_queue_destroyed(q);
		q->guc->needs_cleanup = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
			  q->guc->id);
	}

	pending_disable = exec_queue_pending_disable(q);

	/* A disable issued for a suspend: clear SUSPENDED and replay a SUSPEND */
	if (pending_disable && exec_queue_suspended(q)) {
		clear_exec_queue_suspended(q);
		q->guc->needs_suspend = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
			  q->guc->id);
	}

	/*
	 * Undo the disable. ENABLED is set again unless an enable was also
	 * pending, in which case it was cleared above and stays cleared.
	 */
	if (pending_disable) {
		if (!pending_enable)
			set_exec_queue_enabled(q);
		clear_exec_queue_pending_disable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
			  q->guc->id);
	}

	q->guc->resume_time = 0;
}
0)); 2465 } 2466 2467 /* 2468 * This function is quite complex but only real way to ensure no state is lost 2469 * during VF resume flows. The function scans the queue state, make adjustments 2470 * as needed, and queues jobs / messages which replayed upon unpause. 2471 */ 2472 static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) 2473 { 2474 struct xe_gpu_scheduler *sched = &q->guc->sched; 2475 struct xe_sched_job *job; 2476 int i; 2477 2478 lockdep_assert_held(&guc->submission_state.lock); 2479 2480 /* Stop scheduling + flush any DRM scheduler operations */ 2481 xe_sched_submission_stop(sched); 2482 cancel_delayed_work_sync(&sched->base.work_tdr); 2483 2484 guc_exec_queue_revert_pending_state_change(guc, q); 2485 2486 if (xe_exec_queue_is_parallel(q)) { 2487 /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ 2488 struct xe_lrc *lrc = READ_ONCE(q->lrc[0]); 2489 2490 /* 2491 * NOP existing WQ commands that may contain stale GGTT 2492 * addresses. These will be replayed upon unpause. The hardware 2493 * seems to get confused if the WQ head/tail pointers are 2494 * adjusted. 2495 */ 2496 if (lrc) 2497 lrc_parallel_clear(lrc); 2498 } 2499 2500 job = xe_sched_first_pending_job(sched); 2501 if (job) { 2502 job->restore_replay = true; 2503 2504 /* 2505 * Adjust software tail so jobs submitted overwrite previous 2506 * position in ring buffer with new GGTT addresses. 2507 */ 2508 for (i = 0; i < q->width; ++i) 2509 q->lrc[i]->ring.tail = job->ptrs[i].head; 2510 } 2511 } 2512 2513 /** 2514 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 
2515 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2516 */ 2517 void xe_guc_submit_pause(struct xe_guc *guc) 2518 { 2519 struct xe_exec_queue *q; 2520 unsigned long index; 2521 2522 mutex_lock(&guc->submission_state.lock); 2523 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2524 xe_sched_submission_stop(&q->guc->sched); 2525 mutex_unlock(&guc->submission_state.lock); 2526 } 2527 2528 /** 2529 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF. 2530 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2531 */ 2532 void xe_guc_submit_pause_vf(struct xe_guc *guc) 2533 { 2534 struct xe_exec_queue *q; 2535 unsigned long index; 2536 2537 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2538 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2539 2540 mutex_lock(&guc->submission_state.lock); 2541 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2542 /* Prevent redundant attempts to stop parallel queues */ 2543 if (q->guc->id != index) 2544 continue; 2545 2546 guc_exec_queue_pause(guc, q); 2547 } 2548 mutex_unlock(&guc->submission_state.lock); 2549 } 2550 2551 static void guc_exec_queue_start(struct xe_exec_queue *q) 2552 { 2553 struct xe_gpu_scheduler *sched = &q->guc->sched; 2554 2555 if (!exec_queue_killed_or_banned_or_wedged(q)) { 2556 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2557 int i; 2558 2559 trace_xe_exec_queue_resubmit(q); 2560 if (job) { 2561 for (i = 0; i < q->width; ++i) { 2562 /* 2563 * The GuC context is unregistered at this point 2564 * time, adjusting software ring tail ensures 2565 * jobs are rewritten in original placement, 2566 * adjusting LRC tail ensures the newly loaded 2567 * GuC / contexts only view the LRC tail 2568 * increasing as jobs are written out. 
2569 */ 2570 q->lrc[i]->ring.tail = job->ptrs[i].head; 2571 xe_lrc_set_ring_tail(q->lrc[i], 2572 xe_lrc_ring_head(q->lrc[i])); 2573 } 2574 } 2575 xe_sched_resubmit_jobs(sched); 2576 } 2577 2578 xe_sched_submission_start(sched); 2579 xe_sched_submission_resume_tdr(sched); 2580 } 2581 2582 int xe_guc_submit_start(struct xe_guc *guc) 2583 { 2584 struct xe_exec_queue *q; 2585 unsigned long index; 2586 2587 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 2588 2589 mutex_lock(&guc->submission_state.lock); 2590 atomic_dec(&guc->submission_state.stopped); 2591 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2592 /* Prevent redundant attempts to start parallel queues */ 2593 if (q->guc->id != index) 2594 continue; 2595 2596 guc_exec_queue_start(q); 2597 } 2598 mutex_unlock(&guc->submission_state.lock); 2599 2600 wake_up_all(&guc->ct.wq); 2601 2602 return 0; 2603 } 2604 2605 static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, 2606 struct xe_exec_queue *q) 2607 { 2608 struct xe_gpu_scheduler *sched = &q->guc->sched; 2609 struct xe_sched_job *job = NULL; 2610 struct drm_sched_job *s_job; 2611 bool restore_replay = false; 2612 2613 drm_sched_for_each_pending_job(s_job, &sched->base, NULL) { 2614 job = to_xe_sched_job(s_job); 2615 restore_replay |= job->restore_replay; 2616 if (restore_replay) { 2617 xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d", 2618 q->guc->id, xe_sched_job_seqno(job)); 2619 2620 q->ring_ops->emit_job(job); 2621 job->restore_replay = true; 2622 } 2623 } 2624 2625 if (job) 2626 job->last_replay = true; 2627 } 2628 2629 /** 2630 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF. 
/*
 * Queue again the CLEANUP / SUSPEND / RESUME messages that
 * guc_exec_queue_revert_pending_state_change() flagged through the needs_*
 * fields, clearing each flag once its message has been handled.
 */
static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg;

	if (q->guc->needs_cleanup) {
		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

		guc_exec_queue_add_msg(q, msg, CLEANUP);
		q->guc->needs_cleanup = false;
	}

	/* SUSPEND goes to the head of the message queue first ... */
	if (q->guc->needs_suspend) {
		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
		xe_sched_msg_unlock(sched);

		q->guc->needs_suspend = false;
	}

	/*
	 * ... and RESUME is added to the head afterwards, so that it ends up
	 * in front of the SUSPEND. The resume must precede the suspend in the
	 * message queue: a resume cannot be issued while a suspend is pending,
	 * but a suspend can be issued while a resume is pending.
	 */
	if (q->guc->needs_resume) {
		msg = q->guc->static_msgs + STATIC_MSG_RESUME;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
		xe_sched_msg_unlock(sched);

		q->guc->needs_resume = false;
	}
}
2736 */ 2737 if (q->guc->id != index || 2738 !drm_sched_is_stopped(&q->guc->sched.base)) 2739 continue; 2740 2741 guc_exec_queue_unpause(guc, q); 2742 } 2743 mutex_unlock(&guc->submission_state.lock); 2744 } 2745 2746 /** 2747 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC. 2748 * @guc: the &xe_guc struct instance whose scheduler is to be aborted 2749 */ 2750 void xe_guc_submit_pause_abort(struct xe_guc *guc) 2751 { 2752 struct xe_exec_queue *q; 2753 unsigned long index; 2754 2755 mutex_lock(&guc->submission_state.lock); 2756 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2757 struct xe_gpu_scheduler *sched = &q->guc->sched; 2758 2759 /* Prevent redundant attempts to stop parallel queues */ 2760 if (q->guc->id != index) 2761 continue; 2762 2763 xe_sched_submission_start(sched); 2764 guc_exec_queue_kill(q); 2765 } 2766 mutex_unlock(&guc->submission_state.lock); 2767 } 2768 2769 static struct xe_exec_queue * 2770 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) 2771 { 2772 struct xe_gt *gt = guc_to_gt(guc); 2773 struct xe_exec_queue *q; 2774 2775 if (unlikely(guc_id >= GUC_ID_MAX)) { 2776 xe_gt_err(gt, "Invalid guc_id %u\n", guc_id); 2777 return NULL; 2778 } 2779 2780 q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); 2781 if (unlikely(!q)) { 2782 xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id); 2783 return NULL; 2784 } 2785 2786 xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id); 2787 xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width)); 2788 2789 return q; 2790 } 2791 2792 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) 2793 { 2794 u32 action[] = { 2795 XE_GUC_ACTION_DEREGISTER_CONTEXT, 2796 q->guc->id, 2797 }; 2798 2799 xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); 2800 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 2801 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 2802 xe_gt_assert(guc_to_gt(guc), 
!exec_queue_pending_enable(q)); 2803 2804 trace_xe_exec_queue_deregister(q); 2805 2806 if (xe_exec_queue_is_multi_queue_secondary(q)) 2807 handle_deregister_done(guc, q); 2808 else 2809 xe_guc_ct_send_g2h_handler(&guc->ct, action, 2810 ARRAY_SIZE(action)); 2811 } 2812 2813 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, 2814 u32 runnable_state) 2815 { 2816 trace_xe_exec_queue_scheduling_done(q); 2817 2818 if (runnable_state == 1) { 2819 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); 2820 2821 q->guc->resume_time = ktime_get(); 2822 clear_exec_queue_pending_resume(q); 2823 clear_exec_queue_pending_enable(q); 2824 smp_wmb(); 2825 wake_up_all(&guc->ct.wq); 2826 } else { 2827 xe_gt_assert(guc_to_gt(guc), runnable_state == 0); 2828 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); 2829 2830 if (q->guc->suspend_pending) { 2831 clear_exec_queue_pending_disable(q); 2832 suspend_fence_signal(q); 2833 } else { 2834 if (exec_queue_banned(q)) { 2835 smp_wmb(); 2836 wake_up_all(&guc->ct.wq); 2837 } 2838 if (exec_queue_destroyed(q)) { 2839 /* 2840 * Make sure to clear the pending_disable only 2841 * after sampling the destroyed state. We want 2842 * to ensure we don't trigger the unregister too 2843 * early with something intending to only 2844 * disable scheduling. The caller doing the 2845 * destroy must wait for an ongoing 2846 * pending_disable before marking as destroyed. 
2847 */ 2848 clear_exec_queue_pending_disable(q); 2849 deregister_exec_queue(guc, q); 2850 } else { 2851 clear_exec_queue_pending_disable(q); 2852 } 2853 } 2854 } 2855 } 2856 2857 static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, 2858 struct xe_exec_queue *q, 2859 u32 runnable_state) 2860 { 2861 /* Take CT lock here as handle_sched_done() do send a h2g message */ 2862 mutex_lock(&guc->ct.lock); 2863 handle_sched_done(guc, q, runnable_state); 2864 mutex_unlock(&guc->ct.lock); 2865 } 2866 2867 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2868 { 2869 struct xe_exec_queue *q; 2870 u32 guc_id, runnable_state; 2871 2872 if (unlikely(len < 2)) 2873 return -EPROTO; 2874 2875 guc_id = msg[0]; 2876 runnable_state = msg[1]; 2877 2878 q = g2h_exec_queue_lookup(guc, guc_id); 2879 if (unlikely(!q)) 2880 return -EPROTO; 2881 2882 if (unlikely(!exec_queue_pending_enable(q) && 2883 !exec_queue_pending_disable(q))) { 2884 xe_gt_err(guc_to_gt(guc), 2885 "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", 2886 atomic_read(&q->guc->state), q->guc->id, 2887 runnable_state); 2888 return -EPROTO; 2889 } 2890 2891 handle_sched_done(guc, q, runnable_state); 2892 2893 return 0; 2894 } 2895 2896 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) 2897 { 2898 trace_xe_exec_queue_deregister_done(q); 2899 2900 clear_exec_queue_registered(q); 2901 __guc_exec_queue_destroy(guc, q); 2902 } 2903 2904 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2905 { 2906 struct xe_exec_queue *q; 2907 u32 guc_id; 2908 2909 if (unlikely(len < 1)) 2910 return -EPROTO; 2911 2912 guc_id = msg[0]; 2913 2914 q = g2h_exec_queue_lookup(guc, guc_id); 2915 if (unlikely(!q)) 2916 return -EPROTO; 2917 2918 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || 2919 exec_queue_pending_enable(q) || exec_queue_enabled(q)) { 2920 xe_gt_err(guc_to_gt(guc), 2921 "DEREGISTER_DONE: Unexpected engine 
state 0x%04x, guc_id=%d", 2922 atomic_read(&q->guc->state), q->guc->id); 2923 return -EPROTO; 2924 } 2925 2926 handle_deregister_done(guc, q); 2927 2928 return 0; 2929 } 2930 2931 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) 2932 { 2933 struct xe_gt *gt = guc_to_gt(guc); 2934 struct xe_exec_queue *q; 2935 u32 guc_id; 2936 2937 if (unlikely(len < 1)) 2938 return -EPROTO; 2939 2940 guc_id = msg[0]; 2941 2942 q = g2h_exec_queue_lookup(guc, guc_id); 2943 if (unlikely(!q)) 2944 return -EPROTO; 2945 2946 xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x", 2947 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id, 2948 atomic_read(&q->guc->state)); 2949 2950 trace_xe_exec_queue_reset(q); 2951 2952 /* 2953 * A banned engine is a NOP at this point (came from 2954 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel 2955 * jobs by setting timeout of the job to the minimum value kicking 2956 * guc_exec_queue_timedout_job. 2957 */ 2958 xe_guc_exec_queue_reset_trigger_cleanup(q); 2959 2960 return 0; 2961 } 2962 2963 /* 2964 * xe_guc_error_capture_handler - Handler of GuC captured message 2965 * @guc: The GuC object 2966 * @msg: Point to the message 2967 * @len: The message length 2968 * 2969 * When GuC captured data is ready, GuC will send message 2970 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be 2971 * called 1st to check status before process the data comes with the message. 2972 * 2973 * Returns: error code. 
0 if success 2974 */ 2975 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 2976 { 2977 u32 status; 2978 2979 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 2980 return -EPROTO; 2981 2982 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 2983 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 2984 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 2985 2986 xe_guc_capture_process(guc); 2987 2988 return 0; 2989 } 2990 2991 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 2992 u32 len) 2993 { 2994 struct xe_gt *gt = guc_to_gt(guc); 2995 struct xe_exec_queue *q; 2996 u32 guc_id; 2997 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 2998 2999 if (unlikely(!len || len > 2)) 3000 return -EPROTO; 3001 3002 guc_id = msg[0]; 3003 3004 if (len == 2) 3005 type = msg[1]; 3006 3007 if (guc_id == GUC_ID_UNKNOWN) { 3008 /* 3009 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 3010 * context. In such case only PF will be notified about that fault. 3011 */ 3012 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 3013 return 0; 3014 } 3015 3016 q = g2h_exec_queue_lookup(guc, guc_id); 3017 if (unlikely(!q)) 3018 return -EPROTO; 3019 3020 /* 3021 * The type is HW-defined and changes based on platform, so we don't 3022 * decode it in the kernel and only check if it is valid. 3023 * See bspec 54047 and 72187 for details. 
3024 */ 3025 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 3026 xe_gt_info(gt, 3027 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 3028 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3029 else 3030 xe_gt_info(gt, 3031 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 3032 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3033 3034 trace_xe_exec_queue_memory_cat_error(q); 3035 3036 /* Treat the same as engine reset */ 3037 xe_guc_exec_queue_reset_trigger_cleanup(q); 3038 3039 return 0; 3040 } 3041 3042 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 3043 { 3044 struct xe_gt *gt = guc_to_gt(guc); 3045 u8 guc_class, instance; 3046 u32 reason; 3047 3048 if (unlikely(len != 3)) 3049 return -EPROTO; 3050 3051 guc_class = msg[0]; 3052 instance = msg[1]; 3053 reason = msg[2]; 3054 3055 /* Unexpected failure of a hardware feature, log an actual error */ 3056 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3057 guc_class, instance, reason); 3058 3059 xe_gt_reset_async(gt); 3060 3061 return 0; 3062 } 3063 3064 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 3065 u32 len) 3066 { 3067 struct xe_gt *gt = guc_to_gt(guc); 3068 struct xe_device *xe = guc_to_xe(guc); 3069 struct xe_exec_queue *q; 3070 u32 guc_id = msg[2]; 3071 3072 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3073 drm_err(&xe->drm, "Invalid length %u", len); 3074 return -EPROTO; 3075 } 3076 3077 q = g2h_exec_queue_lookup(guc, guc_id); 3078 if (unlikely(!q)) 3079 return -EPROTO; 3080 3081 xe_gt_dbg(gt, 3082 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3083 msg[0] & 1 ? 
"uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3084 3085 trace_xe_exec_queue_cgp_context_error(q); 3086 3087 /* Treat the same as engine reset */ 3088 xe_guc_exec_queue_reset_trigger_cleanup(q); 3089 3090 return 0; 3091 } 3092 3093 /** 3094 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3095 * @guc: guc 3096 * @msg: message indicating CGP sync done 3097 * @len: length of message 3098 * 3099 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3100 * for CGP synchronization to complete. 3101 * 3102 * Return: 0 on success, -EPROTO for malformed messages. 3103 */ 3104 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3105 { 3106 struct xe_device *xe = guc_to_xe(guc); 3107 struct xe_exec_queue *q; 3108 u32 guc_id = msg[0]; 3109 3110 if (unlikely(len < 1)) { 3111 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3112 return -EPROTO; 3113 } 3114 3115 q = g2h_exec_queue_lookup(guc, guc_id); 3116 if (unlikely(!q)) 3117 return -EPROTO; 3118 3119 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3120 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3121 return -EPROTO; 3122 } 3123 3124 /* Wakeup the serialized cgp update wait */ 3125 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3126 xe_guc_ct_wake_waiters(&guc->ct); 3127 3128 return 0; 3129 } 3130 3131 static void 3132 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3133 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3134 { 3135 struct xe_guc *guc = exec_queue_to_guc(q); 3136 struct xe_device *xe = guc_to_xe(guc); 3137 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3138 int i; 3139 3140 snapshot->guc.wqi_head = q->guc->wqi_head; 3141 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3142 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3143 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3144 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 
3145 wq_desc.wq_status); 3146 3147 if (snapshot->parallel.wq_desc.head != 3148 snapshot->parallel.wq_desc.tail) { 3149 for (i = snapshot->parallel.wq_desc.head; 3150 i != snapshot->parallel.wq_desc.tail; 3151 i = (i + sizeof(u32)) % WQ_SIZE) 3152 snapshot->parallel.wq[i / sizeof(u32)] = 3153 parallel_read(xe, map, wq[i / sizeof(u32)]); 3154 } 3155 } 3156 3157 static void 3158 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3159 struct drm_printer *p) 3160 { 3161 int i; 3162 3163 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3164 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3165 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3166 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3167 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3168 3169 if (snapshot->parallel.wq_desc.head != 3170 snapshot->parallel.wq_desc.tail) { 3171 for (i = snapshot->parallel.wq_desc.head; 3172 i != snapshot->parallel.wq_desc.tail; 3173 i = (i + sizeof(u32)) % WQ_SIZE) 3174 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3175 snapshot->parallel.wq[i / sizeof(u32)]); 3176 } 3177 } 3178 3179 /** 3180 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3181 * @q: faulty exec queue 3182 * 3183 * This can be printed out in a later stage like during dev_coredump 3184 * analysis. 3185 * 3186 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3187 * caller, using `xe_guc_exec_queue_snapshot_free`. 
3188 */ 3189 struct xe_guc_submit_exec_queue_snapshot * 3190 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 3191 { 3192 struct xe_gpu_scheduler *sched = &q->guc->sched; 3193 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3194 int i; 3195 3196 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC); 3197 3198 if (!snapshot) 3199 return NULL; 3200 3201 snapshot->guc.id = q->guc->id; 3202 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 3203 snapshot->class = q->class; 3204 snapshot->logical_mask = q->logical_mask; 3205 snapshot->width = q->width; 3206 snapshot->refcount = kref_read(&q->refcount); 3207 snapshot->sched_timeout = sched->base.timeout; 3208 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 3209 snapshot->sched_props.preempt_timeout_us = 3210 q->sched_props.preempt_timeout_us; 3211 3212 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width, 3213 GFP_ATOMIC); 3214 3215 if (snapshot->lrc) { 3216 for (i = 0; i < q->width; ++i) { 3217 struct xe_lrc *lrc = q->lrc[i]; 3218 3219 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 3220 } 3221 } 3222 3223 snapshot->schedule_state = atomic_read(&q->guc->state); 3224 snapshot->exec_queue_flags = q->flags; 3225 3226 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 3227 if (snapshot->parallel_execution) 3228 guc_exec_queue_wq_snapshot_capture(q, snapshot); 3229 3230 if (xe_exec_queue_is_multi_queue(q)) { 3231 snapshot->multi_queue.valid = true; 3232 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; 3233 snapshot->multi_queue.pos = q->multi_queue.pos; 3234 } 3235 3236 return snapshot; 3237 } 3238 3239 /** 3240 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. 3241 * @snapshot: Previously captured snapshot of job. 3242 * 3243 * This captures some data that requires taking some locks, so it cannot be done in signaling path. 
3244 */ 3245 void 3246 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3247 { 3248 int i; 3249 3250 if (!snapshot || !snapshot->lrc) 3251 return; 3252 3253 for (i = 0; i < snapshot->width; ++i) 3254 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3255 } 3256 3257 /** 3258 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3259 * @snapshot: GuC Submit Engine snapshot object. 3260 * @p: drm_printer where it will be printed out. 3261 * 3262 * This function prints out a given GuC Submit Engine snapshot object. 3263 */ 3264 void 3265 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3266 struct drm_printer *p) 3267 { 3268 int i; 3269 3270 if (!snapshot) 3271 return; 3272 3273 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id); 3274 drm_printf(p, "\tName: %s\n", snapshot->name); 3275 drm_printf(p, "\tClass: %d\n", snapshot->class); 3276 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); 3277 drm_printf(p, "\tWidth: %d\n", snapshot->width); 3278 drm_printf(p, "\tRef: %d\n", snapshot->refcount); 3279 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); 3280 drm_printf(p, "\tTimeslice: %u (us)\n", 3281 snapshot->sched_props.timeslice_us); 3282 drm_printf(p, "\tPreempt timeout: %u (us)\n", 3283 snapshot->sched_props.preempt_timeout_us); 3284 3285 for (i = 0; snapshot->lrc && i < snapshot->width; ++i) 3286 xe_lrc_snapshot_print(snapshot->lrc[i], p); 3287 3288 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); 3289 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); 3290 3291 if (snapshot->parallel_execution) 3292 guc_exec_queue_wq_snapshot_print(snapshot, p); 3293 3294 if (snapshot->multi_queue.valid) { 3295 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); 3296 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); 3297 } 3298 } 3299 3300 /** 3301 * 
xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given 3302 * snapshot. 3303 * @snapshot: GuC Submit Engine snapshot object. 3304 * 3305 * This function free all the memory that needed to be allocated at capture 3306 * time. 3307 */ 3308 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3309 { 3310 int i; 3311 3312 if (!snapshot) 3313 return; 3314 3315 if (snapshot->lrc) { 3316 for (i = 0; i < snapshot->width; i++) 3317 xe_lrc_snapshot_free(snapshot->lrc[i]); 3318 kfree(snapshot->lrc); 3319 } 3320 kfree(snapshot); 3321 } 3322 3323 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) 3324 { 3325 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3326 3327 snapshot = xe_guc_exec_queue_snapshot_capture(q); 3328 xe_guc_exec_queue_snapshot_print(snapshot, p); 3329 xe_guc_exec_queue_snapshot_free(snapshot); 3330 } 3331 3332 /** 3333 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. 3334 * @q: Execution queue 3335 * @ctx_type: Type of the context 3336 * 3337 * This function registers the execution queue with the guc. Special context 3338 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE 3339 * are only applicable for IGPU and in the VF. 3340 * Submits the execution queue to GUC after registering it. 3341 * 3342 * Returns - None. 
3343 */ 3344 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3345 { 3346 struct xe_guc *guc = exec_queue_to_guc(q); 3347 struct xe_device *xe = guc_to_xe(guc); 3348 struct xe_gt *gt = guc_to_gt(guc); 3349 3350 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3351 xe_gt_assert(gt, !IS_DGFX(xe)); 3352 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3353 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3354 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3355 3356 register_exec_queue(q, ctx_type); 3357 enable_scheduling(q); 3358 } 3359 3360 /** 3361 * xe_guc_submit_print - GuC Submit Print. 3362 * @guc: GuC. 3363 * @p: drm_printer where it will be printed out. 3364 * 3365 * This function capture and prints snapshots of **all** GuC Engines. 3366 */ 3367 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3368 { 3369 struct xe_exec_queue *q; 3370 unsigned long index; 3371 3372 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3373 return; 3374 3375 mutex_lock(&guc->submission_state.lock); 3376 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3377 guc_exec_queue_print(q, p); 3378 mutex_unlock(&guc->submission_state.lock); 3379 } 3380 3381 /** 3382 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues 3383 * registered with the GuC 3384 * @guc: GuC. 3385 * 3386 * Return: true if any MLRC queue is registered with the GuC, false otherwise. 3387 */ 3388 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) 3389 { 3390 struct xe_exec_queue *q; 3391 unsigned long index; 3392 3393 guard(mutex)(&guc->submission_state.lock); 3394 3395 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3396 if (q->width > 1) 3397 return true; 3398 3399 return false; 3400 } 3401 3402 /** 3403 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3404 * exec queues registered to given GuC. 
3405 * @guc: the &xe_guc struct instance 3406 * @scratch: scratch buffer to be used as temporary storage 3407 * 3408 * Returns: zero on success, negative error code on failure. 3409 */ 3410 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3411 { 3412 struct xe_exec_queue *q; 3413 unsigned long index; 3414 int err = 0; 3415 3416 mutex_lock(&guc->submission_state.lock); 3417 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3418 /* Prevent redundant attempts to stop parallel queues */ 3419 if (q->guc->id != index) 3420 continue; 3421 3422 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3423 if (err) 3424 break; 3425 } 3426 mutex_unlock(&guc->submission_state.lock); 3427 3428 return err; 3429 } 3430