// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule done message being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define ENGINE_STATE_ENABLED			(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define ENGINE_STATE_SUSPENDED			(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define ENGINE_STATE_KILLED			(1 << 7)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return
atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; 125 } 126 127 static void set_exec_queue_destroyed(struct xe_exec_queue *q) 128 { 129 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 130 } 131 132 static bool exec_queue_banned(struct xe_exec_queue *q) 133 { 134 return (q->flags & EXEC_QUEUE_FLAG_BANNED); 135 } 136 137 static void set_exec_queue_banned(struct xe_exec_queue *q) 138 { 139 q->flags |= EXEC_QUEUE_FLAG_BANNED; 140 } 141 142 static bool exec_queue_suspended(struct xe_exec_queue *q) 143 { 144 return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED; 145 } 146 147 static void set_exec_queue_suspended(struct xe_exec_queue *q) 148 { 149 atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state); 150 } 151 152 static void clear_exec_queue_suspended(struct xe_exec_queue *q) 153 { 154 atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state); 155 } 156 157 static bool exec_queue_reset(struct xe_exec_queue *q) 158 { 159 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; 160 } 161 162 static void set_exec_queue_reset(struct xe_exec_queue *q) 163 { 164 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); 165 } 166 167 static bool exec_queue_killed(struct xe_exec_queue *q) 168 { 169 return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED; 170 } 171 172 static void set_exec_queue_killed(struct xe_exec_queue *q) 173 { 174 atomic_or(ENGINE_STATE_KILLED, &q->guc->state); 175 } 176 177 static bool exec_queue_killed_or_banned(struct xe_exec_queue *q) 178 { 179 return exec_queue_killed(q) || exec_queue_banned(q); 180 } 181 182 #ifdef CONFIG_PROVE_LOCKING 183 static int alloc_submit_wq(struct xe_guc *guc) 184 { 185 int i; 186 187 for (i = 0; i < NUM_SUBMIT_WQ; ++i) { 188 guc->submission_state.submit_wq_pool[i] = 189 alloc_ordered_workqueue("submit_wq", 0); 190 if (!guc->submission_state.submit_wq_pool[i]) 191 goto err_free; 192 } 193 194 return 0; 195 196 err_free: 197 while (i) 198 destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); 199 200 return -ENOMEM; 201 } 202 203 static void free_submit_wq(struct xe_guc *guc) 204 { 205 int i; 206 207 for (i = 0; i < NUM_SUBMIT_WQ; ++i) 208 destroy_workqueue(guc->submission_state.submit_wq_pool[i]); 209 } 210 211 static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) 212 { 213 int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; 214 215 return guc->submission_state.submit_wq_pool[idx]; 216 } 217 #else 218 static int alloc_submit_wq(struct xe_guc *guc) 219 { 220 return 0; 221 } 222 223 static void free_submit_wq(struct xe_guc *guc) 224 { 225 226 } 227 228 static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) 229 { 230 return NULL; 231 } 232 #endif 233 234 static void guc_submit_fini(struct drm_device *drm, void *arg) 235 { 236 struct xe_guc *guc = arg; 237 238 xa_destroy(&guc->submission_state.exec_queue_lookup); 239 ida_destroy(&guc->submission_state.guc_ids); 240 bitmap_free(guc->submission_state.guc_ids_bitmap); 241 free_submit_wq(guc); 242 mutex_destroy(&guc->submission_state.lock); 243 } 244 245 #define GUC_ID_MAX 65535 246 #define GUC_ID_NUMBER_MLRC 4096 247 #define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) 248 #define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC 249 250 static const struct xe_exec_queue_ops guc_exec_queue_ops; 251 252 static void primelockdep(struct xe_guc *guc) 253 { 254 if (!IS_ENABLED(CONFIG_LOCKDEP)) 255 return; 256 257 fs_reclaim_acquire(GFP_KERNEL); 258 259 mutex_lock(&guc->submission_state.lock); 260 might_lock(&guc->submission_state.suspend.lock); 261 
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

int xe_guc_submit_init(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	guc->submission_state.guc_ids_bitmap =
		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
	if (!guc->submission_state.guc_ids_bitmap)
		return -ENOMEM;

	err = alloc_submit_wq(guc);
	if (err) {
		bitmap_free(guc->submission_state.guc_ids_bitmap);
		return err;
	}

	gt->exec_queue_ops = &guc_exec_queue_ops;

	mutex_init(&guc->submission_state.lock);
	xa_init(&guc->submission_state.exec_queue_lookup);
	ida_init(&guc->submission_state.guc_ids);

	spin_lock_init(&guc->submission_state.suspend.lock);
	guc->submission_state.suspend.context = dma_fence_context_alloc(1);

	primelockdep(guc);

	err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
	if (err)
		return err;

	return 0;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	if (xe_exec_queue_is_parallel(q))
		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
				      q->guc->id - GUC_ID_START_MLRC,
				      order_base_2(q->width));
	else
		ida_free(&guc->submission_state.guc_ids, q->guc->id);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	void *ptr;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case the user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
330 */ 331 lockdep_assert_held(&guc->submission_state.lock); 332 333 if (xe_exec_queue_is_parallel(q)) { 334 void *bitmap = guc->submission_state.guc_ids_bitmap; 335 336 ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, 337 order_base_2(q->width)); 338 } else { 339 ret = ida_alloc_max(&guc->submission_state.guc_ids, 340 GUC_ID_NUMBER_SLRC - 1, GFP_NOWAIT); 341 } 342 if (ret < 0) 343 return ret; 344 345 q->guc->id = ret; 346 if (xe_exec_queue_is_parallel(q)) 347 q->guc->id += GUC_ID_START_MLRC; 348 349 for (i = 0; i < q->width; ++i) { 350 ptr = xa_store(&guc->submission_state.exec_queue_lookup, 351 q->guc->id + i, q, GFP_NOWAIT); 352 if (IS_ERR(ptr)) { 353 ret = PTR_ERR(ptr); 354 goto err_release; 355 } 356 } 357 358 return 0; 359 360 err_release: 361 __release_guc_id(guc, q, i); 362 363 return ret; 364 } 365 366 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 367 { 368 mutex_lock(&guc->submission_state.lock); 369 __release_guc_id(guc, q, q->width); 370 mutex_unlock(&guc->submission_state.lock); 371 } 372 373 struct exec_queue_policy { 374 u32 count; 375 struct guc_update_exec_queue_policy h2g; 376 }; 377 378 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) 379 { 380 size_t bytes = sizeof(policy->h2g.header) + 381 (sizeof(policy->h2g.klv[0]) * policy->count); 382 383 return bytes / sizeof(u32); 384 } 385 386 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, 387 u16 guc_id) 388 { 389 policy->h2g.header.action = 390 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 391 policy->h2g.header.guc_id = guc_id; 392 policy->count = 0; 393 } 394 395 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ 396 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ 397 u32 data) \ 398 { \ 399 XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 400 \ 401 policy->h2g.klv[policy->count].kl = \ 402 FIELD_PREP(GUC_KLV_0_KEY, \ 403 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 404 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 405 policy->h2g.klv[policy->count].value = data; \ 406 policy->count++; \ 407 } 408 409 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 410 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 411 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) 412 #undef MAKE_EXEC_QUEUE_POLICY_ADD 413 414 static const int xe_exec_queue_prio_to_guc[] = { 415 [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, 416 [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, 417 [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, 418 [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, 419 }; 420 421 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) 422 { 423 struct exec_queue_policy policy; 424 struct xe_device *xe = guc_to_xe(guc); 425 enum xe_exec_queue_priority prio = q->sched_props.priority; 426 u32 timeslice_us = q->sched_props.timeslice_us; 427 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; 428 429 xe_assert(xe, exec_queue_registered(q)); 430 431 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 432 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); 433 __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); 434 __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); 435 436 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 437 __guc_exec_queue_policy_action_size(&policy), 0, 0); 438 } 439 440 static void 
set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) 441 { 442 struct exec_queue_policy policy; 443 444 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 445 __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); 446 447 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 448 __guc_exec_queue_policy_action_size(&policy), 0, 0); 449 } 450 451 #define parallel_read(xe_, map_, field_) \ 452 xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 453 field_) 454 #define parallel_write(xe_, map_, field_, val_) \ 455 xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 456 field_, val_) 457 458 static void __register_mlrc_engine(struct xe_guc *guc, 459 struct xe_exec_queue *q, 460 struct guc_ctxt_registration_info *info) 461 { 462 #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) 463 struct xe_device *xe = guc_to_xe(guc); 464 u32 action[MAX_MLRC_REG_SIZE]; 465 int len = 0; 466 int i; 467 468 xe_assert(xe, xe_exec_queue_is_parallel(q)); 469 470 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 471 action[len++] = info->flags; 472 action[len++] = info->context_idx; 473 action[len++] = info->engine_class; 474 action[len++] = info->engine_submit_mask; 475 action[len++] = info->wq_desc_lo; 476 action[len++] = info->wq_desc_hi; 477 action[len++] = info->wq_base_lo; 478 action[len++] = info->wq_base_hi; 479 action[len++] = info->wq_size; 480 action[len++] = q->width; 481 action[len++] = info->hwlrca_lo; 482 action[len++] = info->hwlrca_hi; 483 484 for (i = 1; i < q->width; ++i) { 485 struct xe_lrc *lrc = q->lrc + i; 486 487 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); 488 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); 489 } 490 491 xe_assert(xe, len <= MAX_MLRC_REG_SIZE); 492 #undef MAX_MLRC_REG_SIZE 493 494 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 495 } 496 497 static void __register_engine(struct xe_guc *guc, 498 struct guc_ctxt_registration_info *info) 499 { 500 u32 action[] = { 501 XE_GUC_ACTION_REGISTER_CONTEXT, 502 info->flags, 503 info->context_idx, 504 info->engine_class, 505 info->engine_submit_mask, 506 info->wq_desc_lo, 507 info->wq_desc_hi, 508 info->wq_base_lo, 509 info->wq_base_hi, 510 info->wq_size, 511 info->hwlrca_lo, 512 info->hwlrca_hi, 513 }; 514 515 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 516 } 517 518 static void register_engine(struct xe_exec_queue *q) 519 { 520 struct xe_guc *guc = exec_queue_to_guc(q); 521 struct xe_device *xe = guc_to_xe(guc); 522 struct xe_lrc *lrc = q->lrc; 523 struct guc_ctxt_registration_info info; 524 525 xe_assert(xe, !exec_queue_registered(q)); 526 527 memset(&info, 0, sizeof(info)); 528 info.context_idx = q->guc->id; 529 info.engine_class = xe_engine_class_to_guc_class(q->class); 530 info.engine_submit_mask = q->logical_mask; 531 info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); 532 info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); 533 info.flags = CONTEXT_REGISTRATION_FLAG_KMD; 534 535 if (xe_exec_queue_is_parallel(q)) { 536 u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); 537 struct iosys_map map = xe_lrc_parallel_map(lrc); 538 539 info.wq_desc_lo = lower_32_bits(ggtt_addr + 540 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 541 info.wq_desc_hi = upper_32_bits(ggtt_addr + 542 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 543 info.wq_base_lo = lower_32_bits(ggtt_addr + 544 offsetof(struct guc_submit_parallel_scratch, wq[0])); 545 info.wq_base_hi = upper_32_bits(ggtt_addr + 546 offsetof(struct 
guc_submit_parallel_scratch, wq[0])); 547 info.wq_size = WQ_SIZE; 548 549 q->guc->wqi_head = 0; 550 q->guc->wqi_tail = 0; 551 xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); 552 parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); 553 } 554 555 /* 556 * We must keep a reference for LR engines if engine is registered with 557 * the GuC as jobs signal immediately and can't destroy an engine if the 558 * GuC has a reference to it. 559 */ 560 if (xe_exec_queue_is_lr(q)) 561 xe_exec_queue_get(q); 562 563 set_exec_queue_registered(q); 564 trace_xe_exec_queue_register(q); 565 if (xe_exec_queue_is_parallel(q)) 566 __register_mlrc_engine(guc, q, &info); 567 else 568 __register_engine(guc, &info); 569 init_policies(guc, q); 570 } 571 572 static u32 wq_space_until_wrap(struct xe_exec_queue *q) 573 { 574 return (WQ_SIZE - q->guc->wqi_tail); 575 } 576 577 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) 578 { 579 struct xe_guc *guc = exec_queue_to_guc(q); 580 struct xe_device *xe = guc_to_xe(guc); 581 struct iosys_map map = xe_lrc_parallel_map(q->lrc); 582 unsigned int sleep_period_ms = 1; 583 584 #define AVAILABLE_SPACE \ 585 CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) 586 if (wqi_size > AVAILABLE_SPACE) { 587 try_again: 588 q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); 589 if (wqi_size > AVAILABLE_SPACE) { 590 if (sleep_period_ms == 1024) { 591 xe_gt_reset_async(q->gt); 592 return -ENODEV; 593 } 594 595 msleep(sleep_period_ms); 596 sleep_period_ms <<= 1; 597 goto try_again; 598 } 599 } 600 #undef AVAILABLE_SPACE 601 602 return 0; 603 } 604 605 static int wq_noop_append(struct xe_exec_queue *q) 606 { 607 struct xe_guc *guc = exec_queue_to_guc(q); 608 struct xe_device *xe = guc_to_xe(guc); 609 struct iosys_map map = xe_lrc_parallel_map(q->lrc); 610 u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; 611 612 if (wq_wait_for_space(q, wq_space_until_wrap(q))) 613 return -ENODEV; 614 615 xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); 616 617 parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], 618 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 619 FIELD_PREP(WQ_LEN_MASK, len_dw)); 620 q->guc->wqi_tail = 0; 621 622 return 0; 623 } 624 625 static void wq_item_append(struct xe_exec_queue *q) 626 { 627 struct xe_guc *guc = exec_queue_to_guc(q); 628 struct xe_device *xe = guc_to_xe(guc); 629 struct iosys_map map = xe_lrc_parallel_map(q->lrc); 630 #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ 631 u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; 632 u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); 633 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 634 int i = 0, j; 635 636 if (wqi_size > wq_space_until_wrap(q)) { 637 if (wq_noop_append(q)) 638 return; 639 } 640 if (wq_wait_for_space(q, wqi_size)) 641 return; 642 643 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 644 FIELD_PREP(WQ_LEN_MASK, len_dw); 645 wqi[i++] = xe_lrc_descriptor(q->lrc); 646 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | 647 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64)); 648 wqi[i++] = 0; 649 for (j = 1; j < q->width; ++j) { 650 struct xe_lrc *lrc = q->lrc + j; 651 652 wqi[i++] = lrc->ring.tail / sizeof(u64); 653 } 654 655 xe_assert(xe, i == wqi_size / sizeof(u32)); 656 657 iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, 658 wq[q->guc->wqi_tail / sizeof(u32)])); 659 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); 660 q->guc->wqi_tail += wqi_size; 661 xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); 
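	/*
	 * Make sure the WQ item written above is visible in memory before the
	 * wq_desc.tail update below publishes it to the GuC.
	 */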
662 663 xe_device_wmb(xe); 664 665 map = xe_lrc_parallel_map(q->lrc); 666 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); 667 } 668 669 #define RESUME_PENDING ~0x0ull 670 static void submit_exec_queue(struct xe_exec_queue *q) 671 { 672 struct xe_guc *guc = exec_queue_to_guc(q); 673 struct xe_device *xe = guc_to_xe(guc); 674 struct xe_lrc *lrc = q->lrc; 675 u32 action[3]; 676 u32 g2h_len = 0; 677 u32 num_g2h = 0; 678 int len = 0; 679 bool extra_submit = false; 680 681 xe_assert(xe, exec_queue_registered(q)); 682 683 if (xe_exec_queue_is_parallel(q)) 684 wq_item_append(q); 685 else 686 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); 687 688 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) 689 return; 690 691 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { 692 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 693 action[len++] = q->guc->id; 694 action[len++] = GUC_CONTEXT_ENABLE; 695 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 696 num_g2h = 1; 697 if (xe_exec_queue_is_parallel(q)) 698 extra_submit = true; 699 700 q->guc->resume_time = RESUME_PENDING; 701 set_exec_queue_pending_enable(q); 702 set_exec_queue_enabled(q); 703 trace_xe_exec_queue_scheduling_enable(q); 704 } else { 705 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 706 action[len++] = q->guc->id; 707 trace_xe_exec_queue_submit(q); 708 } 709 710 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); 711 712 if (extra_submit) { 713 len = 0; 714 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 715 action[len++] = q->guc->id; 716 trace_xe_exec_queue_submit(q); 717 718 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 719 } 720 } 721 722 static struct dma_fence * 723 guc_exec_queue_run_job(struct drm_sched_job *drm_job) 724 { 725 struct xe_sched_job *job = to_xe_sched_job(drm_job); 726 struct xe_exec_queue *q = job->q; 727 struct xe_guc *guc = exec_queue_to_guc(q); 728 struct xe_device *xe = guc_to_xe(guc); 729 bool lr = xe_exec_queue_is_lr(q); 730 731 xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 732 exec_queue_banned(q) || exec_queue_suspended(q)); 733 734 trace_xe_sched_job_run(job); 735 736 if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { 737 if (!exec_queue_registered(q)) 738 register_engine(q); 739 if (!lr) /* LR jobs are emitted in the exec IOCTL */ 740 q->ring_ops->emit_job(job); 741 submit_exec_queue(q); 742 } 743 744 if (lr) { 745 xe_sched_job_set_error(job, -EOPNOTSUPP); 746 return NULL; 747 } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { 748 return job->fence; 749 } else { 750 return dma_fence_get(job->fence); 751 } 752 } 753 754 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) 755 { 756 struct xe_sched_job *job = to_xe_sched_job(drm_job); 757 758 trace_xe_sched_job_free(job); 759 xe_sched_job_put(job); 760 } 761 762 static int guc_read_stopped(struct xe_guc *guc) 763 { 764 return atomic_read(&guc->submission_state.stopped); 765 } 766 767 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ 768 u32 action[] = { \ 769 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ 770 q->guc->id, \ 771 GUC_CONTEXT_##enable_disable, \ 772 } 773 774 static void disable_scheduling_deregister(struct xe_guc *guc, 775 struct xe_exec_queue *q) 776 { 777 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 778 struct xe_device *xe = guc_to_xe(guc); 779 int ret; 780 781 set_min_preemption_timeout(guc, q); 782 smp_rmb(); 783 ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || 784 guc_read_stopped(guc), HZ * 5); 785 if (!ret) { 
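		/*
		 * The GuC never acknowledged the pending enable: restart
		 * submission so jobs are not stuck, then escalate to a full GT
		 * reset and queue the TDR immediately to recover.
		 */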
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		drm_warn(&xe->drm, "Pending enable failed to respond");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);

#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct drm_printer p = drm_err_printer(&xe->drm, NULL);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 adj_logical_mask = q->logical_mask;
	u32 width_mask = (0x1 << q->width) - 1;
	int i;
	bool cookie;

	if (q->vm && !q->vm->error_capture.capture_once) {
		q->vm->error_capture.capture_once = true;
		cookie = dma_fence_begin_signalling();
		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
			if (adj_logical_mask & BIT(i)) {
				adj_logical_mask |= width_mask << i;
				i += q->width;
			} else {
				++i;
			}
		}

		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		xe_guc_ct_print(&guc->ct, &p, true);
		guc_exec_queue_print(q, &p);
		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
			if (hwe->class != q->hwe->class ||
			    !(BIT(hwe->logical_instance) & adj_logical_mask))
				continue;
			xe_hw_engine_print(hwe, &p);
		}
		xe_analyze_vm(&p, q->vm, q->gt->info.id);
		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		dma_fence_end_signalling(cookie);
	}
}
#else
static void simple_error_capture(struct xe_exec_queue *q)
{
}
#endif

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gpu_scheduler *sched = &ge->sched;

	xe_assert(xe, xe_exec_queue_is_lr(q));
	trace_xe_exec_queue_lr_cleanup(q);

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to mark
	 * the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	xe_sched_submission_start(sched);
}

static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
	int err = -ETIME;
	int i = 0;

	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
			   xe_sched_job_seqno(job), q->guc->id, q->flags);
		xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
			   "Kernel-submitted job timed out\n");
		xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
			   "VM job timed out on non-killed execqueue\n");

		simple_error_capture(q);
		xe_devcoredump(job);
	} else {
		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
			xe_sched_job_seqno(job), q->guc->id, q->flags);
	}
	trace_xe_sched_job_timedout(job);

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			goto out;
		}
	}

	/* Engine state now stable, disable scheduling if needed */
	if (exec_queue_registered(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;
		set_exec_queue_banned(q);
		if (!exec_queue_destroyed(q)) {
			xe_exec_queue_get(q);
			disable_scheduling_deregister(guc, q);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret || guc_read_stopped(guc)) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto out;
		}
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);
	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

out:
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void __guc_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, fini_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	kfree(ge);
	xe_exec_queue_fini(q);
}

static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
{
	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
		__guc_exec_queue_fini_async(&q->guc->fini_async);
	else
		queue_work(system_wq, &q->guc->fini_async);
}

static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that it
	 * is.
1062 */ 1063 guc_exec_queue_fini_async(q); 1064 } 1065 1066 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) 1067 { 1068 struct xe_exec_queue *q = msg->private_data; 1069 struct xe_guc *guc = exec_queue_to_guc(q); 1070 struct xe_device *xe = guc_to_xe(guc); 1071 1072 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 1073 trace_xe_exec_queue_cleanup_entity(q); 1074 1075 if (exec_queue_registered(q)) 1076 disable_scheduling_deregister(guc, q); 1077 else 1078 __guc_exec_queue_fini(guc, q); 1079 } 1080 1081 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) 1082 { 1083 return !exec_queue_killed_or_banned(q) && exec_queue_registered(q); 1084 } 1085 1086 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) 1087 { 1088 struct xe_exec_queue *q = msg->private_data; 1089 struct xe_guc *guc = exec_queue_to_guc(q); 1090 1091 if (guc_exec_queue_allowed_to_change_state(q)) 1092 init_policies(guc, q); 1093 kfree(msg); 1094 } 1095 1096 static void suspend_fence_signal(struct xe_exec_queue *q) 1097 { 1098 struct xe_guc *guc = exec_queue_to_guc(q); 1099 struct xe_device *xe = guc_to_xe(guc); 1100 1101 xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || 1102 guc_read_stopped(guc)); 1103 xe_assert(xe, q->guc->suspend_pending); 1104 1105 q->guc->suspend_pending = false; 1106 smp_wmb(); 1107 wake_up(&q->guc->suspend_wait); 1108 } 1109 1110 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) 1111 { 1112 struct xe_exec_queue *q = msg->private_data; 1113 struct xe_guc *guc = exec_queue_to_guc(q); 1114 1115 if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && 1116 exec_queue_enabled(q)) { 1117 wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || 1118 guc_read_stopped(guc)); 1119 1120 if (!guc_read_stopped(guc)) { 1121 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1122 s64 since_resume_ms = 1123 ktime_ms_delta(ktime_get(), 1124 q->guc->resume_time); 1125 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1126 since_resume_ms; 1127 1128 if (wait_ms > 0 && q->guc->resume_time) 1129 msleep(wait_ms); 1130 1131 set_exec_queue_suspended(q); 1132 clear_exec_queue_enabled(q); 1133 set_exec_queue_pending_disable(q); 1134 trace_xe_exec_queue_scheduling_disable(q); 1135 1136 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1137 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1138 } 1139 } else if (q->guc->suspend_pending) { 1140 set_exec_queue_suspended(q); 1141 suspend_fence_signal(q); 1142 } 1143 } 1144 1145 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1146 { 1147 struct xe_exec_queue *q = msg->private_data; 1148 struct xe_guc *guc = exec_queue_to_guc(q); 1149 1150 if (guc_exec_queue_allowed_to_change_state(q)) { 1151 MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); 1152 1153 q->guc->resume_time = RESUME_PENDING; 1154 clear_exec_queue_suspended(q); 1155 set_exec_queue_pending_enable(q); 1156 set_exec_queue_enabled(q); 1157 trace_xe_exec_queue_scheduling_enable(q); 1158 1159 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1160 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1161 } else { 1162 clear_exec_queue_suspended(q); 1163 } 1164 } 1165 1166 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ 1167 #define SET_SCHED_PROPS 2 1168 #define SUSPEND 3 1169 #define RESUME 4 1170 1171 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) 1172 { 1173 trace_xe_sched_msg_recv(msg); 1174 1175 switch (msg->opcode) { 1176 case CLEANUP: 1177 
__guc_exec_queue_process_msg_cleanup(msg); 1178 break; 1179 case SET_SCHED_PROPS: 1180 __guc_exec_queue_process_msg_set_sched_props(msg); 1181 break; 1182 case SUSPEND: 1183 __guc_exec_queue_process_msg_suspend(msg); 1184 break; 1185 case RESUME: 1186 __guc_exec_queue_process_msg_resume(msg); 1187 break; 1188 default: 1189 XE_WARN_ON("Unknown message type"); 1190 } 1191 } 1192 1193 static const struct drm_sched_backend_ops drm_sched_ops = { 1194 .run_job = guc_exec_queue_run_job, 1195 .free_job = guc_exec_queue_free_job, 1196 .timedout_job = guc_exec_queue_timedout_job, 1197 }; 1198 1199 static const struct xe_sched_backend_ops xe_sched_ops = { 1200 .process_msg = guc_exec_queue_process_msg, 1201 }; 1202 1203 static int guc_exec_queue_init(struct xe_exec_queue *q) 1204 { 1205 struct xe_gpu_scheduler *sched; 1206 struct xe_guc *guc = exec_queue_to_guc(q); 1207 struct xe_device *xe = guc_to_xe(guc); 1208 struct xe_guc_exec_queue *ge; 1209 long timeout; 1210 int err; 1211 1212 xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); 1213 1214 ge = kzalloc(sizeof(*ge), GFP_KERNEL); 1215 if (!ge) 1216 return -ENOMEM; 1217 1218 q->guc = ge; 1219 ge->q = q; 1220 init_waitqueue_head(&ge->suspend_wait); 1221 1222 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : 1223 q->sched_props.job_timeout_ms; 1224 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, 1225 get_submit_wq(guc), 1226 q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, 1227 timeout, guc_to_gt(guc)->ordered_wq, NULL, 1228 q->name, gt_to_xe(q->gt)->drm.dev); 1229 if (err) 1230 goto err_free; 1231 1232 sched = &ge->sched; 1233 err = xe_sched_entity_init(&ge->entity, sched); 1234 if (err) 1235 goto err_sched; 1236 1237 if (xe_exec_queue_is_lr(q)) 1238 INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); 1239 1240 mutex_lock(&guc->submission_state.lock); 1241 1242 err = alloc_guc_id(guc, q); 1243 if (err) 1244 goto err_entity; 1245 1246 q->entity = &ge->entity; 1247 1248 if (guc_read_stopped(guc)) 1249 xe_sched_stop(sched); 1250 1251 mutex_unlock(&guc->submission_state.lock); 1252 1253 xe_exec_queue_assign_name(q, q->guc->id); 1254 1255 trace_xe_exec_queue_create(q); 1256 1257 return 0; 1258 1259 err_entity: 1260 xe_sched_entity_fini(&ge->entity); 1261 err_sched: 1262 xe_sched_fini(&ge->sched); 1263 err_free: 1264 kfree(ge); 1265 1266 return err; 1267 } 1268 1269 static void guc_exec_queue_kill(struct xe_exec_queue *q) 1270 { 1271 trace_xe_exec_queue_kill(q); 1272 set_exec_queue_killed(q); 1273 xe_guc_exec_queue_trigger_cleanup(q); 1274 } 1275 1276 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 1277 u32 opcode) 1278 { 1279 INIT_LIST_HEAD(&msg->link); 1280 msg->opcode = opcode; 1281 msg->private_data = q; 1282 1283 trace_xe_sched_msg_add(msg); 1284 xe_sched_add_msg(&q->guc->sched, msg); 1285 } 1286 1287 #define STATIC_MSG_CLEANUP 0 1288 #define STATIC_MSG_SUSPEND 1 1289 #define STATIC_MSG_RESUME 2 1290 static void guc_exec_queue_fini(struct xe_exec_queue *q) 1291 { 1292 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 1293 1294 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT)) 1295 guc_exec_queue_add_msg(q, msg, CLEANUP); 1296 else 1297 __guc_exec_queue_fini(exec_queue_to_guc(q), q); 1298 } 1299 1300 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 1301 enum xe_exec_queue_priority priority) 1302 { 1303 struct xe_sched_msg *msg; 1304 1305 if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q)) 1306 return 0; 1307 1308 msg = 
kmalloc(sizeof(*msg), GFP_KERNEL); 1309 if (!msg) 1310 return -ENOMEM; 1311 1312 q->sched_props.priority = priority; 1313 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1314 1315 return 0; 1316 } 1317 1318 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 1319 { 1320 struct xe_sched_msg *msg; 1321 1322 if (q->sched_props.timeslice_us == timeslice_us || 1323 exec_queue_killed_or_banned(q)) 1324 return 0; 1325 1326 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1327 if (!msg) 1328 return -ENOMEM; 1329 1330 q->sched_props.timeslice_us = timeslice_us; 1331 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1332 1333 return 0; 1334 } 1335 1336 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 1337 u32 preempt_timeout_us) 1338 { 1339 struct xe_sched_msg *msg; 1340 1341 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 1342 exec_queue_killed_or_banned(q)) 1343 return 0; 1344 1345 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1346 if (!msg) 1347 return -ENOMEM; 1348 1349 q->sched_props.preempt_timeout_us = preempt_timeout_us; 1350 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1351 1352 return 0; 1353 } 1354 1355 static int guc_exec_queue_suspend(struct xe_exec_queue *q) 1356 { 1357 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 1358 1359 if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending) 1360 return -EINVAL; 1361 1362 q->guc->suspend_pending = true; 1363 guc_exec_queue_add_msg(q, msg, SUSPEND); 1364 1365 return 0; 1366 } 1367 1368 static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 1369 { 1370 struct xe_guc *guc = exec_queue_to_guc(q); 1371 1372 wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || 1373 guc_read_stopped(guc)); 1374 } 1375 1376 static void guc_exec_queue_resume(struct xe_exec_queue *q) 1377 { 1378 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 1379 struct xe_guc *guc = exec_queue_to_guc(q); 1380 struct xe_device *xe = guc_to_xe(guc); 1381 1382 xe_assert(xe, !q->guc->suspend_pending); 1383 1384 guc_exec_queue_add_msg(q, msg, RESUME); 1385 } 1386 1387 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 1388 { 1389 return exec_queue_reset(q); 1390 } 1391 1392 /* 1393 * All of these functions are an abstraction layer which other parts of XE can 1394 * use to trap into the GuC backend. All of these functions, aside from init, 1395 * really shouldn't do much other than trap into the DRM scheduler which 1396 * synchronizes these operations. 
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
};

static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		if ((exec_queue_banned(q) && exec_queue_destroyed(q)) ||
		    xe_exec_queue_is_lr(q))
			xe_exec_queue_put(q);
		else if (exec_queue_destroyed(q))
			__guc_exec_queue_fini(guc, q);
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				xe_sched_tdr_queue_imm(&q->guc->sched);
				set_exec_queue_banned(q);
			}
		}
	}
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to protect against a concurrent
	 * reset and to release any TDRs waiting on
	 * guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, !guc_read_stopped(guc));
}

int xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_stop(guc, q);

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

	return 0;
}

static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned(q)) {
		int i;

		trace_xe_exec_queue_resubmit(q);
		for (i = 0; i < q->width; ++i)
			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_start(q);
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
		return NULL;
	}

	xe_assert(xe, guc_id >= q->guc->id);
	xe_assert(xe, guc_id < (q->guc->id + q->width));

	return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 2)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&q->guc->state));
		return -EPROTO;
	}

	trace_xe_exec_queue_scheduling_done(q);

	if (exec_queue_pending_enable(q)) {
		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		clear_exec_queue_pending_disable(q);
		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			deregister_exec_queue(guc, q);
		}
	}

	return 0;
}

int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
exec_queue_pending_enable(q) || exec_queue_enabled(q)) { 1633 drm_err(&xe->drm, "Unexpected engine state 0x%04x", 1634 atomic_read(&q->guc->state)); 1635 return -EPROTO; 1636 } 1637 1638 trace_xe_exec_queue_deregister_done(q); 1639 1640 clear_exec_queue_registered(q); 1641 1642 if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) 1643 xe_exec_queue_put(q); 1644 else 1645 __guc_exec_queue_fini(guc, q); 1646 1647 return 0; 1648 } 1649 1650 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) 1651 { 1652 struct xe_device *xe = guc_to_xe(guc); 1653 struct xe_exec_queue *q; 1654 u32 guc_id = msg[0]; 1655 1656 if (unlikely(len < 1)) { 1657 drm_err(&xe->drm, "Invalid length %u", len); 1658 return -EPROTO; 1659 } 1660 1661 q = g2h_exec_queue_lookup(guc, guc_id); 1662 if (unlikely(!q)) 1663 return -EPROTO; 1664 1665 drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); 1666 1667 /* FIXME: Do error capture, most likely async */ 1668 1669 trace_xe_exec_queue_reset(q); 1670 1671 /* 1672 * A banned engine is a NOP at this point (came from 1673 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel 1674 * jobs by setting timeout of the job to the minimum value kicking 1675 * guc_exec_queue_timedout_job. 1676 */ 1677 set_exec_queue_reset(q); 1678 if (!exec_queue_banned(q)) 1679 xe_guc_exec_queue_trigger_cleanup(q); 1680 1681 return 0; 1682 } 1683 1684 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 1685 u32 len) 1686 { 1687 struct xe_device *xe = guc_to_xe(guc); 1688 struct xe_exec_queue *q; 1689 u32 guc_id = msg[0]; 1690 1691 if (unlikely(len < 1)) { 1692 drm_err(&xe->drm, "Invalid length %u", len); 1693 return -EPROTO; 1694 } 1695 1696 q = g2h_exec_queue_lookup(guc, guc_id); 1697 if (unlikely(!q)) 1698 return -EPROTO; 1699 1700 drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); 1701 trace_xe_exec_queue_memory_cat_error(q); 1702 1703 /* Treat the same as engine reset */ 1704 set_exec_queue_reset(q); 1705 if (!exec_queue_banned(q)) 1706 xe_guc_exec_queue_trigger_cleanup(q); 1707 1708 return 0; 1709 } 1710 1711 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 1712 { 1713 struct xe_device *xe = guc_to_xe(guc); 1714 u8 guc_class, instance; 1715 u32 reason; 1716 1717 if (unlikely(len != 3)) { 1718 drm_err(&xe->drm, "Invalid length %u", len); 1719 return -EPROTO; 1720 } 1721 1722 guc_class = msg[0]; 1723 instance = msg[1]; 1724 reason = msg[2]; 1725 1726 /* Unexpected failure of a hardware feature, log an actual error */ 1727 drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", 1728 guc_class, instance, reason); 1729 1730 xe_gt_reset_async(guc_to_gt(guc)); 1731 1732 return 0; 1733 } 1734 1735 static void 1736 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 1737 struct xe_guc_submit_exec_queue_snapshot *snapshot) 1738 { 1739 struct xe_guc *guc = exec_queue_to_guc(q); 1740 struct xe_device *xe = guc_to_xe(guc); 1741 struct iosys_map map = xe_lrc_parallel_map(q->lrc); 1742 int i; 1743 1744 snapshot->guc.wqi_head = q->guc->wqi_head; 1745 snapshot->guc.wqi_tail = q->guc->wqi_tail; 1746 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 1747 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 1748 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 1749 wq_desc.wq_status); 1750 1751 if (snapshot->parallel.wq_desc.head != 1752 snapshot->parallel.wq_desc.tail) { 1753 for (i = snapshot->parallel.wq_desc.head; 1754 i != 
snapshot->parallel.wq_desc.tail; 1755 i = (i + sizeof(u32)) % WQ_SIZE) 1756 snapshot->parallel.wq[i / sizeof(u32)] = 1757 parallel_read(xe, map, wq[i / sizeof(u32)]); 1758 } 1759 } 1760 1761 static void 1762 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 1763 struct drm_printer *p) 1764 { 1765 int i; 1766 1767 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 1768 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 1769 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 1770 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 1771 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 1772 1773 if (snapshot->parallel.wq_desc.head != 1774 snapshot->parallel.wq_desc.tail) { 1775 for (i = snapshot->parallel.wq_desc.head; 1776 i != snapshot->parallel.wq_desc.tail; 1777 i = (i + sizeof(u32)) % WQ_SIZE) 1778 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 1779 snapshot->parallel.wq[i / sizeof(u32)]); 1780 } 1781 } 1782 1783 /** 1784 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 1785 * @job: faulty Xe scheduled job. 1786 * 1787 * This can be printed out in a later stage like during dev_coredump 1788 * analysis. 1789 * 1790 * Returns: a GuC Submit Engine snapshot object that must be freed by the 1791 * caller, using `xe_guc_exec_queue_snapshot_free`. 1792 */ 1793 struct xe_guc_submit_exec_queue_snapshot * 1794 xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) 1795 { 1796 struct xe_exec_queue *q = job->q; 1797 struct xe_gpu_scheduler *sched = &q->guc->sched; 1798 struct xe_guc_submit_exec_queue_snapshot *snapshot; 1799 int i; 1800 1801 snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); 1802 1803 if (!snapshot) 1804 return NULL; 1805 1806 snapshot->guc.id = q->guc->id; 1807 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 1808 snapshot->class = q->class; 1809 snapshot->logical_mask = q->logical_mask; 1810 snapshot->width = q->width; 1811 snapshot->refcount = kref_read(&q->refcount); 1812 snapshot->sched_timeout = sched->base.timeout; 1813 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 1814 snapshot->sched_props.preempt_timeout_us = 1815 q->sched_props.preempt_timeout_us; 1816 1817 snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot), 1818 GFP_ATOMIC); 1819 1820 if (snapshot->lrc) { 1821 for (i = 0; i < q->width; ++i) { 1822 struct xe_lrc *lrc = q->lrc + i; 1823 1824 snapshot->lrc[i].context_desc = 1825 lower_32_bits(xe_lrc_ggtt_addr(lrc)); 1826 snapshot->lrc[i].head = xe_lrc_ring_head(lrc); 1827 snapshot->lrc[i].tail.internal = lrc->ring.tail; 1828 snapshot->lrc[i].tail.memory = 1829 xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); 1830 snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); 1831 snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); 1832 } 1833 } 1834 1835 snapshot->schedule_state = atomic_read(&q->guc->state); 1836 snapshot->exec_queue_flags = q->flags; 1837 1838 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 1839 if (snapshot->parallel_execution) 1840 guc_exec_queue_wq_snapshot_capture(q, snapshot); 1841 1842 spin_lock(&sched->base.job_list_lock); 1843 snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); 1844 snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, 1845 sizeof(struct pending_list_snapshot), 1846 GFP_ATOMIC); 1847 1848 if (snapshot->pending_list) { 1849 struct xe_sched_job *job_iter; 1850 1851 i = 0; 1852 list_for_each_entry(job_iter, &sched->base.pending_list, 
				    drm.list) {
			snapshot->pending_list[i].seqno =
				xe_sched_job_seqno(job_iter);
			snapshot->pending_list[i].fence =
				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
			snapshot->pending_list[i].finished =
				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
				? 1 : 0;
			i++;
		}
	}

	spin_unlock(&sched->base.job_list_lock);

	return snapshot;
}

/**
 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given GuC Submit Engine snapshot object.
 */
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	if (!snapshot)
		return;

	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
	drm_printf(p, "\tName: %s\n", snapshot->name);
	drm_printf(p, "\tClass: %d\n", snapshot->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
	drm_printf(p, "\tWidth: %d\n", snapshot->width);
	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n",
		   snapshot->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   snapshot->sched_props.preempt_timeout_us);

	for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
			   snapshot->lrc[i].context_desc);
		drm_printf(p, "\tLRC Head: (memory) %u\n",
			   snapshot->lrc[i].head);
		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
			   snapshot->lrc[i].tail.internal,
			   snapshot->lrc[i].tail.memory);
		drm_printf(p, "\tStart seqno: (memory) %d\n",
			   snapshot->lrc[i].start_seqno);
		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
	}
	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);

	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_print(snapshot, p);

	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
	     i++)
		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
			   snapshot->pending_list[i].seqno,
			   snapshot->pending_list[i].fence,
			   snapshot->pending_list[i].finished);
}

/**
 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture
 * time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->lrc);
	kfree(snapshot->pending_list);
	kfree(snapshot);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job;
	bool found = false;

	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(job, &sched->base.pending_list, drm.list) {
		if (job->q == q) {
			xe_sched_job_get(job);
			found = true;
			break;
		}
	}
	spin_unlock(&sched->base.job_list_lock);

	if (!found)
		return;

	snapshot = xe_guc_exec_queue_snapshot_capture(job);
	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);

	xe_sched_job_put(job);
}

/**
 * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
 *
 * This function captures and prints snapshots of **all** GuC Engines.
 */
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_exec_queue *q;
	unsigned long index;

	if (!xe_device_uc_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_print(q, p);
	mutex_unlock(&guc->submission_state.lock);
}
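
/*
 * Illustrative sketch (not built, added for documentation only): the assumed
 * ordering of the submission reset entry points above when a GT reset runs.
 * The caller name below is hypothetical; the real sequencing lives in the
 * GT/uC reset code, with the GuC reload happening between stop and start.
 */
#if 0
static void example_gt_reset_flow(struct xe_guc *guc)
{
	/* Mark submission stopped and wake anything blocked on guc->ct.wq */
	xe_guc_submit_reset_prepare(guc);

	/* Park every exec queue and its DRM scheduler */
	xe_guc_submit_stop(guc);

	/* ... reset the GT and reload the GuC firmware here ... */

	/* Clear the stopped state, resubmit pending jobs, restart schedulers */
	xe_guc_submit_start(guc);
}
#endif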