// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define ENGINE_STATE_ENABLED			(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define ENGINE_STATE_SUSPENDED			(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define ENGINE_STATE_KILLED			(1 << 7)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return (q->flags & EXEC_QUEUE_FLAG_BANNED);
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	q->flags |= EXEC_QUEUE_FLAG_BANNED;
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
{
	return exec_queue_killed(q) || exec_queue_banned(q);
}

#ifdef CONFIG_PROVE_LOCKING
static int alloc_submit_wq(struct xe_guc *guc)
{
	int i;

	for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
		guc->submission_state.submit_wq_pool[i] =
			alloc_ordered_workqueue("submit_wq", 0);
		if (!guc->submission_state.submit_wq_pool[i])
			goto err_free;
	}

	return 0;

err_free:
	while (i)
		destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);

	return -ENOMEM;
}

static void free_submit_wq(struct xe_guc *guc)
{
	int i;

	for (i = 0; i < NUM_SUBMIT_WQ; ++i)
		destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
}

static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
	int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;

	return guc->submission_state.submit_wq_pool[idx];
}
#else
static int alloc_submit_wq(struct xe_guc *guc)
{
	return 0;
}

static void free_submit_wq(struct xe_guc *guc)
{
}

static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
	return NULL;
}
#endif

static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;

	xa_destroy(&guc->submission_state.exec_queue_lookup);
	ida_destroy(&guc->submission_state.guc_ids);
	bitmap_free(guc->submission_state.guc_ids_bitmap);
	free_submit_wq(guc);
	mutex_destroy(&guc->submission_state.lock);
}

#define GUC_ID_MAX		65535
#define GUC_ID_NUMBER_MLRC	4096
#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC

static const struct xe_exec_queue_ops guc_exec_queue_ops;

static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	might_lock(&guc->submission_state.suspend.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

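/**
 * xe_guc_submit_init - Initialize the GuC submission backend
 * @guc: the &xe_guc to initialize
 *
 * Set up the GuC ID allocators, the exec queue lookup table and the
 * submission state locks, and point the GT at the GuC exec queue ops.
 *
 * Return: 0 on success, negative error code on failure.
 */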
int xe_guc_submit_init(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	guc->submission_state.guc_ids_bitmap =
		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
	if (!guc->submission_state.guc_ids_bitmap)
		return -ENOMEM;

	err = alloc_submit_wq(guc);
	if (err) {
		bitmap_free(guc->submission_state.guc_ids_bitmap);
		return err;
	}

	gt->exec_queue_ops = &guc_exec_queue_ops;

	mutex_init(&guc->submission_state.lock);
	xa_init(&guc->submission_state.exec_queue_lookup);
	ida_init(&guc->submission_state.guc_ids);

	spin_lock_init(&guc->submission_state.suspend.lock);
	guc->submission_state.suspend.context = dma_fence_context_alloc(1);

	primelockdep(guc);

	err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
	if (err)
		return err;

	return 0;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	if (xe_exec_queue_is_parallel(q))
		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
				      q->guc->id - GUC_ID_START_MLRC,
				      order_base_2(q->width));
	else
		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
}

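/*
 * GuC ID space layout: single-LRC queues take an ID from an IDA in
 * [0, GUC_ID_NUMBER_SLRC), while parallel (multi-LRC) queues grab a
 * power-of-two sized block from a bitmap offset by GUC_ID_START_MLRC.
 */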
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	void *ptr;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling
	 * path, worst case the user gets -ENOMEM on engine create and has to
	 * try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	if (xe_exec_queue_is_parallel(q)) {
		void *bitmap = guc->submission_state.guc_ids_bitmap;

		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
					      order_base_2(q->width));
	} else {
		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
	}
	if (ret < 0)
		return ret;

	q->guc->id = ret;
	if (xe_exec_queue_is_parallel(q))
		q->guc->id += GUC_ID_START_MLRC;

	for (i = 0; i < q->width; ++i) {
		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
			       q->guc->id + i, q, GFP_NOWAIT);
		if (IS_ERR(ptr)) {
			ret = PTR_ERR(ptr);
			goto err_release;
		}
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_exec_queue_priority prio = q->priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_assert(xe, exec_queue_registered(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

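/*
 * Drop the preemption timeout to the minimum (1 us) so the GuC preempts the
 * context as soon as possible, e.g. right before scheduling is disabled and
 * the context is deregistered.
 */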
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

static void __register_mlrc_engine(struct xe_guc *guc,
				   struct xe_exec_queue *q,
				   struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE	(13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	struct xe_device *xe = guc_to_xe(guc);
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_assert(xe, xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc + i;

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_engine(struct xe_guc *guc,
			      struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

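/*
 * Register the exec queue's context(s) with the GuC and push the initial
 * scheduling policies. Parallel queues also get their submission workqueue
 * descriptor initialized here.
 */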
static void register_engine(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc;
	struct guc_ctxt_registration_info info;

	xe_assert(xe, !exec_queue_registered(q));

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;

	if (xe_exec_queue_is_parallel(q)) {
		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/*
	 * We must keep a reference for LR engines if the engine is registered
	 * with the GuC, as jobs signal immediately and we can't destroy an
	 * engine while the GuC still has a reference to it.
	 */
	if (xe_exec_queue_is_lr(q))
		xe_exec_queue_get(q);

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_parallel(q))
		__register_mlrc_engine(guc, q, &info);
	else
		__register_engine(guc, &info);
	init_policies(guc, q);
}

static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	unsigned int sleep_period_ms = 1;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE) {
			if (sleep_period_ms == 1024) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc + j;

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_assert(xe, i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
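/*
 * Kick off execution on an already registered queue: write the work queue
 * item / ring tail, then either enable scheduling (first submission or after
 * a suspend) or send a plain schedule-context request.
 */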
static void submit_exec_queue(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc;
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_assert(xe, exec_queue_registered(q));

	if (xe_exec_queue_is_parallel(q))
		wq_item_append(q);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	bool lr = xe_exec_queue_is_lr(q);

	xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		  exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) {
		if (!exec_queue_registered(q))
			register_engine(q);
		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
			q->ring_ops->emit_job(job);
		submit_exec_queue(q);
	}

	if (lr) {
		xe_sched_job_set_error(job, -EOPNOTSUPP);
		return NULL;
	} else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
		return job->fence;
	} else {
		return dma_fence_get(job->fence);
	}
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

static int guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)		\
	u32 action[] = {					\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,		\
		q->guc->id,					\
		GUC_CONTEXT_##enable_disable,			\
	}

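/*
 * Disable scheduling for a registered queue and then deregister it from the
 * GuC. The preemption timeout is first dropped to its minimum so the GuC acts
 * quickly; the corresponding G2H responses are processed asynchronously.
 */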
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	set_min_preemption_timeout(guc, q);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
				 guc_read_stopped(guc), HZ * 5);
	if (!ret) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		drm_warn(&xe->drm, "Pending enable failed to respond");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);

#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct drm_printer p = drm_err_printer("");
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 adj_logical_mask = q->logical_mask;
	u32 width_mask = (0x1 << q->width) - 1;
	int i;
	bool cookie;

	if (q->vm && !q->vm->error_capture.capture_once) {
		q->vm->error_capture.capture_once = true;
		cookie = dma_fence_begin_signalling();
		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
			if (adj_logical_mask & BIT(i)) {
				adj_logical_mask |= width_mask << i;
				i += q->width;
			} else {
				++i;
			}
		}

		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		xe_guc_ct_print(&guc->ct, &p, true);
		guc_exec_queue_print(q, &p);
		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
			if (hwe->class != q->hwe->class ||
			    !(BIT(hwe->logical_instance) & adj_logical_mask))
				continue;
			xe_hw_engine_print(hwe, &p);
		}
		xe_analyze_vm(&p, q->vm, q->gt->info.id);
		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		dma_fence_end_signalling(cookie);
	}
}
#else
static void simple_error_capture(struct xe_exec_queue *q)
{
}
#endif

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

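/*
 * Long-running (LR) queues have no job timeout, so their teardown is driven by
 * this worker rather than the DRM scheduler TDR.
 */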
static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gpu_scheduler *sched = &ge->sched;

	xe_assert(xe, xe_exec_queue_is_lr(q));
	trace_xe_exec_queue_lr_cleanup(q);

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to
	 * mark the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	xe_sched_submission_start(sched);
}

static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
	int err = -ETIME;
	int i = 0;

	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));

		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
			   xe_sched_job_seqno(job), q->guc->id, q->flags);
		simple_error_capture(q);
		xe_devcoredump(q);
	} else {
		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
			xe_sched_job_seqno(job), q->guc->id, q->flags);
	}
	trace_xe_sched_job_timedout(job);

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			goto out;
		}
	}

	/* Engine state now stable, disable scheduling if needed */
	if (exec_queue_registered(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;
		set_exec_queue_banned(q);
		if (!exec_queue_destroyed(q)) {
			xe_exec_queue_get(q);
			disable_scheduling_deregister(guc, q);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret || guc_read_stopped(guc)) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto out;
		}
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);
	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

out:
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void __guc_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, fini_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
		xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q);
	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	kfree(ge);
	xe_exec_queue_fini(q);
}

static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
{
	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
		__guc_exec_queue_fini_async(&q->guc->fini_async);
	else
		queue_work(system_wq, &q->guc->fini_async);
}

static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that
	 * it is.
	 */
	guc_exec_queue_fini_async(q);
}

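/*
 * CLEANUP message: disable scheduling and deregister the queue from the GuC,
 * or finish the fini directly if the queue was never registered.
 */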
static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	if (exec_queue_registered(q))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_fini(guc, q);
}

static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
}

static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
		  guc_read_stopped(guc));
	xe_assert(xe, q->guc->suspend_pending);

	q->guc->suspend_pending = false;
	smp_wmb();
	wake_up(&q->guc->suspend_wait);
}

static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
	    exec_queue_enabled(q)) {
		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
			   guc_read_stopped(guc));

		if (!guc_read_stopped(guc)) {
			MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       q->guc->resume_time);
			s64 wait_ms = q->vm->preempt.min_run_period_ms -
				since_resume_ms;

			if (wait_ms > 0 && q->guc->resume_time)
				msleep(wait_ms);

			set_exec_queue_suspended(q);
			clear_exec_queue_enabled(q);
			set_exec_queue_pending_disable(q);
			trace_xe_exec_queue_scheduling_disable(q);

			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
		}
	} else if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
}

static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q)) {
		MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);

		q->guc->resume_time = RESUME_PENDING;
		clear_exec_queue_suspended(q);
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);

		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
	} else {
		clear_exec_queue_suspended(q);
	}
}

#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS	2
#define SUSPEND		3
#define RESUME		4

static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err;

	xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
	if (!ge)
		return -ENOMEM;

	q->guc = ge;
	ge->q = q;
	init_waitqueue_head(&ge->suspend_wait);

	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		  q->hwe->eclass->sched_props.job_timeout_ms;
	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    get_submit_wq(guc),
			    q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;
	q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;

	if (xe_exec_queue_is_lr(q))
		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_entity;

	q->entity = &ge->entity;

	if (guc_read_stopped(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	xe_exec_queue_assign_name(q, q->guc->id);

	trace_xe_exec_queue_create(q);

	return 0;

err_entity:
	mutex_unlock(&guc->submission_state.lock);
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	kfree(ge);

	return err;
}

static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
	trace_xe_exec_queue_kill(q);
	set_exec_queue_killed(q);
	xe_guc_exec_queue_trigger_cleanup(q);
}

static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
				   u32 opcode)
{
	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode;
	msg->private_data = q;

	trace_xe_sched_msg_add(msg);
	xe_sched_add_msg(&q->guc->sched, msg);
}

#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2
static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT))
		guc_exec_queue_add_msg(q, msg, CLEANUP);
	else
		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
}

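/*
 * The set_priority / set_timeslice / set_preempt_timeout ops below don't talk
 * to the GuC directly; they update the cached value and queue a
 * SET_SCHED_PROPS message so the policy update is issued from the scheduler
 * worker.
 */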
static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
				       enum xe_exec_queue_priority priority)
{
	struct xe_sched_msg *msg;

	if (q->priority == priority || exec_queue_killed_or_banned(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
	q->priority = priority;

	return 0;
}

static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.timeslice_us == timeslice_us ||
	    exec_queue_killed_or_banned(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.timeslice_us = timeslice_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
					      u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    exec_queue_killed_or_banned(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, !exec_queue_registered(q));
	xe_assert(xe, !exec_queue_banned(q));
	xe_assert(xe, !exec_queue_killed(q));

	sched->base.timeout = job_timeout_ms;

	return 0;
}

static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending)
		return -EINVAL;

	q->guc->suspend_pending = true;
	guc_exec_queue_add_msg(q, msg, SUSPEND);

	return 0;
}

static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
		   guc_read_stopped(guc));
}

static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, !q->guc->suspend_pending);

	guc_exec_queue_add_msg(q, msg, RESUME);
}

/*
 * All of these functions are an abstraction layer which other parts of XE can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_job_timeout = guc_exec_queue_set_job_timeout,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
};

static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		if ((exec_queue_banned(q) && exec_queue_destroyed(q)) ||
		    xe_exec_queue_is_lr(q))
			xe_exec_queue_put(q);
		else if (exec_queue_destroyed(q))
			__guc_exec_queue_fini(guc, q);
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with
	 * a started but not complete job, or if a job has gone through a GT
	 * reset more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				xe_sched_tdr_queue_imm(&q->guc->sched);
				set_exec_queue_banned(q);
			}
		}
	}
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to prevent concurrent resets and
	 * to release any TDRs waiting on guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, !guc_read_stopped(guc));
}

int xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_stop(guc, q);

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

	return 0;
}

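/*
 * Bring a stopped queue back up after a GuC / GT reset: point each ring's
 * head at its tail, resubmit any pending jobs and restart the DRM scheduler.
 */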
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned(q)) {
		int i;

		trace_xe_exec_queue_resubmit(q);
		for (i = 0; i < q->width; ++i)
			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_start(q);
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
		return NULL;
	}

	xe_assert(xe, guc_id >= q->guc->id);
	xe_assert(xe, guc_id < (q->guc->id + q->width));

	return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 2)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&q->guc->state));
		return -EPROTO;
	}

	trace_xe_exec_queue_scheduling_done(q);

	if (exec_queue_pending_enable(q)) {
		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		clear_exec_queue_pending_disable(q);
		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			deregister_exec_queue(guc, q);
		}
	}

	return 0;
}

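/*
 * G2H handler: the GuC has confirmed the context deregistration. Drop the
 * reference held on behalf of the GuC (banned or LR queues) or finish the
 * queue fini.
 */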
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&q->guc->state));
		return -EPROTO;
	}

	trace_xe_exec_queue_deregister_done(q);

	clear_exec_queue_registered(q);

	if (exec_queue_banned(q) || xe_exec_queue_is_lr(q))
		xe_exec_queue_put(q);
	else
		__guc_exec_queue_fini(guc, q);

	return 0;
}

int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);

	/* FIXME: Do error capture, most likely async */

	trace_xe_exec_queue_reset(q);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to
	 * cancel the jobs by setting the job timeout to the minimum value,
	 * which kicks guc_exec_queue_timedout_job.
	 */
	set_exec_queue_reset(q);
	if (!exec_queue_banned(q))
		xe_guc_exec_queue_trigger_cleanup(q);

	return 0;
}

int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
					       u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
	trace_xe_exec_queue_memory_cat_error(q);

	/* Treat the same as engine reset */
	set_exec_queue_reset(q);
	if (!exec_queue_banned(q))
		xe_guc_exec_queue_trigger_cleanup(q);

	return 0;
}

int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	u8 guc_class, instance;
	u32 reason;

	if (unlikely(len != 3)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	guc_class = msg[0];
	instance = msg[1];
	reason = msg[2];

	/* Unexpected failure of a hardware feature, log an actual error */
	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
		guc_class, instance, reason);

	xe_gt_reset_async(guc_to_gt(guc));

	return 0;
}

static void
guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	int i;

	snapshot->guc.wqi_head = q->guc->wqi_head;
	snapshot->guc.wqi_tail = q->guc->wqi_tail;
	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
							  wq_desc.wq_status);

	if (snapshot->parallel.wq_desc.head !=
	    snapshot->parallel.wq_desc.tail) {
		for (i = snapshot->parallel.wq_desc.head;
		     i != snapshot->parallel.wq_desc.tail;
		     i = (i + sizeof(u32)) % WQ_SIZE)
			snapshot->parallel.wq[i / sizeof(u32)] =
				parallel_read(xe, map, wq[i / sizeof(u32)]);
	}
}

static void
guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);

	if (snapshot->parallel.wq_desc.head !=
	    snapshot->parallel.wq_desc.tail) {
		for (i = snapshot->parallel.wq_desc.head;
		     i != snapshot->parallel.wq_desc.tail;
		     i = (i + sizeof(u32)) % WQ_SIZE)
			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
				   snapshot->parallel.wq[i / sizeof(u32)]);
	}
}

/**
 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
 * @q: Xe exec queue.
 *
 * This snapshot can be printed out at a later stage, e.g. during dev_coredump
 * analysis.
 *
 * Returns: a GuC Submit Engine snapshot object that must be freed by the
 * caller, using `xe_guc_exec_queue_snapshot_free`.
 */
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job;
	struct xe_guc_submit_exec_queue_snapshot *snapshot;
	int i;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);

	if (!snapshot) {
		drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n");
		return NULL;
	}

	snapshot->guc.id = q->guc->id;
	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
	snapshot->class = q->class;
	snapshot->logical_mask = q->logical_mask;
	snapshot->width = q->width;
	snapshot->refcount = kref_read(&q->refcount);
	snapshot->sched_timeout = sched->base.timeout;
	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
	snapshot->sched_props.preempt_timeout_us =
		q->sched_props.preempt_timeout_us;

	snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
				      GFP_ATOMIC);

	if (!snapshot->lrc) {
		drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
	} else {
		for (i = 0; i < q->width; ++i) {
			struct xe_lrc *lrc = q->lrc + i;

			snapshot->lrc[i].context_desc =
				lower_32_bits(xe_lrc_ggtt_addr(lrc));
			snapshot->lrc[i].head = xe_lrc_ring_head(lrc);
			snapshot->lrc[i].tail.internal = lrc->ring.tail;
			snapshot->lrc[i].tail.memory =
				xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
			snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc);
			snapshot->lrc[i].seqno = xe_lrc_seqno(lrc);
		}
	}

	snapshot->schedule_state = atomic_read(&q->guc->state);
	snapshot->exec_queue_flags = q->flags;

	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_capture(q, snapshot);

	spin_lock(&sched->base.job_list_lock);
	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
					       sizeof(struct pending_list_snapshot),
					       GFP_ATOMIC);

	if (!snapshot->pending_list) {
		drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
	} else {
		i = 0;
		list_for_each_entry(job, &sched->base.pending_list, drm.list) {
			snapshot->pending_list[i].seqno =
				xe_sched_job_seqno(job);
			snapshot->pending_list[i].fence =
				dma_fence_is_signaled(job->fence) ? 1 : 0;
			snapshot->pending_list[i].finished =
				dma_fence_is_signaled(&job->drm.s_fence->finished)
				? 1 : 0;
			i++;
		}
	}

	spin_unlock(&sched->base.job_list_lock);

	return snapshot;
}

/**
 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given GuC Submit Engine snapshot object.
 */
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	if (!snapshot)
		return;

	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
	drm_printf(p, "\tName: %s\n", snapshot->name);
	drm_printf(p, "\tClass: %d\n", snapshot->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
	drm_printf(p, "\tWidth: %d\n", snapshot->width);
	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n",
		   snapshot->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   snapshot->sched_props.preempt_timeout_us);

	for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
			   snapshot->lrc[i].context_desc);
		drm_printf(p, "\tLRC Head: (memory) %u\n",
			   snapshot->lrc[i].head);
		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
			   snapshot->lrc[i].tail.internal,
			   snapshot->lrc[i].tail.memory);
		drm_printf(p, "\tStart seqno: (memory) %d\n",
			   snapshot->lrc[i].start_seqno);
		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
	}
	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);

	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_print(snapshot, p);

	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
	     i++)
		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
			   snapshot->pending_list[i].seqno,
			   snapshot->pending_list[i].fence,
			   snapshot->pending_list[i].finished);
}

/**
 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->lrc);
	kfree(snapshot->pending_list);
	kfree(snapshot);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot;

	snapshot = xe_guc_exec_queue_snapshot_capture(q);
	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);
}

/**
 * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
 *
 * This function captures and prints snapshots of **all** GuC Engines.
 */
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_exec_queue *q;
	unsigned long index;

	if (!xe_device_uc_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_print(q, p);
	mutex_unlock(&guc->submission_state.lock);
}