// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "regs/xe_lrc_layout.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_engine.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_engine_types.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_gt *
guc_to_gt(struct xe_guc *guc)
{
	return container_of(guc, struct xe_gt, uc.guc);
}

static struct xe_device *
guc_to_xe(struct xe_guc *guc)
{
	return gt_to_xe(guc_to_gt(guc));
}

static struct xe_guc *
engine_to_guc(struct xe_engine *e)
{
	return &e->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done message being processed).
 */
#define ENGINE_STATE_REGISTERED		(1 << 0)
#define ENGINE_STATE_ENABLED		(1 << 1)
#define ENGINE_STATE_PENDING_ENABLE	(1 << 2)
#define ENGINE_STATE_PENDING_DISABLE	(1 << 3)
#define ENGINE_STATE_DESTROYED		(1 << 4)
#define ENGINE_STATE_SUSPENDED		(1 << 5)
#define ENGINE_STATE_RESET		(1 << 6)
#define ENGINE_STATE_KILLED		(1 << 7)

static bool engine_registered(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED;
}

static void set_engine_registered(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state);
}

static void clear_engine_registered(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state);
}

static bool engine_enabled(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED;
}

static void set_engine_enabled(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_ENABLED, &e->guc->state);
}

static void clear_engine_enabled(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state);
}

static bool engine_pending_enable(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE;
}

static void set_engine_pending_enable(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
}

static void clear_engine_pending_enable(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
}

static bool engine_pending_disable(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE;
}

static void set_engine_pending_disable(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
}

static void clear_engine_pending_disable(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
}

static bool engine_destroyed(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED;
}

static void set_engine_destroyed(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state);
}

static bool engine_banned(struct xe_engine *e)
{
	return (e->flags & ENGINE_FLAG_BANNED);
}

static void set_engine_banned(struct xe_engine *e)
{
	e->flags |= ENGINE_FLAG_BANNED;
}

static bool engine_suspended(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED;
}

static void set_engine_suspended(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state);
}

static void clear_engine_suspended(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state);
}

static bool engine_reset(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_RESET;
}

static void set_engine_reset(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_RESET, &e->guc->state);
}

static bool engine_killed(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED;
}

static void set_engine_killed(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_KILLED, &e->guc->state);
}

static bool engine_killed_or_banned(struct xe_engine *e)
{
	return engine_killed(e) || engine_banned(e);
}
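/*
 * Typical flow for the bits above: enabling scheduling (submit_engine())
 * sets ENGINE_STATE_ENABLED together with ENGINE_STATE_PENDING_ENABLE, and
 * the scheduling-done G2H handler clears PENDING_ENABLE; disabling clears
 * ENABLED and sets PENDING_DISABLE, which that same handler clears in turn.
 * DESTROYED, RESET and KILLED are only ever set here and are consumed by the
 * G2H handlers, the TDR and the GT reset (stop/start) paths; SUSPENDED is
 * cleared again when the engine is resumed.
 */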
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;

	xa_destroy(&guc->submission_state.engine_lookup);
	ida_destroy(&guc->submission_state.guc_ids);
	bitmap_free(guc->submission_state.guc_ids_bitmap);
}

#define GUC_ID_MAX		65535
#define GUC_ID_NUMBER_MLRC	4096
#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC

static const struct xe_engine_ops guc_engine_ops;

static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	might_lock(&guc->submission_state.suspend.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

int xe_guc_submit_init(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	guc->submission_state.guc_ids_bitmap =
		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
	if (!guc->submission_state.guc_ids_bitmap)
		return -ENOMEM;

	gt->engine_ops = &guc_engine_ops;

	mutex_init(&guc->submission_state.lock);
	xa_init(&guc->submission_state.engine_lookup);
	ida_init(&guc->submission_state.guc_ids);

	spin_lock_init(&guc->submission_state.suspend.lock);
	guc->submission_state.suspend.context = dma_fence_context_alloc(1);

	primelockdep(guc);

	err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
	if (err)
		return err;

	return 0;
}
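/*
 * GuC context IDs are carved up as follows: IDs [0, GUC_ID_NUMBER_SLRC) are
 * handed out one at a time from the guc_ids ida for single-LRC engines, while
 * the top GUC_ID_NUMBER_MLRC IDs (starting at GUC_ID_START_MLRC) come from
 * guc_ids_bitmap, where a parallel engine of width N reserves a naturally
 * aligned block of 2^order_base_2(N) consecutive IDs.
 */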
static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
{
	int ret;
	void *ptr;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case the user gets -ENOMEM on engine create and has to try
	 * again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc with GFP_KERNEL to
	 * prevent failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	if (xe_engine_is_parallel(e)) {
		void *bitmap = guc->submission_state.guc_ids_bitmap;

		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
					      order_base_2(e->width));
	} else {
		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
	}
	if (ret < 0)
		return ret;

	e->guc->id = ret;
	if (xe_engine_is_parallel(e))
		e->guc->id += GUC_ID_START_MLRC;

	ptr = xa_store(&guc->submission_state.engine_lookup,
		       e->guc->id, e, GFP_NOWAIT);
	if (IS_ERR(ptr)) {
		ret = PTR_ERR(ptr);
		goto err_release;
	}

	return 0;

err_release:
	ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_engine *e)
{
	mutex_lock(&guc->submission_state.lock);
	xa_erase(&guc->submission_state.engine_lookup, e->guc->id);
	if (xe_engine_is_parallel(e))
		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
				      e->guc->id - GUC_ID_START_MLRC,
				      order_base_2(e->width));
	else
		ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
	mutex_unlock(&guc->submission_state.lock);
}

struct engine_policy {
	u32 count;
	struct guc_update_engine_policy h2g;
};

static u32 __guc_engine_policy_action_size(struct engine_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_engine_policy_start_klv(struct engine_policy *policy,
					  u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_ENGINE_POLICY_ADD(func, id) \
static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
					   u32 data) \
{ \
	XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
#undef MAKE_ENGINE_POLICY_ADD
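/*
 * The macro above generates one small helper per KLV; e.g. the last
 * invocation expands (roughly) to:
 *
 *	static void __guc_engine_policy_add_priority(struct engine_policy *policy,
 *						     u32 data)
 *	{
 *		policy->h2g.klv[policy->count].kl =
 *			FIELD_PREP(GUC_KLV_0_KEY,
 *				   GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY) |
 *			FIELD_PREP(GUC_KLV_0_LEN, 1);
 *		policy->h2g.klv[policy->count].value = data;
 *		policy->count++;
 *	}
 *
 * which init_policies() below uses to build the KLV-based
 * HOST2GUC_UPDATE_CONTEXT_POLICIES message.
 */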
static const int xe_engine_prio_to_guc[] = {
	[XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

static void init_policies(struct xe_guc *guc, struct xe_engine *e)
{
	struct engine_policy policy;
	enum xe_engine_priority prio = e->priority;
	u32 timeslice_us = e->sched_props.timeslice_us;
	u32 preempt_timeout_us = e->sched_props.preempt_timeout_us;

	XE_BUG_ON(!engine_registered(e));

	__guc_engine_policy_start_klv(&policy, e->guc->id);
	__guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
	__guc_engine_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_engine_policy_action_size(&policy), 0, 0);
}

static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
{
	struct engine_policy policy;

	__guc_engine_policy_start_klv(&policy, e->guc->id);
	__guc_engine_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_engine_policy_action_size(&policy), 0, 0);
}

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)
static void __register_mlrc_engine(struct xe_guc *guc,
				   struct xe_engine *e,
				   struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	XE_BUG_ON(!xe_engine_is_parallel(e));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = e->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < e->width; ++i) {
		struct xe_lrc *lrc = e->lrc + i;

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	XE_BUG_ON(len > MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_engine(struct xe_guc *guc,
			      struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

static void register_engine(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = e->lrc;
	struct guc_ctxt_registration_info info;

	XE_BUG_ON(engine_registered(e));

	memset(&info, 0, sizeof(info));
	info.context_idx = e->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(e->class);
	info.engine_submit_mask = e->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;

	if (xe_engine_is_parallel(e)) {
		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		e->guc->wqi_head = 0;
		e->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_engine_registered(e);
	trace_xe_engine_register(e);
	if (xe_engine_is_parallel(e))
		__register_mlrc_engine(guc, e, &info);
	else
		__register_engine(guc, &info);
	init_policies(guc, e);
}

static u32 wq_space_until_wrap(struct xe_engine *e)
{
	return (WQ_SIZE - e->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	unsigned int sleep_period_ms = 1;

#define AVAILABLE_SPACE \
	CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
try_again:
		e->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE) {
			if (sleep_period_ms == 1024) {
				xe_gt_reset_async(e->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

static int wq_noop_append(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1;

	if (wq_wait_for_space(e, wq_space_until_wrap(e)))
		return -ENODEV;

	XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	e->guc->wqi_tail = 0;

	return 0;
}
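/*
 * Layout of the multi-LRC work queue item built below ((e->width + 3) dwords
 * in total):
 *
 *	dw0:  WQ_TYPE_MULTI_LRC | length (len_dw)
 *	dw1:  LRC descriptor of the parent (e->lrc[0])
 *	dw2:  guc_id | parent ring tail (in qwords)
 *	dw3:  0
 *	dw4+: ring tail (in qwords) of each child LRC, e->lrc[1..width-1]
 */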
static void wq_item_append(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3];
	u32 wqi_size = (e->width + 3) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(e)) {
		if (wq_noop_append(e))
			return;
	}
	if (wq_wait_for_space(e, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(e->lrc);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < e->width; ++j) {
		struct xe_lrc *lrc = e->lrc + j;

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	XE_BUG_ON(i != wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[e->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	e->guc->wqi_tail += wqi_size;
	XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(e->lrc);
	parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
static void submit_engine(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_lrc *lrc = e->lrc;
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	XE_BUG_ON(!engine_registered(e));

	if (xe_engine_is_parallel(e))
		wq_item_append(e);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);

	if (engine_suspended(e) && !xe_engine_is_parallel(e))
		return;

	if (!engine_enabled(e) && !engine_suspended(e)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = e->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_engine_is_parallel(e))
			extra_submit = true;

		e->guc->resume_time = RESUME_PENDING;
		set_engine_pending_enable(e);
		set_engine_enabled(e);
		trace_xe_engine_scheduling_enable(e);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = e->guc->id;
		trace_xe_engine_submit(e);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = e->guc->id;
		trace_xe_engine_submit(e);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}
static struct dma_fence *
guc_engine_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_engine *e = job->engine;

	XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
		  !engine_banned(e) && !engine_suspended(e));

	trace_xe_sched_job_run(job);

	if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) {
		if (!engine_registered(e))
			register_engine(e);
		e->ring_ops->emit_job(job);
		submit_engine(e);
	}

	if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags))
		return job->fence;
	else
		return dma_fence_get(job->fence);
}

static void guc_engine_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

static int guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		e->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}

static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_engine *e)
{
	MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
	int ret;

	set_min_preemption_timeout(guc, e);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) ||
				 guc_read_stopped(guc), HZ * 5);
	if (!ret) {
		struct xe_gpu_scheduler *sched = &e->guc->sched;

		XE_WARN_ON("Pending enable failed to respond");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(e->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_engine_enabled(e);
	set_engine_pending_disable(e);
	set_engine_destroyed(e);
	trace_xe_engine_scheduling_disable(e);

	/*
	 * Reserve space for both G2Hs here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}
static void guc_engine_print(struct xe_engine *e, struct drm_printer *p);

#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct drm_printer p = drm_err_printer("");
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 adj_logical_mask = e->logical_mask;
	u32 width_mask = (0x1 << e->width) - 1;
	int i;
	bool cookie;

	if (e->vm && !e->vm->error_capture.capture_once) {
		e->vm->error_capture.capture_once = true;
		cookie = dma_fence_begin_signalling();
		for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
			if (adj_logical_mask & BIT(i)) {
				adj_logical_mask |= width_mask << i;
				i += e->width;
			} else {
				++i;
			}
		}

		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		xe_guc_ct_print(&guc->ct, &p, true);
		guc_engine_print(e, &p);
		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
			if (hwe->class != e->hwe->class ||
			    !(BIT(hwe->logical_instance) & adj_logical_mask))
				continue;
			xe_hw_engine_print(hwe, &p);
		}
		xe_analyze_vm(&p, e->vm, e->gt->info.id);
		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		dma_fence_end_signalling(cookie);
	}
}
#else
static void simple_error_capture(struct xe_engine *e)
{
}
#endif

static enum drm_gpu_sched_stat
guc_engine_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_engine *e = job->engine;
	struct xe_gpu_scheduler *sched = &e->guc->sched;
	struct xe_device *xe = guc_to_xe(engine_to_guc(e));
	int err = -ETIME;
	int i = 0;

	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
		XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
		XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e));

		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
			   xe_sched_job_seqno(job), e->guc->id, e->flags);
		simple_error_capture(e);
		xe_devcoredump(e);
	} else {
		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
			xe_sched_job_seqno(job), e->guc->id, e->flags);
	}
	trace_xe_sched_job_timedout(job);

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	if (e->flags & ENGINE_FLAG_KERNEL ||
	    (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(e->gt);
			goto out;
		}
	}

	/* Engine state now stable, disable scheduling if needed */
	if (engine_enabled(e)) {
		struct xe_guc *guc = engine_to_guc(e);
		int ret;

		if (engine_reset(e))
			err = -EIO;
		set_engine_banned(e);
		xe_engine_get(e);
		disable_scheduling_deregister(engine_to_guc(e), e);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !engine_pending_disable(e) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			XE_WARN_ON("Schedule disable failed to respond");
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(e->gt);
			xe_sched_tdr_queue_imm(sched);
			goto out;
		}
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(e->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);
	xe_sched_tdr_queue_imm(&e->guc->sched);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(e->fence_irq);

out:
	return DRM_GPU_SCHED_STAT_NOMINAL;
}
static void __guc_engine_fini_async(struct work_struct *w)
{
	struct xe_guc_engine *ge =
		container_of(w, struct xe_guc_engine, fini_async);
	struct xe_engine *e = ge->engine;
	struct xe_guc *guc = engine_to_guc(e);

	trace_xe_engine_destroy(e);

	if (e->flags & ENGINE_FLAG_PERSISTENT)
		xe_device_remove_persistent_engines(gt_to_xe(e->gt), e);
	release_guc_id(guc, e);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	if (!(e->flags & ENGINE_FLAG_KERNEL)) {
		kfree(ge);
		xe_engine_fini(e);
	}
}

static void guc_engine_fini_async(struct xe_engine *e)
{
	bool kernel = e->flags & ENGINE_FLAG_KERNEL;

	INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
	queue_work(system_unbound_wq, &e->guc->fini_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (kernel) {
		struct xe_guc_engine *ge = e->guc;

		flush_work(&ge->fini_async);
		kfree(ge);
		xe_engine_fini(e);
	}
}

static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd; the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that
	 * it is.
	 */
	guc_engine_fini_async(e);
}

static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL);
	trace_xe_engine_cleanup_entity(e);

	if (engine_registered(e))
		disable_scheduling_deregister(guc, e);
	else
		__guc_engine_fini(guc, e);
}

static bool guc_engine_allowed_to_change_state(struct xe_engine *e)
{
	return !engine_killed_or_banned(e) && engine_registered(e);
}

static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	if (guc_engine_allowed_to_change_state(e))
		init_policies(guc, e);
	kfree(msg);
}

static void suspend_fence_signal(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);

	XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) &&
		  !guc_read_stopped(guc));
	XE_BUG_ON(!e->guc->suspend_pending);

	e->guc->suspend_pending = false;
	smp_wmb();
	wake_up(&e->guc->suspend_wait);
}

static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) &&
	    engine_enabled(e)) {
		wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING ||
			   guc_read_stopped(guc));

		if (!guc_read_stopped(guc)) {
			MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       e->guc->resume_time);
			s64 wait_ms = e->vm->preempt.min_run_period_ms -
				      since_resume_ms;

			if (wait_ms > 0 && e->guc->resume_time)
				msleep(wait_ms);

			set_engine_suspended(e);
			clear_engine_enabled(e);
			set_engine_pending_disable(e);
			trace_xe_engine_scheduling_disable(e);

			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
		}
	} else if (e->guc->suspend_pending) {
		set_engine_suspended(e);
		suspend_fence_signal(e);
	}
}

static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	if (guc_engine_allowed_to_change_state(e)) {
		MAKE_SCHED_CONTEXT_ACTION(e, ENABLE);

		e->guc->resume_time = RESUME_PENDING;
		clear_engine_suspended(e);
		set_engine_pending_enable(e);
		set_engine_enabled(e);
		trace_xe_engine_scheduling_enable(e);

		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
	} else {
		clear_engine_suspended(e);
	}
}

#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS	2
#define SUSPEND		3
#define RESUME		4

static void guc_engine_process_msg(struct xe_sched_msg *msg)
{
	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_engine_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_engine_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_engine_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_engine_process_msg_resume(msg);
		break;
	default:
		XE_BUG_ON("Unknown message type");
	}
}
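/*
 * The opcodes above are carried in an xe_sched_msg queued via
 * guc_engine_add_msg() below and delivered back here through
 * xe_sched_ops.process_msg, so their handling is ordered with job submission
 * on the same xe_gpu_scheduler. CLEANUP/SUSPEND/RESUME use the per-engine
 * static_msgs storage, while SET_SCHED_PROPS messages are kmalloc'd by the
 * callers and freed in __guc_engine_process_msg_set_sched_props() above.
 */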
static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_engine_run_job,
	.free_job = guc_engine_free_job,
	.timedout_job = guc_engine_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_engine_process_msg,
};

static int guc_engine_init(struct xe_engine *e)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_guc_engine *ge;
	long timeout;
	int err;

	XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));

	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
	if (!ge)
		return -ENOMEM;

	e->guc = ge;
	ge->engine = e;
	init_waitqueue_head(&ge->suspend_wait);

	timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
			    e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
			    64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    e->name, gt_to_xe(e->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;
	e->priority = XE_ENGINE_PRIORITY_NORMAL;

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, e);
	if (err)
		goto err_entity;

	e->entity = &ge->entity;

	if (guc_read_stopped(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	switch (e->class) {
	case XE_ENGINE_CLASS_RENDER:
		sprintf(e->name, "rcs%d", e->guc->id);
		break;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		sprintf(e->name, "vcs%d", e->guc->id);
		break;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		sprintf(e->name, "vecs%d", e->guc->id);
		break;
	case XE_ENGINE_CLASS_COPY:
		sprintf(e->name, "bcs%d", e->guc->id);
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		sprintf(e->name, "ccs%d", e->guc->id);
		break;
	default:
		XE_WARN_ON(e->class);
	}

	trace_xe_engine_create(e);

	return 0;

err_entity:
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	kfree(ge);

	return err;
}

static void guc_engine_kill(struct xe_engine *e)
{
	trace_xe_engine_kill(e);
	set_engine_killed(e);
	xe_sched_tdr_queue_imm(&e->guc->sched);
}

static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg,
			       u32 opcode)
{
	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode;
	msg->private_data = e;

	trace_xe_sched_msg_add(msg);
	xe_sched_add_msg(&e->guc->sched, msg);
}

#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2
static void guc_engine_fini(struct xe_engine *e)
{
	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP;

	if (!(e->flags & ENGINE_FLAG_KERNEL))
		guc_engine_add_msg(e, msg, CLEANUP);
	else
		__guc_engine_fini(engine_to_guc(e), e);
}

static int guc_engine_set_priority(struct xe_engine *e,
				   enum xe_engine_priority priority)
{
	struct xe_sched_msg *msg;

	if (e->priority == priority || engine_killed_or_banned(e))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
	e->priority = priority;

	return 0;
}
static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (e->sched_props.timeslice_us == timeslice_us ||
	    engine_killed_or_banned(e))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	e->sched_props.timeslice_us = timeslice_us;
	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_engine_set_preempt_timeout(struct xe_engine *e,
					  u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (e->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    engine_killed_or_banned(e))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	e->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms)
{
	struct xe_gpu_scheduler *sched = &e->guc->sched;

	XE_BUG_ON(engine_registered(e));
	XE_BUG_ON(engine_banned(e));
	XE_BUG_ON(engine_killed(e));

	sched->base.timeout = job_timeout_ms;

	return 0;
}

static int guc_engine_suspend(struct xe_engine *e)
{
	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (engine_killed_or_banned(e) || e->guc->suspend_pending)
		return -EINVAL;

	e->guc->suspend_pending = true;
	guc_engine_add_msg(e, msg, SUSPEND);

	return 0;
}

static void guc_engine_suspend_wait(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);

	wait_event(e->guc->suspend_wait, !e->guc->suspend_pending ||
		   guc_read_stopped(guc));
}

static void guc_engine_resume(struct xe_engine *e)
{
	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME;

	XE_BUG_ON(e->guc->suspend_pending);

	guc_engine_add_msg(e, msg, RESUME);
}

/*
 * All of these functions are an abstraction layer which other parts of XE can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
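/*
 * (Illustrative only.) Callers outside this file are expected to reach this
 * backend through the vtable installed by xe_guc_submit_init(), e.g.
 * e->gt->engine_ops->suspend(e), rather than by calling the static functions
 * above directly.
 */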
static const struct xe_engine_ops guc_engine_ops = {
	.init = guc_engine_init,
	.kill = guc_engine_kill,
	.fini = guc_engine_fini,
	.set_priority = guc_engine_set_priority,
	.set_timeslice = guc_engine_set_timeslice,
	.set_preempt_timeout = guc_engine_set_preempt_timeout,
	.set_job_timeout = guc_engine_set_job_timeout,
	.suspend = guc_engine_suspend,
	.suspend_wait = guc_engine_suspend_wait,
	.resume = guc_engine_resume,
};

static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
{
	struct xe_gpu_scheduler *sched = &e->guc->sched;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (engine_destroyed(e) && engine_registered(e)) {
		if (engine_banned(e))
			xe_engine_put(e);
		else
			__guc_engine_fini(guc, e);
	}
	if (e->guc->suspend_pending) {
		set_engine_suspended(e);
		suspend_fence_signal(e);
	}
	atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
		   &e->guc->state);
	e->guc->resume_time = 0;
	trace_xe_engine_stop(e);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				xe_sched_tdr_queue_imm(&e->guc->sched);
				set_engine_banned(e);
			}
		}
	}
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to protect against concurrent
	 * resets and releasing any TDRs waiting on
	 * guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, !guc_read_stopped(guc));
}

int xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_engine *e;
	unsigned long index;

	XE_BUG_ON(guc_read_stopped(guc) != 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.engine_lookup, index, e)
		guc_engine_stop(guc, e);

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

	return 0;
}
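/*
 * Expected GT reset flow through this file (inferred from the asserts above
 * and below): xe_guc_submit_reset_prepare() marks submission as stopped and
 * wakes any CT waiters, xe_guc_submit_stop() then quiesces every engine while
 * the GuC is reset, and once the GuC is back xe_guc_submit_start() clears the
 * stopped state and restarts the schedulers; unrelated paths can block on
 * xe_guc_submit_reset_wait() until that happens.
 */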
static void guc_engine_start(struct xe_engine *e)
{
	struct xe_gpu_scheduler *sched = &e->guc->sched;

	if (!engine_killed_or_banned(e)) {
		int i;

		trace_xe_engine_resubmit(e);
		for (i = 0; i < e->width; ++i)
			xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail);
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_engine *e;
	unsigned long index;

	XE_BUG_ON(guc_read_stopped(guc) != 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.engine_lookup, index, e)
		guc_engine_start(e);
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

static struct xe_engine *
g2h_engine_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
		return NULL;
	}

	e = xa_load(&guc->submission_state.engine_lookup, guc_id);
	if (unlikely(!e)) {
		drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
		return NULL;
	}

	XE_BUG_ON(e->guc->id != guc_id);

	return e;
}

static void deregister_engine(struct xe_guc *guc, struct xe_engine *e)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		e->guc->id,
	};

	trace_xe_engine_deregister(e);

	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 2)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	if (unlikely(!engine_pending_enable(e) &&
		     !engine_pending_disable(e))) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&e->guc->state));
		return -EPROTO;
	}

	trace_xe_engine_scheduling_done(e);

	if (engine_pending_enable(e)) {
		e->guc->resume_time = ktime_get();
		clear_engine_pending_enable(e);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		clear_engine_pending_disable(e);
		if (e->guc->suspend_pending) {
			suspend_fence_signal(e);
		} else {
			if (engine_banned(e)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			deregister_engine(guc, e);
		}
	}

	return 0;
}
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	if (!engine_destroyed(e) || engine_pending_disable(e) ||
	    engine_pending_enable(e) || engine_enabled(e)) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&e->guc->state));
		return -EPROTO;
	}

	trace_xe_engine_deregister_done(e);

	clear_engine_registered(e);
	if (engine_banned(e))
		xe_engine_put(e);
	else
		__guc_engine_fini(guc, e);

	return 0;
}

int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);

	/* FIXME: Do error capture, most likely async */

	trace_xe_engine_reset(e);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_engine_timedout_job). Otherwise, kick the DRM scheduler to
	 * cancel the jobs by immediately queueing the TDR, which kicks
	 * guc_engine_timedout_job.
	 */
	set_engine_reset(e);
	if (!engine_banned(e))
		xe_sched_tdr_queue_imm(&e->guc->sched);

	return 0;
}

int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
					   u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
	trace_xe_engine_memory_cat_error(e);

	/* Treat the same as engine reset */
	set_engine_reset(e);
	if (!engine_banned(e))
		xe_sched_tdr_queue_imm(&e->guc->sched);

	return 0;
}

int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	u8 guc_class, instance;
	u32 reason;

	if (unlikely(len != 3)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	guc_class = msg[0];
	instance = msg[1];
	reason = msg[2];

	/* Unexpected failure of a hardware feature, log an actual error */
	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
		guc_class, instance, reason);

	xe_gt_reset_async(guc_to_gt(guc));

	return 0;
}

static void
guc_engine_wq_snapshot_capture(struct xe_engine *e,
			       struct xe_guc_submit_engine_snapshot *snapshot)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	int i;

	snapshot->guc.wqi_head = e->guc->wqi_head;
	snapshot->guc.wqi_tail = e->guc->wqi_tail;
	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
							  wq_desc.wq_status);

	if (snapshot->parallel.wq_desc.head !=
	    snapshot->parallel.wq_desc.tail) {
		for (i = snapshot->parallel.wq_desc.head;
		     i != snapshot->parallel.wq_desc.tail;
		     i = (i + sizeof(u32)) % WQ_SIZE)
			snapshot->parallel.wq[i / sizeof(u32)] =
				parallel_read(xe, map, wq[i / sizeof(u32)]);
	}
}
"\tWQ head: %u (internal), %d (memory)\n", 1630 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 1631 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 1632 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 1633 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 1634 1635 if (snapshot->parallel.wq_desc.head != 1636 snapshot->parallel.wq_desc.tail) { 1637 for (i = snapshot->parallel.wq_desc.head; 1638 i != snapshot->parallel.wq_desc.tail; 1639 i = (i + sizeof(u32)) % WQ_SIZE) 1640 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 1641 snapshot->parallel.wq[i / sizeof(u32)]); 1642 } 1643 } 1644 1645 /** 1646 * xe_guc_engine_snapshot_capture - Take a quick snapshot of the GuC Engine. 1647 * @e: Xe Engine. 1648 * 1649 * This can be printed out in a later stage like during dev_coredump 1650 * analysis. 1651 * 1652 * Returns: a GuC Submit Engine snapshot object that must be freed by the 1653 * caller, using `xe_guc_engine_snapshot_free`. 1654 */ 1655 struct xe_guc_submit_engine_snapshot * 1656 xe_guc_engine_snapshot_capture(struct xe_engine *e) 1657 { 1658 struct xe_guc *guc = engine_to_guc(e); 1659 struct xe_device *xe = guc_to_xe(guc); 1660 struct xe_gpu_scheduler *sched = &e->guc->sched; 1661 struct xe_sched_job *job; 1662 struct xe_guc_submit_engine_snapshot *snapshot; 1663 int i; 1664 1665 snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); 1666 1667 if (!snapshot) { 1668 drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n"); 1669 return NULL; 1670 } 1671 1672 snapshot->guc.id = e->guc->id; 1673 memcpy(&snapshot->name, &e->name, sizeof(snapshot->name)); 1674 snapshot->class = e->class; 1675 snapshot->logical_mask = e->logical_mask; 1676 snapshot->width = e->width; 1677 snapshot->refcount = kref_read(&e->refcount); 1678 snapshot->sched_timeout = sched->base.timeout; 1679 snapshot->sched_props.timeslice_us = e->sched_props.timeslice_us; 1680 snapshot->sched_props.preempt_timeout_us = 1681 e->sched_props.preempt_timeout_us; 1682 1683 snapshot->lrc = kmalloc_array(e->width, sizeof(struct lrc_snapshot), 1684 GFP_ATOMIC); 1685 1686 if (!snapshot->lrc) { 1687 drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n"); 1688 } else { 1689 for (i = 0; i < e->width; ++i) { 1690 struct xe_lrc *lrc = e->lrc + i; 1691 1692 snapshot->lrc[i].context_desc = 1693 lower_32_bits(xe_lrc_ggtt_addr(lrc)); 1694 snapshot->lrc[i].head = xe_lrc_ring_head(lrc); 1695 snapshot->lrc[i].tail.internal = lrc->ring.tail; 1696 snapshot->lrc[i].tail.memory = 1697 xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); 1698 snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); 1699 snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); 1700 } 1701 } 1702 1703 snapshot->schedule_state = atomic_read(&e->guc->state); 1704 snapshot->engine_flags = e->flags; 1705 1706 snapshot->parallel_execution = xe_engine_is_parallel(e); 1707 if (snapshot->parallel_execution) 1708 guc_engine_wq_snapshot_capture(e, snapshot); 1709 1710 spin_lock(&sched->base.job_list_lock); 1711 snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); 1712 snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, 1713 sizeof(struct pending_list_snapshot), 1714 GFP_ATOMIC); 1715 1716 if (!snapshot->pending_list) { 1717 drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n"); 1718 } else { 1719 i = 0; 1720 list_for_each_entry(job, &sched->base.pending_list, drm.list) { 1721 snapshot->pending_list[i].seqno = 1722 xe_sched_job_seqno(job); 1723 snapshot->pending_list[i].fence = 1724 
/**
 * xe_guc_engine_snapshot_print - Print out a given GuC Engine snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given GuC Submit Engine snapshot object.
 */
void
xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
			     struct drm_printer *p)
{
	int i;

	if (!snapshot)
		return;

	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
	drm_printf(p, "\tName: %s\n", snapshot->name);
	drm_printf(p, "\tClass: %d\n", snapshot->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
	drm_printf(p, "\tWidth: %d\n", snapshot->width);
	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n",
		   snapshot->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   snapshot->sched_props.preempt_timeout_us);

	for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
			   snapshot->lrc[i].context_desc);
		drm_printf(p, "\tLRC Head: (memory) %u\n",
			   snapshot->lrc[i].head);
		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
			   snapshot->lrc[i].tail.internal,
			   snapshot->lrc[i].tail.memory);
		drm_printf(p, "\tStart seqno: (memory) %d\n",
			   snapshot->lrc[i].start_seqno);
		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
	}
	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->engine_flags);

	if (snapshot->parallel_execution)
		guc_engine_wq_snapshot_print(snapshot, p);

	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
	     i++)
		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
			   snapshot->pending_list[i].seqno,
			   snapshot->pending_list[i].fence,
			   snapshot->pending_list[i].finished);
}

/**
 * xe_guc_engine_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->lrc);
	kfree(snapshot->pending_list);
	kfree(snapshot);
}

static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
{
	struct xe_guc_submit_engine_snapshot *snapshot;

	snapshot = xe_guc_engine_snapshot_capture(e);
	xe_guc_engine_snapshot_print(snapshot, p);
	xe_guc_engine_snapshot_free(snapshot);
}

/**
 * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
 *
 * This function captures and prints snapshots of **all** GuC Engines.
 */
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_engine *e;
	unsigned long index;

	if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.engine_lookup, index, e)
		guc_engine_print(e, p);
	mutex_unlock(&guc->submission_state.lock);
}